From 8d97d4b0ea29ee6103930714a320ad787cfb5fce Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Thu, 28 Aug 2025 17:04:45 -0700 Subject: [PATCH] use fs.gguf.File to show models --- fs/gguf/gguf.go | 41 +++++++++++------- fs/gguf/keyvalue.go | 89 +++++++++++++++++++------------------- fs/gguf/tensor.go | 103 +++++++++++++++++++++++--------------------- server/routes.go | 71 +++++++++++++++++------------- 4 files changed, 166 insertions(+), 138 deletions(-) diff --git a/fs/gguf/gguf.go b/fs/gguf/gguf.go index aba316e2c..9db662e12 100644 --- a/fs/gguf/gguf.go +++ b/fs/gguf/gguf.go @@ -35,9 +35,10 @@ type File struct { Magic [4]byte Version uint32 - keyValues *lazy[KeyValue] - tensors *lazy[TensorInfo] - offset int64 + keyValues *lazy[KeyValue] + tensorInfos *lazy[TensorInfo] + offset int64 + n uint64 file *os.File reader *bufferedReader @@ -69,12 +70,12 @@ func Open(path string) (f *File, err error) { return nil, fmt.Errorf("%w version %v", ErrUnsupported, f.Version) } - f.tensors, err = newLazy(f, f.readTensor) + f.tensorInfos, err = newLazy(f, f.readTensor) if err != nil { return nil, err } - f.tensors.successFunc = func() error { + f.tensorInfos.successFunc = func() error { offset := f.reader.offset alignment := cmp.Or(f.KeyValue("general.alignment").Int(), 32) @@ -119,12 +120,15 @@ func (f *File) readTensor() (TensorInfo, error) { return TensorInfo{}, err } - return TensorInfo{ + tensorInfo := TensorInfo{ Name: name, Offset: offset, Shape: shape, Type: TensorType(type_), - }, nil + } + + f.n += tensorInfo.NumValues() + return tensorInfo, nil } func (f *File) readKeyValue() (KeyValue, error) { @@ -308,7 +312,7 @@ func readArrayString(f *File, n uint64) (*lazy[string], error) { func (f *File) Close() error { f.keyValues.stop() - f.tensors.stop() + f.tensorInfos.stop() return f.file.Close() } @@ -341,15 +345,15 @@ func (f *File) KeyValues() iter.Seq2[int, KeyValue] { } func (f *File) TensorInfo(name string) TensorInfo { - if index := slices.IndexFunc(f.tensors.values, func(t TensorInfo) bool { + if index := slices.IndexFunc(f.tensorInfos.values, func(t TensorInfo) bool { return t.Name == name }); index >= 0 { - return f.tensors.values[index] + return f.tensorInfos.values[index] } // fast-forward through key values if we haven't already _ = f.keyValues.rest() - for tensor, ok := f.tensors.next(); ok; tensor, ok = f.tensors.next() { + for tensor, ok := f.tensorInfos.next(); ok; tensor, ok = f.tensorInfos.next() { if tensor.Name == name { return tensor } @@ -359,13 +363,13 @@ func (f *File) TensorInfo(name string) TensorInfo { } func (f *File) NumTensors() int { - return int(f.tensors.count) + return int(f.tensorInfos.count) } func (f *File) TensorInfos() iter.Seq2[int, TensorInfo] { // fast forward through key values if we haven't already - f.keyValues.rest() - return f.tensors.All() + _ = f.keyValues.rest() + return f.tensorInfos.All() } func (f *File) TensorReader(name string) (TensorInfo, io.Reader, error) { @@ -375,6 +379,11 @@ func (f *File) TensorReader(name string) (TensorInfo, io.Reader, error) { } // fast forward through tensor info if we haven't already - _ = f.tensors.rest() - return t, io.NewSectionReader(f.file, f.offset+int64(t.Offset), t.NumBytes()), nil + _ = f.tensorInfos.rest() + return t, io.NewSectionReader(f.file, f.offset+int64(t.Offset), int64(t.NumBytes())), nil +} + +func (f *File) NumValues() uint64 { + _ = f.tensorInfos.rest() + return f.n } diff --git a/fs/gguf/keyvalue.go b/fs/gguf/keyvalue.go index 8cbbee133..e2f815acf 100644 --- a/fs/gguf/keyvalue.go +++ b/fs/gguf/keyvalue.go @@ -1,6 +1,7 @@ package gguf import ( + "encoding/json" "iter" "log/slog" "reflect" @@ -13,56 +14,15 @@ type KeyValue struct { } func (kv KeyValue) Valid() bool { - return kv.Key != "" && kv.Value.value != nil + return kv.Key != "" && kv.value != nil } type Value struct { value any } -func value[T any](v Value, kinds ...reflect.Kind) (t T) { - vv := reflect.ValueOf(v.value) - if slices.Contains(kinds, vv.Kind()) { - t = vv.Convert(reflect.TypeOf(t)).Interface().(T) - } - return -} - -func values[T any](v Value, kinds ...reflect.Kind) (ts []T) { - switch vv := reflect.ValueOf(v.value); vv.Kind() { - case reflect.Ptr: - out := vv.MethodByName("Values").Call(nil) - if len(out) > 0 && out[0].IsValid() { - next, stop := iter.Pull(out[0].Seq()) - defer stop() - - ts = make([]T, vv.Elem().FieldByName("count").Uint()) - for i := range ts { - t, ok := next() - if !ok { - slog.Error("error reading value", "index", i) - return nil - } - - ts[i] = t.Convert(reflect.TypeOf(ts[i])).Interface().(T) - } - - return ts - } - - case reflect.Slice: - if slices.Contains(kinds, vv.Type().Elem().Kind()) { - ts = make([]T, vv.Len()) - for i := range vv.Len() { - ts[i] = vv.Index(i).Convert(reflect.TypeOf(ts[i])).Interface().(T) - } - } - } - return -} - -func (v Value) Any() any { - return v.value +func (v Value) MarshalJSON() ([]byte, error) { + return json.Marshal(v.value) } // Int returns Value as a signed integer. If it is not a signed integer, it returns 0. @@ -114,3 +74,44 @@ func (v Value) String() string { func (v Value) Strings() (strings []string) { return values[string](v, reflect.String) } + +func value[T any](v Value, kinds ...reflect.Kind) (t T) { + vv := reflect.ValueOf(v.value) + if slices.Contains(kinds, vv.Kind()) { + t = vv.Convert(reflect.TypeOf(t)).Interface().(T) + } + return +} + +func values[T any](v Value, kinds ...reflect.Kind) (ts []T) { + switch vv := reflect.ValueOf(v.value); vv.Kind() { + case reflect.Ptr: + out := vv.MethodByName("Values").Call(nil) + if len(out) > 0 && out[0].IsValid() { + next, stop := iter.Pull(out[0].Seq()) + defer stop() + + ts = make([]T, vv.Elem().FieldByName("count").Uint()) + for i := range ts { + t, ok := next() + if !ok { + slog.Error("error reading value", "index", i) + return nil + } + + ts[i] = t.Convert(reflect.TypeOf(ts[i])).Interface().(T) + } + + return ts + } + + case reflect.Slice: + if slices.Contains(kinds, vv.Type().Elem().Kind()) { + ts = make([]T, vv.Len()) + for i := range vv.Len() { + ts[i] = vv.Index(i).Convert(reflect.TypeOf(ts[i])).Interface().(T) + } + } + } + return +} diff --git a/fs/gguf/tensor.go b/fs/gguf/tensor.go index 194c1d739..1030b2385 100644 --- a/fs/gguf/tensor.go +++ b/fs/gguf/tensor.go @@ -16,17 +16,17 @@ func (ti TensorInfo) Valid() bool { return ti.Name != "" && ti.NumBytes() > 0 } -func (ti TensorInfo) NumValues() int64 { - var numItems int64 = 1 +func (ti TensorInfo) NumValues() uint64 { + var numItems uint64 = 1 for _, dim := range ti.Shape { - numItems *= int64(dim) + numItems *= dim } return numItems } // NumBytes returns the number of bytes in the tensor. -func (ti TensorInfo) NumBytes() int64 { - return int64(float64(ti.NumValues()) * ti.Type.NumBytes()) +func (ti TensorInfo) NumBytes() uint64 { + return uint64(float64(ti.NumValues()) * ti.Type.NumBytes()) } func (ti TensorInfo) LogValue() slog.Value { @@ -34,8 +34,8 @@ func (ti TensorInfo) LogValue() slog.Value { slog.String("name", ti.Name), slog.Int64("offset", int64(ti.Offset)), slog.Any("shape", ti.Shape), - slog.Int64("num_values", ti.NumValues()), - slog.Int64("num_bytes", ti.NumBytes()), + slog.Uint64("num_values", ti.NumValues()), + slog.Uint64("num_bytes", ti.NumBytes()), slog.Any("type", ti.Type), ) } @@ -97,6 +97,8 @@ const ( tensorTypeIQ4_NL_4_4 tensorTypeIQ4_NL_4_8 tensorTypeIQ4_NL_8_8 + + TensorTypeMXFP4 ) func (tt TensorType) NumBytes() float64 { @@ -163,6 +165,8 @@ func (tt TensorType) typeSize() int64 { return tt.blockSize()/8 + tt.blockSize()/16 + tt.blockSize()/32 case TensorTypeBF16: return 2 + case 4, TensorTypeMXFP4: + return 1 + tt.blockSize() / 2 default: return 0 } @@ -185,7 +189,8 @@ func (tt TensorType) blockSize() int64 { TensorTypeQ5_1, TensorTypeQ8_0, TensorTypeQ8_1, - tensorTypeIQ4_NL: + tensorTypeIQ4_NL, + 4, TensorTypeMXFP4: return 32 default: return 256 @@ -195,83 +200,85 @@ func (tt TensorType) blockSize() int64 { func (tt TensorType) String() string { switch tt { case TensorTypeF32: - return "f32" + return "F32" case TensorTypeF16: - return "f16" + return "F16" case TensorTypeQ4_0: - return "q4_0" + return "Q4_0" case TensorTypeQ4_1: - return "q4_1" - case tensorTypeQ4_2: - return "q4_2" + return "Q4_1" + // case tensorTypeQ4_2: + // return "Q4_2" case tensorTypeQ4_3: - return "q4_3" + return "Q4_3" case TensorTypeQ5_0: - return "q5_0" + return "Q5_0" case TensorTypeQ5_1: - return "q5_1" + return "Q5_1" case TensorTypeQ8_0: - return "q8_0" + return "Q8_0" case TensorTypeQ8_1: - return "q8_1" + return "Q8_1" case TensorTypeQ2_K: - return "q2_k" + return "Q2_K" case TensorTypeQ3_K: - return "q3_k" + return "Q3_K" case TensorTypeQ4_K: - return "q4_k" + return "Q4_K" case TensorTypeQ5_K: - return "q5_k" + return "Q5_K" case TensorTypeQ6_K: - return "q6_k" + return "Q6_K" case TensorTypeQ8_K: - return "q8_k" + return "Q8_K" case tensorTypeIQ2_XXS: - return "iq2_xxs" + return "IQ2_XXS" case tensorTypeIQ2_XS: - return "iq2_xs" + return "IQ2_XS" case tensorTypeIQ3_XXS: - return "iq3_xxs" + return "IQ3_XXS" case tensorTypeIQ1_S: - return "iq1_s" + return "IQ1_S" case tensorTypeIQ4_NL: - return "iq4_nl" + return "IQ4_NL" case tensorTypeIQ3_S: - return "iq3_s" + return "IQ3_S" case tensorTypeIQ2_S: - return "iq2_s" + return "IQ2_S" case tensorTypeIQ4_XS: - return "iq4_xs" + return "IQ4_XS" case TensorTypeI8: - return "i8" + return "I8" case TensorTypeI16: - return "i16" + return "I16" case TensorTypeI32: - return "i32" + return "I32" case TensorTypeI64: - return "i64" + return "I64" case TensorTypeF64: - return "f64" + return "F64" case tensorTypeIQ1_M: - return "iq1_m" + return "IQ1_M" case TensorTypeBF16: - return "bf16" + return "BF16" case tensorTypeQ4_0_4_4: - return "q4_0_4_4" + return "Q4_0_4_4" case tensorTypeQ4_0_4_8: - return "q4_0_4_8" + return "Q4_0_4_8" case tensorTypeQ4_0_8_8: - return "q4_0_8_8" + return "Q4_0_8_8" case tensorTypeTQ1_0: - return "tq1_0" + return "TQ1_0" case tensorTypeTQ2_0: - return "tq2_0" + return "TQ2_0" case tensorTypeIQ4_NL_4_4: - return "iq4_nl_4_4" + return "IQ4_NL_4_4" case tensorTypeIQ4_NL_4_8: - return "iq4_nl_4_8" + return "IQ4_NL_4_8" case tensorTypeIQ4_NL_8_8: - return "iq4_nl_8_8" + return "IQ4_NL_8_8" + case 4, TensorTypeMXFP4: + return "MXFP4" default: return "unknown" } diff --git a/server/routes.go b/server/routes.go index e6e4e2c47..d1ff3ad9a 100644 --- a/server/routes.go +++ b/server/routes.go @@ -31,7 +31,7 @@ import ( "github.com/ollama/ollama/discover" "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/format" - "github.com/ollama/ollama/fs/ggml" + "github.com/ollama/ollama/fs/gguf" "github.com/ollama/ollama/harmony" "github.com/ollama/ollama/llm" "github.com/ollama/ollama/logutil" @@ -534,11 +534,12 @@ func (s *Server) EmbedHandler(c *gin.Context) { return } - kvData, _, err := getModelData(m.ModelPath, false) + f, err := gguf.Open(m.ModelPath) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return } + defer f.Close() var count int for i, s := range input { @@ -548,7 +549,7 @@ func (s *Server) EmbedHandler(c *gin.Context) { return } - ctxLen := min(opts.NumCtx, int(kvData.ContextLength())) + ctxLen := min(opts.NumCtx, int(f.KeyValue("context_length").Int())) if len(tokens) > ctxLen { if !truncate { c.JSON(http.StatusBadRequest, gin.H{"error": "input length exceeds maximum context length"}) @@ -951,53 +952,63 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) { fmt.Fprint(&sb, m.String()) resp.Modelfile = sb.String() - kvData, tensors, err := getModelData(m.ModelPath, req.Verbose) + f, err := gguf.Open(m.ModelPath) if err != nil { return nil, err } + defer f.Close() - delete(kvData, "general.name") - delete(kvData, "tokenizer.chat_template") - resp.ModelInfo = kvData - - tensorData := make([]api.Tensor, len(tensors.Items())) - for cnt, t := range tensors.Items() { - tensorData[cnt] = api.Tensor{Name: t.Name, Type: t.Type(), Shape: t.Shape} + resp.ModelInfo = make(map[string]any, f.NumKeyValues()) + for _, keyValue := range f.KeyValues() { + if !slices.Contains([]string{"general.name", "tokenizer.chat_template"}, keyValue.Key) { + resp.ModelInfo[keyValue.Key] = keyValue.Value + } } - resp.Tensors = tensorData + + resp.Tensors = make([]api.Tensor, f.NumTensors()) + for i, tensorInfo := range f.TensorInfos() { + resp.Tensors[i] = api.Tensor{ + Name: tensorInfo.Name, + Type: tensorInfo.Type.String(), + Shape: tensorInfo.Shape, + } + } + resp.ModelInfo["general.parameter_count"] = f.NumValues() if len(m.ProjectorPaths) > 0 { - projectorData, _, err := getModelData(m.ProjectorPaths[0], req.Verbose) + f, err := gguf.Open(m.ProjectorPaths[0]) if err != nil { return nil, err } - resp.ProjectorInfo = projectorData + defer f.Close() + + resp.ProjectorInfo = make(map[string]any, f.NumKeyValues()) + for _, keyValue := range f.KeyValues() { + resp.ProjectorInfo[keyValue.Key] = keyValue.Value + } } return resp, nil } -func getModelData(digest string, verbose bool) (ggml.KV, ggml.Tensors, error) { - maxArraySize := 0 - if verbose { - maxArraySize = -1 - } - data, err := llm.LoadModel(digest, maxArraySize) +func getModelData(digest string, verbose bool) ([]gguf.KeyValue, []gguf.TensorInfo, error) { + f, err := gguf.Open(digest) if err != nil { - return nil, ggml.Tensors{}, err + return nil, nil, err + } + defer f.Close() + + keyValues := make([]gguf.KeyValue, f.NumKeyValues()) + for i, keyValue := range f.KeyValues() { + keyValues[i] = keyValue } - kv := data.KV() - - if !verbose { - for k := range kv { - if t, ok := kv[k].([]any); len(t) > 5 && ok { - kv[k] = []any{} - } - } + tensorInfos := make([]gguf.TensorInfo, f.NumTensors()) + for i, info := range f.TensorInfos() { + tensorInfos[i] = info } - return kv, data.Tensors(), nil + return keyValues, tensorInfos, nil } func (s *Server) ListHandler(c *gin.Context) {