From 8d51d92f3b97e98f9c7fa4c8cc4d6b13cfc4e390 Mon Sep 17 00:00:00 2001
From: Bruce MacDonald
Date: Wed, 11 Jun 2025 11:28:40 -0700
Subject: [PATCH] server: cache gguf model capabilities rather than reading off disc

---
 server/cache/capabilities.go | 108 ++++++++++++++++++++++++++++++++++
 server/images.go             |  59 +++----------------
 server/routes.go             |  19 +++--
 3 files changed, 126 insertions(+), 60 deletions(-)
 create mode 100644 server/cache/capabilities.go

diff --git a/server/cache/capabilities.go b/server/cache/capabilities.go
new file mode 100644
index 000000000..354944319
--- /dev/null
+++ b/server/cache/capabilities.go
@@ -0,0 +1,108 @@
+package cache
+
+import (
+	"fmt"
+	"log/slog"
+	"os"
+	"slices"
+	"sync"
+
+	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/template"
+	"github.com/ollama/ollama/thinking"
+	"github.com/ollama/ollama/types/model"
+)
+
+// ggufCapabilities caches the capabilities decoded from a gguf file, keyed by
+// model path. Cached slices are shared between callers and must never be
+// handed out or appended to directly; see loadGGUFCapabilities.
+var ggufCapabilities sync.Map
+
+// ModelInfo contains the minimal information needed to determine capabilities
+type ModelInfo struct {
+	ModelPath      string
+	ProjectorPaths []string
+	Template       *template.Template
+}
+
+// Capabilities returns the capabilities that the model supports. Capabilities
+// stored in the gguf file are served from an in-memory cache after the first
+// read; template and projector based capabilities are evaluated on every call.
+// If the gguf file cannot be read the error is logged and only the template
+// and projector based capabilities are reported.
+func Capabilities(info ModelInfo) []model.Capability {
+	capabilities, err := loadGGUFCapabilities(info.ModelPath)
+	if err != nil {
+		slog.Error("could not determine gguf capabilities", "error", err)
+	}
+
+	if info.Template == nil {
+		return capabilities
+	}
+
+	vars := info.Template.Vars()
+
+	// Check for tools capability
+	if slices.Contains(vars, "tools") {
+		capabilities = append(capabilities, model.CapabilityTools)
+	}
+
+	// Check for insert capability
+	if slices.Contains(vars, "suffix") {
+		capabilities = append(capabilities, model.CapabilityInsert)
+	}
+
+	// Check for vision capability in projector-based models
+	if len(info.ProjectorPaths) > 0 {
+		capabilities = append(capabilities, model.CapabilityVision)
+	}
+
+	// Check for thinking capability
+	openingTag, closingTag := thinking.InferTags(info.Template.Template)
+	if openingTag != "" && closingTag != "" {
+		capabilities = append(capabilities, model.CapabilityThinking)
+	}
+
+	return capabilities
+}
+
+// loadGGUFCapabilities returns the capabilities recorded in the gguf file at
+// modelPath, decoding the file only on the first call for a given path. The
+// returned slice is a private copy, so callers may append to it without
+// touching the cached value or racing with concurrent requests.
+func loadGGUFCapabilities(modelPath string) ([]model.Capability, error) {
+	if cached, ok := ggufCapabilities.Load(modelPath); ok {
+		return slices.Clone(cached.([]model.Capability)), nil
+	}
+
+	// If not cached, read the model file to determine capabilities
+	r, err := os.Open(modelPath)
+	if err != nil {
+		return nil, err
+	}
+	defer r.Close()
+
+	f, err := ggml.Decode(r, 1024)
+	if err != nil {
+		return nil, fmt.Errorf("decoding %q: %w", modelPath, err)
+	}
+
+	kv := f.KV()
+	arch := kv.Architecture()
+
+	var capabilities []model.Capability
+	if _, ok := kv[arch+".pooling_type"]; ok {
+		capabilities = append(capabilities, model.CapabilityEmbedding)
+	} else {
+		// If no embedding is specified, we assume the model supports completion
+		capabilities = append(capabilities, model.CapabilityCompletion)
+	}
+	if _, ok := kv[arch+".vision.block_count"]; ok {
+		capabilities = append(capabilities, model.CapabilityVision)
+	}
+
+	// Cache the capabilities for future use
+	ggufCapabilities.Store(modelPath, capabilities)
+
+	return slices.Clone(capabilities), nil
+}
diff --git a/server/images.go b/server/images.go
index 38505cc51..b253310b0 100644
--- a/server/images.go
+++ b/server/images.go
@@ -23,10 +23,9 @@ import (
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
-	"github.com/ollama/ollama/fs/gguf"
 	"github.com/ollama/ollama/parser"
+	"github.com/ollama/ollama/server/cache"
 	"github.com/ollama/ollama/template"
-	"github.com/ollama/ollama/thinking"
 	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
 )
@@ -68,60 +67,14 @@ type Model struct {
 	Template *template.Template
 }
 
-// Capabilities returns the capabilities that the model supports
-func (m *Model) Capabilities() []model.Capability {
-	capabilities := []model.Capability{}
-
-	// Check for completion capability
-	f, err := gguf.Open(m.ModelPath)
-	if err == nil {
-		defer f.Close()
-
-		if f.KeyValue("pooling_type").Valid() {
-			capabilities = append(capabilities, model.CapabilityEmbedding)
-		} else {
-			// If no embedding is specified, we assume the model supports completion
-			capabilities = append(capabilities, model.CapabilityCompletion)
-		}
-		if f.KeyValue("vision.block_count").Valid() {
-			capabilities = append(capabilities, model.CapabilityVision)
-		}
-	} else {
-		slog.Error("couldn't open model file", "error", err)
-	}
-
-	if m.Template == nil {
-		return capabilities
-	}
-
-	// Check for tools capability
-	if slices.Contains(m.Template.Vars(), "tools") {
-		capabilities = append(capabilities, model.CapabilityTools)
-	}
-
-	// Check for insert capability
-	if slices.Contains(m.Template.Vars(), "suffix") {
-		capabilities = append(capabilities, model.CapabilityInsert)
-	}
-
-	// Check for vision capability in projector-based models
-	if len(m.ProjectorPaths) > 0 {
-		capabilities = append(capabilities, model.CapabilityVision)
-	}
-
-	// Check for thinking capability
-	openingTag, closingTag := thinking.InferTags(m.Template.Template)
-	if openingTag != "" && closingTag != "" {
-		capabilities = append(capabilities, model.CapabilityThinking)
-	}
-
-	return capabilities
-}
-
 // CheckCapabilities checks if the model has the specified capabilities returning an error describing
 // any missing or unknown capabilities
 func (m *Model) CheckCapabilities(want ...model.Capability) error {
-	available := m.Capabilities()
+	available := cache.Capabilities(cache.ModelInfo{
+		ModelPath:      m.ModelPath,
+		ProjectorPaths: m.ProjectorPaths,
+		Template:       m.Template,
+	})
 	var errs []error
 
 	// Map capabilities to their corresponding error
diff --git a/server/routes.go b/server/routes.go
index cb46cef11..c89f8838b 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -34,6 +34,7 @@ import (
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/openai"
+	"github.com/ollama/ollama/server/cache"
 	"github.com/ollama/ollama/server/internal/client/ollama"
 	"github.com/ollama/ollama/server/internal/registry"
 	"github.com/ollama/ollama/template"
@@ -819,13 +820,17 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 	}
 
 	resp := &api.ShowResponse{
-		License:      strings.Join(m.License, "\n"),
-		System:       m.System,
-		Template:     m.Template.String(),
-		Details:      modelDetails,
-		Messages:     msgs,
-		Capabilities: m.Capabilities(),
-		ModifiedAt:   manifest.fi.ModTime(),
+		License:  strings.Join(m.License, "\n"),
+		System:   m.System,
+		Template: m.Template.String(),
+		Details:  modelDetails,
+		Messages: msgs,
+		Capabilities: cache.Capabilities(cache.ModelInfo{
+			ModelPath:      m.ModelPath,
+			Template:       m.Template,
+			ProjectorPaths: m.ProjectorPaths,
+		}),
+		ModifiedAt: manifest.fi.ModTime(),
 	}
 
 	var params []string