server: cache gguf model capabilities rather than reading off disc

Bruce MacDonald 2025-06-11 11:28:40 -07:00
parent 2348fef568
commit 8d51d92f3b
3 changed files with 112 additions and 60 deletions
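The core of the change is a read-through cache: decode the gguf once per model path, then serve capability lookups from memory. A minimal standalone sketch of the pattern, with illustrative names (capCache, computeCaps) that are not from this commit:

package main

import (
	"fmt"
	"sync"
)

var capCache sync.Map // model path -> []string

// computeCaps stands in for the expensive on-disk gguf decode (hypothetical helper)
func computeCaps(path string) ([]string, error) {
	fmt.Println("decoding", path) // runs only on a cache miss
	return []string{"completion"}, nil
}

func cachedCaps(path string) ([]string, error) {
	if v, ok := capCache.Load(path); ok {
		return v.([]string), nil // cache hit: no disk access
	}
	caps, err := computeCaps(path)
	if err != nil {
		return nil, err
	}
	// Two goroutines racing on the same miss may both decode; the duplicate
	// work is harmless because the stored value is deterministic.
	capCache.Store(path, caps)
	return caps, nil
}

func main() {
	cachedCaps("model.gguf") // miss: prints "decoding model.gguf"
	cachedCaps("model.gguf") // hit: silent
}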

server/cache/capabilities.go (new file)

@@ -0,0 +1,94 @@
package cache

import (
	"fmt"
	"log/slog"
	"os"
	"slices"
	"sync"

	"github.com/ollama/ollama/fs/ggml"
	"github.com/ollama/ollama/template"
	"github.com/ollama/ollama/thinking"
	"github.com/ollama/ollama/types/model"
)
// capabilitiesCache caches gguf-derived model capabilities, keyed by model path
var capabilitiesCache = &sync.Map{}

// ModelInfo contains the minimal information needed to determine capabilities
type ModelInfo struct {
	ModelPath      string
	ProjectorPaths []string
	Template       *template.Template
}
// Capabilities returns the capabilities that the model supports
func Capabilities(info ModelInfo) []model.Capability {
	capabilities, err := ggufCapabilities(info.ModelPath)
	if err != nil {
		slog.Error("could not determine gguf capabilities", "error", err)
	}

	if info.Template == nil {
		return capabilities
	}

	// Check for tools capability
	if slices.Contains(info.Template.Vars(), "tools") {
		capabilities = append(capabilities, model.CapabilityTools)
	}

	// Check for insert capability
	if slices.Contains(info.Template.Vars(), "suffix") {
		capabilities = append(capabilities, model.CapabilityInsert)
	}

	// Check for vision capability in projector-based models
	if len(info.ProjectorPaths) > 0 {
		capabilities = append(capabilities, model.CapabilityVision)
	}

	// Check for thinking capability
	openingTag, closingTag := thinking.InferTags(info.Template.Template)
	if openingTag != "" && closingTag != "" {
		capabilities = append(capabilities, model.CapabilityThinking)
	}

	return capabilities
}
func ggufCapabilities(modelPath string) ([]model.Capability, error) {
	if cached, ok := capabilitiesCache.Load(modelPath); ok {
		return cached.([]model.Capability), nil
	}

	// If not cached, read the model file to determine capabilities
	capabilities := []model.Capability{}
	r, err := os.Open(modelPath)
	if err != nil {
		return nil, err
	}
	defer r.Close()

	f, err := ggml.Decode(r, 1024)
	if err != nil {
		return nil, err
	}

	if _, ok := f.KV()[fmt.Sprintf("%s.pooling_type", f.KV().Architecture())]; ok {
		capabilities = append(capabilities, model.CapabilityEmbedding)
	} else {
		capabilities = append(capabilities, model.CapabilityCompletion)
	}

	if _, ok := f.KV()[fmt.Sprintf("%s.vision.block_count", f.KV().Architecture())]; ok {
		capabilities = append(capabilities, model.CapabilityVision)
	}

	// Cache the capabilities for future use
	capabilitiesCache.Store(modelPath, capabilities)

	return capabilities, nil
}
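Only the gguf-derived capabilities (embedding/completion and gguf-declared vision) are cached; the template, projector, and thinking checks run on every call. A hedged usage sketch, assuming this cache package is imported and path names a valid gguf file:

func capabilitiesTwice(path string) {
	info := cache.ModelInfo{ModelPath: path} // Template nil: template-derived checks are skipped
	first := cache.Capabilities(info)  // miss: os.Open + ggml.Decode, result stored in capabilitiesCache
	second := cache.Capabilities(info) // hit: served from the map with no file I/O
	fmt.Println(first, second)
}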

server/images.go

@@ -23,10 +23,9 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
-	"github.com/ollama/ollama/fs/gguf"
 	"github.com/ollama/ollama/parser"
+	"github.com/ollama/ollama/server/cache"
 	"github.com/ollama/ollama/template"
-	"github.com/ollama/ollama/thinking"
 	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
 )
@@ -68,60 +67,14 @@ type Model struct {
 	Template *template.Template
 }
 
-// Capabilities returns the capabilities that the model supports
-func (m *Model) Capabilities() []model.Capability {
-	capabilities := []model.Capability{}
-
-	// Check for completion capability
-	f, err := gguf.Open(m.ModelPath)
-	if err == nil {
-		defer f.Close()
-
-		if f.KeyValue("pooling_type").Valid() {
-			capabilities = append(capabilities, model.CapabilityEmbedding)
-		} else {
-			// If no embedding is specified, we assume the model supports completion
-			capabilities = append(capabilities, model.CapabilityCompletion)
-		}
-
-		if f.KeyValue("vision.block_count").Valid() {
-			capabilities = append(capabilities, model.CapabilityVision)
-		}
-	} else {
-		slog.Error("couldn't open model file", "error", err)
-	}
-
-	if m.Template == nil {
-		return capabilities
-	}
-
-	// Check for tools capability
-	if slices.Contains(m.Template.Vars(), "tools") {
-		capabilities = append(capabilities, model.CapabilityTools)
-	}
-
-	// Check for insert capability
-	if slices.Contains(m.Template.Vars(), "suffix") {
-		capabilities = append(capabilities, model.CapabilityInsert)
-	}
-
-	// Check for vision capability in projector-based models
-	if len(m.ProjectorPaths) > 0 {
-		capabilities = append(capabilities, model.CapabilityVision)
-	}
-
-	// Check for thinking capability
-	openingTag, closingTag := thinking.InferTags(m.Template.Template)
-	if openingTag != "" && closingTag != "" {
-		capabilities = append(capabilities, model.CapabilityThinking)
-	}
-
-	return capabilities
-}
-
 // CheckCapabilities checks if the model has the specified capabilities returning an error describing
 // any missing or unknown capabilities
 func (m *Model) CheckCapabilities(want ...model.Capability) error {
-	available := m.Capabilities()
+	available := cache.Capabilities(cache.ModelInfo{
+		ModelPath:      m.ModelPath,
+		ProjectorPaths: m.ProjectorPaths,
+		Template:       m.Template,
+	})
 
 	var errs []error
 	// Map capabilities to their corresponding error
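Callers are unchanged: they still request capabilities through CheckCapabilities, and the disk read behind the check now happens at most once per model path. A hedged usage sketch (the requested capability here is illustrative):

// inside code that holds a *Model m
if err := m.CheckCapabilities(model.CapabilityTools); err != nil {
	return err // describes any requested capability the model lacks
}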

server/routes.go

@@ -34,6 +34,7 @@ import (
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/openai"
+	"github.com/ollama/ollama/server/cache"
 	"github.com/ollama/ollama/server/internal/client/ollama"
 	"github.com/ollama/ollama/server/internal/registry"
 	"github.com/ollama/ollama/template"
@@ -819,13 +820,17 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 	}
 
 	resp := &api.ShowResponse{
-		License:      strings.Join(m.License, "\n"),
-		System:       m.System,
-		Template:     m.Template.String(),
-		Details:      modelDetails,
-		Messages:     msgs,
-		Capabilities: m.Capabilities(),
-		ModifiedAt:   manifest.fi.ModTime(),
+		License:  strings.Join(m.License, "\n"),
+		System:   m.System,
+		Template: m.Template.String(),
+		Details:  modelDetails,
+		Messages: msgs,
+		Capabilities: cache.Capabilities(cache.ModelInfo{
+			ModelPath:      m.ModelPath,
+			Template:       m.Template,
+			ProjectorPaths: m.ProjectorPaths,
+		}),
+		ModifiedAt: manifest.fi.ModTime(),
 	}
 
 	var params []string
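The same cached lookup now backs the capabilities reported by /api/show. A minimal client-side sketch using the api package; the model name is an assumption:

package main

import (
	"context"
	"fmt"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		panic(err)
	}
	// "llama3.2" is an illustrative model name
	resp, err := client.Show(context.Background(), &api.ShowRequest{Model: "llama3.2"})
	if err != nil {
		panic(err)
	}
	fmt.Println(resp.Capabilities) // e.g. [completion tools]
}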