server: cache gguf model capabilities rather than reading off disc
This commit is contained in:
parent
2348fef568
commit
8d51d92f3b
|
|
@ -0,0 +1,94 @@
|
||||||
|
package cache
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"slices"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/fs/ggml"
|
||||||
|
"github.com/ollama/ollama/template"
|
||||||
|
"github.com/ollama/ollama/thinking"
|
||||||
|
"github.com/ollama/ollama/types/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ggufCapabilities caches the capabilities read from GGUF model files, keyed
// by model file path; each value is a []model.Capability.
|
||||||
|
var ggufCapabilities = &sync.Map{}
|
||||||
|
|
||||||
|
// ModelInfo contains the minimal information needed to determine capabilities
|
||||||
|
type ModelInfo struct {
|
||||||
|
ModelPath string
|
||||||
|
ProjectorPaths []string
|
||||||
|
Template *template.Template
|
||||||
|
}
|
||||||
|
|
||||||
|
// Capabilities returns the capabilities that the model supports
|
||||||
|
func Capabilities(info ModelInfo) []model.Capability {
|
||||||
|
capabilities, err := ggufCapabilties(info.ModelPath)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("could not determine gguf capabilities", "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if info.Template == nil {
|
||||||
|
return capabilities
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for tools capability
|
||||||
|
if slices.Contains(info.Template.Vars(), "tools") {
|
||||||
|
capabilities = append(capabilities, model.CapabilityTools)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for insert capability
|
||||||
|
if slices.Contains(info.Template.Vars(), "suffix") {
|
||||||
|
capabilities = append(capabilities, model.CapabilityInsert)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for vision capability in projector-based models
|
||||||
|
if len(info.ProjectorPaths) > 0 {
|
||||||
|
capabilities = append(capabilities, model.CapabilityVision)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for thinking capability
|
||||||
|
openingTag, closingTag := thinking.InferTags(info.Template.Template)
|
||||||
|
if openingTag != "" && closingTag != "" {
|
||||||
|
capabilities = append(capabilities, model.CapabilityThinking)
|
||||||
|
}
|
||||||
|
|
||||||
|
return capabilities
|
||||||
|
}
|
||||||
|
|
||||||
|
func ggufCapabilties(modelPath string) ([]model.Capability, error) {
|
||||||
|
if ggufCapabilities, ok := ggufCapabilities.Load(modelPath); ok {
|
||||||
|
capabilities := ggufCapabilities.([]model.Capability)
|
||||||
|
return capabilities, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// If not cached, read the model file to determine capabilities
|
||||||
|
capabilities := []model.Capability{}
|
||||||
|
|
||||||
|
r, err := os.Open(modelPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer r.Close()
|
||||||
|
|
||||||
|
f, err := ggml.Decode(r, 1024)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, ok := f.KV()[fmt.Sprintf("%s.pooling_type", f.KV().Architecture())]; ok {
|
||||||
|
capabilities = append(capabilities, model.CapabilityEmbedding)
|
||||||
|
} else {
|
||||||
|
capabilities = append(capabilities, model.CapabilityCompletion)
|
||||||
|
}
|
||||||
|
if _, ok := f.KV()[fmt.Sprintf("%s.vision.block_count", f.KV().Architecture())]; ok {
|
||||||
|
capabilities = append(capabilities, model.CapabilityVision)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache the capabilities for future use
|
||||||
|
ggufCapabilities.Store(modelPath, capabilities)
|
||||||
|
|
||||||
|
return capabilities, nil
|
||||||
|
}
|
||||||
|
|
@ -23,10 +23,9 @@ import (
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
"github.com/ollama/ollama/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/ollama/ollama/fs/gguf"
|
|
||||||
"github.com/ollama/ollama/parser"
|
"github.com/ollama/ollama/parser"
|
||||||
|
"github.com/ollama/ollama/server/cache"
|
||||||
"github.com/ollama/ollama/template"
|
"github.com/ollama/ollama/template"
|
||||||
"github.com/ollama/ollama/thinking"
|
|
||||||
"github.com/ollama/ollama/types/model"
|
"github.com/ollama/ollama/types/model"
|
||||||
"github.com/ollama/ollama/version"
|
"github.com/ollama/ollama/version"
|
||||||
)
|
)
|
||||||
|
|
@ -68,60 +67,14 @@ type Model struct {
|
||||||
Template *template.Template
|
Template *template.Template
|
||||||
}
|
}
|
||||||
|
|
||||||
// Capabilities returns the capabilities that the model supports
|
|
||||||
func (m *Model) Capabilities() []model.Capability {
|
|
||||||
capabilities := []model.Capability{}
|
|
||||||
|
|
||||||
// Check for completion capability
|
|
||||||
f, err := gguf.Open(m.ModelPath)
|
|
||||||
if err == nil {
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
if f.KeyValue("pooling_type").Valid() {
|
|
||||||
capabilities = append(capabilities, model.CapabilityEmbedding)
|
|
||||||
} else {
|
|
||||||
// If no embedding is specified, we assume the model supports completion
|
|
||||||
capabilities = append(capabilities, model.CapabilityCompletion)
|
|
||||||
}
|
|
||||||
if f.KeyValue("vision.block_count").Valid() {
|
|
||||||
capabilities = append(capabilities, model.CapabilityVision)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
slog.Error("couldn't open model file", "error", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if m.Template == nil {
|
|
||||||
return capabilities
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for tools capability
|
|
||||||
if slices.Contains(m.Template.Vars(), "tools") {
|
|
||||||
capabilities = append(capabilities, model.CapabilityTools)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for insert capability
|
|
||||||
if slices.Contains(m.Template.Vars(), "suffix") {
|
|
||||||
capabilities = append(capabilities, model.CapabilityInsert)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for vision capability in projector-based models
|
|
||||||
if len(m.ProjectorPaths) > 0 {
|
|
||||||
capabilities = append(capabilities, model.CapabilityVision)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for thinking capability
|
|
||||||
openingTag, closingTag := thinking.InferTags(m.Template.Template)
|
|
||||||
if openingTag != "" && closingTag != "" {
|
|
||||||
capabilities = append(capabilities, model.CapabilityThinking)
|
|
||||||
}
|
|
||||||
|
|
||||||
return capabilities
|
|
||||||
}
|
|
||||||
|
|
||||||
// CheckCapabilities checks if the model has the specified capabilities returning an error describing
|
// CheckCapabilities checks if the model has the specified capabilities returning an error describing
|
||||||
// any missing or unknown capabilities
|
// any missing or unknown capabilities
|
||||||
func (m *Model) CheckCapabilities(want ...model.Capability) error {
|
func (m *Model) CheckCapabilities(want ...model.Capability) error {
|
||||||
available := m.Capabilities()
|
available := cache.Capabilities(cache.ModelInfo{
|
||||||
|
ModelPath: m.ModelPath,
|
||||||
|
ProjectorPaths: m.ProjectorPaths,
|
||||||
|
Template: m.Template,
|
||||||
|
})
|
||||||
var errs []error
|
var errs []error
|
||||||
|
|
||||||
// Map capabilities to their corresponding error
|
// Map capabilities to their corresponding error
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ import (
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
"github.com/ollama/ollama/logutil"
|
"github.com/ollama/ollama/logutil"
|
||||||
"github.com/ollama/ollama/openai"
|
"github.com/ollama/ollama/openai"
|
||||||
|
"github.com/ollama/ollama/server/cache"
|
||||||
"github.com/ollama/ollama/server/internal/client/ollama"
|
"github.com/ollama/ollama/server/internal/client/ollama"
|
||||||
"github.com/ollama/ollama/server/internal/registry"
|
"github.com/ollama/ollama/server/internal/registry"
|
||||||
"github.com/ollama/ollama/template"
|
"github.com/ollama/ollama/template"
|
||||||
|
|
@ -819,13 +820,17 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
resp := &api.ShowResponse{
|
resp := &api.ShowResponse{
|
||||||
License: strings.Join(m.License, "\n"),
|
License: strings.Join(m.License, "\n"),
|
||||||
System: m.System,
|
System: m.System,
|
||||||
Template: m.Template.String(),
|
Template: m.Template.String(),
|
||||||
Details: modelDetails,
|
Details: modelDetails,
|
||||||
Messages: msgs,
|
Messages: msgs,
|
||||||
Capabilities: m.Capabilities(),
|
Capabilities: cache.Capabilities(cache.ModelInfo{
|
||||||
ModifiedAt: manifest.fi.ModTime(),
|
ModelPath: m.ModelPath,
|
||||||
|
Template: m.Template,
|
||||||
|
ProjectorPaths: m.ProjectorPaths,
|
||||||
|
}),
|
||||||
|
ModifiedAt: manifest.fi.ModTime(),
|
||||||
}
|
}
|
||||||
|
|
||||||
var params []string
|
var params []string
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue