server: do partial gguf kv read for capability check

2025-06-06 15:51:38 -07:00 · 2025-06-06 15:51:38 -07:00 · 3cf62838ce
parent 22aed78048
commit 3cf62838ce
3 changed files with 271 additions and 97 deletions
--- a/fs/gguf/keyvalue.go
+++ b/fs/gguf/keyvalue.go
@ -84,3 +84,19 @@ func (v Value) String() string {
 func (v Value) Strings() (strings []string) {
 	return values[string](v, reflect.String)
 }
+
+// IsNil checks if the Value is nil. It returns true if the value is nil or if it is a nil pointer, interface, slice, map, channel, or function.
+func (v Value) IsNil() bool {
+	if v.value == nil {
+		return true
+	}
+
+	// Check for nil pointers, interfaces, slices, maps, channels, and functions
+	rv := reflect.ValueOf(v.value)
+	switch rv.Kind() {
+	case reflect.Ptr, reflect.Interface, reflect.Slice, reflect.Map, reflect.Chan, reflect.Func:
+		return rv.IsNil()
+	}
+
+	return false
+}
--- a/server/images.go
+++ b/server/images.go
@ -23,7 +23,7 @@ import (

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
-	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/fs/gguf"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/template"
 	"github.com/ollama/ollama/thinking"
@ -73,22 +73,20 @@ func (m *Model) Capabilities() []model.Capability {
 	capabilities := []model.Capability{}

 	// Check for completion capability
-	r, err := os.Open(m.ModelPath)
+	f, err := gguf.Open(m.ModelPath)
 	if err == nil {
-		defer r.Close()
+		defer f.Close()

-		f, err := ggml.Decode(r, 1024)
-		if err == nil {
-			if _, ok := f.KV()[fmt.Sprintf("%s.pooling_type", f.KV().Architecture())]; ok {
-				capabilities = append(capabilities, model.CapabilityEmbedding)
-			} else {
-				capabilities = append(capabilities, model.CapabilityCompletion)
-			}
-			if _, ok := f.KV()[fmt.Sprintf("%s.vision.block_count", f.KV().Architecture())]; ok {
-				capabilities = append(capabilities, model.CapabilityVision)
-			}
+		embedding := f.KeyValue("pooling_type")
+		if !embedding.Value.IsNil() {
+			capabilities = append(capabilities, model.CapabilityEmbedding)
 		} else {
-			slog.Error("couldn't decode ggml", "error", err)
+			// If no embedding is specified, we assume the model supports completion
+			capabilities = append(capabilities, model.CapabilityCompletion)
+		}
+		vision := f.KeyValue("vision.block_count")
+		if !vision.Value.IsNil() {
+			capabilities = append(capabilities, model.CapabilityVision)
 		}
 	} else {
 		slog.Error("couldn't open model file", "error", err)
--- a/server/images_test.go
+++ b/server/images_test.go
@ -1,9 +1,8 @@
 package server

 import (
-	"bytes"
 	"encoding/binary"
-	"errors"
+	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
@ -13,81 +12,200 @@ import (
 	"github.com/ollama/ollama/types/model"
 )

-// Constants for GGUF magic bytes and version
-var (
-	ggufMagic = []byte{0x47, 0x47, 0x55, 0x46} // "GGUF"
-	ggufVer   = uint32(3)                      // Version 3
+// GGUF type constants (matching gguf package)
+const (
+	typeUint8   = uint32(0)
+	typeInt8    = uint32(1)
+	typeUint16  = uint32(2)
+	typeInt16   = uint32(3)
+	typeUint32  = uint32(4)
+	typeInt32   = uint32(5)
+	typeFloat32 = uint32(6)
+	typeBool    = uint32(7)
+	typeString  = uint32(8)
+	typeArray   = uint32(9)
+	typeUint64  = uint32(10)
+	typeInt64   = uint32(11)
+	typeFloat64 = uint32(12)
 )

-// Helper function to create mock GGUF data
-func createMockGGUFData(architecture string, vision bool) []byte {
-	var buf bytes.Buffer
+type testTensorInfo struct {
+	Name  string
+	Shape []uint64
+	Type  uint32
+}

-	// Write GGUF header
-	buf.Write(ggufMagic)
-	binary.Write(&buf, binary.LittleEndian, ggufVer)
-
-	// Write tensor count (0 for our test)
-	var numTensors uint64 = 0
-	binary.Write(&buf, binary.LittleEndian, numTensors)
-
-	// Calculate number of metadata entries
-	numMetaEntries := uint64(1) // architecture entry
-	if vision {
-		numMetaEntries++
+// Helper function to create test GGUF files (matching gguf package approach)
+func createTestGGUFFile(path string, keyValues map[string]any, tensors []testTensorInfo) error {
+	file, err := os.Create(path)
+	if err != nil {
+		return err
 	}
-	// Add embedding entry if architecture is "bert"
-	if architecture == "bert" {
-		numMetaEntries++
-	}
-	binary.Write(&buf, binary.LittleEndian, numMetaEntries)
+	defer file.Close()

-	// Write architecture metadata
-	archKey := "general.architecture"
-	keyLen := uint64(len(archKey))
-	binary.Write(&buf, binary.LittleEndian, keyLen)
-	buf.WriteString(archKey)
-
-	// String type (8)
-	var strType uint32 = 8
-	binary.Write(&buf, binary.LittleEndian, strType)
-
-	// String length
-	strLen := uint64(len(architecture))
-	binary.Write(&buf, binary.LittleEndian, strLen)
-	buf.WriteString(architecture)
-
-	if vision {
-		visionKey := architecture + ".vision.block_count"
-		keyLen = uint64(len(visionKey))
-		binary.Write(&buf, binary.LittleEndian, keyLen)
-		buf.WriteString(visionKey)
-
-		// uint32 type (4)
-		var uint32Type uint32 = 4
-		binary.Write(&buf, binary.LittleEndian, uint32Type)
-
-		// uint32 value (1)
-		var countVal uint32 = 1
-		binary.Write(&buf, binary.LittleEndian, countVal)
-	}
-	// Write embedding metadata if architecture is "bert"
-	if architecture == "bert" {
-		poolKey := architecture + ".pooling_type"
-		keyLen = uint64(len(poolKey))
-		binary.Write(&buf, binary.LittleEndian, keyLen)
-		buf.WriteString(poolKey)
-
-		// uint32 type (4)
-		var uint32Type uint32 = 4
-		binary.Write(&buf, binary.LittleEndian, uint32Type)
-
-		// uint32 value (1)
-		var poolingVal uint32 = 1
-		binary.Write(&buf, binary.LittleEndian, poolingVal)
+	// Write GGUF magic
+	if _, err := file.Write([]byte("GGUF")); err != nil {
+		return err
 	}

-	return buf.Bytes()
+	// Write version
+	if err := binary.Write(file, binary.LittleEndian, uint32(3)); err != nil {
+		return err
+	}
+
+	// Write tensor count
+	if err := binary.Write(file, binary.LittleEndian, uint64(len(tensors))); err != nil {
+		return err
+	}
+
+	// Write metadata count
+	if err := binary.Write(file, binary.LittleEndian, uint64(len(keyValues))); err != nil {
+		return err
+	}
+
+	// Write metadata
+	for key, value := range keyValues {
+		if err := writeKeyValue(file, key, value); err != nil {
+			return err
+		}
+	}
+
+	// Write tensor info
+	for _, tensor := range tensors {
+		if err := writeTensorInfo(file, tensor); err != nil {
+			return err
+		}
+	}
+
+	// Write some dummy tensor data
+	dummyData := make([]byte, 1024)
+	file.Write(dummyData)
+
+	return nil
+}
+
+func writeKeyValue(file *os.File, key string, value any) error {
+	// Write key length and key
+	if err := binary.Write(file, binary.LittleEndian, uint64(len(key))); err != nil {
+		return err
+	}
+	if _, err := file.Write([]byte(key)); err != nil {
+		return err
+	}
+
+	// Write value based on type
+	switch v := value.(type) {
+	case string:
+		if err := binary.Write(file, binary.LittleEndian, uint32(typeString)); err != nil {
+			return err
+		}
+		if err := binary.Write(file, binary.LittleEndian, uint64(len(v))); err != nil {
+			return err
+		}
+		_, err := file.Write([]byte(v))
+		return err
+	case int64:
+		if err := binary.Write(file, binary.LittleEndian, typeInt64); err != nil {
+			return err
+		}
+		return binary.Write(file, binary.LittleEndian, v)
+	case uint32:
+		if err := binary.Write(file, binary.LittleEndian, typeUint32); err != nil {
+			return err
+		}
+		return binary.Write(file, binary.LittleEndian, v)
+	case bool:
+		if err := binary.Write(file, binary.LittleEndian, typeBool); err != nil {
+			return err
+		}
+		return binary.Write(file, binary.LittleEndian, v)
+	case float64:
+		if err := binary.Write(file, binary.LittleEndian, uint32(typeFloat64)); err != nil {
+			return err
+		}
+		return binary.Write(file, binary.LittleEndian, v)
+	case []string:
+		if err := binary.Write(file, binary.LittleEndian, uint32(typeArray)); err != nil {
+			return err
+		}
+		if err := binary.Write(file, binary.LittleEndian, typeString); err != nil {
+			return err
+		}
+		if err := binary.Write(file, binary.LittleEndian, uint64(len(v))); err != nil {
+			return err
+		}
+		for _, s := range v {
+			if err := binary.Write(file, binary.LittleEndian, uint64(len(s))); err != nil {
+				return err
+			}
+			if _, err := file.Write([]byte(s)); err != nil {
+				return err
+			}
+		}
+		return nil
+	case []int64:
+		if err := binary.Write(file, binary.LittleEndian, uint32(typeArray)); err != nil {
+			return err
+		}
+		if err := binary.Write(file, binary.LittleEndian, typeInt64); err != nil {
+			return err
+		}
+		if err := binary.Write(file, binary.LittleEndian, uint64(len(v))); err != nil {
+			return err
+		}
+		for _, i := range v {
+			if err := binary.Write(file, binary.LittleEndian, i); err != nil {
+				return err
+			}
+		}
+		return nil
+	case []float64:
+		if err := binary.Write(file, binary.LittleEndian, typeArray); err != nil {
+			return err
+		}
+		if err := binary.Write(file, binary.LittleEndian, typeFloat64); err != nil {
+			return err
+		}
+		if err := binary.Write(file, binary.LittleEndian, uint64(len(v))); err != nil {
+			return err
+		}
+		for _, f := range v {
+			if err := binary.Write(file, binary.LittleEndian, f); err != nil {
+				return err
+			}
+		}
+		return nil
+	default:
+		return fmt.Errorf("unsupported value type: %T", value)
+	}
+}
+
+func writeTensorInfo(file *os.File, tensor testTensorInfo) error {
+	// Write tensor name
+	if err := binary.Write(file, binary.LittleEndian, uint64(len(tensor.Name))); err != nil {
+		return err
+	}
+	if _, err := file.Write([]byte(tensor.Name)); err != nil {
+		return err
+	}
+
+	// Write dimensions
+	if err := binary.Write(file, binary.LittleEndian, uint32(len(tensor.Shape))); err != nil {
+		return err
+	}
+	for _, dim := range tensor.Shape {
+		if err := binary.Write(file, binary.LittleEndian, dim); err != nil {
+			return err
+		}
+	}
+
+	// Write type
+	if err := binary.Write(file, binary.LittleEndian, tensor.Type); err != nil {
+		return err
+	}
+
+	// Write offset (dummy value)
+	return binary.Write(file, binary.LittleEndian, uint64(0))
 }

 func TestModelCapabilities(t *testing.T) {
@ -101,13 +219,38 @@ func TestModelCapabilities(t *testing.T) {
 	// Create a simple model file for tests that don't depend on GGUF content
 	simpleModelPath := filepath.Join(tempDir, "simple_model.bin")

-	if err := errors.Join(
-		os.WriteFile(completionModelPath, createMockGGUFData("llama", false), 0o644),
-		os.WriteFile(visionModelPath, createMockGGUFData("llama", true), 0o644),
-		os.WriteFile(embeddingModelPath, createMockGGUFData("bert", false), 0o644),
-		os.WriteFile(simpleModelPath, []byte("dummy model data"), 0o644),
-	); err != nil {
-		t.Fatalf("Failed to create model files: %v", err)
+	// Create completion model (llama architecture without vision)
+	if err := createTestGGUFFile(completionModelPath, map[string]any{
+		"general.architecture": "llama",
+	}, []testTensorInfo{
+		{Name: "token_embd.weight", Shape: []uint64{1000, 512}, Type: 1}, // F16
+	}); err != nil {
+		t.Fatalf("Failed to create completion model file: %v", err)
+	}
+
+	// Create vision model (llama architecture with vision block count)
+	if err := createTestGGUFFile(visionModelPath, map[string]any{
+		"general.architecture":     "llama",
+		"llama.vision.block_count": uint32(1),
+	}, []testTensorInfo{
+		{Name: "token_embd.weight", Shape: []uint64{1000, 512}, Type: 1}, // F16
+	}); err != nil {
+		t.Fatalf("Failed to create vision model file: %v", err)
+	}
+
+	// Create embedding model (bert architecture with pooling type)
+	if err := createTestGGUFFile(embeddingModelPath, map[string]any{
+		"general.architecture": "bert",
+		"bert.pooling_type":    uint32(1),
+	}, []testTensorInfo{
+		{Name: "token_embd.weight", Shape: []uint64{1000, 512}, Type: 1}, // F16
+	}); err != nil {
+		t.Fatalf("Failed to create embedding model file: %v", err)
+	}
+
+	// Create simple model file for tests that don't depend on GGUF content
+	if err := os.WriteFile(simpleModelPath, []byte("dummy model data"), 0o644); err != nil {
+		t.Fatalf("Failed to create simple model file: %v", err)
 	}

 	toolsInsertTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}{{ if .suffix }}{{ .suffix }}{{ end }}")
@ -231,12 +374,29 @@ func TestModelCheckCapabilities(t *testing.T) {
 	simpleModelPath := filepath.Join(tempDir, "model.bin")
 	embeddingModelPath := filepath.Join(tempDir, "embedding_model.bin")

-	if err := errors.Join(
-		os.WriteFile(simpleModelPath, []byte("dummy model data"), 0o644),
-		os.WriteFile(visionModelPath, createMockGGUFData("llama", true), 0o644),
-		os.WriteFile(embeddingModelPath, createMockGGUFData("bert", false), 0o644),
-	); err != nil {
-		t.Fatalf("Failed to create model files: %v", err)
+	// Create vision model (llama architecture with vision block count)
+	if err := createTestGGUFFile(visionModelPath, map[string]any{
+		"general.architecture":     "llama",
+		"llama.vision.block_count": uint32(1),
+	}, []testTensorInfo{
+		{Name: "token_embd.weight", Shape: []uint64{1000, 512}, Type: 1}, // F16
+	}); err != nil {
+		t.Fatalf("Failed to create vision model file: %v", err)
+	}
+
+	// Create embedding model (bert architecture with pooling type)
+	if err := createTestGGUFFile(embeddingModelPath, map[string]any{
+		"general.architecture": "bert",
+		"bert.pooling_type":    uint32(1),
+	}, []testTensorInfo{
+		{Name: "token_embd.weight", Shape: []uint64{1000, 512}, Type: 1}, // F16
+	}); err != nil {
+		t.Fatalf("Failed to create embedding model file: %v", err)
+	}
+
+	// Create simple model file for tests that don't depend on GGUF content
+	if err := os.WriteFile(simpleModelPath, []byte("dummy model data"), 0o644); err != nil {
+		t.Fatalf("Failed to create simple model file: %v", err)
 	}

 	toolsInsertTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}{{ if .suffix }}{{ .suffix }}{{ end }}")