model: add a test for model forward pass during implementation

Adds a new test file to verify model forward pass behavior through JSON-specified test cases. The framework loads model files (.gguf) and their corresponding test specifications to validate expected outputs using greedy sampling.
model: document qwen2 forward pass
2025-02-18 14:11:31 -08:00 · 2025-02-14 14:55:30 -08:00 · 2025-02-14 14:30:06 -08:00 · 2025-02-14 14:21:00 -08:00
5 changed files with 162 additions and 4 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,6 @@ test_data
 __debug_bin*
 llama/build
 llama/vendor
+model/testdata/models/*
+!model/testdata/models/*.md
+!model/testdata/models/*.json
--- a/model/model_external_test.go
+++ b/model/model_external_test.go
@@ -0,0 +1,138 @@
+// Package model_test provides external tests for the model package.
+// This test file specifically tests the forward pass functionality on models.
+// It is in a separate package (model_test) to avoid import cycles while still
+// being able to test the public API of the model package.
+package model_test
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/ollama/ollama/ml"
+	"github.com/ollama/ollama/model"
+	"github.com/ollama/ollama/sample"
+
+	_ "github.com/ollama/ollama/model/models"
+)
+
+type modelTest struct {
+	Prompt            string   `json:"prompt"`
+	OutputContainsOne []string `json:"output_contains_one"`
+}
+
+func TestForwardSimple(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping in short mode")
+	}
+
+	// Read all JSON files from testdata/models
+	files, err := os.ReadDir("testdata/models")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	for _, file := range files {
+		if !strings.HasSuffix(file.Name(), ".json") {
+			continue
+		}
+
+		jsonPath := filepath.Join("testdata/models", file.Name())
+		ggufPath := filepath.Join("testdata/models", strings.TrimSuffix(file.Name(), ".json")+".gguf")
+
+		// Skip if no corresponding .gguf file exists
+		if _, err := os.Stat(ggufPath); err != nil {
+			t.Logf("skipping %s: no corresponding GGUF file found", file.Name())
+			continue
+		}
+
+		data, err := os.ReadFile(jsonPath)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		var test modelTest
+		if err := json.Unmarshal(data, &test); err != nil {
+			t.Fatal(err)
+		}
+
+		t.Run(strings.TrimSuffix(file.Name(), ".json"), func(t *testing.T) {
+			m, err := model.New(ggufPath)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			m.Config().Cache.Init(m.Backend(), ml.DTypeF32, 2048)
+
+			inputs, err := m.(model.TextProcessor).Encode(test.Prompt)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			var result []string
+			for len(result) < 100 { // Limit to 100 tokens max
+				options := model.Options{
+					Inputs:    inputs,
+					Positions: make([]int32, len(inputs)),
+					Sequences: make([]int, len(inputs)),
+					Outputs:   []int32{int32(len(inputs) - 1)},
+				}
+				for i := range options.Positions {
+					options.Positions[i] = int32(i)
+					options.Sequences[i] = 0
+				}
+
+				ctx := m.Backend().NewContext()
+
+				modelOutput, err := model.Forward(ctx, m, options)
+				if err != nil {
+					ctx.Close()
+					t.Fatal(fmt.Errorf("forward pass failed: %v", err))
+				}
+
+				f32s := modelOutput.Floats()
+				logits := make([]float64, len(f32s))
+				for i, f32 := range f32s {
+					logits[i] = float64(f32)
+				}
+
+				token, err := sample.Sample(logits, sample.Greedy())
+				if err != nil {
+					ctx.Close()
+					t.Fatal(fmt.Errorf("sampling failed: %v", err))
+				}
+
+				ctx.Close()
+
+				// Greedy sampling: take the token with the highest logit
+				nextToken := int32(token[0])
+				if m.(model.TextProcessor).Is(nextToken, model.SpecialEOS) {
+					break
+				}
+
+				piece, err := m.(model.TextProcessor).Decode([]int32{nextToken})
+				if err != nil {
+					t.Fatal(err)
+				}
+
+				result = append(result, piece)
+				output := strings.Join(result, "")
+
+				for _, expectedOutput := range test.OutputContainsOne {
+					if strings.Contains(output, expectedOutput) {
+						t.Logf("Test passed with output: %q (matched expected: %q)", output, expectedOutput)
+						return
+					}
+				}
+
+				// Maintain full context by appending new token
+				inputs = append(inputs, nextToken)
+			}
+
+			t.Fatalf("Expected output containing one of %q but got: %q", test.OutputContainsOne, strings.Join(result, ""))
+		})
+	}
+}
--- a/model/models/qwen2/model.go
+++ b/model/models/qwen2/model.go
@@ -16,7 +16,7 @@ type Options struct {
 	numAttnHeads   int
 	numKVHeads     int
 	modelEpsilon   float32
-	ropeFreqBase   float32
+	ropeBaseFreq   float32
 	ropeFreqScale  float32
 	ropeDimensions uint32
 }
@@ -52,7 +52,7 @@ func New(c ml.Config) (model.Model, error) {
 			numKVHeads:     int(c.Uint("attention.head_count_kv")),
 			modelEpsilon:   c.Float("attention.layer_norm_rms_epsilon"),
 			contextLength:  int(c.Uint("context_length")),
-			ropeFreqBase:   c.Float("rope.freq_base"),
+			ropeBaseFreq:   c.Float("rope.freq_base"),
 			ropeFreqScale:  c.Float("rope.freq_scale", 1),
 			ropeDimensions: c.Uint("rope.dimension_count", 64),
 		},
@@ -73,7 +73,7 @@ func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tenso
 			RopeDim:     m.Options.ropeDimensions,
 			RopeType:    ml.RopeTypeNeoX,
 			OrigCtxLen:  m.Options.contextLength,
-			RopeBase:    m.Options.ropeFreqBase,
+			RopeBase:    m.Options.ropeBaseFreq,
 			RopeScale:   m.Options.ropeFreqScale,
 		},
 	), nil
@@ -98,7 +98,7 @@ func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, inputPositions ml.
 		RopeDim:     opts.ropeDimensions,
 		RopeType:    ml.RopeTypeNeoX,
 		OrigCtxLen:  opts.contextLength,
-		RopeBase:    opts.ropeFreqBase,
+		RopeBase:    opts.ropeBaseFreq,
 		RopeScale:   opts.ropeFreqScale,
 	}

--- a/model/testdata/models/README.md
+++ b/model/testdata/models/README.md
@@ -0,0 +1,10 @@
+# Test Model Directory
+
+This directory is used for storing model files (like `.gguf` files) that are required to run the tests in `model_external_test.go`. 
+
+## Usage
+
+- Place any model files you need for testing in this directory
+- The test file will look for any model files here (e.g., `llama3.gguf`)
+- All non-markdown files in this directory are git-ignored to prevent large model files from being committed to the repository
+- Only `.md` files (like this README) will be tracked in git
--- a/model/testdata/models/qwen2_5.json
+++ b/model/testdata/models/qwen2_5.json
@@ -0,0 +1,7 @@
+{
+  "prompt": "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\nhi<|im_end|>\n<|im_start|>assistant\n",
+  "output_contains_one": [
+    "Hello",
+    "Hi"
+  ]
+}
Author	SHA1	Message	Date
Bruce MacDonald	7fa9694359	model: add a test for model forward pass during implementation Adds a new test file to verify model forward pass behavior through JSON-specified test cases. The framework loads model files (.gguf) and their corresponding test specifications to validate expected outputs using greedy sampling.	2025-02-18 14:11:31 -08:00
Bruce MacDonald	96510b9353	model: document qwen2 forward pass	2025-02-14 14:55:30 -08:00
Bruce MacDonald	9f8c89354b	model: add new engine support for qwen2 family	2025-02-14 14:30:06 -08:00
Bruce MacDonald	8815a8ee25	ml: let model specify rope configuration Add support for model-specific RoPE configuration parameters by: 1. Creating a new `RopeConfig` struct to encapsulate all RoPE parameters 2. Adding `RopeType` enum to specify different RoPE variants (Standard/NeoX) 3. Extracting original context length from model config 4. Refactoring `RoPE()` interface to use the new config struct 5. Updating llama and mllama models to use new RoPE configuration This change allows models to specify their RoPE implementation type and original context length, which is important for proper position embedding calculation and model compatibility.	2025-02-14 14:21:00 -08:00