fix tests

app/ui: add gemini-3-pro-preview to featured list
2025-11-19 15:57:19 -05:00 · 2025-11-19 14:59:29 -05:00
31 changed files with 585 additions and 2028 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -15,12 +15,8 @@ ml/backend/**/*.cu linguist-vendored
 ml/backend/**/*.cuh linguist-vendored
 ml/backend/**/*.m linguist-vendored
 ml/backend/**/*.metal linguist-vendored
-ml/backend/**/*.comp linguist-vendored
-ml/backend/**/*.glsl linguist-vendored
 ml/backend/**/CMakeLists.txt linguist-vendored

-app/webview linguist-vendored
-
 llama/build-info.cpp linguist-generated
 ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s linguist-generated

--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -11,6 +11,7 @@ linters:
    - errorlint
    - exptostd
    - gocheckcompilerdirectives
+    - gocritic
    - govet
    - ineffassign
    - intrange
--- a/api/client.go
+++ b/api/client.go
@@ -226,14 +226,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f

 		bts := scanner.Bytes()
 		if err := json.Unmarshal(bts, &errorResponse); err != nil {
-			if response.StatusCode >= http.StatusBadRequest {
-				return StatusError{
-					StatusCode:   response.StatusCode,
-					Status:       response.Status,
-					ErrorMessage: string(bts),
-				}
-			}
-			return errors.New(string(bts))
+			return fmt.Errorf("unmarshal: %w", err)
 		}

 		if response.StatusCode == http.StatusUnauthorized {
--- a/api/client_test.go
+++ b/api/client_test.go
@@ -55,7 +55,6 @@ func TestClientFromEnvironment(t *testing.T) {
 type testError struct {
 	message    string
 	statusCode int
-	raw        bool // if true, write message as-is instead of JSON encoding
 }

 func (e testError) Error() string {
@@ -112,20 +111,6 @@ func TestClientStream(t *testing.T) {
 				},
 			},
 		},
-		{
-			name: "plain text error response",
-			responses: []any{
-				"internal server error",
-			},
-			wantErr: "internal server error",
-		},
-		{
-			name: "HTML error page",
-			responses: []any{
-				"<html><body>404 Not Found</body></html>",
-			},
-			wantErr: "404 Not Found",
-		},
 	}

 	for _, tc := range testCases {
@@ -150,12 +135,6 @@ func TestClientStream(t *testing.T) {
 						return
 					}

-					if str, ok := resp.(string); ok {
-						fmt.Fprintln(w, str)
-						flusher.Flush()
-						continue
-					}
-
 					if err := json.NewEncoder(w).Encode(resp); err != nil {
 						t.Fatalf("failed to encode response: %v", err)
 					}
@@ -194,10 +173,9 @@ func TestClientStream(t *testing.T) {

 func TestClientDo(t *testing.T) {
 	testCases := []struct {
-		name           string
-		response       any
-		wantErr        string
-		wantStatusCode int
+		name     string
+		response any
+		wantErr  string
 	}{
 		{
 			name: "immediate error response",
@@ -205,8 +183,7 @@ func TestClientDo(t *testing.T) {
 				message:    "test error message",
 				statusCode: http.StatusBadRequest,
 			},
-			wantErr:        "test error message",
-			wantStatusCode: http.StatusBadRequest,
+			wantErr: "test error message",
 		},
 		{
 			name: "server error response",
@@ -214,8 +191,7 @@ func TestClientDo(t *testing.T) {
 				message:    "internal error",
 				statusCode: http.StatusInternalServerError,
 			},
-			wantErr:        "internal error",
-			wantStatusCode: http.StatusInternalServerError,
+			wantErr: "internal error",
 		},
 		{
 			name: "successful response",
@@ -227,26 +203,6 @@ func TestClientDo(t *testing.T) {
 				Success: true,
 			},
 		},
-		{
-			name: "plain text error response",
-			response: testError{
-				message:    "internal server error",
-				statusCode: http.StatusInternalServerError,
-				raw:        true,
-			},
-			wantErr:        "internal server error",
-			wantStatusCode: http.StatusInternalServerError,
-		},
-		{
-			name: "HTML error page",
-			response: testError{
-				message:    "<html><body>404 Not Found</body></html>",
-				statusCode: http.StatusNotFound,
-				raw:        true,
-			},
-			wantErr:        "<html><body>404 Not Found</body></html>",
-			wantStatusCode: http.StatusNotFound,
-		},
 	}

 	for _, tc := range testCases {
@@ -254,16 +210,11 @@ func TestClientDo(t *testing.T) {
 			ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 				if errResp, ok := tc.response.(testError); ok {
 					w.WriteHeader(errResp.statusCode)
-					if !errResp.raw {
-						err := json.NewEncoder(w).Encode(map[string]string{
-							"error": errResp.message,
-						})
-						if err != nil {
-							t.Fatal("failed to encode error response:", err)
-						}
-					} else {
-						// Write raw message (simulates non-JSON error responses)
-						fmt.Fprint(w, errResp.message)
+					err := json.NewEncoder(w).Encode(map[string]string{
+						"error": errResp.message,
+					})
+					if err != nil {
+						t.Fatal("failed to encode error response:", err)
 					}
 					return
 				}
@@ -290,15 +241,6 @@ func TestClientDo(t *testing.T) {
 				if err.Error() != tc.wantErr {
 					t.Errorf("error message mismatch: got %q, want %q", err.Error(), tc.wantErr)
 				}
-				if tc.wantStatusCode != 0 {
-					if statusErr, ok := err.(StatusError); ok {
-						if statusErr.StatusCode != tc.wantStatusCode {
-							t.Errorf("status code mismatch: got %d, want %d", statusErr.StatusCode, tc.wantStatusCode)
-						}
-					} else {
-						t.Errorf("expected StatusError, got %T", err)
-					}
-				}
 				return
 			}

--- a/app/cmd/app/app.go
+++ b/app/cmd/app/app.go
@@ -397,8 +397,8 @@ func checkUserLoggedIn(uiServerPort int) bool {
 // handleConnectURLScheme fetches the connect URL and opens it in the browser
 func handleConnectURLScheme() {
 	if checkUserLoggedIn(uiServerPort) {
-		slog.Info("user is already logged in, opening app instead")
-		showWindow(wv.webview.Window())
+		slog.Info("user is already logged in, opening settings instead")
+		sendUIRequestMessage("/")
 		return
 	}

@@ -466,8 +466,6 @@ func handleURLSchemeInCurrentInstance(urlSchemeRequest string) {
 	if isConnect {
 		handleConnectURLScheme()
 	} else {
-		if wv.webview != nil {
-			showWindow(wv.webview.Window())
-		}
+		sendUIRequestMessage("/")
 	}
 }
--- a/app/cmd/app/app_darwin.m
+++ b/app/cmd/app/app_darwin.m
@@ -24,14 +24,27 @@ bool firstTimeRun,startHidden; // Set in run before initialization
    for (NSURL *url in urls) {
        if ([url.scheme isEqualToString:@"ollama"]) {
            NSString *path = url.path;
-
-            if (path && ([path isEqualToString:@"/connect"] || [url.host isEqualToString:@"connect"])) {
+            if (!path || [path isEqualToString:@""]) {
+                // For URLs like ollama://settings (without triple slash),
+                // the "settings" part is parsed as the host, not the path.
+                // We need to convert it to a path by prepending "/"
+                if (url.host && ![url.host isEqualToString:@""]) {
+                    path = [@"/" stringByAppendingString:url.host];
+                } else {
+                    path = @"/";
+                }
+            }
+            
+            if ([path isEqualToString:@"/connect"] || [url.host isEqualToString:@"connect"]) {
                // Special case: handle connect by opening browser instead of app
                handleConnectURL();
            } else {
                // Set app to be active and visible
                [NSApp setActivationPolicy:NSApplicationActivationPolicyRegular];
                [NSApp activateIgnoringOtherApps:YES];
+                
+                // Open the path with the UI
+                [self uiRequest:path];
            }
            
            break;
@@ -247,7 +260,7 @@ bool firstTimeRun,startHidden; // Set in run before initialization
 }

 - (void)openHelp:(id)sender {
-    NSURL *url = [NSURL URLWithString:@"https://docs.ollama.com/"];
+    NSURL *url = [NSURL URLWithString:@"https://github.com/ollama/ollama/tree/main/docs"];
    [[NSWorkspace sharedWorkspace] openURL:url];
 }

--- a/app/cmd/app/app_windows.go
+++ b/app/cmd/app/app_windows.go
@@ -147,9 +147,7 @@ func handleURLSchemeRequest(urlScheme string) {
 	if isConnect {
 		handleConnectURLScheme()
 	} else {
-		if wv.webview != nil {
-			showWindow(wv.webview.Window())
-		}
+		sendUIRequestMessage("/")
 	}
 }

--- a/app/ui/app/src/api.ts
+++ b/app/ui/app/src/api.ts
@@ -25,7 +25,7 @@ declare module "@/gotypes" {
 }

 Model.prototype.isCloud = function (): boolean {
-  return this.model.endsWith("cloud");
+  return this.model.endsWith("cloud") || this.model === "gemini-3-pro-preview";
 };

 // Helper function to convert Uint8Array to base64
--- a/app/ui/app/src/utils/mergeModels.test.ts
+++ b/app/ui/app/src/utils/mergeModels.test.ts
@@ -14,8 +14,8 @@ describe("Model merging logic", () => {
    const merged = mergeModels(localModels);

    // First verify cloud models are first and in FEATURED_MODELS order
-    const cloudModels = FEATURED_MODELS.filter((m: string) =>
-      m.endsWith("cloud"),
+    const cloudModels = FEATURED_MODELS.filter(
+      (m: string) => m.endsWith("cloud") || m === "gemini-3-pro-preview",
    );
    for (let i = 0; i < cloudModels.length; i++) {
      expect(merged[i].model).toBe(cloudModels[i]);
@@ -24,7 +24,7 @@ describe("Model merging logic", () => {

    // Then verify non-cloud featured models are next and in FEATURED_MODELS order
    const nonCloudFeatured = FEATURED_MODELS.filter(
-      (m: string) => !m.endsWith("cloud"),
+      (m: string) => !m.endsWith("cloud") && m !== "gemini-3-pro-preview",
    );
    for (let i = 0; i < nonCloudFeatured.length; i++) {
      const model = merged[i + cloudModels.length];
@@ -54,9 +54,9 @@ describe("Model merging logic", () => {
    const cloudModels = merged.filter((m) => m.isCloud());
    expect(cloudModels.length).toBe(0);

-    // Should have non-cloud featured models
+    // Should have non-cloud featured models (excluding gemini-3-pro-preview which is treated as cloud)
    const nonCloudFeatured = FEATURED_MODELS.filter(
-      (m) => !m.endsWith("cloud"),
+      (m) => !m.endsWith("cloud") && m !== "gemini-3-pro-preview",
    );
    for (let i = 0; i < nonCloudFeatured.length; i++) {
      const model = merged[i];
@@ -74,7 +74,9 @@ describe("Model merging logic", () => {
    const merged = mergeModels([]);

    // First verify cloud models are first and in FEATURED_MODELS order
-    const cloudModels = FEATURED_MODELS.filter((m) => m.endsWith("cloud"));
+    const cloudModels = FEATURED_MODELS.filter(
+      (m) => m.endsWith("cloud") || m === "gemini-3-pro-preview",
+    );
    for (let i = 0; i < cloudModels.length; i++) {
      expect(merged[i].model).toBe(cloudModels[i]);
      expect(merged[i].isCloud()).toBe(true);
@@ -82,7 +84,7 @@ describe("Model merging logic", () => {

    // Then verify non-cloud featured models are next and in FEATURED_MODELS order
    const nonCloudFeatured = FEATURED_MODELS.filter(
-      (m) => !m.endsWith("cloud"),
+      (m) => !m.endsWith("cloud") && m !== "gemini-3-pro-preview",
    );
    for (let i = 0; i < nonCloudFeatured.length; i++) {
      const model = merged[i + cloudModels.length];
@@ -104,7 +106,9 @@ describe("Model merging logic", () => {
    const merged = mergeModels(localModels);

    // First verify cloud models are first and in FEATURED_MODELS order
-    const cloudModels = FEATURED_MODELS.filter((m) => m.endsWith("cloud"));
+    const cloudModels = FEATURED_MODELS.filter(
+      (m) => m.endsWith("cloud") || m === "gemini-3-pro-preview",
+    );
    for (let i = 0; i < cloudModels.length; i++) {
      expect(merged[i].model).toBe(cloudModels[i]);
      expect(merged[i].isCloud()).toBe(true);
@@ -112,7 +116,7 @@ describe("Model merging logic", () => {

    // Then verify non-cloud featured models are next and in FEATURED_MODELS order
    const nonCloudFeatured = FEATURED_MODELS.filter(
-      (m) => !m.endsWith("cloud"),
+      (m) => !m.endsWith("cloud") && m !== "gemini-3-pro-preview",
    );
    for (let i = 0; i < nonCloudFeatured.length; i++) {
      const model = merged[i + cloudModels.length];
--- a/app/ui/app/src/utils/mergeModels.ts
+++ b/app/ui/app/src/utils/mergeModels.ts
@@ -4,6 +4,7 @@ import { Model } from "@/gotypes";
 export const FEATURED_MODELS = [
  "gpt-oss:120b-cloud",
  "gpt-oss:20b-cloud",
+  "gemini-3-pro-preview",
  "deepseek-v3.1:671b-cloud",
  "qwen3-coder:480b-cloud",
  "qwen3-vl:235b-cloud",
@@ -40,7 +41,9 @@ export function mergeModels(
  const cloudModels = [...allModels.filter((m) => m.isCloud())];

  // Add any cloud models from FEATURED_MODELS that aren't in local models
-  FEATURED_MODELS.filter((f) => f.endsWith("cloud")).forEach((cloudModel) => {
+  FEATURED_MODELS.filter(
+    (f) => f.endsWith("cloud") || f === "gemini-3-pro-preview",
+  ).forEach((cloudModel) => {
    if (!cloudModels.some((m) => m.model === cloudModel)) {
      cloudModels.push(new Model({ model: cloudModel }));
    }
@@ -48,7 +51,7 @@ export function mergeModels(

  // 2. Get other featured models (non-cloud)
  const featuredModels = FEATURED_MODELS.filter(
-    (f) => !f.endsWith("cloud"),
+    (f) => !f.endsWith("cloud") && f !== "gemini-3-pro-preview",
  ).map((model) => {
    // Check if this model exists in local models
    const localMatch = allModels.find(
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -1430,7 +1430,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
 		latest.Summary()
 	}

-	return &api.Message{Role: role, Thinking: thinkingContent.String(), Content: fullResponse.String()}, nil
+	return &api.Message{Role: role, Content: fullResponse.String()}, nil
 }

 func generate(cmd *cobra.Command, opts runOptions) error {
--- a/convert/convert_mistral.go
+++ b/convert/convert_mistral.go
@@ -29,15 +29,6 @@ type mistral3Model struct {
 		SlidingWindow         *uint32 `json:"sliding_window"`
 		HiddenAct             string  `json:"hidden_act"`
 		VocabSize             uint32  `json:"vocab_size"`
-		RopeParameters        struct {
-			BetaFast                  float32 `json:"beta_fast"`
-			BetaSlow                  float32 `json:"beta_slow"`
-			Factor                    float32 `json:"factor"`
-			ScalingBeta               float32 `json:"llama_4_scaling_beta"`
-			OrigMaxPositionEmbeddings uint32  `json:"original_max_position_embeddings"`
-			RopeType                  string  `json:"rope_type"`
-			RopeTheta                 float32 `json:"rope_theta"`
-		} `json:"rope_parameters"`
 	} `json:"text_config"`
 	VisionModel struct {
 		NumAttentionHeads uint32  `json:"num_attention_heads"`
@@ -70,13 +61,8 @@ func (p *mistral3Model) KV(t *Tokenizer) ggml.KV {
 	kv["mistral3.attention.layer_norm_rms_epsilon"] = p.TextModel.RMSNormEPS
 	kv["mistral3.attention.key_length"] = p.TextModel.HeadDim
 	kv["mistral3.attention.value_length"] = p.TextModel.HeadDim
-	kv["mistral3.rope.dimension_count"] = cmp.Or(p.TextModel.HeadDim, p.TextModel.HiddenSize/p.TextModel.NumAttentionHeads)
-	kv["mistral3.rope.freq_base"] = cmp.Or(p.TextModel.RopeTheta, p.TextModel.RopeParameters.RopeTheta)
-
-	if p.TextModel.RopeParameters.OrigMaxPositionEmbeddings > 0 {
-		kv["mistral3.rope.scaling.original_context_length"] = p.TextModel.RopeParameters.OrigMaxPositionEmbeddings
-		kv["mistral3.rope.scaling_beta"] = p.TextModel.RopeParameters.ScalingBeta
-	}
+	kv["mistral3.rope.dimension_count"] = p.TextModel.HiddenSize / p.TextModel.NumHiddenLayers
+	kv["mistral3.rope.freq_base"] = p.TextModel.RopeTheta

 	// Vision configuration
 	kv["mistral3.vision.block_count"] = p.VisionModel.NumHiddenLayers
--- a/discover/runner.go
+++ b/discover/runner.go
@@ -65,7 +65,6 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 		}

 		slog.Info("discovering available GPUs...")
-		detectIncompatibleLibraries()

 		// Warn if any user-overrides are set which could lead to incorrect GPU discovery
 		overrideWarnings()
@@ -99,9 +98,6 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 					continue
 				} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
 					continue
-				} else if jetpack == "" && strings.Contains(filepath.Base(dir), "cuda_jetpack") {
-					slog.Debug("jetpack not detected (set JETSON_JETPACK or OLLAMA_LLM_LIBRARY to override), skipping", "libDir", dir)
-					continue
 				} else if !envconfig.EnableVulkan() && strings.Contains(filepath.Base(dir), "vulkan") {
 					slog.Info("experimental Vulkan support disabled.  To enable, set OLLAMA_VULKAN=1")
 					continue
@@ -129,20 +125,10 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 		supportedMu := sync.Mutex{}
 		supported := make(map[string]map[string]map[string]int) // [Library][libDir][ID] = pre-deletion devices index
 		for i := range devices {
-			libDir := devices[i].LibraryPath[len(devices[i].LibraryPath)-1]
 			if !devices[i].NeedsInitValidation() {
-				// No need to validate, add to the supported map
-				supportedMu.Lock()
-				if _, ok := supported[devices[i].Library]; !ok {
-					supported[devices[i].Library] = make(map[string]map[string]int)
-				}
-				if _, ok := supported[devices[i].Library][libDir]; !ok {
-					supported[devices[i].Library][libDir] = make(map[string]int)
-				}
-				supported[devices[i].Library][libDir][devices[i].ID] = i
-				supportedMu.Unlock()
 				continue
 			}
+			libDir := devices[i].LibraryPath[len(devices[i].LibraryPath)-1]
 			slog.Debug("verifying if device is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "id", devices[i].ID, "pci_id", devices[i].PCIID)
 			wg.Add(1)
 			go func(i int) {
@@ -488,16 +474,3 @@ func overrideWarnings() {
 		slog.Warn("if GPUs are not correctly discovered, unset and try again")
 	}
 }
-
-func detectIncompatibleLibraries() {
-	if runtime.GOOS != "windows" {
-		return
-	}
-	basePath, err := exec.LookPath("ggml-base.dll")
-	if err != nil || basePath == "" {
-		return
-	}
-	if !strings.HasPrefix(basePath, ml.LibOllamaPath) {
-		slog.Warn("potentially incompatible library detected in PATH", "location", basePath)
-	}
-}
--- a/docs/faq.mdx
+++ b/docs/faq.mdx
@@ -57,13 +57,11 @@ ollama ps
 ```

 <Info>
-
-**Output**:
-
-```
-NAME        ID            SIZE    PROCESSOR   UNTIL
-llama3:70b  bcfb190ca3a7  42 GB   100% GPU    4 minutes from now
-```
+  **Output**:
+
+  ```
+  NAME        ID            SIZE    PROCESSOR   UNTIL
+  llama3:70b  bcfb190ca3a7  42 GB   100% GPU    4 minutes from now
+  ```
 </Info>

 The `Processor` column will show which memory the model was loaded in to:
@@ -390,4 +385,4 @@ Ollama for Windows and macOS register as a login item during installation.  You
 - In `Task Manager` go to the `Startup apps` tab, search for `ollama` then click `Disable`

 **MacOS**
- Open `Settings` and search for "Login Items", find the `Ollama` entry under "Allow in the Background`, then click the slider to disable.
+- Open `Settings` and search for "Login Items", find the `Ollama` entry under "Allow in the Background", then click the slider to disable.
--- a/docs/modelfile.mdx
+++ b/docs/modelfile.mdx
@@ -149,6 +149,9 @@ PARAMETER <parameter> <parametervalue>

 | Parameter      | Description                                                                                                                                                                                                                                                                                                                                                                     | Value Type | Example Usage        |
 | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------- | -------------------- |
+| mirostat       | Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)                                                                                                                                                                                                                                                                 | int        | mirostat 0           |
+| mirostat_eta   | Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1)                                                                                                                                                | float      | mirostat_eta 0.1     |
+| mirostat_tau   | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0)                                                                                                                                                                                                                                 | float      | mirostat_tau 5.0     |
 | num_ctx        | Sets the size of the context window used to generate the next token. (Default: 2048)                                                                                                                                                                                                                                                                                            | int        | num_ctx 4096         |
 | repeat_last_n  | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)                                                                                                                                                                                                                                                                   | int        | repeat_last_n 64     |
 | repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)                                                                                                                                                                                             | float      | repeat_penalty 1.1   |
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -251,7 +251,6 @@ func (kv KV) OllamaEngineRequired() bool {
 		"qwen3vl", "qwen3vlmoe",
 		"deepseekocr",
 		"deepseek2",
-		"nomic-bert",
 	}, kv.Architecture())
 }

--- a/harmony/harmonyparser.go
+++ b/harmony/harmonyparser.go
@@ -388,9 +388,9 @@ func NewFunctionNameMap() *FunctionNameMap {
 	}
 }

-// Init initializes the handler with tools, optional last message, and think value
+// Init initializes the handler with tools and optional last message
 // Implements the Parser interface
-func (h *HarmonyMessageHandler) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
+func (h *HarmonyMessageHandler) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
 	// Initialize the harmony parser
 	if h.HarmonyParser == nil {
 		h.HarmonyParser = &HarmonyParser{
--- a/kvcache/causal_test.go
+++ b/kvcache/causal_test.go
--- a/model/models/deepseek2/model.go
+++ b/model/models/deepseek2/model.go
@@ -236,6 +236,11 @@ type Model struct {
 }

 func New(c fs.Config) (model.Model, error) {
+	if c.Uint("attention.key_length_mla") == 0 {
+		// non-MLA models aren't yet supported
+		return nil, model.ErrUnsupportedModel
+	}
+
 	layers := make([]Layer, c.Uint("block_count"))

 	firstDenseLayerIndex := int(c.Uint("leading_dense_block_count"))
@@ -254,30 +259,6 @@ func New(c fs.Config) (model.Model, error) {
 	keyLength := int(cmp.Or(c.Uint("attention.key_length_mla"), c.Uint("attention.key_length")))
 	valueLength := int(cmp.Or(c.Uint("attention.value_length_mla"), c.Uint("attention.value_length")))

-	var pre []string
-	switch c.String("tokenizer.ggml.pre") {
-	case "deepseek-v3":
-		pre = []string{
-			// Split regex into multiple parts (according to DeepSeek3's regex)
-			"\\p{N}{1,3}",
-			`[一-龥぀-ゟ゠-ヿ]+`,
-			"[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+",
-		}
-	case "deepseek-llm":
-		// TODO: these models haven't been vetted so skip for now
-		// pre = []string{
-		// 	"[\r\n]",
-		// 	"\\s?[A-Za-zµÀ-ÖØ-öø-ƺƼ-ƿǄ-ʓʕ-ʯͰ-ͳͶͷͻ-ͽͿΆΈ-ΊΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-ՖႠ-ჅᎠ-Ᏽᏸ-ᏽᲐ-ᲺᲽ-Ჿᴀ-ᴫᵫ-ᵷᵹ-ᶚḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼℂℇℊ-ℓℕℙ-ℝℤΩℨK-ℭℯ-ℴℹℼ-ℿⅅ-ⅉⅎↃↄⰀ-ⱻⱾ-ⳤⳫ-ⳮⳲⳳꙀ-ꙭꚀ-ꚛꜢ-ꝯꝱ-ꞇꞋ-ꞎꭰ-ꮿﬀ-ﬆﬓ-ﬗＡ-Ｚａ-ｚ𐐀-𐑏𐒰-𐓓𐓘-𐓻𐲀-𐲲𐳀-𐳲𑢠-𑣟𞤀-𞥃]+",
-		// 	"\\s?[!-/:-~！-／：-～‘-‟　-。]+",
-		// 	"\\s+$",
-		// 	"[一-龥ࠀ-一가-퟿]+",
-		// 	"[0-9]",
-		// }
-		fallthrough
-	default:
-		return nil, model.ErrUnsupportedTokenizer
-	}
-
 	m := Model{
 		BytePairEncoding: model.NewBytePairEncoding(
 			&model.Vocabulary{
@@ -292,7 +273,10 @@ func New(c fs.Config) (model.Model, error) {
 					c.Ints("tokenizer.ggml.eos_token_ids")...,
 				),
 			},
-			pre...,
+			// Split regex into multiple parts (according to DeepSeek3's regex)
+			"\\p{N}{1,3}",
+			`[一-龥぀-ゟ゠-ヿ]+`,
+			"[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+",
 		),
 		Layers: layers,
 		Options: &Options{
--- a/model/models/mistral3/model.go
+++ b/model/models/mistral3/model.go
@@ -159,9 +159,8 @@ func (m *Model) PostTokenize(inputs []*input.Input) ([]*input.Input, error) {

 func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
 	positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
-	positionsScale := m.getScale(ctx, batch.Positions)

-	return m.TextModel.Forward(ctx, batch.Inputs, positions, positionsScale, batch.Outputs, batch, m.Cache), nil
+	return m.TextModel.Forward(ctx, batch.Inputs, positions, batch.Outputs, batch, m.Cache), nil
 }

 func init() {
--- a/model/models/mistral3/model_text.go
+++ b/model/models/mistral3/model_text.go
@@ -16,8 +16,6 @@ type TextOptions struct {
 	hiddenSize, numHeads, numKVHeads int
 	headDim, ropeDim                 int
 	eps, ropeBase, ropeScale         float32
-	ropeOrigPosEmbeddings            int
-	ropeScalingBeta                  float32
 }

 type TextModel struct {
@@ -36,7 +34,7 @@ type SelfAttention struct {
 	Output *nn.Linear `gguf:"attn_output"`
 }

-func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs, positionsScale ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
+func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
 	batchSize := hiddenState.Dim(1)
 	headDim := cmp.Or(opts.headDim, opts.hiddenSize/opts.numHeads)

@@ -51,10 +49,6 @@ func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs, posit
 	v := sa.Value.Forward(ctx, hiddenState)
 	v = v.Reshape(ctx, headDim, opts.numKVHeads, batchSize)

-	if opts.ropeOrigPosEmbeddings > 0 {
-		q = q.Mul(ctx, positionsScale)
-	}
-
 	kqv := nn.Attention(ctx, q, k, v, 1.0/math.Sqrt(float64(headDim)), cache)
 	kqv = kqv.Reshape(ctx, headDim*opts.numHeads, batchSize)
 	return sa.Output.Forward(ctx, kqv)
@@ -82,11 +76,11 @@ type Layer struct {
 	MLP           *MLP
 }

-func (l *Layer) Forward(ctx ml.Context, hiddenState, positionIDs, positionsScale, outputs ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
+func (l *Layer) Forward(ctx ml.Context, hiddenState, positionIDs, outputs ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
 	residual := hiddenState

 	hiddenState = l.AttentionNorm.Forward(ctx, hiddenState, opts.eps)
-	hiddenState = l.SelfAttention.Forward(ctx, hiddenState, positionIDs, positionsScale, cache, opts)
+	hiddenState = l.SelfAttention.Forward(ctx, hiddenState, positionIDs, cache, opts)

 	// In the final layer (outputs != nil), optimize by pruning to just the token positions
 	// we need logits for.
@@ -103,7 +97,7 @@ func (l *Layer) Forward(ctx ml.Context, hiddenState, positionIDs, positionsScale
 	return hiddenState.Add(ctx, residual)
 }

-func (m *TextModel) Forward(ctx ml.Context, inputs, positions, positionsScale, outputs ml.Tensor, batch input.Batch, cache kvcache.Cache) ml.Tensor {
+func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor, batch input.Batch, cache kvcache.Cache) ml.Tensor {
 	hiddenState := m.TokenEmbedding.Forward(ctx, inputs).Duplicate(ctx)

 	// image embeddings
@@ -120,36 +114,25 @@ func (m *TextModel) Forward(ctx ml.Context, inputs, positions, positionsScale, o
 			lastLayerOutputs = outputs
 		}

-		hiddenState = layer.Forward(ctx, hiddenState, positions, positionsScale, lastLayerOutputs, cache, m.TextOptions)
+		hiddenState = layer.Forward(ctx, hiddenState, positions, lastLayerOutputs, cache, m.TextOptions)
 	}

 	hiddenState = m.OutputNorm.Forward(ctx, hiddenState, m.eps)
 	return m.Output.Forward(ctx, hiddenState)
 }

-func (m *TextModel) getScale(ctx ml.Context, positions []int32) ml.Tensor {
-	posScale := make([]float32, len(positions))
-	for n, pos := range positions {
-		interval := math.Floor(float64(pos) / float64(m.ropeOrigPosEmbeddings))
-		posScale[n] = float32(1.0 + float64(m.ropeScalingBeta)*math.Log(1.0+interval))
-	}
-	return ctx.Input().FromFloats(posScale, 1, 1, len(posScale))
-}
-
 func newTextModel(c fs.Config) *TextModel {
 	return &TextModel{
 		Layers: make([]Layer, c.Uint("block_count")),
 		TextOptions: &TextOptions{
-			hiddenSize:            int(c.Uint("embedding_length")),
-			numHeads:              int(c.Uint("attention.head_count")),
-			numKVHeads:            int(c.Uint("attention.head_count_kv")),
-			headDim:               int(c.Uint("attention.key_length")),
-			ropeDim:               int(c.Uint("rope.dimension_count")),
-			eps:                   c.Float("attention.layer_norm_rms_epsilon"),
-			ropeBase:              c.Float("rope.freq_base"),
-			ropeScale:             c.Float("rope.scaling.factor", 1),
-			ropeOrigPosEmbeddings: int(c.Uint("rope.scaling.original_context_length")),
-			ropeScalingBeta:       c.Float("rope.scaling_beta"),
+			hiddenSize: int(c.Uint("embedding_length")),
+			numHeads:   int(c.Uint("attention.head_count")),
+			numKVHeads: int(c.Uint("attention.head_count_kv")),
+			headDim:    int(c.Uint("attention.key_length")),
+			ropeDim:    int(c.Uint("rope.dimension_count")),
+			eps:        c.Float("attention.layer_norm_rms_epsilon"),
+			ropeBase:   c.Float("rope.freq_base"),
+			ropeScale:  c.Float("rope.scaling.factor", 1),
 		},
 	}
 }
--- a/model/parsers/cogito.go
+++ b/model/parsers/cogito.go
@@ -1,319 +0,0 @@
-package parsers
-
-import (
-	"encoding/json"
-	"errors"
-	"log/slog"
-	"strings"
-	"unicode"
-
-	"github.com/ollama/ollama/api"
-)
-
-type CogitoParserState int
-
-const (
-	CogitoCollectingThinking CogitoParserState = iota
-	CogitoCollectingContent
-	CogitoCollectingToolCalls
-	CogitoCollectingToolOutput
-)
-
-const (
-	cogitoThinkingCloseTag    = "</think>"
-	cogitoToolCallsBeginTag   = "<｜tool▁calls▁begin｜>"
-	cogitoToolCallsEndTag     = "<｜tool▁calls▁end｜>"
-	cogitoToolCallBeginTag    = "<｜tool▁call▁begin｜>"
-	cogitoToolCallEndTag      = "<｜tool▁call▁end｜>"
-	cogitoToolSepTag          = "<｜tool▁sep｜>"
-	cogitoToolOutputBeginTag  = "<｜tool▁output▁begin｜>"
-	cogitoToolOutputEndTag    = "<｜tool▁output▁end｜>"
-	cogitoToolOutputsBeginTag = "<｜tool▁outputs▁begin｜>"
-	cogitoToolOutputsEndTag   = "<｜tool▁outputs▁end｜>"
-)
-
-type CogitoParser struct {
-	state  CogitoParserState
-	buffer strings.Builder
-}
-
-func (p *CogitoParser) HasToolSupport() bool {
-	return true
-}
-
-func (p *CogitoParser) HasThinkingSupport() bool {
-	return true
-}
-
-func (p *CogitoParser) setInitialState(lastMessage *api.Message, tools []api.Tool, thinkValue *api.ThinkValue) {
-	prefill := lastMessage != nil && lastMessage.Role == "assistant"
-
-	// Check both model capability AND request preference
-	thinkingEnabled := thinkValue != nil && thinkValue.Bool()
-	// thinkingEnabled should be set to false for tools
-
-	if !thinkingEnabled {
-		p.state = CogitoCollectingContent
-		return
-	}
-
-	if prefill && lastMessage.Content != "" {
-		p.state = CogitoCollectingContent
-		return
-	}
-
-	// Note: for cogito, if there are tools, then we don't want to be thinking
-	if len(tools) > 0 {
-		p.state = CogitoCollectingContent
-		return
-	}
-
-	p.state = CogitoCollectingThinking
-}
-
-func (p *CogitoParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
-	p.setInitialState(lastMessage, tools, thinkValue)
-	return tools
-}
-
-type cogitoEvent interface {
-	isCogitoEvent()
-}
-
-type cogitoEventThinkingContent struct {
-	content string
-}
-
-type cogitoEventContent struct {
-	content string
-}
-
-type cogitoEventToolCall struct {
-	toolCall api.ToolCall
-}
-
-func (cogitoEventThinkingContent) isCogitoEvent() {}
-func (cogitoEventContent) isCogitoEvent()         {}
-func (cogitoEventToolCall) isCogitoEvent()        {}
-
-func (p *CogitoParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
-	p.buffer.WriteString(s)
-	events := p.parseEvents()
-
-	var toolCalls []api.ToolCall
-	var contentSb strings.Builder
-	var thinkingSb strings.Builder
-	for _, event := range events {
-		switch event := event.(type) {
-		case cogitoEventToolCall:
-			toolCalls = append(toolCalls, event.toolCall)
-		case cogitoEventThinkingContent:
-			thinkingSb.WriteString(event.content)
-		case cogitoEventContent:
-			contentSb.WriteString(event.content)
-		}
-	}
-
-	return contentSb.String(), thinkingSb.String(), toolCalls, nil
-}
-
-func (p *CogitoParser) parseEvents() []cogitoEvent {
-	var all []cogitoEvent
-
-	keepLooping := true
-	for keepLooping {
-		var events []cogitoEvent
-		events, keepLooping = p.eat()
-		if len(events) > 0 {
-			all = append(all, events...)
-		}
-	}
-
-	return all
-}
-
-func (p *CogitoParser) eat() ([]cogitoEvent, bool) {
-	var events []cogitoEvent
-	bufStr := p.buffer.String()
-	if bufStr == "" {
-		return events, false
-	}
-
-	switch p.state {
-	case CogitoCollectingThinking:
-		if strings.Contains(bufStr, cogitoThinkingCloseTag) { // thinking[</think>] -> content
-			split := strings.SplitN(bufStr, cogitoThinkingCloseTag, 2)
-			thinking := split[0]
-			thinking = strings.TrimRightFunc(thinking, unicode.IsSpace)
-
-			remaining := split[1]
-			remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
-
-			p.buffer.Reset()
-			p.buffer.WriteString(remaining)
-			p.state = CogitoCollectingContent
-
-			if len(thinking) > 0 {
-				events = append(events, cogitoEventThinkingContent{content: thinking})
-			}
-			return events, true
-		} else if overlapLen := overlap(bufStr, cogitoThinkingCloseTag); overlapLen > 0 { // partial </think>
-			beforePartialTag := bufStr[:len(bufStr)-overlapLen]
-			trailingLen := trailingWhitespaceLen(beforePartialTag)
-			ambiguousStart := len(beforePartialTag) - trailingLen
-
-			unambiguous := bufStr[:ambiguousStart]
-			ambiguous := bufStr[ambiguousStart:]
-			p.buffer.Reset()
-			p.buffer.WriteString(ambiguous)
-			if len(unambiguous) > 0 {
-				events = append(events, cogitoEventThinkingContent{content: unambiguous})
-			}
-			return events, false
-		} else { // otherwise its thinking content
-			whitespaceLen := trailingWhitespaceLen(bufStr)
-			ambiguousStart := len(bufStr) - whitespaceLen
-
-			unambiguous := bufStr[:ambiguousStart]
-			ambiguous := bufStr[ambiguousStart:]
-			p.buffer.Reset()
-			p.buffer.WriteString(ambiguous)
-			if len(unambiguous) > 0 {
-				events = append(events, cogitoEventThinkingContent{content: unambiguous})
-			}
-			return events, false
-		}
-
-	case CogitoCollectingContent:
-		switch {
-		case strings.Contains(bufStr, cogitoToolCallsBeginTag): // content[<｜tool▁calls▁begin｜>] -> tool calls
-			split := strings.SplitN(bufStr, cogitoToolCallsBeginTag, 2)
-			contentBefore := strings.TrimRightFunc(split[0], unicode.IsSpace)
-			remaining := split[1]
-
-			p.buffer.Reset()
-			p.buffer.WriteString(remaining)
-			p.state = CogitoCollectingToolCalls
-
-			if len(contentBefore) > 0 {
-				events = append(events, cogitoEventContent{content: contentBefore})
-			}
-			return events, true
-		case strings.Contains(bufStr, cogitoToolOutputsBeginTag): // content[<｜tool▁outputs▁begin｜>] -> tool outputs
-			split := strings.SplitN(bufStr, cogitoToolOutputsBeginTag, 2)
-			contentBefore := strings.TrimRightFunc(split[0], unicode.IsSpace)
-			remaining := split[1]
-
-			p.buffer.Reset()
-			p.buffer.WriteString(remaining)
-			p.state = CogitoCollectingToolOutput
-
-			if len(contentBefore) > 0 {
-				events = append(events, cogitoEventContent{content: contentBefore})
-			}
-			return events, true
-		default: // otherwise its content
-			p.buffer.Reset()
-			if len(bufStr) > 0 {
-				events = append(events, cogitoEventContent{content: bufStr})
-			}
-			return events, false
-		}
-	case CogitoCollectingToolCalls:
-		if idx := strings.Index(bufStr, cogitoToolCallBeginTag); idx != -1 {
-			startIdx := idx + len(cogitoToolCallBeginTag)
-			if endIdx := strings.Index(bufStr[startIdx:], cogitoToolCallEndTag); endIdx != -1 {
-				toolCallContent := bufStr[startIdx : startIdx+endIdx]
-
-				if toolCall, err := p.parseToolCallContent(toolCallContent); err == nil {
-					remaining := bufStr[startIdx+endIdx+len(cogitoToolCallEndTag):]
-					remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
-
-					p.buffer.Reset()
-					p.buffer.WriteString(remaining)
-
-					events = append(events, cogitoEventToolCall{toolCall: toolCall})
-					return events, true
-				} else {
-					slog.Warn("cogito tool call parsing failed", "error", err)
-				}
-			}
-		}
-
-		if idx := strings.Index(bufStr, cogitoToolCallsEndTag); idx != -1 {
-			remaining := bufStr[idx+len(cogitoToolCallsEndTag):]
-			remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
-
-			p.buffer.Reset()
-			p.buffer.WriteString(remaining)
-			p.state = CogitoCollectingContent
-
-			return events, true
-		}
-
-		return events, false
-
-	case CogitoCollectingToolOutput:
-		if idx := strings.Index(bufStr, cogitoToolOutputBeginTag); idx != -1 {
-			startIdx := idx + len(cogitoToolOutputBeginTag)
-			if endIdx := strings.Index(bufStr[startIdx:], cogitoToolOutputEndTag); endIdx != -1 {
-				remaining := bufStr[startIdx+endIdx+len(cogitoToolOutputEndTag):]
-				remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
-
-				p.buffer.Reset()
-				p.buffer.WriteString(remaining)
-
-				return events, true
-			}
-		}
-
-		if idx := strings.Index(bufStr, cogitoToolOutputsEndTag); idx != -1 {
-			remaining := bufStr[idx+len(cogitoToolOutputsEndTag):]
-			remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
-
-			p.buffer.Reset()
-			p.buffer.WriteString(remaining)
-			p.state = CogitoCollectingContent
-
-			return events, true
-		}
-
-		return events, false
-	}
-
-	return events, false
-}
-
-func (p *CogitoParser) parseToolCallContent(content string) (api.ToolCall, error) {
-	// Expected format: function<｜tool▁sep｜>tool_name\n```json\n{args}\n```
-	parts := strings.SplitN(content, cogitoToolSepTag, 2)
-	if len(parts) < 2 {
-		return api.ToolCall{}, errors.New("invalid format")
-	}
-	nameAndArgs := parts[1]
-
-	jsonStart := strings.Index(nameAndArgs, "\n```json\n")
-	if jsonStart == -1 {
-		return api.ToolCall{}, errors.New("invalid format")
-	}
-	toolName := strings.TrimSpace(nameAndArgs[:jsonStart])
-	jsonContent := nameAndArgs[jsonStart+len("\n```json\n"):]
-
-	jsonEnd := strings.Index(jsonContent, "\n```")
-	if jsonEnd == -1 {
-		return api.ToolCall{}, errors.New("invalid format")
-	}
-	argsJSON := jsonContent[:jsonEnd]
-
-	var args api.ToolCallFunctionArguments
-	if err := json.Unmarshal([]byte(argsJSON), &args); err != nil {
-		return api.ToolCall{}, err
-	}
-
-	return api.ToolCall{
-		Function: api.ToolCallFunction{
-			Name:      toolName,
-			Arguments: args,
-		},
-	}, nil
-}
--- a/model/parsers/cogito_test.go
+++ b/model/parsers/cogito_test.go
@@ -1,565 +0,0 @@
-package parsers
-
-import (
-	"strings"
-	"testing"
-
-	"github.com/google/go-cmp/cmp"
-
-	"github.com/ollama/ollama/api"
-)
-
-func TestCogitoParser(t *testing.T) {
-	tests := []struct {
-		name              string
-		input             string
-		expectedContent   string
-		expectedThinking  string
-		expectedToolCalls []api.ToolCall
-		tools             []api.Tool
-		lastMessage       *api.Message
-	}{
-		{
-			name:             "simple_content",
-			input:            "This is a simple response.",
-			expectedContent:  "This is a simple response.",
-			expectedThinking: "",
-		},
-		{
-			name:             "thinking_only",
-			input:            "This is thinking content.</think>This is response content.",
-			expectedContent:  "This is response content.",
-			expectedThinking: "This is thinking content.",
-		},
-		{
-			name: "tool_call_simple",
-			input: `<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>get_weather
-` + "```json\n" + `{"location":"Paris"}
-` + "```" + `<｜tool▁call▁end｜><｜tool▁calls▁end｜>`,
-			expectedToolCalls: []api.ToolCall{
-				{
-					Function: api.ToolCallFunction{
-						Name: "get_weather",
-						Arguments: api.ToolCallFunctionArguments{
-							"location": "Paris",
-						},
-					},
-				},
-			},
-			tools: []api.Tool{
-				{
-					Type: "function",
-					Function: api.ToolFunction{
-						Name: "get_weather",
-						Parameters: api.ToolFunctionParameters{
-							Properties: map[string]api.ToolProperty{
-								"location": {Type: api.PropertyType{"string"}},
-							},
-						},
-					},
-				},
-			},
-		},
-		{
-			name: "thinking_with_tool_call",
-			input: `I need to check the weather.</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>get_weather
-` + "```json\n" + `{"location":"Paris"}
-` + "```" + `<｜tool▁call▁end｜><｜tool▁calls▁end｜>`,
-			expectedContent:  "I need to check the weather.</think>",
-			expectedThinking: "", // No thinking when tools are present (Cogito-specific behavior)
-			expectedToolCalls: []api.ToolCall{
-				{
-					Function: api.ToolCallFunction{
-						Name: "get_weather",
-						Arguments: api.ToolCallFunctionArguments{
-							"location": "Paris",
-						},
-					},
-				},
-			},
-			tools: []api.Tool{
-				{
-					Type: "function",
-					Function: api.ToolFunction{
-						Name: "get_weather",
-						Parameters: api.ToolFunctionParameters{
-							Properties: map[string]api.ToolProperty{
-								"location": {Type: api.PropertyType{"string"}},
-							},
-						},
-					},
-				},
-			},
-		},
-		{
-			name: "multiple_tool_calls",
-			input: `<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>get_weather
-` + "```json\n" + `{"location":"Paris"}
-` + "```" + `<｜tool▁call▁end｜>
-<｜tool▁call▁begin｜>function<｜tool▁sep｜>get_weather
-` + "```json\n" + `{"location":"London"}
-` + "```" + `<｜tool▁call▁end｜><｜tool▁calls▁end｜>`,
-			expectedToolCalls: []api.ToolCall{
-				{
-					Function: api.ToolCallFunction{
-						Name: "get_weather",
-						Arguments: api.ToolCallFunctionArguments{
-							"location": "Paris",
-						},
-					},
-				},
-				{
-					Function: api.ToolCallFunction{
-						Name: "get_weather",
-						Arguments: api.ToolCallFunctionArguments{
-							"location": "London",
-						},
-					},
-				},
-			},
-			tools: []api.Tool{
-				{
-					Type: "function",
-					Function: api.ToolFunction{
-						Name: "get_weather",
-						Parameters: api.ToolFunctionParameters{
-							Properties: map[string]api.ToolProperty{
-								"location": {Type: api.PropertyType{"string"}},
-							},
-						},
-					},
-				},
-			},
-		},
-		{
-			name: "complex_tool_arguments",
-			input: `<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>process_data
-` + "```json\n" + `{"items":["item1","item2"],"config":{"enabled":true,"threshold":0.95},"count":42}
-` + "```" + `<｜tool▁call▁end｜><｜tool▁calls▁end｜>`,
-			expectedToolCalls: []api.ToolCall{
-				{
-					Function: api.ToolCallFunction{
-						Name: "process_data",
-						Arguments: api.ToolCallFunctionArguments{
-							"items":  []any{"item1", "item2"},
-							"config": map[string]any{"enabled": true, "threshold": 0.95},
-							"count":  42.0,
-						},
-					},
-				},
-			},
-		},
-		{
-			name:             "tool_output_parsing",
-			input:            `<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>{"temperature": 22, "condition": "sunny"}<｜tool▁output▁end｜><｜tool▁outputs▁end｜>`,
-			expectedContent:  "",
-			expectedThinking: "",
-		},
-		{
-			name: "thinking_with_multiline_content",
-			input: `This is line 1
-This is line 2
-This is line 3</think>Final response here.`,
-			expectedContent:  "Final response here.",
-			expectedThinking: "This is line 1\nThis is line 2\nThis is line 3",
-		},
-		{
-			name:             "no_thinking_simple",
-			input:            "This is content.",
-			expectedContent:  "This is content.",
-			expectedThinking: "",
-		},
-		{
-			name:            "prefill_content_only",
-			input:           "Continuing from previous content.",
-			expectedContent: "Continuing from previous content.",
-			lastMessage: &api.Message{
-				Role:    "assistant",
-				Content: "Previous content",
-			},
-		},
-		{
-			name:             "prefill_with_thinking",
-			input:            "Continuing thinking</think>Continuing content.",
-			expectedContent:  "Continuing content.",
-			expectedThinking: "Continuing thinking",
-			lastMessage: &api.Message{
-				Role: "assistant",
-			},
-		},
-		// Edge cases
-		{
-			name:             "nested_think_tags_in_thinking",
-			input:            "I'm thinking <think>nested</think> more thinking</think>Final content.",
-			expectedContent:  "more thinking</think>Final content.",
-			expectedThinking: "I'm thinking <think>nested",
-		},
-		{
-			name:             "multiple_think_close_tags",
-			input:            "First thinking</think>Content</think>More content.",
-			expectedContent:  "Content</think>More content.",
-			expectedThinking: "First thinking",
-		},
-		{
-			name:             "empty_thinking_content",
-			input:            "</think>Just content here.",
-			expectedContent:  "</think>Just content here.",
-			expectedThinking: "",
-		},
-		{
-			name:             "thinking_disabled_with_think_tags",
-			input:            "Content with </think> tags should be treated as content.",
-			expectedContent:  "Content with </think> tags should be treated as content.",
-			expectedThinking: "",
-			lastMessage: &api.Message{
-				Role:    "assistant",
-				Content: "existing", // Forces non-thinking mode
-			},
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			// Use thinking-enabled parser for tests that expect thinking
-			hasThinking := tt.expectedThinking != ""
-			parser := &CogitoParser{}                                                  // it has thinking support
-			parser.Init(tt.tools, tt.lastMessage, &api.ThinkValue{Value: hasThinking}) // but we should set it with the request that the user wants
-
-			content, thinking, toolCalls, err := parser.Add(tt.input, true)
-			if err != nil {
-				t.Fatalf("Add() error = %v", err)
-			}
-
-			if diff := cmp.Diff(tt.expectedContent, content); diff != "" {
-				t.Errorf("content mismatch (-want +got):\n%s", diff)
-			}
-
-			if diff := cmp.Diff(tt.expectedThinking, thinking); diff != "" {
-				t.Errorf("thinking mismatch (-want +got):\n%s", diff)
-			}
-
-			if diff := cmp.Diff(tt.expectedToolCalls, toolCalls); diff != "" {
-				t.Errorf("tool calls mismatch (-want +got):\n%s", diff)
-			}
-		})
-	}
-}
-
-func TestCogitoParser_Streaming(t *testing.T) {
-	parser := &CogitoParser{}
-	parser.Init(nil, nil, &api.ThinkValue{Value: true})
-
-	chunks := []string{
-		"This is ",
-		"thinking content",
-		".</think>This is ",
-		"content.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>test_tool\n```json\n{\"arg\":\"value\"}\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-	}
-
-	var finalContent, finalThinking strings.Builder
-	var finalToolCalls []api.ToolCall
-
-	for i, chunk := range chunks {
-		done := i == len(chunks)-1
-		content, thinking, toolCalls, err := parser.Add(chunk, done)
-		if err != nil {
-			t.Fatalf("Add() error on chunk %d: %v", i, err)
-		}
-
-		finalContent.WriteString(content)
-		finalThinking.WriteString(thinking)
-		finalToolCalls = append(finalToolCalls, toolCalls...)
-	}
-
-	expectedContent := "This is content."
-	expectedThinking := "This is thinking content."
-	expectedToolCalls := []api.ToolCall{
-		{
-			Function: api.ToolCallFunction{
-				Name: "test_tool",
-				Arguments: api.ToolCallFunctionArguments{
-					"arg": "value",
-				},
-			},
-		},
-	}
-
-	if finalContent.String() != expectedContent {
-		t.Errorf("expected content %q, got %q", expectedContent, finalContent.String())
-	}
-
-	if finalThinking.String() != expectedThinking {
-		t.Errorf("expected thinking %q, got %q", expectedThinking, finalThinking.String())
-	}
-
-	if diff := cmp.Diff(expectedToolCalls, finalToolCalls); diff != "" {
-		t.Errorf("tool calls mismatch (-want +got):\n%s", diff)
-	}
-}
-
-func TestCogitoParser_StreamingEdgeCases(t *testing.T) {
-	tests := []struct {
-		name               string
-		chunks             []string
-		expectedContent    string
-		expectedThinking   string
-		expectedToolCalls  []api.ToolCall
-		hasThinkingSupport bool
-	}{
-		{
-			name: "split_thinking_tag",
-			chunks: []string{
-				"This is thinking content</thi",
-				"nk>This is content.",
-			},
-			expectedContent:    "This is content.",
-			expectedThinking:   "This is thinking content",
-			hasThinkingSupport: true,
-		},
-		{
-			name: "split_tool_calls_begin_tag_conservative_parsing",
-			chunks: []string{
-				"Content before<｜tool▁calls▁beg",
-				"in｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>test\n```json\n{}\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-			},
-			// Parser is conservative - treats incomplete tags as content
-			expectedContent:    "Content before<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>test\n```json\n{}\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-			expectedToolCalls:  nil,
-			hasThinkingSupport: false,
-		},
-		{
-			name: "thinking_disabled_with_split_tags",
-			chunks: []string{
-				"Content with </thi",
-				"nk> should be treated as content.",
-			},
-			expectedContent:    "Content with </think> should be treated as content.",
-			expectedThinking:   "",
-			hasThinkingSupport: false,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			parser := &CogitoParser{}
-			parser.Init(nil, nil, &api.ThinkValue{Value: tt.hasThinkingSupport})
-
-			var finalContent, finalThinking strings.Builder
-			var finalToolCalls []api.ToolCall
-
-			for i, chunk := range tt.chunks {
-				done := i == len(tt.chunks)-1
-				content, thinking, toolCalls, err := parser.Add(chunk, done)
-				if err != nil {
-					t.Fatalf("Add() error on chunk %d: %v", i, err)
-				}
-
-				finalContent.WriteString(content)
-				finalThinking.WriteString(thinking)
-				finalToolCalls = append(finalToolCalls, toolCalls...)
-			}
-
-			if finalContent.String() != tt.expectedContent {
-				t.Errorf("expected content %q, got %q", tt.expectedContent, finalContent.String())
-			}
-
-			if finalThinking.String() != tt.expectedThinking {
-				t.Errorf("expected thinking %q, got %q", tt.expectedThinking, finalThinking.String())
-			}
-
-			if diff := cmp.Diff(tt.expectedToolCalls, finalToolCalls); diff != "" {
-				t.Errorf("tool calls mismatch (-want +got):\n%s", diff)
-			}
-		})
-	}
-}
-
-func TestCogitoParser_HasToolSupport(t *testing.T) {
-	parser := &CogitoParser{}
-	if !parser.HasToolSupport() {
-		t.Error("CogitoParser should support tools")
-	}
-}
-
-func TestCogitoParser_Init(t *testing.T) {
-	parser := &CogitoParser{}
-
-	tools := []api.Tool{
-		{Function: api.ToolFunction{Name: "test_tool"}},
-	}
-
-	lastMessage := &api.Message{Role: "assistant", Content: "previous"}
-
-	returnedTools := parser.Init(tools, lastMessage, nil)
-
-	if len(returnedTools) != len(tools) {
-		t.Errorf("expected %d tools returned, got %d", len(tools), len(returnedTools))
-	}
-}
-
-func TestCogitoParser_parseToolCallContent(t *testing.T) {
-	tests := []struct {
-		name        string
-		content     string
-		expected    api.ToolCall
-		expectError bool
-	}{
-		{
-			name: "valid_tool_call_standard_format",
-			content: `function<｜tool▁sep｜>get_weather
-` + "```json\n" + `{"location":"Paris"}
-` + "```",
-			expected: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name: "get_weather",
-					Arguments: api.ToolCallFunctionArguments{
-						"location": "Paris",
-					},
-				},
-			},
-			expectError: false,
-		},
-		{
-			name: "valid_tool_call_complex_args",
-			content: `function<｜tool▁sep｜>process_data
-` + "```json\n" + `{"items":["item1","item2"],"config":{"enabled":true},"count":42}
-` + "```",
-			expected: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name: "process_data",
-					Arguments: api.ToolCallFunctionArguments{
-						"items":  []any{"item1", "item2"},
-						"config": map[string]any{"enabled": true},
-						"count":  42.0,
-					},
-				},
-			},
-			expectError: false,
-		},
-		{
-			name: "valid_tool_call_empty_args",
-			content: `function<｜tool▁sep｜>no_args_tool
-` + "```json\n" + `{}
-` + "```",
-			expected: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "no_args_tool",
-					Arguments: api.ToolCallFunctionArguments{},
-				},
-			},
-			expectError: false,
-		},
-		{
-			name:        "missing_separator",
-			content:     `functionget_weather` + "```json\n" + `{"location":"Paris"}` + "\n```",
-			expected:    api.ToolCall{},
-			expectError: true,
-		},
-		{
-			name:        "invalid_function_type",
-			content:     `not_function<｜tool▁sep｜>get_weather` + "```json\n" + `{"location":"Paris"}` + "\n```",
-			expected:    api.ToolCall{},
-			expectError: true,
-		},
-		{
-			name:        "missing_json_block_start",
-			content:     `function<｜tool▁sep｜>get_weather{"location":"Paris"}` + "```",
-			expected:    api.ToolCall{},
-			expectError: true,
-		},
-		{
-			name:        "missing_json_block_end",
-			content:     `function<｜tool▁sep｜>get_weather` + "```json\n" + `{"location":"Paris"}`,
-			expected:    api.ToolCall{},
-			expectError: true,
-		},
-		{
-			name:        "invalid_json",
-			content:     `function<｜tool▁sep｜>get_weather` + "```json\n" + `{location:Paris}` + "\n```",
-			expected:    api.ToolCall{},
-			expectError: true,
-		},
-		{
-			name:        "empty_function_type",
-			content:     `<｜tool▁sep｜>get_weather` + "```json\n" + `{"location":"Paris"}` + "\n```",
-			expected:    api.ToolCall{},
-			expectError: true,
-		},
-		{
-			name: "tool_with_spaces_in_name",
-			content: `function<｜tool▁sep｜>  get_weather  
-` + "```json\n" + `{"location":"Paris"}
-` + "```",
-			expected: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name: "get_weather",
-					Arguments: api.ToolCallFunctionArguments{
-						"location": "Paris",
-					},
-				},
-			},
-			expectError: false,
-		},
-		{
-			name: "tool_with_multiline_json",
-			content: `function<｜tool▁sep｜>get_weather
-` + "```json\n" + `{
-  "location": "Paris",
-  "units": "metric"
-}
-` + "```",
-			expected: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name: "get_weather",
-					Arguments: api.ToolCallFunctionArguments{
-						"location": "Paris",
-						"units":    "metric",
-					},
-				},
-			},
-			expectError: false,
-		},
-		{
-			name: "tool_with_nested_objects",
-			content: `function<｜tool▁sep｜>complex_tool
-` + "```json\n" + `{"nested":{"deep":{"value":123}}}
-` + "```",
-			expected: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name: "complex_tool",
-					Arguments: api.ToolCallFunctionArguments{
-						"nested": map[string]any{
-							"deep": map[string]any{
-								"value": 123.0,
-							},
-						},
-					},
-				},
-			},
-			expectError: false,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			parser := &CogitoParser{}
-
-			result, err := parser.parseToolCallContent(tt.content)
-
-			if tt.expectError {
-				if err == nil {
-					t.Errorf("expected error but got none")
-				}
-				return
-			}
-
-			if err != nil {
-				t.Fatalf("unexpected error: %v", err)
-			}
-
-			if diff := cmp.Diff(tt.expected, result); diff != "" {
-				t.Errorf("tool call mismatch (-want +got):\n%s", diff)
-			}
-		})
-	}
-}
--- a/model/parsers/ministral.go
+++ b/model/parsers/ministral.go
@@ -1,136 +0,0 @@
-package parsers
-
-import (
-	"encoding/json"
-	"fmt"
-	"strings"
-
-	"github.com/ollama/ollama/api"
-)
-
-type ministralParserState int
-
-const (
-	ministralCollectingContent = iota
-	ministralCollectingThinkingContent
-	ministralCollectingToolName
-	ministralCollectingToolArgs
-)
-
-type MinistralParser struct {
-	state              ministralParserState
-	buffer             strings.Builder
-	tools              []api.Tool
-	hasThinkingSupport bool
-	currentTool        *api.Tool
-}
-
-func (p *MinistralParser) HasToolSupport() bool {
-	return true
-}
-
-func (p *MinistralParser) HasThinkingSupport() bool {
-	return p.hasThinkingSupport
-}
-
-func (p *MinistralParser) setInitialState(lastMessage *api.Message) {
-	prefill := lastMessage != nil && lastMessage.Role == "assistant"
-	if !p.HasThinkingSupport() {
-		p.state = ministralCollectingContent
-		return
-	}
-
-	if prefill && lastMessage.Content != "" {
-		p.state = ministralCollectingContent
-		return
-	}
-
-	p.state = ministralCollectingThinkingContent
-}
-
-func (p *MinistralParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
-	p.tools = tools
-	p.setInitialState(lastMessage)
-	return tools
-}
-
-func toolByName(tools []api.Tool, n string) (*api.Tool, error) {
-	for i := range tools {
-		if tools[i].Function.Name == n {
-			return &tools[i], nil
-		}
-	}
-	return nil, fmt.Errorf("tool '%s' not found", n)
-}
-
-func (p *MinistralParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
-	p.buffer.WriteString(s)
-
-	switch p.state {
-	case ministralCollectingContent:
-		if strings.Contains(p.buffer.String(), "[TOOL_CALLS]") {
-			before, _ := splitAtTag(&p.buffer, "[TOOL_CALLS]", false)
-			if before != "" {
-				return before, "", calls, nil
-			}
-			p.state = ministralCollectingToolName
-		} else if strings.Contains(p.buffer.String(), "[THINK]") {
-			p.state = ministralCollectingThinkingContent
-			return "", "", calls, nil
-		} else {
-			p.buffer.Reset()
-			return s, "", calls, nil
-		}
-	case ministralCollectingThinkingContent:
-		if strings.Contains(p.buffer.String(), "[/THINK]") {
-			thinkingContent, after := splitAtTag(&p.buffer, "[/THINK]", true)
-			p.state = ministralCollectingContent
-			if after != "" {
-				p.buffer.Reset()
-				return after, thinkingContent, calls, nil
-			}
-			return "", thinkingContent, calls, nil
-		} else {
-			p.buffer.Reset()
-			return "", s, calls, nil
-		}
-	case ministralCollectingToolName:
-		if strings.Contains(p.buffer.String(), "[ARGS]") {
-			name, _ := splitAtTag(&p.buffer, "[ARGS]", false)
-
-			t, err := toolByName(p.tools, name)
-			if err != nil {
-				return "", "", calls, err
-			}
-			p.currentTool = t
-			p.state = ministralCollectingToolArgs
-			return "", "", calls, nil
-		}
-		return "", "", calls, nil
-	case ministralCollectingToolArgs:
-		if strings.Contains(p.buffer.String(), "}") {
-			before, _ := splitAtTag(&p.buffer, "}", false)
-			before += "}"
-
-			var data map[string]any
-			if err := json.Unmarshal([]byte(before), &data); err != nil {
-				// todo - throw a better error
-				return "", "", calls, err
-			}
-
-			p.state = ministralCollectingContent
-
-			call := api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      p.currentTool.Function.Name,
-					Arguments: api.ToolCallFunctionArguments(data),
-				},
-			}
-			calls = append(calls, call)
-			return "", "", calls, nil
-		}
-		return "", "", calls, nil
-	}
-
-	return p.buffer.String(), thinking, calls, nil
-}
--- a/model/parsers/parsers.go
+++ b/model/parsers/parsers.go
@@ -1,17 +1,14 @@
 package parsers

 import (
-	"strings"
-	"unicode"
-
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/harmony"
 )

 type Parser interface {
-	// Init initializes the parser with tools, optional last message for chat prefill, and think value
+	// Init initializes the parser with tools and optional last message for chat prefill
 	// Returns processed tools if the parser needs to modify them (e.g., harmony renames them)
-	Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool
+	Init(tools []api.Tool, lastMessage *api.Message) []api.Tool
 	// Add processes streamed content and returns parsed content, thinking, and tool calls
 	// The done flag indicates if this is the last chunk (used for draining accumulators)
 	Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error)
@@ -41,32 +38,28 @@ func ParserForName(name string) Parser {
 	if parser, ok := registry.constructors[name]; ok {
 		return parser()
 	}
-	var p Parser
-
 	switch name {
 	case "qwen3-coder":
-		p = &Qwen3CoderParser{}
+		parser := &Qwen3CoderParser{}
+		return parser
 	case "qwen3-vl-instruct":
-		p = &Qwen3VLParser{hasThinkingSupport: false}
+		parser := &Qwen3VLParser{hasThinkingSupport: false}
+		return parser
 	case "qwen3-vl-thinking":
-		p = &Qwen3VLParser{hasThinkingSupport: true}
-	case "ministral":
-		p = &MinistralParser{hasThinkingSupport: false}
+		parser := &Qwen3VLParser{hasThinkingSupport: true}
+		return parser
 	case "passthrough":
 		return &PassthroughParser{}
 	case "harmony":
 		return harmony.NewHarmonyMessageHandler()
-	case "cogito":
-		return &CogitoParser{}
 	default:
 		return nil
 	}
-	return p
 }

 type PassthroughParser struct{}

-func (p *PassthroughParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
+func (p *PassthroughParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
 	return tools // passthrough doesn't modify tools
 }

@@ -81,20 +74,3 @@ func (p *PassthroughParser) HasToolSupport() bool {
 func (p *PassthroughParser) HasThinkingSupport() bool {
 	return false
 }
-
-func splitAtTag(sb *strings.Builder, tag string, trimAfter bool) (string, string) {
-	split := strings.SplitN(sb.String(), tag, 2)
-	if len(split) == 1 {
-		sb.Reset()
-		return split[0], ""
-	}
-	before := split[0]
-	before = strings.TrimRightFunc(before, unicode.IsSpace)
-	after := split[1]
-	if trimAfter {
-		after = strings.TrimLeftFunc(after, unicode.IsSpace)
-	}
-	sb.Reset()
-	sb.WriteString(after)
-	return before, after // return events
-}
--- a/model/parsers/parsers_test.go
+++ b/model/parsers/parsers_test.go
@@ -1,7 +1,6 @@
 package parsers

 import (
-	"strings"
 	"testing"

 	"github.com/ollama/ollama/api"
@@ -11,7 +10,7 @@ type mockParser struct {
 	name string
 }

-func (m *mockParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
+func (m *mockParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
 	return tools
 }

@@ -96,164 +95,3 @@ func TestUnknownParserReturnsNil(t *testing.T) {
 		t.Error("expected nil for unknown parser")
 	}
 }
-
-func TestSplitAtTag(t *testing.T) {
-	tests := []struct {
-		name       string
-		input      string
-		tag        string
-		trimAfter  bool
-		wantBefore string
-		wantAfter  string
-		wantSB     string // expected content of strings.Builder after operation
-	}{
-		{
-			name:       "basic split with trimAfter true",
-			input:      "hello <!-- split --> world",
-			tag:        "<!-- split -->",
-			trimAfter:  true,
-			wantBefore: "hello",
-			wantAfter:  "world",
-			wantSB:     "world",
-		},
-		{
-			name:       "basic split with trimAfter false",
-			input:      "hello <!-- split -->   world",
-			tag:        "<!-- split -->",
-			trimAfter:  false,
-			wantBefore: "hello",
-			wantAfter:  "   world",
-			wantSB:     "   world",
-		},
-		{
-			name:       "tag at beginning with trimAfter true",
-			input:      "<!-- split -->world",
-			tag:        "<!-- split -->",
-			trimAfter:  true,
-			wantBefore: "",
-			wantAfter:  "world",
-			wantSB:     "world",
-		},
-		{
-			name:       "tag at beginning with trimAfter false",
-			input:      "<!-- split -->   world",
-			tag:        "<!-- split -->",
-			trimAfter:  false,
-			wantBefore: "",
-			wantAfter:  "   world",
-			wantSB:     "   world",
-		},
-		{
-			name:       "tag at end with trimAfter true",
-			input:      "hello <!-- split -->",
-			tag:        "<!-- split -->",
-			trimAfter:  true,
-			wantBefore: "hello",
-			wantAfter:  "",
-			wantSB:     "",
-		},
-		{
-			name:       "tag at end with trimAfter false",
-			input:      "hello <!-- split -->",
-			tag:        "<!-- split -->",
-			trimAfter:  false,
-			wantBefore: "hello",
-			wantAfter:  "",
-			wantSB:     "",
-		},
-		{
-			name:       "multiple tags splits at first occurrence",
-			input:      "hello <!-- split --> world <!-- split --> end",
-			tag:        "<!-- split -->",
-			trimAfter:  true,
-			wantBefore: "hello",
-			wantAfter:  "world <!-- split --> end",
-			wantSB:     "world <!-- split --> end",
-		},
-		{
-			name:       "tag not present",
-			input:      "hello world",
-			tag:        "<!-- split -->",
-			trimAfter:  true,
-			wantBefore: "hello world",
-			wantAfter:  "",
-			wantSB:     "",
-		},
-		{
-			name:       "empty input",
-			input:      "",
-			tag:        "<!-- split -->",
-			trimAfter:  true,
-			wantBefore: "",
-			wantAfter:  "",
-			wantSB:     "",
-		},
-		{
-			name:       "only whitespace before tag",
-			input:      "   \t\n<!-- split -->world",
-			tag:        "<!-- split -->",
-			trimAfter:  true,
-			wantBefore: "",
-			wantAfter:  "world",
-			wantSB:     "world",
-		},
-		{
-			name:       "only whitespace after tag with trimAfter true",
-			input:      "hello<!-- split -->   \t\n",
-			tag:        "<!-- split -->",
-			trimAfter:  true,
-			wantBefore: "hello",
-			wantAfter:  "",
-			wantSB:     "",
-		},
-		{
-			name:       "only whitespace after tag with trimAfter false",
-			input:      "hello<!-- split -->   \t\n",
-			tag:        "<!-- split -->",
-			trimAfter:  false,
-			wantBefore: "hello",
-			wantAfter:  "   \t\n",
-			wantSB:     "   \t\n",
-		},
-		{
-			name:       "complex whitespace trimming",
-			input:      "  hello \t\n <!-- split --> \n\t world  ",
-			tag:        "<!-- split -->",
-			trimAfter:  true,
-			wantBefore: "  hello",
-			wantAfter:  "world  ",
-			wantSB:     "world  ",
-		},
-		{
-			name:       "tag with special characters",
-			input:      "text <tag attr=\"value\"> more text",
-			tag:        "<tag attr=\"value\">",
-			trimAfter:  true,
-			wantBefore: "text",
-			wantAfter:  "more text",
-			wantSB:     "more text",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			sb := &strings.Builder{}
-			sb.WriteString(tt.input)
-
-			before, after := splitAtTag(sb, tt.tag, tt.trimAfter)
-
-			// Check return values
-			if before != tt.wantBefore {
-				t.Errorf("splitAtTag() before = %q, want %q", before, tt.wantBefore)
-			}
-			if after != tt.wantAfter {
-				t.Errorf("splitAtTag() after = %q, want %q", after, tt.wantAfter)
-			}
-
-			// Check strings.Builder state
-			if sb.String() != tt.wantSB {
-				t.Errorf("strings.Builder after split = %q, want %q", sb.String(), tt.wantSB)
-			}
-		})
-	}
-}
--- a/model/parsers/qwen3coder.go
+++ b/model/parsers/qwen3coder.go
@@ -43,7 +43,7 @@ func (p *Qwen3CoderParser) HasThinkingSupport() bool {
 	return false
 }

-func (p *Qwen3CoderParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
+func (p *Qwen3CoderParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
 	p.tools = tools
 	return tools // Qwen doesn't modify tools
 }
@@ -432,7 +432,7 @@ func transformToXML(raw string) string {
 		groups := qwenTagRegex.FindStringSubmatch(match)
 		tag := groups[1]
 		var escapedValue strings.Builder
-		_ = xml.EscapeText(&escapedValue, []byte(groups[2])) // error is always nil for strings.Builder
+		xml.EscapeText(&escapedValue, []byte(groups[2]))
 		return fmt.Sprintf(`<%s name="%s">`, tag, escapedValue.String())
 	})

--- a/model/parsers/qwen3vl.go
+++ b/model/parsers/qwen3vl.go
@@ -54,7 +54,7 @@ func (p *Qwen3VLParser) setInitialState(lastMessage *api.Message) {
 	p.state = CollectingThinkingContent
 }

-func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
+func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
 	p.tools = tools
 	p.setInitialState(lastMessage)
 	return tools
@@ -70,6 +70,7 @@ func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking strin
 	p.buffer.WriteString(s)
 	events := p.parseEvents()

+	var toolCalls []api.ToolCall
 	var contentSb strings.Builder
 	var thinkingSb strings.Builder
 	for _, event := range events {
@@ -80,7 +81,7 @@ func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking strin
 				slog.Warn("qwen tool call parsing failed", "error", err)
 				return "", "", nil, err
 			}
-			calls = append(calls, toolCall)
+			toolCalls = append(toolCalls, toolCall)
 		case qwenEventThinkingContent:
 			thinkingSb.WriteString(event.content)
 		case qwenEventContent:
@@ -90,7 +91,7 @@ func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking strin
 		}
 	}

-	return contentSb.String(), thinkingSb.String(), calls, nil
+	return contentSb.String(), thinkingSb.String(), toolCalls, nil
 }

 func (p *Qwen3VLParser) parseEvents() []qwenEvent {
@@ -112,6 +113,19 @@ func (p *Qwen3VLParser) parseEvents() []qwenEvent {
 	return all
 }

+func splitAtTag(p *Qwen3VLParser, tag string, trimAfter bool) (string, string) {
+	split := strings.SplitN(p.buffer.String(), tag, 2)
+	before := split[0]
+	before = strings.TrimRightFunc(before, unicode.IsSpace)
+	after := split[1]
+	if trimAfter {
+		after = strings.TrimLeftFunc(after, unicode.IsSpace)
+	}
+	p.buffer.Reset()
+	p.buffer.WriteString(after)
+	return before, after // return events
+}
+
 func (p *Qwen3VLParser) eatLeadingWhitespaceAndTransitionTo(nextState qwenParserState) ([]qwenEvent, bool) {
 	trimmed := strings.TrimLeftFunc(p.buffer.String(), unicode.IsSpace)
 	p.buffer.Reset()
@@ -130,7 +144,7 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
 	case CollectingContent:
 		if strings.Contains(p.buffer.String(), toolOpenTag) {
 			// events = emitContentBeforeTag(p, events, toolOpenTag)
-			before, _ := splitAtTag(&p.buffer, toolOpenTag, false)
+			before, _ := splitAtTag(p, toolOpenTag, false)
 			if len(before) > 0 {
 				events = append(events, qwenEventContent{content: before})
 			}
@@ -181,7 +195,7 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
 		}
 	case CollectingThinkingContent:
 		if strings.Contains(p.buffer.String(), thinkingCloseTag) {
-			thinking, remaining := splitAtTag(&p.buffer, thinkingCloseTag, true)
+			thinking, remaining := splitAtTag(p, thinkingCloseTag, true)
 			if len(thinking) > 0 {
 				events = append(events, qwenEventThinkingContent{content: thinking})
 			}
--- a/model/parsers/qwen3vl_nonthinking_test.go
+++ b/model/parsers/qwen3vl_nonthinking_test.go
@@ -198,7 +198,7 @@ func TestQwen3VLNonThinkingParserStreaming(t *testing.T) {

 		t.Run(tc.desc, func(t *testing.T) {
 			parser := Qwen3VLParser{hasThinkingSupport: false}
-			parser.Init([]api.Tool{}, nil, nil)
+			parser.Init([]api.Tool{}, nil)

 			for i, step := range tc.steps {
 				parser.buffer.WriteString(step.input)
@@ -515,7 +515,7 @@ func TestQwenOldParserStreaming(t *testing.T) {

 		t.Run(tc.desc, func(t *testing.T) {
 			parser := Qwen3VLParser{hasThinkingSupport: false}
-			parser.Init([]api.Tool{}, nil, nil)
+			parser.Init([]api.Tool{}, nil)

 			for i, step := range tc.steps {
 				parser.buffer.WriteString(step.input)
@@ -822,7 +822,7 @@ func TestQwen3VLNonThinkingToolCallWhitespaceHandling(t *testing.T) {

 		t.Run(tc.desc, func(t *testing.T) {
 			parser := Qwen3VLParser{hasThinkingSupport: false}
-			parser.Init([]api.Tool{}, nil, nil)
+			parser.Init([]api.Tool{}, nil)

 			for i, step := range tc.steps {
 				parser.buffer.WriteString(step.input)
--- a/model/parsers/qwen3vl_thinking_test.go
+++ b/model/parsers/qwen3vl_thinking_test.go
@@ -205,7 +205,7 @@ func TestQwen3VLThinkingParserStreaming(t *testing.T) {

 		t.Run(tc.desc, func(t *testing.T) {
 			parser := Qwen3VLParser{hasThinkingSupport: true}
-			parser.Init([]api.Tool{}, nil, nil)
+			parser.Init([]api.Tool{}, nil)
 			// parser.state = CollectingThinkingContent

 			for i, step := range tc.steps {
@@ -386,7 +386,7 @@ func TestQwen3VLParserState(t *testing.T) {

 	for _, tc := range cases {
 		parser := Qwen3VLParser{hasThinkingSupport: tc.hasThinking}
-		parser.Init(nil, tc.last, nil)
+		parser.Init(nil, tc.last)
 		if parser.state != tc.wantState {
 			t.Errorf("%s: got state %v, want %v", tc.desc, parser.state, tc.wantState)
 		}
@@ -437,7 +437,7 @@ func TestQwen3VLThinkingParserWithThinkingPrefill(t *testing.T) {
 	for _, tc := range cases {
 		t.Run(tc.desc, func(t *testing.T) {
 			parser := Qwen3VLParser{hasThinkingSupport: true}
-			parser.Init([]api.Tool{}, last, nil)
+			parser.Init([]api.Tool{}, last)

 			for i, step := range tc.steps {
 				parser.buffer.WriteString(step.input)
@@ -500,7 +500,7 @@ func TestQwen3VLThinkingParserWithNonThinkingPrefill(t *testing.T) {
 	for _, tc := range cases {
 		t.Run(tc.desc, func(t *testing.T) {
 			parser := Qwen3VLParser{hasThinkingSupport: true}
-			parser.Init([]api.Tool{}, last, nil)
+			parser.Init([]api.Tool{}, last)

 			for i, step := range tc.steps {
 				parser.buffer.WriteString(step.input)
@@ -523,7 +523,7 @@ func TestQwen3VLThinkingParserStreamingAssistantPrefillContent(t *testing.T) {
 	// last message is assistant with content ⇒ start in CollectingContent
 	last := &api.Message{Role: "assistant", Content: "has content"}
 	parser := Qwen3VLParser{hasThinkingSupport: true}
-	parser.Init([]api.Tool{}, last, nil)
+	parser.Init([]api.Tool{}, last)

 	type step struct {
 		input      string
@@ -750,7 +750,7 @@ func TestQwen3VLThinkingWhitespaceHandling(t *testing.T) {

 		t.Run(tc.desc, func(t *testing.T) {
 			parser := Qwen3VLParser{hasThinkingSupport: true}
-			parser.Init([]api.Tool{}, nil, nil)
+			parser.Init([]api.Tool{}, nil)

 			for i, step := range tc.steps {
 				parser.buffer.WriteString(step.input)
@@ -859,7 +859,7 @@ func TestQwen3VLToolCallWhitespaceHandling(t *testing.T) {

 		t.Run(tc.desc, func(t *testing.T) {
 			parser := Qwen3VLParser{hasThinkingSupport: true}
-			parser.Init([]api.Tool{}, tc.prefillMsg, nil)
+			parser.Init([]api.Tool{}, tc.prefillMsg)

 			for i, step := range tc.steps {
 				parser.buffer.WriteString(step.input)
--- a/server/routes.go
+++ b/server/routes.go
@@ -340,7 +340,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 		builtinParser = parsers.ParserForName(m.Config.Parser)
 		if builtinParser != nil {
 			// no tools or last message for generate endpoint
-			builtinParser.Init(nil, nil, req.Think)
+			builtinParser.Init(nil, nil)
 		}
 	}

@@ -2051,7 +2051,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 				lastMessage = &msgs[len(msgs)-1]
 			}
 			// Initialize parser and get processed tools
-			processedTools = builtinParser.Init(req.Tools, lastMessage, req.Think)
+			processedTools = builtinParser.Init(req.Tools, lastMessage)
 		}
 	}
Author	SHA1	Message	Date
Eva Ho	5aee34db9f	fix tests	2025-11-19 15:57:19 -05:00
Eva Ho	32393f11d7	app/ui: add gemini-3-pro-preview to featured list	2025-11-19 14:59:29 -05:00