client: add request signing to the client (#10881)

If OLLAMA_AUTH is set, sign each request with a timestamp and pass the signature in the Authorization header
kvcache: Skip computing causal mask for worst case graph reservation
2025-05-27 16:50:57 -07:00 · 2025-05-27 14:25:15 -07:00 · 2025-05-27 11:28:48 -07:00 · 2025-05-26 18:59:06 -07:00 · 2025-05-26 17:16:00 -07:00 · 2025-05-26 12:05:03 -07:00
36 changed files with 1987 additions and 354 deletions
--- a/README.md
+++ b/README.md
@@ -406,6 +406,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [AppFlowy](https://github.com/AppFlowy-IO/AppFlowy) (AI collaborative workspace with Ollama, cross-platform and self-hostable)
 - [Lumina](https://github.com/cushydigit/lumina.git) (A lightweight, minimal React.js frontend for interacting with Ollama servers)
 - [Tiny Notepad](https://pypi.org/project/tiny-notepad) (A lightweight, notepad-like interface to chat with ollama available on PyPI)
+- [macLlama (macOS native)](https://github.com/hellotunamayo/macLlama) (A native macOS GUI application for interacting with Ollama models, featuring a chat interface.) 

 ### Cloud

@@ -449,6 +450,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [orbiton](https://github.com/xyproto/orbiton) Configuration-free text editor and IDE with support for tab completion with Ollama.
 - [orca-cli](https://github.com/molbal/orca-cli) Ollama Registry CLI Application - Browse, pull, and download models from Ollama Registry in your terminal.
 - [GGUF-to-Ollama](https://github.com/jonathanhecl/gguf-to-ollama) - Importing GGUF to Ollama made easy (multiplatform)
+- [AWS-Strands-With-Ollama](https://github.com/rapidarchitect/ollama_strands) - AWS Strands Agents with Ollama Examples

 ### Apple Vision Pro

--- a/api/client.go
+++ b/api/client.go
@@ -24,7 +24,10 @@ import (
 	"net/http"
 	"net/url"
 	"runtime"
+	"strconv"
+	"time"

+	"github.com/ollama/ollama/auth"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/version"
@@ -76,6 +79,14 @@ func NewClient(base *url.URL, http *http.Client) *Client {
 	}
 }

+// getAuthorizationToken signs the given challenge string with auth.Sign and
+// returns the resulting token. If signing fails, it returns an empty string
+// together with the error from auth.Sign.
+func getAuthorizationToken(ctx context.Context, challenge string) (string, error) {
+	payload := []byte(challenge)
+
+	signed, signErr := auth.Sign(ctx, payload)
+	if signErr != nil {
+		return "", signErr
+	}
+
+	return signed, nil
+}
+
 func (c *Client) do(ctx context.Context, method, path string, reqData, respData any) error {
 	var reqBody io.Reader
 	var data []byte
@@ -97,6 +108,21 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
 	}

 	requestURL := c.base.JoinPath(path)
+
+	var token string
+	if envconfig.UseAuth() || c.base.Hostname() == "ollama.com" {
+		now := strconv.FormatInt(time.Now().Unix(), 10)
+		chal := fmt.Sprintf("%s,%s?ts=%s", method, path, now)
+		token, err = getAuthorizationToken(ctx, chal)
+		if err != nil {
+			return err
+		}
+
+		q := requestURL.Query()
+		q.Set("ts", now)
+		requestURL.RawQuery = q.Encode()
+	}
+
 	request, err := http.NewRequestWithContext(ctx, method, requestURL.String(), reqBody)
 	if err != nil {
 		return err
@@ -106,6 +132,10 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
 	request.Header.Set("Accept", "application/json")
 	request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))

+	if token != "" {
+		request.Header.Set("Authorization", token)
+	}
+
 	respObj, err := c.http.Do(request)
 	if err != nil {
 		return err
@@ -143,6 +173,22 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 	}

 	requestURL := c.base.JoinPath(path)
+
+	var token string
+	if envconfig.UseAuth() || c.base.Hostname() == "ollama.com" {
+		var err error
+		now := strconv.FormatInt(time.Now().Unix(), 10)
+		chal := fmt.Sprintf("%s,%s?ts=%s", method, path, now)
+		token, err = getAuthorizationToken(ctx, chal)
+		if err != nil {
+			return err
+		}
+
+		q := requestURL.Query()
+		q.Set("ts", now)
+		requestURL.RawQuery = q.Encode()
+	}
+
 	request, err := http.NewRequestWithContext(ctx, method, requestURL.String(), buf)
 	if err != nil {
 		return err
@@ -152,6 +198,10 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 	request.Header.Set("Accept", "application/x-ndjson")
 	request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))

+	if token != "" {
+		request.Header.Set("Authorization", token)
+	}
+
 	response, err := c.http.Do(request)
 	if err != nil {
 		return err
--- a/docs/import.md
+++ b/docs/import.md
@@ -132,22 +132,12 @@ success

 ### Supported Quantizations

- `q4_0`
- `q4_1`
- `q5_0`
- `q5_1`
 - `q8_0`

 #### K-means Quantizations

- `q3_K_S`
- `q3_K_M`
- `q3_K_L`
 - `q4_K_S`
 - `q4_K_M`
- `q5_K_S`
- `q5_K_M`
- `q6_K`


 ## Sharing your model on ollama.com
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -183,6 +183,8 @@ var (
 	NewEngine = Bool("OLLAMA_NEW_ENGINE")
 	// ContextLength sets the default context length
 	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
+	// UseAuth enables authentication between the Ollama client and server
+	UseAuth = Bool("OLLAMA_AUTH")
 )

 func String(s string) func() string {
--- a/integration/testdata/embed.json
+++ b/integration/testdata/embed.json
--- a/kvcache/causal.go
+++ b/kvcache/causal.go
@@ -30,6 +30,11 @@ type Causal struct {

 	// ** current forward pass **

+	// curReserve indicates that this forward pass is only for
+	// memory reservation and we should not update our metadata
+	// based on it.
+	curReserve bool
+
 	// the active layer for Get and Put
 	curLayer int

@@ -159,12 +164,13 @@ func (c *Causal) Close() {
 }

 func (c *Causal) StartForward(ctx ml.Context, batch input.Batch, reserve bool) error {
+	c.curReserve = reserve
 	c.curBatchSize = len(batch.Positions)
 	c.curSequences = batch.Sequences
 	c.curPositions = batch.Positions
 	c.opts.Except = nil

-	if !reserve {
+	if !c.curReserve {
 		c.updateSlidingWindow()

 		var err error
@@ -304,6 +310,11 @@ func (c *Causal) buildMask(ctx ml.Context) ml.Tensor {
 	c.curCellRange.max = roundUp(c.curCellRange.max+1, c.config.CachePadding) - 1

 	length := c.curCellRange.max - c.curCellRange.min + 1
+
+	if c.curReserve {
+		return ctx.Input().Empty(c.config.MaskDType, length, batchSize)
+	}
+
 	mask := make([]float32, batchSize*length)

 	for i := range c.curBatchSize {
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -5,6 +5,7 @@ import (
 	"context"
 	"encoding/binary"
 	"fmt"
+	"log/slog"
 	"math"
 	"slices"
 	"strconv"
@@ -133,6 +134,27 @@ type DeviceMemory struct {
 	Graph Memory
 }

+// memoryPresent reports whether at least one entry in mem has a non-zero Size.
+// It returns false for a nil or empty slice.
+func memoryPresent(mem []Memory) bool {
+	for _, entry := range mem {
+		if entry.Size != 0 {
+			return true
+		}
+	}
+
+	return false
+}
+
+// LogValue implements slog.LogValuer. It returns a group holding the
+// "Weights", "Cache" and "Graph" attributes, omitting any whose recorded
+// sizes are all zero.
+func (m DeviceMemory) LogValue() slog.Value {
+	var fields []slog.Attr
+
+	// record appends value under key only when that allocation is present.
+	record := func(key string, present bool, value any) {
+		if present {
+			fields = append(fields, slog.Any(key, value))
+		}
+	}
+
+	record("Weights", memoryPresent(m.Weights), m.Weights)
+	record("Cache", memoryPresent(m.Cache), m.Cache)
+	record("Graph", m.Graph.Size != 0, m.Graph)
+
+	return slog.GroupValue(fields...)
+}
+
 // BackendMemory provides the amount of memory required to load the model
 // per device based on the BackendParams. In some cases, not all required
 // allocations will be known at this point. However, the size of the most recent
@@ -150,6 +172,20 @@ type BackendMemory struct {
 	GPUs []DeviceMemory
 }

+// LogValue implements slog.LogValuer. The returned group contains
+// "InputWeights" (only when its Size is non-zero), followed by one attribute
+// for the CPU and one per GPU, each keyed by the device's Name.
+func (m BackendMemory) LogValue() slog.Value {
+	var fields []slog.Attr
+
+	if inputs := m.InputWeights; inputs.Size != 0 {
+		fields = append(fields, slog.Any("InputWeights", inputs))
+	}
+
+	fields = append(fields, slog.Any(m.CPU.Name, m.CPU))
+
+	for i := range m.GPUs {
+		gpu := m.GPUs[i]
+		fields = append(fields, slog.Any(gpu.Name, gpu))
+	}
+
+	return slog.GroupValue(fields...)
+}
+
 var backends = make(map[string]func(string, BackendParams) (Backend, error))

 func RegisterBackend(name string, f func(string, BackendParams) (Backend, error)) {
--- a/server/download.go
+++ b/server/download.go
@@ -464,6 +464,10 @@ type downloadOpts struct {

 // downloadBlob downloads a blob from the registry and stores it in the blobs directory
 func downloadBlob(ctx context.Context, opts downloadOpts) (cacheHit bool, _ error) {
+	if opts.digest == "" {
+		return false, fmt.Errorf(("%s: %s"), opts.mp.GetNamespaceRepository(), "digest is is empty")
+	}
+
 	fp, err := GetBlobsPath(opts.digest)
 	if err != nil {
 		return false, err
--- a/server/model.go
+++ b/server/model.go
@@ -10,9 +10,6 @@ import (
 	"log/slog"
 	"net/http"
 	"os"
-	"slices"
-	"strings"
-	"text/template/parse"

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/fs/ggml"
@@ -128,124 +125,3 @@ func detectContentType(r io.Reader) (string, error) {

 	return "unknown", nil
 }
-
-func parseObjects(s string) []map[string]any {
-	var objs []map[string]any
-	for offset := 0; offset < len(s); {
-		var obj map[string]any
-		decoder := json.NewDecoder(strings.NewReader(s[offset:]))
-		if err := decoder.Decode(&obj); errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
-			break
-		} else if syntax := &(json.SyntaxError{}); errors.As(err, &syntax) {
-			// skip over any syntax errors
-			offset += int(syntax.Offset)
-		} else if unmarshalType := &(json.UnmarshalTypeError{}); errors.As(err, &unmarshalType) {
-			// skip over any unmarshalable types
-			offset += int(unmarshalType.Offset)
-		} else if err != nil {
-			return nil
-		} else {
-			offset += int(decoder.InputOffset())
-			objs = append(objs, obj)
-		}
-	}
-
-	return objs
-}
-
-// parseToolCalls attempts to parse a JSON string into a slice of ToolCalls.
-// mxyng: this only really works if the input contains tool calls in some JSON format
-func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
-	// create a subtree from the node that ranges over .ToolCalls
-	tmpl := m.Template.Subtree(func(n parse.Node) bool {
-		if t, ok := n.(*parse.RangeNode); ok {
-			return slices.Contains(template.Identifiers(t.Pipe), "ToolCalls")
-		}
-
-		return false
-	})
-
-	if tmpl == nil {
-		return nil, false
-	}
-
-	var b bytes.Buffer
-	if err := tmpl.Execute(&b, map[string][]api.ToolCall{
-		"ToolCalls": {
-			{
-				Function: api.ToolCallFunction{
-					Name: "@@name@@",
-					Arguments: api.ToolCallFunctionArguments{
-						"@@argument@@": 1,
-					},
-				},
-			},
-		},
-	}); err != nil {
-		return nil, false
-	}
-
-	templateObjects := parseObjects(b.String())
-	if len(templateObjects) == 0 {
-		return nil, false
-	}
-
-	// find the keys that correspond to the name and arguments fields
-	var name, arguments string
-	for k, v := range templateObjects[0] {
-		switch v.(type) {
-		case string:
-			name = k
-		case map[string]any:
-			arguments = k
-		}
-	}
-
-	if name == "" || arguments == "" {
-		return nil, false
-	}
-
-	responseObjects := parseObjects(s)
-	if len(responseObjects) == 0 {
-		return nil, false
-	}
-
-	// collect all nested objects
-	var collect func(any) []map[string]any
-	collect = func(obj any) (all []map[string]any) {
-		switch o := obj.(type) {
-		case map[string]any:
-			all = append(all, o)
-			for _, v := range o {
-				all = append(all, collect(v)...)
-			}
-		case []any:
-			for _, v := range o {
-				all = append(all, collect(v)...)
-			}
-		}
-
-		return all
-	}
-
-	var objs []map[string]any
-	for _, p := range responseObjects {
-		objs = append(objs, collect(p)...)
-	}
-
-	var toolCalls []api.ToolCall
-	for _, kv := range objs {
-		n, nok := kv[name].(string)
-		a, aok := kv[arguments].(map[string]any)
-		if nok && aok {
-			toolCalls = append(toolCalls, api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      n,
-					Arguments: a,
-				},
-			})
-		}
-	}
-
-	return toolCalls, len(toolCalls) > 0
-}
--- a/server/model_test.go
+++ b/server/model_test.go
@@ -1,179 +0,0 @@
-package server
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"testing"
-
-	"github.com/google/go-cmp/cmp"
-
-	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/template"
-)
-
-func readFile(t *testing.T, base, name string) *bytes.Buffer {
-	t.Helper()
-
-	bts, err := os.ReadFile(filepath.Join(base, name))
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	return bytes.NewBuffer(bts)
-}
-
-func TestExecuteWithTools(t *testing.T) {
-	p := filepath.Join("testdata", "tools")
-	cases := []struct {
-		model  string
-		output string
-		ok     bool
-	}{
-		{"mistral", `[TOOL_CALLS]  [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`, true},
-		{"mistral", `[TOOL_CALLS]  [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]
-
-The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`, true},
-		{"mistral", `[TOOL_CALLS]  [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"To }]`, false},
-		{"mistral", `I'm not aware of that information. However, I can suggest searching for the weather using the "get_current_weather" function:
-
-		[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`, true},
-		{"mistral", " The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.", false},
-		{"command-r-plus", "Action: ```json" + `
-[
-    {
-        "tool_name": "get_current_weather",
-        "parameters": {
-            "format": "fahrenheit",
-            "location": "San Francisco, CA"
-        }
-    },
-    {
-        "tool_name": "get_current_weather",
-        "parameters": {
-            "format": "celsius",
-            "location": "Toronto, Canada"
-        }
-    }
-]
-` + "```", true},
-		{"command-r-plus", " The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.", false},
-		{"firefunction", ` functools[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`, true},
-		{"firefunction", " The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.", false},
-		{"llama3-groq-tool-use", `<tool_call>
-{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}
-{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}
-</tool_call>`, true},
-		{"xlam", `{"tool_calls": [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]}`, true},
-		{"nemotron", `<toolcall>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]} </toolcall>`, true},
-	}
-
-	var tools []api.Tool
-	if err := json.Unmarshal(readFile(t, p, "tools.json").Bytes(), &tools); err != nil {
-		t.Fatal(err)
-	}
-
-	var messages []api.Message
-	if err := json.Unmarshal(readFile(t, p, "messages.json").Bytes(), &messages); err != nil {
-		t.Fatal(err)
-	}
-
-	calls := []api.ToolCall{
-		{
-			Function: api.ToolCallFunction{
-				Name: "get_current_weather",
-				Arguments: api.ToolCallFunctionArguments{
-					"format":   "fahrenheit",
-					"location": "San Francisco, CA",
-				},
-			},
-		},
-		{
-			Function: api.ToolCallFunction{
-				Name: "get_current_weather",
-				Arguments: api.ToolCallFunctionArguments{
-					"format":   "celsius",
-					"location": "Toronto, Canada",
-				},
-			},
-		},
-	}
-
-	for _, tt := range cases {
-		t.Run(tt.model, func(t *testing.T) {
-			tmpl, err := template.Parse(readFile(t, p, fmt.Sprintf("%s.gotmpl", tt.model)).String())
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			t.Run("template", func(t *testing.T) {
-				var actual bytes.Buffer
-				if err := tmpl.Execute(&actual, template.Values{Tools: tools, Messages: messages}); err != nil {
-					t.Fatal(err)
-				}
-
-				if diff := cmp.Diff(actual.String(), readFile(t, p, fmt.Sprintf("%s.out", tt.model)).String()); diff != "" {
-					t.Errorf("mismatch (-got +want):\n%s", diff)
-				}
-			})
-
-			t.Run("parse", func(t *testing.T) {
-				m := &Model{Template: tmpl}
-				actual, ok := m.parseToolCalls(tt.output)
-				if ok != tt.ok {
-					t.Fatalf("expected %t, got %t", tt.ok, ok)
-				}
-
-				if tt.ok {
-					if diff := cmp.Diff(actual, calls); diff != "" {
-						t.Errorf("mismatch (-got +want):\n%s", diff)
-					}
-				}
-			})
-		})
-	}
-}
-
-func TestParseObjects(t *testing.T) {
-	tests := []struct {
-		input string
-		want  []map[string]any
-	}{
-		{
-			input: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
-			want: []map[string]any{
-				{"name": "get_current_weather", "arguments": map[string]any{"format": "fahrenheit", "location": "San Francisco, CA"}},
-				{"name": "get_current_weather", "arguments": map[string]any{"format": "celsius", "location": "Toronto, Canada"}},
-			},
-		},
-		{
-			input: `<toolcall>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </toolcall>`,
-			want: []map[string]any{
-				{"name": "get_current_weather", "arguments": map[string]any{"format": "fahrenheit", "location": "San Francisco, CA"}},
-			},
-		},
-		{
-			input: `<toolcall>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </toolcall> <toolcall>{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, ON"}} </toolcall>`,
-			want: []map[string]any{
-				{"name": "get_current_weather", "arguments": map[string]any{"format": "fahrenheit", "location": "San Francisco, CA"}},
-				{"name": "get_current_weather", "arguments": map[string]any{"format": "celsius", "location": "Toronto, ON"}},
-			},
-		},
-		{
-			input: `{"name": "get_current_weather", "arguments": `,
-			want:  nil,
-		},
-	}
-
-	for _, tc := range tests {
-		t.Run(tc.input, func(t *testing.T) {
-			got := parseObjects(tc.input)
-
-			if diff := cmp.Diff(got, tc.want); diff != "" {
-				t.Errorf("mismatch (-got +want):\n%s", diff)
-			}
-		})
-	}
-}
--- a/server/modelpath.go
+++ b/server/modelpath.go
@@ -116,7 +116,7 @@ func (mp ModelPath) BaseURL() *url.URL {
 func GetManifestPath() (string, error) {
 	path := filepath.Join(envconfig.Models(), "manifests")
 	if err := os.MkdirAll(path, 0o755); err != nil {
-		return "", err
+		return "", fmt.Errorf("%w: ensure path elements are traversable", err)
 	}

 	return path, nil
@@ -139,7 +139,7 @@ func GetBlobsPath(digest string) (string, error) {
 	}

 	if err := os.MkdirAll(dirPath, 0o755); err != nil {
-		return "", err
+		return "", fmt.Errorf("%w: ensure path elements are traversable", err)
 	}

 	return path, nil
--- a/server/routes.go
+++ b/server/routes.go
@@ -38,6 +38,7 @@ import (
 	"github.com/ollama/ollama/server/internal/client/ollama"
 	"github.com/ollama/ollama/server/internal/registry"
 	"github.com/ollama/ollama/template"
+	"github.com/ollama/ollama/tools"
 	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
@@ -1482,11 +1483,20 @@ func (s *Server) ChatHandler(c *gin.Context) {
 		return
 	}

+	var toolParser *tools.Parser
+	if len(req.Tools) > 0 {
+		toolParser, err = tools.NewParser(m.Template.Template)
+		if err != nil {
+			slog.Error("failed to create tool parser", "error", err)
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+			return
+		}
+	}
+
 	ch := make(chan any)
 	go func() {
 		defer close(ch)
-		var sb strings.Builder
-		var toolCallIndex int = 0
+
 		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
 			Prompt:  prompt,
 			Images:  images,
@@ -1512,37 +1522,21 @@ func (s *Server) ChatHandler(c *gin.Context) {
 				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
 			}

-			// TODO: tool call checking and filtering should be moved outside of this callback once streaming
-			// however this was a simple change for now without reworking streaming logic of this (and other)
-			// handlers
-			if req.Stream != nil && !*req.Stream || len(req.Tools) == 0 {
-				ch <- res
-				return
-			}
-
-			// Streaming tool calls:
-			// If tools are recognized, use a flag to track the sending of a tool downstream
-			// This ensures that content is cleared from the message on the last chunk sent
-			sb.WriteString(r.Content)
-			if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
-				res.Message.ToolCalls = toolCalls
-				for i := range toolCalls {
-					toolCalls[i].Function.Index = toolCallIndex
-					toolCallIndex++
+			if len(req.Tools) > 0 {
+				toolCalls, content := toolParser.Add(r.Content)
+				if len(content) > 0 {
+					res.Message.Content = content
+				} else if len(toolCalls) > 0 {
+					res.Message.ToolCalls = toolCalls
+					res.Message.Content = ""
+				} else {
+					if r.Done {
+						ch <- res
+					}
+					return
 				}
-				res.Message.Content = ""
-				sb.Reset()
-				ch <- res
-				return
-			}
-
-			if r.Done {
-				// Send any remaining content if no tool calls were detected
-				if toolCallIndex == 0 {
-					res.Message.Content = sb.String()
-				}
-				ch <- res
 			}
+			ch <- res
 		}); err != nil {
 			ch <- gin.H{"error": err.Error()}
 		}
@@ -1551,11 +1545,15 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	if req.Stream != nil && !*req.Stream {
 		var resp api.ChatResponse
 		var sb strings.Builder
+		var toolCalls []api.ToolCall
 		for rr := range ch {
 			switch t := rr.(type) {
 			case api.ChatResponse:
 				sb.WriteString(t.Message.Content)
 				resp = t
+				if len(req.Tools) > 0 {
+					toolCalls = append(toolCalls, t.Message.ToolCalls...)
+				}
 			case gin.H:
 				msg, ok := t["error"].(string)
 				if !ok {
@@ -1571,12 +1569,8 @@ func (s *Server) ChatHandler(c *gin.Context) {
 		}

 		resp.Message.Content = sb.String()
-
-		if len(req.Tools) > 0 {
-			if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
-				resp.Message.ToolCalls = toolCalls
-				resp.Message.Content = ""
-			}
+		if len(toolCalls) > 0 {
+			resp.Message.ToolCalls = toolCalls
 		}

 		c.JSON(http.StatusOK, resp)
--- a/server/testdata/tools/command-r-plus.gotmpl
+++ b/server/testdata/tools/command-r-plus.gotmpl
--- a/server/testdata/tools/command-r-plus.out
+++ b/server/testdata/tools/command-r-plus.out
--- a/server/testdata/tools/firefunction.gotmpl
+++ b/server/testdata/tools/firefunction.gotmpl
--- a/server/testdata/tools/firefunction.out
+++ b/server/testdata/tools/firefunction.out
--- a/server/testdata/tools/llama3-groq-tool-use.gotmpl
+++ b/server/testdata/tools/llama3-groq-tool-use.gotmpl
--- a/server/testdata/tools/llama3-groq-tool-use.out
+++ b/server/testdata/tools/llama3-groq-tool-use.out
--- a/tools/testdata/llama3.2.gotmpl
+++ b/tools/testdata/llama3.2.gotmpl
@@ -0,0 +1,44 @@
+<|start_header_id|>system<|end_header_id|>
+
+Cutting Knowledge Date: December 2023
+
+{{ if .System }}{{ .System }}
+{{- end }}
+{{- if .Tools }}When you receive a tool call response, use the output to format an answer to the orginal user question.
+
+You are a helpful assistant with tool calling capabilities.
+{{- end }}<|eot_id|>
+{{- range $i, $_ := .Messages }}
+{{- $last := eq (len (slice $.Messages $i)) 1 }}
+{{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|>
+{{- if and $.Tools $last }}
+
+Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.
+
+Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.
+
+{{ range $.Tools }}
+{{- . }}
+{{ end }}
+{{ .Content }}<|eot_id|>
+{{- else }}
+
+{{ .Content }}<|eot_id|>
+{{- end }}{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ end }}
+{{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>
+{{- if .ToolCalls }}
+{{ range .ToolCalls }}
+{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }}
+{{- else }}
+
+{{ .Content }}
+{{- end }}{{ if not $last }}<|eot_id|>{{ end }}
+{{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|>
+
+{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ end }}
+{{- end }}
+{{- end }}
--- a/tools/testdata/llama3.2.out
+++ b/tools/testdata/llama3.2.out
@@ -0,0 +1,24 @@
+<|start_header_id|>system<|end_header_id|>
+
+Cutting Knowledge Date: December 2023
+
+You are a knowledgeable assistant. You can answer questions and perform tasks.When you receive a tool call response, use the output to format an answer to the orginal user question.
+
+You are a helpful assistant with tool calling capabilities.<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+What's the weather like today in Paris?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+{"name": "get_current_weather", "parameters": {"format":"celsius","location":"Paris, France"}}<|eot_id|><|start_header_id|>ipython<|end_header_id|>
+
+22<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+The current temperature in Paris, France is 22 degrees Celsius.<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.
+
+Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.
+
+{"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
+
+What's the weather like today in San Francisco and Toronto?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
--- a/server/testdata/tools/messages.json
+++ b/server/testdata/tools/messages.json
--- a/server/testdata/tools/mistral.gotmpl
+++ b/server/testdata/tools/mistral.gotmpl
--- a/server/testdata/tools/mistral.out
+++ b/server/testdata/tools/mistral.out
--- a/server/testdata/tools/nemotron.gotmpl
+++ b/server/testdata/tools/nemotron.gotmpl
--- a/server/testdata/tools/nemotron.out
+++ b/server/testdata/tools/nemotron.out
--- a/tools/testdata/qwen2.5.gotmpl
+++ b/tools/testdata/qwen2.5.gotmpl
@@ -0,0 +1,51 @@
+{{- if .Suffix }}<|fim_prefix|>{{ .Prompt }}<|fim_suffix|>{{ .Suffix }}<|fim_middle|>
+{{- else if .Messages }}
+{{- if or .System .Tools }}<|im_start|>system
+{{- if .System }}
+{{ .System }}
+{{- end }}
+{{- if .Tools }}
+
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{{- range .Tools }}
+{"type": "function", "function": {{ .Function }}}
+{{- end }}
+</tools>
+
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{"name": <function-name>, "arguments": <args-json-object>}
+</tool_call>
+{{- end }}<|im_end|>
+{{ end }}
+{{- range $i, $_ := .Messages }}
+{{- $last := eq (len (slice $.Messages $i)) 1 -}}
+{{- if eq .Role "user" }}<|im_start|>user
+{{ .Content }}<|im_end|>
+{{ else if eq .Role "assistant" }}<|im_start|>assistant
+{{ if .Content }}{{ .Content }}
+{{- else if .ToolCalls }}<tool_call>
+{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
+{{ end }}</tool_call>
+{{- end }}{{ if not $last }}<|im_end|>
+{{ end }}
+{{- else if eq .Role "tool" }}<|im_start|>user
+<tool_response>
+{{ .Content }}
+</tool_response><|im_end|>
+{{ end }}
+{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
+{{ end }}
+{{- end }}
+{{- else }}
+{{- if .System }}<|im_start|>system
+{{ .System }}<|im_end|>
+{{ end }}{{ if .Prompt }}<|im_start|>user
+{{ .Prompt }}<|im_end|>
+{{ end }}<|im_start|>assistant
+{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}
--- a/tools/testdata/qwen2.5.out
+++ b/tools/testdata/qwen2.5.out
@@ -0,0 +1,31 @@
+<|im_start|>system
+You are a knowledgeable assistant. You can answer questions and perform tasks.
+
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{"type": "function", "function": {"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
+</tools>
+
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{"name": <function-name>, "arguments": <args-json-object>}
+</tool_call><|im_end|>
+<|im_start|>user
+What's the weather like today in Paris?<|im_end|>
+<|im_start|>assistant
+<tool_call>
+{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}
+</tool_call><|im_end|>
+<|im_start|>user
+<tool_response>
+22
+</tool_response><|im_end|>
+<|im_start|>assistant
+The current temperature in Paris, France is 22 degrees Celsius.<|im_end|>
+<|im_start|>user
+What's the weather like today in San Francisco and Toronto?<|im_end|>
+<|im_start|>assistant
--- a/tools/testdata/qwen3.gotmpl
+++ b/tools/testdata/qwen3.gotmpl
@@ -0,0 +1,50 @@
+{{- if .Messages }}
+{{- if or .System .Tools }}<|im_start|>system
+{{- if .System }}
+{{ .System }}
+{{- end }}
+{{- if .Tools }}
+
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{{- range .Tools }}
+{"type": "function", "function": {{ .Function }}}
+{{- end }}
+</tools>
+
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{"name": <function-name>, "arguments": <args-json-object>}
+</tool_call>
+{{- end }}<|im_end|>
+{{ end }}
+{{- range $i, $_ := .Messages }}
+{{- $last := eq (len (slice $.Messages $i)) 1 -}}
+{{- if eq .Role "user" }}<|im_start|>user
+{{ .Content }}<|im_end|>
+{{ else if eq .Role "assistant" }}<|im_start|>assistant
+{{ if .Content }}{{ .Content }}
+{{- else if .ToolCalls }}<tool_call>
+{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
+{{ end }}</tool_call>
+{{- end }}{{ if not $last }}<|im_end|>
+{{ end }}
+{{- else if eq .Role "tool" }}<|im_start|>user
+<tool_response>
+{{ .Content }}
+</tool_response><|im_end|>
+{{ end }}
+{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
+{{ end }}
+{{- end }}
+{{- else }}
+{{- if .System }}<|im_start|>system
+{{ .System }}<|im_end|>
+{{ end }}{{ if .Prompt }}<|im_start|>user
+{{ .Prompt }}<|im_end|>
+{{ end }}<|im_start|>assistant
+{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}
--- a/tools/testdata/qwen3.out
+++ b/tools/testdata/qwen3.out
@@ -0,0 +1,31 @@
+<|im_start|>system
+You are a knowledgeable assistant. You can answer questions and perform tasks.
+
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{"type": "function", "function": {"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
+</tools>
+
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{"name": <function-name>, "arguments": <args-json-object>}
+</tool_call><|im_end|>
+<|im_start|>user
+What's the weather like today in Paris?<|im_end|>
+<|im_start|>assistant
+<tool_call>
+{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}
+</tool_call><|im_end|>
+<|im_start|>user
+<tool_response>
+22
+</tool_response><|im_end|>
+<|im_start|>assistant
+The current temperature in Paris, France is 22 degrees Celsius.<|im_end|>
+<|im_start|>user
+What's the weather like today in San Francisco and Toronto?<|im_end|>
+<|im_start|>assistant
--- a/server/testdata/tools/tools.json
+++ b/server/testdata/tools/tools.json
--- a/server/testdata/tools/xlam.gotmpl
+++ b/server/testdata/tools/xlam.gotmpl
--- a/server/testdata/tools/xlam.out
+++ b/server/testdata/tools/xlam.out
--- a/tools/tools.go
+++ b/tools/tools.go
@@ -0,0 +1,253 @@
+package tools
+
+import (
+	"encoding/json"
+	"errors"
+	"log/slog"
+	"strings"
+	gotmpl "text/template"
+
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/template"
+)
+
+// Sentinel errors used internally by the parser:
+//   - errInvalidToolCall: the text has an extra closing brace/bracket or
+//     contains no object that yields a tool call.
+//   - errAccumulateMore: the text is incomplete (unbalanced braces/brackets,
+//     empty input, or a prefix that has only been partially seen).
+var (
+	errInvalidToolCall = errors.New("invalid tool call format")
+	errAccumulateMore  = errors.New("need to accumulate more content")
+)
+
+// Parser incrementally extracts tool calls from text passed to Add.
+// Create one with NewParser.
+type Parser struct {
+	greedyParseJSON bool            // attempt JSON parsing even before prefix is seen; NewParser sets it true, Add clears it after a failed parse when prefix is non-empty
+	prefix          string          // tool call prefix obtained from toolPrefix; checkPrefix does nothing when it is ""
+	prefixFound     bool            // set by checkPrefix once the buffered text starts with prefix
+	tmpl            gotmpl.Template // copy of the template returned by toolTemplate
+	sb              strings.Builder // text buffered across Add calls
+	index           int             // incremented in Add once per parsed tool call
+	name            string          // JSON key for the tool name, from extractToolArgs
+	arguments       string          // JSON key for the tool arguments, from extractToolArgs
+}
+
+// parseJSONToolCalls scans s for top-level JSON objects and converts every
+// object (or nested object found by collect) that carries both the name and
+// arguments fields into an api.ToolCall.
+//
+// Parameters:
+//   - s: The string to parse; surrounding whitespace is trimmed first
+//   - name: The field name from template that identifies the tool call name
+//   - arguments: The field name from template that identifies the tool call arguments
+//   - prefix: The tool call prefix from the template; when it ends in "[" and s
+//     does not itself start with "[", square brackets in s are not counted
+//
+// Returns:
+//   - []api.ToolCall: The parsed tool calls if successful
+//   - error: errAccumulateMore if braces/brackets are still open or s is empty,
+//     errInvalidToolCall if there is an extra closing brace/bracket or no tool
+//     call could be extracted, or nil if successful
+//
+// NOTE: braces and brackets are counted rune by rune, including any that
+// appear inside JSON string values.
+func parseJSONToolCalls(s string, name, arguments string, prefix string) ([]api.ToolCall, error) {
+	// Check for balanced braces before attempting to parse
+	braceCount := 0
+	squareCount := 0
+	startIndex := -1
+	var rawToolCalls []string
+	s = strings.TrimSpace(s)
+
+	// Only track these if we don't have a prefix as it will be cut off from the prefix. Also track in the parseLeadingJSON case.
+	trackSquareBrackets := prefix == "" || !strings.HasSuffix(prefix, "[") || strings.HasPrefix(s, "[")
+	for i, c := range s {
+		switch c {
+		case '{':
+			braceCount++
+			// Remember where the outermost object starts
+			if startIndex == -1 {
+				startIndex = i
+			}
+		case '}':
+			braceCount--
+			// Back at depth zero: s[startIndex:i+1] is one complete top-level object
+			if braceCount == 0 {
+				rawToolCalls = append(rawToolCalls, s[startIndex:i+1])
+				startIndex = -1
+			}
+		case '[':
+			if trackSquareBrackets {
+				squareCount++
+			}
+		case ']':
+			if trackSquareBrackets {
+				squareCount--
+			}
+		}
+
+		// Negative means we have an extra closing brace/bracket
+		if braceCount < 0 || squareCount < 0 {
+			return nil, errInvalidToolCall
+		}
+	}
+
+	// If braces/brackets aren't balanced, need more input
+	if braceCount > 0 || squareCount > 0 {
+		return nil, errAccumulateMore
+	}
+
+	// s was already trimmed above, so t has the same contents as s
+	t := strings.TrimSpace(s)
+	if len(t) == 0 {
+		return nil, errAccumulateMore
+	}
+	// If the input is a single square bracket, it's not a valid tool call
+	// NOTE(review): a lone "[" looks like it is already caught by the
+	// squareCount > 0 check above - confirm before relying on this branch.
+	if t[0] == '[' && len(t) == 1 {
+		return nil, errAccumulateMore
+	}
+
+	// Attempt full unmarshal of the JSON
+	var toolCalls []api.ToolCall
+	for _, rawToolCall := range rawToolCalls {
+		var resp map[string]any
+		// Fragments that are not valid JSON objects are skipped, not reported
+		if err := json.Unmarshal([]byte(rawToolCall), &resp); err != nil {
+			continue
+		}
+
+		// Collect nested objects that could contain tool calls
+		objs := collect(resp)
+		if len(objs) == 0 {
+			continue
+		}
+
+		// Extract tool calls from objects
+		for _, kv := range objs {
+			// The name must be a string and the arguments must be a JSON object
+			n, nok := kv[name].(string)
+			a, aok := kv[arguments].(map[string]any)
+			if nok && aok {
+				toolCalls = append(toolCalls, api.ToolCall{
+					Function: api.ToolCallFunction{
+						Name:      n,
+						Arguments: a,
+					},
+				})
+			} else {
+				slog.Debug("No valid tool call found in object.", "object", kv)
+			}
+		}
+	}
+
+	// Valid JSON, no tool calls found
+	if len(toolCalls) == 0 {
+		slog.Debug("No valid tool calls found in any raw tool calls.", "rawToolCalls", rawToolCalls)
+		return nil, errInvalidToolCall
+	}
+
+	return toolCalls, nil
+}
+
+// checkPrefix looks for the parser's tool call prefix in s.
+//
+// Outcomes, checked in this order:
+//   - s or the prefix is empty: s is returned untouched
+//   - s starts with the prefix: prefixFound is set and s without the prefix is returned
+//   - the end of s overlaps the start of the prefix (per suffixOverlap): the
+//     overlapping tail is kept in the buffer, the text before it is returned
+//     together with errAccumulateMore
+//   - the prefix occurs later in s: the trimmed text from the prefix onward is
+//     kept in the buffer, the text before it is returned together with
+//     errAccumulateMore
+//   - otherwise: s is returned untouched
+func (p *Parser) checkPrefix(s string) (string, error) {
+	if len(s) == 0 || len(p.prefix) == 0 {
+		return s, nil
+	}
+
+	// rebuffer swaps the buffered text for tail and hands head back to the
+	// caller, signalling that more input is required.
+	rebuffer := func(head, tail string) (string, error) {
+		p.sb.Reset()
+		p.sb.WriteString(tail)
+		return head, errAccumulateMore
+	}
+
+	// Prefix sits right at the start: strip it and remember that we saw it
+	if strings.HasPrefix(s, p.prefix) {
+		p.prefixFound = true
+		return s[len(p.prefix):], nil
+	}
+
+	// Tail of s may be the beginning of the prefix: hold that tail back
+	if at := suffixOverlap(s, p.prefix); at != -1 {
+		return rebuffer(s[:at], s[at:])
+	}
+
+	// Prefix shows up somewhere inside s: emit what precedes it, keep the rest
+	if at := strings.Index(s, p.prefix); at != -1 {
+		return rebuffer(s[:at], strings.TrimSpace(s[at:]))
+	}
+
+	// Nothing resembling the prefix
+	return s, nil
+}
+
+// Add appends s to the parser's buffer and tries to extract tool calls from
+// the accumulated text. It handles prefix detection and JSON parsing.
+//
+// Every returned tool call has Function.Index set from the parser's running
+// counter, so indices keep increasing across successive calls to Add.
+//
+// Returns:
+//   - tools: Any parsed tool calls
+//   - content: Non-tool call content that should be passed through to the caller
+func (p *Parser) Add(s string) (tools []api.ToolCall, content string) {
+	p.sb.WriteString(s)
+	s = p.sb.String()
+
+	// Check for prefix pattern in input
+	s, err := p.checkPrefix(s)
+	if err != nil {
+		// Need more input to complete prefix
+		return nil, s
+	}
+
+	// Exit if prefix exists in template, greedy parsing is off, and prefix not found
+	if !p.greedyParseJSON && !p.prefixFound {
+		p.sb.Reset()
+		return nil, s
+	}
+
+	toolCalls, err := parseJSONToolCalls(s, p.name, p.arguments, p.prefix)
+	if err != nil {
+		if errors.Is(err, errAccumulateMore) {
+			// Keep the buffer as is and wait for the rest of the JSON
+			return nil, ""
+		}
+		p.sb.Reset()
+		// Only do greedy JSON parsing if there is no prefix from template
+		if p.prefix != "" {
+			p.greedyParseJSON = false
+		}
+		if p.index != 0 && p.prefix == "" {
+			return nil, ""
+		}
+		if p.prefixFound {
+			// Drop tokens since prefix was found
+			return nil, ""
+		}
+		return nil, s
+	}
+
+	// Assign through the slice index: ranging by value would only update a
+	// copy of each element and leave the returned tool calls without an index.
+	for i := range toolCalls {
+		toolCalls[i].Function.Index = p.index
+		p.index++
+	}
+
+	p.sb.Reset()
+	return toolCalls, ""
+}
+
+// NewParser builds a tool call Parser for the given template. The tool call
+// sub-template, the prefix and the JSON field names for the tool name and
+// arguments are all derived from the template.
+//
+// An error is returned when the template cannot be re-parsed, when no tool
+// call template can be found in it, or when the field names cannot be
+// extracted.
+func NewParser(templateToProcess *gotmpl.Template) (*Parser, error) {
+	reparsed, err := template.Parse(templateToProcess.Root.String())
+	if err != nil {
+		return nil, err
+	}
+
+	toolTmpl, err := toolTemplate(reparsed)
+	if err != nil {
+		return nil, err
+	}
+
+	prefix := toolPrefix(templateToProcess)
+
+	nameKey, argsKey, err := extractToolArgs(toolTmpl)
+	if err != nil {
+		return nil, err
+	}
+
+	// The zero value of the buffer is ready to use, so only the remaining
+	// fields need to be filled in.
+	parser := new(Parser)
+	parser.tmpl = *toolTmpl
+	parser.prefix = prefix
+	parser.greedyParseJSON = true
+	parser.name = nameKey
+	parser.arguments = argsKey
+	return parser, nil
+}
--- a/tools/tools_test.go
+++ b/tools/tools_test.go
@@ -0,0 +1,673 @@
+package tools
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/template"
+)
+
+// readFile loads the file name from directory base and returns its contents
+// wrapped in a buffer. The test is failed immediately if the file cannot be read.
+func readFile(t *testing.T, base, name string) *bytes.Buffer {
+	t.Helper()
+
+	path := filepath.Join(base, name)
+	data, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return bytes.NewBuffer(data)
+}
+
+// TestParseJSONToolCalls is a table-driven test for parseJSONToolCalls. Each
+// case supplies the raw input, the JSON field names for the tool name and
+// arguments, the prefix, and the tool calls and error expected back.
+func TestParseJSONToolCalls(t *testing.T) {
+	tests := []struct {
+		name          string
+		input         string
+		nameField     string
+		argsField     string
+		wantToolCalls []api.ToolCall
+		wantErr       error
+		prefix        string
+	}{
+		{
+			name:      "valid single tool call",
+			input:     `{"name": "test_tool", "arguments": {"arg1": "value1"}}`,
+			nameField: "name",
+			argsField: "arguments",
+			wantToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "test_tool",
+						Arguments: map[string]any{
+							"arg1": "value1",
+						},
+					},
+				},
+			},
+			wantErr: nil,
+			prefix:  "",
+		},
+		{
+			name:          "incomplete JSON",
+			input:         `{"name": "test_tool", "arguments": {"arg1": `,
+			nameField:     "name",
+			argsField:     "arguments",
+			wantToolCalls: nil,
+			wantErr:       errAccumulateMore,
+			prefix:        "",
+		},
+		{
+			name:          "invalid JSON",
+			input:         `not json at all`,
+			nameField:     "name",
+			argsField:     "arguments",
+			wantToolCalls: nil,
+			wantErr:       errInvalidToolCall,
+			prefix:        "",
+		},
+		{
+			name:          "missing required fields",
+			input:         `{"other": "field"}`,
+			nameField:     "name",
+			argsField:     "arguments",
+			wantToolCalls: nil,
+			wantErr:       errInvalidToolCall,
+			prefix:        "",
+		},
+		{
+			name: "multiple tool calls in array",
+			input: `[
+				{"name": "tool1", "arguments": {"arg1": 1}},
+				{"name": "tool2", "arguments": {"arg2": "value"}}
+			]`,
+			nameField: "name",
+			argsField: "arguments",
+			wantToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "tool1",
+						Arguments: map[string]any{
+							"arg1": float64(1),
+						},
+					},
+				},
+				{
+					Function: api.ToolCallFunction{
+						Name: "tool2",
+						Arguments: map[string]any{
+							"arg2": "value",
+						},
+					},
+				},
+			},
+			wantErr: nil,
+			prefix:  "",
+		},
+		{
+			name: "multiple tool calls without array",
+			input: `
+				{"name": "tool1", "arguments": {"arg1": 1}},
+				{"name": "tool2", "arguments": {"arg2": "value"}}
+			`,
+			nameField: "name",
+			argsField: "arguments",
+			wantToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "tool1",
+						Arguments: map[string]any{
+							"arg1": float64(1),
+						},
+					},
+				},
+				{
+					Function: api.ToolCallFunction{
+						Name: "tool2",
+						Arguments: map[string]any{
+							"arg2": "value",
+						},
+					},
+				},
+			},
+			wantErr: nil,
+			prefix:  "",
+		},
+		{
+			name: "multiple tool calls with text after",
+			input: `
+				{"name": "tool1", "arguments": {"arg1": 1}} text
+				{"name": "tool2", "arguments": {"arg2": "value"}} text
+			`,
+			nameField: "name",
+			argsField: "arguments",
+			wantToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "tool1",
+						Arguments: map[string]any{
+							"arg1": float64(1),
+						},
+					},
+				},
+				{
+					Function: api.ToolCallFunction{
+						Name: "tool2",
+						Arguments: map[string]any{
+							"arg2": "value",
+						},
+					},
+				},
+			},
+			wantErr: nil,
+			prefix:  "",
+		},
+		{
+			name: "second tool call in array",
+			input: `
+				, {"name": "tool2", "arguments": {"arg2": "value"}}
+			`,
+			nameField: "name",
+			argsField: "arguments",
+			wantToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "tool2",
+						Arguments: map[string]any{
+							"arg2": "value",
+						},
+					},
+				},
+			},
+			wantErr: nil,
+			prefix:  "",
+		},
+		// a bad JSON would not return any tool calls or content as it would always accumulate more
+		{
+			name:          "unbalanced square brackets",
+			input:         `[{"name": "tool1", "arguments": {"arg1": [1, 2}]`,
+			nameField:     "name",
+			argsField:     "arguments",
+			wantToolCalls: nil,
+			wantErr:       errAccumulateMore,
+			prefix:        "",
+		},
+		{
+			name:          "incomplete square brackets",
+			input:         `[{"name": "tool1", "arguments": {"arg1": [1, 2, 3`,
+			nameField:     "name",
+			argsField:     "arguments",
+			wantToolCalls: nil,
+			wantErr:       errAccumulateMore,
+			prefix:        "",
+		},
+		{
+			name:      "nested arrays in arguments",
+			input:     `{"name": "tool1", "arguments": {"arg1": [1, 2, ["nested", "array"]]}}`,
+			nameField: "name",
+			argsField: "arguments",
+			wantToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "tool1",
+						Arguments: map[string]any{
+							"arg1": []any{float64(1), float64(2), []any{"nested", "array"}},
+						},
+					},
+				},
+			},
+			wantErr: nil,
+			prefix:  "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gotCalls, err := parseJSONToolCalls(tt.input, tt.nameField, tt.argsField, tt.prefix)
+
+			// The sentinel errors are returned unwrapped, so direct comparison is used
+			if err != tt.wantErr {
+				t.Errorf("parseJSONToolCalls() error = %v, want %v", err, tt.wantErr)
+			}
+
+			// When an error is expected, no tool calls may be returned alongside it
+			if len(gotCalls) != 0 && tt.wantErr != nil {
+				t.Errorf("parseJSONToolCalls() valid = %v, want %v", len(gotCalls) == 0, tt.wantErr == nil)
+			}
+
+			if diff := cmp.Diff(gotCalls, tt.wantToolCalls); diff != "" {
+				t.Errorf("parseJSONToolCalls() tool calls mismatch (-got +want):\n%s", diff)
+			}
+		})
+	}
+}
+
+func TestParseToolCalls(t *testing.T) {
+	p := filepath.Join("testdata")
+	t1 := api.ToolCall{
+		Function: api.ToolCallFunction{
+			Name: "get_current_weather",
+			Arguments: api.ToolCallFunctionArguments{
+				"format":   "fahrenheit",
+				"location": "San Francisco, CA",
+			},
+		},
+	}
+	t2 := api.ToolCall{
+		Function: api.ToolCallFunction{
+			Name: "get_current_weather",
+			Arguments: api.ToolCallFunctionArguments{
+				"format":   "celsius",
+				"location": "Toronto, Canada",
+			},
+		},
+	}
+
+	cases := []struct {
+		name             string
+		model            string
+		output           string
+		expectedToolCall []api.ToolCall
+		expectedTokens   string
+	}{
+		{
+			name:             "mistral malformed json with tool calls prefix",
+			model:            "mistral",
+			output:           `[TOOL_CALLS]  [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_curren}]`,
+			expectedToolCall: []api.ToolCall{t1},
+			expectedTokens:   "",
+		},
+		{
+			name:             "mistral multiple tool calls without prefix",
+			model:            "mistral",
+			output:           `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}} ]`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:  "mistral tool calls with text between no prefix",
+			model: "mistral",
+			output: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}] 
+			model outputs more tokens here and then [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   `model outputs more tokens here and then [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
+		},
+		{
+			name:             "mistral valid json with tool calls prefix",
+			model:            "mistral",
+			output:           `[TOOL_CALLS]  [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:  "mistral multiple tool calls with text between and prefix",
+			model: "mistral",
+			output: `[TOOL_CALLS]  [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]
+			model outputs more tokens here and then [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
+			expectedToolCall: []api.ToolCall{t1, t2, t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:             "mistral incomplete json with tool calls prefix",
+			model:            "mistral",
+			output:           `[TOOL_CALLS]  [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, `,
+			expectedToolCall: []api.ToolCall{},
+			expectedTokens:   "",
+		},
+		{
+			name:  "mistral invalid tool call with explanatory text no prefix",
+			model: "mistral",
+			output: `I'm not aware of that information. However, I can suggest searching for the weather using the "get_current_weather" function:
+
+		[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
+			expectedToolCall: []api.ToolCall{},
+			expectedTokens:   `I'm not aware of that information. However, I can suggest searching for the weather using the "get_current_weather" function: [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
+		},
+		{
+			name:             "mistral tool calls without prefix",
+			model:            "mistral",
+			output:           `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:  "command r plus tool calls with json block format",
+			model: "command-r-plus",
+			output: "Action: ```json" + `
+		[
+		    {
+		        "tool_name": "get_current_weather",
+		        "parameters": {
+		            "format": "fahrenheit",
+		            "location": "San Francisco, CA"
+		        }
+		    },
+		    {
+		        "tool_name": "get_current_weather",
+		        "parameters": {
+		            "format": "celsius",
+		            "location": "Toronto, Canada"
+		        }
+		    }
+		]
+		` + "```",
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:             "firefunction tool calls with functools prefix",
+			model:            "firefunction",
+			output:           ` functools[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:  "llama3 groq single tool call with xml tags",
+			model: "llama3-groq-tool-use",
+			output: `<tool_call>
+		{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}
+		</tool_call>`,
+			expectedToolCall: []api.ToolCall{t1},
+			expectedTokens:   "",
+		},
+		{
+			name:             "xlam tool calls with wrapper object",
+			model:            "xlam",
+			output:           `{"tool_calls": [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]}`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:             "qwen2.5 single tool call with prefix",
+			model:            "qwen2.5",
+			output:           `<tool_call>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}</tool_call>`,
+			expectedToolCall: []api.ToolCall{t1},
+			expectedTokens:   "",
+		},
+		{
+			name:             "qwen2.5 multiple tool calls with and without prefix",
+			model:            "qwen2.5",
+			output:           `{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} <tool_call>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}</tool_call> <tool_call>{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}</tool_call>`,
+			expectedToolCall: []api.ToolCall{t1, t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:             "qwen2.5 plain text response no tool calls",
+			model:            "qwen2.5",
+			output:           "The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.",
+			expectedToolCall: []api.ToolCall{},
+			expectedTokens:   "The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.",
+		},
+		{
+			name:             "qwen2.5 tool calls with trailing text",
+			model:            "qwen2.5",
+			output:           `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}, {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}] some tokens after call`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "some tokens after call",
+		},
+		{
+			name:             "qwen2.5 tool calls with initial text",
+			model:            "qwen2.5",
+			output:           `some tokens before call [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}, {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
+			expectedToolCall: []api.ToolCall{},
+			expectedTokens:   `some tokens before call [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}, {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
+		},
+		{
+			name:             "qwen2.5 tool calls with prefix and trailing text",
+			model:            "qwen2.5",
+			output:           `<tool_call> [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}, {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}] </tool_call> some tokens after call`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:             "qwen2.5 tool calls with prefix and initial text",
+			model:            "qwen2.5",
+			output:           `some tokens before call <tool_call> [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}, {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}] </tool_call>`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "some tokens before call",
+		},
+		{
+			name:             "qwen2.5 tool calls without and with prefix",
+			model:            "qwen2.5",
+			output:           `{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} <tool_call>{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}</tool_call>`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:             "qwen2.5 tool calls without and with prefix and text between",
+			model:            "qwen2.5",
+			output:           `{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} some tokens between <tool_call>{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}</tool_call> some tokens after call`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "some tokens between",
+		},
+		{
+			name:             "qwen2.5 tool calls without prefix and invalid tool call with other tokens",
+			model:            "qwen2.5",
+			output:           `hi [{"options": "foo"}]`,
+			expectedToolCall: []api.ToolCall{},
+			expectedTokens:   `hi [{"options": "foo"}]`,
+		},
+		{
+			name:             "qwen2.5 tool calls with prefix and invalid tool call",
+			model:            "qwen2.5",
+			output:           `<tool_call> [{"options": "foo"}] </tool_call> `,
+			expectedToolCall: []api.ToolCall{},
+			expectedTokens:   ``,
+		},
+		{
+			name:             "qwen3 tool call with think prefix and tool prefix (sent as a single token)",
+			model:            "qwen3",
+			output:           `<think>Okay, let me think what tool we should use...</think><tool_call>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}</tool_call>`,
+			expectedToolCall: []api.ToolCall{t1},
+			expectedTokens:   "<think>Okay, let me think what tool we should use...</think>",
+		},
+		{
+			name:             "qwen3 tool call with think prefix, tool prefix, and whitespace (sent as separate tokens)",
+			model:            "qwen3",
+			output:           `<think>Okay, let me think what tool we should use...</think> <tool_call>{ "name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
+			expectedToolCall: []api.ToolCall{t1},
+			expectedTokens:   "<think>Okay, let me think what tool we should use...</think>",
+		},
+		{
+			name:             "qwen3 empty think prefix without tool prefix and invalid tool call",
+			model:            "qwen3",
+			output:           `<think></think> {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
+			expectedToolCall: []api.ToolCall{},
+			expectedTokens:   `<think></think> {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
+		},
+		{
+			name:             "qwen3 empty think prefix with tool prefix and valid tool call",
+			model:            "qwen3",
+			output:           `<think></think><tool_call>{ "name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}  </tool_call>`,
+			expectedToolCall: []api.ToolCall{t1},
+			expectedTokens:   `<think></think>`,
+		},
+		{
+			name:             "qwen3 invalid tool call with fake tool prefix (single rune suffix match)",
+			model:            "qwen3",
+			output:           `<think></think>< fakeout {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
+			expectedToolCall: []api.ToolCall{},
+			expectedTokens:   `<think></think>< fakeout {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
+		},
+		{
+			name:             "qwen3 invalid tool call with partial tool prefix (multiple rune suffix match)",
+			model:            "qwen3",
+			output:           `<think></think><tool_c fakeout {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
+			expectedToolCall: []api.ToolCall{},
+			expectedTokens:   `<think></think><tool_c fakeout {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
+		},
+		{
+			name:             "qwen3 invalid tool call with malformed tool prefix",
+			model:            "qwen3",
+			output:           `<think></think><tool_cfakeout {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
+			expectedToolCall: []api.ToolCall{},
+			expectedTokens:   `<think></think><tool_cfakeout {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
+		},
+		{
+			name:             "model with prefix in template, no prefix in output",
+			model:            "qwen2.5",
+			output:           `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:             "model with prefix in template, prefix in output",
+			model:            "qwen2.5",
+			output:           `<tool_call>[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]</tool_call>`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:             "model without prefix in template, no prefix in output",
+			model:            "llama3.2",
+			output:           `[{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}} {"name": "get_current_weather", "parameters": {"format":"celsius","location":"Toronto, Canada"}}]`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:             "model without prefix in template, no prefix in output, single tool call",
+			model:            "llama3.2",
+			output:           `{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}}`,
+			expectedToolCall: []api.ToolCall{t1},
+			expectedTokens:   "",
+		},
+		{
+			name:             "model without prefix in template, prefix in output, multiple tool calls in list",
+			model:            "llama3.2",
+			output:           `<tool_call> [{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}} {"name": "get_current_weather", "parameters": {"format":"celsius","location":"Toronto, Canada"}}]</tool_call>`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   `<tool_call>`,
+		},
+		{
+			name:             "model without prefix in template, prefix in output, individual tool calls",
+			model:            "llama3.2",
+			output:           `<tool_call> {"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}} {"name": "get_current_weather", "parameters": {"format":"celsius","location":"Toronto, Canada"}}`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   `<tool_call>`,
+		},
+		{
+			name:             "model with prefix in template, no prefix in output, tokens before",
+			model:            "qwen2.5",
+			output:           `some tokens before [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
+			expectedToolCall: []api.ToolCall{},
+			expectedTokens:   `some tokens before [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
+		},
+		{
+			name:             "model with prefix in template, prefix in output, tokens after",
+			model:            "qwen2.5",
+			output:           `<tool_call>[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]</tool_call> some tokens after`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:             "model without prefix in template, no prefix in output, tokens after",
+			model:            "llama3.2",
+			output:           `[{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}} {"name": "get_current_weather", "parameters": {"format":"celsius","location":"Toronto, Canada"}}]</tool_call> some tokens after`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "",
+		},
+		{
+			name:             "model without prefix in template, no prefix in output, tokens before",
+			model:            "llama3.2",
+			output:           `some tokens before [{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}} {"name": "get_current_weather", "parameters": {"format":"celsius","location":"Toronto, Canada"}}]`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   `some tokens before`,
+		},
+		{
+			name:  "model without prefix in template, prefix in output, tokens after",
+			model: "llama3.2",
+			output: `<tool_call> 
+			[{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}} {"name": "get_current_weather", "parameters": {"format":"celsius","location":"Toronto, Canada"}}]</tool_call> some tokens after`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   `<tool_call>`,
+		},
+		{
+			name:             "model without without prefix, match all jsons",
+			model:            "llama3.2",
+			output:           `model outputs some text [{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}} {"name": "get_current_weather", "parameters": {"format":"celsius","location":"Toronto, Canada"}}]</tool_call> some tokens after`,
+			expectedToolCall: []api.ToolCall{t1, t2},
+			expectedTokens:   "model outputs some text",
+		},
+		{
+			name:             "model flushes tokens if tool call doesn't match",
+			model:            "llama3.2",
+			output:           `{ "user": {"id": 12345, "name": "Alice", "preferences": {"theme": "dark", "notifications": true}, "stats": {"points": 987, "level": 42}}}`,
+			expectedToolCall: []api.ToolCall{},
+			expectedTokens:   `{ "user": {"id": 12345, "name": "Alice", "preferences": {"theme": "dark", "notifications": true}, "stats": {"points": 987, "level": 42}}}`,
+		},
+		{
+			name:             "model flushes tokens if tool call doesn't match array",
+			model:            "llama3.2",
+			output:           `[ { "user": {"id": 12345, "name": "Alice", "preferences": {"theme": "dark", "notifications": true}, "stats": {"points": 987, "level": 42}}}]`,
+			expectedToolCall: []api.ToolCall{},
+			expectedTokens:   `[ { "user": {"id": 12345, "name": "Alice", "preferences": {"theme": "dark", "notifications": true}, "stats": {"points": 987, "level": 42}}}]`,
+		},
+	}
+
+	var tools []api.Tool
+	if err := json.Unmarshal(readFile(t, p, "tools.json").Bytes(), &tools); err != nil {
+		t.Fatal(err)
+	}
+
+	var messages []api.Message
+	if err := json.Unmarshal(readFile(t, p, "messages.json").Bytes(), &messages); err != nil {
+		t.Fatal(err)
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			tmpl, err := template.Parse(readFile(t, p, fmt.Sprintf("%s.gotmpl", tt.model)).String())
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			t.Run("template", func(t *testing.T) {
+				actual := &bytes.Buffer{} // Create new buffer for each test
+				if err := tmpl.Execute(actual, template.Values{Tools: tools, Messages: messages}); err != nil {
+					t.Fatal(err)
+				}
+
+				if diff := cmp.Diff(actual.String(), readFile(t, p, fmt.Sprintf("%s.out", tt.model)).String()); diff != "" {
+					t.Errorf("mismatch (-got +want):\n%s", diff)
+				}
+			})
+
+			t.Run("parse", func(t *testing.T) {
+				tp, err := NewParser(tmpl.Template)
+				if err != nil {
+					t.Fatal(err)
+				}
+				got := []api.ToolCall{}
+				var gotTokens strings.Builder
+
+				tokens := strings.Fields(tt.output)
+				for _, tok := range tokens {
+					s := " " + tok
+
+					toolCalls, content := tp.Add(s)
+					if len(content) > 0 {
+						gotTokens.WriteString(content)
+					} else if len(toolCalls) > 0 {
+						got = append(got, toolCalls...)
+					}
+				}
+
+				// Compare tool calls if we expect any
+				if diff := cmp.Diff(got, tt.expectedToolCall); diff != "" {
+					t.Errorf("tool calls mismatch (-got +want):\n%s", diff)
+				}
+
+				// Compare tokens if we expect any
+				stripped := strings.TrimSpace(gotTokens.String())
+				if diff := cmp.Diff(stripped, tt.expectedTokens); diff != "" {
+					t.Log("actualTokens", stripped, "expectedTokens", tt.expectedTokens)
+					t.Errorf("tokens mismatch (-got +want):\n%s", diff)
+				}
+			})
+		})
+	}
+}
--- a/tools/tools_utils.go
+++ b/tools/tools_utils.go
@@ -0,0 +1,227 @@
+package tools
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"log/slog"
+	"slices"
+	"strings"
+	gotmpl "text/template"
+	"text/template/parse"
+
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/template"
+)
+
+// extractToolCallsFormat searches a template's AST for the first if-statement whose condition
+// references ".ToolCalls" and returns the literal text found at the very start of that branch.
+// The result is used to identify tool call prefixes and formatting.
+//
+// Returns:
+//   - string: The leading text node(s) inside the first ".ToolCalls" if-branch, concatenated
+//   - bool: Whether an if-statement referencing ".ToolCalls" was found in the template
+func extractToolCallsFormat(tmpl *gotmpl.Template) (string, bool) {
+	if tmpl == nil || tmpl.Tree == nil {
+		slog.Debug("template or tree is nil")
+		return "", false
+	}
+
+	var (
+		prefix  string
+		matched bool
+	)
+
+	var visit func(nodes []parse.Node)
+	// descend visits a branch body followed by its optional else body.
+	descend := func(body, elseBody *parse.ListNode) {
+		visit(body.Nodes)
+		if elseBody != nil {
+			visit(elseBody.Nodes)
+		}
+	}
+
+	// leadingText joins the text nodes that open a list, stopping at the first other node.
+	leadingText := func(nodes []parse.Node) string {
+		var sb strings.Builder
+		for _, child := range nodes {
+			text, isText := child.(*parse.TextNode)
+			if !isText {
+				break
+			}
+			sb.Write(text.Text)
+		}
+		return sb.String()
+	}
+
+	visit = func(nodes []parse.Node) {
+		for _, node := range nodes {
+			if matched {
+				// A match was already recorded; unwind without looking at further nodes.
+				return
+			}
+
+			switch n := node.(type) {
+			case *parse.IfNode:
+				if isToolCallsNode(n) {
+					prefix, matched = leadingText(n.List.Nodes), true
+					return
+				}
+				descend(n.List, n.ElseList)
+			case *parse.ListNode:
+				visit(n.Nodes)
+			case *parse.RangeNode:
+				descend(n.List, n.ElseList)
+			case *parse.WithNode:
+				descend(n.List, n.ElseList)
+			}
+		}
+	}
+
+	visit(tmpl.Tree.Root.Nodes)
+	return prefix, matched
+}
+
+// isToolCallsNode reports whether any field argument in the if-node's condition pipeline
+// has "ToolCalls" among its identifiers (for example ".ToolCalls" or ".Message.ToolCalls")
+func isToolCallsNode(n *parse.IfNode) bool {
+	for _, command := range n.Pipe.Cmds {
+		for _, argument := range command.Args {
+			field, isField := argument.(*parse.FieldNode)
+			if isField && slices.Contains(field.Ident, "ToolCalls") {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// toolPrefix returns the text that opens the template's first ".ToolCalls" if-branch with
+// surrounding whitespace trimmed, carriage returns removed and newlines replaced by spaces.
+// It returns "" when extractToolCallsFormat finds no such branch.
+func toolPrefix(tmpl *gotmpl.Template) string {
+	raw, found := extractToolCallsFormat(tmpl)
+	if !found {
+		return ""
+	}
+	withoutCR := strings.ReplaceAll(strings.TrimSpace(raw), "\r", "")
+	return strings.ReplaceAll(withoutCR, "\n", " ")
+}
+
+// toolTemplate calls t.Subtree with a predicate that accepts only range nodes whose pipeline
+// identifiers include "ToolCalls"
+//
+// Returns:
+//   - *gotmpl.Template: The template returned by t.Subtree for the .ToolCalls range
+//   - error: Error if t.Subtree returned no template
+func toolTemplate(t *template.Template) (*gotmpl.Template, error) {
+	rangesOverToolCalls := func(node parse.Node) bool {
+		rangeNode, isRange := node.(*parse.RangeNode)
+		if !isRange {
+			return false
+		}
+		return slices.Contains(template.Identifiers(rangeNode.Pipe), "ToolCalls")
+	}
+
+	if sub := t.Subtree(rangesOverToolCalls); sub != nil {
+		return sub, nil
+	}
+	return nil, errors.New("failed to find tool template")
+}
+
+// suffixOverlap finds the longest suffix of s that is also the beginning of prefix
+//
+// Returns:
+//   - int: The index in s where that suffix starts, or -1 when no suffix of s matches
+func suffixOverlap(s, prefix string) int {
+	// Scan from the earliest possible start so the longest overlap is reported first.
+	for start := max(0, len(s)-len(prefix)); start < len(s); start++ {
+		if strings.HasPrefix(prefix, s[start:]) {
+			return start
+		}
+	}
+	return -1
+}
+
+// extractToolArgs executes a template with a known tool call format to extract the name and arguments
+// keys: it renders one placeholder tool call, decodes the output as JSON and inspects the first
+// object found for the fields that carry the placeholder name and the placeholder arguments.
+//
+// Returns:
+//   - string: The key under which the template emits the tool call name
+//   - string: The key under which the template emits the tool call arguments
+//   - error: Error if rendering or JSON decoding failed, or if either key could not be found
+func extractToolArgs(tmpl *gotmpl.Template) (name, arguments string, err error) {
+	const placeholderName, placeholderArgument = "@@name@@", "@@argument@@"
+	var b bytes.Buffer
+	if err := tmpl.Execute(&b, map[string][]api.ToolCall{
+		"ToolCalls": {
+			{
+				Function: api.ToolCallFunction{
+					Name: placeholderName,
+					Arguments: api.ToolCallFunctionArguments{
+						placeholderArgument: 1,
+					},
+				},
+			},
+		},
+	}); err != nil {
+		return "", "", err
+	}
+
+	var obj any
+	if err := json.Unmarshal(b.Bytes(), &obj); err != nil {
+		return "", "", err
+	}
+
+	// collect yields the first object of the output at index 0, for a bare object and an array alike.
+	objs := collect(obj)
+	if len(objs) == 0 {
+		return "", "", errors.New("no template objects found")
+	}
+
+	// Map iteration order is random: let a field that carries the placeholder win over one that
+	// merely has the right type, so extra string or object fields cannot shadow the real keys.
+	var exactName, exactArguments bool
+	for k, v := range objs[0] {
+		switch val := v.(type) {
+		case string:
+			if isExact := val == placeholderName; isExact || !exactName {
+				name, exactName = k, isExact
+			}
+		case map[string]any:
+			if _, isExact := val[placeholderArgument]; isExact || !exactArguments {
+				arguments, exactArguments = k, isExact
+			}
+		}
+	}
+	if name == "" || arguments == "" {
+		slog.Debug("missing required fields in tool call template", "name", name, "arguments", arguments)
+		return "", "", errors.New("missing required fields in tool call template")
+	}
+
+	return name, arguments, nil
+}
+
+// collect walks a value built from map[string]any and []any containers depth-first and gathers
+// every map it meets. A map is listed before the maps nested in its values.
+//
+// Returns:
+//   - []map[string]any: All maps found in the object, or nil when there are none
+func collect(obj any) []map[string]any {
+	var children []any
+	var found []map[string]any
+	switch value := obj.(type) {
+	case []any:
+		children = value
+	case map[string]any:
+		found = append(found, value)
+		for _, child := range value {
+			children = append(children, child)
+		}
+	}
+	for _, child := range children {
+		found = append(found, collect(child)...)
+	}
+	return found
+}
--- a/tools/tools_utils_test.go
+++ b/tools/tools_utils_test.go
@@ -0,0 +1,464 @@
+package tools
+
+import (
+	"testing"
+	gotmpl "text/template"
+
+	"github.com/ollama/ollama/template"
+)
+
+// TestExtractToolCallsFormat checks the text and found flag returned for nil, if, range and nested templates.
+func TestExtractToolCallsFormat(t *testing.T) {
+	cases := []struct {
+		name     string
+		template string
+		want     string
+		found    bool
+	}{
+		{
+			name:     "nil template",
+			template: "",
+			want:     "",
+			found:    false,
+		},
+		{
+			name:     "basic tool call with text",
+			template: "{{if .ToolCalls}}Hello world{{end}}",
+			want:     "Hello world",
+			found:    true,
+		},
+		{
+			name:     "tool call with json format",
+			template: "{{if .ToolCalls}}```json\n{{end}}",
+			want:     "```json\n",
+			found:    true,
+		},
+		{
+			name:     "tool call in range",
+			template: "{{range .ToolCalls}}tool: {{.}}{{end}}",
+			want:     "",
+			found:    false,
+		},
+		{
+			name:     "tool call with multiple text nodes",
+			template: "{{if .ToolCalls}}First text{{if .Something}}inner{{end}}Second text{{end}}",
+			want:     "First text",
+			found:    true,
+		},
+		{
+			name:     "nested if without tool calls",
+			template: "{{if .Something}}{{if .OtherThing}}text{{end}}{{end}}",
+			want:     "",
+			found:    false,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			var tmpl *gotmpl.Template // left nil when the case supplies no template text
+			if tc.template != "" {
+				tmpl = gotmpl.Must(gotmpl.New("test").Parse(tc.template))
+			}
+			got, found := extractToolCallsFormat(tmpl)
+			if got != tc.want {
+				t.Errorf("got text %q, want %q", got, tc.want)
+			}
+			if found != tc.found {
+				t.Errorf("got found %v, want %v", found, tc.found)
+			}
+		})
+	}
+}
+
+// TestToolPrefix checks the normalized prefix toolPrefix derives from a ".ToolCalls" if-branch.
+func TestToolPrefix(t *testing.T) {
+	cases := []struct {
+		name     string
+		template string
+		want     string
+	}{
+		{
+			name:     "basic tool call with action prefix",
+			template: "{{if .ToolCalls}}Action: ```json{{end}}",
+			want:     "Action: ```json",
+		},
+		{
+			name:     "incomplete functools bracket",
+			template: "{{if .ToolCalls}}functools[{{end}}",
+			want:     "functools[",
+		},
+		{
+			name:     "tool call with angle brackets",
+			template: "{{if .ToolCalls}}Hello, world! <tool_call>{{end}}",
+			want:     "Hello, world! <tool_call>",
+		},
+		{
+			name:     "multiple tool call formats",
+			template: "{{if .ToolCalls}}[tool_call] <tool_call>{{end}}",
+			want:     "[tool_call] <tool_call>",
+		},
+		{
+			name:     "single angle bracket tool call",
+			template: "{{if .ToolCalls}}<tool_call>{{end}}",
+			want:     "<tool_call>",
+		},
+		{
+			name:     "incomplete angle bracket after tool call",
+			template: "{{if .ToolCalls}}[tool_call] <{{end}}",
+			want:     "[tool_call] <",
+		},
+		{
+			name:     "angle bracket prefix with tool call",
+			template: "{{if .ToolCalls}}> <tool_call>{{end}}",
+			want:     "> <tool_call>",
+		},
+		{
+			name:     "uppercase tool call with incomplete bracket",
+			template: "{{if .ToolCalls}}[TOOL_CALL] [{{end}}",
+			want:     "[TOOL_CALL] [",
+		},
+		{
+			name:     "uppercase tool call with adjacent bracket",
+			template: "{{if .ToolCalls}}[TOOL_CALL][{{end}}",
+			want:     "[TOOL_CALL][",
+		},
+		{
+			name:     "tool call with pipe delimiters",
+			template: "{{if .ToolCalls}}<|tool_call|>{{end}}",
+			want:     "<|tool_call|>",
+		},
+		{
+			name:     "tool with no prefix",
+			template: "{{if .ToolCalls}}{{end}}",
+			want:     "",
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			tmpl, err := gotmpl.New("test").Parse(tt.template)
+			if err != nil {
+				t.Fatalf("failed to parse template: %v", err)
+			}
+			if got := toolPrefix(tmpl); got != tt.want {
+				t.Errorf("toolPrefix(%q) = %q; want %q", tt.template, got, tt.want)
+			}
+		})
+	}
+}
+
+// TestToolTemplate checks whether toolTemplate finds a range over .ToolCalls in each template.
+func TestToolTemplate(t *testing.T) {
+	cases := []struct {
+		name     string
+		template string
+		want     bool
+	}{
+		{
+			name:     "basic tool call range",
+			template: "{{range .ToolCalls}}test{{end}}",
+			want:     true,
+		},
+		{
+			name:     "no tool calls",
+			template: "{{range .Other}}test{{end}}",
+			want:     false,
+		},
+		{
+			name:     "nested tool calls",
+			template: "{{range .Outer}}{{range .ToolCalls}}test{{end}}{{end}}",
+			want:     true,
+		},
+		{
+			name:     "empty template",
+			template: "",
+			want:     false,
+		},
+		{
+			name:     "tool calls in if statement",
+			template: "{{if .ToolCalls}}test{{end}}",
+			want:     false,
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			tmpl, err := gotmpl.New("test").Parse(tt.template)
+			if err != nil {
+				t.Fatalf("failed to parse template: %v", err)
+			}
+			parsed, err := template.Parse(tmpl.Root.String())
+			if err != nil {
+				t.Fatalf("failed to parse template: %v", err)
+			}
+			// toolTemplate errors only when no subtree was found, so check both directions.
+			_, err = toolTemplate(parsed)
+			if got := err == nil; got != tt.want {
+				t.Errorf("toolTemplate() found = %v (err: %v); want %v", got, err, tt.want)
+			}
+		})
+	}
+}
+
+// TestSuffixOverlap checks the start index suffixOverlap reports for full, partial and absent overlaps.
+func TestSuffixOverlap(t *testing.T) {
+	cases := []struct {
+		name   string
+		text   string
+		prefix string
+		want   int
+	}{
+		{
+			name:   "no overlap",
+			text:   "hello world",
+			prefix: "<tool_call>",
+			want:   -1,
+		},
+		{
+			name:   "full overlap",
+			text:   "<tool_call>",
+			prefix: "<tool_call>",
+			want:   0,
+		},
+		{
+			name:   "partial overlap",
+			text:   "text <tool_call>",
+			prefix: "<tool_call>",
+			want:   5,
+		},
+		{
+			name:   "delimiter longer than string",
+			text:   "<tool>",
+			prefix: "<tool_call>",
+			want:   -1,
+		},
+		{
+			name:   "empty string",
+			text:   "",
+			prefix: "<tool_call>",
+			want:   -1,
+		},
+		{
+			name:   "empty delimiter",
+			text:   "<tool_call>",
+			prefix: "",
+			want:   -1,
+		},
+		{
+			name:   "single char overlap",
+			text:   "test<",
+			prefix: "<tool_call>",
+			want:   4,
+		},
+		{
+			name:   "partial tool call",
+			text:   "hello <tool_",
+			prefix: "<tool_call>",
+			want:   6,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := suffixOverlap(tc.text, tc.prefix); got != tc.want {
+				t.Errorf("suffixOverlap(%q, %q) = %d; want %d", tc.text, tc.prefix, got, tc.want)
+			}
+		})
+	}
+}
+
+// TestExtractToolArgs exercises extractToolCallsFormat (despite its name) on nested and repeated branches.
+func TestExtractToolArgs(t *testing.T) {
+	cases := []struct {
+		name     string
+		template string
+		want     string
+		ok       bool
+	}{
+		{
+			name:     "basic tool call with text after",
+			template: `{{if .ToolCalls}}tool response{{end}}`,
+			want:     "tool response",
+			ok:       true,
+		},
+		{
+			name:     "tool call with mixed content after",
+			template: `{{if .ToolCalls}}<tool_call>{{.Something}}{{end}}`,
+			want:     "<tool_call>",
+			ok:       true,
+		},
+		{
+			name:     "tool call with no text after",
+			template: `{{if .ToolCalls}}{{.Something}}{{end}}`,
+			want:     "",
+			ok:       true,
+		},
+		{
+			name:     "nested tool call",
+			template: `{{if .Something}}{{if .ToolCalls}}[TOOL_CALL]{{end}}{{end}}`,
+			want:     "[TOOL_CALL]",
+			ok:       true,
+		},
+		{
+			name:     "no tool calls",
+			template: `{{if .Something}}no tools here{{end}}`,
+			want:     "",
+			ok:       false,
+		},
+		{
+			name:     "empty template",
+			template: ``,
+			want:     "",
+			ok:       false,
+		},
+		{
+			name:     "multiple tool calls sections",
+			template: `{{if .ToolCalls}}first{{end}}{{if .ToolCalls}}second{{end}}`,
+			want:     "first",
+			ok:       true,
+		},
+		{
+			name:     "range over tool calls",
+			template: `{{if .ToolCalls}}{{range .ToolCalls}}tool{{end}}{{end}}`,
+			want:     "",
+			ok:       true,
+		},
+		{
+			name:     "tool calls with pipe delimiters",
+			template: `{{if .ToolCalls}}<|tool|>{{end}}`,
+			want:     "<|tool|>",
+			ok:       true,
+		},
+		{
+			name:     "tool calls with nested template",
+			template: `{{if .ToolCalls}}{{template "tool" .}}{{end}}`,
+			want:     "",
+			ok:       true,
+		},
+		{
+			name:     "tool calls with whitespace variations",
+			template: `{{if .ToolCalls}}  tool  {{end}}`,
+			want:     "  tool  ",
+			ok:       true,
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			tmpl, err := gotmpl.New("test").Parse(tt.template)
+			if err != nil {
+				t.Fatalf("failed to parse template: %v", err)
+			}
+			got, ok := extractToolCallsFormat(tmpl)
+			if got != tt.want {
+				t.Errorf("extractToolCallsFormat() got = %q, want %q", got, tt.want)
+			}
+			if ok != tt.ok {
+				t.Errorf("extractToolCallsFormat() ok = %v, want %v", ok, tt.ok)
+			}
+		})
+	}
+}
+
+// TestCollect checks which maps collect gathers from flat, nested, array and non-map inputs.
+func TestCollect(t *testing.T) {
+	cases := []struct {
+		name string
+		obj  any
+		want []map[string]any
+	}{
+		{
+			name: "simple map",
+			obj: map[string]any{
+				"key": "value",
+			},
+			want: []map[string]any{
+				{"key": "value"},
+			},
+		},
+		{
+			name: "nested map",
+			obj: map[string]any{
+				"outer": map[string]any{
+					"inner": "value",
+				},
+			},
+			want: []map[string]any{
+				{"outer": map[string]any{"inner": "value"}},
+				{"inner": "value"},
+			},
+		},
+		{
+			name: "array of maps",
+			obj: []any{
+				map[string]any{"key1": "val1"},
+				map[string]any{"key2": "val2"},
+			},
+			want: []map[string]any{
+				{"key1": "val1"},
+				{"key2": "val2"},
+			},
+		},
+		{
+			name: "deeply nested",
+			obj: map[string]any{
+				"l1": map[string]any{
+					"l2": map[string]any{
+						"l3": "value",
+					},
+				},
+			},
+			want: []map[string]any{
+				{"l1": map[string]any{"l2": map[string]any{"l3": "value"}}},
+				{"l2": map[string]any{"l3": "value"}},
+				{"l3": "value"},
+			},
+		},
+		{
+			name: "non-map value",
+			obj:  "string",
+			want: nil,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := collect(tc.obj)
+			if len(got) != len(tc.want) {
+				t.Fatalf("collect() got %d maps, want %d", len(got), len(tc.want))
+			}
+
+			// Lengths match, so compare the maps position by position.
+			for i, want := range tc.want {
+				if !mapsEqual(got[i], want) {
+					t.Errorf("collect() map[%d] = %v, want %v", i, got[i], want)
+				}
+			}
+		})
+	}
+}
+
+// mapsEqual compares two maps for deep equality: nested map[string]any values recurse and all
+// other values use !=, which panics when both hold the same uncomparable type (e.g. a slice).
+func mapsEqual(m1, m2 map[string]any) bool {
+	if len(m1) != len(m2) {
+		return false
+	}
+	for key, left := range m1 {
+		right, present := m2[key]
+		if !present {
+			return false
+		}
+		leftMap, leftIsMap := left.(map[string]any)
+		if !leftIsMap {
+			if left != right {
+				return false
+			}
+			continue
+		}
+		if rightMap, rightIsMap := right.(map[string]any); !rightIsMap || !mapsEqual(leftMap, rightMap) {
+			return false
+		}
+	}
+	return true
+}
Author	SHA1	Message	Date
Patrick Devine	aa25aff10d	client: add request signing to the client (#10881 ) If OLLAMA_AUTH is set, sign each request w/ a timestamp and pass the signature in the token header	2025-05-27 16:50:57 -07:00
Jesse Gross	ea79003180	kvcache: Skip computing causal mask for worst case graph reservation Computing an attention mask for a large context and max batch is expensive - over 100ms. Models like Gemma3 that have multiple types of caches and custom attention masks need to do this 4 times, so this adds approximately 500ms to startup time when using 128k context When we are reserving the worst case graph, we don't need the mask, only its shape, so we can skip this.	2025-05-27 14:25:15 -07:00
Kyle Steere	9239a254e0	server: abort download on empty digest Signed-off-by: Kyle Steere <kyle.steere@chainguard.dev>	2025-05-27 11:28:48 -07:00
Parth Sareen	066d0f4746	tools: relax JSON parse constraints for tool calling (#10872 )	2025-05-26 18:59:06 -07:00
Parth Sareen	aea6fb9b58	tools: remove newline stripping (#10869 )	2025-05-26 17:16:00 -07:00
RAPID ARCHITECT	012cf65340	readme: add AWS Strands Agents SDK example to community integrations (#10865 )	2025-05-26 12:05:03 -07:00
Min Yoo	a45231af47	readme: Add macLlama to community integrations (#10790 ) This commit updates the README to include macLlama within the community integrations section. macLlama is a native macOS application built for lightweight and efficient LLM interaction. Key features include: * Lightweight & Native: Designed to be resource-friendly and perform optimally on macOS. * Chat-like Interface: Provides a user-friendly, conversational interface. * Multiple Window Support: Allows users to manage multiple conversations simultaneously. The primary goal of macLlama is to offer a simple and easy-to-run LLM experience on macOS.	2025-05-24 13:18:32 -07:00
Daniel Hiltgen	2307fc2bcd	tests: drop llama3.2-vision embedding tests (#10837 )	2025-05-24 13:17:53 -07:00
frob	6623898198	docs: remove unsupported quantizations (#10842 )	2025-05-24 13:17:26 -07:00
frob	eda472df1b	server: add hint to the error message when model path access fails (#10843 )	2025-05-24 13:17:04 -07:00
Jesse Gross	f18e0cb550	ml: Improve slog formatting for BackendMemory	2025-05-23 20:08:23 -07:00
Parth Sareen	e8b981fa5d	tools: refactor tool call parsing and enable streaming (#10415 )	2025-05-23 14:19:31 -07:00