Merge remote-tracking branch 'origin/main' into GraniteFour

* origin/main:
readme: add GMAI - Gradle Managed AI to community integrations (#11461)
tools: fix parsing issue when a tool name is a substring of another (#11456)
readme: update argo description to support deep research (#11455)
ci: switch mac builder to arm64 (#11379)
docs: add the no-Modelfile function of `ollama create` (#9077)
openai: allow openai endpoint to accept webp images (#11412)
readme: update the llama.cpp github link (#11427)
compile bf16 support into ggml-metal (#11430)
cmd: add default assistant role to message construction (#11431)
api: fix unreachable status err (#11423)
docs: fix typo in macos.md (#11425)
Gabe Goodhart 2025-07-21 15:04:52 -06:00
commit 895d5563df
11 changed files with 338 additions and 31 deletions

View File

@@ -23,7 +23,7 @@ jobs:
echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${GITHUB_REF_NAME#v}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_OUTPUT
darwin-build:
runs-on: macos-13
runs-on: macos-13-xlarge
environment: release
needs: setup-environment
strategy:

View File

@@ -360,7 +360,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Tkinter-based client](https://github.com/chyok/ollama-gui) (Python tkinter-based Client for Ollama)
- [LLMChat](https://github.com/trendy-design/llmchat) (Privacy focused, 100% local, intuitive all-in-one chat interface)
- [Local Multimodal AI Chat](https://github.com/Leon-Sander/Local-Multimodal-AI-Chat) (Ollama-based LLM Chat with support for multiple features, including PDF RAG, voice chat, image-based interactions, and integration with OpenAI.)
- [ARGO](https://github.com/xark-argo/argo) (Locally download and run Ollama and Huggingface models with RAG on Mac/Windows/Linux)
- [ARGO](https://github.com/xark-argo/argo) (Locally download and run Ollama and Huggingface models with RAG and deep research on Mac/Windows/Linux)
- [OrionChat](https://github.com/EliasPereirah/OrionChat) - OrionChat is a web interface for chatting with different AI providers
- [G1](https://github.com/bklieger-groq/g1) (Prototype of using prompting strategies to improve the LLM's reasoning through o1-like reasoning chains.)
- [Web management](https://github.com/lemonit-eric-mao/ollama-web-management) (Web management page)
@@ -595,10 +595,11 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [SimpleOllamaUnity](https://github.com/HardCodeDev777/SimpleOllamaUnity) (Unity Engine extension for communicating with Ollama in a few lines of code. Also works at runtime)
- [UnityCodeLama](https://github.com/HardCodeDev777/UnityCodeLama) (Unity Editor tool to analyze scripts via Ollama)
- [NativeMind](https://github.com/NativeMindBrowser/NativeMindExtension) (Private, on-device AI Assistant, no cloud dependencies)
- [GMAI - Gradle Managed AI](https://gmai.premex.se/) (Gradle plugin for automated Ollama lifecycle management during build phases)
### Supported backends
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
- [llama.cpp](https://github.com/ggml-org/llama.cpp) project founded by Georgi Gerganov.
### Observability
- [Opik](https://www.comet.com/docs/opik/cookbook/ollama) is an open-source platform to debug, evaluate, and monitor your LLM applications, RAG systems, and agentic workflows with comprehensive tracing, automated evaluations, and production-ready dashboards. Opik supports native integration with Ollama.

View File

@@ -222,10 +222,6 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
return fmt.Errorf("unmarshal: %w", err)
}
if errorResponse.Error != "" {
return errors.New(errorResponse.Error)
}
if response.StatusCode >= http.StatusBadRequest {
return StatusError{
StatusCode: response.StatusCode,
@@ -234,6 +230,10 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
}
}
if errorResponse.Error != "" {
return errors.New(errorResponse.Error)
}
if err := fn(bts); err != nil {
return err
}
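For context, the reordering above is what makes the `StatusError` path reachable: the HTTP status check now runs before the body's `error` field is consulted, so a non-2xx response is reported as a status error instead of being swallowed by the body-error branch. A minimal, self-contained sketch of the resulting precedence (`statusError` and `checkResponse` are illustrative stand-ins, not the `api` package's actual definitions):

```go
package main

import (
	"errors"
	"fmt"
	"net/http"
)

// statusError stands in for the api package's StatusError.
type statusError struct {
	StatusCode int
	Message    string
}

func (e statusError) Error() string {
	return fmt.Sprintf("status %d: %s", e.StatusCode, e.Message)
}

// checkResponse mirrors the reordered checks: a >=400 status wins over
// an error message found in the body, so a 500 surfaces as a status
// error rather than a plain error built from the body text.
func checkResponse(statusCode int, bodyErr string) error {
	if statusCode >= http.StatusBadRequest {
		return statusError{StatusCode: statusCode, Message: bodyErr}
	}
	if bodyErr != "" {
		return errors.New(bodyErr)
	}
	return nil
}

func main() {
	fmt.Println(checkResponse(http.StatusInternalServerError, "custom error message")) // status 500: custom error message
	fmt.Println(checkResponse(http.StatusOK, "mid-stream error"))                      // mid-stream error
}
```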

View File

@@ -89,6 +89,16 @@ func TestClientStream(t *testing.T) {
},
wantErr: "mid-stream error",
},
{
name: "http status error takes precedence over general error",
responses: []any{
testError{
message: "custom error message",
statusCode: http.StatusInternalServerError,
},
},
wantErr: "500",
},
{
name: "successful stream completion",
responses: []any{

View File

@@ -1080,10 +1080,11 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
var state *displayResponseState = &displayResponseState{}
var latest api.ChatResponse
var fullResponse strings.Builder
var role string
var thinkTagOpened bool = false
var thinkTagClosed bool = false
role := "assistant"
fn := func(response api.ChatResponse) error {
if response.Message.Content != "" || !opts.HideThinking {
p.StopAndClear()
@@ -1417,13 +1418,13 @@ func NewCLI() *cobra.Command {
createCmd := &cobra.Command{
Use: "create MODEL",
Short: "Create a model from a Modelfile",
Short: "Create a model",
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: CreateHandler,
}
createCmd.Flags().StringP("file", "f", "", "Name of the Modelfile (default \"Modelfile\"")
createCmd.Flags().StringP("file", "f", "", "Name of the Modelfile (default \"Modelfile\")")
createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_K_M)")
showCmd := &cobra.Command{

View File

@@ -53,6 +53,8 @@ FROM /path/to/safetensors/directory
If you create the Modelfile in the same directory as the weights, you can use the command `FROM .`.
If you do not create the Modelfile, ollama will act as if there were a Modelfile with the command `FROM .`.
Now run the `ollama create` command from the directory where you created the `Modelfile`:
```shell
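# Illustrative continuation: the diff view truncates here. "my-model" is
# a hypothetical model name for demonstration, not taken from the diff.
ollama create my-model
```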

View File

@@ -22,7 +22,7 @@ To install the Ollama application somewhere other than `Applications`, place the
Ollama on macOS stores files in a few different locations.
- `~/.ollama` contains models and configuration
- `~/.ollama/logs` contains logs
- *app.log* contains most resent logs from the GUI application
- *app.log* contains most recent logs from the GUI application
- *server.log* contains the most recent server logs
- `<install location>/Ollama.app/Contents/Resources/ollama` the CLI binary
@@ -39,4 +39,4 @@ rm -rf ~/Library/Caches/com.electron.ollama/
rm -rf ~/Library/Caches/ollama
rm -rf ~/Library/WebKit/com.electron.ollama
rm -rf ~/.ollama
```
```

View File

@@ -4,6 +4,6 @@ package metal
//go:generate sh -c "{ echo // Code generated by 'go generate'. DO NOT EDIT.; sed -e '/__embed_ggml-common.h__/r ../ggml-common.h' -e '/__embed_ggml-common.h__/d' -e '/#include \"ggml-metal-impl.h\"/r ggml-metal-impl.h' -e '/#include \"ggml-metal-impl.h\"/d' ggml-metal.metal; } >ggml-metal-embed.metal"
// #cgo CPPFLAGS: -DGGML_METAL_NDEBUG -DGGML_METAL_EMBED_LIBRARY -I.. -I../../include
// #cgo CPPFLAGS: -DGGML_METAL_NDEBUG -DGGML_METAL_EMBED_LIBRARY -DGGML_METAL_USE_BF16 -I.. -I../../include
// #cgo LDFLAGS: -framework Metal -framework MetalKit
import "C"
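The one-line flag change above is the whole mechanism: adding `-DGGML_METAL_USE_BF16` to the cgo `CPPFLAGS` compiles the embedded Metal sources with their bf16 paths enabled. A toy sketch of how a cgo preprocessor define gates native code, using illustrative names (`USE_FEATURE`, `featureEnabled`) rather than ggml's:

```go
package main

/*
#cgo CPPFLAGS: -DUSE_FEATURE

#ifdef USE_FEATURE
static int featureEnabled(void) { return 1; }
#else
static int featureEnabled(void) { return 0; }
#endif
*/
import "C"

import "fmt"

func main() {
	// Prints true because CPPFLAGS defines USE_FEATURE at build time;
	// drop the define and the #else branch is compiled instead.
	fmt.Println("feature enabled:", C.featureEnabled() == 1)
}
```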

View File

@@ -423,7 +423,7 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
}
}
types := []string{"jpeg", "jpg", "png"}
types := []string{"jpeg", "jpg", "png", "webp"}
valid := false
for _, t := range types {
prefix := "data:image/" + t + ";base64,"
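The change above only appends `"webp"` to the accepted types; the rest of the validation loop is unchanged. A self-contained sketch of the data-URL prefix check that list feeds (`isSupportedImage` is an illustrative helper name, not the package's API):

```go
package main

import (
	"fmt"
	"strings"
)

// isSupportedImage reports whether a data URL declares one of the
// accepted image MIME types; "webp" is newly accepted alongside
// jpeg, jpg, and png.
func isSupportedImage(dataURL string) bool {
	types := []string{"jpeg", "jpg", "png", "webp"}
	for _, t := range types {
		if strings.HasPrefix(dataURL, "data:image/"+t+";base64,") {
			return true
		}
	}
	return false
}

func main() {
	fmt.Println(isSupportedImage("data:image/webp;base64,AAAA")) // true
	fmt.Println(isSupportedImage("data:image/gif;base64,AAAA"))  // false
}
```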

View File

@@ -115,21 +115,7 @@ func (p *Parser) findTag() (int, bool) {
// parseToolCall finds the next complete tool call in the buffer
// incrementing n and advancing the buffer.
func (p *Parser) parseToolCall() *api.ToolCall {
var tool *api.Tool
var end int = len(p.buffer)
var i int
// find tool name
for _, t := range p.tools {
n := t.Function.Name
if i = bytes.Index(p.buffer, []byte(n)); i != -1 {
if i+len(n) < end {
tool = &t
end = i + len(n)
}
}
}
tool, end := findTool(p.tools, p.buffer)
if tool == nil {
return nil
}
@@ -139,10 +125,10 @@ func (p *Parser) parseToolCall() *api.ToolCall {
// parsing arguments before the tool name, which may be needed in the future
args := map[string]any{}
if len(tool.Function.Parameters.Properties) > 0 {
var i int
if args, i = findArguments(*tool, p.buffer[end:]); args == nil {
return nil
}
end += i
}
@@ -159,9 +145,74 @@ func (p *Parser) parseToolCall() *api.ToolCall {
return tc
}
// findTool finds the first tool name in the list that matches the
// beginning of the buffer, returning nil if no tool is found
// or if the buffer ends with a partial tool name since we need
// to wait for more data to disambiguate.
// The second return value is the end position of the tool name
// if one is found, otherwise 0.
func findTool(tools []api.Tool, buf []byte) (*api.Tool, int) {
if len(buf) == 0 {
return nil, 0
}
// check if buffer ends with a partial tool name
// this prevents matching "get" when seeing "get_weather"
var longest string
for _, t := range tools {
if len(t.Function.Name) > len(longest) {
longest = t.Function.Name
}
}
// Only check up to longest characters from the end
for i := 1; i <= min(len(buf), len(longest)); i++ {
tail := buf[len(buf)-i:]
for _, t := range tools {
name := []byte(t.Function.Name)
if len(tail) < len(name) && bytes.HasPrefix(name, tail) {
return nil, 0
}
}
}
// find first occurrence of the longest tool name
var found *api.Tool
start := -1
end := -1
for i := range tools {
name := []byte(tools[i].Function.Name)
pos := bytes.Index(buf, name)
if pos == -1 {
continue
}
// Skip if we have a better match already
if start != -1 {
if pos > start {
continue
}
if pos == start && len(name) <= len(found.Function.Name) {
continue
}
}
found = &tools[i]
start = pos
end = pos + len(name)
}
if found != nil {
return found, end
}
return nil, 0
}
// findArguments returns the first object that appears to be
// arguments for the provided tool in the provided buffer,
// returning nil if no arguments are found.
// returning nil if no arguments are found and the end position
// TODO (jmorganca): this does not support parsing omitted arguments
// objects for functions that have all-optional parameters
// e.g. `{"name": "get_conditions", "arguments": {}}` will work but
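To make the disambiguation concrete: `findTool` declines to match while the buffer could still grow into a longer tool name, which is exactly the substring collision the tests below exercise. A standalone sketch of that partial-suffix check (`hasAmbiguousTail` is an illustrative name, not the package's API; the `min` builtin requires Go 1.21+):

```go
package main

import (
	"bytes"
	"fmt"
)

// hasAmbiguousTail reports whether buf ends with a proper prefix of any
// tool name, meaning the parser must wait for more bytes before it can
// safely match: "say_hello" at the end of the buffer might still become
// "say_hello_world".
func hasAmbiguousTail(names []string, buf []byte) bool {
	longest := 0
	for _, n := range names {
		if len(n) > longest {
			longest = len(n)
		}
	}
	// Only tails up to the longest name can be a partial match.
	for i := 1; i <= min(len(buf), longest); i++ {
		tail := buf[len(buf)-i:]
		for _, n := range names {
			if len(tail) < len(n) && bytes.HasPrefix([]byte(n), tail) {
				return true
			}
		}
	}
	return false
}

func main() {
	names := []string{"say_hello", "say_hello_world"}
	fmt.Println(hasAmbiguousTail(names, []byte(`"name": "say_hello`)))       // true: wait for more data
	fmt.Println(hasAmbiguousTail(names, []byte(`"name": "say_hello_world`))) // false: nothing longer to wait for
}
```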

View File

@@ -112,6 +112,81 @@ func TestParser(t *testing.T) {
Description: "Say hello",
},
},
{
Type: "function",
Function: api.ToolFunction{
Name: "say_hello_world",
Description: "Say hello world",
},
},
{
Type: "function",
Function: api.ToolFunction{
Name: "get_address",
Description: "Get the address of a given location",
Parameters: struct {
Type string `json:"type"`
Defs any `json:"$defs,omitempty"`
Items any `json:"items,omitempty"`
Required []string `json:"required"`
Properties map[string]struct {
Type api.PropertyType `json:"type"`
Items any `json:"items,omitempty"`
Description string `json:"description"`
Enum []any `json:"enum,omitempty"`
} `json:"properties"`
}{
Type: "object",
Properties: map[string]struct {
Type api.PropertyType `json:"type"`
Items any `json:"items,omitempty"`
Description string `json:"description"`
Enum []any `json:"enum,omitempty"`
}{
"location": {
Type: api.PropertyType{"string"},
Description: "The location to get the address for",
},
},
},
},
},
{
Type: "function",
Function: api.ToolFunction{
Name: "add",
Description: "Add two numbers",
Parameters: struct {
Type string `json:"type"`
Defs any `json:"$defs,omitempty"`
Items any `json:"items,omitempty"`
Required []string `json:"required"`
Properties map[string]struct {
Type api.PropertyType `json:"type"`
Items any `json:"items,omitempty"`
Description string `json:"description"`
Enum []any `json:"enum,omitempty"`
} `json:"properties"`
}{
Type: "object",
Properties: map[string]struct {
Type api.PropertyType `json:"type"`
Items any `json:"items,omitempty"`
Description string `json:"description"`
Enum []any `json:"enum,omitempty"`
}{
"a": {
Type: api.PropertyType{"string"},
Description: "The first number to add",
},
"b": {
Type: api.PropertyType{"string"},
Description: "The second number to add",
},
},
},
},
},
}
tests := []struct {
@@ -629,6 +704,173 @@ func TestParser(t *testing.T) {
},
},
},
{
name: "tool name with collision",
inputs: []string{
"<tool_call>",
"{",
"\"name\": \"say_hello",
"_world\",",
"}",
"}",
},
content: "",
tmpl: qwen,
calls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Index: 0,
Name: "say_hello_world",
Arguments: api.ToolCallFunctionArguments{},
},
},
},
},
{
name: "tool name with collision multiple",
inputs: []string{
"<tool_call>",
"{",
"\"name\": \"say_hello",
"_world\",",
"}",
"</tool_call>",
"<tool_call>",
"{",
"\"name\": \"say_hello",
"\",",
"}",
"</tool_call>",
},
content: "",
tmpl: qwen,
calls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Index: 0,
Name: "say_hello_world",
Arguments: api.ToolCallFunctionArguments{},
},
},
{
Function: api.ToolCallFunction{
Index: 1,
Name: "say_hello",
Arguments: api.ToolCallFunctionArguments{},
},
},
},
},
{
name: "tool name with collision non streaming",
inputs: []string{
`<tool_call>{"name": "say_hello`,
},
content: "",
tmpl: qwen,
calls: nil,
},
{
name: "tool name with collision non streaming multiple",
inputs: []string{
`<tool_call>{"name": "say_hello"}</tool_call><tool_call>{"name": "say_hello_world"}`,
},
content: "",
tmpl: qwen,
calls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Index: 0,
Name: "say_hello",
Arguments: api.ToolCallFunctionArguments{},
},
},
{
Function: api.ToolCallFunction{
Index: 1,
Name: "say_hello_world",
Arguments: api.ToolCallFunctionArguments{},
},
},
},
},
{
name: "tool name with collision non streaming shorter",
inputs: []string{
`<tool_call>{"name": "say_hello"}</tool_call>`,
},
content: "",
tmpl: qwen,
calls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Index: 0,
Name: "say_hello",
Arguments: api.ToolCallFunctionArguments{},
},
},
},
},
{
name: "tool name with collision non streaming longer",
inputs: []string{
`<tool_call>{"name": "say_hello_world"}</tool_call>`,
},
content: "",
tmpl: qwen,
calls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Index: 0,
Name: "say_hello_world",
Arguments: api.ToolCallFunctionArguments{},
},
},
},
},
{
name: "tool name with substring of another",
inputs: []string{
"{",
"\"name\": \"get_address\",",
"\"arguments\": {",
"\"location\": \"London\"",
"}",
"}",
},
content: "",
tmpl: json,
calls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Index: 0,
Name: "get_address",
Arguments: api.ToolCallFunctionArguments{
"location": "London",
},
},
},
},
},
{
name: "tool name with substring of another",
inputs: []string{
`<tool_call>{"name": "get_address", "arguments": {"location": "London"}}</tool_call>`,
},
content: "",
tmpl: qwen,
calls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Index: 0,
Name: "get_address",
Arguments: api.ToolCallFunctionArguments{
"location": "London",
},
},
},
},
},
}
for _, tt := range tests {