api: enable tool streaming (#7836)

Support Multiple LoRa Adapters (#7667)
Closes #7627
2024-11-27 13:40:57 -08:00 · 2024-11-27 11:00:04 -08:00
16 changed files with 383 additions and 371 deletions
--- a/api/client.go
+++ b/api/client.go
@@ -163,29 +163,24 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 	scanBuf := make([]byte, 0, maxBufferSize)
 	scanner.Buffer(scanBuf, maxBufferSize)
 	for scanner.Scan() {
-		bts := scanner.Bytes()
+		var errorResponse struct {
+			Error string `json:"error,omitempty"`
+		}

-		var errorResponse ErrorResponse
+		bts := scanner.Bytes()
 		if err := json.Unmarshal(bts, &errorResponse); err != nil {
 			return fmt.Errorf("unmarshal: %w", err)
 		}

-		switch errorResponse.Code {
-		case ErrCodeUnknownKey:
-			return ErrUnknownOllamaKey{
-				Message: errorResponse.Message,
-				Key:     errorResponse.Data["key"].(string),
-			}
-		}
-		if errorResponse.Message != "" {
-			return errors.New(errorResponse.Message)
+		if errorResponse.Error != "" {
+			return errors.New(errorResponse.Error)
 		}

 		if response.StatusCode >= http.StatusBadRequest {
 			return StatusError{
 				StatusCode:   response.StatusCode,
 				Status:       response.Status,
-				ErrorMessage: errorResponse.Message,
+				ErrorMessage: errorResponse.Error,
 			}
 		}

--- a/api/client_test.go
+++ b/api/client_test.go
@@ -1,12 +1,6 @@
 package api

 import (
-	"context"
-	"errors"
-	"fmt"
-	"net/http"
-	"net/http/httptest"
-	"net/url"
 	"testing"
 )

@@ -49,117 +43,3 @@ func TestClientFromEnvironment(t *testing.T) {
 		})
 	}
 }
-
-func TestStream(t *testing.T) {
-	tests := []struct {
-		name           string
-		serverResponse []string
-		statusCode     int
-		expectedError  error
-	}{
-		{
-			name: "unknown key error",
-			serverResponse: []string{
-				`{"error":"unauthorized access","code":"unknown_key","data":{"key":"test-key"}}`,
-			},
-			statusCode: http.StatusUnauthorized,
-			expectedError: &ErrUnknownOllamaKey{
-				Message: "unauthorized access",
-				Key:     "test-key",
-			},
-		},
-		{
-			name: "general error message",
-			serverResponse: []string{
-				`{"error":"something went wrong"}`,
-			},
-			statusCode:    http.StatusInternalServerError,
-			expectedError: fmt.Errorf("something went wrong"),
-		},
-		{
-			name: "malformed json response",
-			serverResponse: []string{
-				`{invalid-json`,
-			},
-			statusCode:    http.StatusOK,
-			expectedError: fmt.Errorf("unmarshal: invalid character 'i' looking for beginning of object key string"),
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				w.Header().Set("Content-Type", "application/x-ndjson")
-				w.WriteHeader(tt.statusCode)
-				for _, resp := range tt.serverResponse {
-					fmt.Fprintln(w, resp)
-				}
-			}))
-			defer server.Close()
-
-			baseURL, err := url.Parse(server.URL)
-			if err != nil {
-				t.Fatalf("failed to parse server URL: %v", err)
-			}
-
-			client := &Client{
-				http: server.Client(),
-				base: baseURL,
-			}
-
-			var responses [][]byte
-			err = client.stream(context.Background(), "POST", "/test", "test", func(bts []byte) error {
-				responses = append(responses, bts)
-				return nil
-			})
-
-			// Error checking
-			if tt.expectedError == nil {
-				if err != nil {
-					t.Fatalf("unexpected error: %v", err)
-				}
-				return
-			}
-
-			if err == nil {
-				t.Fatalf("expected error %v, got nil", tt.expectedError)
-			}
-
-			// Check for specific error types
-			var unknownKeyErr ErrUnknownOllamaKey
-			if errors.As(tt.expectedError, &unknownKeyErr) {
-				var gotErr ErrUnknownOllamaKey
-				if !errors.As(err, &gotErr) {
-					t.Fatalf("expected ErrUnknownOllamaKey, got %T", err)
-				}
-				if unknownKeyErr.Key != gotErr.Key {
-					t.Errorf("expected key %q, got %q", unknownKeyErr.Key, gotErr.Key)
-				}
-				if unknownKeyErr.Message != gotErr.Message {
-					t.Errorf("expected message %q, got %q", unknownKeyErr.Message, gotErr.Message)
-				}
-				return
-			}
-
-			var statusErr StatusError
-			if errors.As(tt.expectedError, &statusErr) {
-				var gotErr StatusError
-				if !errors.As(err, &gotErr) {
-					t.Fatalf("expected StatusError, got %T", err)
-				}
-				if statusErr.StatusCode != gotErr.StatusCode {
-					t.Errorf("expected status code %d, got %d", statusErr.StatusCode, gotErr.StatusCode)
-				}
-				if statusErr.ErrorMessage != gotErr.ErrorMessage {
-					t.Errorf("expected error message %q, got %q", statusErr.ErrorMessage, gotErr.ErrorMessage)
-				}
-				return
-			}
-
-			// For other errors, compare error strings
-			if err.Error() != tt.expectedError.Error() {
-				t.Errorf("expected error %q, got %q", tt.expectedError, err)
-			}
-		})
-	}
-}
--- a/api/errors.go
+++ b/api/errors.go
@@ -1,74 +0,0 @@
-package api
-
-import (
-	"fmt"
-	"slices"
-	"strings"
-)
-
-const InvalidModelNameErrMsg = "invalid model name"
-
-// API error responses
-// ErrorCode represents a standardized error code identifier
-type ErrorCode string
-
-const (
-	ErrCodeUnknownKey ErrorCode = "unknown_key"
-	ErrCodeGeneral    ErrorCode = "general" // Generic fallback error code
-)
-
-// ErrorResponse implements a structured error interface
-type ErrorResponse struct {
-	Message string         `json:"error"` // Human-readable error message, uses 'error' field name for backwards compatibility
-	Code    ErrorCode      `json:"code"`  // Machine-readable error code for programmatic handling, not response code
-	Data    map[string]any `json:"data"`  // Additional error specific data, if any
-}
-
-func (e ErrorResponse) Error() string {
-	return e.Message
-}
-
-type ErrUnknownOllamaKey struct {
-	Message string
-	Key     string
-}
-
-func (e ErrUnknownOllamaKey) Error() string {
-	return fmt.Sprintf("unauthorized: unknown ollama key %q", strings.TrimSpace(e.Key))
-}
-
-func (e *ErrUnknownOllamaKey) FormatUserMessage(localKeys []string) string {
-	// The user should only be told to add the key if it is the same one that exists locally
-	if slices.Index(localKeys, e.Key) == -1 {
-		return e.Message
-	}
-
-	return fmt.Sprintf(`%s
-
-Your ollama key is:
-%s
-Add your key at:
-https://ollama.com/settings/keys`, e.Message, e.Key)
-}
-
-// StatusError is an error with an HTTP status code and message,
-// it is parsed on the client-side and not returned from the API
-type StatusError struct {
-	StatusCode   int    // e.g. 200
-	Status       string // e.g. "200 OK"
-	ErrorMessage string `json:"error"`
-}
-
-func (e StatusError) Error() string {
-	switch {
-	case e.Status != "" && e.ErrorMessage != "":
-		return fmt.Sprintf("%s: %s", e.Status, e.ErrorMessage)
-	case e.Status != "":
-		return e.Status
-	case e.ErrorMessage != "":
-		return e.ErrorMessage
-	default:
-		// this should not happen
-		return "something went wrong, please see the ollama server logs for details"
-	}
-}
--- a/api/types.go
+++ b/api/types.go
@@ -12,6 +12,27 @@ import (
 	"time"
 )

+// StatusError is an error with an HTTP status code and message.
+type StatusError struct {
+	StatusCode   int
+	Status       string
+	ErrorMessage string `json:"error"`
+}
+
+func (e StatusError) Error() string {
+	switch {
+	case e.Status != "" && e.ErrorMessage != "":
+		return fmt.Sprintf("%s: %s", e.Status, e.ErrorMessage)
+	case e.Status != "":
+		return e.Status
+	case e.ErrorMessage != "":
+		return e.ErrorMessage
+	default:
+		// this should not happen
+		return "something went wrong, please see the ollama server logs for details"
+	}
+}
+
 // ImageData represents the raw binary data of an image file.
 type ImageData []byte

--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -34,7 +34,6 @@ import (
 	"golang.org/x/term"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/auth"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/parser"
@@ -514,24 +513,6 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 	return generate(cmd, opts)
 }

-func localPubKeys() ([]string, error) {
-	usrKey, err := auth.GetPublicKey()
-	if err != nil {
-		return nil, err
-	}
-
-	keys := []string{usrKey}
-
-	if runtime.GOOS == "linux" {
-		// try the ollama service public key if on Linux
-		if svcKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub"); err == nil {
-			keys = append(keys, strings.TrimSpace(string(svcKey)))
-		}
-	}
-
-	return keys, nil
-}
-
 func PushHandler(cmd *cobra.Command, args []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
@@ -580,29 +561,21 @@ func PushHandler(cmd *cobra.Command, args []string) error {
 	request := api.PushRequest{Name: args[0], Insecure: insecure}

 	n := model.ParseName(args[0])
-	isOllamaHost := strings.HasSuffix(n.Host, ".ollama.ai") || strings.HasSuffix(n.Host, ".ollama.com")
 	if err := client.Push(cmd.Context(), &request, fn); err != nil {
 		if spinner != nil {
 			spinner.Stop()
 		}
-		var ke api.ErrUnknownOllamaKey
-		if errors.As(err, &ke) && isOllamaHost {
-			// the user has not added their ollama key to ollama.com
-			// return an error with a more user-friendly message
-			locals, _ := localPubKeys()
-			return errors.New(ke.FormatUserMessage(locals))
-		}
 		if strings.Contains(err.Error(), "access denied") {
 			return errors.New("you are not authorized to push to this namespace, create the model under a namespace you own")
 		}
-		return fmt.Errorf("yoyoyo: %w", err)
+		return err
 	}

 	p.Stop()
 	spinner.Stop()

 	destination := n.String()
-	if isOllamaHost {
+	if strings.HasSuffix(n.Host, ".ollama.ai") || strings.HasSuffix(n.Host, ".ollama.com") {
 		destination = "https://ollama.com/" + strings.TrimSuffix(n.DisplayShortest(), ":latest")
 	}
 	fmt.Printf("\nYou can find your model at:\n\n")
--- a/cmd/cmd_test.go
+++ b/cmd/cmd_test.go
@@ -373,13 +373,15 @@ func TestGetModelfileName(t *testing.T) {

 func TestPushHandler(t *testing.T) {
 	tests := []struct {
+		name           string
 		modelName      string
 		serverResponse map[string]func(w http.ResponseWriter, r *http.Request)
 		expectedError  string
 		expectedOutput string
 	}{
 		{
-			modelName: "successful-push",
+			name:      "successful push",
+			modelName: "test-model",
 			serverResponse: map[string]func(w http.ResponseWriter, r *http.Request){
 				"/api/push": func(w http.ResponseWriter, r *http.Request) {
 					if r.Method != http.MethodPost {
@@ -392,8 +394,8 @@ func TestPushHandler(t *testing.T) {
 						return
 					}

-					if req.Name != "successful-push" {
-						t.Errorf("expected model name 'successful-push', got %s", req.Name)
+					if req.Name != "test-model" {
+						t.Errorf("expected model name 'test-model', got %s", req.Name)
 					}

 					// Simulate progress updates
@@ -412,10 +414,11 @@ func TestPushHandler(t *testing.T) {
 					}
 				},
 			},
-			expectedOutput: "\nYou can find your model at:\n\n\thttps://ollama.com/successful-push\n",
+			expectedOutput: "\nYou can find your model at:\n\n\thttps://ollama.com/test-model\n",
 		},
 		{
-			modelName: "unauthorized-push",
+			name:      "unauthorized push",
+			modelName: "unauthorized-model",
 			serverResponse: map[string]func(w http.ResponseWriter, r *http.Request){
 				"/api/push": func(w http.ResponseWriter, r *http.Request) {
 					w.Header().Set("Content-Type", "application/json")
@@ -430,29 +433,10 @@ func TestPushHandler(t *testing.T) {
 			},
 			expectedError: "you are not authorized to push to this namespace, create the model under a namespace you own",
 		},
-		{
-			modelName: "unknown-key-err",
-			serverResponse: map[string]func(w http.ResponseWriter, r *http.Request){
-				"/api/push": func(w http.ResponseWriter, r *http.Request) {
-					w.Header().Set("Content-Type", "application/json")
-					w.WriteHeader(http.StatusUnauthorized)
-					uerr := api.ErrUnknownOllamaKey{
-						Key: "aaa",
-					}
-					err := json.NewEncoder(w).Encode(map[string]string{
-						"error": uerr.Error(),
-					})
-					if err != nil {
-						t.Fatal(err)
-					}
-				},
-			},
-			expectedError: "unauthorized: unknown ollama key \"aaa\"",
-		},
 	}

 	for _, tt := range tests {
-		t.Run(tt.modelName, func(t *testing.T) {
+		t.Run(tt.name, func(t *testing.T) {
 			mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 				if handler, ok := tt.serverResponse[r.URL.Path]; ok {
 					handler(w, r)
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@@ -19,6 +19,7 @@ import (
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/readline"
+	"github.com/ollama/ollama/types/errtypes"
 )

 type MultilineState int
@@ -219,7 +220,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 			fn := func(resp api.ProgressResponse) error { return nil }
 			err = client.Create(cmd.Context(), req, fn)
 			if err != nil {
-				if strings.Contains(err.Error(), api.InvalidModelNameErrMsg) {
+				if strings.Contains(err.Error(), errtypes.InvalidModelNameErrMsg) {
 					fmt.Printf("error: The model name '%s' is invalid\n", args[1])
 					continue
 				}
--- a/llama/runner/runner.go
+++ b/llama/runner/runner.go
@@ -833,10 +833,21 @@ func (s *Server) health(w http.ResponseWriter, r *http.Request) {
 	}
 }

+type multiLPath []string
+
+func (m *multiLPath) Set(value string) error {
+	*m = append(*m, value)
+	return nil
+}
+
+func (m *multiLPath) String() string {
+	return strings.Join(*m, ", ")
+}
+
 func (s *Server) loadModel(
 	params llama.ModelParams,
 	mpath string,
-	lpath string,
+	lpath multiLPath,
 	ppath string,
 	kvSize int,
 	flashAttention bool,
@@ -857,10 +868,12 @@ func (s *Server) loadModel(
 		panic(err)
 	}

-	if lpath != "" {
-		err := s.model.ApplyLoraFromFile(s.lc, lpath, 1.0, threads)
-		if err != nil {
-			panic(err)
+	if lpath.String() != "" {
+		for _, path := range lpath {
+			err := s.model.ApplyLoraFromFile(s.lc, path, 1.0, threads)
+			if err != nil {
+				panic(err)
+			}
 		}
 	}

@@ -890,7 +903,6 @@ func main() {
 	mainGpu := flag.Int("main-gpu", 0, "Main GPU")
 	flashAttention := flag.Bool("flash-attn", false, "Enable flash attention")
 	kvSize := flag.Int("ctx-size", 2048, "Context (or KV cache) size")
-	lpath := flag.String("lora", "", "Path to lora layer file")
 	port := flag.Int("port", 8080, "Port to expose the server on")
 	threads := flag.Int("threads", runtime.NumCPU(), "Number of threads to use during generation")
 	verbose := flag.Bool("verbose", false, "verbose output (default: disabled)")
@@ -900,6 +912,9 @@ func main() {
 	multiUserCache := flag.Bool("multiuser-cache", false, "optimize input cache algorithm for multiple users")
 	requirements := flag.Bool("requirements", false, "print json requirement information")

+	var lpaths multiLPath
+	flag.Var(&lpaths, "lora", "Path to lora layer file (can be specified multiple times)")
+
 	flag.Parse()
 	if *requirements {
 		printRequirements(os.Stdout)
@@ -946,7 +961,7 @@ func main() {
 	params := llama.ModelParams{
 		NumGpuLayers: *nGpuLayers,
 		MainGpu:      *mainGpu,
-		UseMmap:      !*noMmap && *lpath == "",
+		UseMmap:      !*noMmap && lpaths.String() == "",
 		UseMlock:     *mlock,
 		TensorSplit:  tensorSplitFloats,
 		Progress: func(progress float32) {
@@ -955,7 +970,7 @@ func main() {
 	}

 	server.ready.Add(1)
-	go server.loadModel(params, *mpath, *lpath, *ppath, *kvSize, *flashAttention, *threads, *multiUserCache)
+	go server.loadModel(params, *mpath, lpaths, *ppath, *kvSize, *flashAttention, *threads, *multiUserCache)

 	server.cond = sync.NewCond(&server.mu)

--- a/llm/server.go
+++ b/llm/server.go
@@ -144,10 +144,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
 	// Loop through potential servers
 	finalErr := errors.New("no suitable llama servers found")

-	if len(adapters) > 1 {
-		return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
-	}
-
 	rDir, err := runners.Refresh(build.EmbedFS)
 	if err != nil {
 		return nil, err
@@ -201,8 +197,9 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
 	}

 	if len(adapters) > 0 {
-		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
-		params = append(params, "--lora", adapters[0])
+		for _, adapter := range adapters {
+			params = append(params, "--lora", adapter)
+		}
 	}

 	if len(projectors) > 0 {
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -200,9 +200,9 @@ func toolCallId() string {
 	return "call_" + strings.ToLower(string(b))
 }

-func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
-	toolCalls := make([]ToolCall, len(r.Message.ToolCalls))
-	for i, tc := range r.Message.ToolCalls {
+func toToolCalls(tc []api.ToolCall) []ToolCall {
+	toolCalls := make([]ToolCall, len(tc))
+	for i, tc := range tc {
 		toolCalls[i].ID = toolCallId()
 		toolCalls[i].Type = "function"
 		toolCalls[i].Function.Name = tc.Function.Name
@@ -215,7 +215,11 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {

 		toolCalls[i].Function.Arguments = string(args)
 	}
+	return toolCalls
+}

+func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
+	toolCalls := toToolCalls(r.Message.ToolCalls)
 	return ChatCompletion{
 		Id:                id,
 		Object:            "chat.completion",
@@ -244,6 +248,7 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
 }

 func toChunk(id string, r api.ChatResponse) ChatCompletionChunk {
+	toolCalls := toToolCalls(r.Message.ToolCalls)
 	return ChatCompletionChunk{
 		Id:                id,
 		Object:            "chat.completion.chunk",
@@ -252,7 +257,7 @@ func toChunk(id string, r api.ChatResponse) ChatCompletionChunk {
 		SystemFingerprint: "fp_ollama",
 		Choices: []ChunkChoice{{
 			Index: 0,
-			Delta: Message{Role: "assistant", Content: r.Message.Content},
+			Delta: Message{Role: "assistant", Content: r.Message.Content, ToolCalls: toolCalls},
 			FinishReason: func(reason string) *string {
 				if len(reason) > 0 {
 					return &reason
--- a/server/images.go
+++ b/server/images.go
@@ -23,7 +23,6 @@ import (
 	"strings"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/auth"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/llama"
@@ -31,7 +30,6 @@ import (
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/template"
 	"github.com/ollama/ollama/types/model"
-	"github.com/ollama/ollama/types/registry"
 	"github.com/ollama/ollama/version"
 )

@@ -982,6 +980,8 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
 	return fmt.Sprintf("sha256:%x", h.Sum(nil)), n
 }

+var errUnauthorized = errors.New("unauthorized: access denied")
+
 func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *registryOptions) (*http.Response, error) {
 	for range 2 {
 		resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
@@ -1019,33 +1019,13 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
 			if err != nil {
 				return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
 			}
-
-			var re registry.Errs
-			if err := json.Unmarshal(responseBody, &re); err == nil && len(re.Errors) > 0 {
-				if re.HasCode(registry.ErrCodeAnonymous) {
-					// if the error is due to anonymous access return a custom error
-					// this error is used by the CLI to direct a user to add their key to an account
-					pubKey, nestedErr := auth.GetPublicKey()
-					if nestedErr != nil {
-						slog.Error(fmt.Sprintf("couldn't get public key: %v", nestedErr))
-						return nil, re
-					}
-					return nil, api.ErrUnknownOllamaKey{
-						Key: pubKey,
-					}
-				}
-				return nil, re
-			}
-
-			// Fallback to returning the raw response if parsing fails
 			return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody)
 		default:
 			return resp, nil
 		}
 	}

-	// should never be reached
-	return nil, fmt.Errorf("failed to make upload request")
+	return nil, errUnauthorized
 }

 // testMakeRequestDialContext specifies the dial function for the http client in
--- a/server/model_test.go
+++ b/server/model_test.go
@@ -39,6 +39,7 @@ func TestExecuteWithTools(t *testing.T) {
 		{"mistral", `[TOOL_CALLS]  [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]

 The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`, true},
+		{"mistral", `[TOOL_CALLS]  [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"To }]`, false},
 		{"mistral", `I'm not aware of that information. However, I can suggest searching for the weather using the "get_current_weather" function:

 		[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`, true},
--- a/server/routes.go
+++ b/server/routes.go
@@ -36,6 +36,7 @@ import (
 	"github.com/ollama/ollama/runners"
 	"github.com/ollama/ollama/server/imageproc"
 	"github.com/ollama/ollama/template"
+	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
 )
@@ -609,7 +610,7 @@ func (s *Server) PushHandler(c *gin.Context) {
 		defer cancel()

 		if err := PushModel(ctx, model, regOpts, fn); err != nil {
-			ch <- newErr(err)
+			ch <- gin.H{"error": err.Error()}
 		}
 	}()

@@ -649,7 +650,7 @@ func (s *Server) CreateHandler(c *gin.Context) {

 	name := model.ParseName(cmp.Or(r.Model, r.Name))
 	if !name.IsValid() {
-		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": api.InvalidModelNameErrMsg})
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
 		return
 	}

@@ -1457,6 +1458,7 @@ func (s *Server) ChatHandler(c *gin.Context) {

 	prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, msgs, req.Tools)
 	if err != nil {
+		slog.Error("chat prompt error", "error", err)
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
@@ -1466,6 +1468,8 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	ch := make(chan any)
 	go func() {
 		defer close(ch)
+		var sb strings.Builder
+		var hasToolCalls bool
 		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
 			Prompt:  prompt,
 			Images:  images,
@@ -1491,7 +1495,34 @@ func (s *Server) ChatHandler(c *gin.Context) {
 				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
 			}

-			ch <- res
+			// TODO: tool call checking and filtering should be moved out of this callback once the
+			// streaming logic of this (and other) handlers is reworked; doing it inline here was the
+			// simplest change for now.
+			if req.Stream != nil && !*req.Stream || len(req.Tools) == 0 {
+				ch <- res
+				return
+			}
+
+			// Streaming tool calls:
+			// If tools are recognized, use a flag to track the sending of a tool downstream
+			// This ensures that content is cleared from the message on the last chunk sent
+			sb.WriteString(r.Content)
+			if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
+				res.Message.ToolCalls = toolCalls
+				res.Message.Content = ""
+				sb.Reset()
+				hasToolCalls = true
+				ch <- res
+				return
+			}
+
+			if r.Done {
+				// Send any remaining content if no tool calls were detected
+				if !hasToolCalls {
+					res.Message.Content = sb.String()
+				}
+				ch <- res
+			}
 		}); err != nil {
 			ch <- gin.H{"error": err.Error()}
 		}
@@ -1549,24 +1580,3 @@ func handleScheduleError(c *gin.Context, name string, err error) {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 	}
 }
-
-// newErr creates a structured API ErrorResponse from an existing error
-func newErr(err error) api.ErrorResponse {
-	if err == nil {
-		return api.ErrorResponse{}
-	}
-	// Default to just returning the generic error message
-	resp := api.ErrorResponse{
-		Code:    api.ErrCodeGeneral,
-		Message: err.Error(),
-	}
-	// Add additional error specific data, if any
-	var keyErr api.ErrUnknownOllamaKey
-	if errors.As(err, &keyErr) {
-		resp.Code = api.ErrCodeUnknownKey
-		resp.Data = map[string]any{
-			"key": keyErr.Key,
-		}
-	}
-	return resp
-}
--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -8,6 +8,7 @@ import (
 	"io"
 	"net/http"
 	"strings"
+	"sync"
 	"testing"
 	"time"

@@ -25,10 +26,14 @@ type mockRunner struct {
 	// CompletionRequest is only valid until the next call to Completion
 	llm.CompletionRequest
 	llm.CompletionResponse
+	CompletionFn func(context.Context, llm.CompletionRequest, func(llm.CompletionResponse)) error
 }

-func (m *mockRunner) Completion(_ context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
+func (m *mockRunner) Completion(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
 	m.CompletionRequest = r
+	if m.CompletionFn != nil {
+		return m.CompletionFn(ctx, r, fn)
+	}
 	fn(m.CompletionResponse)
 	return nil
 }
@@ -88,9 +93,14 @@ func TestGenerateChat(t *testing.T) {
 		Model: "test",
 		Modelfile: fmt.Sprintf(`FROM %s
 		TEMPLATE """
-{{- if .System }}System: {{ .System }} {{ end }}
-{{- if .Prompt }}User: {{ .Prompt }} {{ end }}
-{{- if .Response }}Assistant: {{ .Response }} {{ end }}"""
+{{- if .Tools }}
+{{ .Tools }}
+{{ end }}
+{{- range .Messages }}
+{{- .Role }}: {{ .Content }}
+{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
+{{- end }}
+{{ end }}"""
 `, createBinFile(t, llm.KV{
 			"general.architecture":          "llama",
 			"llama.block_count":             uint32(1),
@@ -263,7 +273,7 @@ func TestGenerateChat(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}

-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "User: Hello! "); diff != "" {
+		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "user: Hello!\n"); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}

@@ -292,7 +302,7 @@ func TestGenerateChat(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}

-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "System: You are a helpful assistant. User: Hello! "); diff != "" {
+		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "system: You are a helpful assistant.\nuser: Hello!\n"); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}

@@ -314,7 +324,7 @@ func TestGenerateChat(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}

-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "System: You can perform magic tricks. User: Hello! "); diff != "" {
+		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "system: You can perform magic tricks.\nuser: Hello!\n"); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}

@@ -337,12 +347,242 @@ func TestGenerateChat(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}

-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "System: You are a helpful assistant. User: Hello! Assistant: I can help you with that. System: You can perform magic tricks. User: Help me write tests. "); diff != "" {
+		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "system: You are a helpful assistant.\nuser: Hello!\nassistant: I can help you with that.\nsystem: You can perform magic tricks.\nuser: Help me write tests.\n"); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}

 		checkChatResponse(t, w.Body, "test-system", "Abra kadabra!")
 	})
+
+	t.Run("messages with tools (non-streaming)", func(t *testing.T) {
+		if w.Code != http.StatusOK {
+			t.Fatalf("failed to create test-system model: %d", w.Code)
+		}
+
+		tools := []api.Tool{
+			{
+				Type: "function",
+				Function: api.ToolFunction{
+					Name:        "get_weather",
+					Description: "Get the current weather",
+					Parameters: struct {
+						Type       string   `json:"type"`
+						Required   []string `json:"required"`
+						Properties map[string]struct {
+							Type        string   `json:"type"`
+							Description string   `json:"description"`
+							Enum        []string `json:"enum,omitempty"`
+						} `json:"properties"`
+					}{
+						Type:     "object",
+						Required: []string{"location"},
+						Properties: map[string]struct {
+							Type        string   `json:"type"`
+							Description string   `json:"description"`
+							Enum        []string `json:"enum,omitempty"`
+						}{
+							"location": {
+								Type:        "string",
+								Description: "The city and state",
+							},
+							"unit": {
+								Type: "string",
+								Enum: []string{"celsius", "fahrenheit"},
+							},
+						},
+					},
+				},
+			},
+		}
+
+		mock.CompletionResponse = llm.CompletionResponse{
+			Content:            `{"name":"get_weather","arguments":{"location":"Seattle, WA","unit":"celsius"}}`,
+			Done:               true,
+			DoneReason:         "done",
+			PromptEvalCount:    1,
+			PromptEvalDuration: 1,
+			EvalCount:          1,
+			EvalDuration:       1,
+		}
+
+		streamRequest := true
+
+		w := createRequest(t, s.ChatHandler, api.ChatRequest{
+			Model: "test-system",
+			Messages: []api.Message{
+				{Role: "user", Content: "What's the weather in Seattle?"},
+			},
+			Tools:  tools,
+			Stream: &streamRequest,
+		})
+
+		if w.Code != http.StatusOK {
+			var errResp struct {
+				Error string `json:"error"`
+			}
+			if err := json.NewDecoder(w.Body).Decode(&errResp); err != nil {
+				t.Logf("Failed to decode error response: %v", err)
+			} else {
+				t.Logf("Error response: %s", errResp.Error)
+			}
+		}
+
+		if w.Code != http.StatusOK {
+			t.Errorf("expected status 200, got %d", w.Code)
+		}
+
+		var resp api.ChatResponse
+		if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+			t.Fatal(err)
+		}
+
+		if resp.Message.ToolCalls == nil {
+			t.Error("expected tool calls, got nil")
+		}
+
+		expectedToolCall := api.ToolCall{
+			Function: api.ToolCallFunction{
+				Name: "get_weather",
+				Arguments: api.ToolCallFunctionArguments{
+					"location": "Seattle, WA",
+					"unit":     "celsius",
+				},
+			},
+		}
+
+		if diff := cmp.Diff(resp.Message.ToolCalls[0], expectedToolCall); diff != "" {
+			t.Errorf("tool call mismatch (-got +want):\n%s", diff)
+		}
+	})
+
+	t.Run("messages with tools (streaming)", func(t *testing.T) {
+		tools := []api.Tool{
+			{
+				Type: "function",
+				Function: api.ToolFunction{
+					Name:        "get_weather",
+					Description: "Get the current weather",
+					Parameters: struct {
+						Type       string   `json:"type"`
+						Required   []string `json:"required"`
+						Properties map[string]struct {
+							Type        string   `json:"type"`
+							Description string   `json:"description"`
+							Enum        []string `json:"enum,omitempty"`
+						} `json:"properties"`
+					}{
+						Type:     "object",
+						Required: []string{"location"},
+						Properties: map[string]struct {
+							Type        string   `json:"type"`
+							Description string   `json:"description"`
+							Enum        []string `json:"enum,omitempty"`
+						}{
+							"location": {
+								Type:        "string",
+								Description: "The city and state",
+							},
+							"unit": {
+								Type: "string",
+								Enum: []string{"celsius", "fahrenheit"},
+							},
+						},
+					},
+				},
+			},
+		}
+
+		// Simulate streaming response with multiple chunks
+		var wg sync.WaitGroup
+		wg.Add(1)
+
+		mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
+			defer wg.Done()
+
+			// Send chunks with small delays to simulate streaming
+			responses := []llm.CompletionResponse{
+				{
+					Content:            `{"name":"get_`,
+					Done:               false,
+					PromptEvalCount:    1,
+					PromptEvalDuration: 1,
+				},
+				{
+					Content:            `weather","arguments":{"location":"Seattle`,
+					Done:               false,
+					PromptEvalCount:    2,
+					PromptEvalDuration: 1,
+				},
+				{
+					Content:            `, WA","unit":"celsius"}}`,
+					Done:               true,
+					DoneReason:         "tool_call",
+					PromptEvalCount:    3,
+					PromptEvalDuration: 1,
+				},
+			}
+
+			for _, resp := range responses {
+				select {
+				case <-ctx.Done():
+					return ctx.Err()
+				default:
+					fn(resp)
+					time.Sleep(10 * time.Millisecond) // Small delay between chunks
+				}
+			}
+			return nil
+		}
+
+		w := createRequest(t, s.ChatHandler, api.ChatRequest{
+			Model: "test-system",
+			Messages: []api.Message{
+				{Role: "user", Content: "What's the weather in Seattle?"},
+			},
+			Tools:  tools,
+			Stream: &stream,
+		})
+
+		wg.Wait()
+
+		if w.Code != http.StatusOK {
+			t.Errorf("expected status 200, got %d", w.Code)
+		}
+
+		// Read and validate the streamed responses
+		decoder := json.NewDecoder(w.Body)
+		var finalToolCall api.ToolCall
+
+		for {
+			var resp api.ChatResponse
+			if err := decoder.Decode(&resp); err == io.EOF {
+				break
+			} else if err != nil {
+				t.Fatal(err)
+			}
+
+			if resp.Done {
+				if len(resp.Message.ToolCalls) != 1 {
+					t.Errorf("expected 1 tool call in final response, got %d", len(resp.Message.ToolCalls))
+				}
+				finalToolCall = resp.Message.ToolCalls[0]
+			}
+		}
+
+		expectedToolCall := api.ToolCall{
+			Function: api.ToolCallFunction{
+				Name: "get_weather",
+				Arguments: api.ToolCallFunctionArguments{
+					"location": "Seattle, WA",
+					"unit":     "celsius",
+				},
+			},
+		}
+
+		if diff := cmp.Diff(finalToolCall, expectedToolCall); diff != "" {
+			t.Errorf("final tool call mismatch (-got +want):\n%s", diff)
+		}
+	})
 }

 func TestGenerate(t *testing.T) {
--- a/types/errtypes/errtypes.go
+++ b/types/errtypes/errtypes.go
@@ -0,0 +1,21 @@
+// Package errtypes contains custom error types
+package errtypes
+
+import (
+	"fmt"
+	"strings"
+)
+
+const (
+	UnknownOllamaKeyErrMsg = "unknown ollama key"
+	InvalidModelNameErrMsg = "invalid model name"
+)
+
+// TODO: This should have a structured response from the API
+type UnknownOllamaKey struct {
+	Key string
+}
+
+func (e *UnknownOllamaKey) Error() string {
+	return fmt.Sprintf("unauthorized: %s %q", UnknownOllamaKeyErrMsg, strings.TrimSpace(e.Key))
+}
--- a/types/registry/error.go
+++ b/types/registry/error.go
@@ -1,37 +0,0 @@
-package registry
-
-import (
-	"fmt"
-	"slices"
-	"strings"
-)
-
-const ErrCodeAnonymous = "ANONYMOUS_ACCESS_DENIED"
-
-type Err struct {
-	Code    string `json:"code"`
-	Message string `json:"message"`
-}
-
-// Errs represents the structure of error responses from the registry
-// TODO (brucemacd): this struct should be imported from some shared package that is used between the registry and ollama
-type Errs struct {
-	Errors []Err `json:"errors"`
-}
-
-func (e Errs) Error() string {
-	if len(e.Errors) == 0 {
-		return "unknown registry error"
-	}
-	var msgs []string
-	for _, err := range e.Errors {
-		msgs = append(msgs, fmt.Sprintf("%s: %s", err.Code, err.Message))
-	}
-	return strings.Join(msgs, "; ")
-}
-
-func (e Errs) HasCode(code string) bool {
-	return slices.ContainsFunc(e.Errors, func(err Err) bool {
-		return err.Code == code
-	})
-}
Author	SHA1	Message	Date
Parth Sareen	ce7455a8e1	api: enable tool streaming (#7836)	2024-11-27 13:40:57 -08:00
ItzCrazyKns	e3936d4fb3	Support Multiple LoRa Adapters (#7667) Closes #7627	2024-11-27 11:00:04 -08:00