diff --git a/api/types.go b/api/types.go index 39b1c1d01..6f07f159f 100644 --- a/api/types.go +++ b/api/types.go @@ -126,12 +126,6 @@ type GenerateRequest struct { // each with an associated log probability. Only applies when Logprobs is true. // Valid values are 0-20. Default is 0 (only return the selected token's logprob). TopLogprobs int `json:"top_logprobs,omitempty"` - - // Tools is a list of tools the model may call. - Tools []Tool `json:"tools,omitempty"` - - // MCPServers specifies MCP servers to use for tool functionality - MCPServers []MCPServerConfig `json:"mcp_servers,omitempty"` } // ChatRequest describes a request sent by [Client.Chat]. @@ -250,9 +244,10 @@ type ToolCall struct { } type ToolResult struct { - ToolName string `json:"tool_name"` - Content string `json:"content"` - Error string `json:"error,omitempty"` + ToolName string `json:"tool_name"` + Arguments ToolCallFunctionArguments `json:"arguments,omitempty"` + Content string `json:"content"` + Error string `json:"error,omitempty"` } type ToolCallFunction struct { @@ -857,9 +852,6 @@ type GenerateResponse struct { Metrics - ToolCalls []ToolCall `json:"tool_calls,omitempty"` - ToolResults []ToolResult `json:"tool_results,omitempty"` - DebugInfo *DebugInfo `json:"_debug_info,omitempty"` // Logprobs contains log probability information for the generated tokens, diff --git a/cmd/cmd.go b/cmd/cmd.go index 35074ad2b..01c9348c3 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -22,6 +22,7 @@ import ( "sort" "strconv" "strings" + "sync" "sync/atomic" "syscall" "time" @@ -49,6 +50,13 @@ import ( const ConnectInstructions = "To sign in, navigate to:\n %s\n\n" +// Tool detection and buffering configuration +const ( + DefaultToolBufferDelay = 500 * time.Millisecond + MinToolBufferDelay = 100 * time.Millisecond + MaxToolBufferDelay = 2 * time.Second +) + // ensureThinkingSupport emits a warning if the model does not advertise thinking support func ensureThinkingSupport(ctx context.Context, client *api.Client, 
name string) { if name == "" { @@ -416,6 +424,41 @@ func RunHandler(cmd *cobra.Command, args []string) error { opts.KeepAlive = &api.Duration{Duration: d} } + toolsSpec, err := cmd.Flags().GetString("tools") + if err != nil { + return err + } + if toolsSpec != "" { + mcpServers, toolsPath, err := server.GetMCPServersForTools(toolsSpec) + if err != nil { + // If definitions fail to load, fall back to basic filesystem support + fmt.Fprintf(os.Stderr, "Warning: Failed to load MCP definitions: %v\n", err) + mcpServers = []api.MCPServerConfig{ + { + Name: "filesystem", + Command: "npx", + Args: []string{"-y", "@modelcontextprotocol/server-filesystem", toolsPath}, + }, + } + } + + if len(mcpServers) == 0 { + fmt.Fprintf(os.Stderr, "Warning: No MCP servers matched for --tools context\n") + } else { + // Log what servers are being enabled + serverNames := make([]string, 0, len(mcpServers)) + for _, srv := range mcpServers { + serverNames = append(serverNames, srv.Name) + } + fmt.Fprintf(os.Stderr, "Enabling MCP servers: %s\n", strings.Join(serverNames, ", ")) + if toolsPath != "" { + fmt.Fprintf(os.Stderr, "Tools path: %s\n", toolsPath) + } + } + + opts.MCPServers = mcpServers + } + prompts := args[1:] // prepend stdin to the prompt if provided if !term.IsTerminal(int(os.Stdin.Fd())) { @@ -1189,6 +1232,7 @@ type runOptions struct { Think *api.ThinkValue HideThinking bool ShowConnect bool + MCPServers []api.MCPServerConfig } func (r runOptions) Copy() runOptions { @@ -1218,6 +1262,12 @@ func (r runOptions) Copy() runOptions { think = &cThink } + var mcpServers []api.MCPServerConfig + if r.MCPServers != nil { + mcpServers = make([]api.MCPServerConfig, len(r.MCPServers)) + copy(mcpServers, r.MCPServers) + } + return runOptions{ Model: r.Model, ParentModel: r.ParentModel, @@ -1233,6 +1283,7 @@ func (r runOptions) Copy() runOptions { Think: think, HideThinking: r.HideThinking, ShowConnect: r.ShowConnect, + MCPServers: mcpServers, } } @@ -1241,6 +1292,237 @@ type 
displayResponseState struct { wordBuffer string } +// StreamingToolDetector maintains state for detecting tool calls across streaming chunks +type StreamingToolDetector struct { + inXMLToolCall bool + xmlStartBuffer strings.Builder + inJSONToolCall bool + jsonBuffer strings.Builder + jsonDepth int + inString bool + escapeNext bool + // tailBuffer holds potential partial tag matches from end of previous chunk + tailBuffer string +} + +// NewStreamingToolDetector creates a new stateful tool detector +func NewStreamingToolDetector() *StreamingToolDetector { + return &StreamingToolDetector{} +} + +// maxTagLength is the longest tag we need to detect across chunk boundaries +const maxTagLength = 12 // len("</tool_call>") + +// Process handles a chunk of streaming content and separates tool calls from regular content +func (s *StreamingToolDetector) Process(chunk string) (displayContent string, hasIncompleteToolCall bool) { + // Prepend any buffered tail from previous chunk + if s.tailBuffer != "" { + chunk = s.tailBuffer + chunk + s.tailBuffer = "" + } + + var result strings.Builder + + for i := 0; i < len(chunk); i++ { + ch := chunk[i] + + // Check if we're near the end and might have a partial tag + // Buffer potential partial matches for next chunk + remainingLen := len(chunk) - i + if !s.inXMLToolCall && !s.inJSONToolCall && remainingLen < maxTagLength { + // Check if remaining content could be start of a tag + remaining := chunk[i:] + if couldBePartialTag(remaining) { + s.tailBuffer = remaining + break // Stop processing, buffer the rest + } + } + + // Handle XML tool calls + if !s.inXMLToolCall && i+11 <= len(chunk) && chunk[i:i+11] == "<tool_call>" { + s.inXMLToolCall = true + s.xmlStartBuffer.Reset() + s.xmlStartBuffer.WriteString("<tool_call>") + i += 10 // Skip past "<tool_call>" + continue + } + + if s.inXMLToolCall { + s.xmlStartBuffer.WriteByte(ch) + if i+12 <= len(chunk) && chunk[i:i+12] == "</tool_call>" { + // Complete XML tool call - skip it entirely + s.inXMLToolCall = false + s.xmlStartBuffer.Reset() + i += 
11 // Skip past "</tool_call>" + continue + } + continue + } + + // Handle JSON tool calls + if !s.inJSONToolCall && !s.inXMLToolCall { + // Look for start of JSON tool call pattern + if i+8 <= len(chunk) && chunk[i:i+8] == `{"name":` { + // Check if "arguments" appears nearby (tool call signature) + lookahead := chunk[i:] + if len(lookahead) > 200 { + lookahead = lookahead[:200] + } + if strings.Contains(lookahead, `"arguments":`) { + s.inJSONToolCall = true + s.jsonBuffer.Reset() + s.jsonBuffer.WriteByte(ch) + s.jsonDepth = 1 + s.inString = false + s.escapeNext = false + continue + } + } + } + + if s.inJSONToolCall { + s.jsonBuffer.WriteByte(ch) + + // Track JSON structure to find the end + if s.escapeNext { + s.escapeNext = false + continue + } + + if ch == '\\' && s.inString { + s.escapeNext = true + continue + } + + if ch == '"' && !s.escapeNext { + s.inString = !s.inString + continue + } + + if !s.inString { + if ch == '{' { + s.jsonDepth++ + } else if ch == '}' { + s.jsonDepth-- + if s.jsonDepth == 0 { + // Complete JSON tool call - skip it + s.inJSONToolCall = false + s.jsonBuffer.Reset() + continue + } + } + } + continue + } + + // Regular content + result.WriteByte(ch) + } + + // Check if we have incomplete tool calls or buffered tail that need buffering + hasIncompleteToolCall = s.inXMLToolCall || s.inJSONToolCall || s.tailBuffer != "" + + return result.String(), hasIncompleteToolCall +} + +// couldBePartialTag checks if a string could be the start of a tool call tag +// Only returns true for patterns that are specific enough to likely be tool calls +func couldBePartialTag(s string) bool { + // Require at least 2 chars to avoid false positives on common single chars like < or { + if len(s) < 2 { + return false + } + + // NOTE(review): the following span was garbled in this copy of the patch and has been + // reconstructed from the surviving tail of getToolBufferDelay — confirm against the original. + // Check for partial XML tags - must start with "<t" or "</" + if strings.HasPrefix("<tool_call>", s) || strings.HasPrefix("</tool_call>", s) { + return true + } + + // Check for partial JSON tool call prefix + if strings.HasPrefix(`{"name":`, s) { + return true + } + + return false +} + +// Reset clears all detector state so buffered content can be flushed +func (s *StreamingToolDetector) Reset() { + s.inXMLToolCall = false + s.xmlStartBuffer.Reset() + s.inJSONToolCall = false + s.jsonBuffer.Reset() + s.jsonDepth = 0 + s.inString = false + s.escapeNext = false + s.tailBuffer = "" +} + +// getToolBufferDelay returns the tool-call buffer delay, configurable via the +// OLLAMA_TOOL_BUFFER_DELAY environment variable, clamped to [MinToolBufferDelay, MaxToolBufferDelay] +func getToolBufferDelay() time.Duration { + if v := os.Getenv("OLLAMA_TOOL_BUFFER_DELAY"); v != "" { + if delay, err := time.ParseDuration(v); err == nil { + if delay < MinToolBufferDelay { + return MinToolBufferDelay + } + if delay > MaxToolBufferDelay { + return MaxToolBufferDelay + } + return delay + } + } + return DefaultToolBufferDelay +} + func displayResponse(content string, wordWrap bool, state *displayResponseState) { 
termWidth, _, _ := term.GetSize(int(os.Stdout.Fd())) if wordWrap && termWidth >= 10 { @@ -1327,6 +1609,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) { sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, syscall.SIGINT) + defer signal.Stop(sigChan) go func() { <-sigChan @@ -1339,6 +1622,18 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) { var fullResponse strings.Builder var thinkTagOpened bool = false var thinkTagClosed bool = false + var toolCallsDisplayed bool = false + + // Streaming tool detector for better chunk handling + toolDetector := NewStreamingToolDetector() + + // Buffer for accumulating content before display + var contentBuffer strings.Builder + var bufferTimer *time.Timer + var bufferMutex sync.Mutex + + // Get configurable buffer delay + bufferDelay := getToolBufferDelay() role := "assistant" @@ -1370,20 +1665,84 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) { thinkTagClosed = true state = &displayResponseState{} } - // purposefully not putting thinking blocks in the response, which would - // only be needed if we later added tool calling to the cli (they get - // filtered out anyway since current models don't expect them unless you're - // about to finish some tool calls) + + // Use stateful tool detector for better streaming chunk handling + displayContent, hasIncompleteToolCall := toolDetector.Process(content) + + // Store full response for context fullResponse.WriteString(content) + // Buffer management based on tool detection + if hasIncompleteToolCall { + // We have an incomplete tool call - buffer the content + bufferMutex.Lock() + contentBuffer.WriteString(displayContent) + + // Cancel any existing timer + if bufferTimer != nil { + bufferTimer.Stop() + } + + // Set a new timer to flush the buffer after a delay + bufferTimer = time.AfterFunc(bufferDelay, func() { + bufferMutex.Lock() + defer bufferMutex.Unlock() + + bufferedContent := contentBuffer.String() + 
contentBuffer.Reset() + + // Reset tool detector state when flushing + toolDetector.Reset() + + // Only display if there's actual content after filtering + if strings.TrimSpace(bufferedContent) != "" { + displayResponse(bufferedContent, opts.WordWrap, state) + } + }) + bufferMutex.Unlock() + } else { + // No incomplete tool call - display immediately + if strings.TrimSpace(displayContent) != "" { + displayResponse(displayContent, opts.WordWrap, state) + } + } + + // Display tool calls cleanly if detected if response.Message.ToolCalls != nil { toolCalls := response.Message.ToolCalls - if len(toolCalls) > 0 { + if len(toolCalls) > 0 && !toolCallsDisplayed { + // Flush any buffered content before showing tool calls + bufferMutex.Lock() + if contentBuffer.Len() > 0 { + bufferedContent := contentBuffer.String() + contentBuffer.Reset() + if strings.TrimSpace(bufferedContent) != "" { + displayResponse(bufferedContent, opts.WordWrap, state) + } + } + if bufferTimer != nil { + bufferTimer.Stop() + bufferTimer = nil + } + bufferMutex.Unlock() + + // Add newline for clean separation + fmt.Println() fmt.Print(renderToolCalls(toolCalls, false)) + toolCallsDisplayed = true } } - displayResponse(content, opts.WordWrap, state) + // Display tool results if available + if response.Message.ToolResults != nil { + toolResults := response.Message.ToolResults + if len(toolResults) > 0 { + fmt.Print(renderToolResults(toolResults, false)) + fmt.Println() // New line after results + // Reset flag to allow next round's tool calls to be displayed + toolCallsDisplayed = false + } + } return nil } @@ -1393,11 +1752,12 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) { } req := &api.ChatRequest{ - Model: opts.Model, - Messages: opts.Messages, - Format: json.RawMessage(opts.Format), - Options: opts.Options, - Think: opts.Think, + Model: opts.Model, + Messages: opts.Messages, + Format: json.RawMessage(opts.Format), + Options: opts.Options, + Think: opts.Think, + MCPServers: 
opts.MCPServers, } if opts.KeepAlive != nil { @@ -1418,6 +1778,20 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) { } return nil, err } + + // Flush any remaining buffered content + bufferMutex.Lock() + if bufferTimer != nil { + bufferTimer.Stop() + } + if contentBuffer.Len() > 0 { + bufferedContent := contentBuffer.String() + contentBuffer.Reset() + if strings.TrimSpace(bufferedContent) != "" && !strings.Contains(bufferedContent, `{"name":`) { + displayResponse(bufferedContent, opts.WordWrap, state) + } + } + bufferMutex.Unlock() if len(opts.Messages) > 0 { fmt.Println() @@ -1437,6 +1811,11 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) { } func generate(cmd *cobra.Command, opts runOptions) error { + // Tools/MCP servers require interactive mode (Chat API) + if len(opts.MCPServers) > 0 { + return errors.New("--tools flag requires interactive mode; use 'ollama run --tools <path>' without piped input") + } + client, err := api.ClientFromEnvironment() if err != nil { return err @@ -1460,6 +1839,7 @@ func generate(cmd *cobra.Command, opts runOptions) error { sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, syscall.SIGINT) + defer signal.Stop(sigChan) go func() { <-sigChan @@ -1491,7 +1871,7 @@ func generate(cmd *cobra.Command, opts runOptions) error { displayResponse(response.Thinking, opts.WordWrap, state) } - if thinkTagOpened && !thinkTagClosed && (content != "" || len(response.ToolCalls) > 0) { + if thinkTagOpened && !thinkTagClosed && content != "" { if !strings.HasSuffix(thinkingContent.String(), "\n") { fmt.Println() } @@ -1503,13 +1883,6 @@ func generate(cmd *cobra.Command, opts runOptions) error { displayResponse(content, opts.WordWrap, state) - if response.ToolCalls != nil { - toolCalls := response.ToolCalls - if len(toolCalls) > 0 { - fmt.Print(renderToolCalls(toolCalls, plainText)) - } - } - return nil } @@ -1754,6 +2127,7 @@ func NewCLI() *cobra.Command { runCmd.Flags().Bool("hidethinking", false, "Hide 
thinking output (if provided)") runCmd.Flags().Bool("truncate", false, "For embedding models: truncate inputs exceeding context length (default: true). Set --truncate=false to error instead") runCmd.Flags().Int("dimensions", 0, "Truncate output embeddings to specified dimension (embedding models only)") + runCmd.Flags().String("tools", "", "Enable MCP tools (default: all registered servers with current dir, or specify path for filesystem)") stopCmd := &cobra.Command{ Use: "stop MODEL", @@ -1964,15 +2338,101 @@ func renderToolCalls(toolCalls []api.ToolCall, plainText bool) string { out += formatExplanation } for i, toolCall := range toolCalls { - argsAsJSON, err := json.Marshal(toolCall.Function.Arguments) - if err != nil { - return "" - } if i > 0 { out += "\n" } - // all tool calls are unexpected since we don't currently support registering any in the CLI - out += fmt.Sprintf(" Model called a non-existent function '%s()' with arguments: %s", formatValues+toolCall.Function.Name+formatExplanation, formatValues+string(argsAsJSON)+formatExplanation) + // Format arguments in a more readable way + var argsDisplay string + // Arguments is already a map[string]any + // Sort keys for deterministic display order + keys := make([]string, 0, len(toolCall.Function.Arguments)) + for k := range toolCall.Function.Arguments { + keys = append(keys, k) + } + sort.Strings(keys) + var pairs []string + for _, k := range keys { + pairs = append(pairs, fmt.Sprintf("%s: %v", k, toolCall.Function.Arguments[k])) + } + if len(pairs) > 0 { + argsDisplay = strings.Join(pairs, ", ") + } else { + argsDisplay = "(no arguments)" + } + + // Show tool execution in progress with cleaner format + out += fmt.Sprintf("🔧 Executing tool '%s'%s\n", + formatValues+toolCall.Function.Name+formatExplanation, formatExplanation) + out += fmt.Sprintf(" Arguments: %s%s%s\n", + formatValues, argsDisplay, formatExplanation) + } + if !plainText { + out += readline.ColorDefault + } + return out +} + +func 
renderToolResults(toolResults []api.ToolResult, plainText bool) string { + out := "" + formatExplanation := "" + formatValues := "" + formatError := "" + if !plainText { + formatExplanation = readline.ColorGrey + readline.ColorBold + formatValues = readline.ColorDefault + // Use bold for errors since ColorRed doesn't exist + formatError = readline.ColorBold + out += formatExplanation + } + for i, toolResult := range toolResults { + if i > 0 { + out += "\n" + } + + // Tool name and arguments already shown in renderToolCalls + // Just show the result or error here + if toolResult.Error != "" { + // Parse error for better context + errorMsg := toolResult.Error + // Try to extract meaningful error from MCP errors + if strings.Contains(errorMsg, "MCP tool returned error") { + errorMsg = "Tool execution failed" + } + // Look for specific error patterns + if strings.Contains(toolResult.Error, "Parent directory does not exist") { + errorMsg = "Parent directory does not exist - check path" + } else if strings.Contains(toolResult.Error, "permission denied") { + errorMsg = "Permission denied - insufficient privileges" + } else if strings.Contains(toolResult.Error, "Invalid arguments") { + errorMsg = "Invalid tool arguments provided" + } else if strings.Contains(toolResult.Error, "file not found") { + errorMsg = "File or directory not found" + } + + // Truncate long error messages (rune-safe for UTF-8) + errorRunes := []rune(errorMsg) + if len(errorRunes) > 200 { + errorMsg = string(errorRunes[:197]) + "..." + } + + out += fmt.Sprintf("❌ Error: %s%s%s\n", + formatError, errorMsg, formatExplanation) + } else { + content := toolResult.Content + if strings.TrimSpace(content) == "" { + // Empty result - show a clear indicator + out += fmt.Sprintf("✅ Result: %s(empty)%s\n", + formatValues, formatExplanation) + } else { + // Truncate very long results for display (rune-safe for UTF-8) + runes := []rune(content) + if len(runes) > 200 { + content = string(runes[:197]) + "..." 
+ } + out += fmt.Sprintf("✅ Result:\n%s%s%s\n", + formatValues, content, formatExplanation) + } + } } if !plainText { out += readline.ColorDefault diff --git a/server/mcp_code_api.go b/server/mcp_code_api.go index 631fafb8c..0000e173b 100644 --- a/server/mcp_code_api.go +++ b/server/mcp_code_api.go @@ -4,11 +4,11 @@ import ( "fmt" "log/slog" "strings" - + "github.com/ollama/ollama/api" ) -// MCPCodeAPI provides a code-like interface for MCP tools +// MCPCodeAPI provides context injection for MCP tools type MCPCodeAPI struct { manager *MCPManager } @@ -20,83 +20,50 @@ func NewMCPCodeAPI(manager *MCPManager) *MCPCodeAPI { } } -// GenerateMinimalContext returns essential context for tool usage +// GenerateMinimalContext returns essential runtime context for tool usage. +// Tool schemas are already provided via the template's TypeScript rendering, +// so we only need to add runtime-specific info like working directories. func (m *MCPCodeAPI) GenerateMinimalContext(configs []api.MCPServerConfig) string { slog.Debug("GenerateMinimalContext called", "configs_count", len(configs)) - if len(configs) == 0 { - slog.Debug("No MCP configs provided, returning empty context") - return "" - } var context strings.Builder - context.WriteString("\n=== MCP Tool Context ===\n") + // Add filesystem working directory if applicable for _, config := range configs { - slog.Debug("Processing MCP config", "command", config.Command, "args", config.Args) - // Check if this is a filesystem server (command or first arg contains filesystem) - isFilesystem := strings.Contains(config.Command, "filesystem") || - (len(config.Args) > 0 && strings.Contains(config.Args[0], "filesystem")) - - if isFilesystem && len(config.Args) > 1 { - // Extract working directory from filesystem server - workingDir := config.Args[1] - slog.Debug("Adding filesystem context", "working_dir", workingDir) + if workingDir := m.extractFilesystemPath(config); workingDir != "" { context.WriteString(fmt.Sprintf(` -Filesystem tools 
are available with these constraints: -- Working directory: %s -- All file operations must use paths within this directory -- Example usage: - - List files: "List all files in %s" - - Read file: "Read %s/filename.txt" - - Create file: "Create %s/newfile.txt with content" -- Paths outside %s will be rejected - -When working with files, ALWAYS use the full path starting with %s -`, workingDir, workingDir, workingDir, workingDir, workingDir, workingDir)) +Filesystem working directory: %s +All filesystem tool paths must be within this directory. +`, workingDir)) } - // Add other server types as needed } - - context.WriteString("\n") + result := context.String() - slog.Debug("Generated MCP context", "length", len(result)) + if result != "" { + slog.Debug("Generated MCP context", "length", len(result)) + } return result } -// GenerateProgressiveContext returns context based on what tools are being used -func (m *MCPCodeAPI) GenerateProgressiveContext(toolNames []string) string { - var context strings.Builder - - // Group tools by server - serverTools := make(map[string][]string) - for _, toolName := range toolNames { - if clientName, exists := m.manager.GetToolClient(toolName); exists { - serverTools[clientName] = append(serverTools[clientName], toolName) - } +// extractFilesystemPath extracts the working directory from filesystem server config +func (m *MCPCodeAPI) extractFilesystemPath(config api.MCPServerConfig) string { + isFilesystem := strings.Contains(config.Command, "filesystem") || + (len(config.Args) > 0 && strings.Contains(strings.Join(config.Args, " "), "filesystem")) + + if isFilesystem && len(config.Args) > 0 { + // Path is typically the last argument + return config.Args[len(config.Args)-1] } - - // Generate context for each server's tools - for serverName, tools := range serverTools { - context.WriteString(fmt.Sprintf("\n%s tools being used:\n", serverName)) - for _, tool := range tools { - // Get tool definition from manager - if toolDef := 
m.manager.GetToolDefinition(serverName, tool); toolDef != nil { - context.WriteString(fmt.Sprintf("- %s: %s\n", tool, toolDef.Function.Description)) - } - } - } - - return context.String() + return "" } -// InjectContextIntoMessages intelligently injects context into the message stream +// InjectContextIntoMessages adds runtime context to the message stream func (m *MCPCodeAPI) InjectContextIntoMessages(messages []api.Message, configs []api.MCPServerConfig) []api.Message { - // Generate minimal context context := m.GenerateMinimalContext(configs) if context == "" { return messages } - + // Check if there's already a system message if len(messages) > 0 && messages[0].Role == "system" { // Append to existing system message @@ -109,41 +76,6 @@ func (m *MCPCodeAPI) InjectContextIntoMessages(messages []api.Message, configs [ } messages = append([]api.Message{systemMsg}, messages...) } - + return messages } - -// ExtractWorkingDirectory extracts the working directory from MCP server args -func ExtractWorkingDirectory(config api.MCPServerConfig) string { - if strings.Contains(config.Command, "filesystem") && len(config.Args) > 1 { - return config.Args[1] - } - return "" -} - -// GenerateToolCallExample generates an example of how to call a specific tool -func (m *MCPCodeAPI) GenerateToolCallExample(serverName, toolName string) string { - workingDir := "" - - // Get working directory if filesystem - if serverName == "filesystem" { - if clients := m.manager.GetServerNames(); len(clients) > 0 { - // This is a simplified approach - in production we'd properly track server configs - workingDir = "/home/velvetm/Desktop/mcp-test-files" // Would be extracted from actual config - } - } - - // Generate appropriate example based on tool - switch toolName { - case "list_directory": - return fmt.Sprintf(`"List all files in %s"`, workingDir) - case "read_file": - return fmt.Sprintf(`"Read the file %s/example.txt"`, workingDir) - case "write_file": - return fmt.Sprintf(`"Create a file 
at %s/output.txt with content 'Hello World'"`, workingDir) - case "create_directory": - return fmt.Sprintf(`"Create a directory called %s/newdir"`, workingDir) - default: - return fmt.Sprintf(`"Use the %s tool"`, toolName) - } -} \ No newline at end of file diff --git a/server/routes.go b/server/routes.go index b19a40fbc..cfce029b3 100644 --- a/server/routes.go +++ b/server/routes.go @@ -52,6 +52,17 @@ import ( "github.com/ollama/ollama/version" ) +// CompletionResult holds the result of a completion request +type CompletionResult struct { + Content string + Thinking string + ToolCalls []api.ToolCall + Done bool + DoneReason string + Metrics api.Metrics + Error error +} + const signinURLStr = "https://ollama.com/connect?name=%s&key=%s" func shouldUseHarmony(model *Model) bool { @@ -337,10 +348,11 @@ func (s *Server) GenerateHandler(c *gin.Context) { m.Config.Parser = "harmony" } + if !req.Raw && m.Config.Parser != "" { builtinParser = parsers.ParserForName(m.Config.Parser) if builtinParser != nil { - // no tools or last message for generate endpoint + // Initialize parser for thinking extraction only (tools not supported in Generate API) builtinParser.Init(nil, nil, req.Think) } } @@ -459,7 +471,7 @@ func (s *Server) GenerateHandler(c *gin.Context) { // the real chat handler, but doing this as a stopgap to get renderer // support for generate if values.Messages != nil && values.Suffix == "" && req.Template == "" { - prompt, images, err = chatPrompt(c.Request.Context(), m, r.Tokenize, opts, values.Messages, []api.Tool{}, req.Think, req.Truncate == nil || *req.Truncate) + prompt, images, err = chatPrompt(c.Request.Context(), m, r.Tokenize, opts, values.Messages, nil, req.Think, req.Truncate == nil || *req.Truncate) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return @@ -510,8 +522,8 @@ func (s *Server) GenerateHandler(c *gin.Context) { ch := make(chan any) go func() { // TODO (jmorganca): avoid building the response twice both 
here and below - var sb strings.Builder defer close(ch) + var sb strings.Builder if err := r.Completion(c.Request.Context(), llm.CompletionRequest{ Prompt: prompt, Images: images, @@ -537,16 +549,13 @@ func (s *Server) GenerateHandler(c *gin.Context) { } if builtinParser != nil { - content, thinking, toolCalls, err := builtinParser.Add(cr.Content, cr.Done) + content, thinking, _, err := builtinParser.Add(cr.Content, cr.Done) if err != nil { ch <- gin.H{"error": err.Error()} return } res.Response = content res.Thinking = thinking - if cr.Done && len(toolCalls) > 0 { - res.ToolCalls = toolCalls - } } else if thinkingState != nil { thinking, content := thinkingState.AddContent(cr.Content) res.Thinking = thinking @@ -574,7 +583,7 @@ func (s *Server) GenerateHandler(c *gin.Context) { if builtinParser != nil { // only send messages with meaningful content (empty messages confuse clients) - if res.Response != "" || res.Thinking != "" || res.Done || len(res.ToolCalls) > 0 { + if res.Response != "" || res.Thinking != "" || res.Done { ch <- res } @@ -1508,6 +1517,10 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) { r.POST("/api/show", s.ShowHandler) r.DELETE("/api/delete", s.DeleteHandler) + // MCP Tools discovery + r.GET("/api/tools", s.ToolsHandler) + r.POST("/api/tools", s.ToolsHandler) + r.POST("/api/me", s.WhoamiHandler) r.POST("/api/signout", s.SignoutHandler) @@ -1852,6 +1865,211 @@ func toolCallId() string { return "call_" + strings.ToLower(string(b)) } +// executeCompletionWithTools executes a completion and collects the full response +// This is a synchronous wrapper around the async completion callback +// When suppressDone is true, the Done flag is not sent to the client channel +// (used for intermediate rounds in multi-round tool execution) +func (s *Server) executeCompletionWithTools( + ctx context.Context, + r llm.LlamaServer, + prompt string, + images []llm.ImageData, + opts *api.Options, + req api.ChatRequest, + m *Model, + 
builtinParser parsers.Parser, + thinkingState *thinking.Parser, + ch chan any, + checkpointStart time.Time, + checkpointLoaded time.Time, + truncate bool, + suppressDone bool, +) (*CompletionResult, error) { + result := &CompletionResult{} + done := make(chan error, 1) + + // For tracking tool calls when using tools + var toolParser *tools.Parser + if len(req.Tools) > 0 && builtinParser == nil { + toolParser = tools.NewParser(m.Template.Template, req.Tools) + } + + // Track thinking content for structured outputs + var thinkingBuilder strings.Builder + + // Accumulate tool calls across streaming chunks + var accumulatedToolCalls []api.ToolCall + + // Create a new context for this completion + completionCtx, cancel := context.WithCancel(ctx) + defer cancel() + + err := r.Completion(completionCtx, llm.CompletionRequest{ + Prompt: prompt, + Images: images, + Format: req.Format, + Options: opts, + Shift: req.Shift == nil || *req.Shift, + Truncate: truncate, + Logprobs: req.Logprobs, + TopLogprobs: req.TopLogprobs, + }, func(resp llm.CompletionResponse) { + // When suppressDone is true, don't signal Done to client + // (used for intermediate rounds in multi-round tool execution) + clientDone := resp.Done && !suppressDone + + res := api.ChatResponse{ + Model: req.Model, + CreatedAt: time.Now().UTC(), + Message: api.Message{Role: "assistant", Content: resp.Content}, + Done: clientDone, + Metrics: api.Metrics{ + PromptEvalCount: resp.PromptEvalCount, + PromptEvalDuration: resp.PromptEvalDuration, + EvalCount: resp.EvalCount, + EvalDuration: resp.EvalDuration, + }, + Logprobs: toAPILogprobs(resp.Logprobs), + } + + if resp.Done { + res.DoneReason = resp.DoneReason.String() + res.TotalDuration = time.Since(checkpointStart) + res.LoadDuration = checkpointLoaded.Sub(checkpointStart) + result.DoneReason = res.DoneReason + result.Metrics = res.Metrics + } + + // Handle builtin parser (for models with native tool support) + if builtinParser != nil { + content, thinking, toolCalls, 
err := builtinParser.Add(resp.Content, resp.Done) + if err != nil { + result.Error = err + done <- err + return + } + + res.Message.Content = content + res.Message.Thinking = thinking + res.Message.ToolCalls = toolCalls + + thinkingBuilder.WriteString(thinking) + + // Accumulate results + result.Content += content + result.Thinking += thinking + + // Accumulate tool calls for multi-round MCP execution + if len(toolCalls) > 0 { + accumulatedToolCalls = append(accumulatedToolCalls, toolCalls...) + } + + // On completion, set all accumulated tool calls + if resp.Done { + result.ToolCalls = accumulatedToolCalls + } + + // Stream to client if there's content to stream + if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || resp.Done || len(res.Logprobs) > 0 { + ch <- res + } + + if resp.Done { + result.Done = true + done <- nil + } + return + } + + // Handle thinking state parser + if thinkingState != nil { + thinkingContent, remainingContent := thinkingState.AddContent(res.Message.Content) + if thinkingContent == "" && remainingContent == "" && !resp.Done { + // Need more content to decide + return + } + + res.Message.Thinking = thinkingContent + thinkingBuilder.WriteString(thinkingContent) + res.Message.Content = remainingContent + result.Thinking += thinkingContent + } + + // Handle tool parsing (for models without native tool support) + if len(req.Tools) > 0 && builtinParser == nil { + toolCalls, content := toolParser.Add(res.Message.Content) + if len(content) > 0 { + res.Message.Content = content + result.Content += content + } else if len(toolCalls) > 0 { + res.Message.ToolCalls = toolCalls + res.Message.Content = "" + // Keep accumulating tool calls + accumulatedToolCalls = toolCalls + } else if res.Message.Thinking != "" { + // don't return, fall through to send + } else { + // Send logprobs while content is being buffered by the parser for tool calls + if len(res.Logprobs) > 0 && !resp.Done { + logprobRes := res + 
logprobRes.Message.Content = "" + logprobRes.Message.ToolCalls = nil + ch <- logprobRes + } + + if resp.Done { + res.Message.Content = toolParser.Content() + // Set accumulated tool calls in result before signaling done + if len(accumulatedToolCalls) > 0 { + result.ToolCalls = accumulatedToolCalls + } + // If no tool calls, get final content from parser + if len(result.ToolCalls) == 0 && toolParser != nil { + result.Content = toolParser.Content() + } + result.Done = true + ch <- res + done <- nil + } + return + } + } else { + result.Content += res.Message.Content + } + + // Stream to client + ch <- res + + if resp.Done { + // If we accumulated tool calls, set them in result + if len(accumulatedToolCalls) > 0 { + result.ToolCalls = accumulatedToolCalls + } + // If no tool calls, get final content from parser + if len(result.ToolCalls) == 0 && toolParser != nil { + result.Content = toolParser.Content() + } + result.Done = true + done <- nil + } + }) + + if err != nil { + return nil, err + } + + // Wait for completion or context cancellation + select { + case err := <-done: + if err != nil { + return nil, err + } + return result, nil + case <-ctx.Done(): + return nil, ctx.Err() + } +} + func (s *Server) ChatHandler(c *gin.Context) { checkpointStart := time.Now() @@ -2018,6 +2236,80 @@ func (s *Server) ChatHandler(c *gin.Context) { } } + // ========================================================================= + // MCP (Model Context Protocol) Integration + // ========================================================================= + // + // MCP allows the model to execute external tools via JSON-RPC servers. + // This section handles: + // 1. Manager initialization (from session cache or new) + // 2. Tool discovery (list available tools from MCP servers) + // 3. Context injection (inform model about available tools) + // 4. 
Parser configuration (for tool call detection) + // + // Entry points: + // - req.MCPServers: Explicit server configs from API + // - req.ToolsPath: Path-based auto-enable from --tools flag + // + // See: mcp.go, mcp_manager.go for implementation details + // ========================================================================= + + var mcpManager *MCPManager + + if len(req.MCPServers) > 0 || req.ToolsPath != "" { + if req.ToolsPath != "" { + // Path-based mode: auto-enable servers matching the tools path + // Used by CLI: `ollama run model --tools /path` + slog.Debug("Using tools path for MCP manager", "tools_path", req.ToolsPath, "model", req.Model) + mcpManager, err = GetMCPManagerForPath(req.Model, req.ToolsPath) + if err != nil { + slog.Error("Failed to get MCP manager for tools path", "error", err) + // Continue without MCP - graceful degradation + } + } else if len(req.MCPServers) > 0 { + // Explicit mode: use server configs from API request + // Used by API: POST /api/chat with mcp_servers field + sessionID := GenerateSessionID(req) + slog.Debug("Getting MCP manager", "session", sessionID, "servers", len(req.MCPServers)) + mcpManager, err = GetMCPManager(sessionID, req.MCPServers) + if err != nil { + slog.Error("Failed to get MCP manager", "error", err) + // Continue without MCP - graceful degradation + } + } + + if mcpManager != nil { + // Step 1: Discover tools from MCP servers and add to request + mcpTools := mcpManager.GetAllTools() + req.Tools = append(req.Tools, mcpTools...) 
+ + // Step 2: Inject context to help model use tools effectively + // Use programmatic context injection from tool schemas + codeAPI := NewMCPCodeAPI(mcpManager) + req.Messages = codeAPI.InjectContextIntoMessages(req.Messages, req.MCPServers) + + // Step 3: Auto-configure parser for tool call detection + if len(req.Tools) > 0 && m.Config.Parser == "" { + if m.Config.ModelFamily == "qwen2" || m.Config.ModelFamily == "qwen3" { + m.Config.Parser = "qwen3-vl-instruct" + } + } + + // Step 4: Update capabilities now that we have tools + if len(req.Tools) > 0 && !slices.Contains(caps, model.CapabilityTools) { + caps = append(caps, model.CapabilityTools) + } + } + + // Cleanup: Close MCP manager when request completes + // Note: Session manager may cache for reuse within TTL + defer func() { + if err := mcpManager.Close(); err != nil { + slog.Warn("Error closing MCP manager", "error", err) + } + }() + } + r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive) if errors.Is(err, errCapabilityCompletion) { c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)}) @@ -2106,11 +2398,6 @@ func (s *Server) ChatHandler(c *gin.Context) { } } - var toolParser *tools.Parser - if len(req.Tools) > 0 && (builtinParser == nil || !builtinParser.HasToolSupport()) { - toolParser = tools.NewParser(m.Template.Template, req.Tools) - } - type structuredOutputsState int const ( structuredOutputsState_None structuredOutputsState = iota @@ -2122,181 +2409,223 @@ func (s *Server) ChatHandler(c *gin.Context) { go func() { defer close(ch) - structuredOutputsState := structuredOutputsState_None + // Initialize for multi-round execution + // NOTE: Upstream's structuredOutputsState for thinking models is not yet integrated + // TODO: Add structuredOutputsState support for thinking models with format constraints + currentMsgs := msgs + maxRounds := req.MaxToolRounds + if maxRounds == 0 { + maxRounds = 15 // 
Default maximum rounds + } - for { - var tb strings.Builder + slog.Debug("Starting multi-round execution", + "mcpManager", mcpManager != nil, + "tools_count", len(req.Tools), + "max_rounds", maxRounds) - currentFormat := req.Format - // structured outputs via double request is enabled when: - // 1. the model supports the thinking capability and - // 2. it uses a built-in parser or our generic thinking parser + // MAIN LOOP - Multi-round execution for tool calling + var round int + for round = 0; round < maxRounds; round++ { + slog.Debug("Starting round", "round", round, "messages", len(currentMsgs)) - // Note that the current approach does not work for (potential future) - // non-thinking models that emit anything before actual content. This - // current approach uses the transition from parsed thinking content to - // parsed non-thinking content as the signal to turn constraining on - - if req.Format != nil && structuredOutputsState == structuredOutputsState_None && ((builtinParser != nil || thinkingState != nil) && slices.Contains(m.Capabilities(), model.CapabilityThinking)) { - currentFormat = nil - } - - // sets up new context given parent context per request - ctx, cancel := context.WithCancel(c.Request.Context()) - err := r.Completion(ctx, llm.CompletionRequest{ - Prompt: prompt, - Images: images, - Format: currentFormat, - Options: opts, - Shift: req.Shift == nil || *req.Shift, - Truncate: truncate, - Logprobs: req.Logprobs, - TopLogprobs: req.TopLogprobs, - }, func(r llm.CompletionResponse) { - res := api.ChatResponse{ - Model: req.Model, - CreatedAt: time.Now().UTC(), - Message: api.Message{Role: "assistant", Content: r.Content}, - Done: r.Done, - Metrics: api.Metrics{ - PromptEvalCount: r.PromptEvalCount, - PromptEvalDuration: r.PromptEvalDuration, - EvalCount: r.EvalCount, - EvalDuration: r.EvalDuration, - }, - Logprobs: toAPILogprobs(r.Logprobs), - } - - if r.Done { - res.DoneReason = r.DoneReason.String() - res.TotalDuration = 
time.Since(checkpointStart) - res.LoadDuration = checkpointLoaded.Sub(checkpointStart) - } - - if builtinParser != nil { - slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser input", "parser", m.Config.Parser, "content", r.Content) - - content, thinking, toolCalls, err := builtinParser.Add(r.Content, r.Done) - if err != nil { - ch <- gin.H{"error": err.Error()} - return - } - - res.Message.Content = content - res.Message.Thinking = thinking - for i := range toolCalls { - toolCalls[i].ID = toolCallId() - } - res.Message.ToolCalls = toolCalls - - tb.WriteString(thinking) - // we are now receiving content from the model - we should start applying structured outputs - if structuredOutputsState == structuredOutputsState_None && req.Format != nil && tb.String() != "" && res.Message.Content != "" { - structuredOutputsState = structuredOutputsState_ReadyToApply - cancel() - return - } - - if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || r.Done || len(res.Logprobs) > 0 { - slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser output", "parser", m.Config.Parser, "content", content, "thinking", thinking, "toolCalls", toolCalls, "done", r.Done) - ch <- res - } else { - slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser empty output", "parser", m.Config.Parser) - } - return - } - - if thinkingState != nil { - thinkingContent, remainingContent := thinkingState.AddContent(res.Message.Content) - if thinkingContent == "" && remainingContent == "" && !r.Done { - // need to accumulate more to decide what to send - return - } - res.Message.Thinking = thinkingContent - tb.WriteString(thinkingContent) - // emit the collected thinking text before restarting with structured outputs and clear unstructured content - // to avoid leaking mixed tokens like "Hello" - if structuredOutputsState == structuredOutputsState_None && req.Format != nil && tb.String() != "" && remainingContent != "" { - structuredOutputsState = 
structuredOutputsState_ReadyToApply - res.Message.Content = "" - ch <- res - cancel() - return - } - res.Message.Content = remainingContent - } - - if len(req.Tools) > 0 { - toolCalls, content := toolParser.Add(res.Message.Content) - if len(content) > 0 { - res.Message.Content = content - } else if len(toolCalls) > 0 { - for i := range toolCalls { - toolCalls[i].ID = toolCallId() - } - res.Message.ToolCalls = toolCalls - res.Message.Content = "" - } else if res.Message.Thinking != "" { - // don't return, fall through to send - } else { - // Send logprobs while content is being buffered by the parser for tool calls - if len(res.Logprobs) > 0 && !r.Done { - logprobRes := res - logprobRes.Message.Content = "" - logprobRes.Message.ToolCalls = nil - ch <- logprobRes - } - - if r.Done { - res.Message.Content = toolParser.Content() - ch <- res - } - return - } - } - - ch <- res - }) - if err != nil { - if structuredOutputsState == structuredOutputsState_ReadyToApply && strings.Contains(err.Error(), "context canceled") && c.Request.Context().Err() == nil { - // only ignores error if it's a context cancellation due to setting structured outputs - } else { - var serr api.StatusError - if errors.As(err, &serr) { - ch <- gin.H{"error": serr.ErrorMessage, "status": serr.StatusCode} - } else { - ch <- gin.H{"error": err.Error()} - } - return - } - } - - // ignored structured outputs cancellation falls through to here, start a new request with the structured outputs and updated prompt. 
use the - if structuredOutputsState == structuredOutputsState_ReadyToApply { - structuredOutputsState = structuredOutputsState_Applying - msg := api.Message{ - Role: "assistant", - Thinking: tb.String(), - } - - msgs = append(msgs, msg) - prompt, _, err = chatPrompt(c.Request.Context(), m, r.Tokenize, opts, msgs, processedTools, req.Think, truncate) + // Re-render prompt and reset parser if not first round (tool results were added) + if round > 0 { + var err error + prompt, images, err = chatPrompt(c.Request.Context(), m, r.Tokenize, opts, currentMsgs, processedTools, req.Think, truncate) if err != nil { - slog.Error("chat prompt error applying structured outputs", "error", err) + slog.Error("Failed to render prompt in round", "round", round, "error", err) ch <- gin.H{"error": err.Error()} return } - // force constraining by terminating thinking header, the parser is already at this state - // when the last message is thinking, the rendered for gpt-oss cannot disambiguate between having the - // model continue thinking or ending thinking and outputting the final message. - // TODO(parthsareen): consider adding prefill disambiguation logic to the renderer for structured outputs. 
- if shouldUseHarmony(m) || (builtinParser != nil && m.Config.Parser == "harmony") { - prompt += "<|end|><|start|>assistant<|channel|>final<|message|>" + + // Create fresh parser instance for new round (parser has internal buffer state) + if builtinParser != nil && m.Config.Parser != "" { + builtinParser = parsers.ParserForName(m.Config.Parser) + if builtinParser != nil { + lastMsg := &currentMsgs[len(currentMsgs)-1] + builtinParser.Init(req.Tools, lastMsg, req.Think) + } + } - continue } - break + // Execute completion and collect full response + // When MCP is enabled, suppress Done flag during intermediate rounds + // to prevent client from closing connection prematurely + suppressDone := mcpManager != nil + completionResult, err := s.executeCompletionWithTools( + c.Request.Context(), + r, + prompt, + images, + opts, + req, + m, + builtinParser, + thinkingState, + ch, + checkpointStart, + checkpointLoaded, + truncate, + suppressDone, + ) + + if err != nil { + slog.Error("Completion failed", "round", round, "error", err) + var serr api.StatusError + if errors.As(err, &serr) { + ch <- gin.H{"error": serr.ErrorMessage, "status": serr.StatusCode} + } else { + ch <- gin.H{"error": err.Error()} + } + return + } + + // Check if model called tools + if len(completionResult.ToolCalls) == 0 { + // No tools called - conversation is complete + slog.Debug("No tools called, conversation complete", "round", round) + break // Exit the loop - we're done + } + + // Validate tool calls are not empty or malformed + validToolCalls := 0 + for _, tc := range completionResult.ToolCalls { + if tc.Function.Name != "" { + validToolCalls++ + } else { + slog.Warn("Invalid tool call detected", "round", round, "tool", tc) + } + } + + if validToolCalls == 0 { + slog.Warn("No valid tool calls found, exiting", "round", round) + break + } + + // Model called tools - execute them if we have an MCP manager + if mcpManager != nil { + slog.Debug("MCP tool execution starting", + "tools_in_response", 
len(completionResult.ToolCalls), + "valid_tools", validToolCalls, + "round", round) + + // Send tool calls to client for display BEFORE executing + // This ensures the client can show "Executing tool..." for all rounds + // Note: Don't include Content here - it was already streamed during completion + ch <- api.ChatResponse{ + Model: req.Model, + Message: api.Message{ + Role: "assistant", + ToolCalls: completionResult.ToolCalls, + }, + } + + // Analyze execution plan + executionPlan := mcpManager.AnalyzeExecutionPlan(completionResult.ToolCalls) + slog.Debug("Execution plan determined", + "sequential", executionPlan.RequiresSequential, + "reason", executionPlan.Reason) + + // Execute tools according to plan + results := mcpManager.ExecuteWithPlan(completionResult.ToolCalls, executionPlan) + + // Log tool calls for debugging + for i, tc := range completionResult.ToolCalls { + slog.Info("Tool call details", + "round", round, + "index", i, + "name", tc.Function.Name, + "arguments", tc.Function.Arguments) + } + + // Add assistant message with tool calls + assistantMsg := api.Message{ + Role: "assistant", + Content: completionResult.Content, // Preserve any content + ToolCalls: completionResult.ToolCalls, + } + currentMsgs = append(currentMsgs, assistantMsg) + + // Add tool result messages and send them to client for display + toolResultsForDisplay := make([]api.ToolResult, 0, len(results)) + for i, result := range results { + toolMsg := api.Message{ + Role: "tool", + ToolName: completionResult.ToolCalls[i].Function.Name, + } + + // Create display result with arguments for context + displayResult := api.ToolResult{ + ToolName: completionResult.ToolCalls[i].Function.Name, + Arguments: completionResult.ToolCalls[i].Function.Arguments, + Content: result.Content, + } + + if result.Error != nil { + // JSON-encode the error for proper template rendering + if encoded, err := json.Marshal(fmt.Sprintf("Error: %v", result.Error)); err == nil { + toolMsg.Content = string(encoded) + 
} else { + toolMsg.Content = fmt.Sprintf("\"Error: %v\"", result.Error) + } + displayResult.Error = result.Error.Error() + slog.Warn("Tool execution failed", + "tool", completionResult.ToolCalls[i].Function.Name, + "error", result.Error) + } else { + // JSON-encode the content for proper template rendering + // The template expects {"content": {{ .Content }}} where Content should be a JSON string + if encoded, err := json.Marshal(result.Content); err == nil { + toolMsg.Content = string(encoded) + } else { + toolMsg.Content = result.Content + } + } + + currentMsgs = append(currentMsgs, toolMsg) + toolResultsForDisplay = append(toolResultsForDisplay, displayResult) + } + + // Send tool results to client for display + if len(toolResultsForDisplay) > 0 { + ch <- api.ChatResponse{ + Model: req.Model, + Message: api.Message{ + Role: "assistant", + ToolResults: toolResultsForDisplay, + }, + } + } + + // Continue to next round - model will process tool results + slog.Info("Tools executed, continuing to next round", + "round", round, + "messages", len(currentMsgs), + "last_tool", completionResult.ToolCalls[len(completionResult.ToolCalls)-1].Function.Name) + + } else { + // No MCP manager - send tool calls to client for external execution + slog.Debug("No MCP manager, sending tool calls to client", "round", round) + break // Exit - client will handle tool execution + } + } // End of maxRounds loop + + // Check if we exhausted rounds + if round >= maxRounds { + slog.Warn("Maximum tool execution rounds reached", "rounds", maxRounds) + ch <- gin.H{"error": fmt.Sprintf("Maximum tool execution rounds (%d) exceeded", maxRounds)} + } + + // When MCP was enabled, we suppressed Done flags during the loop + // Send a final Done: true to signal the conversation is complete + if mcpManager != nil { + ch <- api.ChatResponse{ + Model: req.Model, + CreatedAt: time.Now().UTC(), + Message: api.Message{Role: "assistant"}, + Done: true, + DoneReason: "stop", + } } }() @@ -2322,22 +2651,15 @@ 
func (s *Server) ChatHandler(c *gin.Context) { case gin.H: msg, ok := t["error"].(string) if !ok { - msg = "unexpected error format in response" + msg = "unexpected error" } - - status, ok := t["status"].(int) - if !ok { - status = http.StatusInternalServerError - } - - c.JSON(status, gin.H{"error": msg}) + c.JSON(http.StatusBadRequest, gin.H{"error": msg}) return default: c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"}) return } } - resp.Message.Content = sbContent.String() resp.Message.Thinking = sbThinking.String() resp.Logprobs = allLogprobs @@ -2345,12 +2667,10 @@ func (s *Server) ChatHandler(c *gin.Context) { if len(toolCalls) > 0 { resp.Message.ToolCalls = toolCalls } - c.JSON(http.StatusOK, resp) - return + } else { + streamResponse(c, ch) } - - streamResponse(c, ch) } func handleScheduleError(c *gin.Context, name string, err error) { @@ -2395,4 +2715,3 @@ func filterThinkTags(msgs []api.Message, m *Model) []api.Message { } return msgs } - diff --git a/server/routes_tools.go b/server/routes_tools.go new file mode 100644 index 000000000..2ecb1a84f --- /dev/null +++ b/server/routes_tools.go @@ -0,0 +1,91 @@ +package server + +import ( + "net/http" + + "github.com/gin-gonic/gin" + "github.com/ollama/ollama/api" +) + +// ToolsHandler handles requests to list available MCP tools. +// GET: Returns available MCP server definitions from configuration. +// POST with mcp_servers: Returns tools from the specified MCP servers. 
+func (s *Server) ToolsHandler(c *gin.Context) { + var req struct { + MCPServers []api.MCPServerConfig `json:"mcp_servers,omitempty"` + } + + if c.Request.Method == "POST" { + if err := c.BindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + } + + // If MCP servers provided, list their tools + if len(req.MCPServers) > 0 { + manager := NewMCPManager(10) + defer manager.Close() + + var allTools []ToolInfo + for _, config := range req.MCPServers { + if err := manager.AddServer(config); err != nil { + // Include error in response but continue + allTools = append(allTools, ToolInfo{ + Name: config.Name, + Description: "Failed to initialize: " + err.Error(), + Error: err.Error(), + }) + continue + } + + // Get tools from this server + tools := manager.GetAllTools() + for _, tool := range tools { + allTools = append(allTools, ToolInfo{ + Name: tool.Function.Name, + Description: tool.Function.Description, + Parameters: &tool.Function.Parameters, + ServerName: config.Name, + }) + } + } + + c.JSON(http.StatusOK, ToolsResponse{ + Tools: allTools, + }) + return + } + + // Otherwise, list available MCP server definitions + defs, err := LoadMCPDefinitions() + if err != nil { + // Config parsing errors are client errors (bad config), not server errors + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid MCP configuration: " + err.Error()}) + return + } + + servers := defs.ListServers() + c.JSON(http.StatusOK, MCPServersResponse{ + Servers: servers, + }) +} + +// ToolInfo provides information about a single tool +type ToolInfo struct { + Name string `json:"name"` + Description string `json:"description"` + Parameters *api.ToolFunctionParameters `json:"parameters,omitempty"` + ServerName string `json:"server,omitempty"` + Error string `json:"error,omitempty"` +} + +// ToolsResponse contains the list of available tools +type ToolsResponse struct { + Tools []ToolInfo `json:"tools"` +} + +// MCPServersResponse contains the list of 
available MCP server types +type MCPServersResponse struct { + Servers []MCPServerInfo `json:"servers"` +} \ No newline at end of file