cmd: add --tools flag for MCP server integration
Integrate MCP servers with the ollama CLI and add route handlers for
tool-enabled chat completions.

CLI (cmd/cmd.go):
- Add --tools flag to 'ollama run' command
- Supports path argument: --tools /path/to/directory
- Multi-round tool execution loop with result display
- Displays available tools on startup

Routes (server/routes.go, server/routes_tools.go):
- MCP server handling in chat completions
- /api/tools endpoint for tool listing
- Session-based MCP manager lifecycle
- Detailed documentation of MCP integration flow

Parser (harmony/harmonyparser.go):
- Handle embedded <think> segments in tool call responses
- Extract tool calls from thinking model outputs

Relates to #7865
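For reference, a minimal client-side sketch of the new API surface. It assumes only the types this diff introduces (api.MCPServerConfig, ChatRequest.MCPServers, Message.ToolResults) plus the existing api.Client; the model name and directory are placeholders, and the filesystem server invocation mirrors the CLI fallback below.

	package main

	import (
		"context"
		"fmt"
		"log"

		"github.com/ollama/ollama/api"
	)

	func main() {
		client, err := api.ClientFromEnvironment()
		if err != nil {
			log.Fatal(err)
		}

		req := &api.ChatRequest{
			Model:    "llama3.1", // placeholder model
			Messages: []api.Message{{Role: "user", Content: "List the files in the project directory"}},
			MCPServers: []api.MCPServerConfig{{
				Name:    "filesystem",
				Command: "npx",
				Args:    []string{"-y", "@modelcontextprotocol/server-filesystem", "/tmp/project"},
			}},
		}

		err = client.Chat(context.Background(), req, func(resp api.ChatResponse) error {
			// Tool calls and tool results arrive as separate streamed messages.
			for _, tc := range resp.Message.ToolCalls {
				fmt.Printf("[tool call] %s(%v)\n", tc.Function.Name, tc.Function.Arguments)
			}
			for _, tr := range resp.Message.ToolResults {
				fmt.Printf("[tool result] %s: %s\n", tr.ToolName, tr.Content)
			}
			fmt.Print(resp.Message.Content)
			return nil
		})
		if err != nil {
			log.Fatal(err)
		}
	}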
parent fc05536d52
commit abcb81bb07

api/types.go (16 lines changed)
@@ -126,12 +126,6 @@ type GenerateRequest struct {
	// each with an associated log probability. Only applies when Logprobs is true.
	// Valid values are 0-20. Default is 0 (only return the selected token's logprob).
	TopLogprobs int `json:"top_logprobs,omitempty"`
-
-	// Tools is a list of tools the model may call.
-	Tools []Tool `json:"tools,omitempty"`
-
-	// MCPServers specifies MCP servers to use for tool functionality
-	MCPServers []MCPServerConfig `json:"mcp_servers,omitempty"`
}

// ChatRequest describes a request sent by [Client.Chat].
@@ -250,9 +244,10 @@ type ToolCall struct {
}

type ToolResult struct {
-	ToolName string `json:"tool_name"`
-	Content  string `json:"content"`
-	Error    string `json:"error,omitempty"`
+	ToolName  string                    `json:"tool_name"`
+	Arguments ToolCallFunctionArguments `json:"arguments,omitempty"`
+	Content   string                    `json:"content"`
+	Error     string                    `json:"error,omitempty"`
}

type ToolCallFunction struct {
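For reference, a sketch of the wire shape the revised ToolResult produces. It assumes ToolCallFunctionArguments marshals as a plain JSON object (it is iterated as a map in renderToolCalls further down); the tool name and path are placeholders.

	res := api.ToolResult{
		ToolName:  "read_file",
		Arguments: api.ToolCallFunctionArguments{"path": "/tmp/project/notes.txt"},
		Content:   "hello world",
	}
	b, _ := json.Marshal(res)
	fmt.Println(string(b))
	// {"tool_name":"read_file","arguments":{"path":"/tmp/project/notes.txt"},"content":"hello world"}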
@@ -857,9 +852,6 @@ type GenerateResponse struct {

	Metrics

-	ToolCalls   []ToolCall   `json:"tool_calls,omitempty"`
-	ToolResults []ToolResult `json:"tool_results,omitempty"`
-
	DebugInfo *DebugInfo `json:"_debug_info,omitempty"`

	// Logprobs contains log probability information for the generated tokens,
cmd/cmd.go (510 lines changed)
@@ -22,6 +22,7 @@ import (
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

@@ -49,6 +50,13 @@ import (

const ConnectInstructions = "To sign in, navigate to:\n %s\n\n"

// Tool detection and buffering configuration
const (
	DefaultToolBufferDelay = 500 * time.Millisecond
	MinToolBufferDelay     = 100 * time.Millisecond
	MaxToolBufferDelay     = 2 * time.Second
)

// ensureThinkingSupport emits a warning if the model does not advertise thinking support
func ensureThinkingSupport(ctx context.Context, client *api.Client, name string) {
	if name == "" {
@@ -416,6 +424,41 @@ func RunHandler(cmd *cobra.Command, args []string) error {
		opts.KeepAlive = &api.Duration{Duration: d}
	}

	toolsSpec, err := cmd.Flags().GetString("tools")
	if err != nil {
		return err
	}
	if toolsSpec != "" {
		mcpServers, toolsPath, err := server.GetMCPServersForTools(toolsSpec)
		if err != nil {
			// If definitions fail to load, fall back to basic filesystem support
			fmt.Fprintf(os.Stderr, "Warning: Failed to load MCP definitions: %v\n", err)
			mcpServers = []api.MCPServerConfig{
				{
					Name:    "filesystem",
					Command: "npx",
					Args:    []string{"-y", "@modelcontextprotocol/server-filesystem", toolsPath},
				},
			}
		}

		if len(mcpServers) == 0 {
			fmt.Fprintf(os.Stderr, "Warning: No MCP servers matched for --tools context\n")
		} else {
			// Log what servers are being enabled
			serverNames := make([]string, 0, len(mcpServers))
			for _, srv := range mcpServers {
				serverNames = append(serverNames, srv.Name)
			}
			fmt.Fprintf(os.Stderr, "Enabling MCP servers: %s\n", strings.Join(serverNames, ", "))
			if toolsPath != "" {
				fmt.Fprintf(os.Stderr, "Tools path: %s\n", toolsPath)
			}
		}

		opts.MCPServers = mcpServers
	}

	prompts := args[1:]
	// prepend stdin to the prompt if provided
	if !term.IsTerminal(int(os.Stdin.Fd())) {
@@ -1189,6 +1232,7 @@ type runOptions struct {
	Think        *api.ThinkValue
	HideThinking bool
	ShowConnect  bool
	MCPServers   []api.MCPServerConfig
}

func (r runOptions) Copy() runOptions {

@@ -1218,6 +1262,12 @@ func (r runOptions) Copy() runOptions {
		think = &cThink
	}

	var mcpServers []api.MCPServerConfig
	if r.MCPServers != nil {
		mcpServers = make([]api.MCPServerConfig, len(r.MCPServers))
		copy(mcpServers, r.MCPServers)
	}

	return runOptions{
		Model:       r.Model,
		ParentModel: r.ParentModel,

@@ -1233,6 +1283,7 @@ func (r runOptions) Copy() runOptions {
		Think:        think,
		HideThinking: r.HideThinking,
		ShowConnect:  r.ShowConnect,
		MCPServers:   mcpServers,
	}
}
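A quick illustration of why Copy() clones the MCPServers slice rather than assigning it: without the copy, both runOptions values would share one backing array and a mutation through the copy would leak into the original. Hypothetical snippet, not part of the diff:

	a := runOptions{MCPServers: []api.MCPServerConfig{{Name: "filesystem"}}}
	b := a.Copy()
	b.MCPServers[0].Name = "changed"
	fmt.Println(a.MCPServers[0].Name) // "filesystem" - the original is unaffected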
@@ -1241,6 +1292,237 @@ type displayResponseState struct {
	wordBuffer string
}

// StreamingToolDetector maintains state for detecting tool calls across streaming chunks
type StreamingToolDetector struct {
	inXMLToolCall  bool
	xmlStartBuffer strings.Builder
	inJSONToolCall bool
	jsonBuffer     strings.Builder
	jsonDepth      int
	inString       bool
	escapeNext     bool
	// tailBuffer holds potential partial tag matches from end of previous chunk
	tailBuffer string
}

// NewStreamingToolDetector creates a new stateful tool detector
func NewStreamingToolDetector() *StreamingToolDetector {
	return &StreamingToolDetector{}
}

// maxTagLength is the longest tag we need to detect across chunk boundaries
const maxTagLength = 12 // len("</tool_call>")

// Process handles a chunk of streaming content and separates tool calls from regular content
func (s *StreamingToolDetector) Process(chunk string) (displayContent string, hasIncompleteToolCall bool) {
	// Prepend any buffered tail from previous chunk
	if s.tailBuffer != "" {
		chunk = s.tailBuffer + chunk
		s.tailBuffer = ""
	}

	var result strings.Builder

	for i := 0; i < len(chunk); i++ {
		ch := chunk[i]

		// Check if we're near the end and might have a partial tag
		// Buffer potential partial matches for next chunk
		remainingLen := len(chunk) - i
		if !s.inXMLToolCall && !s.inJSONToolCall && remainingLen < maxTagLength {
			// Check if remaining content could be start of a tag
			remaining := chunk[i:]
			if couldBePartialTag(remaining) {
				s.tailBuffer = remaining
				break // Stop processing, buffer the rest
			}
		}

		// Handle XML tool calls
		if !s.inXMLToolCall && i+11 <= len(chunk) && chunk[i:i+11] == "<tool_call>" {
			s.inXMLToolCall = true
			s.xmlStartBuffer.Reset()
			s.xmlStartBuffer.WriteString("<tool_call>")
			i += 10 // Skip past "<tool_call>"
			continue
		}

		if s.inXMLToolCall {
			s.xmlStartBuffer.WriteByte(ch)
			if i+12 <= len(chunk) && chunk[i:i+12] == "</tool_call>" {
				// Complete XML tool call - skip it entirely
				s.inXMLToolCall = false
				s.xmlStartBuffer.Reset()
				i += 11 // Skip past "</tool_call>"
				continue
			}
			continue
		}

		// Handle JSON tool calls
		if !s.inJSONToolCall && !s.inXMLToolCall {
			// Look for start of JSON tool call pattern
			if i+8 <= len(chunk) && chunk[i:i+8] == `{"name":` {
				// Check if "arguments" appears nearby (tool call signature)
				lookahead := chunk[i:]
				if len(lookahead) > 200 {
					lookahead = lookahead[:200]
				}
				if strings.Contains(lookahead, `"arguments":`) {
					s.inJSONToolCall = true
					s.jsonBuffer.Reset()
					s.jsonBuffer.WriteByte(ch)
					s.jsonDepth = 1
					s.inString = false
					s.escapeNext = false
					continue
				}
			}
		}

		if s.inJSONToolCall {
			s.jsonBuffer.WriteByte(ch)

			// Track JSON structure to find the end
			if s.escapeNext {
				s.escapeNext = false
				continue
			}

			if ch == '\\' && s.inString {
				s.escapeNext = true
				continue
			}

			if ch == '"' && !s.escapeNext {
				s.inString = !s.inString
				continue
			}

			if !s.inString {
				if ch == '{' {
					s.jsonDepth++
				} else if ch == '}' {
					s.jsonDepth--
					if s.jsonDepth == 0 {
						// Complete JSON tool call - skip it
						s.inJSONToolCall = false
						s.jsonBuffer.Reset()
						continue
					}
				}
			}
			continue
		}

		// Regular content
		result.WriteByte(ch)
	}

	// Check if we have incomplete tool calls or buffered tail that need buffering
	hasIncompleteToolCall = s.inXMLToolCall || s.inJSONToolCall || s.tailBuffer != ""

	return result.String(), hasIncompleteToolCall
}

// couldBePartialTag checks if a string could be the start of a tool call tag
// Only returns true for patterns that are specific enough to likely be tool calls
func couldBePartialTag(s string) bool {
	// Require at least 2 chars to avoid false positives on common single chars like < or {
	if len(s) < 2 {
		return false
	}

	// Check for partial XML tags - must start with "<t" or "</"
	xmlPrefixes := []string{"<t", "<to", "<too", "<tool", "<tool_", "<tool_c", "<tool_ca", "<tool_cal", "<tool_call",
		"</", "</t", "</to", "</too", "</tool", "</tool_", "</tool_c", "</tool_ca", "</tool_cal", "</tool_call"}

	for _, prefix := range xmlPrefixes {
		if strings.HasPrefix(s, prefix) {
			return true
		}
	}

	// Check for partial JSON tool call start - must have at least `{"`
	jsonPrefixes := []string{`{"`, `{"n`, `{"na`, `{"nam`, `{"name`, `{"name"`, `{"name":`}
	for _, prefix := range jsonPrefixes {
		if strings.HasPrefix(s, prefix) {
			return true
		}
	}

	return false
}

// Reset clears the detector state
func (s *StreamingToolDetector) Reset() {
	s.inXMLToolCall = false
	s.xmlStartBuffer.Reset()
	s.inJSONToolCall = false
	s.jsonBuffer.Reset()
	s.jsonDepth = 0
	s.inString = false
	s.escapeNext = false
	s.tailBuffer = ""
}

// findJSONEnd finds the end of a JSON object starting from the beginning of the string
// Returns the index of the closing brace, or -1 if not found
func findJSONEnd(s string) int {
	braceCount := 0
	inString := false
	escapeNext := false

	for i, ch := range s {
		if escapeNext {
			escapeNext = false
			continue
		}

		if ch == '\\' && inString {
			escapeNext = true
			continue
		}

		if ch == '"' && !escapeNext {
			inString = !inString
			continue
		}

		if !inString {
			if ch == '{' {
				braceCount++
			} else if ch == '}' {
				braceCount--
				if braceCount == 0 {
					return i
				}
			}
		}
	}

	return -1
}

// getToolBufferDelay returns the configured tool buffer delay
// Can be overridden with OLLAMA_TOOL_BUFFER_DELAY environment variable (in milliseconds)
func getToolBufferDelay() time.Duration {
	if delayStr := os.Getenv("OLLAMA_TOOL_BUFFER_DELAY"); delayStr != "" {
		if delayMs, err := strconv.Atoi(delayStr); err == nil {
			delay := time.Duration(delayMs) * time.Millisecond
			// Clamp to reasonable bounds
			if delay < MinToolBufferDelay {
				return MinToolBufferDelay
			}
			if delay > MaxToolBufferDelay {
				return MaxToolBufferDelay
			}
			return delay
		}
	}
	return DefaultToolBufferDelay
}

func displayResponse(content string, wordWrap bool, state *displayResponseState) {
	termWidth, _, _ := term.GetSize(int(os.Stdout.Fd()))
	if wordWrap && termWidth >= 10 {
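To make the chunk-boundary handling concrete, here is how the detector behaves when a tag is split across two streamed chunks (illustrative values, using only the functions defined above):

	d := NewStreamingToolDetector()

	out, buffering := d.Process("Hello <tool_")
	// out == "Hello ", buffering == true: "<tool_" is held in tailBuffer

	out, buffering = d.Process(`call>{"name":"read_file","arguments":{}}</tool_call> done`)
	// out == " done", buffering == false: the reassembled tool call was consumed silently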
@@ -1327,6 +1609,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {

	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT)
	defer signal.Stop(sigChan)

	go func() {
		<-sigChan

@@ -1339,6 +1622,18 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
	var fullResponse strings.Builder
	var thinkTagOpened bool = false
	var thinkTagClosed bool = false
	var toolCallsDisplayed bool = false

	// Streaming tool detector for better chunk handling
	toolDetector := NewStreamingToolDetector()

	// Buffer for accumulating content before display
	var contentBuffer strings.Builder
	var bufferTimer *time.Timer
	var bufferMutex sync.Mutex

	// Get configurable buffer delay
	bufferDelay := getToolBufferDelay()

	role := "assistant"
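The buffer delay is the knob that trades display latency for clean tool-call suppression. A sketch of the clamping behavior, per the constants defined above:

	os.Setenv("OLLAMA_TOOL_BUFFER_DELAY", "250")
	fmt.Println(getToolBufferDelay()) // 250ms

	os.Setenv("OLLAMA_TOOL_BUFFER_DELAY", "50")
	fmt.Println(getToolBufferDelay()) // 100ms (clamped to MinToolBufferDelay)

	os.Setenv("OLLAMA_TOOL_BUFFER_DELAY", "10000")
	fmt.Println(getToolBufferDelay()) // 2s (clamped to MaxToolBufferDelay)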
@@ -1370,20 +1665,84 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
			thinkTagClosed = true
			state = &displayResponseState{}
		}
-		// purposefully not putting thinking blocks in the response, which would
-		// only be needed if we later added tool calling to the cli (they get
-		// filtered out anyway since current models don't expect them unless you're
-		// about to finish some tool calls)

		// Use stateful tool detector for better streaming chunk handling
		displayContent, hasIncompleteToolCall := toolDetector.Process(content)

		// Store full response for context
		fullResponse.WriteString(content)

		// Buffer management based on tool detection
		if hasIncompleteToolCall {
			// We have an incomplete tool call - buffer the content
			bufferMutex.Lock()
			contentBuffer.WriteString(displayContent)

			// Cancel any existing timer
			if bufferTimer != nil {
				bufferTimer.Stop()
			}

			// Set a new timer to flush the buffer after a delay
			bufferTimer = time.AfterFunc(bufferDelay, func() {
				bufferMutex.Lock()
				defer bufferMutex.Unlock()

				bufferedContent := contentBuffer.String()
				contentBuffer.Reset()

				// Reset tool detector state when flushing
				toolDetector.Reset()

				// Only display if there's actual content after filtering
				if strings.TrimSpace(bufferedContent) != "" {
					displayResponse(bufferedContent, opts.WordWrap, state)
				}
			})
			bufferMutex.Unlock()
		} else {
			// No incomplete tool call - display immediately
			if strings.TrimSpace(displayContent) != "" {
				displayResponse(displayContent, opts.WordWrap, state)
			}
		}

		// Display tool calls cleanly if detected
		if response.Message.ToolCalls != nil {
			toolCalls := response.Message.ToolCalls
-			if len(toolCalls) > 0 {
+			if len(toolCalls) > 0 && !toolCallsDisplayed {
				// Flush any buffered content before showing tool calls
				bufferMutex.Lock()
				if contentBuffer.Len() > 0 {
					bufferedContent := contentBuffer.String()
					contentBuffer.Reset()
					if strings.TrimSpace(bufferedContent) != "" {
						displayResponse(bufferedContent, opts.WordWrap, state)
					}
				}
				if bufferTimer != nil {
					bufferTimer.Stop()
					bufferTimer = nil
				}
				bufferMutex.Unlock()

				// Add newline for clean separation
				fmt.Println()
				fmt.Print(renderToolCalls(toolCalls, false))
				toolCallsDisplayed = true
			}
		}

-		displayResponse(content, opts.WordWrap, state)
		// Display tool results if available
		if response.Message.ToolResults != nil {
			toolResults := response.Message.ToolResults
			if len(toolResults) > 0 {
				fmt.Print(renderToolResults(toolResults, false))
				fmt.Println() // New line after results
				// Reset flag to allow next round's tool calls to be displayed
				toolCallsDisplayed = false
			}
		}

		return nil
	}
@@ -1393,11 +1752,12 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
	}

	req := &api.ChatRequest{
-		Model:    opts.Model,
-		Messages: opts.Messages,
-		Format:   json.RawMessage(opts.Format),
-		Options:  opts.Options,
-		Think:    opts.Think,
+		Model:      opts.Model,
+		Messages:   opts.Messages,
+		Format:     json.RawMessage(opts.Format),
+		Options:    opts.Options,
+		Think:      opts.Think,
+		MCPServers: opts.MCPServers,
	}

	if opts.KeepAlive != nil {
@@ -1418,6 +1778,20 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
		}
		return nil, err
	}

	// Flush any remaining buffered content
	bufferMutex.Lock()
	if bufferTimer != nil {
		bufferTimer.Stop()
	}
	if contentBuffer.Len() > 0 {
		bufferedContent := contentBuffer.String()
		contentBuffer.Reset()
		if strings.TrimSpace(bufferedContent) != "" && !strings.Contains(bufferedContent, `{"name":`) {
			displayResponse(bufferedContent, opts.WordWrap, state)
		}
	}
	bufferMutex.Unlock()

	if len(opts.Messages) > 0 {
		fmt.Println()
@@ -1437,6 +1811,11 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
}

func generate(cmd *cobra.Command, opts runOptions) error {
	// Tools/MCP servers require interactive mode (Chat API)
	if len(opts.MCPServers) > 0 {
		return errors.New("--tools flag requires interactive mode; use 'ollama run <model> --tools <file>' without piped input")
	}

	client, err := api.ClientFromEnvironment()
	if err != nil {
		return err
@@ -1460,6 +1839,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {

	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT)
	defer signal.Stop(sigChan)

	go func() {
		<-sigChan
@@ -1491,7 +1871,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
		displayResponse(response.Thinking, opts.WordWrap, state)
	}

-	if thinkTagOpened && !thinkTagClosed && (content != "" || len(response.ToolCalls) > 0) {
+	if thinkTagOpened && !thinkTagClosed && content != "" {
		if !strings.HasSuffix(thinkingContent.String(), "\n") {
			fmt.Println()
		}
@@ -1503,13 +1883,6 @@ func generate(cmd *cobra.Command, opts runOptions) error {

	displayResponse(content, opts.WordWrap, state)

-	if response.ToolCalls != nil {
-		toolCalls := response.ToolCalls
-		if len(toolCalls) > 0 {
-			fmt.Print(renderToolCalls(toolCalls, plainText))
-		}
-	}
-
	return nil
}
@@ -1754,6 +2127,7 @@ func NewCLI() *cobra.Command {
	runCmd.Flags().Bool("hidethinking", false, "Hide thinking output (if provided)")
	runCmd.Flags().Bool("truncate", false, "For embedding models: truncate inputs exceeding context length (default: true). Set --truncate=false to error instead")
	runCmd.Flags().Int("dimensions", 0, "Truncate output embeddings to specified dimension (embedding models only)")
	runCmd.Flags().String("tools", "", "Enable MCP tools (default: all registered servers with current dir, or specify path for filesystem)")

	stopCmd := &cobra.Command{
		Use: "stop MODEL",
@@ -1964,15 +2338,101 @@ func renderToolCalls(toolCalls []api.ToolCall, plainText bool) string {
		out += formatExplanation
	}
	for i, toolCall := range toolCalls {
-		argsAsJSON, err := json.Marshal(toolCall.Function.Arguments)
-		if err != nil {
-			return ""
-		}
		if i > 0 {
			out += "\n"
		}
-		// all tool calls are unexpected since we don't currently support registering any in the CLI
-		out += fmt.Sprintf(" Model called a non-existent function '%s()' with arguments: %s", formatValues+toolCall.Function.Name+formatExplanation, formatValues+string(argsAsJSON)+formatExplanation)
+		// Format arguments in a more readable way
+		var argsDisplay string
+		// Arguments is already a map[string]any
+		// Sort keys for deterministic display order
+		keys := make([]string, 0, len(toolCall.Function.Arguments))
+		for k := range toolCall.Function.Arguments {
+			keys = append(keys, k)
+		}
+		sort.Strings(keys)
+		var pairs []string
+		for _, k := range keys {
+			pairs = append(pairs, fmt.Sprintf("%s: %v", k, toolCall.Function.Arguments[k]))
+		}
+		if len(pairs) > 0 {
+			argsDisplay = strings.Join(pairs, ", ")
+		} else {
+			argsDisplay = "(no arguments)"
+		}
+
+		// Show tool execution in progress with cleaner format
+		out += fmt.Sprintf("🔧 Executing tool '%s'%s\n",
+			formatValues+toolCall.Function.Name+formatExplanation, formatExplanation)
+		out += fmt.Sprintf(" Arguments: %s%s%s\n",
+			formatValues, argsDisplay, formatExplanation)
	}
	if !plainText {
		out += readline.ColorDefault
	}
	return out
}

func renderToolResults(toolResults []api.ToolResult, plainText bool) string {
	out := ""
	formatExplanation := ""
	formatValues := ""
	formatError := ""
	if !plainText {
		formatExplanation = readline.ColorGrey + readline.ColorBold
		formatValues = readline.ColorDefault
		// Use bold for errors since ColorRed doesn't exist
		formatError = readline.ColorBold
		out += formatExplanation
	}
	for i, toolResult := range toolResults {
		if i > 0 {
			out += "\n"
		}

		// Tool name and arguments already shown in renderToolCalls
		// Just show the result or error here
		if toolResult.Error != "" {
			// Parse error for better context
			errorMsg := toolResult.Error
			// Try to extract meaningful error from MCP errors
			if strings.Contains(errorMsg, "MCP tool returned error") {
				errorMsg = "Tool execution failed"
			}
			// Look for specific error patterns
			if strings.Contains(toolResult.Error, "Parent directory does not exist") {
				errorMsg = "Parent directory does not exist - check path"
			} else if strings.Contains(toolResult.Error, "permission denied") {
				errorMsg = "Permission denied - insufficient privileges"
			} else if strings.Contains(toolResult.Error, "Invalid arguments") {
				errorMsg = "Invalid tool arguments provided"
			} else if strings.Contains(toolResult.Error, "file not found") {
				errorMsg = "File or directory not found"
			}

			// Truncate long error messages (rune-safe for UTF-8)
			errorRunes := []rune(errorMsg)
			if len(errorRunes) > 200 {
				errorMsg = string(errorRunes[:197]) + "..."
			}

			out += fmt.Sprintf("❌ Error: %s%s%s\n",
				formatError, errorMsg, formatExplanation)
		} else {
			content := toolResult.Content
			if strings.TrimSpace(content) == "" {
				// Empty result - show a clear indicator
				out += fmt.Sprintf("✅ Result: %s(empty)%s\n",
					formatValues, formatExplanation)
			} else {
				// Truncate very long results for display (rune-safe for UTF-8)
				runes := []rune(content)
				if len(runes) > 200 {
					content = string(runes[:197]) + "..."
				}
				out += fmt.Sprintf("✅ Result:\n%s%s%s\n",
					formatValues, content, formatExplanation)
			}
		}
	}
	if !plainText {
		out += readline.ColorDefault
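To show what the renderers emit, a small sketch with plainText=true, so the readline color codes are empty strings. It assumes ToolCallFunction carries the usual Name and Arguments fields:

	calls := []api.ToolCall{{Function: api.ToolCallFunction{
		Name:      "read_file",
		Arguments: api.ToolCallFunctionArguments{"path": "/tmp/notes.txt"},
	}}}
	fmt.Print(renderToolCalls(calls, true))
	// 🔧 Executing tool 'read_file'
	//  Arguments: path: /tmp/notes.txt

	results := []api.ToolResult{{ToolName: "read_file", Content: "hello"}}
	fmt.Print(renderToolResults(results, true))
	// ✅ Result:
	// hello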
@@ -4,11 +4,11 @@ import (
	"fmt"
	"log/slog"
	"strings"

	"github.com/ollama/ollama/api"
)

-// MCPCodeAPI provides a code-like interface for MCP tools
+// MCPCodeAPI provides context injection for MCP tools
type MCPCodeAPI struct {
	manager *MCPManager
}
@@ -20,83 +20,50 @@ func NewMCPCodeAPI(manager *MCPManager) *MCPCodeAPI {
	}
}

-// GenerateMinimalContext returns essential context for tool usage
+// GenerateMinimalContext returns essential runtime context for tool usage.
+// Tool schemas are already provided via the template's TypeScript rendering,
+// so we only need to add runtime-specific info like working directories.
func (m *MCPCodeAPI) GenerateMinimalContext(configs []api.MCPServerConfig) string {
-	slog.Debug("GenerateMinimalContext called", "configs_count", len(configs))
	if len(configs) == 0 {
-		slog.Debug("No MCP configs provided, returning empty context")
		return ""
	}

	var context strings.Builder
	context.WriteString("\n=== MCP Tool Context ===\n")

	// Add filesystem working directory if applicable
	for _, config := range configs {
-		slog.Debug("Processing MCP config", "command", config.Command, "args", config.Args)
-		// Check if this is a filesystem server (command or first arg contains filesystem)
-		isFilesystem := strings.Contains(config.Command, "filesystem") ||
-			(len(config.Args) > 0 && strings.Contains(config.Args[0], "filesystem"))
-
-		if isFilesystem && len(config.Args) > 1 {
-			// Extract working directory from filesystem server
-			workingDir := config.Args[1]
-			slog.Debug("Adding filesystem context", "working_dir", workingDir)
+		if workingDir := m.extractFilesystemPath(config); workingDir != "" {
			context.WriteString(fmt.Sprintf(`
-Filesystem tools are available with these constraints:
-- Working directory: %s
-- All file operations must use paths within this directory
-- Example usage:
-  - List files: "List all files in %s"
-  - Read file: "Read %s/filename.txt"
-  - Create file: "Create %s/newfile.txt with content"
-- Paths outside %s will be rejected
-
-When working with files, ALWAYS use the full path starting with %s
-`, workingDir, workingDir, workingDir, workingDir, workingDir, workingDir))
+Filesystem working directory: %s
+All filesystem tool paths must be within this directory.
+`, workingDir))
		}
-		// Add other server types as needed
	}

	context.WriteString("\n")

	result := context.String()
-	slog.Debug("Generated MCP context", "length", len(result))
+	if result != "" {
+		slog.Debug("Generated MCP context", "length", len(result))
+	}
	return result
}

-// GenerateProgressiveContext returns context based on what tools are being used
-func (m *MCPCodeAPI) GenerateProgressiveContext(toolNames []string) string {
-	var context strings.Builder
-
-	// Group tools by server
-	serverTools := make(map[string][]string)
-	for _, toolName := range toolNames {
-		if clientName, exists := m.manager.GetToolClient(toolName); exists {
-			serverTools[clientName] = append(serverTools[clientName], toolName)
-		}
+// extractFilesystemPath extracts the working directory from filesystem server config
+func (m *MCPCodeAPI) extractFilesystemPath(config api.MCPServerConfig) string {
+	isFilesystem := strings.Contains(config.Command, "filesystem") ||
+		(len(config.Args) > 0 && strings.Contains(strings.Join(config.Args, " "), "filesystem"))
+
+	if isFilesystem && len(config.Args) > 0 {
+		// Path is typically the last argument
+		return config.Args[len(config.Args)-1]
	}
-
-	// Generate context for each server's tools
-	for serverName, tools := range serverTools {
-		context.WriteString(fmt.Sprintf("\n%s tools being used:\n", serverName))
-		for _, tool := range tools {
-			// Get tool definition from manager
-			if toolDef := m.manager.GetToolDefinition(serverName, tool); toolDef != nil {
-				context.WriteString(fmt.Sprintf("- %s: %s\n", tool, toolDef.Function.Description))
-			}
-		}
-	}
-
-	return context.String()
+	return ""
}

-// InjectContextIntoMessages intelligently injects context into the message stream
+// InjectContextIntoMessages adds runtime context to the message stream
func (m *MCPCodeAPI) InjectContextIntoMessages(messages []api.Message, configs []api.MCPServerConfig) []api.Message {
	// Generate minimal context
	context := m.GenerateMinimalContext(configs)
	if context == "" {
		return messages
	}

	// Check if there's already a system message
	if len(messages) > 0 && messages[0].Role == "system" {
		// Append to existing system message
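A quick check of the path-extraction heuristic above (illustrative; NewMCPCodeAPI(nil) works here because extractFilesystemPath never touches the manager). Note that the helper assumes the directory is the last argument, which holds for the stock filesystem server invocation but would break if trailing flags were added:

	cfg := api.MCPServerConfig{
		Name:    "filesystem",
		Command: "npx",
		Args:    []string{"-y", "@modelcontextprotocol/server-filesystem", "/data/project"},
	}
	fmt.Println(NewMCPCodeAPI(nil).extractFilesystemPath(cfg)) // "/data/project"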
@@ -109,41 +76,6 @@ func (m *MCPCodeAPI) InjectContextIntoMessages(messages []api.Message, configs []api.MCPServerConfig) []api.Message {
	}
		messages = append([]api.Message{systemMsg}, messages...)
	}

	return messages
}

-// ExtractWorkingDirectory extracts the working directory from MCP server args
-func ExtractWorkingDirectory(config api.MCPServerConfig) string {
-	if strings.Contains(config.Command, "filesystem") && len(config.Args) > 1 {
-		return config.Args[1]
-	}
-	return ""
-}
-
-// GenerateToolCallExample generates an example of how to call a specific tool
-func (m *MCPCodeAPI) GenerateToolCallExample(serverName, toolName string) string {
-	workingDir := ""
-
-	// Get working directory if filesystem
-	if serverName == "filesystem" {
-		if clients := m.manager.GetServerNames(); len(clients) > 0 {
-			// This is a simplified approach - in production we'd properly track server configs
-			workingDir = "/home/velvetm/Desktop/mcp-test-files" // Would be extracted from actual config
-		}
-	}
-
-	// Generate appropriate example based on tool
-	switch toolName {
-	case "list_directory":
-		return fmt.Sprintf(`"List all files in %s"`, workingDir)
-	case "read_file":
-		return fmt.Sprintf(`"Read the file %s/example.txt"`, workingDir)
-	case "write_file":
-		return fmt.Sprintf(`"Create a file at %s/output.txt with content 'Hello World'"`, workingDir)
-	case "create_directory":
-		return fmt.Sprintf(`"Create a directory called %s/newdir"`, workingDir)
-	default:
-		return fmt.Sprintf(`"Use the %s tool"`, toolName)
-	}
-}
server/routes.go (703 lines changed)
@@ -52,6 +52,17 @@ import (
	"github.com/ollama/ollama/version"
)

// CompletionResult holds the result of a completion request
type CompletionResult struct {
	Content    string
	Thinking   string
	ToolCalls  []api.ToolCall
	Done       bool
	DoneReason string
	Metrics    api.Metrics
	Error      error
}

const signinURLStr = "https://ollama.com/connect?name=%s&key=%s"

func shouldUseHarmony(model *Model) bool {
@@ -337,10 +348,11 @@ func (s *Server) GenerateHandler(c *gin.Context) {
		m.Config.Parser = "harmony"
	}

	if !req.Raw && m.Config.Parser != "" {
		builtinParser = parsers.ParserForName(m.Config.Parser)
		if builtinParser != nil {
-			// no tools or last message for generate endpoint
+			// Initialize parser for thinking extraction only (tools not supported in Generate API)
			builtinParser.Init(nil, nil, req.Think)
		}
	}

@@ -459,7 +471,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
	// the real chat handler, but doing this as a stopgap to get renderer
	// support for generate
	if values.Messages != nil && values.Suffix == "" && req.Template == "" {
-		prompt, images, err = chatPrompt(c.Request.Context(), m, r.Tokenize, opts, values.Messages, []api.Tool{}, req.Think, req.Truncate == nil || *req.Truncate)
+		prompt, images, err = chatPrompt(c.Request.Context(), m, r.Tokenize, opts, values.Messages, nil, req.Think, req.Truncate == nil || *req.Truncate)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return

@@ -510,8 +522,8 @@ func (s *Server) GenerateHandler(c *gin.Context) {
	ch := make(chan any)
	go func() {
		// TODO (jmorganca): avoid building the response twice both here and below
-		var sb strings.Builder
		defer close(ch)
+		var sb strings.Builder
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
			Prompt: prompt,
			Images: images,

@@ -537,16 +549,13 @@ func (s *Server) GenerateHandler(c *gin.Context) {
		}

		if builtinParser != nil {
-			content, thinking, toolCalls, err := builtinParser.Add(cr.Content, cr.Done)
+			content, thinking, _, err := builtinParser.Add(cr.Content, cr.Done)
			if err != nil {
				ch <- gin.H{"error": err.Error()}
				return
			}
			res.Response = content
			res.Thinking = thinking
-			if cr.Done && len(toolCalls) > 0 {
-				res.ToolCalls = toolCalls
-			}
		} else if thinkingState != nil {
			thinking, content := thinkingState.AddContent(cr.Content)
			res.Thinking = thinking

@@ -574,7 +583,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {

		if builtinParser != nil {
			// only send messages with meaningful content (empty messages confuse clients)
-			if res.Response != "" || res.Thinking != "" || res.Done || len(res.ToolCalls) > 0 {
+			if res.Response != "" || res.Thinking != "" || res.Done {
				ch <- res
			}
@@ -1508,6 +1517,10 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
	r.POST("/api/show", s.ShowHandler)
	r.DELETE("/api/delete", s.DeleteHandler)

	// MCP Tools discovery
	r.GET("/api/tools", s.ToolsHandler)
	r.POST("/api/tools", s.ToolsHandler)

	r.POST("/api/me", s.WhoamiHandler)

	r.POST("/api/signout", s.SignoutHandler)
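For completeness, querying the new endpoint. ToolsHandler lives in routes_tools.go, which is not part of this diff, so the response schema is not shown here; the sketch just dumps the raw body:

	resp, err := http.Get("http://localhost:11434/api/tools")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))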
@@ -1852,6 +1865,211 @@ func toolCallId() string {
	return "call_" + strings.ToLower(string(b))
}

// executeCompletionWithTools executes a completion and collects the full response
// This is a synchronous wrapper around the async completion callback
// When suppressDone is true, the Done flag is not sent to the client channel
// (used for intermediate rounds in multi-round tool execution)
func (s *Server) executeCompletionWithTools(
	ctx context.Context,
	r llm.LlamaServer,
	prompt string,
	images []llm.ImageData,
	opts *api.Options,
	req api.ChatRequest,
	m *Model,
	builtinParser parsers.Parser,
	thinkingState *thinking.Parser,
	ch chan any,
	checkpointStart time.Time,
	checkpointLoaded time.Time,
	truncate bool,
	suppressDone bool,
) (*CompletionResult, error) {
	result := &CompletionResult{}
	done := make(chan error, 1)

	// For tracking tool calls when using tools
	var toolParser *tools.Parser
	if len(req.Tools) > 0 && builtinParser == nil {
		toolParser = tools.NewParser(m.Template.Template, req.Tools)
	}

	// Track thinking content for structured outputs
	var thinkingBuilder strings.Builder

	// Accumulate tool calls across streaming chunks
	var accumulatedToolCalls []api.ToolCall

	// Create a new context for this completion
	completionCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	err := r.Completion(completionCtx, llm.CompletionRequest{
		Prompt:      prompt,
		Images:      images,
		Format:      req.Format,
		Options:     opts,
		Shift:       req.Shift == nil || *req.Shift,
		Truncate:    truncate,
		Logprobs:    req.Logprobs,
		TopLogprobs: req.TopLogprobs,
	}, func(resp llm.CompletionResponse) {
		// When suppressDone is true, don't signal Done to client
		// (used for intermediate rounds in multi-round tool execution)
		clientDone := resp.Done && !suppressDone

		res := api.ChatResponse{
			Model:     req.Model,
			CreatedAt: time.Now().UTC(),
			Message:   api.Message{Role: "assistant", Content: resp.Content},
			Done:      clientDone,
			Metrics: api.Metrics{
				PromptEvalCount:    resp.PromptEvalCount,
				PromptEvalDuration: resp.PromptEvalDuration,
				EvalCount:          resp.EvalCount,
				EvalDuration:       resp.EvalDuration,
			},
			Logprobs: toAPILogprobs(resp.Logprobs),
		}

		if resp.Done {
			res.DoneReason = resp.DoneReason.String()
			res.TotalDuration = time.Since(checkpointStart)
			res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
			result.DoneReason = res.DoneReason
			result.Metrics = res.Metrics
		}

		// Handle builtin parser (for models with native tool support)
		if builtinParser != nil {
			content, thinking, toolCalls, err := builtinParser.Add(resp.Content, resp.Done)
			if err != nil {
				result.Error = err
				done <- err
				return
			}

			res.Message.Content = content
			res.Message.Thinking = thinking
			res.Message.ToolCalls = toolCalls

			thinkingBuilder.WriteString(thinking)

			// Accumulate results
			result.Content += content
			result.Thinking += thinking

			// Accumulate tool calls for multi-round MCP execution
			if len(toolCalls) > 0 {
				accumulatedToolCalls = append(accumulatedToolCalls, toolCalls...)
			}

			// On completion, set all accumulated tool calls
			if resp.Done {
				result.ToolCalls = accumulatedToolCalls
			}

			// Stream to client if there's content to stream
			if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || resp.Done || len(res.Logprobs) > 0 {
				ch <- res
			}

			if resp.Done {
				result.Done = true
				done <- nil
			}
			return
		}

		// Handle thinking state parser
		if thinkingState != nil {
			thinkingContent, remainingContent := thinkingState.AddContent(res.Message.Content)
			if thinkingContent == "" && remainingContent == "" && !resp.Done {
				// Need more content to decide
				return
			}

			res.Message.Thinking = thinkingContent
			thinkingBuilder.WriteString(thinkingContent)
			res.Message.Content = remainingContent
			result.Thinking += thinkingContent
		}

		// Handle tool parsing (for models without native tool support)
		if len(req.Tools) > 0 && builtinParser == nil {
			toolCalls, content := toolParser.Add(res.Message.Content)
			if len(content) > 0 {
				res.Message.Content = content
				result.Content += content
			} else if len(toolCalls) > 0 {
				res.Message.ToolCalls = toolCalls
				res.Message.Content = ""
				// Keep accumulating tool calls
				accumulatedToolCalls = toolCalls
			} else if res.Message.Thinking != "" {
				// don't return, fall through to send
			} else {
				// Send logprobs while content is being buffered by the parser for tool calls
				if len(res.Logprobs) > 0 && !resp.Done {
					logprobRes := res
					logprobRes.Message.Content = ""
					logprobRes.Message.ToolCalls = nil
					ch <- logprobRes
				}

				if resp.Done {
					res.Message.Content = toolParser.Content()
					// Set accumulated tool calls in result before signaling done
					if len(accumulatedToolCalls) > 0 {
						result.ToolCalls = accumulatedToolCalls
					}
					// If no tool calls, get final content from parser
					if len(result.ToolCalls) == 0 && toolParser != nil {
						result.Content = toolParser.Content()
					}
					result.Done = true
					ch <- res
					done <- nil
				}
				return
			}
		} else {
			result.Content += res.Message.Content
		}

		// Stream to client
		ch <- res

		if resp.Done {
			// If we accumulated tool calls, set them in result
			if len(accumulatedToolCalls) > 0 {
				result.ToolCalls = accumulatedToolCalls
			}
			// If no tool calls, get final content from parser
			if len(result.ToolCalls) == 0 && toolParser != nil {
				result.Content = toolParser.Content()
			}
			result.Done = true
			done <- nil
		}
	})

	if err != nil {
		return nil, err
	}

	// Wait for completion or context cancellation
	select {
	case err := <-done:
		if err != nil {
			return nil, err
		}
		return result, nil
	case <-ctx.Done():
		return nil, ctx.Err()
	}
}

func (s *Server) ChatHandler(c *gin.Context) {
	checkpointStart := time.Now()
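The synchronous-wrapper shape above is worth calling out in miniature: an async callback API is adapted to a blocking call by signaling a buffered channel from the final callback and racing it against context cancellation. A generic sketch (runSync and start are hypothetical stand-ins, not part of this diff):

	package main

	import "context"

	// runSync blocks until the callback reports done or ctx is canceled.
	func runSync(ctx context.Context, start func(cb func(done bool)) error) error {
		doneCh := make(chan error, 1) // buffered so the callback never blocks
		if err := start(func(done bool) {
			if done {
				doneCh <- nil
			}
		}); err != nil {
			return err
		}
		select {
		case err := <-doneCh:
			return err
		case <-ctx.Done():
			return ctx.Err()
		}
	}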
@@ -2018,6 +2236,80 @@ func (s *Server) ChatHandler(c *gin.Context) {
	}
}

	// =========================================================================
	// MCP (Model Context Protocol) Integration
	// =========================================================================
	//
	// MCP allows the model to execute external tools via JSON-RPC servers.
	// This section handles:
	//   1. Manager initialization (from session cache or new)
	//   2. Tool discovery (list available tools from MCP servers)
	//   3. Context injection (inform model about available tools)
	//   4. Parser configuration (for tool call detection)
	//
	// Entry points:
	//   - req.MCPServers: Explicit server configs from API
	//   - req.ToolsPath: Path-based auto-enable from --tools flag
	//
	// See: mcp.go, mcp_manager.go for implementation details
	// =========================================================================

	var mcpManager *MCPManager

	if len(req.MCPServers) > 0 || req.ToolsPath != "" {
		if req.ToolsPath != "" {
			// Path-based mode: auto-enable servers matching the tools path
			// Used by CLI: `ollama run model --tools /path`
			slog.Debug("Using tools path for MCP manager", "tools_path", req.ToolsPath, "model", req.Model)
			mcpManager, err = GetMCPManagerForPath(req.Model, req.ToolsPath)
			if err != nil {
				slog.Error("Failed to get MCP manager for tools path", "error", err)
				// Continue without MCP - graceful degradation
			}
		} else if len(req.MCPServers) > 0 {
			// Explicit mode: use server configs from API request
			// Used by API: POST /api/chat with mcp_servers field
			sessionID := GenerateSessionID(req)
			slog.Debug("Getting MCP manager", "session", sessionID, "servers", len(req.MCPServers))
			mcpManager, err = GetMCPManager(sessionID, req.MCPServers)
			if err != nil {
				slog.Error("Failed to get MCP manager", "error", err)
				// Continue without MCP - graceful degradation
			}
		}

		if mcpManager != nil {
			// Step 1: Discover tools from MCP servers and add to request
			mcpTools := mcpManager.GetAllTools()
			req.Tools = append(req.Tools, mcpTools...)

			// Step 2: Inject context to help model use tools effectively
			// Use programmatic context injection from tool schemas
			codeAPI := NewMCPCodeAPI(mcpManager)
			req.Messages = codeAPI.InjectContextIntoMessages(req.Messages, req.MCPServers)

			// Step 3: Auto-configure parser for tool call detection
			if len(req.Tools) > 0 && m.Config.Parser == "" {
				if m.Config.ModelFamily == "qwen2" || m.Config.ModelFamily == "qwen3" {
					m.Config.Parser = "qwen3-vl-instruct"
				}
			}

			// Step 4: Update capabilities now that we have tools
			if len(req.Tools) > 0 && !slices.Contains(caps, model.CapabilityTools) {
				caps = append(caps, model.CapabilityTools)
			}

			// Cleanup: Close MCP manager when request completes
			// Note: Session manager may cache for reuse within TTL
			// (deferred only when a manager exists, so graceful-degradation
			// paths cannot dereference a nil manager)
			defer func() {
				if err := mcpManager.Close(); err != nil {
					slog.Warn("Error closing MCP manager", "error", err)
				}
			}()
		}
	}

	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
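Putting the pieces together, the stream a client observes for a single MCP tool round looks roughly like this (field values illustrative; Done is withheld until the final round because suppressDone is set whenever an MCP manager is active):

	1. assistant content chunks        Message{Role: "assistant", Content: ...}
	2. tool calls (before execution)   Message{Role: "assistant", ToolCalls: [...]}
	3. tool results                    Message{Role: "assistant", ToolResults: [...]}
	4. next round's content            Message{Role: "assistant", Content: ...}
	5. final response                  Done: true, DoneReason: ...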
@@ -2106,11 +2398,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
		}
	}

-	var toolParser *tools.Parser
-	if len(req.Tools) > 0 && (builtinParser == nil || !builtinParser.HasToolSupport()) {
-		toolParser = tools.NewParser(m.Template.Template, req.Tools)
-	}
-
	type structuredOutputsState int
	const (
		structuredOutputsState_None structuredOutputsState = iota
@ -2122,181 +2409,223 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||
go func() {
|
||||
defer close(ch)
|
||||
|
||||
structuredOutputsState := structuredOutputsState_None
|
||||
// Initialize for multi-round execution
|
||||
// NOTE: Upstream's structuredOutputsState for thinking models is not yet integrated
|
||||
// TODO: Add structuredOutputsState support for thinking models with format constraints
|
||||
currentMsgs := msgs
|
||||
maxRounds := req.MaxToolRounds
|
||||
if maxRounds == 0 {
|
||||
maxRounds = 15 // Default maximum rounds
|
||||
}
|
||||
|
||||
for {
|
||||
var tb strings.Builder
|
||||
slog.Debug("Starting multi-round execution",
|
||||
"mcpManager", mcpManager != nil,
|
||||
"tools_count", len(req.Tools),
|
||||
"max_rounds", maxRounds)
|
||||
|
||||
currentFormat := req.Format
|
||||
// structured outputs via double request is enabled when:
|
||||
// 1. the model supports the thinking capability and
|
||||
// 2. it uses a built-in parser or our generic thinking parser
|
||||
// MAIN LOOP - Multi-round execution for tool calling
|
||||
var round int
|
||||
for round = 0; round < maxRounds; round++ {
|
||||
slog.Debug("Starting round", "round", round, "messages", len(currentMsgs))
|
||||
|
||||
// Note that the current approach does not work for (potential future)
|
||||
// non-thinking models that emit anything before actual content. This
|
||||
// current approach uses the transition from parsed thinking content to
|
||||
// parsed non-thinking content as the signal to turn constraining on
|
||||
|
||||
if req.Format != nil && structuredOutputsState == structuredOutputsState_None && ((builtinParser != nil || thinkingState != nil) && slices.Contains(m.Capabilities(), model.CapabilityThinking)) {
|
||||
currentFormat = nil
|
||||
}
|
||||
|
||||
// sets up new context given parent context per request
|
||||
ctx, cancel := context.WithCancel(c.Request.Context())
|
||||
err := r.Completion(ctx, llm.CompletionRequest{
|
||||
Prompt: prompt,
|
||||
Images: images,
|
||||
Format: currentFormat,
|
||||
Options: opts,
|
||||
Shift: req.Shift == nil || *req.Shift,
|
||||
Truncate: truncate,
|
||||
Logprobs: req.Logprobs,
|
||||
TopLogprobs: req.TopLogprobs,
|
||||
}, func(r llm.CompletionResponse) {
|
||||
res := api.ChatResponse{
|
||||
Model: req.Model,
|
||||
CreatedAt: time.Now().UTC(),
|
||||
Message: api.Message{Role: "assistant", Content: r.Content},
|
||||
Done: r.Done,
|
||||
Metrics: api.Metrics{
|
||||
PromptEvalCount: r.PromptEvalCount,
|
||||
PromptEvalDuration: r.PromptEvalDuration,
|
||||
EvalCount: r.EvalCount,
|
||||
EvalDuration: r.EvalDuration,
|
||||
},
|
||||
Logprobs: toAPILogprobs(r.Logprobs),
|
||||
}
|
||||
|
||||
if r.Done {
|
||||
res.DoneReason = r.DoneReason.String()
|
||||
res.TotalDuration = time.Since(checkpointStart)
|
||||
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
||||
}
|
||||
|
||||
if builtinParser != nil {
|
||||
slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser input", "parser", m.Config.Parser, "content", r.Content)
|
||||
|
||||
content, thinking, toolCalls, err := builtinParser.Add(r.Content, r.Done)
|
||||
if err != nil {
|
||||
ch <- gin.H{"error": err.Error()}
|
||||
return
|
||||
}
|
||||
|
||||
res.Message.Content = content
|
||||
res.Message.Thinking = thinking
|
||||
for i := range toolCalls {
|
||||
toolCalls[i].ID = toolCallId()
|
||||
}
|
||||
res.Message.ToolCalls = toolCalls
|
||||
|
||||
tb.WriteString(thinking)
|
||||
// we are now receiving content from the model - we should start applying structured outputs
|
||||
if structuredOutputsState == structuredOutputsState_None && req.Format != nil && tb.String() != "" && res.Message.Content != "" {
|
||||
structuredOutputsState = structuredOutputsState_ReadyToApply
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
|
||||
if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || r.Done || len(res.Logprobs) > 0 {
|
||||
slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser output", "parser", m.Config.Parser, "content", content, "thinking", thinking, "toolCalls", toolCalls, "done", r.Done)
|
||||
ch <- res
|
||||
} else {
|
||||
slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser empty output", "parser", m.Config.Parser)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if thinkingState != nil {
|
||||
thinkingContent, remainingContent := thinkingState.AddContent(res.Message.Content)
|
||||
if thinkingContent == "" && remainingContent == "" && !r.Done {
|
||||
// need to accumulate more to decide what to send
|
||||
return
|
||||
}
|
||||
res.Message.Thinking = thinkingContent
|
||||
tb.WriteString(thinkingContent)
|
||||
// emit the collected thinking text before restarting with structured outputs and clear unstructured content
|
||||
// to avoid leaking mixed tokens like "</think>Hello"
|
||||
if structuredOutputsState == structuredOutputsState_None && req.Format != nil && tb.String() != "" && remainingContent != "" {
|
||||
structuredOutputsState = structuredOutputsState_ReadyToApply
|
||||
res.Message.Content = ""
|
||||
ch <- res
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
res.Message.Content = remainingContent
|
||||
}
|
||||
|
||||
if len(req.Tools) > 0 {
|
||||
toolCalls, content := toolParser.Add(res.Message.Content)
|
||||
if len(content) > 0 {
|
||||
res.Message.Content = content
|
||||
} else if len(toolCalls) > 0 {
|
||||
for i := range toolCalls {
|
||||
toolCalls[i].ID = toolCallId()
|
||||
}
|
||||
res.Message.ToolCalls = toolCalls
|
||||
res.Message.Content = ""
|
||||
} else if res.Message.Thinking != "" {
|
||||
// don't return, fall through to send
|
||||
} else {
|
||||
// Send logprobs while content is being buffered by the parser for tool calls
|
||||
if len(res.Logprobs) > 0 && !r.Done {
|
||||
logprobRes := res
|
||||
logprobRes.Message.Content = ""
|
||||
logprobRes.Message.ToolCalls = nil
|
||||
ch <- logprobRes
|
||||
}
|
||||
|
||||
if r.Done {
|
||||
res.Message.Content = toolParser.Content()
|
||||
ch <- res
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
ch <- res
|
||||
})
|
||||
if err != nil {
|
||||
if structuredOutputsState == structuredOutputsState_ReadyToApply && strings.Contains(err.Error(), "context canceled") && c.Request.Context().Err() == nil {
|
||||
// only ignores error if it's a context cancellation due to setting structured outputs
|
||||
} else {
|
||||
var serr api.StatusError
|
||||
if errors.As(err, &serr) {
|
||||
ch <- gin.H{"error": serr.ErrorMessage, "status": serr.StatusCode}
|
||||
} else {
|
||||
ch <- gin.H{"error": err.Error()}
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// ignored structured outputs cancellation falls through to here, start a new request with the structured outputs and updated prompt. use the
|
||||
if structuredOutputsState == structuredOutputsState_ReadyToApply {
|
||||
structuredOutputsState = structuredOutputsState_Applying
|
||||
msg := api.Message{
|
||||
Role: "assistant",
|
||||
Thinking: tb.String(),
|
||||
}
|
||||
|
||||
msgs = append(msgs, msg)
|
||||
prompt, _, err = chatPrompt(c.Request.Context(), m, r.Tokenize, opts, msgs, processedTools, req.Think, truncate)
|
||||
// Re-render prompt and reset parser if not first round (tool results were added)
|
||||
if round > 0 {
|
||||
var err error
|
||||
prompt, images, err = chatPrompt(c.Request.Context(), m, r.Tokenize, opts, currentMsgs, processedTools, req.Think, truncate)
|
||||
if err != nil {
|
||||
slog.Error("chat prompt error applying structured outputs", "error", err)
|
||||
slog.Error("Failed to render prompt in round", "round", round, "error", err)
|
||||
ch <- gin.H{"error": err.Error()}
|
||||
return
|
||||
}
|
||||
// force constraining by terminating thinking header, the parser is already at this state
|
||||
// when the last message is thinking, the rendered for gpt-oss cannot disambiguate between having the
|
||||
// model continue thinking or ending thinking and outputting the final message.
|
||||
// TODO(parthsareen): consider adding prefill disambiguation logic to the renderer for structured outputs.
|
||||
if shouldUseHarmony(m) || (builtinParser != nil && m.Config.Parser == "harmony") {
|
||||
prompt += "<|end|><|start|>assistant<|channel|>final<|message|>"
|
||||
|
||||
// Create fresh parser instance for new round (parser has internal buffer state)
|
||||
if builtinParser != nil && m.Config.Parser != "" {
|
||||
builtinParser = parsers.ParserForName(m.Config.Parser)
|
||||
if builtinParser != nil {
|
||||
lastMsg := ¤tMsgs[len(currentMsgs)-1]
|
||||
builtinParser.Init(req.Tools, lastMsg, req.Think)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
break
|
||||
// Execute completion and collect full response
|
||||
// When MCP is enabled, suppress Done flag during intermediate rounds
|
||||
// to prevent client from closing connection prematurely
|
||||
suppressDone := mcpManager != nil
|
||||
completionResult, err := s.executeCompletionWithTools(
|
||||
c.Request.Context(),
|
||||
r,
|
||||
prompt,
|
||||
images,
|
||||
opts,
|
||||
req,
|
||||
m,
|
||||
builtinParser,
|
||||
thinkingState,
|
||||
ch,
|
||||
checkpointStart,
|
||||
checkpointLoaded,
|
||||
truncate,
|
||||
suppressDone,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
slog.Error("Completion failed", "round", round, "error", err)
|
||||
var serr api.StatusError
|
||||
if errors.As(err, &serr) {
|
||||
ch <- gin.H{"error": serr.ErrorMessage, "status": serr.StatusCode}
|
||||
} else {
|
||||
ch <- gin.H{"error": err.Error()}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Check if model called tools
|
||||
if len(completionResult.ToolCalls) == 0 {
|
||||
// No tools called - conversation is complete
|
||||
slog.Debug("No tools called, conversation complete", "round", round)
|
||||
break // Exit the loop - we're done
|
||||
}
|
||||
|
||||
// Validate tool calls are not empty or malformed
|
||||
validToolCalls := 0
|
||||
for _, tc := range completionResult.ToolCalls {
|
||||
if tc.Function.Name != "" {
|
||||
validToolCalls++
|
||||
} else {
|
||||
slog.Warn("Invalid tool call detected", "round", round, "tool", tc)
|
||||
}
|
||||
}
|
||||
|
||||
if validToolCalls == 0 {
|
||||
slog.Warn("No valid tool calls found, exiting", "round", round)
|
||||
break
|
||||
}
|
||||
|
||||
			// The model called tools - execute them if we have an MCP manager
			if mcpManager != nil {
				slog.Debug("MCP tool execution starting",
					"tools_in_response", len(completionResult.ToolCalls),
					"valid_tools", validToolCalls,
					"round", round)

				// Send tool calls to the client for display BEFORE executing them.
				// This ensures the client can show "Executing tool..." for all rounds.
				// Note: don't include Content here - it was already streamed during completion.
				ch <- api.ChatResponse{
					Model: req.Model,
					Message: api.Message{
						Role:      "assistant",
						ToolCalls: completionResult.ToolCalls,
					},
				}

				// Analyze the execution plan
				executionPlan := mcpManager.AnalyzeExecutionPlan(completionResult.ToolCalls)
				slog.Debug("Execution plan determined",
					"sequential", executionPlan.RequiresSequential,
					"reason", executionPlan.Reason)

				// Execute the tools according to the plan
				results := mcpManager.ExecuteWithPlan(completionResult.ToolCalls, executionPlan)

				// Log the tool calls for debugging
				for i, tc := range completionResult.ToolCalls {
					slog.Info("Tool call details",
						"round", round,
						"index", i,
						"name", tc.Function.Name,
						"arguments", tc.Function.Arguments)
				}

				// Add the assistant message with tool calls to the conversation
				assistantMsg := api.Message{
					Role:      "assistant",
					Content:   completionResult.Content, // preserve any content
					ToolCalls: completionResult.ToolCalls,
				}
				currentMsgs = append(currentMsgs, assistantMsg)

				// Add tool result messages to the conversation and send them to the
				// client for display. Each entry in results corresponds 1:1 with
				// completionResult.ToolCalls.
				toolResultsForDisplay := make([]api.ToolResult, 0, len(results))
				for i, result := range results {
					toolMsg := api.Message{
						Role:     "tool",
						ToolName: completionResult.ToolCalls[i].Function.Name,
					}

					// Create a display result that includes the arguments for context
					displayResult := api.ToolResult{
						ToolName:  completionResult.ToolCalls[i].Function.Name,
						Arguments: completionResult.ToolCalls[i].Function.Arguments,
						Content:   result.Content,
					}

					if result.Error != nil {
						// JSON-encode the error for proper template rendering
						if encoded, err := json.Marshal(fmt.Sprintf("Error: %v", result.Error)); err == nil {
							toolMsg.Content = string(encoded)
						} else {
							toolMsg.Content = fmt.Sprintf("\"Error: %v\"", result.Error)
						}
						displayResult.Error = result.Error.Error()
						slog.Warn("Tool execution failed",
							"tool", completionResult.ToolCalls[i].Function.Name,
							"error", result.Error)
					} else {
						// JSON-encode the content for proper template rendering.
						// The template expects {"content": {{ .Content }}}, where Content must be a JSON string.
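						// For example, json.Marshal("done\n\"ok\"") yields the literal
						// bytes "done\n\"ok\"" - surrounding quotes included, newline
						// and inner quotes escaped - so multi-line or quoted tool
						// output cannot break the rendered JSON.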
						if encoded, err := json.Marshal(result.Content); err == nil {
							toolMsg.Content = string(encoded)
						} else {
							toolMsg.Content = result.Content
						}
					}

					currentMsgs = append(currentMsgs, toolMsg)
					toolResultsForDisplay = append(toolResultsForDisplay, displayResult)
				}

				// Send the tool results to the client for display
				if len(toolResultsForDisplay) > 0 {
					ch <- api.ChatResponse{
						Model: req.Model,
						Message: api.Message{
							Role:        "assistant",
							ToolResults: toolResultsForDisplay,
						},
					}
				}

				// Continue to the next round - the model will process the tool results
				slog.Info("Tools executed, continuing to next round",
					"round", round,
					"messages", len(currentMsgs),
					"last_tool", completionResult.ToolCalls[len(completionResult.ToolCalls)-1].Function.Name)

			} else {
				// No MCP manager - send tool calls to the client for external execution
				slog.Debug("No MCP manager, sending tool calls to client", "round", round)
				break // exit - the client will handle tool execution
			}
		} // end of maxRounds loop

		// Check whether we exhausted the round budget
		if round >= maxRounds {
			slog.Warn("Maximum tool execution rounds reached", "rounds", maxRounds)
			ch <- gin.H{"error": fmt.Sprintf("Maximum tool execution rounds (%d) exceeded", maxRounds)}
		}

		// When MCP was enabled, we suppressed Done flags during the loop.
		// Send a final Done: true to signal that the conversation is complete.
		if mcpManager != nil {
			ch <- api.ChatResponse{
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Message:    api.Message{Role: "assistant"},
				Done:       true,
				DoneReason: "stop",
			}
		}
	}()
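A minimal sketch of how a streaming client might consume the sequence this goroutine emits: content deltas, an assistant message carrying ToolCalls before execution, one carrying ToolResults afterwards, and a single final Done once every round has finished. It uses the Go client from the api package; the model name and prompt are placeholders, and only fields visible in this diff (Message.ToolCalls, Message.ToolResults, Done) are relied on.

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.ChatRequest{
		Model:    "llama3.2", // placeholder model name
		Messages: []api.Message{{Role: "user", Content: "What files are in /tmp?"}},
	}

	// The callback fires once per streamed ChatResponse. Tool-call and
	// tool-result messages arrive between content deltas; Done is set only
	// on the final response, after all tool rounds have completed.
	err = client.Chat(context.Background(), req, func(resp api.ChatResponse) error {
		for _, tc := range resp.Message.ToolCalls {
			fmt.Printf("\n[calling %s]\n", tc.Function.Name)
		}
		for _, tr := range resp.Message.ToolResults {
			fmt.Printf("[%s returned %d bytes]\n", tr.ToolName, len(tr.Content))
		}
		fmt.Print(resp.Message.Content)
		if resp.Done {
			fmt.Println()
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}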

@@ -2322,22 +2651,15 @@ func (s *Server) ChatHandler(c *gin.Context) {
		case gin.H:
			msg, ok := t["error"].(string)
			if !ok {
				msg = "unexpected error format in response"
			}

			status, ok := t["status"].(int)
			if !ok {
				status = http.StatusInternalServerError
			}

			c.JSON(status, gin.H{"error": msg})
			return
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
			return
		}
	}

	resp.Message.Content = sbContent.String()
	resp.Message.Thinking = sbThinking.String()
	resp.Logprobs = allLogprobs

@@ -2345,12 +2667,10 @@ func (s *Server) ChatHandler(c *gin.Context) {
		if len(toolCalls) > 0 {
			resp.Message.ToolCalls = toolCalls
		}

		c.JSON(http.StatusOK, resp)
		return
	}

	streamResponse(c, ch)
}

func handleScheduleError(c *gin.Context, name string, err error) {

@@ -2395,4 +2715,3 @@ func filterThinkTags(msgs []api.Message, m *Model) []api.Message {
	}
	return msgs
}

server/routes_tools.go
@@ -0,0 +1,91 @@
package server

import (
	"net/http"

	"github.com/gin-gonic/gin"
	"github.com/ollama/ollama/api"
)

// ToolsHandler handles requests to list available MCP tools.
// GET: returns the available MCP server definitions from configuration.
// POST with mcp_servers: returns the tools exposed by the specified MCP servers.
func (s *Server) ToolsHandler(c *gin.Context) {
	var req struct {
		MCPServers []api.MCPServerConfig `json:"mcp_servers,omitempty"`
	}

	if c.Request.Method == http.MethodPost {
		if err := c.BindJSON(&req); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
			return
		}
	}

	// If MCP servers were provided, list their tools
	if len(req.MCPServers) > 0 {
		manager := NewMCPManager(10)
		defer manager.Close()

		var allTools []ToolInfo
		seen := 0
		for _, config := range req.MCPServers {
			if err := manager.AddServer(config); err != nil {
				// Include the error in the response but continue with the remaining servers
				allTools = append(allTools, ToolInfo{
					Name:        config.Name,
					Description: "Failed to initialize: " + err.Error(),
					Error:       err.Error(),
				})
				continue
			}

			// GetAllTools returns the tools of every server registered so far
			// (assuming registration order is preserved), so only the entries
			// past seen belong to the server just added; without this the
			// earlier servers' tools would be listed again with the wrong name.
			tools := manager.GetAllTools()
			for _, tool := range tools[seen:] {
				allTools = append(allTools, ToolInfo{
					Name:        tool.Function.Name,
					Description: tool.Function.Description,
					Parameters:  &tool.Function.Parameters,
					ServerName:  config.Name,
				})
			}
			seen = len(tools)
		}

		c.JSON(http.StatusOK, ToolsResponse{
			Tools: allTools,
		})
		return
	}

	// Otherwise, list the available MCP server definitions
	defs, err := LoadMCPDefinitions()
	if err != nil {
		// Config parsing errors are client errors (bad config), not server errors
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid MCP configuration: " + err.Error()})
		return
	}

	servers := defs.ListServers()
	c.JSON(http.StatusOK, MCPServersResponse{
		Servers: servers,
	})
}
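A quick sketch of exercising the POST form of this endpoint with the standard library. The endpoint path and the request/response shapes come straight from the handler above; the server name "filesystem" is a placeholder, only the "name" field of api.MCPServerConfig is assumed, and 11434 is ollama's default port.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	// Placeholder payload: api.MCPServerConfig's full field set is defined elsewhere.
	body := []byte(`{"mcp_servers":[{"name":"filesystem"}]}`)

	resp, err := http.Post("http://localhost:11434/api/tools", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Mirrors ToolsResponse/ToolInfo from routes_tools.go, trimmed to the fields used below.
	var out struct {
		Tools []struct {
			Name        string `json:"name"`
			Description string `json:"description"`
			Server      string `json:"server"`
			Error       string `json:"error"`
		} `json:"tools"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		log.Fatal(err)
	}
	for _, t := range out.Tools {
		fmt.Printf("%s (%s): %s\n", t.Name, t.Server, t.Description)
	}
}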

// ToolInfo provides information about a single tool
type ToolInfo struct {
	Name        string                      `json:"name"`
	Description string                      `json:"description"`
	Parameters  *api.ToolFunctionParameters `json:"parameters,omitempty"`
	ServerName  string                      `json:"server,omitempty"`
	Error       string                      `json:"error,omitempty"`
}

// ToolsResponse contains the list of available tools
type ToolsResponse struct {
	Tools []ToolInfo `json:"tools"`
}

// MCPServersResponse contains the list of available MCP server types
type MCPServersResponse struct {
	Servers []MCPServerInfo `json:"servers"`
}
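For reference, a sketch of the wire shape these types produce, using a local mirror of ToolInfo's JSON-visible fields. The tool and server names are made up; the second entry shows how an AddServer failure surfaces in the same list.

package main

import (
	"encoding/json"
	"fmt"
)

// toolInfo mirrors ToolInfo above, trimmed to the fields exercised here.
type toolInfo struct {
	Name        string `json:"name"`
	Description string `json:"description"`
	ServerName  string `json:"server,omitempty"`
	Error       string `json:"error,omitempty"`
}

func main() {
	resp := struct {
		Tools []toolInfo `json:"tools"`
	}{Tools: []toolInfo{
		{Name: "read_file", Description: "Read a file from disk", ServerName: "filesystem"},
		{Name: "weather", Description: "Failed to initialize: connection refused", Error: "connection refused"},
	}}
	out, _ := json.MarshalIndent(resp, "", "  ")
	fmt.Println(string(out))
	// Prints:
	// {
	//   "tools": [
	//     {
	//       "name": "read_file",
	//       "description": "Read a file from disk",
	//       "server": "filesystem"
	//     },
	//     {
	//       "name": "weather",
	//       "description": "Failed to initialize: connection refused",
	//       "error": "connection refused"
	//     }
	//   ]
	// }
}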