cmd: add --tools flag for MCP server integration
Integrate MCP servers with the ollama CLI and add route handlers for
tool-enabled chat completions.

CLI (cmd/cmd.go):
- Add --tools flag to 'ollama run' command
- Supports path argument: --tools /path/to/directory
- Multi-round tool execution loop with result display
- Displays available tools on startup

Routes (server/routes.go, server/routes_tools.go):
- MCP server handling in chat completions
- /api/tools endpoint for tool listing
- Session-based MCP manager lifecycle
- Detailed documentation of MCP integration flow

Parser (harmony/harmonyparser.go):
- Handle embedded <think> segments in tool call responses
- Extract tool calls from thinking model outputs

Relates to #7865
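For reference, a minimal client-side sketch of the new API surface. It assumes only the types this diff introduces (api.MCPServerConfig, ChatRequest.MCPServers, Message.ToolResults) plus the existing api.Client; the model name and directory are placeholders, and the filesystem server invocation mirrors the CLI fallback below.

	package main

	import (
		"context"
		"fmt"
		"log"

		"github.com/ollama/ollama/api"
	)

	func main() {
		client, err := api.ClientFromEnvironment()
		if err != nil {
			log.Fatal(err)
		}

		req := &api.ChatRequest{
			Model:    "llama3.1", // placeholder model
			Messages: []api.Message{{Role: "user", Content: "List the files in the project directory"}},
			MCPServers: []api.MCPServerConfig{{
				Name:    "filesystem",
				Command: "npx",
				Args:    []string{"-y", "@modelcontextprotocol/server-filesystem", "/tmp/project"},
			}},
		}

		err = client.Chat(context.Background(), req, func(resp api.ChatResponse) error {
			// Tool calls and tool results arrive as separate streamed messages.
			for _, tc := range resp.Message.ToolCalls {
				fmt.Printf("[tool call] %s(%v)\n", tc.Function.Name, tc.Function.Arguments)
			}
			for _, tr := range resp.Message.ToolResults {
				fmt.Printf("[tool result] %s: %s\n", tr.ToolName, tr.Content)
			}
			fmt.Print(resp.Message.Content)
			return nil
		})
		if err != nil {
			log.Fatal(err)
		}
	}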
parent fc05536d52
commit abcb81bb07

api/types.go (16 lines changed)
@@ -126,12 +126,6 @@ type GenerateRequest struct {
	// each with an associated log probability. Only applies when Logprobs is true.
	// Valid values are 0-20. Default is 0 (only return the selected token's logprob).
	TopLogprobs int `json:"top_logprobs,omitempty"`
-
-	// Tools is a list of tools the model may call.
-	Tools []Tool `json:"tools,omitempty"`
-
-	// MCPServers specifies MCP servers to use for tool functionality
-	MCPServers []MCPServerConfig `json:"mcp_servers,omitempty"`
}

// ChatRequest describes a request sent by [Client.Chat].
@@ -250,9 +244,10 @@ type ToolCall struct {
}

type ToolResult struct {
-	ToolName string `json:"tool_name"`
-	Content  string `json:"content"`
-	Error    string `json:"error,omitempty"`
+	ToolName  string                    `json:"tool_name"`
+	Arguments ToolCallFunctionArguments `json:"arguments,omitempty"`
+	Content   string                    `json:"content"`
+	Error     string                    `json:"error,omitempty"`
}

type ToolCallFunction struct {
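For reference, a sketch of the wire shape the revised ToolResult produces. It assumes ToolCallFunctionArguments marshals as a plain JSON object (it is iterated as a map in renderToolCalls further down); the tool name and path are placeholders.

	res := api.ToolResult{
		ToolName:  "read_file",
		Arguments: api.ToolCallFunctionArguments{"path": "/tmp/project/notes.txt"},
		Content:   "hello world",
	}
	b, _ := json.Marshal(res)
	fmt.Println(string(b))
	// {"tool_name":"read_file","arguments":{"path":"/tmp/project/notes.txt"},"content":"hello world"}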
@@ -857,9 +852,6 @@ type GenerateResponse struct {

	Metrics

-	ToolCalls   []ToolCall   `json:"tool_calls,omitempty"`
-	ToolResults []ToolResult `json:"tool_results,omitempty"`
-
	DebugInfo *DebugInfo `json:"_debug_info,omitempty"`

	// Logprobs contains log probability information for the generated tokens,
cmd/cmd.go (510 lines changed)
@@ -22,6 +22,7 @@ import (
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

@@ -49,6 +50,13 @@ import (

const ConnectInstructions = "To sign in, navigate to:\n %s\n\n"

// Tool detection and buffering configuration
const (
	DefaultToolBufferDelay = 500 * time.Millisecond
	MinToolBufferDelay     = 100 * time.Millisecond
	MaxToolBufferDelay     = 2 * time.Second
)

// ensureThinkingSupport emits a warning if the model does not advertise thinking support
func ensureThinkingSupport(ctx context.Context, client *api.Client, name string) {
	if name == "" {
@@ -416,6 +424,41 @@ func RunHandler(cmd *cobra.Command, args []string) error {
		opts.KeepAlive = &api.Duration{Duration: d}
	}

	toolsSpec, err := cmd.Flags().GetString("tools")
	if err != nil {
		return err
	}
	if toolsSpec != "" {
		mcpServers, toolsPath, err := server.GetMCPServersForTools(toolsSpec)
		if err != nil {
			// If definitions fail to load, fall back to basic filesystem support
			fmt.Fprintf(os.Stderr, "Warning: Failed to load MCP definitions: %v\n", err)
			mcpServers = []api.MCPServerConfig{
				{
					Name:    "filesystem",
					Command: "npx",
					Args:    []string{"-y", "@modelcontextprotocol/server-filesystem", toolsPath},
				},
			}
		}

		if len(mcpServers) == 0 {
			fmt.Fprintf(os.Stderr, "Warning: No MCP servers matched for --tools context\n")
		} else {
			// Log what servers are being enabled
			serverNames := make([]string, 0, len(mcpServers))
			for _, srv := range mcpServers {
				serverNames = append(serverNames, srv.Name)
			}
			fmt.Fprintf(os.Stderr, "Enabling MCP servers: %s\n", strings.Join(serverNames, ", "))
			if toolsPath != "" {
				fmt.Fprintf(os.Stderr, "Tools path: %s\n", toolsPath)
			}
		}

		opts.MCPServers = mcpServers
	}

	prompts := args[1:]
	// prepend stdin to the prompt if provided
	if !term.IsTerminal(int(os.Stdin.Fd())) {
@@ -1189,6 +1232,7 @@ type runOptions struct {
	Think        *api.ThinkValue
	HideThinking bool
	ShowConnect  bool
	MCPServers   []api.MCPServerConfig
}

func (r runOptions) Copy() runOptions {

@@ -1218,6 +1262,12 @@ func (r runOptions) Copy() runOptions {
		think = &cThink
	}

	var mcpServers []api.MCPServerConfig
	if r.MCPServers != nil {
		mcpServers = make([]api.MCPServerConfig, len(r.MCPServers))
		copy(mcpServers, r.MCPServers)
	}

	return runOptions{
		Model:       r.Model,
		ParentModel: r.ParentModel,

@@ -1233,6 +1283,7 @@ func (r runOptions) Copy() runOptions {
		Think:        think,
		HideThinking: r.HideThinking,
		ShowConnect:  r.ShowConnect,
		MCPServers:   mcpServers,
	}
}
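A quick illustration of why Copy() clones the MCPServers slice rather than assigning it: without the copy, both runOptions values would share one backing array and a mutation through the copy would leak into the original. Hypothetical snippet, not part of the diff:

	a := runOptions{MCPServers: []api.MCPServerConfig{{Name: "filesystem"}}}
	b := a.Copy()
	b.MCPServers[0].Name = "changed"
	fmt.Println(a.MCPServers[0].Name) // "filesystem" - the original is unaffected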
@@ -1241,6 +1292,237 @@ type displayResponseState struct {
	wordBuffer string
}

// StreamingToolDetector maintains state for detecting tool calls across streaming chunks
type StreamingToolDetector struct {
	inXMLToolCall  bool
	xmlStartBuffer strings.Builder
	inJSONToolCall bool
	jsonBuffer     strings.Builder
	jsonDepth      int
	inString       bool
	escapeNext     bool
	// tailBuffer holds potential partial tag matches from end of previous chunk
	tailBuffer string
}

// NewStreamingToolDetector creates a new stateful tool detector
func NewStreamingToolDetector() *StreamingToolDetector {
	return &StreamingToolDetector{}
}

// maxTagLength is the longest tag we need to detect across chunk boundaries
const maxTagLength = 12 // len("</tool_call>")

// Process handles a chunk of streaming content and separates tool calls from regular content
func (s *StreamingToolDetector) Process(chunk string) (displayContent string, hasIncompleteToolCall bool) {
	// Prepend any buffered tail from previous chunk
	if s.tailBuffer != "" {
		chunk = s.tailBuffer + chunk
		s.tailBuffer = ""
	}

	var result strings.Builder

	for i := 0; i < len(chunk); i++ {
		ch := chunk[i]

		// Check if we're near the end and might have a partial tag
		// Buffer potential partial matches for next chunk
		remainingLen := len(chunk) - i
		if !s.inXMLToolCall && !s.inJSONToolCall && remainingLen < maxTagLength {
			// Check if remaining content could be start of a tag
			remaining := chunk[i:]
			if couldBePartialTag(remaining) {
				s.tailBuffer = remaining
				break // Stop processing, buffer the rest
			}
		}

		// Handle XML tool calls
		if !s.inXMLToolCall && i+11 <= len(chunk) && chunk[i:i+11] == "<tool_call>" {
			s.inXMLToolCall = true
			s.xmlStartBuffer.Reset()
			s.xmlStartBuffer.WriteString("<tool_call>")
			i += 10 // Skip past "<tool_call>"
			continue
		}

		if s.inXMLToolCall {
			s.xmlStartBuffer.WriteByte(ch)
			if i+12 <= len(chunk) && chunk[i:i+12] == "</tool_call>" {
				// Complete XML tool call - skip it entirely
				s.inXMLToolCall = false
				s.xmlStartBuffer.Reset()
				i += 11 // Skip past "</tool_call>"
				continue
			}
			continue
		}

		// Handle JSON tool calls
		if !s.inJSONToolCall && !s.inXMLToolCall {
			// Look for start of JSON tool call pattern
			if i+8 <= len(chunk) && chunk[i:i+8] == `{"name":` {
				// Check if "arguments" appears nearby (tool call signature)
				lookahead := chunk[i:]
				if len(lookahead) > 200 {
					lookahead = lookahead[:200]
				}
				if strings.Contains(lookahead, `"arguments":`) {
					s.inJSONToolCall = true
					s.jsonBuffer.Reset()
					s.jsonBuffer.WriteByte(ch)
					s.jsonDepth = 1
					s.inString = false
					s.escapeNext = false
					continue
				}
			}
		}

		if s.inJSONToolCall {
			s.jsonBuffer.WriteByte(ch)

			// Track JSON structure to find the end
			if s.escapeNext {
				s.escapeNext = false
				continue
			}

			if ch == '\\' && s.inString {
				s.escapeNext = true
				continue
			}

			if ch == '"' && !s.escapeNext {
				s.inString = !s.inString
				continue
			}

			if !s.inString {
				if ch == '{' {
					s.jsonDepth++
				} else if ch == '}' {
					s.jsonDepth--
					if s.jsonDepth == 0 {
						// Complete JSON tool call - skip it
						s.inJSONToolCall = false
						s.jsonBuffer.Reset()
						continue
					}
				}
			}
			continue
		}

		// Regular content
		result.WriteByte(ch)
	}

	// Check if we have incomplete tool calls or buffered tail that need buffering
	hasIncompleteToolCall = s.inXMLToolCall || s.inJSONToolCall || s.tailBuffer != ""

	return result.String(), hasIncompleteToolCall
}

// couldBePartialTag checks if a string could be the start of a tool call tag
// Only returns true for patterns that are specific enough to likely be tool calls
func couldBePartialTag(s string) bool {
	// Require at least 2 chars to avoid false positives on common single chars like < or {
	if len(s) < 2 {
		return false
	}

	// Check for partial XML tags - must start with "<t" or "</"
	xmlPrefixes := []string{"<t", "<to", "<too", "<tool", "<tool_", "<tool_c", "<tool_ca", "<tool_cal", "<tool_call",
		"</", "</t", "</to", "</too", "</tool", "</tool_", "</tool_c", "</tool_ca", "</tool_cal", "</tool_call"}

	for _, prefix := range xmlPrefixes {
		if strings.HasPrefix(s, prefix) {
			return true
		}
	}

	// Check for partial JSON tool call start - must have at least `{"`
	jsonPrefixes := []string{`{"`, `{"n`, `{"na`, `{"nam`, `{"name`, `{"name"`, `{"name":`}
	for _, prefix := range jsonPrefixes {
		if strings.HasPrefix(s, prefix) {
			return true
		}
	}

	return false
}

// Reset clears the detector state
func (s *StreamingToolDetector) Reset() {
	s.inXMLToolCall = false
	s.xmlStartBuffer.Reset()
	s.inJSONToolCall = false
	s.jsonBuffer.Reset()
	s.jsonDepth = 0
	s.inString = false
	s.escapeNext = false
	s.tailBuffer = ""
}

// findJSONEnd finds the end of a JSON object starting from the beginning of the string
// Returns the index of the closing brace, or -1 if not found
func findJSONEnd(s string) int {
	braceCount := 0
	inString := false
	escapeNext := false

	for i, ch := range s {
		if escapeNext {
			escapeNext = false
			continue
		}

		if ch == '\\' && inString {
			escapeNext = true
			continue
		}

		if ch == '"' && !escapeNext {
			inString = !inString
			continue
		}

		if !inString {
			if ch == '{' {
				braceCount++
			} else if ch == '}' {
				braceCount--
				if braceCount == 0 {
					return i
				}
			}
		}
	}

	return -1
}

// getToolBufferDelay returns the configured tool buffer delay
// Can be overridden with OLLAMA_TOOL_BUFFER_DELAY environment variable (in milliseconds)
func getToolBufferDelay() time.Duration {
	if delayStr := os.Getenv("OLLAMA_TOOL_BUFFER_DELAY"); delayStr != "" {
		if delayMs, err := strconv.Atoi(delayStr); err == nil {
			delay := time.Duration(delayMs) * time.Millisecond
			// Clamp to reasonable bounds
			if delay < MinToolBufferDelay {
				return MinToolBufferDelay
			}
			if delay > MaxToolBufferDelay {
				return MaxToolBufferDelay
			}
			return delay
		}
	}
	return DefaultToolBufferDelay
}

func displayResponse(content string, wordWrap bool, state *displayResponseState) {
	termWidth, _, _ := term.GetSize(int(os.Stdout.Fd()))
	if wordWrap && termWidth >= 10 {
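To make the chunk-boundary handling concrete, here is how the detector behaves when a tag is split across two streamed chunks (illustrative values, using only the functions defined above):

	d := NewStreamingToolDetector()

	out, buffering := d.Process("Hello <tool_")
	// out == "Hello ", buffering == true: "<tool_" is held in tailBuffer

	out, buffering = d.Process(`call>{"name":"read_file","arguments":{}}</tool_call> done`)
	// out == " done", buffering == false: the reassembled tool call was consumed silently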
@@ -1327,6 +1609,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {

	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT)
	defer signal.Stop(sigChan)

	go func() {
		<-sigChan

@@ -1339,6 +1622,18 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
	var fullResponse strings.Builder
	var thinkTagOpened bool = false
	var thinkTagClosed bool = false
	var toolCallsDisplayed bool = false

	// Streaming tool detector for better chunk handling
	toolDetector := NewStreamingToolDetector()

	// Buffer for accumulating content before display
	var contentBuffer strings.Builder
	var bufferTimer *time.Timer
	var bufferMutex sync.Mutex

	// Get configurable buffer delay
	bufferDelay := getToolBufferDelay()

	role := "assistant"
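The buffer delay is the knob that trades display latency for clean tool-call suppression. A sketch of the clamping behavior, per the constants defined above:

	os.Setenv("OLLAMA_TOOL_BUFFER_DELAY", "250")
	fmt.Println(getToolBufferDelay()) // 250ms

	os.Setenv("OLLAMA_TOOL_BUFFER_DELAY", "50")
	fmt.Println(getToolBufferDelay()) // 100ms (clamped to MinToolBufferDelay)

	os.Setenv("OLLAMA_TOOL_BUFFER_DELAY", "10000")
	fmt.Println(getToolBufferDelay()) // 2s (clamped to MaxToolBufferDelay)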
@@ -1370,20 +1665,84 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
			thinkTagClosed = true
			state = &displayResponseState{}
		}
-		// purposefully not putting thinking blocks in the response, which would
-		// only be needed if we later added tool calling to the cli (they get
-		// filtered out anyway since current models don't expect them unless you're
-		// about to finish some tool calls)

		// Use stateful tool detector for better streaming chunk handling
		displayContent, hasIncompleteToolCall := toolDetector.Process(content)

		// Store full response for context
		fullResponse.WriteString(content)

		// Buffer management based on tool detection
		if hasIncompleteToolCall {
			// We have an incomplete tool call - buffer the content
			bufferMutex.Lock()
			contentBuffer.WriteString(displayContent)

			// Cancel any existing timer
			if bufferTimer != nil {
				bufferTimer.Stop()
			}

			// Set a new timer to flush the buffer after a delay
			bufferTimer = time.AfterFunc(bufferDelay, func() {
				bufferMutex.Lock()
				defer bufferMutex.Unlock()

				bufferedContent := contentBuffer.String()
				contentBuffer.Reset()

				// Reset tool detector state when flushing
				toolDetector.Reset()

				// Only display if there's actual content after filtering
				if strings.TrimSpace(bufferedContent) != "" {
					displayResponse(bufferedContent, opts.WordWrap, state)
				}
			})
			bufferMutex.Unlock()
		} else {
			// No incomplete tool call - display immediately
			if strings.TrimSpace(displayContent) != "" {
				displayResponse(displayContent, opts.WordWrap, state)
			}
		}

		// Display tool calls cleanly if detected
		if response.Message.ToolCalls != nil {
			toolCalls := response.Message.ToolCalls
-			if len(toolCalls) > 0 {
+			if len(toolCalls) > 0 && !toolCallsDisplayed {
				// Flush any buffered content before showing tool calls
				bufferMutex.Lock()
				if contentBuffer.Len() > 0 {
					bufferedContent := contentBuffer.String()
					contentBuffer.Reset()
					if strings.TrimSpace(bufferedContent) != "" {
						displayResponse(bufferedContent, opts.WordWrap, state)
					}
				}
				if bufferTimer != nil {
					bufferTimer.Stop()
					bufferTimer = nil
				}
				bufferMutex.Unlock()

				// Add newline for clean separation
				fmt.Println()
				fmt.Print(renderToolCalls(toolCalls, false))
				toolCallsDisplayed = true
			}
		}

-		displayResponse(content, opts.WordWrap, state)
		// Display tool results if available
		if response.Message.ToolResults != nil {
			toolResults := response.Message.ToolResults
			if len(toolResults) > 0 {
				fmt.Print(renderToolResults(toolResults, false))
				fmt.Println() // New line after results
				// Reset flag to allow next round's tool calls to be displayed
				toolCallsDisplayed = false
			}
		}

		return nil
	}
@@ -1393,11 +1752,12 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
	}

	req := &api.ChatRequest{
-		Model:    opts.Model,
-		Messages: opts.Messages,
-		Format:   json.RawMessage(opts.Format),
-		Options:  opts.Options,
-		Think:    opts.Think,
+		Model:      opts.Model,
+		Messages:   opts.Messages,
+		Format:     json.RawMessage(opts.Format),
+		Options:    opts.Options,
+		Think:      opts.Think,
+		MCPServers: opts.MCPServers,
	}

	if opts.KeepAlive != nil {
@@ -1418,6 +1778,20 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
		}
		return nil, err
	}

	// Flush any remaining buffered content
	bufferMutex.Lock()
	if bufferTimer != nil {
		bufferTimer.Stop()
	}
	if contentBuffer.Len() > 0 {
		bufferedContent := contentBuffer.String()
		contentBuffer.Reset()
		if strings.TrimSpace(bufferedContent) != "" && !strings.Contains(bufferedContent, `{"name":`) {
			displayResponse(bufferedContent, opts.WordWrap, state)
		}
	}
	bufferMutex.Unlock()

	if len(opts.Messages) > 0 {
		fmt.Println()
@@ -1437,6 +1811,11 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
}

func generate(cmd *cobra.Command, opts runOptions) error {
	// Tools/MCP servers require interactive mode (Chat API)
	if len(opts.MCPServers) > 0 {
		return errors.New("--tools flag requires interactive mode; use 'ollama run <model> --tools <file>' without piped input")
	}

	client, err := api.ClientFromEnvironment()
	if err != nil {
		return err
@@ -1460,6 +1839,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {

	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT)
	defer signal.Stop(sigChan)

	go func() {
		<-sigChan
@@ -1491,7 +1871,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
		displayResponse(response.Thinking, opts.WordWrap, state)
	}

-	if thinkTagOpened && !thinkTagClosed && (content != "" || len(response.ToolCalls) > 0) {
+	if thinkTagOpened && !thinkTagClosed && content != "" {
		if !strings.HasSuffix(thinkingContent.String(), "\n") {
			fmt.Println()
		}
@@ -1503,13 +1883,6 @@ func generate(cmd *cobra.Command, opts runOptions) error {

	displayResponse(content, opts.WordWrap, state)

-	if response.ToolCalls != nil {
-		toolCalls := response.ToolCalls
-		if len(toolCalls) > 0 {
-			fmt.Print(renderToolCalls(toolCalls, plainText))
-		}
-	}
-
	return nil
}
@@ -1754,6 +2127,7 @@ func NewCLI() *cobra.Command {
	runCmd.Flags().Bool("hidethinking", false, "Hide thinking output (if provided)")
	runCmd.Flags().Bool("truncate", false, "For embedding models: truncate inputs exceeding context length (default: true). Set --truncate=false to error instead")
	runCmd.Flags().Int("dimensions", 0, "Truncate output embeddings to specified dimension (embedding models only)")
	runCmd.Flags().String("tools", "", "Enable MCP tools (default: all registered servers with current dir, or specify path for filesystem)")

	stopCmd := &cobra.Command{
		Use: "stop MODEL",
@@ -1964,15 +2338,101 @@ func renderToolCalls(toolCalls []api.ToolCall, plainText bool) string {
		out += formatExplanation
	}
	for i, toolCall := range toolCalls {
-		argsAsJSON, err := json.Marshal(toolCall.Function.Arguments)
-		if err != nil {
-			return ""
-		}
		if i > 0 {
			out += "\n"
		}
-		// all tool calls are unexpected since we don't currently support registering any in the CLI
-		out += fmt.Sprintf(" Model called a non-existent function '%s()' with arguments: %s", formatValues+toolCall.Function.Name+formatExplanation, formatValues+string(argsAsJSON)+formatExplanation)
+		// Format arguments in a more readable way
+		var argsDisplay string
+		// Arguments is already a map[string]any
+		// Sort keys for deterministic display order
+		keys := make([]string, 0, len(toolCall.Function.Arguments))
+		for k := range toolCall.Function.Arguments {
+			keys = append(keys, k)
+		}
+		sort.Strings(keys)
+		var pairs []string
+		for _, k := range keys {
+			pairs = append(pairs, fmt.Sprintf("%s: %v", k, toolCall.Function.Arguments[k]))
+		}
+		if len(pairs) > 0 {
+			argsDisplay = strings.Join(pairs, ", ")
+		} else {
+			argsDisplay = "(no arguments)"
+		}
+
+		// Show tool execution in progress with cleaner format
+		out += fmt.Sprintf("🔧 Executing tool '%s'%s\n",
+			formatValues+toolCall.Function.Name+formatExplanation, formatExplanation)
+		out += fmt.Sprintf(" Arguments: %s%s%s\n",
+			formatValues, argsDisplay, formatExplanation)
	}
	if !plainText {
		out += readline.ColorDefault
	}
	return out
}

func renderToolResults(toolResults []api.ToolResult, plainText bool) string {
	out := ""
	formatExplanation := ""
	formatValues := ""
	formatError := ""
	if !plainText {
		formatExplanation = readline.ColorGrey + readline.ColorBold
		formatValues = readline.ColorDefault
		// Use bold for errors since ColorRed doesn't exist
		formatError = readline.ColorBold
		out += formatExplanation
	}
	for i, toolResult := range toolResults {
		if i > 0 {
			out += "\n"
		}

		// Tool name and arguments already shown in renderToolCalls
		// Just show the result or error here
		if toolResult.Error != "" {
			// Parse error for better context
			errorMsg := toolResult.Error
			// Try to extract meaningful error from MCP errors
			if strings.Contains(errorMsg, "MCP tool returned error") {
				errorMsg = "Tool execution failed"
			}
			// Look for specific error patterns
			if strings.Contains(toolResult.Error, "Parent directory does not exist") {
				errorMsg = "Parent directory does not exist - check path"
			} else if strings.Contains(toolResult.Error, "permission denied") {
				errorMsg = "Permission denied - insufficient privileges"
			} else if strings.Contains(toolResult.Error, "Invalid arguments") {
				errorMsg = "Invalid tool arguments provided"
			} else if strings.Contains(toolResult.Error, "file not found") {
				errorMsg = "File or directory not found"
			}

			// Truncate long error messages (rune-safe for UTF-8)
			errorRunes := []rune(errorMsg)
			if len(errorRunes) > 200 {
				errorMsg = string(errorRunes[:197]) + "..."
			}

			out += fmt.Sprintf("❌ Error: %s%s%s\n",
				formatError, errorMsg, formatExplanation)
		} else {
			content := toolResult.Content
			if strings.TrimSpace(content) == "" {
				// Empty result - show a clear indicator
				out += fmt.Sprintf("✅ Result: %s(empty)%s\n",
					formatValues, formatExplanation)
			} else {
				// Truncate very long results for display (rune-safe for UTF-8)
				runes := []rune(content)
				if len(runes) > 200 {
					content = string(runes[:197]) + "..."
				}
				out += fmt.Sprintf("✅ Result:\n%s%s%s\n",
					formatValues, content, formatExplanation)
			}
		}
	}
	if !plainText {
		out += readline.ColorDefault
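To show what the renderers emit, a small sketch with plainText=true, so the readline color codes are empty strings. It assumes ToolCallFunction carries the usual Name and Arguments fields:

	calls := []api.ToolCall{{Function: api.ToolCallFunction{
		Name:      "read_file",
		Arguments: api.ToolCallFunctionArguments{"path": "/tmp/notes.txt"},
	}}}
	fmt.Print(renderToolCalls(calls, true))
	// 🔧 Executing tool 'read_file'
	//  Arguments: path: /tmp/notes.txt

	results := []api.ToolResult{{ToolName: "read_file", Content: "hello"}}
	fmt.Print(renderToolResults(results, true))
	// ✅ Result:
	// hello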
@@ -4,11 +4,11 @@ import (
	"fmt"
	"log/slog"
	"strings"

	"github.com/ollama/ollama/api"
)

-// MCPCodeAPI provides a code-like interface for MCP tools
+// MCPCodeAPI provides context injection for MCP tools
type MCPCodeAPI struct {
	manager *MCPManager
}
@@ -20,83 +20,50 @@ func NewMCPCodeAPI(manager *MCPManager) *MCPCodeAPI {
	}
}

-// GenerateMinimalContext returns essential context for tool usage
+// GenerateMinimalContext returns essential runtime context for tool usage.
+// Tool schemas are already provided via the template's TypeScript rendering,
+// so we only need to add runtime-specific info like working directories.
func (m *MCPCodeAPI) GenerateMinimalContext(configs []api.MCPServerConfig) string {
-	slog.Debug("GenerateMinimalContext called", "configs_count", len(configs))
	if len(configs) == 0 {
-		slog.Debug("No MCP configs provided, returning empty context")
		return ""
	}

	var context strings.Builder
	context.WriteString("\n=== MCP Tool Context ===\n")

	// Add filesystem working directory if applicable
	for _, config := range configs {
-		slog.Debug("Processing MCP config", "command", config.Command, "args", config.Args)
-		// Check if this is a filesystem server (command or first arg contains filesystem)
-		isFilesystem := strings.Contains(config.Command, "filesystem") ||
-			(len(config.Args) > 0 && strings.Contains(config.Args[0], "filesystem"))
-
-		if isFilesystem && len(config.Args) > 1 {
-			// Extract working directory from filesystem server
-			workingDir := config.Args[1]
-			slog.Debug("Adding filesystem context", "working_dir", workingDir)
+		if workingDir := m.extractFilesystemPath(config); workingDir != "" {
			context.WriteString(fmt.Sprintf(`
-Filesystem tools are available with these constraints:
-- Working directory: %s
-- All file operations must use paths within this directory
-- Example usage:
-  - List files: "List all files in %s"
-  - Read file: "Read %s/filename.txt"
-  - Create file: "Create %s/newfile.txt with content"
-- Paths outside %s will be rejected
-
-When working with files, ALWAYS use the full path starting with %s
-`, workingDir, workingDir, workingDir, workingDir, workingDir, workingDir))
+Filesystem working directory: %s
+All filesystem tool paths must be within this directory.
+`, workingDir))
		}
-		// Add other server types as needed
	}

	context.WriteString("\n")

	result := context.String()
-	slog.Debug("Generated MCP context", "length", len(result))
+	if result != "" {
+		slog.Debug("Generated MCP context", "length", len(result))
+	}
	return result
}

-// GenerateProgressiveContext returns context based on what tools are being used
-func (m *MCPCodeAPI) GenerateProgressiveContext(toolNames []string) string {
-	var context strings.Builder
-
-	// Group tools by server
-	serverTools := make(map[string][]string)
-	for _, toolName := range toolNames {
-		if clientName, exists := m.manager.GetToolClient(toolName); exists {
-			serverTools[clientName] = append(serverTools[clientName], toolName)
-		}
+// extractFilesystemPath extracts the working directory from filesystem server config
+func (m *MCPCodeAPI) extractFilesystemPath(config api.MCPServerConfig) string {
+	isFilesystem := strings.Contains(config.Command, "filesystem") ||
+		(len(config.Args) > 0 && strings.Contains(strings.Join(config.Args, " "), "filesystem"))
+
+	if isFilesystem && len(config.Args) > 0 {
+		// Path is typically the last argument
+		return config.Args[len(config.Args)-1]
	}
-
-	// Generate context for each server's tools
-	for serverName, tools := range serverTools {
-		context.WriteString(fmt.Sprintf("\n%s tools being used:\n", serverName))
-		for _, tool := range tools {
-			// Get tool definition from manager
-			if toolDef := m.manager.GetToolDefinition(serverName, tool); toolDef != nil {
-				context.WriteString(fmt.Sprintf("- %s: %s\n", tool, toolDef.Function.Description))
-			}
-		}
-	}
-
-	return context.String()
+	return ""
}

-// InjectContextIntoMessages intelligently injects context into the message stream
+// InjectContextIntoMessages adds runtime context to the message stream
func (m *MCPCodeAPI) InjectContextIntoMessages(messages []api.Message, configs []api.MCPServerConfig) []api.Message {
	// Generate minimal context
	context := m.GenerateMinimalContext(configs)
	if context == "" {
		return messages
	}

	// Check if there's already a system message
	if len(messages) > 0 && messages[0].Role == "system" {
		// Append to existing system message
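A quick check of the path-extraction heuristic above (illustrative; NewMCPCodeAPI(nil) works here because extractFilesystemPath never touches the manager). Note that the helper assumes the directory is the last argument, which holds for the stock filesystem server invocation but would break if trailing flags were added:

	cfg := api.MCPServerConfig{
		Name:    "filesystem",
		Command: "npx",
		Args:    []string{"-y", "@modelcontextprotocol/server-filesystem", "/data/project"},
	}
	fmt.Println(NewMCPCodeAPI(nil).extractFilesystemPath(cfg)) // "/data/project"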
@@ -109,41 +76,6 @@ func (m *MCPCodeAPI) InjectContextIntoMessages(messages []api.Message, configs []api.MCPServerConfig) []api.Message {
	}
		messages = append([]api.Message{systemMsg}, messages...)
	}

	return messages
}

-// ExtractWorkingDirectory extracts the working directory from MCP server args
-func ExtractWorkingDirectory(config api.MCPServerConfig) string {
-	if strings.Contains(config.Command, "filesystem") && len(config.Args) > 1 {
-		return config.Args[1]
-	}
-	return ""
-}
-
-// GenerateToolCallExample generates an example of how to call a specific tool
-func (m *MCPCodeAPI) GenerateToolCallExample(serverName, toolName string) string {
-	workingDir := ""
-
-	// Get working directory if filesystem
-	if serverName == "filesystem" {
-		if clients := m.manager.GetServerNames(); len(clients) > 0 {
-			// This is a simplified approach - in production we'd properly track server configs
-			workingDir = "/home/velvetm/Desktop/mcp-test-files" // Would be extracted from actual config
-		}
-	}
-
-	// Generate appropriate example based on tool
-	switch toolName {
-	case "list_directory":
-		return fmt.Sprintf(`"List all files in %s"`, workingDir)
-	case "read_file":
-		return fmt.Sprintf(`"Read the file %s/example.txt"`, workingDir)
-	case "write_file":
-		return fmt.Sprintf(`"Create a file at %s/output.txt with content 'Hello World'"`, workingDir)
-	case "create_directory":
-		return fmt.Sprintf(`"Create a directory called %s/newdir"`, workingDir)
-	default:
-		return fmt.Sprintf(`"Use the %s tool"`, toolName)
-	}
-}
server/routes.go (703 lines changed)
@@ -52,6 +52,17 @@ import (
	"github.com/ollama/ollama/version"
)

// CompletionResult holds the result of a completion request
type CompletionResult struct {
	Content    string
	Thinking   string
	ToolCalls  []api.ToolCall
	Done       bool
	DoneReason string
	Metrics    api.Metrics
	Error      error
}

const signinURLStr = "https://ollama.com/connect?name=%s&key=%s"

func shouldUseHarmony(model *Model) bool {
@@ -337,10 +348,11 @@ func (s *Server) GenerateHandler(c *gin.Context) {
		m.Config.Parser = "harmony"
	}

	if !req.Raw && m.Config.Parser != "" {
		builtinParser = parsers.ParserForName(m.Config.Parser)
		if builtinParser != nil {
-			// no tools or last message for generate endpoint
+			// Initialize parser for thinking extraction only (tools not supported in Generate API)
			builtinParser.Init(nil, nil, req.Think)
		}
	}

@@ -459,7 +471,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
	// the real chat handler, but doing this as a stopgap to get renderer
	// support for generate
	if values.Messages != nil && values.Suffix == "" && req.Template == "" {
-		prompt, images, err = chatPrompt(c.Request.Context(), m, r.Tokenize, opts, values.Messages, []api.Tool{}, req.Think, req.Truncate == nil || *req.Truncate)
+		prompt, images, err = chatPrompt(c.Request.Context(), m, r.Tokenize, opts, values.Messages, nil, req.Think, req.Truncate == nil || *req.Truncate)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return

@@ -510,8 +522,8 @@ func (s *Server) GenerateHandler(c *gin.Context) {
	ch := make(chan any)
	go func() {
		// TODO (jmorganca): avoid building the response twice both here and below
-		var sb strings.Builder
		defer close(ch)
+		var sb strings.Builder
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
			Prompt: prompt,
			Images: images,

@@ -537,16 +549,13 @@ func (s *Server) GenerateHandler(c *gin.Context) {
		}

		if builtinParser != nil {
-			content, thinking, toolCalls, err := builtinParser.Add(cr.Content, cr.Done)
+			content, thinking, _, err := builtinParser.Add(cr.Content, cr.Done)
			if err != nil {
				ch <- gin.H{"error": err.Error()}
				return
			}
			res.Response = content
			res.Thinking = thinking
-			if cr.Done && len(toolCalls) > 0 {
-				res.ToolCalls = toolCalls
-			}
		} else if thinkingState != nil {
			thinking, content := thinkingState.AddContent(cr.Content)
			res.Thinking = thinking

@@ -574,7 +583,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {

		if builtinParser != nil {
			// only send messages with meaningful content (empty messages confuse clients)
-			if res.Response != "" || res.Thinking != "" || res.Done || len(res.ToolCalls) > 0 {
+			if res.Response != "" || res.Thinking != "" || res.Done {
				ch <- res
			}
@@ -1508,6 +1517,10 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
	r.POST("/api/show", s.ShowHandler)
	r.DELETE("/api/delete", s.DeleteHandler)

	// MCP Tools discovery
	r.GET("/api/tools", s.ToolsHandler)
	r.POST("/api/tools", s.ToolsHandler)

	r.POST("/api/me", s.WhoamiHandler)

	r.POST("/api/signout", s.SignoutHandler)
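For completeness, querying the new endpoint. ToolsHandler lives in routes_tools.go, which is not part of this diff, so the response schema is not shown here; the sketch just dumps the raw body:

	resp, err := http.Get("http://localhost:11434/api/tools")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))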
@@ -1852,6 +1865,211 @@ func toolCallId() string {
	return "call_" + strings.ToLower(string(b))
}

// executeCompletionWithTools executes a completion and collects the full response
// This is a synchronous wrapper around the async completion callback
// When suppressDone is true, the Done flag is not sent to the client channel
// (used for intermediate rounds in multi-round tool execution)
func (s *Server) executeCompletionWithTools(
	ctx context.Context,
	r llm.LlamaServer,
	prompt string,
	images []llm.ImageData,
	opts *api.Options,
	req api.ChatRequest,
	m *Model,
	builtinParser parsers.Parser,
	thinkingState *thinking.Parser,
	ch chan any,
	checkpointStart time.Time,
	checkpointLoaded time.Time,
	truncate bool,
	suppressDone bool,
) (*CompletionResult, error) {
	result := &CompletionResult{}
	done := make(chan error, 1)

	// For tracking tool calls when using tools
	var toolParser *tools.Parser
	if len(req.Tools) > 0 && builtinParser == nil {
		toolParser = tools.NewParser(m.Template.Template, req.Tools)
	}

	// Track thinking content for structured outputs
	var thinkingBuilder strings.Builder

	// Accumulate tool calls across streaming chunks
	var accumulatedToolCalls []api.ToolCall

	// Create a new context for this completion
	completionCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	err := r.Completion(completionCtx, llm.CompletionRequest{
		Prompt:      prompt,
		Images:      images,
		Format:      req.Format,
		Options:     opts,
		Shift:       req.Shift == nil || *req.Shift,
		Truncate:    truncate,
		Logprobs:    req.Logprobs,
		TopLogprobs: req.TopLogprobs,
	}, func(resp llm.CompletionResponse) {
		// When suppressDone is true, don't signal Done to client
		// (used for intermediate rounds in multi-round tool execution)
		clientDone := resp.Done && !suppressDone

		res := api.ChatResponse{
			Model:     req.Model,
			CreatedAt: time.Now().UTC(),
			Message:   api.Message{Role: "assistant", Content: resp.Content},
			Done:      clientDone,
			Metrics: api.Metrics{
				PromptEvalCount:    resp.PromptEvalCount,
				PromptEvalDuration: resp.PromptEvalDuration,
				EvalCount:          resp.EvalCount,
				EvalDuration:       resp.EvalDuration,
			},
			Logprobs: toAPILogprobs(resp.Logprobs),
		}

		if resp.Done {
			res.DoneReason = resp.DoneReason.String()
			res.TotalDuration = time.Since(checkpointStart)
			res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
			result.DoneReason = res.DoneReason
			result.Metrics = res.Metrics
		}

		// Handle builtin parser (for models with native tool support)
		if builtinParser != nil {
			content, thinking, toolCalls, err := builtinParser.Add(resp.Content, resp.Done)
			if err != nil {
				result.Error = err
				done <- err
				return
			}

			res.Message.Content = content
			res.Message.Thinking = thinking
			res.Message.ToolCalls = toolCalls

			thinkingBuilder.WriteString(thinking)

			// Accumulate results
			result.Content += content
			result.Thinking += thinking

			// Accumulate tool calls for multi-round MCP execution
			if len(toolCalls) > 0 {
				accumulatedToolCalls = append(accumulatedToolCalls, toolCalls...)
			}

			// On completion, set all accumulated tool calls
			if resp.Done {
				result.ToolCalls = accumulatedToolCalls
			}

			// Stream to client if there's content to stream
			if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || resp.Done || len(res.Logprobs) > 0 {
				ch <- res
			}

			if resp.Done {
				result.Done = true
				done <- nil
			}
			return
		}

		// Handle thinking state parser
		if thinkingState != nil {
			thinkingContent, remainingContent := thinkingState.AddContent(res.Message.Content)
			if thinkingContent == "" && remainingContent == "" && !resp.Done {
				// Need more content to decide
				return
			}

			res.Message.Thinking = thinkingContent
			thinkingBuilder.WriteString(thinkingContent)
			res.Message.Content = remainingContent
			result.Thinking += thinkingContent
		}

		// Handle tool parsing (for models without native tool support)
		if len(req.Tools) > 0 && builtinParser == nil {
			toolCalls, content := toolParser.Add(res.Message.Content)
			if len(content) > 0 {
				res.Message.Content = content
				result.Content += content
			} else if len(toolCalls) > 0 {
				res.Message.ToolCalls = toolCalls
				res.Message.Content = ""
				// Keep accumulating tool calls
				accumulatedToolCalls = toolCalls
			} else if res.Message.Thinking != "" {
				// don't return, fall through to send
			} else {
				// Send logprobs while content is being buffered by the parser for tool calls
				if len(res.Logprobs) > 0 && !resp.Done {
					logprobRes := res
					logprobRes.Message.Content = ""
					logprobRes.Message.ToolCalls = nil
					ch <- logprobRes
				}

				if resp.Done {
					res.Message.Content = toolParser.Content()
					// Set accumulated tool calls in result before signaling done
					if len(accumulatedToolCalls) > 0 {
						result.ToolCalls = accumulatedToolCalls
					}
					// If no tool calls, get final content from parser
					if len(result.ToolCalls) == 0 && toolParser != nil {
						result.Content = toolParser.Content()
					}
					result.Done = true
					ch <- res
					done <- nil
				}
				return
			}
		} else {
			result.Content += res.Message.Content
		}

		// Stream to client
		ch <- res

		if resp.Done {
			// If we accumulated tool calls, set them in result
			if len(accumulatedToolCalls) > 0 {
				result.ToolCalls = accumulatedToolCalls
			}
			// If no tool calls, get final content from parser
			if len(result.ToolCalls) == 0 && toolParser != nil {
				result.Content = toolParser.Content()
			}
			result.Done = true
			done <- nil
		}
	})

	if err != nil {
		return nil, err
	}

	// Wait for completion or context cancellation
	select {
	case err := <-done:
		if err != nil {
			return nil, err
		}
		return result, nil
	case <-ctx.Done():
		return nil, ctx.Err()
	}
}

func (s *Server) ChatHandler(c *gin.Context) {
	checkpointStart := time.Now()
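The synchronous-wrapper shape above is worth calling out in miniature: an async callback API is adapted to a blocking call by signaling a buffered channel from the final callback and racing it against context cancellation. A generic sketch (runSync and start are hypothetical stand-ins, not part of this diff):

	package main

	import "context"

	// runSync blocks until the callback reports done or ctx is canceled.
	func runSync(ctx context.Context, start func(cb func(done bool)) error) error {
		doneCh := make(chan error, 1) // buffered so the callback never blocks
		if err := start(func(done bool) {
			if done {
				doneCh <- nil
			}
		}); err != nil {
			return err
		}
		select {
		case err := <-doneCh:
			return err
		case <-ctx.Done():
			return ctx.Err()
		}
	}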
@@ -2018,6 +2236,80 @@ func (s *Server) ChatHandler(c *gin.Context) {
	}
}

	// =========================================================================
	// MCP (Model Context Protocol) Integration
	// =========================================================================
	//
	// MCP allows the model to execute external tools via JSON-RPC servers.
	// This section handles:
	//   1. Manager initialization (from session cache or new)
	//   2. Tool discovery (list available tools from MCP servers)
	//   3. Context injection (inform model about available tools)
	//   4. Parser configuration (for tool call detection)
	//
	// Entry points:
	//   - req.MCPServers: Explicit server configs from API
	//   - req.ToolsPath: Path-based auto-enable from --tools flag
	//
	// See: mcp.go, mcp_manager.go for implementation details
	// =========================================================================

	var mcpManager *MCPManager

	if len(req.MCPServers) > 0 || req.ToolsPath != "" {
		if req.ToolsPath != "" {
			// Path-based mode: auto-enable servers matching the tools path
			// Used by CLI: `ollama run model --tools /path`
			slog.Debug("Using tools path for MCP manager", "tools_path", req.ToolsPath, "model", req.Model)
			mcpManager, err = GetMCPManagerForPath(req.Model, req.ToolsPath)
			if err != nil {
				slog.Error("Failed to get MCP manager for tools path", "error", err)
				// Continue without MCP - graceful degradation
			}
		} else if len(req.MCPServers) > 0 {
			// Explicit mode: use server configs from API request
			// Used by API: POST /api/chat with mcp_servers field
			sessionID := GenerateSessionID(req)
			slog.Debug("Getting MCP manager", "session", sessionID, "servers", len(req.MCPServers))
			mcpManager, err = GetMCPManager(sessionID, req.MCPServers)
			if err != nil {
				slog.Error("Failed to get MCP manager", "error", err)
				// Continue without MCP - graceful degradation
			}
		}

		if mcpManager != nil {
			// Step 1: Discover tools from MCP servers and add to request
			mcpTools := mcpManager.GetAllTools()
			req.Tools = append(req.Tools, mcpTools...)

			// Step 2: Inject context to help model use tools effectively
			// Use programmatic context injection from tool schemas
			codeAPI := NewMCPCodeAPI(mcpManager)
			req.Messages = codeAPI.InjectContextIntoMessages(req.Messages, req.MCPServers)

			// Step 3: Auto-configure parser for tool call detection
			if len(req.Tools) > 0 && m.Config.Parser == "" {
				if m.Config.ModelFamily == "qwen2" || m.Config.ModelFamily == "qwen3" {
					m.Config.Parser = "qwen3-vl-instruct"
				}
			}

			// Step 4: Update capabilities now that we have tools
			if len(req.Tools) > 0 && !slices.Contains(caps, model.CapabilityTools) {
				caps = append(caps, model.CapabilityTools)
			}

			// Cleanup: Close MCP manager when request completes
			// Note: Session manager may cache for reuse within TTL
			// (deferred only when a manager exists, so graceful-degradation
			// paths cannot dereference a nil manager)
			defer func() {
				if err := mcpManager.Close(); err != nil {
					slog.Warn("Error closing MCP manager", "error", err)
				}
			}()
		}
	}

	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
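Putting the pieces together, the stream a client observes for a single MCP tool round looks roughly like this (field values illustrative; Done is withheld until the final round because suppressDone is set whenever an MCP manager is active):

	1. assistant content chunks        Message{Role: "assistant", Content: ...}
	2. tool calls (before execution)   Message{Role: "assistant", ToolCalls: [...]}
	3. tool results                    Message{Role: "assistant", ToolResults: [...]}
	4. next round's content            Message{Role: "assistant", Content: ...}
	5. final response                  Done: true, DoneReason: ...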
@@ -2106,11 +2398,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
		}
	}

-	var toolParser *tools.Parser
-	if len(req.Tools) > 0 && (builtinParser == nil || !builtinParser.HasToolSupport()) {
-		toolParser = tools.NewParser(m.Template.Template, req.Tools)
-	}
-
	type structuredOutputsState int
	const (
		structuredOutputsState_None structuredOutputsState = iota
@ -2122,181 +2409,223 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||
go func() {
|
||||
defer close(ch)
|
||||
|
||||
structuredOutputsState := structuredOutputsState_None
|
||||
// Initialize for multi-round execution
|
||||
// NOTE: Upstream's structuredOutputsState for thinking models is not yet integrated
|
||||
// TODO: Add structuredOutputsState support for thinking models with format constraints
|
||||
currentMsgs := msgs
|
||||
maxRounds := req.MaxToolRounds
|
||||
if maxRounds == 0 {
|
||||
maxRounds = 15 // Default maximum rounds
|
||||
}
|
||||
|
||||
for {
|
||||
var tb strings.Builder
|
||||
slog.Debug("Starting multi-round execution",
|
||||
"mcpManager", mcpManager != nil,
|
||||
"tools_count", len(req.Tools),
|
||||
"max_rounds", maxRounds)
|
||||
|
||||
currentFormat := req.Format
|
||||
// structured outputs via double request is enabled when:
|
||||
// 1. the model supports the thinking capability and
|
||||
// 2. it uses a built-in parser or our generic thinking parser
|
||||
// MAIN LOOP - Multi-round execution for tool calling
|
||||
var round int
|
||||
for round = 0; round < maxRounds; round++ {
|
||||
slog.Debug("Starting round", "round", round, "messages", len(currentMsgs))
|
||||
|
||||
// Note that the current approach does not work for (potential future)
|
||||
// non-thinking models that emit anything before actual content. This
|
||||
// current approach uses the transition from parsed thinking content to
|
||||
// parsed non-thinking content as the signal to turn constraining on
|
||||
|
||||
if req.Format != nil && structuredOutputsState == structuredOutputsState_None && ((builtinParser != nil || thinkingState != nil) && slices.Contains(m.Capabilities(), model.CapabilityThinking)) {
|
||||
currentFormat = nil
|
||||
}
|
||||
|
||||
// sets up new context given parent context per request
|
||||
ctx, cancel := context.WithCancel(c.Request.Context())
|
||||
err := r.Completion(ctx, llm.CompletionRequest{
|
||||
Prompt: prompt,
|
||||
Images: images,
|
||||
Format: currentFormat,
|
||||
Options: opts,
|
||||
Shift: req.Shift == nil || *req.Shift,
|
||||
Truncate: truncate,
|
||||
Logprobs: req.Logprobs,
|
||||
TopLogprobs: req.TopLogprobs,
|
||||
}, func(r llm.CompletionResponse) {
|
||||
res := api.ChatResponse{
|
||||
Model: req.Model,
|
||||
CreatedAt: time.Now().UTC(),
|
||||
Message: api.Message{Role: "assistant", Content: r.Content},
|
||||
Done: r.Done,
|
||||
Metrics: api.Metrics{
|
||||
PromptEvalCount: r.PromptEvalCount,
|
||||
PromptEvalDuration: r.PromptEvalDuration,
|
||||
EvalCount: r.EvalCount,
|
||||
EvalDuration: r.EvalDuration,
|
||||
},
|
||||
Logprobs: toAPILogprobs(r.Logprobs),
|
||||
}
|
||||
|
||||
if r.Done {
|
||||
res.DoneReason = r.DoneReason.String()
|
||||
res.TotalDuration = time.Since(checkpointStart)
|
||||
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
||||
}
|
||||
|
||||
if builtinParser != nil {
|
||||
slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser input", "parser", m.Config.Parser, "content", r.Content)
|
||||
|
||||
content, thinking, toolCalls, err := builtinParser.Add(r.Content, r.Done)
|
||||
if err != nil {
|
||||
ch <- gin.H{"error": err.Error()}
|
||||
return
|
||||
}
|
||||
|
||||
res.Message.Content = content
|
||||
res.Message.Thinking = thinking
|
||||
for i := range toolCalls {
|
||||
toolCalls[i].ID = toolCallId()
|
||||
}
|
||||
res.Message.ToolCalls = toolCalls
|
||||
|
||||
tb.WriteString(thinking)
|
||||
// we are now receiving content from the model - we should start applying structured outputs
|
||||
if structuredOutputsState == structuredOutputsState_None && req.Format != nil && tb.String() != "" && res.Message.Content != "" {
|
||||
structuredOutputsState = structuredOutputsState_ReadyToApply
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
|
||||
if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || r.Done || len(res.Logprobs) > 0 {
|
||||
slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser output", "parser", m.Config.Parser, "content", content, "thinking", thinking, "toolCalls", toolCalls, "done", r.Done)
|
||||
ch <- res
|
||||
} else {
|
||||
slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser empty output", "parser", m.Config.Parser)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if thinkingState != nil {
|
||||
thinkingContent, remainingContent := thinkingState.AddContent(res.Message.Content)
|
||||
if thinkingContent == "" && remainingContent == "" && !r.Done {
|
||||
// need to accumulate more to decide what to send
|
||||
return
|
||||
}
|
||||
res.Message.Thinking = thinkingContent
|
||||
tb.WriteString(thinkingContent)
|
||||
// emit the collected thinking text before restarting with structured outputs and clear unstructured content
|
||||
// to avoid leaking mixed tokens like "</think>Hello"
|
||||
if structuredOutputsState == structuredOutputsState_None && req.Format != nil && tb.String() != "" && remainingContent != "" {
|
||||
structuredOutputsState = structuredOutputsState_ReadyToApply
|
||||
res.Message.Content = ""
|
||||
ch <- res
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
res.Message.Content = remainingContent
|
||||
}
|
||||
|
||||
if len(req.Tools) > 0 {
|
||||
toolCalls, content := toolParser.Add(res.Message.Content)
|
||||
if len(content) > 0 {
|
||||
res.Message.Content = content
|
||||
} else if len(toolCalls) > 0 {
|
||||
for i := range toolCalls {
|
||||
toolCalls[i].ID = toolCallId()
|
||||
}
|
||||
res.Message.ToolCalls = toolCalls
|
||||
res.Message.Content = ""
|
||||
} else if res.Message.Thinking != "" {
|
||||
// don't return, fall through to send
|
||||
} else {
|
||||
// Send logprobs while content is being buffered by the parser for tool calls
|
||||
if len(res.Logprobs) > 0 && !r.Done {
|
||||
logprobRes := res
|
||||
logprobRes.Message.Content = ""
|
||||
logprobRes.Message.ToolCalls = nil
|
||||
ch <- logprobRes
|
||||
}
|
||||
|
||||
if r.Done {
|
||||
res.Message.Content = toolParser.Content()
|
||||
ch <- res
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
ch <- res
|
||||
})
|
||||
if err != nil {
|
||||
if structuredOutputsState == structuredOutputsState_ReadyToApply && strings.Contains(err.Error(), "context canceled") && c.Request.Context().Err() == nil {
|
||||
// only ignores error if it's a context cancellation due to setting structured outputs
|
||||
} else {
|
||||
var serr api.StatusError
|
||||
if errors.As(err, &serr) {
|
||||
ch <- gin.H{"error": serr.ErrorMessage, "status": serr.StatusCode}
|
||||
} else {
|
||||
ch <- gin.H{"error": err.Error()}
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// ignored structured outputs cancellation falls through to here, start a new request with the structured outputs and updated prompt. use the
|
||||
if structuredOutputsState == structuredOutputsState_ReadyToApply {
|
||||
structuredOutputsState = structuredOutputsState_Applying
|
||||
msg := api.Message{
|
||||
Role: "assistant",
|
||||
Thinking: tb.String(),
|
||||
}
|
||||
|
||||
msgs = append(msgs, msg)
|
||||
prompt, _, err = chatPrompt(c.Request.Context(), m, r.Tokenize, opts, msgs, processedTools, req.Think, truncate)
|
||||
// Re-render prompt and reset parser if not first round (tool results were added)
|
||||
if round > 0 {
|
||||
var err error
|
||||
prompt, images, err = chatPrompt(c.Request.Context(), m, r.Tokenize, opts, currentMsgs, processedTools, req.Think, truncate)
|
||||
if err != nil {
|
||||
slog.Error("chat prompt error applying structured outputs", "error", err)
|
||||
slog.Error("Failed to render prompt in round", "round", round, "error", err)
|
||||
ch <- gin.H{"error": err.Error()}
|
||||
return
|
||||
}
|
||||
// force constraining by terminating thinking header, the parser is already at this state
|
||||
// when the last message is thinking, the rendered for gpt-oss cannot disambiguate between having the
|
||||
// model continue thinking or ending thinking and outputting the final message.
|
||||
// TODO(parthsareen): consider adding prefill disambiguation logic to the renderer for structured outputs.
|
||||
if shouldUseHarmony(m) || (builtinParser != nil && m.Config.Parser == "harmony") {
|
||||
prompt += "<|end|><|start|>assistant<|channel|>final<|message|>"
|
||||
|
||||
// Create fresh parser instance for new round (parser has internal buffer state)
|
||||
if builtinParser != nil && m.Config.Parser != "" {
|
||||
builtinParser = parsers.ParserForName(m.Config.Parser)
|
||||
if builtinParser != nil {
|
||||
lastMsg := ¤tMsgs[len(currentMsgs)-1]
|
||||
builtinParser.Init(req.Tools, lastMsg, req.Think)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
break
|
||||
// Execute completion and collect full response
|
||||
// When MCP is enabled, suppress Done flag during intermediate rounds
|
||||
// to prevent client from closing connection prematurely
|
||||
suppressDone := mcpManager != nil
|
||||
completionResult, err := s.executeCompletionWithTools(
|
||||
c.Request.Context(),
|
||||
r,
|
||||
prompt,
|
||||
images,
|
||||
opts,
|
||||
req,
|
||||
m,
|
||||
builtinParser,
|
||||
thinkingState,
|
||||
ch,
|
||||
checkpointStart,
|
||||
checkpointLoaded,
|
||||
truncate,
|
||||
suppressDone,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
slog.Error("Completion failed", "round", round, "error", err)
|
||||
var serr api.StatusError
|
||||
if errors.As(err, &serr) {
|
||||
ch <- gin.H{"error": serr.ErrorMessage, "status": serr.StatusCode}
|
||||
} else {
|
||||
ch <- gin.H{"error": err.Error()}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Check if model called tools
|
||||
if len(completionResult.ToolCalls) == 0 {
|
||||
// No tools called - conversation is complete
|
||||
slog.Debug("No tools called, conversation complete", "round", round)
|
||||
break // Exit the loop - we're done
|
||||
}
|
||||
|
||||
// Validate tool calls are not empty or malformed
|
||||
validToolCalls := 0
|
||||
for _, tc := range completionResult.ToolCalls {
|
||||
if tc.Function.Name != "" {
|
||||
validToolCalls++
|
||||
} else {
|
||||
slog.Warn("Invalid tool call detected", "round", round, "tool", tc)
|
||||
}
|
||||
}
|
||||
|
||||
if validToolCalls == 0 {
|
||||
slog.Warn("No valid tool calls found, exiting", "round", round)
|
||||
break
|
||||
}
|
||||
|
||||
			// The model called tools - execute them if we have an MCP manager
			if mcpManager != nil {
				slog.Debug("MCP tool execution starting",
					"tools_in_response", len(completionResult.ToolCalls),
					"valid_tools", validToolCalls,
					"round", round)

				// Send tool calls to the client for display BEFORE executing them.
				// This ensures the client can show "Executing tool..." for all rounds.
				// Note: don't include Content here - it was already streamed during completion.
				ch <- api.ChatResponse{
					Model: req.Model,
					Message: api.Message{
						Role:      "assistant",
						ToolCalls: completionResult.ToolCalls,
					},
				}

				// Analyze the execution plan
				executionPlan := mcpManager.AnalyzeExecutionPlan(completionResult.ToolCalls)
				slog.Debug("Execution plan determined",
					"sequential", executionPlan.RequiresSequential,
					"reason", executionPlan.Reason)

				// Execute the tools according to the plan
				results := mcpManager.ExecuteWithPlan(completionResult.ToolCalls, executionPlan)

				// Log the tool calls for debugging
				for i, tc := range completionResult.ToolCalls {
					slog.Info("Tool call details",
						"round", round,
						"index", i,
						"name", tc.Function.Name,
						"arguments", tc.Function.Arguments)
				}

				// Add the assistant message with tool calls to the conversation
				assistantMsg := api.Message{
					Role:      "assistant",
					Content:   completionResult.Content, // preserve any content
					ToolCalls: completionResult.ToolCalls,
				}
				currentMsgs = append(currentMsgs, assistantMsg)

				// Add tool result messages to the conversation and send them to the
				// client for display. Each entry in results corresponds 1:1 with
				// completionResult.ToolCalls.
				toolResultsForDisplay := make([]api.ToolResult, 0, len(results))
				for i, result := range results {
					toolMsg := api.Message{
						Role:     "tool",
						ToolName: completionResult.ToolCalls[i].Function.Name,
					}

					// Create a display result that includes the arguments for context
					displayResult := api.ToolResult{
						ToolName:  completionResult.ToolCalls[i].Function.Name,
						Arguments: completionResult.ToolCalls[i].Function.Arguments,
						Content:   result.Content,
					}

					if result.Error != nil {
						// JSON-encode the error for proper template rendering
						if encoded, err := json.Marshal(fmt.Sprintf("Error: %v", result.Error)); err == nil {
							toolMsg.Content = string(encoded)
						} else {
							toolMsg.Content = fmt.Sprintf("\"Error: %v\"", result.Error)
						}
						displayResult.Error = result.Error.Error()
						slog.Warn("Tool execution failed",
							"tool", completionResult.ToolCalls[i].Function.Name,
							"error", result.Error)
					} else {
						// JSON-encode the content for proper template rendering.
						// The template expects {"content": {{ .Content }}}, where Content must be a JSON string.
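						// For example, json.Marshal("done\n\"ok\"") yields the literal
						// bytes "done\n\"ok\"" - surrounding quotes included, newline
						// and inner quotes escaped - so multi-line or quoted tool
						// output cannot break the rendered JSON.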
						if encoded, err := json.Marshal(result.Content); err == nil {
							toolMsg.Content = string(encoded)
						} else {
							toolMsg.Content = result.Content
						}
					}

					currentMsgs = append(currentMsgs, toolMsg)
					toolResultsForDisplay = append(toolResultsForDisplay, displayResult)
				}

				// Send the tool results to the client for display
				if len(toolResultsForDisplay) > 0 {
					ch <- api.ChatResponse{
						Model: req.Model,
						Message: api.Message{
							Role:        "assistant",
							ToolResults: toolResultsForDisplay,
						},
					}
				}

				// Continue to the next round - the model will process the tool results
				slog.Info("Tools executed, continuing to next round",
					"round", round,
					"messages", len(currentMsgs),
					"last_tool", completionResult.ToolCalls[len(completionResult.ToolCalls)-1].Function.Name)

			} else {
				// No MCP manager - send tool calls to the client for external execution
				slog.Debug("No MCP manager, sending tool calls to client", "round", round)
				break // exit - the client will handle tool execution
			}
		} // end of maxRounds loop

		// Check whether we exhausted the round budget
		if round >= maxRounds {
			slog.Warn("Maximum tool execution rounds reached", "rounds", maxRounds)
			ch <- gin.H{"error": fmt.Sprintf("Maximum tool execution rounds (%d) exceeded", maxRounds)}
		}

		// When MCP was enabled, we suppressed Done flags during the loop.
		// Send a final Done: true to signal that the conversation is complete.
		if mcpManager != nil {
			ch <- api.ChatResponse{
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Message:    api.Message{Role: "assistant"},
				Done:       true,
				DoneReason: "stop",
			}
		}
	}()
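A minimal sketch of how a streaming client might consume the sequence this goroutine emits: content deltas, an assistant message carrying ToolCalls before execution, one carrying ToolResults afterwards, and a single final Done once every round has finished. It uses the Go client from the api package; the model name and prompt are placeholders, and only fields visible in this diff (Message.ToolCalls, Message.ToolResults, Done) are relied on.

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.ChatRequest{
		Model:    "llama3.2", // placeholder model name
		Messages: []api.Message{{Role: "user", Content: "What files are in /tmp?"}},
	}

	// The callback fires once per streamed ChatResponse. Tool-call and
	// tool-result messages arrive between content deltas; Done is set only
	// on the final response, after all tool rounds have completed.
	err = client.Chat(context.Background(), req, func(resp api.ChatResponse) error {
		for _, tc := range resp.Message.ToolCalls {
			fmt.Printf("\n[calling %s]\n", tc.Function.Name)
		}
		for _, tr := range resp.Message.ToolResults {
			fmt.Printf("[%s returned %d bytes]\n", tr.ToolName, len(tr.Content))
		}
		fmt.Print(resp.Message.Content)
		if resp.Done {
			fmt.Println()
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}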

@@ -2322,22 +2651,15 @@ func (s *Server) ChatHandler(c *gin.Context) {
		case gin.H:
			msg, ok := t["error"].(string)
			if !ok {
				msg = "unexpected error format in response"
			}

			status, ok := t["status"].(int)
			if !ok {
				status = http.StatusInternalServerError
			}

			c.JSON(status, gin.H{"error": msg})
			return
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
			return
		}
	}

	resp.Message.Content = sbContent.String()
	resp.Message.Thinking = sbThinking.String()
	resp.Logprobs = allLogprobs

@@ -2345,12 +2667,10 @@ func (s *Server) ChatHandler(c *gin.Context) {
		if len(toolCalls) > 0 {
			resp.Message.ToolCalls = toolCalls
		}

		c.JSON(http.StatusOK, resp)
		return
	}

	streamResponse(c, ch)
}

func handleScheduleError(c *gin.Context, name string, err error) {

@@ -2395,4 +2715,3 @@ func filterThinkTags(msgs []api.Message, m *Model) []api.Message {
	}
	return msgs
}

server/routes_tools.go
@@ -0,0 +1,91 @@
package server

import (
	"net/http"

	"github.com/gin-gonic/gin"
	"github.com/ollama/ollama/api"
)

// ToolsHandler handles requests to list available MCP tools.
// GET: returns the available MCP server definitions from configuration.
// POST with mcp_servers: returns the tools exposed by the specified MCP servers.
func (s *Server) ToolsHandler(c *gin.Context) {
	var req struct {
		MCPServers []api.MCPServerConfig `json:"mcp_servers,omitempty"`
	}

	if c.Request.Method == http.MethodPost {
		if err := c.BindJSON(&req); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
			return
		}
	}

	// If MCP servers were provided, list their tools
	if len(req.MCPServers) > 0 {
		manager := NewMCPManager(10)
		defer manager.Close()

		var allTools []ToolInfo
		seen := 0
		for _, config := range req.MCPServers {
			if err := manager.AddServer(config); err != nil {
				// Include the error in the response but continue with the remaining servers
				allTools = append(allTools, ToolInfo{
					Name:        config.Name,
					Description: "Failed to initialize: " + err.Error(),
					Error:       err.Error(),
				})
				continue
			}

			// GetAllTools returns the tools of every server registered so far
			// (assuming registration order is preserved), so only the entries
			// past seen belong to the server just added; without this the
			// earlier servers' tools would be listed again with the wrong name.
			tools := manager.GetAllTools()
			for _, tool := range tools[seen:] {
				allTools = append(allTools, ToolInfo{
					Name:        tool.Function.Name,
					Description: tool.Function.Description,
					Parameters:  &tool.Function.Parameters,
					ServerName:  config.Name,
				})
			}
			seen = len(tools)
		}

		c.JSON(http.StatusOK, ToolsResponse{
			Tools: allTools,
		})
		return
	}

	// Otherwise, list the available MCP server definitions
	defs, err := LoadMCPDefinitions()
	if err != nil {
		// Config parsing errors are client errors (bad config), not server errors
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid MCP configuration: " + err.Error()})
		return
	}

	servers := defs.ListServers()
	c.JSON(http.StatusOK, MCPServersResponse{
		Servers: servers,
	})
}
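A quick sketch of exercising the POST form of this endpoint with the standard library. The endpoint path and the request/response shapes come straight from the handler above; the server name "filesystem" is a placeholder, only the "name" field of api.MCPServerConfig is assumed, and 11434 is ollama's default port.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	// Placeholder payload: api.MCPServerConfig's full field set is defined elsewhere.
	body := []byte(`{"mcp_servers":[{"name":"filesystem"}]}`)

	resp, err := http.Post("http://localhost:11434/api/tools", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Mirrors ToolsResponse/ToolInfo from routes_tools.go, trimmed to the fields used below.
	var out struct {
		Tools []struct {
			Name        string `json:"name"`
			Description string `json:"description"`
			Server      string `json:"server"`
			Error       string `json:"error"`
		} `json:"tools"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		log.Fatal(err)
	}
	for _, t := range out.Tools {
		fmt.Printf("%s (%s): %s\n", t.Name, t.Server, t.Description)
	}
}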

// ToolInfo provides information about a single tool
type ToolInfo struct {
	Name        string                      `json:"name"`
	Description string                      `json:"description"`
	Parameters  *api.ToolFunctionParameters `json:"parameters,omitempty"`
	ServerName  string                      `json:"server,omitempty"`
	Error       string                      `json:"error,omitempty"`
}

// ToolsResponse contains the list of available tools
type ToolsResponse struct {
	Tools []ToolInfo `json:"tools"`
}

// MCPServersResponse contains the list of available MCP server types
type MCPServersResponse struct {
	Servers []MCPServerInfo `json:"servers"`
}
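For reference, a sketch of the wire shape these types produce, using a local mirror of ToolInfo's JSON-visible fields. The tool and server names are made up; the second entry shows how an AddServer failure surfaces in the same list.

package main

import (
	"encoding/json"
	"fmt"
)

// toolInfo mirrors ToolInfo above, trimmed to the fields exercised here.
type toolInfo struct {
	Name        string `json:"name"`
	Description string `json:"description"`
	ServerName  string `json:"server,omitempty"`
	Error       string `json:"error,omitempty"`
}

func main() {
	resp := struct {
		Tools []toolInfo `json:"tools"`
	}{Tools: []toolInfo{
		{Name: "read_file", Description: "Read a file from disk", ServerName: "filesystem"},
		{Name: "weather", Description: "Failed to initialize: connection refused", Error: "connection refused"},
	}}
	out, _ := json.MarshalIndent(resp, "", "  ")
	fmt.Println(string(out))
	// Prints:
	// {
	//   "tools": [
	//     {
	//       "name": "read_file",
	//       "description": "Read a file from disk",
	//       "server": "filesystem"
	//     },
	//     {
	//       "name": "weather",
	//       "description": "Failed to initialize: connection refused",
	//       "error": "connection refused"
	//     }
	//   ]
	// }
}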