From f760ae1fdd44a8b7c55c14654aa95dba0c98e6af Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Fri, 2 Jan 2026 00:35:27 -0500 Subject: [PATCH 01/13] api: add Anthropic Messages API compatibility layer Add middleware to support the Anthropic Messages API format at /v1/messages. This enables tools like Claude Code to work with Ollama models through the Anthropic API interface. Features: - Request/response transformation between Anthropic and internal formats - Streaming support with SSE events (message_start, content_block_delta, etc.) - Tool calling support (tool_use and tool_result content blocks) - Thinking/extended thinking block support - Image content block support (base64) - System prompt handling - Multi-turn conversation support - Proper stop_reason mapping (end_turn, max_tokens, tool_use) - Error responses in Anthropic format New files: - anthropic/anthropic.go: Types and transformation functions - middleware/anthropic.go: Request/response middleware --- anthropic/anthropic.go | 779 +++++++++++++++++++++++++++ docs/api/anthropic-compatibility.mdx | 339 ++++++++++++ middleware/anthropic.go | 152 ++++++ server/routes.go | 3 + 4 files changed, 1273 insertions(+) create mode 100644 anthropic/anthropic.go create mode 100644 docs/api/anthropic-compatibility.mdx create mode 100644 middleware/anthropic.go diff --git a/anthropic/anthropic.go b/anthropic/anthropic.go new file mode 100644 index 000000000..7bf9e98a0 --- /dev/null +++ b/anthropic/anthropic.go @@ -0,0 +1,779 @@ +// Package anthropic provides core transformation logic for compatibility with the Anthropic Messages API +package anthropic + +import ( + "crypto/rand" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "net/http" + "strings" + "time" + + "github.com/ollama/ollama/api" +) + +// Error types matching Anthropic API +type Error struct { + Type string `json:"type"` + Message string `json:"message"` +} + +type ErrorResponse struct { + Type string `json:"type"` // always "error" + Error Error `json:"error"` + RequestID string `json:"request_id,omitempty"` +} + +// NewError creates a new ErrorResponse with the appropriate error type based on HTTP status code +func NewError(code int, message string) ErrorResponse { + var etype string + switch code { + case http.StatusBadRequest: + etype = "invalid_request_error" + case http.StatusUnauthorized: + etype = "authentication_error" + case http.StatusForbidden: + etype = "permission_error" + case http.StatusNotFound: + etype = "not_found_error" + case http.StatusTooManyRequests: + etype = "rate_limit_error" + case http.StatusServiceUnavailable, 529: + etype = "overloaded_error" + default: + etype = "api_error" + } + + return ErrorResponse{ + Type: "error", + Error: Error{Type: etype, Message: message}, + RequestID: generateID("req"), + } +} + +// Request types + +// MessagesRequest represents an Anthropic Messages API request +type MessagesRequest struct { + Model string `json:"model"` + MaxTokens int `json:"max_tokens"` + Messages []MessageParam `json:"messages"` + System any `json:"system,omitempty"` // string or []ContentBlock + Stream bool `json:"stream,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopK *int `json:"top_k,omitempty"` + StopSequences []string `json:"stop_sequences,omitempty"` + Tools []Tool `json:"tools,omitempty"` + ToolChoice *ToolChoice `json:"tool_choice,omitempty"` + Thinking *ThinkingConfig `json:"thinking,omitempty"` + Metadata *Metadata `json:"metadata,omitempty"` +} + +// MessageParam 
represents a message in the request +type MessageParam struct { + Role string `json:"role"` // "user" or "assistant" + Content any `json:"content"` // string or []ContentBlock +} + +// ContentBlock represents a content block in a message +type ContentBlock struct { + Type string `json:"type"` // text, image, tool_use, tool_result, thinking + + // For text blocks + Text string `json:"text,omitempty"` + + // For image blocks + Source *ImageSource `json:"source,omitempty"` + + // For tool_use blocks + ID string `json:"id,omitempty"` + Name string `json:"name,omitempty"` + Input any `json:"input,omitempty"` + + // For tool_result blocks + ToolUseID string `json:"tool_use_id,omitempty"` + Content any `json:"content,omitempty"` // string or []ContentBlock + IsError bool `json:"is_error,omitempty"` + + // For thinking blocks + Thinking string `json:"thinking,omitempty"` + Signature string `json:"signature,omitempty"` +} + +// ImageSource represents the source of an image +type ImageSource struct { + Type string `json:"type"` // "base64" or "url" + MediaType string `json:"media_type,omitempty"` + Data string `json:"data,omitempty"` + URL string `json:"url,omitempty"` +} + +// Tool represents a tool definition +type Tool struct { + Type string `json:"type,omitempty"` // "custom" for user-defined tools + Name string `json:"name"` + Description string `json:"description,omitempty"` + InputSchema json.RawMessage `json:"input_schema,omitempty"` +} + +// ToolChoice controls how the model uses tools +type ToolChoice struct { + Type string `json:"type"` // "auto", "any", "tool", "none" + Name string `json:"name,omitempty"` + DisableParallelToolUse bool `json:"disable_parallel_tool_use,omitempty"` +} + +// ThinkingConfig controls extended thinking +type ThinkingConfig struct { + Type string `json:"type"` // "enabled" or "disabled" + BudgetTokens int `json:"budget_tokens,omitempty"` +} + +// Metadata for the request +type Metadata struct { + UserID string `json:"user_id,omitempty"` +} + +// Response types + +// MessagesResponse represents an Anthropic Messages API response +type MessagesResponse struct { + ID string `json:"id"` + Type string `json:"type"` // "message" + Role string `json:"role"` // "assistant" + Model string `json:"model"` + Content []ContentBlock `json:"content"` + StopReason string `json:"stop_reason,omitempty"` + StopSequence string `json:"stop_sequence,omitempty"` + Usage Usage `json:"usage"` +} + +// Usage contains token usage information +type Usage struct { + InputTokens int `json:"input_tokens"` + OutputTokens int `json:"output_tokens"` +} + +// Streaming event types + +// MessageStartEvent is sent at the start of streaming +type MessageStartEvent struct { + Type string `json:"type"` // "message_start" + Message MessagesResponse `json:"message"` +} + +// ContentBlockStartEvent signals the start of a content block +type ContentBlockStartEvent struct { + Type string `json:"type"` // "content_block_start" + Index int `json:"index"` + ContentBlock ContentBlock `json:"content_block"` +} + +// ContentBlockDeltaEvent contains incremental content updates +type ContentBlockDeltaEvent struct { + Type string `json:"type"` // "content_block_delta" + Index int `json:"index"` + Delta Delta `json:"delta"` +} + +// Delta represents an incremental update +type Delta struct { + Type string `json:"type"` // "text_delta", "input_json_delta", "thinking_delta", "signature_delta" + Text string `json:"text,omitempty"` + PartialJSON string `json:"partial_json,omitempty"` + Thinking string 
`json:"thinking,omitempty"` + Signature string `json:"signature,omitempty"` +} + +// ContentBlockStopEvent signals the end of a content block +type ContentBlockStopEvent struct { + Type string `json:"type"` // "content_block_stop" + Index int `json:"index"` +} + +// MessageDeltaEvent contains updates to the message +type MessageDeltaEvent struct { + Type string `json:"type"` // "message_delta" + Delta MessageDelta `json:"delta"` + Usage DeltaUsage `json:"usage"` +} + +// MessageDelta contains stop information +type MessageDelta struct { + StopReason string `json:"stop_reason,omitempty"` + StopSequence string `json:"stop_sequence,omitempty"` +} + +// DeltaUsage contains cumulative token usage +type DeltaUsage struct { + OutputTokens int `json:"output_tokens"` +} + +// MessageStopEvent signals the end of the message +type MessageStopEvent struct { + Type string `json:"type"` // "message_stop" +} + +// PingEvent is a keepalive event +type PingEvent struct { + Type string `json:"type"` // "ping" +} + +// StreamErrorEvent is an error during streaming +type StreamErrorEvent struct { + Type string `json:"type"` // "error" + Error Error `json:"error"` +} + +// FromMessagesRequest converts an Anthropic MessagesRequest to an Ollama api.ChatRequest +func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) { + var messages []api.Message + + // Handle system prompt + if r.System != nil { + switch sys := r.System.(type) { + case string: + if sys != "" { + messages = append(messages, api.Message{Role: "system", Content: sys}) + } + case []any: + // System can be an array of content blocks + var content strings.Builder + for _, block := range sys { + if blockMap, ok := block.(map[string]any); ok { + if blockMap["type"] == "text" { + if text, ok := blockMap["text"].(string); ok { + content.WriteString(text) + } + } + } + } + if content.Len() > 0 { + messages = append(messages, api.Message{Role: "system", Content: content.String()}) + } + } + } + + // Convert messages + for _, msg := range r.Messages { + converted, err := convertMessage(msg) + if err != nil { + return nil, err + } + messages = append(messages, converted...) 
+ } + + // Build options + options := make(map[string]any) + + options["num_predict"] = r.MaxTokens + + if r.Temperature != nil { + options["temperature"] = *r.Temperature + } + + if r.TopP != nil { + options["top_p"] = *r.TopP + } + + if r.TopK != nil { + options["top_k"] = *r.TopK + } + + if len(r.StopSequences) > 0 { + options["stop"] = r.StopSequences + } + + // Convert tools + var tools api.Tools + for _, t := range r.Tools { + tool, err := convertTool(t) + if err != nil { + return nil, err + } + tools = append(tools, tool) + } + + // Handle thinking + var think *api.ThinkValue + if r.Thinking != nil && r.Thinking.Type == "enabled" { + think = &api.ThinkValue{Value: true} + } + + stream := r.Stream + + return &api.ChatRequest{ + Model: r.Model, + Messages: messages, + Options: options, + Stream: &stream, + Tools: tools, + Think: think, + }, nil +} + +// convertMessage converts an Anthropic MessageParam to Ollama api.Message(s) +func convertMessage(msg MessageParam) ([]api.Message, error) { + var messages []api.Message + role := strings.ToLower(msg.Role) + + switch content := msg.Content.(type) { + case string: + messages = append(messages, api.Message{Role: role, Content: content}) + + case []any: + // Handle array of content blocks + var textContent strings.Builder + var images []api.ImageData + var toolCalls []api.ToolCall + var thinking string + var toolResults []api.Message + + for _, block := range content { + blockMap, ok := block.(map[string]any) + if !ok { + return nil, errors.New("invalid content block format") + } + + blockType, _ := blockMap["type"].(string) + + switch blockType { + case "text": + if text, ok := blockMap["text"].(string); ok { + textContent.WriteString(text) + } + + case "image": + source, ok := blockMap["source"].(map[string]any) + if !ok { + return nil, errors.New("invalid image source") + } + + sourceType, _ := source["type"].(string) + if sourceType == "base64" { + data, _ := source["data"].(string) + decoded, err := base64.StdEncoding.DecodeString(data) + if err != nil { + return nil, fmt.Errorf("invalid base64 image data: %w", err) + } + images = append(images, decoded) + } + // URL images would need to be fetched - skip for now + + case "tool_use": + id, ok := blockMap["id"].(string) + if !ok { + return nil, errors.New("tool_use block missing required 'id' field") + } + name, ok := blockMap["name"].(string) + if !ok { + return nil, errors.New("tool_use block missing required 'name' field") + } + tc := api.ToolCall{ + ID: id, + Function: api.ToolCallFunction{ + Name: name, + }, + } + if input, ok := blockMap["input"].(map[string]any); ok { + tc.Function.Arguments = api.ToolCallFunctionArguments(input) + } + toolCalls = append(toolCalls, tc) + + case "tool_result": + toolUseID, _ := blockMap["tool_use_id"].(string) + var resultContent string + + switch c := blockMap["content"].(type) { + case string: + resultContent = c + case []any: + // Extract text from content blocks + for _, cb := range c { + if cbMap, ok := cb.(map[string]any); ok { + if cbMap["type"] == "text" { + if text, ok := cbMap["text"].(string); ok { + resultContent += text + } + } + } + } + } + + toolResults = append(toolResults, api.Message{ + Role: "tool", + Content: resultContent, + ToolCallID: toolUseID, + }) + + case "thinking": + if t, ok := blockMap["thinking"].(string); ok { + thinking = t + } + } + } + + // Build the main message + if textContent.Len() > 0 || len(images) > 0 || len(toolCalls) > 0 { + m := api.Message{ + Role: role, + Content: textContent.String(), + Images: 
images, + ToolCalls: toolCalls, + Thinking: thinking, + } + messages = append(messages, m) + } + + // Add tool results as separate messages + messages = append(messages, toolResults...) + + default: + return nil, fmt.Errorf("invalid message content type: %T", content) + } + + return messages, nil +} + +// convertTool converts an Anthropic Tool to an Ollama api.Tool +func convertTool(t Tool) (api.Tool, error) { + var params api.ToolFunctionParameters + if len(t.InputSchema) > 0 { + if err := json.Unmarshal(t.InputSchema, ¶ms); err != nil { + return api.Tool{}, fmt.Errorf("invalid input_schema for tool %q: %w", t.Name, err) + } + } + + return api.Tool{ + Type: "function", + Function: api.ToolFunction{ + Name: t.Name, + Description: t.Description, + Parameters: params, + }, + }, nil +} + +// ToMessagesResponse converts an Ollama api.ChatResponse to an Anthropic MessagesResponse +func ToMessagesResponse(id string, r api.ChatResponse) MessagesResponse { + var content []ContentBlock + + // Add thinking block if present + if r.Message.Thinking != "" { + content = append(content, ContentBlock{ + Type: "thinking", + Thinking: r.Message.Thinking, + }) + } + + // Add text content if present + if r.Message.Content != "" { + content = append(content, ContentBlock{ + Type: "text", + Text: r.Message.Content, + }) + } + + // Add tool use blocks + for _, tc := range r.Message.ToolCalls { + content = append(content, ContentBlock{ + Type: "tool_use", + ID: tc.ID, + Name: tc.Function.Name, + Input: tc.Function.Arguments, + }) + } + + // Map stop reason + stopReason := mapStopReason(r.DoneReason, len(r.Message.ToolCalls) > 0) + + return MessagesResponse{ + ID: id, + Type: "message", + Role: "assistant", + Model: r.Model, + Content: content, + StopReason: stopReason, + Usage: Usage{ + InputTokens: r.Metrics.PromptEvalCount, + OutputTokens: r.Metrics.EvalCount, + }, + } +} + +// mapStopReason converts Ollama done_reason to Anthropic stop_reason +func mapStopReason(reason string, hasToolCalls bool) string { + if hasToolCalls { + return "tool_use" + } + + switch reason { + case "stop": + return "end_turn" + case "length": + return "max_tokens" + default: + if reason != "" { + return "stop_sequence" + } + return "" + } +} + +// StreamConverter manages state for converting Ollama streaming responses to Anthropic format +type StreamConverter struct { + ID string + Model string + firstWrite bool + contentIndex int + inputTokens int + outputTokens int + thinkingStarted bool + thinkingDone bool + textStarted bool + toolCallsSent map[string]bool +} + +// NewStreamConverter creates a new StreamConverter +func NewStreamConverter(id, model string) *StreamConverter { + return &StreamConverter{ + ID: id, + Model: model, + firstWrite: true, + toolCallsSent: make(map[string]bool), + } +} + +// StreamEvent represents a streaming event to be sent to the client +type StreamEvent struct { + Event string + Data any +} + +// Process converts an Ollama ChatResponse to Anthropic streaming events +func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent { + var events []StreamEvent + + // First write: emit message_start + if c.firstWrite { + c.firstWrite = false + c.inputTokens = r.Metrics.PromptEvalCount + + events = append(events, StreamEvent{ + Event: "message_start", + Data: MessageStartEvent{ + Type: "message_start", + Message: MessagesResponse{ + ID: c.ID, + Type: "message", + Role: "assistant", + Model: c.Model, + Content: []ContentBlock{}, + Usage: Usage{ + InputTokens: c.inputTokens, + OutputTokens: 0, + }, + }, + }, 
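+			// On the wire this becomes a single SSE frame, e.g.:
+			//   event: message_start
+			//   data: {"type":"message_start","message":{...}}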
+ }) + } + + // Handle thinking content + if r.Message.Thinking != "" && !c.thinkingDone { + if !c.thinkingStarted { + c.thinkingStarted = true + events = append(events, StreamEvent{ + Event: "content_block_start", + Data: ContentBlockStartEvent{ + Type: "content_block_start", + Index: c.contentIndex, + ContentBlock: ContentBlock{ + Type: "thinking", + Thinking: "", + }, + }, + }) + } + + events = append(events, StreamEvent{ + Event: "content_block_delta", + Data: ContentBlockDeltaEvent{ + Type: "content_block_delta", + Index: c.contentIndex, + Delta: Delta{ + Type: "thinking_delta", + Thinking: r.Message.Thinking, + }, + }, + }) + } + + // Handle text content + if r.Message.Content != "" { + // Close thinking block if it was open + if c.thinkingStarted && !c.thinkingDone { + c.thinkingDone = true + events = append(events, StreamEvent{ + Event: "content_block_stop", + Data: ContentBlockStopEvent{ + Type: "content_block_stop", + Index: c.contentIndex, + }, + }) + c.contentIndex++ + } + + if !c.textStarted { + c.textStarted = true + events = append(events, StreamEvent{ + Event: "content_block_start", + Data: ContentBlockStartEvent{ + Type: "content_block_start", + Index: c.contentIndex, + ContentBlock: ContentBlock{ + Type: "text", + Text: "", + }, + }, + }) + } + + events = append(events, StreamEvent{ + Event: "content_block_delta", + Data: ContentBlockDeltaEvent{ + Type: "content_block_delta", + Index: c.contentIndex, + Delta: Delta{ + Type: "text_delta", + Text: r.Message.Content, + }, + }, + }) + } + + // Handle tool calls + for _, tc := range r.Message.ToolCalls { + if c.toolCallsSent[tc.ID] { + continue + } + + // Close any previous block + if c.textStarted { + events = append(events, StreamEvent{ + Event: "content_block_stop", + Data: ContentBlockStopEvent{ + Type: "content_block_stop", + Index: c.contentIndex, + }, + }) + c.contentIndex++ + c.textStarted = false + } + + // Start tool use block + events = append(events, StreamEvent{ + Event: "content_block_start", + Data: ContentBlockStartEvent{ + Type: "content_block_start", + Index: c.contentIndex, + ContentBlock: ContentBlock{ + Type: "tool_use", + ID: tc.ID, + Name: tc.Function.Name, + Input: map[string]any{}, + }, + }, + }) + + // Send input as JSON delta + argsJSON, _ := json.Marshal(tc.Function.Arguments) + events = append(events, StreamEvent{ + Event: "content_block_delta", + Data: ContentBlockDeltaEvent{ + Type: "content_block_delta", + Index: c.contentIndex, + Delta: Delta{ + Type: "input_json_delta", + PartialJSON: string(argsJSON), + }, + }, + }) + + // Close tool use block + events = append(events, StreamEvent{ + Event: "content_block_stop", + Data: ContentBlockStopEvent{ + Type: "content_block_stop", + Index: c.contentIndex, + }, + }) + + c.toolCallsSent[tc.ID] = true + c.contentIndex++ + } + + // Handle done + if r.Done { + // Close any open block + if c.textStarted { + events = append(events, StreamEvent{ + Event: "content_block_stop", + Data: ContentBlockStopEvent{ + Type: "content_block_stop", + Index: c.contentIndex, + }, + }) + } else if c.thinkingStarted && !c.thinkingDone { + events = append(events, StreamEvent{ + Event: "content_block_stop", + Data: ContentBlockStopEvent{ + Type: "content_block_stop", + Index: c.contentIndex, + }, + }) + } + + c.outputTokens = r.Metrics.EvalCount + stopReason := mapStopReason(r.DoneReason, len(c.toolCallsSent) > 0) + + events = append(events, StreamEvent{ + Event: "message_delta", + Data: MessageDeltaEvent{ + Type: "message_delta", + Delta: MessageDelta{ + StopReason: 
stopReason, + }, + Usage: DeltaUsage{ + OutputTokens: c.outputTokens, + }, + }, + }) + + events = append(events, StreamEvent{ + Event: "message_stop", + Data: MessageStopEvent{ + Type: "message_stop", + }, + }) + } + + return events +} + +// generateID generates a unique ID with the given prefix using crypto/rand +func generateID(prefix string) string { + b := make([]byte, 12) + if _, err := rand.Read(b); err != nil { + // Fallback to time-based ID if crypto/rand fails + return fmt.Sprintf("%s_%d", prefix, time.Now().UnixNano()) + } + return fmt.Sprintf("%s_%x", prefix, b) +} + +// GenerateMessageID generates a unique message ID +func GenerateMessageID() string { + return generateID("msg") +} diff --git a/docs/api/anthropic-compatibility.mdx b/docs/api/anthropic-compatibility.mdx new file mode 100644 index 000000000..b8953d1d8 --- /dev/null +++ b/docs/api/anthropic-compatibility.mdx @@ -0,0 +1,339 @@ +--- +title: Anthropic compatibility +--- + +Ollama provides compatibility with the [Anthropic Messages API](https://docs.anthropic.com/en/api/messages) to help connect existing applications to Ollama, including tools like Claude Code. + +## Usage + +### Environment variables + +To use Ollama with tools that expect the Anthropic API (like Claude Code), set these environment variables: + +```shell +export ANTHROPIC_BASE_URL=http://localhost:11434 +export ANTHROPIC_API_KEY=ollama # required but ignored +``` + +### Simple `/v1/messages` example + + + +```python basic.py +import anthropic + +client = anthropic.Anthropic( + base_url='http://localhost:11434', + api_key='ollama', # required but ignored +) + +message = client.messages.create( + model='llama3.2:3b', + max_tokens=1024, + messages=[ + {'role': 'user', 'content': 'Hello, how are you?'} + ] +) +print(message.content[0].text) +``` + +```javascript basic.js +import Anthropic from "@anthropic-ai/sdk"; + +const anthropic = new Anthropic({ + baseURL: "http://localhost:11434", + apiKey: "ollama", // required but ignored +}); + +const message = await anthropic.messages.create({ + model: "llama3.2:3b", + max_tokens: 1024, + messages: [{ role: "user", content: "Hello, how are you?" }], +}); + +console.log(message.content[0].text); +``` + +```shell basic.sh +curl -X POST http://localhost:11434/v1/messages \ +-H "Content-Type: application/json" \ +-H "x-api-key: ollama" \ +-H "anthropic-version: 2023-06-01" \ +-d '{ + "model": "llama3.2:3b", + "max_tokens": 1024, + "messages": [{ "role": "user", "content": "Hello, how are you?" 
}] +}' +``` + + + +### Streaming example + + + +```python streaming.py +import anthropic + +client = anthropic.Anthropic( + base_url='http://localhost:11434', + api_key='ollama', +) + +with client.messages.stream( + model='llama3.2:3b', + max_tokens=1024, + messages=[{'role': 'user', 'content': 'Count from 1 to 10'}] +) as stream: + for text in stream.text_stream: + print(text, end='', flush=True) +``` + +```javascript streaming.js +import Anthropic from "@anthropic-ai/sdk"; + +const anthropic = new Anthropic({ + baseURL: "http://localhost:11434", + apiKey: "ollama", +}); + +const stream = await anthropic.messages.stream({ + model: "llama3.2:3b", + max_tokens: 1024, + messages: [{ role: "user", content: "Count from 1 to 10" }], +}); + +for await (const event of stream) { + if ( + event.type === "content_block_delta" && + event.delta.type === "text_delta" + ) { + process.stdout.write(event.delta.text); + } +} +``` + +```shell streaming.sh +curl -X POST http://localhost:11434/v1/messages \ +-H "Content-Type: application/json" \ +-d '{ + "model": "llama3.2:3b", + "max_tokens": 1024, + "stream": true, + "messages": [{ "role": "user", "content": "Count from 1 to 10" }] +}' +``` + + + +### Tool calling example + + + +```python tools.py +import anthropic + +client = anthropic.Anthropic( + base_url='http://localhost:11434', + api_key='ollama', +) + +message = client.messages.create( + model='llama3.2:3b', + max_tokens=1024, + tools=[ + { + 'name': 'get_weather', + 'description': 'Get the current weather in a location', + 'input_schema': { + 'type': 'object', + 'properties': { + 'location': { + 'type': 'string', + 'description': 'The city and state, e.g. San Francisco, CA' + } + }, + 'required': ['location'] + } + } + ], + messages=[{'role': 'user', 'content': "What's the weather in San Francisco?"}] +) + +for block in message.content: + if block.type == 'tool_use': + print(f'Tool: {block.name}') + print(f'Input: {block.input}') +``` + +```shell tools.sh +curl -X POST http://localhost:11434/v1/messages \ +-H "Content-Type: application/json" \ +-d '{ + "model": "llama3.2:3b", + "max_tokens": 1024, + "tools": [ + { + "name": "get_weather", + "description": "Get the current weather in a location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state" + } + }, + "required": ["location"] + } + } + ], + "messages": [{ "role": "user", "content": "What is the weather in San Francisco?" 
}] +}' +``` + + + +## Using with Claude Code + +[Claude Code](https://docs.anthropic.com/en/docs/claude-code) can be configured to use Ollama as its backend: + +```shell +ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model llama3.2:3b +``` + +Or set the environment variables in your shell profile: + +```shell +export ANTHROPIC_BASE_URL=http://localhost:11434 +export ANTHROPIC_API_KEY=ollama +``` + +Then run Claude Code with any Ollama model: + +```shell +claude --model llama3.2:3b +claude --model qwen3:8b +claude --model deepseek-r1:14b +``` + +## Endpoints + +### `/v1/messages` + +#### Supported features + +- [x] Messages +- [x] Streaming +- [x] System prompts +- [x] Multi-turn conversations +- [x] Vision (images) +- [x] Tools (function calling) +- [x] Tool results +- [x] Thinking/extended thinking + +#### Supported request fields + +- [x] `model` +- [x] `max_tokens` +- [x] `messages` + - [x] Text `content` + - [x] Image `content` (base64) + - [x] Array of content blocks + - [x] `tool_use` blocks + - [x] `tool_result` blocks + - [x] `thinking` blocks +- [x] `system` (string or array) +- [x] `stream` +- [x] `temperature` +- [x] `top_p` +- [x] `top_k` +- [x] `stop_sequences` +- [x] `tools` +- [x] `thinking` +- [ ] `tool_choice` +- [ ] `metadata` + +#### Supported response fields + +- [x] `id` +- [x] `type` +- [x] `role` +- [x] `model` +- [x] `content` (text, tool_use, thinking blocks) +- [x] `stop_reason` (end_turn, max_tokens, tool_use) +- [x] `usage` (input_tokens, output_tokens) + +#### Streaming events + +- [x] `message_start` +- [x] `content_block_start` +- [x] `content_block_delta` (text_delta, input_json_delta, thinking_delta) +- [x] `content_block_stop` +- [x] `message_delta` +- [x] `message_stop` +- [x] `ping` +- [x] `error` + +## Models + +Before using a model, pull it locally with `ollama pull`: + +```shell +ollama pull llama3.2:3b +``` + +### Default model names + +For tooling that relies on default Anthropic model names such as `claude-3-5-sonnet`, use `ollama cp` to copy an existing model name: + +```shell +ollama cp llama3.2:3b claude-3-5-sonnet +``` + +Afterwards, this new model name can be specified in the `model` field: + +```shell +curl http://localhost:11434/v1/messages \ + -H "Content-Type: application/json" \ + -d '{ + "model": "claude-3-5-sonnet", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "Hello!" 
+ } + ] + }' +``` + +## Differences from the Anthropic API + +### Behavior differences + +- API key is accepted but not validated +- `anthropic-version` header is accepted but not used +- Token counts are approximations based on the underlying model's tokenizer + +### Not supported + +The following Anthropic API features are not currently supported: + +| Feature | Description | +|---------|-------------| +| `/v1/messages/count_tokens` | Token counting endpoint | +| `tool_choice` | Forcing specific tool use or disabling tools | +| `metadata` | Request metadata (user_id) | +| Prompt caching | `cache_control` blocks for caching prefixes | +| Batches API | `/v1/messages/batches` for async batch processing | +| Citations | `citations` content blocks | +| PDF support | `document` content blocks with PDF files | +| Server-sent errors | `error` events during streaming (errors return HTTP status) | + +### Partial support + +| Feature | Status | +|---------|--------| +| Image content | Base64 images supported; URL images not supported | +| Extended thinking | Basic support; `budget_tokens` accepted but not enforced | diff --git a/middleware/anthropic.go b/middleware/anthropic.go new file mode 100644 index 000000000..a5f0ed8d6 --- /dev/null +++ b/middleware/anthropic.go @@ -0,0 +1,152 @@ +package middleware + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + + "github.com/gin-gonic/gin" + + "github.com/ollama/ollama/anthropic" + "github.com/ollama/ollama/api" +) + +// AnthropicWriter wraps the response writer to transform Ollama responses to Anthropic format +type AnthropicWriter struct { + BaseWriter + stream bool + id string + model string + converter *anthropic.StreamConverter +} + +func (w *AnthropicWriter) writeError(data []byte) (int, error) { + var serr api.StatusError + err := json.Unmarshal(data, &serr) + if err != nil { + return 0, err + } + + w.ResponseWriter.Header().Set("Content-Type", "application/json") + err = json.NewEncoder(w.ResponseWriter).Encode(anthropic.NewError(serr.StatusCode, serr.Error())) + if err != nil { + return 0, err + } + + return len(data), nil +} + +func (w *AnthropicWriter) writeEvent(eventType string, data any) error { + d, err := json.Marshal(data) + if err != nil { + return err + } + _, err = w.ResponseWriter.Write([]byte(fmt.Sprintf("event: %s\ndata: %s\n\n", eventType, d))) + if err != nil { + return err + } + if f, ok := w.ResponseWriter.(http.Flusher); ok { + f.Flush() + } + return nil +} + +func (w *AnthropicWriter) writeResponse(data []byte) (int, error) { + var chatResponse api.ChatResponse + err := json.Unmarshal(data, &chatResponse) + if err != nil { + return 0, err + } + + if w.stream { + w.ResponseWriter.Header().Set("Content-Type", "text/event-stream") + + events := w.converter.Process(chatResponse) + for _, event := range events { + if err := w.writeEvent(event.Event, event.Data); err != nil { + return 0, err + } + } + return len(data), nil + } + + // Non-streaming response + w.ResponseWriter.Header().Set("Content-Type", "application/json") + response := anthropic.ToMessagesResponse(w.id, chatResponse) + return len(data), json.NewEncoder(w.ResponseWriter).Encode(response) +} + +func (w *AnthropicWriter) Write(data []byte) (int, error) { + code := w.ResponseWriter.Status() + if code != http.StatusOK { + return w.writeError(data) + } + + return w.writeResponse(data) +} + +// AnthropicMessagesMiddleware handles Anthropic Messages API requests +func AnthropicMessagesMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + var 
req anthropic.MessagesRequest + err := c.ShouldBindJSON(&req) + if err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, err.Error())) + return + } + + // Validate required fields + if req.Model == "" { + c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, "model is required")) + return + } + + if req.MaxTokens <= 0 { + c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, "max_tokens is required and must be positive")) + return + } + + if len(req.Messages) == 0 { + c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, "messages is required")) + return + } + + // Convert to internal format + chatReq, err := anthropic.FromMessagesRequest(req) + if err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, err.Error())) + return + } + + var b bytes.Buffer + if err := json.NewEncoder(&b).Encode(chatReq); err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, anthropic.NewError(http.StatusInternalServerError, err.Error())) + return + } + + c.Request.Body = io.NopCloser(&b) + + messageID := anthropic.GenerateMessageID() + + w := &AnthropicWriter{ + BaseWriter: BaseWriter{ResponseWriter: c.Writer}, + stream: req.Stream, + id: messageID, + model: req.Model, + converter: anthropic.NewStreamConverter(messageID, req.Model), + } + + // Set headers based on streaming mode + if req.Stream { + c.Writer.Header().Set("Content-Type", "text/event-stream") + c.Writer.Header().Set("Cache-Control", "no-cache") + c.Writer.Header().Set("Connection", "keep-alive") + } + + c.Writer = w + + c.Next() + } +} diff --git a/server/routes.go b/server/routes.go index 977a13ff2..8e199bada 100644 --- a/server/routes.go +++ b/server/routes.go @@ -1544,6 +1544,9 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) { r.GET("/v1/models/:model", middleware.RetrieveMiddleware(), s.ShowHandler) r.POST("/v1/responses", middleware.ResponsesMiddleware(), s.ChatHandler) + // Inference (Anthropic compatibility) + r.POST("/v1/messages", middleware.AnthropicMessagesMiddleware(), s.ChatHandler) + if rc != nil { // wrap old with new rs := ®istry.Local{ From 6229df5b90711da2a93f4246b558d7441370b305 Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Fri, 2 Jan 2026 01:31:43 -0500 Subject: [PATCH 02/13] anthropic: add unit and integration tests - Unit tests for transformation functions (FromMessagesRequest, ToMessagesResponse) - Unit tests for error handling and edge cases - Middleware integration tests with httptest - Fix lint issues (gofmt) - Fix unused struct fields in StreamConverter - Add fallback for crypto/rand errors --- anthropic/anthropic.go | 12 +- anthropic/anthropic_test.go | 667 +++++++++++++++++++++++++++++++++++ docs/README.md | 1 + docs/docs.json | 3 +- middleware/anthropic_test.go | 487 +++++++++++++++++++++++++ 5 files changed, 1163 insertions(+), 7 deletions(-) create mode 100644 anthropic/anthropic_test.go create mode 100644 middleware/anthropic_test.go diff --git a/anthropic/anthropic.go b/anthropic/anthropic.go index 7bf9e98a0..ef0bdd953 100644 --- a/anthropic/anthropic.go +++ b/anthropic/anthropic.go @@ -60,7 +60,7 @@ type MessagesRequest struct { Model string `json:"model"` MaxTokens int `json:"max_tokens"` Messages []MessageParam `json:"messages"` - System any `json:"system,omitempty"` // string or []ContentBlock + System any `json:"system,omitempty"` // string or []ContentBlock Stream bool 
`json:"stream,omitempty"` Temperature *float64 `json:"temperature,omitempty"` TopP *float64 `json:"top_p,omitempty"` @@ -74,7 +74,7 @@ type MessagesRequest struct { // MessageParam represents a message in the request type MessageParam struct { - Role string `json:"role"` // "user" or "assistant" + Role string `json:"role"` // "user" or "assistant" Content any `json:"content"` // string or []ContentBlock } @@ -181,11 +181,11 @@ type ContentBlockDeltaEvent struct { // Delta represents an incremental update type Delta struct { - Type string `json:"type"` // "text_delta", "input_json_delta", "thinking_delta", "signature_delta" - Text string `json:"text,omitempty"` + Type string `json:"type"` // "text_delta", "input_json_delta", "thinking_delta", "signature_delta" + Text string `json:"text,omitempty"` PartialJSON string `json:"partial_json,omitempty"` - Thinking string `json:"thinking,omitempty"` - Signature string `json:"signature,omitempty"` + Thinking string `json:"thinking,omitempty"` + Signature string `json:"signature,omitempty"` } // ContentBlockStopEvent signals the end of a content block diff --git a/anthropic/anthropic_test.go b/anthropic/anthropic_test.go new file mode 100644 index 000000000..31a2ec67c --- /dev/null +++ b/anthropic/anthropic_test.go @@ -0,0 +1,667 @@ +package anthropic + +import ( + "encoding/base64" + "encoding/json" + "testing" + + "github.com/google/go-cmp/cmp" + + "github.com/ollama/ollama/api" +) + +const ( + testImage = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=` +) + +func TestFromMessagesRequest_Basic(t *testing.T) { + req := MessagesRequest{ + Model: "test-model", + MaxTokens: 1024, + Messages: []MessageParam{ + {Role: "user", Content: "Hello"}, + }, + } + + result, err := FromMessagesRequest(req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if result.Model != "test-model" { + t.Errorf("expected model 'test-model', got %q", result.Model) + } + + if len(result.Messages) != 1 { + t.Fatalf("expected 1 message, got %d", len(result.Messages)) + } + + if result.Messages[0].Role != "user" || result.Messages[0].Content != "Hello" { + t.Errorf("unexpected message: %+v", result.Messages[0]) + } + + if numPredict, ok := result.Options["num_predict"].(int); !ok || numPredict != 1024 { + t.Errorf("expected num_predict 1024, got %v", result.Options["num_predict"]) + } +} + +func TestFromMessagesRequest_WithSystemPrompt(t *testing.T) { + req := MessagesRequest{ + Model: "test-model", + MaxTokens: 1024, + System: "You are a helpful assistant.", + Messages: []MessageParam{ + {Role: "user", Content: "Hello"}, + }, + } + + result, err := FromMessagesRequest(req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(result.Messages) != 2 { + t.Fatalf("expected 2 messages, got %d", len(result.Messages)) + } + + if result.Messages[0].Role != "system" || result.Messages[0].Content != "You are a helpful assistant." 
{ + t.Errorf("unexpected system message: %+v", result.Messages[0]) + } +} + +func TestFromMessagesRequest_WithSystemPromptArray(t *testing.T) { + req := MessagesRequest{ + Model: "test-model", + MaxTokens: 1024, + System: []any{ + map[string]any{"type": "text", "text": "You are helpful."}, + map[string]any{"type": "text", "text": " Be concise."}, + }, + Messages: []MessageParam{ + {Role: "user", Content: "Hello"}, + }, + } + + result, err := FromMessagesRequest(req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(result.Messages) != 2 { + t.Fatalf("expected 2 messages, got %d", len(result.Messages)) + } + + if result.Messages[0].Content != "You are helpful. Be concise." { + t.Errorf("unexpected system message content: %q", result.Messages[0].Content) + } +} + +func TestFromMessagesRequest_WithOptions(t *testing.T) { + temp := 0.7 + topP := 0.9 + topK := 40 + req := MessagesRequest{ + Model: "test-model", + MaxTokens: 2048, + Messages: []MessageParam{{Role: "user", Content: "Hello"}}, + Temperature: &temp, + TopP: &topP, + TopK: &topK, + StopSequences: []string{"\n", "END"}, + } + + result, err := FromMessagesRequest(req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if result.Options["temperature"] != 0.7 { + t.Errorf("expected temperature 0.7, got %v", result.Options["temperature"]) + } + if result.Options["top_p"] != 0.9 { + t.Errorf("expected top_p 0.9, got %v", result.Options["top_p"]) + } + if result.Options["top_k"] != 40 { + t.Errorf("expected top_k 40, got %v", result.Options["top_k"]) + } + if diff := cmp.Diff([]string{"\n", "END"}, result.Options["stop"]); diff != "" { + t.Errorf("stop sequences mismatch: %s", diff) + } +} + +func TestFromMessagesRequest_WithImage(t *testing.T) { + imgData, _ := base64.StdEncoding.DecodeString(testImage) + + req := MessagesRequest{ + Model: "test-model", + MaxTokens: 1024, + Messages: []MessageParam{ + { + Role: "user", + Content: []any{ + map[string]any{"type": "text", "text": "What's in this image?"}, + map[string]any{ + "type": "image", + "source": map[string]any{ + "type": "base64", + "media_type": "image/png", + "data": testImage, + }, + }, + }, + }, + }, + } + + result, err := FromMessagesRequest(req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(result.Messages) != 1 { + t.Fatalf("expected 1 message, got %d", len(result.Messages)) + } + + if result.Messages[0].Content != "What's in this image?" 
{ + t.Errorf("expected content 'What's in this image?', got %q", result.Messages[0].Content) + } + + if len(result.Messages[0].Images) != 1 { + t.Fatalf("expected 1 image, got %d", len(result.Messages[0].Images)) + } + + if string(result.Messages[0].Images[0]) != string(imgData) { + t.Error("image data mismatch") + } +} + +func TestFromMessagesRequest_WithToolUse(t *testing.T) { + req := MessagesRequest{ + Model: "test-model", + MaxTokens: 1024, + Messages: []MessageParam{ + {Role: "user", Content: "What's the weather in Paris?"}, + { + Role: "assistant", + Content: []any{ + map[string]any{ + "type": "tool_use", + "id": "call_123", + "name": "get_weather", + "input": map[string]any{"location": "Paris"}, + }, + }, + }, + }, + } + + result, err := FromMessagesRequest(req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(result.Messages) != 2 { + t.Fatalf("expected 2 messages, got %d", len(result.Messages)) + } + + if len(result.Messages[1].ToolCalls) != 1 { + t.Fatalf("expected 1 tool call, got %d", len(result.Messages[1].ToolCalls)) + } + + tc := result.Messages[1].ToolCalls[0] + if tc.ID != "call_123" { + t.Errorf("expected tool call ID 'call_123', got %q", tc.ID) + } + if tc.Function.Name != "get_weather" { + t.Errorf("expected tool name 'get_weather', got %q", tc.Function.Name) + } +} + +func TestFromMessagesRequest_WithToolResult(t *testing.T) { + req := MessagesRequest{ + Model: "test-model", + MaxTokens: 1024, + Messages: []MessageParam{ + { + Role: "user", + Content: []any{ + map[string]any{ + "type": "tool_result", + "tool_use_id": "call_123", + "content": "The weather in Paris is sunny, 22°C", + }, + }, + }, + }, + } + + result, err := FromMessagesRequest(req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(result.Messages) != 1 { + t.Fatalf("expected 1 message, got %d", len(result.Messages)) + } + + msg := result.Messages[0] + if msg.Role != "tool" { + t.Errorf("expected role 'tool', got %q", msg.Role) + } + if msg.ToolCallID != "call_123" { + t.Errorf("expected tool_call_id 'call_123', got %q", msg.ToolCallID) + } + if msg.Content != "The weather in Paris is sunny, 22°C" { + t.Errorf("unexpected content: %q", msg.Content) + } +} + +func TestFromMessagesRequest_WithTools(t *testing.T) { + req := MessagesRequest{ + Model: "test-model", + MaxTokens: 1024, + Messages: []MessageParam{{Role: "user", Content: "Hello"}}, + Tools: []Tool{ + { + Name: "get_weather", + Description: "Get current weather", + InputSchema: json.RawMessage(`{"type":"object","properties":{"location":{"type":"string"}},"required":["location"]}`), + }, + }, + } + + result, err := FromMessagesRequest(req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(result.Tools) != 1 { + t.Fatalf("expected 1 tool, got %d", len(result.Tools)) + } + + tool := result.Tools[0] + if tool.Type != "function" { + t.Errorf("expected type 'function', got %q", tool.Type) + } + if tool.Function.Name != "get_weather" { + t.Errorf("expected name 'get_weather', got %q", tool.Function.Name) + } + if tool.Function.Description != "Get current weather" { + t.Errorf("expected description 'Get current weather', got %q", tool.Function.Description) + } +} + +func TestFromMessagesRequest_WithThinking(t *testing.T) { + req := MessagesRequest{ + Model: "test-model", + MaxTokens: 1024, + Messages: []MessageParam{{Role: "user", Content: "Hello"}}, + Thinking: &ThinkingConfig{Type: "enabled", BudgetTokens: 1000}, + } + + result, err := FromMessagesRequest(req) + if err != nil { + 
t.Fatalf("unexpected error: %v", err) + } + + if result.Think == nil { + t.Fatal("expected Think to be set") + } + if v, ok := result.Think.Value.(bool); !ok || !v { + t.Errorf("expected Think.Value to be true, got %v", result.Think.Value) + } +} + +func TestFromMessagesRequest_ToolUseMissingID(t *testing.T) { + req := MessagesRequest{ + Model: "test-model", + MaxTokens: 1024, + Messages: []MessageParam{ + { + Role: "assistant", + Content: []any{ + map[string]any{ + "type": "tool_use", + "name": "get_weather", + }, + }, + }, + }, + } + + _, err := FromMessagesRequest(req) + if err == nil { + t.Fatal("expected error for missing tool_use id") + } + if err.Error() != "tool_use block missing required 'id' field" { + t.Errorf("unexpected error message: %v", err) + } +} + +func TestFromMessagesRequest_ToolUseMissingName(t *testing.T) { + req := MessagesRequest{ + Model: "test-model", + MaxTokens: 1024, + Messages: []MessageParam{ + { + Role: "assistant", + Content: []any{ + map[string]any{ + "type": "tool_use", + "id": "call_123", + }, + }, + }, + }, + } + + _, err := FromMessagesRequest(req) + if err == nil { + t.Fatal("expected error for missing tool_use name") + } + if err.Error() != "tool_use block missing required 'name' field" { + t.Errorf("unexpected error message: %v", err) + } +} + +func TestFromMessagesRequest_InvalidToolSchema(t *testing.T) { + req := MessagesRequest{ + Model: "test-model", + MaxTokens: 1024, + Messages: []MessageParam{{Role: "user", Content: "Hello"}}, + Tools: []Tool{ + { + Name: "bad_tool", + InputSchema: json.RawMessage(`{invalid json`), + }, + }, + } + + _, err := FromMessagesRequest(req) + if err == nil { + t.Fatal("expected error for invalid tool schema") + } +} + +func TestToMessagesResponse_Basic(t *testing.T) { + resp := api.ChatResponse{ + Model: "test-model", + Message: api.Message{ + Role: "assistant", + Content: "Hello there!", + }, + Done: true, + DoneReason: "stop", + Metrics: api.Metrics{ + PromptEvalCount: 10, + EvalCount: 5, + }, + } + + result := ToMessagesResponse("msg_123", resp) + + if result.ID != "msg_123" { + t.Errorf("expected ID 'msg_123', got %q", result.ID) + } + if result.Type != "message" { + t.Errorf("expected type 'message', got %q", result.Type) + } + if result.Role != "assistant" { + t.Errorf("expected role 'assistant', got %q", result.Role) + } + if len(result.Content) != 1 { + t.Fatalf("expected 1 content block, got %d", len(result.Content)) + } + if result.Content[0].Type != "text" || result.Content[0].Text != "Hello there!" 
{ + t.Errorf("unexpected content: %+v", result.Content[0]) + } + if result.StopReason != "end_turn" { + t.Errorf("expected stop_reason 'end_turn', got %q", result.StopReason) + } + if result.Usage.InputTokens != 10 || result.Usage.OutputTokens != 5 { + t.Errorf("unexpected usage: %+v", result.Usage) + } +} + +func TestToMessagesResponse_WithToolCalls(t *testing.T) { + resp := api.ChatResponse{ + Model: "test-model", + Message: api.Message{ + Role: "assistant", + ToolCalls: []api.ToolCall{ + { + ID: "call_123", + Function: api.ToolCallFunction{ + Name: "get_weather", + Arguments: map[string]any{"location": "Paris"}, + }, + }, + }, + }, + Done: true, + DoneReason: "stop", + } + + result := ToMessagesResponse("msg_123", resp) + + if len(result.Content) != 1 { + t.Fatalf("expected 1 content block, got %d", len(result.Content)) + } + if result.Content[0].Type != "tool_use" { + t.Errorf("expected type 'tool_use', got %q", result.Content[0].Type) + } + if result.Content[0].ID != "call_123" { + t.Errorf("expected ID 'call_123', got %q", result.Content[0].ID) + } + if result.Content[0].Name != "get_weather" { + t.Errorf("expected name 'get_weather', got %q", result.Content[0].Name) + } + if result.StopReason != "tool_use" { + t.Errorf("expected stop_reason 'tool_use', got %q", result.StopReason) + } +} + +func TestToMessagesResponse_WithThinking(t *testing.T) { + resp := api.ChatResponse{ + Model: "test-model", + Message: api.Message{ + Role: "assistant", + Content: "The answer is 42.", + Thinking: "Let me think about this...", + }, + Done: true, + DoneReason: "stop", + } + + result := ToMessagesResponse("msg_123", resp) + + if len(result.Content) != 2 { + t.Fatalf("expected 2 content blocks, got %d", len(result.Content)) + } + if result.Content[0].Type != "thinking" { + t.Errorf("expected first block type 'thinking', got %q", result.Content[0].Type) + } + if result.Content[0].Thinking != "Let me think about this..." 
{ + t.Errorf("unexpected thinking content: %q", result.Content[0].Thinking) + } + if result.Content[1].Type != "text" { + t.Errorf("expected second block type 'text', got %q", result.Content[1].Type) + } +} + +func TestMapStopReason(t *testing.T) { + tests := []struct { + reason string + hasToolCalls bool + want string + }{ + {"stop", false, "end_turn"}, + {"length", false, "max_tokens"}, + {"stop", true, "tool_use"}, + {"other", false, "stop_sequence"}, + {"", false, ""}, + } + + for _, tt := range tests { + got := mapStopReason(tt.reason, tt.hasToolCalls) + if got != tt.want { + t.Errorf("mapStopReason(%q, %v) = %q, want %q", tt.reason, tt.hasToolCalls, got, tt.want) + } + } +} + +func TestNewError(t *testing.T) { + tests := []struct { + code int + want string + }{ + {400, "invalid_request_error"}, + {401, "authentication_error"}, + {403, "permission_error"}, + {404, "not_found_error"}, + {429, "rate_limit_error"}, + {500, "api_error"}, + {503, "overloaded_error"}, + {529, "overloaded_error"}, + } + + for _, tt := range tests { + result := NewError(tt.code, "test message") + if result.Type != "error" { + t.Errorf("NewError(%d) type = %q, want 'error'", tt.code, result.Type) + } + if result.Error.Type != tt.want { + t.Errorf("NewError(%d) error.type = %q, want %q", tt.code, result.Error.Type, tt.want) + } + if result.Error.Message != "test message" { + t.Errorf("NewError(%d) message = %q, want 'test message'", tt.code, result.Error.Message) + } + if result.RequestID == "" { + t.Errorf("NewError(%d) request_id should not be empty", tt.code) + } + } +} + +func TestGenerateMessageID(t *testing.T) { + id1 := GenerateMessageID() + id2 := GenerateMessageID() + + if id1 == "" { + t.Error("GenerateMessageID returned empty string") + } + if id1 == id2 { + t.Error("GenerateMessageID returned duplicate IDs") + } + if len(id1) < 10 { + t.Errorf("GenerateMessageID returned short ID: %q", id1) + } + if id1[:4] != "msg_" { + t.Errorf("GenerateMessageID should start with 'msg_', got %q", id1[:4]) + } +} + +func TestStreamConverter_Basic(t *testing.T) { + conv := NewStreamConverter("msg_123", "test-model") + + // First chunk + resp1 := api.ChatResponse{ + Model: "test-model", + Message: api.Message{ + Role: "assistant", + Content: "Hello", + }, + Metrics: api.Metrics{PromptEvalCount: 10}, + } + + events1 := conv.Process(resp1) + if len(events1) < 3 { + t.Fatalf("expected at least 3 events for first chunk, got %d", len(events1)) + } + + // Should have message_start, content_block_start, content_block_delta + if events1[0].Event != "message_start" { + t.Errorf("expected first event 'message_start', got %q", events1[0].Event) + } + if events1[1].Event != "content_block_start" { + t.Errorf("expected second event 'content_block_start', got %q", events1[1].Event) + } + if events1[2].Event != "content_block_delta" { + t.Errorf("expected third event 'content_block_delta', got %q", events1[2].Event) + } + + // Final chunk + resp2 := api.ChatResponse{ + Model: "test-model", + Message: api.Message{ + Role: "assistant", + Content: " world!", + }, + Done: true, + DoneReason: "stop", + Metrics: api.Metrics{EvalCount: 5}, + } + + events2 := conv.Process(resp2) + + // Should have content_block_delta, content_block_stop, message_delta, message_stop + hasStop := false + for _, e := range events2 { + if e.Event == "message_stop" { + hasStop = true + } + } + if !hasStop { + t.Error("expected message_stop event in final chunk") + } +} + +func TestStreamConverter_WithToolCalls(t *testing.T) { + conv := 
NewStreamConverter("msg_123", "test-model") + + resp := api.ChatResponse{ + Model: "test-model", + Message: api.Message{ + Role: "assistant", + ToolCalls: []api.ToolCall{ + { + ID: "call_123", + Function: api.ToolCallFunction{ + Name: "get_weather", + Arguments: map[string]any{"location": "Paris"}, + }, + }, + }, + }, + Done: true, + DoneReason: "stop", + Metrics: api.Metrics{PromptEvalCount: 10, EvalCount: 5}, + } + + events := conv.Process(resp) + + hasToolStart := false + hasToolDelta := false + for _, e := range events { + if e.Event == "content_block_start" { + if start, ok := e.Data.(ContentBlockStartEvent); ok { + if start.ContentBlock.Type == "tool_use" { + hasToolStart = true + } + } + } + if e.Event == "content_block_delta" { + if delta, ok := e.Data.(ContentBlockDeltaEvent); ok { + if delta.Delta.Type == "input_json_delta" { + hasToolDelta = true + } + } + } + } + + if !hasToolStart { + t.Error("expected tool_use content_block_start event") + } + if !hasToolDelta { + t.Error("expected input_json_delta event") + } +} diff --git a/docs/README.md b/docs/README.md index 74544a321..4483eb550 100644 --- a/docs/README.md +++ b/docs/README.md @@ -14,6 +14,7 @@ * [API Reference](https://docs.ollama.com/api) * [Modelfile Reference](https://docs.ollama.com/modelfile) * [OpenAI Compatibility](https://docs.ollama.com/api/openai-compatibility) +* [Anthropic Compatibility](./api/anthropic-compatibility.mdx) ### Resources diff --git a/docs/docs.json b/docs/docs.json index 71a6f17a0..47b865d20 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -139,7 +139,8 @@ "/api/streaming", "/api/usage", "/api/errors", - "/api/openai-compatibility" + "/api/openai-compatibility", + "/api/anthropic-compatibility" ] }, { diff --git a/middleware/anthropic_test.go b/middleware/anthropic_test.go new file mode 100644 index 000000000..81c68cce1 --- /dev/null +++ b/middleware/anthropic_test.go @@ -0,0 +1,487 @@ +package middleware + +import ( + "bytes" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/gin-gonic/gin" + "github.com/google/go-cmp/cmp" + + "github.com/ollama/ollama/anthropic" + "github.com/ollama/ollama/api" +) + +func captureAnthropicRequest(capturedRequest any) gin.HandlerFunc { + return func(c *gin.Context) { + bodyBytes, _ := io.ReadAll(c.Request.Body) + c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes)) + _ = json.Unmarshal(bodyBytes, capturedRequest) + c.Next() + } +} + +func TestAnthropicMessagesMiddleware(t *testing.T) { + type testCase struct { + name string + body string + req api.ChatRequest + err anthropic.ErrorResponse + } + + var capturedRequest *api.ChatRequest + stream := true + + testCases := []testCase{ + { + name: "basic message", + body: `{ + "model": "test-model", + "max_tokens": 1024, + "messages": [ + {"role": "user", "content": "Hello"} + ] + }`, + req: api.ChatRequest{ + Model: "test-model", + Messages: []api.Message{ + {Role: "user", Content: "Hello"}, + }, + Options: map[string]any{"num_predict": 1024}, + Stream: &False, + }, + }, + { + name: "with system prompt", + body: `{ + "model": "test-model", + "max_tokens": 1024, + "system": "You are helpful.", + "messages": [ + {"role": "user", "content": "Hello"} + ] + }`, + req: api.ChatRequest{ + Model: "test-model", + Messages: []api.Message{ + {Role: "system", Content: "You are helpful."}, + {Role: "user", Content: "Hello"}, + }, + Options: map[string]any{"num_predict": 1024}, + Stream: &False, + }, + }, + { + name: "with options", + body: `{ + "model": "test-model", + 
"max_tokens": 2048, + "temperature": 0.7, + "top_p": 0.9, + "top_k": 40, + "stop_sequences": ["\n", "END"], + "messages": [ + {"role": "user", "content": "Hello"} + ] + }`, + req: api.ChatRequest{ + Model: "test-model", + Messages: []api.Message{ + {Role: "user", Content: "Hello"}, + }, + Options: map[string]any{ + "num_predict": 2048, + "temperature": 0.7, + "top_p": 0.9, + "top_k": 40, + "stop": []string{"\n", "END"}, + }, + Stream: &False, + }, + }, + { + name: "streaming", + body: `{ + "model": "test-model", + "max_tokens": 1024, + "stream": true, + "messages": [ + {"role": "user", "content": "Hello"} + ] + }`, + req: api.ChatRequest{ + Model: "test-model", + Messages: []api.Message{ + {Role: "user", Content: "Hello"}, + }, + Options: map[string]any{"num_predict": 1024}, + Stream: &stream, + }, + }, + { + name: "with tools", + body: `{ + "model": "test-model", + "max_tokens": 1024, + "messages": [ + {"role": "user", "content": "What's the weather?"} + ], + "tools": [{ + "name": "get_weather", + "description": "Get current weather", + "input_schema": { + "type": "object", + "properties": { + "location": {"type": "string"} + }, + "required": ["location"] + } + }] + }`, + req: api.ChatRequest{ + Model: "test-model", + Messages: []api.Message{ + {Role: "user", Content: "What's the weather?"}, + }, + Tools: []api.Tool{ + { + Type: "function", + Function: api.ToolFunction{ + Name: "get_weather", + Description: "Get current weather", + Parameters: api.ToolFunctionParameters{ + Type: "object", + Required: []string{"location"}, + Properties: map[string]api.ToolProperty{ + "location": {Type: api.PropertyType{"string"}}, + }, + }, + }, + }, + }, + Options: map[string]any{"num_predict": 1024}, + Stream: &False, + }, + }, + { + name: "with tool result", + body: `{ + "model": "test-model", + "max_tokens": 1024, + "messages": [ + {"role": "user", "content": "What's the weather?"}, + {"role": "assistant", "content": [ + {"type": "tool_use", "id": "call_123", "name": "get_weather", "input": {"location": "Paris"}} + ]}, + {"role": "user", "content": [ + {"type": "tool_result", "tool_use_id": "call_123", "content": "Sunny, 22°C"} + ]} + ] + }`, + req: api.ChatRequest{ + Model: "test-model", + Messages: []api.Message{ + {Role: "user", Content: "What's the weather?"}, + { + Role: "assistant", + ToolCalls: []api.ToolCall{ + { + ID: "call_123", + Function: api.ToolCallFunction{ + Name: "get_weather", + Arguments: api.ToolCallFunctionArguments{"location": "Paris"}, + }, + }, + }, + }, + {Role: "tool", Content: "Sunny, 22°C", ToolCallID: "call_123"}, + }, + Options: map[string]any{"num_predict": 1024}, + Stream: &False, + }, + }, + { + name: "with thinking enabled", + body: `{ + "model": "test-model", + "max_tokens": 1024, + "thinking": {"type": "enabled", "budget_tokens": 1000}, + "messages": [ + {"role": "user", "content": "Hello"} + ] + }`, + req: api.ChatRequest{ + Model: "test-model", + Messages: []api.Message{ + {Role: "user", Content: "Hello"}, + }, + Options: map[string]any{"num_predict": 1024}, + Stream: &False, + Think: &api.ThinkValue{Value: true}, + }, + }, + { + name: "missing model error", + body: `{ + "max_tokens": 1024, + "messages": [ + {"role": "user", "content": "Hello"} + ] + }`, + err: anthropic.ErrorResponse{ + Type: "error", + Error: anthropic.Error{ + Type: "invalid_request_error", + Message: "model is required", + }, + }, + }, + { + name: "missing max_tokens error", + body: `{ + "model": "test-model", + "messages": [ + {"role": "user", "content": "Hello"} + ] + }`, + err: 
anthropic.ErrorResponse{ + Type: "error", + Error: anthropic.Error{ + Type: "invalid_request_error", + Message: "max_tokens is required and must be positive", + }, + }, + }, + { + name: "missing messages error", + body: `{ + "model": "test-model", + "max_tokens": 1024 + }`, + err: anthropic.ErrorResponse{ + Type: "error", + Error: anthropic.Error{ + Type: "invalid_request_error", + Message: "messages is required", + }, + }, + }, + { + name: "tool_use missing id error", + body: `{ + "model": "test-model", + "max_tokens": 1024, + "messages": [ + {"role": "assistant", "content": [ + {"type": "tool_use", "name": "test"} + ]} + ] + }`, + err: anthropic.ErrorResponse{ + Type: "error", + Error: anthropic.Error{ + Type: "invalid_request_error", + Message: "tool_use block missing required 'id' field", + }, + }, + }, + } + + endpoint := func(c *gin.Context) { + c.Status(http.StatusOK) + } + + gin.SetMode(gin.TestMode) + router := gin.New() + router.Use(AnthropicMessagesMiddleware(), captureAnthropicRequest(&capturedRequest)) + router.Handle(http.MethodPost, "/v1/messages", endpoint) + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + req, _ := http.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(tc.body)) + req.Header.Set("Content-Type", "application/json") + + defer func() { capturedRequest = nil }() + + resp := httptest.NewRecorder() + router.ServeHTTP(resp, req) + + if tc.err.Type != "" { + // Expect error + if resp.Code == http.StatusOK { + t.Fatalf("expected error response, got 200 OK") + } + var errResp anthropic.ErrorResponse + if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil { + t.Fatalf("failed to unmarshal error: %v", err) + } + if errResp.Type != tc.err.Type { + t.Errorf("expected error type %q, got %q", tc.err.Type, errResp.Type) + } + if errResp.Error.Type != tc.err.Error.Type { + t.Errorf("expected error.type %q, got %q", tc.err.Error.Type, errResp.Error.Type) + } + if errResp.Error.Message != tc.err.Error.Message { + t.Errorf("expected error.message %q, got %q", tc.err.Error.Message, errResp.Error.Message) + } + return + } + + if resp.Code != http.StatusOK { + t.Fatalf("unexpected status code: %d, body: %s", resp.Code, resp.Body.String()) + } + + if capturedRequest == nil { + t.Fatal("request was not captured") + } + + // Compare relevant fields + if capturedRequest.Model != tc.req.Model { + t.Errorf("model mismatch: got %q, want %q", capturedRequest.Model, tc.req.Model) + } + + if diff := cmp.Diff(tc.req.Messages, capturedRequest.Messages); diff != "" { + t.Errorf("messages mismatch (-want +got):\n%s", diff) + } + + if tc.req.Stream != nil && capturedRequest.Stream != nil { + if *tc.req.Stream != *capturedRequest.Stream { + t.Errorf("stream mismatch: got %v, want %v", *capturedRequest.Stream, *tc.req.Stream) + } + } + + if tc.req.Think != nil { + if capturedRequest.Think == nil { + t.Error("expected Think to be set") + } else if capturedRequest.Think.Value != tc.req.Think.Value { + t.Errorf("Think mismatch: got %v, want %v", capturedRequest.Think.Value, tc.req.Think.Value) + } + } + }) + } +} + +func TestAnthropicMessagesMiddleware_Headers(t *testing.T) { + gin.SetMode(gin.TestMode) + + t.Run("streaming sets correct headers", func(t *testing.T) { + router := gin.New() + router.Use(AnthropicMessagesMiddleware()) + router.POST("/v1/messages", func(c *gin.Context) { + // Check headers were set + if c.Writer.Header().Get("Content-Type") != "text/event-stream" { + t.Errorf("expected Content-Type text/event-stream, got %q", 
c.Writer.Header().Get("Content-Type")) + } + if c.Writer.Header().Get("Cache-Control") != "no-cache" { + t.Errorf("expected Cache-Control no-cache, got %q", c.Writer.Header().Get("Cache-Control")) + } + c.Status(http.StatusOK) + }) + + body := `{"model": "test", "max_tokens": 100, "stream": true, "messages": [{"role": "user", "content": "Hi"}]}` + req, _ := http.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + + resp := httptest.NewRecorder() + router.ServeHTTP(resp, req) + }) +} + +func TestAnthropicMessagesMiddleware_InvalidJSON(t *testing.T) { + gin.SetMode(gin.TestMode) + router := gin.New() + router.Use(AnthropicMessagesMiddleware()) + router.POST("/v1/messages", func(c *gin.Context) { + c.Status(http.StatusOK) + }) + + req, _ := http.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(`{invalid json`)) + req.Header.Set("Content-Type", "application/json") + + resp := httptest.NewRecorder() + router.ServeHTTP(resp, req) + + if resp.Code != http.StatusBadRequest { + t.Errorf("expected status 400, got %d", resp.Code) + } + + var errResp anthropic.ErrorResponse + if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil { + t.Fatalf("failed to unmarshal error: %v", err) + } + + if errResp.Type != "error" { + t.Errorf("expected type 'error', got %q", errResp.Type) + } + if errResp.Error.Type != "invalid_request_error" { + t.Errorf("expected error type 'invalid_request_error', got %q", errResp.Error.Type) + } +} + +func TestAnthropicWriter_NonStreaming(t *testing.T) { + gin.SetMode(gin.TestMode) + + router := gin.New() + router.Use(AnthropicMessagesMiddleware()) + router.POST("/v1/messages", func(c *gin.Context) { + // Simulate Ollama response + resp := api.ChatResponse{ + Model: "test-model", + Message: api.Message{ + Role: "assistant", + Content: "Hello there!", + }, + Done: true, + DoneReason: "stop", + Metrics: api.Metrics{ + PromptEvalCount: 10, + EvalCount: 5, + }, + } + data, _ := json.Marshal(resp) + c.Writer.WriteHeader(http.StatusOK) + _, _ = c.Writer.Write(data) + }) + + body := `{"model": "test-model", "max_tokens": 100, "messages": [{"role": "user", "content": "Hi"}]}` + req, _ := http.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + + resp := httptest.NewRecorder() + router.ServeHTTP(resp, req) + + if resp.Code != http.StatusOK { + t.Fatalf("expected status 200, got %d", resp.Code) + } + + var result anthropic.MessagesResponse + if err := json.Unmarshal(resp.Body.Bytes(), &result); err != nil { + t.Fatalf("failed to unmarshal response: %v", err) + } + + if result.Type != "message" { + t.Errorf("expected type 'message', got %q", result.Type) + } + if result.Role != "assistant" { + t.Errorf("expected role 'assistant', got %q", result.Role) + } + if len(result.Content) != 1 { + t.Fatalf("expected 1 content block, got %d", len(result.Content)) + } + if result.Content[0].Text != "Hello there!" 
{ + t.Errorf("expected text 'Hello there!', got %q", result.Content[0].Text) + } + if result.StopReason != "end_turn" { + t.Errorf("expected stop_reason 'end_turn', got %q", result.StopReason) + } + if result.Usage.InputTokens != 10 { + t.Errorf("expected input_tokens 10, got %d", result.Usage.InputTokens) + } + if result.Usage.OutputTokens != 5 { + t.Errorf("expected output_tokens 5, got %d", result.Usage.OutputTokens) + } +} From ed1e17bb351a950bdc26944321e9fafd600a468a Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Sun, 4 Jan 2026 22:53:11 -0800 Subject: [PATCH 03/13] anthropic: fix error handling and update docs - Add proper error handling for JSON marshal in StreamConverter to prevent corrupted streams when tool arguments cannot be serialized - Add tests for unmarshalable arguments and mixed validity scenarios - Fix documentation typo and update recommended models to qwen3-coder --- anthropic/anthropic.go | 10 ++- anthropic/anthropic_test.go | 110 +++++++++++++++++++++++++++ docs/api/anthropic-compatibility.mdx | 59 ++++++++++---- 3 files changed, 162 insertions(+), 17 deletions(-) diff --git a/anthropic/anthropic.go b/anthropic/anthropic.go index ef0bdd953..e13612ac3 100644 --- a/anthropic/anthropic.go +++ b/anthropic/anthropic.go @@ -1,4 +1,3 @@ -// Package anthropic provides core transformation logic for compatibility with the Anthropic Messages API package anthropic import ( @@ -7,6 +6,7 @@ import ( "encoding/json" "errors" "fmt" + "log/slog" "net/http" "strings" "time" @@ -673,6 +673,13 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent { c.textStarted = false } + // Marshal arguments first to check for errors before starting block + argsJSON, err := json.Marshal(tc.Function.Arguments) + if err != nil { + slog.Error("failed to marshal tool arguments", "error", err, "tool_id", tc.ID) + continue + } + // Start tool use block events = append(events, StreamEvent{ Event: "content_block_start", @@ -689,7 +696,6 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent { }) // Send input as JSON delta - argsJSON, _ := json.Marshal(tc.Function.Arguments) events = append(events, StreamEvent{ Event: "content_block_delta", Data: ContentBlockDeltaEvent{ diff --git a/anthropic/anthropic_test.go b/anthropic/anthropic_test.go index 31a2ec67c..e331f6dce 100644 --- a/anthropic/anthropic_test.go +++ b/anthropic/anthropic_test.go @@ -665,3 +665,113 @@ func TestStreamConverter_WithToolCalls(t *testing.T) { t.Error("expected input_json_delta event") } } + +func TestStreamConverter_ToolCallWithUnmarshalableArgs(t *testing.T) { + // Test that unmarshalable arguments (like channels) are handled gracefully + // and don't cause a panic or corrupt stream + conv := NewStreamConverter("msg_123", "test-model") + + // Create a channel which cannot be JSON marshaled + unmarshalable := make(chan int) + + resp := api.ChatResponse{ + Model: "test-model", + Message: api.Message{ + Role: "assistant", + ToolCalls: []api.ToolCall{ + { + ID: "call_bad", + Function: api.ToolCallFunction{ + Name: "bad_function", + Arguments: map[string]any{"channel": unmarshalable}, + }, + }, + }, + }, + Done: true, + DoneReason: "stop", + } + + // Should not panic and should skip the unmarshalable tool call + events := conv.Process(resp) + + // Verify no tool_use block was started (since marshal failed before block start) + hasToolStart := false + for _, e := range events { + if e.Event == "content_block_start" { + if start, ok := e.Data.(ContentBlockStartEvent); ok { + if start.ContentBlock.Type == 
"tool_use" { + hasToolStart = true + } + } + } + } + + if hasToolStart { + t.Error("expected no tool_use block when arguments cannot be marshaled") + } +} + +func TestStreamConverter_MultipleToolCallsWithMixedValidity(t *testing.T) { + // Test that valid tool calls still work when mixed with invalid ones + conv := NewStreamConverter("msg_123", "test-model") + + unmarshalable := make(chan int) + + resp := api.ChatResponse{ + Model: "test-model", + Message: api.Message{ + Role: "assistant", + ToolCalls: []api.ToolCall{ + { + ID: "call_good", + Function: api.ToolCallFunction{ + Name: "good_function", + Arguments: map[string]any{"location": "Paris"}, + }, + }, + { + ID: "call_bad", + Function: api.ToolCallFunction{ + Name: "bad_function", + Arguments: map[string]any{"channel": unmarshalable}, + }, + }, + }, + }, + Done: true, + DoneReason: "stop", + } + + events := conv.Process(resp) + + // Count tool_use blocks - should only have 1 (the valid one) + toolStartCount := 0 + toolDeltaCount := 0 + for _, e := range events { + if e.Event == "content_block_start" { + if start, ok := e.Data.(ContentBlockStartEvent); ok { + if start.ContentBlock.Type == "tool_use" { + toolStartCount++ + if start.ContentBlock.Name != "good_function" { + t.Errorf("expected tool name 'good_function', got %q", start.ContentBlock.Name) + } + } + } + } + if e.Event == "content_block_delta" { + if delta, ok := e.Data.(ContentBlockDeltaEvent); ok { + if delta.Delta.Type == "input_json_delta" { + toolDeltaCount++ + } + } + } + } + + if toolStartCount != 1 { + t.Errorf("expected 1 tool_use block, got %d", toolStartCount) + } + if toolDeltaCount != 1 { + t.Errorf("expected 1 input_json_delta, got %d", toolDeltaCount) + } +} diff --git a/docs/api/anthropic-compatibility.mdx b/docs/api/anthropic-compatibility.mdx index b8953d1d8..67c266b17 100644 --- a/docs/api/anthropic-compatibility.mdx +++ b/docs/api/anthropic-compatibility.mdx @@ -4,6 +4,16 @@ title: Anthropic compatibility Ollama provides compatibility with the [Anthropic Messages API](https://docs.anthropic.com/en/api/messages) to help connect existing applications to Ollama, including tools like Claude Code. +## Recommended models + +For coding use cases, models like `qwen3-coder` are recommended. + +Pull a model before use: +```shell +ollama pull qwen3-coder +ollama pull glm-4.7:cloud +``` + ## Usage ### Environment variables @@ -28,7 +38,7 @@ client = anthropic.Anthropic( ) message = client.messages.create( - model='llama3.2:3b', + model='qwen3-coder', max_tokens=1024, messages=[ {'role': 'user', 'content': 'Hello, how are you?'} @@ -46,7 +56,7 @@ const anthropic = new Anthropic({ }); const message = await anthropic.messages.create({ - model: "llama3.2:3b", + model: "qwen3-coder", max_tokens: 1024, messages: [{ role: "user", content: "Hello, how are you?" }], }); @@ -60,7 +70,7 @@ curl -X POST http://localhost:11434/v1/messages \ -H "x-api-key: ollama" \ -H "anthropic-version: 2023-06-01" \ -d '{ - "model": "llama3.2:3b", + "model": "qwen3-coder", "max_tokens": 1024, "messages": [{ "role": "user", "content": "Hello, how are you?" 
}] }' @@ -81,7 +91,7 @@ client = anthropic.Anthropic( ) with client.messages.stream( - model='llama3.2:3b', + model='qwen3-coder', max_tokens=1024, messages=[{'role': 'user', 'content': 'Count from 1 to 10'}] ) as stream: @@ -98,7 +108,7 @@ const anthropic = new Anthropic({ }); const stream = await anthropic.messages.stream({ - model: "llama3.2:3b", + model: "qwen3-coder", max_tokens: 1024, messages: [{ role: "user", content: "Count from 1 to 10" }], }); @@ -117,7 +127,7 @@ for await (const event of stream) { curl -X POST http://localhost:11434/v1/messages \ -H "Content-Type: application/json" \ -d '{ - "model": "llama3.2:3b", + "model": "qwen3-coder", "max_tokens": 1024, "stream": true, "messages": [{ "role": "user", "content": "Count from 1 to 10" }] @@ -139,7 +149,7 @@ client = anthropic.Anthropic( ) message = client.messages.create( - model='llama3.2:3b', + model='qwen3-coder', max_tokens=1024, tools=[ { @@ -170,7 +180,7 @@ for block in message.content: curl -X POST http://localhost:11434/v1/messages \ -H "Content-Type: application/json" \ -d '{ - "model": "llama3.2:3b", + "model": "qwen3-coder", "max_tokens": 1024, "tools": [ { @@ -199,7 +209,7 @@ curl -X POST http://localhost:11434/v1/messages \ [Claude Code](https://docs.anthropic.com/en/docs/claude-code) can be configured to use Ollama as its backend: ```shell -ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model llama3.2:3b +ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3-coder ``` Or set the environment variables in your shell profile: @@ -212,9 +222,13 @@ export ANTHROPIC_API_KEY=ollama Then run Claude Code with any Ollama model: ```shell -claude --model llama3.2:3b -claude --model qwen3:8b -claude --model deepseek-r1:14b +# Local models +claude --model qwen3-coder +claude --model gpt-oss:20b + +# Cloud models +claude --model glm-4.7:cloud +claude --model minimax-m2.1:cloud ``` ## Endpoints @@ -277,18 +291,33 @@ claude --model deepseek-r1:14b ## Models -Before using a model, pull it locally with `ollama pull`: +Ollama supports both local and cloud models. + +### Local models + +Pull a local model before use: ```shell -ollama pull llama3.2:3b +ollama pull qwen3-coder ``` +Recommended local models: +- `qwen3-coder` - Excellent for coding tasks +- `gpt-oss:20b` - Strong general-purpose model + +### Cloud models + +Cloud models are available immediately without pulling: + +- `glm-4.7:cloud` - High-performance cloud model +- `minimax-m2.1:cloud` - Fast cloud model + ### Default model names For tooling that relies on default Anthropic model names such as `claude-3-5-sonnet`, use `ollama cp` to copy an existing model name: ```shell -ollama cp llama3.2:3b claude-3-5-sonnet +ollama cp qwen3-coder claude-3-5-sonnet ``` Afterwards, this new model name can be specified in the `model` field: From 90cf232df2bbdb09b0751086dc77129c0f34c071 Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Sun, 4 Jan 2026 23:11:01 -0800 Subject: [PATCH 04/13] anthropic: remove redundant comments Remove obvious comments that don't add value (e.g., "// Convert messages", "// Handle done"). Keep godoc comments and those explaining API mappings. 
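
As an illustrative sketch (the removed comment below is taken from this
diff; the loop body is abridged to its first two lines):

    -	// Convert messages
     	for _, msg := range r.Messages {
     		converted, err := convertMessage(msg)

The comment restates what the loop already says, so it is removed, while
field notes such as "// string or []ContentBlock" remain.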
--- anthropic/anthropic.go | 27 ++------------------------- middleware/anthropic.go | 4 ---- 2 files changed, 2 insertions(+), 29 deletions(-) diff --git a/anthropic/anthropic.go b/anthropic/anthropic.go index e13612ac3..28112ac0f 100644 --- a/anthropic/anthropic.go +++ b/anthropic/anthropic.go @@ -232,7 +232,6 @@ type StreamErrorEvent struct { func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) { var messages []api.Message - // Handle system prompt if r.System != nil { switch sys := r.System.(type) { case string: @@ -257,7 +256,6 @@ func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) { } } - // Convert messages for _, msg := range r.Messages { converted, err := convertMessage(msg) if err != nil { @@ -266,7 +264,6 @@ func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) { messages = append(messages, converted...) } - // Build options options := make(map[string]any) options["num_predict"] = r.MaxTokens @@ -287,7 +284,6 @@ func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) { options["stop"] = r.StopSequences } - // Convert tools var tools api.Tools for _, t := range r.Tools { tool, err := convertTool(t) @@ -297,7 +293,6 @@ func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) { tools = append(tools, tool) } - // Handle thinking var think *api.ThinkValue if r.Thinking != nil && r.Thinking.Type == "enabled" { think = &api.ThinkValue{Value: true} @@ -325,7 +320,6 @@ func convertMessage(msg MessageParam) ([]api.Message, error) { messages = append(messages, api.Message{Role: role, Content: content}) case []any: - // Handle array of content blocks var textContent strings.Builder var images []api.ImageData var toolCalls []api.ToolCall @@ -360,6 +354,8 @@ func convertMessage(msg MessageParam) ([]api.Message, error) { return nil, fmt.Errorf("invalid base64 image data: %w", err) } images = append(images, decoded) + } else { + return nil, fmt.Errorf("invalid image source type: %s. 
Only base64 images are supported.", sourceType) } // URL images would need to be fetched - skip for now @@ -391,7 +387,6 @@ func convertMessage(msg MessageParam) ([]api.Message, error) { case string: resultContent = c case []any: - // Extract text from content blocks for _, cb := range c { if cbMap, ok := cb.(map[string]any); ok { if cbMap["type"] == "text" { @@ -416,7 +411,6 @@ func convertMessage(msg MessageParam) ([]api.Message, error) { } } - // Build the main message if textContent.Len() > 0 || len(images) > 0 || len(toolCalls) > 0 { m := api.Message{ Role: role, @@ -461,7 +455,6 @@ func convertTool(t Tool) (api.Tool, error) { func ToMessagesResponse(id string, r api.ChatResponse) MessagesResponse { var content []ContentBlock - // Add thinking block if present if r.Message.Thinking != "" { content = append(content, ContentBlock{ Type: "thinking", @@ -469,7 +462,6 @@ func ToMessagesResponse(id string, r api.ChatResponse) MessagesResponse { }) } - // Add text content if present if r.Message.Content != "" { content = append(content, ContentBlock{ Type: "text", @@ -477,7 +469,6 @@ func ToMessagesResponse(id string, r api.ChatResponse) MessagesResponse { }) } - // Add tool use blocks for _, tc := range r.Message.ToolCalls { content = append(content, ContentBlock{ Type: "tool_use", @@ -487,7 +478,6 @@ func ToMessagesResponse(id string, r api.ChatResponse) MessagesResponse { }) } - // Map stop reason stopReason := mapStopReason(r.DoneReason, len(r.Message.ToolCalls) > 0) return MessagesResponse{ @@ -537,7 +527,6 @@ type StreamConverter struct { toolCallsSent map[string]bool } -// NewStreamConverter creates a new StreamConverter func NewStreamConverter(id, model string) *StreamConverter { return &StreamConverter{ ID: id, @@ -557,7 +546,6 @@ type StreamEvent struct { func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent { var events []StreamEvent - // First write: emit message_start if c.firstWrite { c.firstWrite = false c.inputTokens = r.Metrics.PromptEvalCount @@ -581,7 +569,6 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent { }) } - // Handle thinking content if r.Message.Thinking != "" && !c.thinkingDone { if !c.thinkingStarted { c.thinkingStarted = true @@ -611,9 +598,7 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent { }) } - // Handle text content if r.Message.Content != "" { - // Close thinking block if it was open if c.thinkingStarted && !c.thinkingDone { c.thinkingDone = true events = append(events, StreamEvent{ @@ -654,13 +639,11 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent { }) } - // Handle tool calls for _, tc := range r.Message.ToolCalls { if c.toolCallsSent[tc.ID] { continue } - // Close any previous block if c.textStarted { events = append(events, StreamEvent{ Event: "content_block_stop", @@ -673,14 +656,12 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent { c.textStarted = false } - // Marshal arguments first to check for errors before starting block argsJSON, err := json.Marshal(tc.Function.Arguments) if err != nil { slog.Error("failed to marshal tool arguments", "error", err, "tool_id", tc.ID) continue } - // Start tool use block events = append(events, StreamEvent{ Event: "content_block_start", Data: ContentBlockStartEvent{ @@ -695,7 +676,6 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent { }, }) - // Send input as JSON delta events = append(events, StreamEvent{ Event: "content_block_delta", Data: ContentBlockDeltaEvent{ @@ -708,7 +688,6 @@ func (c 
*StreamConverter) Process(r api.ChatResponse) []StreamEvent { }, }) - // Close tool use block events = append(events, StreamEvent{ Event: "content_block_stop", Data: ContentBlockStopEvent{ @@ -721,9 +700,7 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent { c.contentIndex++ } - // Handle done if r.Done { - // Close any open block if c.textStarted { events = append(events, StreamEvent{ Event: "content_block_stop", diff --git a/middleware/anthropic.go b/middleware/anthropic.go index a5f0ed8d6..533b0de1e 100644 --- a/middleware/anthropic.go +++ b/middleware/anthropic.go @@ -72,7 +72,6 @@ func (w *AnthropicWriter) writeResponse(data []byte) (int, error) { return len(data), nil } - // Non-streaming response w.ResponseWriter.Header().Set("Content-Type", "application/json") response := anthropic.ToMessagesResponse(w.id, chatResponse) return len(data), json.NewEncoder(w.ResponseWriter).Encode(response) @@ -97,7 +96,6 @@ func AnthropicMessagesMiddleware() gin.HandlerFunc { return } - // Validate required fields if req.Model == "" { c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, "model is required")) return @@ -113,7 +111,6 @@ func AnthropicMessagesMiddleware() gin.HandlerFunc { return } - // Convert to internal format chatReq, err := anthropic.FromMessagesRequest(req) if err != nil { c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, err.Error())) @@ -138,7 +135,6 @@ func AnthropicMessagesMiddleware() gin.HandlerFunc { converter: anthropic.NewStreamConverter(messageID, req.Model), } - // Set headers based on streaming mode if req.Stream { c.Writer.Header().Set("Content-Type", "text/event-stream") c.Writer.Header().Set("Cache-Control", "no-cache") From 5ba2092f0ae038ce3a7480d3dfaed1fc5f780e95 Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Sun, 4 Jan 2026 23:48:46 -0800 Subject: [PATCH 05/13] anthropic: fix streaming with SDK by including empty fields Remove omitempty from Text and Thinking fields in ContentBlock struct. The Anthropic SDK requires these fields to be present (even if empty) in content_block_start events to properly accumulate streaming deltas. --- anthropic/anthropic.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/anthropic/anthropic.go b/anthropic/anthropic.go index 28112ac0f..3a1d28099 100644 --- a/anthropic/anthropic.go +++ b/anthropic/anthropic.go @@ -82,8 +82,8 @@ type MessageParam struct { type ContentBlock struct { Type string `json:"type"` // text, image, tool_use, tool_result, thinking - // For text blocks - Text string `json:"text,omitempty"` + // For text blocks (no omitempty - SDK requires field to be present for accumulation) + Text string `json:"text"` // For image blocks Source *ImageSource `json:"source,omitempty"` @@ -98,8 +98,8 @@ type ContentBlock struct { Content any `json:"content,omitempty"` // string or []ContentBlock IsError bool `json:"is_error,omitempty"` - // For thinking blocks - Thinking string `json:"thinking,omitempty"` + // For thinking blocks (no omitempty - SDK requires field to be present for accumulation) + Thinking string `json:"thinking"` Signature string `json:"signature,omitempty"` } From b44d9b3347921ca7dd811cce2cc62d1c5891ff10 Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Sun, 4 Jan 2026 23:50:59 -0800 Subject: [PATCH 06/13] anthropic: add tests for SDK-required empty fields Add tests documenting that Text and Thinking fields must be present in JSON output even when empty. 
The Anthropic SDK requires these fields in content_block_start events to accumulate streaming deltas properly. Tests verify: - ContentBlock JSON includes empty text/thinking fields - StreamConverter emits content_block_start with required fields --- anthropic/anthropic_test.go | 128 ++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/anthropic/anthropic_test.go b/anthropic/anthropic_test.go index e331f6dce..2c4f5a68a 100644 --- a/anthropic/anthropic_test.go +++ b/anthropic/anthropic_test.go @@ -775,3 +775,131 @@ func TestStreamConverter_MultipleToolCallsWithMixedValidity(t *testing.T) { t.Errorf("expected 1 input_json_delta, got %d", toolDeltaCount) } } + +// TestContentBlockJSON_EmptyFieldsPresent verifies that empty text and thinking fields +// are serialized in JSON output. The Anthropic SDK requires these fields to be present +// (even when empty) in content_block_start events to properly accumulate streaming deltas. +// Without these fields, the SDK throws: "TypeError: unsupported operand type(s) for +=: 'NoneType' and 'str'" +func TestContentBlockJSON_EmptyFieldsPresent(t *testing.T) { + tests := []struct { + name string + block ContentBlock + wantKeys []string + }{ + { + name: "text block includes empty text field", + block: ContentBlock{ + Type: "text", + Text: "", + }, + wantKeys: []string{"type", "text"}, + }, + { + name: "thinking block includes empty thinking field", + block: ContentBlock{ + Type: "thinking", + Thinking: "", + }, + wantKeys: []string{"type", "thinking"}, + }, + { + name: "text block with content", + block: ContentBlock{ + Type: "text", + Text: "hello", + }, + wantKeys: []string{"type", "text"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + data, err := json.Marshal(tt.block) + if err != nil { + t.Fatalf("failed to marshal: %v", err) + } + + var result map[string]any + if err := json.Unmarshal(data, &result); err != nil { + t.Fatalf("failed to unmarshal: %v", err) + } + + for _, key := range tt.wantKeys { + if _, ok := result[key]; !ok { + t.Errorf("expected key %q to be present in JSON output, got: %s", key, string(data)) + } + } + }) + } +} + +// TestStreamConverter_ContentBlockStartIncludesEmptyFields verifies that content_block_start +// events include the required empty fields for SDK compatibility. 
+func TestStreamConverter_ContentBlockStartIncludesEmptyFields(t *testing.T) { + t.Run("text block start includes empty text", func(t *testing.T) { + conv := NewStreamConverter("msg_123", "test-model") + + resp := api.ChatResponse{ + Model: "test-model", + Message: api.Message{Role: "assistant", Content: "hello"}, + } + + events := conv.Process(resp) + + var foundTextStart bool + for _, e := range events { + if e.Event == "content_block_start" { + if start, ok := e.Data.(ContentBlockStartEvent); ok { + if start.ContentBlock.Type == "text" { + foundTextStart = true + // Marshal and verify the text field is present + data, _ := json.Marshal(start) + var result map[string]any + json.Unmarshal(data, &result) + cb := result["content_block"].(map[string]any) + if _, ok := cb["text"]; !ok { + t.Error("content_block_start for text should include 'text' field") + } + } + } + } + } + + if !foundTextStart { + t.Error("expected text content_block_start event") + } + }) + + t.Run("thinking block start includes empty thinking", func(t *testing.T) { + conv := NewStreamConverter("msg_123", "test-model") + + resp := api.ChatResponse{ + Model: "test-model", + Message: api.Message{Role: "assistant", Thinking: "let me think..."}, + } + + events := conv.Process(resp) + + var foundThinkingStart bool + for _, e := range events { + if e.Event == "content_block_start" { + if start, ok := e.Data.(ContentBlockStartEvent); ok { + if start.ContentBlock.Type == "thinking" { + foundThinkingStart = true + data, _ := json.Marshal(start) + var result map[string]any + json.Unmarshal(data, &result) + cb := result["content_block"].(map[string]any) + if _, ok := cb["thinking"]; !ok { + t.Error("content_block_start for thinking should include 'thinking' field") + } + } + } + } + } + + if !foundThinkingStart { + t.Error("expected thinking content_block_start event") + } + }) +} From 515c46c176a2981acd02c1578c4499fa26b5dbb7 Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Mon, 5 Jan 2026 00:32:07 -0800 Subject: [PATCH 07/13] docs: add Claude Code integration guide --- docs/api/anthropic-compatibility.mdx | 4 +- docs/docs.json | 9 +++- docs/integrations/claude-code.mdx | 69 ++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 4 deletions(-) create mode 100644 docs/integrations/claude-code.mdx diff --git a/docs/api/anthropic-compatibility.mdx b/docs/api/anthropic-compatibility.mdx index 67c266b17..61f79f139 100644 --- a/docs/api/anthropic-compatibility.mdx +++ b/docs/api/anthropic-compatibility.mdx @@ -6,7 +6,7 @@ Ollama provides compatibility with the [Anthropic Messages API](https://docs.ant ## Recommended models -For coding use cases, models like `qwen3-coder` are recommended. +For coding use cases, models like `glm-4.7:cloud`, `minimax-m2.1:cloud`, and `qwen3-coder` are recommended. 
 Pull a model before use:
 ```shell
@@ -206,7 +206,7 @@ curl -X POST http://localhost:11434/v1/messages \
 
 ## Using with Claude Code
 
-[Claude Code](https://docs.anthropic.com/en/docs/claude-code) can be configured to use Ollama as its backend:
+[Claude Code](https://code.claude.com/docs/en/overview) can be configured to use Ollama as its backend:
 
 ```shell
 ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3-coder
diff --git a/docs/docs.json b/docs/docs.json
index 47b865d20..810e94733 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -32,7 +32,9 @@
     "codeblocks": "system"
   },
   "contextual": {
-    "options": ["copy"]
+    "options": [
+      "copy"
+    ]
   },
   "navbar": {
     "links": [
@@ -52,7 +54,9 @@
     "display": "simple"
   },
   "examples": {
-    "languages": ["curl"]
+    "languages": [
+      "curl"
+    ]
   }
 },
 "redirects": [
@@ -97,6 +101,7 @@
   {
     "group": "Integrations",
     "pages": [
+      "/integrations/claude-code",
       "/integrations/vscode",
       "/integrations/jetbrains",
       "/integrations/codex",
diff --git a/docs/integrations/claude-code.mdx b/docs/integrations/claude-code.mdx
new file mode 100644
index 000000000..6d1d8322a
--- /dev/null
+++ b/docs/integrations/claude-code.mdx
@@ -0,0 +1,69 @@
+---
+title: Claude Code
+---
+
+## Install
+
+Install [Claude Code](https://code.claude.com/docs/en/overview):
+
+
+```shell macOS / Linux
+curl -fsSL https://claude.ai/install.sh | bash
+```
+
+```powershell Windows
+irm https://claude.ai/install.ps1 | iex
+```
+
+
+## Usage with Ollama
+
+Claude Code connects to Ollama using the Anthropic-compatible API.
+
+1. Set the environment variables:
+
+```shell
+export ANTHROPIC_BASE_URL=http://localhost:11434
+export ANTHROPIC_API_KEY=ollama
+```
+
+2. Run Claude Code with an Ollama model:
+
+```shell
+claude --model qwen3-coder
+```
+
+Or run with environment variables inline:
+
+```shell
+ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3-coder
+```
+
+## Connecting to ollama.com
+
+1. Create an [API key](https://ollama.com/settings/keys) on ollama.com
+2. Set the environment variables:
+
+```shell
+export ANTHROPIC_BASE_URL=https://ollama.com
+export ANTHROPIC_API_KEY=<api key>
+```
+
+3. Run Claude Code with a cloud model:
+
+```shell
+claude --model glm-4.7:cloud
+```
+
+## Recommended models
+
+### Cloud models
+- `glm-4.7:cloud` - High-performance cloud model
+- `minimax-m2.1:cloud` - Fast cloud model
+- `qwen3-coder:480b` - Large coding model
+
+### Local models
+- `qwen3-coder` - Excellent for coding tasks
+- `gpt-oss:20b` - Strong general-purpose model

From 6188e90aab0d88976ef28999c019632a45c9c750 Mon Sep 17 00:00:00 2001
From: ParthSareen
Date: Mon, 5 Jan 2026 00:43:02 -0800
Subject: [PATCH 08/13] anthropic: preserve messages with only thinking content

Fix edge case where messages containing only a thinking block (no text,
images, or tool calls) would be dropped. Add thinking != "" to the
condition that creates messages from content blocks.
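
For illustration, an assistant turn shaped like the following (the same
shape exercised by the new TestFromMessagesRequest_ThinkingOnlyBlock test)
was previously dropped during conversion because it contains no text,
images, or tool calls:

    MessageParam{
    	Role: "assistant",
    	Content: []any{
    		map[string]any{"type": "thinking", "thinking": "Let me think about this..."},
    	},
    }

It now converts to an api.Message whose Thinking field carries the block
content.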
--- anthropic/anthropic.go | 2 +- anthropic/anthropic_test.go | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/anthropic/anthropic.go b/anthropic/anthropic.go index 3a1d28099..bd1465792 100644 --- a/anthropic/anthropic.go +++ b/anthropic/anthropic.go @@ -411,7 +411,7 @@ func convertMessage(msg MessageParam) ([]api.Message, error) { } } - if textContent.Len() > 0 || len(images) > 0 || len(toolCalls) > 0 { + if textContent.Len() > 0 || len(images) > 0 || len(toolCalls) > 0 || thinking != "" { m := api.Message{ Role: role, Content: textContent.String(), diff --git a/anthropic/anthropic_test.go b/anthropic/anthropic_test.go index 2c4f5a68a..3cebe6710 100644 --- a/anthropic/anthropic_test.go +++ b/anthropic/anthropic_test.go @@ -312,6 +312,41 @@ func TestFromMessagesRequest_WithThinking(t *testing.T) { } } +// TestFromMessagesRequest_ThinkingOnlyBlock verifies that messages containing only +// a thinking block (no text, images, or tool calls) are preserved and not dropped. +func TestFromMessagesRequest_ThinkingOnlyBlock(t *testing.T) { + req := MessagesRequest{ + Model: "test-model", + MaxTokens: 1024, + Messages: []MessageParam{ + {Role: "user", Content: "Hello"}, + { + Role: "assistant", + Content: []any{ + map[string]any{ + "type": "thinking", + "thinking": "Let me think about this...", + }, + }, + }, + }, + } + + result, err := FromMessagesRequest(req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(result.Messages) != 2 { + t.Fatalf("expected 2 messages, got %d", len(result.Messages)) + } + + assistantMsg := result.Messages[1] + if assistantMsg.Thinking != "Let me think about this..." { + t.Errorf("expected thinking content, got %q", assistantMsg.Thinking) + } +} + func TestFromMessagesRequest_ToolUseMissingID(t *testing.T) { req := MessagesRequest{ Model: "test-model", From fa42204da87570b23ca6bb07d2c022582c555012 Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Mon, 5 Jan 2026 12:18:04 -0800 Subject: [PATCH 09/13] anthropic: use pointer types for Text and Thinking fields Use *string instead of string for Text and Thinking fields in ContentBlock so that omitempty works correctly: - nil pointer: field omitted from JSON (for blocks that don't use it) - ptr(""): field present as "" (for SDK streaming accumulation) - ptr("content"): field present with content This keeps the JSON output clean (text blocks don't have thinking field, thinking blocks don't have text field) while still satisfying SDK requirements for field presence during streaming. --- anthropic/anthropic.go | 27 +++++++++++++++++---------- anthropic/anthropic_test.go | 12 ++++++------ 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/anthropic/anthropic.go b/anthropic/anthropic.go index bd1465792..f4d9a6e23 100644 --- a/anthropic/anthropic.go +++ b/anthropic/anthropic.go @@ -78,12 +78,14 @@ type MessageParam struct { Content any `json:"content"` // string or []ContentBlock } -// ContentBlock represents a content block in a message +// ContentBlock represents a content block in a message. +// Text and Thinking use pointers so they serialize as the field being present (even if empty) +// only when set, which is required for SDK streaming accumulation. 
type ContentBlock struct { Type string `json:"type"` // text, image, tool_use, tool_result, thinking - // For text blocks (no omitempty - SDK requires field to be present for accumulation) - Text string `json:"text"` + // For text blocks - pointer so field only appears when set (SDK requires it for accumulation) + Text *string `json:"text,omitempty"` // For image blocks Source *ImageSource `json:"source,omitempty"` @@ -98,9 +100,9 @@ type ContentBlock struct { Content any `json:"content,omitempty"` // string or []ContentBlock IsError bool `json:"is_error,omitempty"` - // For thinking blocks (no omitempty - SDK requires field to be present for accumulation) - Thinking string `json:"thinking"` - Signature string `json:"signature,omitempty"` + // For thinking blocks - pointer so field only appears when set (SDK requires it for accumulation) + Thinking *string `json:"thinking,omitempty"` + Signature string `json:"signature,omitempty"` } // ImageSource represents the source of an image @@ -458,14 +460,14 @@ func ToMessagesResponse(id string, r api.ChatResponse) MessagesResponse { if r.Message.Thinking != "" { content = append(content, ContentBlock{ Type: "thinking", - Thinking: r.Message.Thinking, + Thinking: ptr(r.Message.Thinking), }) } if r.Message.Content != "" { content = append(content, ContentBlock{ Type: "text", - Text: r.Message.Content, + Text: ptr(r.Message.Content), }) } @@ -579,7 +581,7 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent { Index: c.contentIndex, ContentBlock: ContentBlock{ Type: "thinking", - Thinking: "", + Thinking: ptr(""), }, }, }) @@ -620,7 +622,7 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent { Index: c.contentIndex, ContentBlock: ContentBlock{ Type: "text", - Text: "", + Text: ptr(""), }, }, }) @@ -760,3 +762,8 @@ func generateID(prefix string) string { func GenerateMessageID() string { return generateID("msg") } + +// ptr returns a pointer to the given string value +func ptr(s string) *string { + return &s +} diff --git a/anthropic/anthropic_test.go b/anthropic/anthropic_test.go index 3cebe6710..8228bd37b 100644 --- a/anthropic/anthropic_test.go +++ b/anthropic/anthropic_test.go @@ -447,7 +447,7 @@ func TestToMessagesResponse_Basic(t *testing.T) { if len(result.Content) != 1 { t.Fatalf("expected 1 content block, got %d", len(result.Content)) } - if result.Content[0].Type != "text" || result.Content[0].Text != "Hello there!" { + if result.Content[0].Type != "text" || result.Content[0].Text == nil || *result.Content[0].Text != "Hello there!" { t.Errorf("unexpected content: %+v", result.Content[0]) } if result.StopReason != "end_turn" { @@ -516,8 +516,8 @@ func TestToMessagesResponse_WithThinking(t *testing.T) { if result.Content[0].Type != "thinking" { t.Errorf("expected first block type 'thinking', got %q", result.Content[0].Type) } - if result.Content[0].Thinking != "Let me think about this..." { - t.Errorf("unexpected thinking content: %q", result.Content[0].Thinking) + if result.Content[0].Thinking == nil || *result.Content[0].Thinking != "Let me think about this..." 
{ + t.Errorf("unexpected thinking content: %v", result.Content[0].Thinking) } if result.Content[1].Type != "text" { t.Errorf("expected second block type 'text', got %q", result.Content[1].Type) @@ -825,7 +825,7 @@ func TestContentBlockJSON_EmptyFieldsPresent(t *testing.T) { name: "text block includes empty text field", block: ContentBlock{ Type: "text", - Text: "", + Text: ptr(""), }, wantKeys: []string{"type", "text"}, }, @@ -833,7 +833,7 @@ func TestContentBlockJSON_EmptyFieldsPresent(t *testing.T) { name: "thinking block includes empty thinking field", block: ContentBlock{ Type: "thinking", - Thinking: "", + Thinking: ptr(""), }, wantKeys: []string{"type", "thinking"}, }, @@ -841,7 +841,7 @@ func TestContentBlockJSON_EmptyFieldsPresent(t *testing.T) { name: "text block with content", block: ContentBlock{ Type: "text", - Text: "hello", + Text: ptr("hello"), }, wantKeys: []string{"type", "text"}, }, From 9c27c7295244fa60a0e4d473e805a6ff1e84aad6 Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Mon, 5 Jan 2026 17:45:20 -0800 Subject: [PATCH 10/13] middleware: fix test for pointer type Text field --- middleware/anthropic_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/middleware/anthropic_test.go b/middleware/anthropic_test.go index 81c68cce1..5006a7cf0 100644 --- a/middleware/anthropic_test.go +++ b/middleware/anthropic_test.go @@ -472,8 +472,8 @@ func TestAnthropicWriter_NonStreaming(t *testing.T) { if len(result.Content) != 1 { t.Fatalf("expected 1 content block, got %d", len(result.Content)) } - if result.Content[0].Text != "Hello there!" { - t.Errorf("expected text 'Hello there!', got %q", result.Content[0].Text) + if result.Content[0].Text == nil || *result.Content[0].Text != "Hello there!" { + t.Errorf("expected text 'Hello there!', got %v", result.Content[0].Text) } if result.StopReason != "end_turn" { t.Errorf("expected stop_reason 'end_turn', got %q", result.StopReason) From c1a6aa8be512073033da11e12ce87083df5e3a92 Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Mon, 5 Jan 2026 18:24:33 -0800 Subject: [PATCH 11/13] docs: add JavaScript example for tool calling --- docs/api/anthropic-compatibility.mdx | 38 ++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/docs/api/anthropic-compatibility.mdx b/docs/api/anthropic-compatibility.mdx index 61f79f139..a0f2cd7fd 100644 --- a/docs/api/anthropic-compatibility.mdx +++ b/docs/api/anthropic-compatibility.mdx @@ -176,6 +176,44 @@ for block in message.content: print(f'Input: {block.input}') ``` +```javascript tools.js +import Anthropic from "@anthropic-ai/sdk"; + +const anthropic = new Anthropic({ + baseURL: "http://localhost:11434", + apiKey: "ollama", +}); + +const message = await anthropic.messages.create({ + model: "qwen3-coder", + max_tokens: 1024, + tools: [ + { + name: "get_weather", + description: "Get the current weather in a location", + input_schema: { + type: "object", + properties: { + location: { + type: "string", + description: "The city and state, e.g. San Francisco, CA", + }, + }, + required: ["location"], + }, + }, + ], + messages: [{ role: "user", content: "What's the weather in San Francisco?" 
}], +}); + +for (const block of message.content) { + if (block.type === "tool_use") { + console.log("Tool:", block.name); + console.log("Input:", block.input); + } +} +``` + ```shell tools.sh curl -X POST http://localhost:11434/v1/messages \ -H "Content-Type: application/json" \ From bd4ab011ac68ddb494d6e896edafaafbacb35097 Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Mon, 5 Jan 2026 18:58:24 -0800 Subject: [PATCH 12/13] middleware: use HTTP status code for Anthropic error mapping Use w.ResponseWriter.Status() instead of parsing StatusCode from JSON payload. routes.go typically sends errors as gin.H{"error": "..."} without a StatusCode field, causing all errors to be mapped to "api_error" instead of the appropriate type (not_found_error, invalid_request_error, etc.). Added tests to verify error handling for common routes.go patterns. --- middleware/anthropic.go | 9 ++-- middleware/anthropic_test.go | 86 ++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 4 deletions(-) diff --git a/middleware/anthropic.go b/middleware/anthropic.go index 533b0de1e..f697f4078 100644 --- a/middleware/anthropic.go +++ b/middleware/anthropic.go @@ -23,14 +23,15 @@ type AnthropicWriter struct { } func (w *AnthropicWriter) writeError(data []byte) (int, error) { - var serr api.StatusError - err := json.Unmarshal(data, &serr) - if err != nil { + var errData struct { + Error string `json:"error"` + } + if err := json.Unmarshal(data, &errData); err != nil { return 0, err } w.ResponseWriter.Header().Set("Content-Type", "application/json") - err = json.NewEncoder(w.ResponseWriter).Encode(anthropic.NewError(serr.StatusCode, serr.Error())) + err := json.NewEncoder(w.ResponseWriter).Encode(anthropic.NewError(w.ResponseWriter.Status(), errData.Error)) if err != nil { return 0, err } diff --git a/middleware/anthropic_test.go b/middleware/anthropic_test.go index 5006a7cf0..b444e83ab 100644 --- a/middleware/anthropic_test.go +++ b/middleware/anthropic_test.go @@ -485,3 +485,89 @@ func TestAnthropicWriter_NonStreaming(t *testing.T) { t.Errorf("expected output_tokens 5, got %d", result.Usage.OutputTokens) } } + +// TestAnthropicWriter_ErrorFromRoutes tests error handling when routes.go sends +// gin.H{"error": "message"} without a StatusCode field (which is the common case) +func TestAnthropicWriter_ErrorFromRoutes(t *testing.T) { + gin.SetMode(gin.TestMode) + + tests := []struct { + name string + statusCode int + errorPayload any + wantErrorType string + wantMessage string + }{ + // routes.go sends errors without StatusCode in JSON, so we must use HTTP status + { + name: "404 with gin.H error (model not found)", + statusCode: http.StatusNotFound, + errorPayload: gin.H{"error": "model 'nonexistent' not found"}, + wantErrorType: "not_found_error", + wantMessage: "model 'nonexistent' not found", + }, + { + name: "400 with gin.H error (bad request)", + statusCode: http.StatusBadRequest, + errorPayload: gin.H{"error": "model is required"}, + wantErrorType: "invalid_request_error", + wantMessage: "model is required", + }, + { + name: "500 with gin.H error (internal error)", + statusCode: http.StatusInternalServerError, + errorPayload: gin.H{"error": "something went wrong"}, + wantErrorType: "api_error", + wantMessage: "something went wrong", + }, + { + name: "404 with api.StatusError", + statusCode: http.StatusNotFound, + errorPayload: api.StatusError{ + StatusCode: http.StatusNotFound, + ErrorMessage: "model not found via StatusError", + }, + wantErrorType: "not_found_error", + wantMessage: "model not found 
via StatusError", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + router := gin.New() + router.Use(AnthropicMessagesMiddleware()) + router.POST("/v1/messages", func(c *gin.Context) { + // Simulate what routes.go does - set status and write error JSON + data, _ := json.Marshal(tt.errorPayload) + c.Writer.WriteHeader(tt.statusCode) + _, _ = c.Writer.Write(data) + }) + + body := `{"model": "test-model", "max_tokens": 100, "messages": [{"role": "user", "content": "Hi"}]}` + req, _ := http.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + + resp := httptest.NewRecorder() + router.ServeHTTP(resp, req) + + if resp.Code != tt.statusCode { + t.Errorf("expected status %d, got %d", tt.statusCode, resp.Code) + } + + var errResp anthropic.ErrorResponse + if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil { + t.Fatalf("failed to unmarshal error response: %v\nbody: %s", err, resp.Body.String()) + } + + if errResp.Type != "error" { + t.Errorf("expected type 'error', got %q", errResp.Type) + } + if errResp.Error.Type != tt.wantErrorType { + t.Errorf("expected error type %q, got %q", tt.wantErrorType, errResp.Error.Type) + } + if errResp.Error.Message != tt.wantMessage { + t.Errorf("expected message %q, got %q", tt.wantMessage, errResp.Error.Message) + } + }) + } +} From fceafefdcef3a040b52f815cf25f9d6a6c181fac Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Mon, 5 Jan 2026 21:10:29 -0800 Subject: [PATCH 13/13] anthropic: fix ToolCallFunctionArguments type after rebase Update tests and implementation to use the new ordered map-based ToolCallFunctionArguments type which replaces the previous map[string]any. - Add mapToArgs helper to convert map[string]any to ToolCallFunctionArguments - Add testArgs and testProps helpers in tests - Use cmpopts.IgnoreUnexported for cmp.Diff comparisons --- anthropic/anthropic.go | 11 ++++++++++- anthropic/anthropic_test.go | 23 ++++++++++++++++++----- middleware/anthropic_test.go | 29 ++++++++++++++++++++--------- 3 files changed, 48 insertions(+), 15 deletions(-) diff --git a/anthropic/anthropic.go b/anthropic/anthropic.go index f4d9a6e23..9cb2c75c4 100644 --- a/anthropic/anthropic.go +++ b/anthropic/anthropic.go @@ -377,7 +377,7 @@ func convertMessage(msg MessageParam) ([]api.Message, error) { }, } if input, ok := blockMap["input"].(map[string]any); ok { - tc.Function.Arguments = api.ToolCallFunctionArguments(input) + tc.Function.Arguments = mapToArgs(input) } toolCalls = append(toolCalls, tc) @@ -767,3 +767,12 @@ func GenerateMessageID() string { func ptr(s string) *string { return &s } + +// mapToArgs converts a map to ToolCallFunctionArguments +func mapToArgs(m map[string]any) api.ToolCallFunctionArguments { + args := api.NewToolCallFunctionArguments() + for k, v := range m { + args.Set(k, v) + } + return args +} diff --git a/anthropic/anthropic_test.go b/anthropic/anthropic_test.go index 8228bd37b..117d183c9 100644 --- a/anthropic/anthropic_test.go +++ b/anthropic/anthropic_test.go @@ -14,6 +14,15 @@ const ( testImage = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=` ) +// testArgs creates ToolCallFunctionArguments from a map (convenience function for tests) +func testArgs(m map[string]any) api.ToolCallFunctionArguments { + args := api.NewToolCallFunctionArguments() + for k, v := range m { + args.Set(k, v) + } + return args +} + func TestFromMessagesRequest_Basic(t *testing.T) { req := MessagesRequest{ 
Model: "test-model", @@ -468,7 +477,7 @@ func TestToMessagesResponse_WithToolCalls(t *testing.T) { ID: "call_123", Function: api.ToolCallFunction{ Name: "get_weather", - Arguments: map[string]any{"location": "Paris"}, + Arguments: testArgs(map[string]any{"location": "Paris"}), }, }, }, @@ -662,7 +671,7 @@ func TestStreamConverter_WithToolCalls(t *testing.T) { ID: "call_123", Function: api.ToolCallFunction{ Name: "get_weather", - Arguments: map[string]any{"location": "Paris"}, + Arguments: testArgs(map[string]any{"location": "Paris"}), }, }, }, @@ -708,6 +717,8 @@ func TestStreamConverter_ToolCallWithUnmarshalableArgs(t *testing.T) { // Create a channel which cannot be JSON marshaled unmarshalable := make(chan int) + badArgs := api.NewToolCallFunctionArguments() + badArgs.Set("channel", unmarshalable) resp := api.ChatResponse{ Model: "test-model", @@ -718,7 +729,7 @@ func TestStreamConverter_ToolCallWithUnmarshalableArgs(t *testing.T) { ID: "call_bad", Function: api.ToolCallFunction{ Name: "bad_function", - Arguments: map[string]any{"channel": unmarshalable}, + Arguments: badArgs, }, }, }, @@ -752,6 +763,8 @@ func TestStreamConverter_MultipleToolCallsWithMixedValidity(t *testing.T) { conv := NewStreamConverter("msg_123", "test-model") unmarshalable := make(chan int) + badArgs := api.NewToolCallFunctionArguments() + badArgs.Set("channel", unmarshalable) resp := api.ChatResponse{ Model: "test-model", @@ -762,14 +775,14 @@ func TestStreamConverter_MultipleToolCallsWithMixedValidity(t *testing.T) { ID: "call_good", Function: api.ToolCallFunction{ Name: "good_function", - Arguments: map[string]any{"location": "Paris"}, + Arguments: testArgs(map[string]any{"location": "Paris"}), }, }, { ID: "call_bad", Function: api.ToolCallFunction{ Name: "bad_function", - Arguments: map[string]any{"channel": unmarshalable}, + Arguments: badArgs, }, }, }, diff --git a/middleware/anthropic_test.go b/middleware/anthropic_test.go index b444e83ab..40df7fbb4 100644 --- a/middleware/anthropic_test.go +++ b/middleware/anthropic_test.go @@ -11,6 +11,7 @@ import ( "github.com/gin-gonic/gin" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/ollama/ollama/anthropic" "github.com/ollama/ollama/api" @@ -25,6 +26,15 @@ func captureAnthropicRequest(capturedRequest any) gin.HandlerFunc { } } +// testProps creates ToolPropertiesMap from a map (convenience function for tests) +func testProps(m map[string]api.ToolProperty) *api.ToolPropertiesMap { + props := api.NewToolPropertiesMap() + for k, v := range m { + props.Set(k, v) + } + return props +} + func TestAnthropicMessagesMiddleware(t *testing.T) { type testCase struct { name string @@ -156,9 +166,9 @@ func TestAnthropicMessagesMiddleware(t *testing.T) { Parameters: api.ToolFunctionParameters{ Type: "object", Required: []string{"location"}, - Properties: map[string]api.ToolProperty{ + Properties: testProps(map[string]api.ToolProperty{ "location": {Type: api.PropertyType{"string"}}, - }, + }), }, }, }, @@ -193,7 +203,7 @@ func TestAnthropicMessagesMiddleware(t *testing.T) { ID: "call_123", Function: api.ToolCallFunction{ Name: "get_weather", - Arguments: api.ToolCallFunctionArguments{"location": "Paris"}, + Arguments: testArgs(map[string]any{"location": "Paris"}), }, }, }, @@ -344,7 +354,8 @@ func TestAnthropicMessagesMiddleware(t *testing.T) { t.Errorf("model mismatch: got %q, want %q", capturedRequest.Model, tc.req.Model) } - if diff := cmp.Diff(tc.req.Messages, capturedRequest.Messages); diff != "" { + if diff := cmp.Diff(tc.req.Messages, 
capturedRequest.Messages, + cmpopts.IgnoreUnexported(api.ToolCallFunctionArguments{}, api.ToolPropertiesMap{})); diff != "" { t.Errorf("messages mismatch (-want +got):\n%s", diff) } @@ -492,11 +503,11 @@ func TestAnthropicWriter_ErrorFromRoutes(t *testing.T) { gin.SetMode(gin.TestMode) tests := []struct { - name string - statusCode int - errorPayload any - wantErrorType string - wantMessage string + name string + statusCode int + errorPayload any + wantErrorType string + wantMessage string }{ // routes.go sends errors without StatusCode in JSON, so we must use HTTP status {