diff --git a/anthropic/anthropic.go b/anthropic/anthropic.go new file mode 100644 index 000000000..7bf9e98a0 --- /dev/null +++ b/anthropic/anthropic.go @@ -0,0 +1,779 @@ +// Package anthropic provides core transformation logic for compatibility with the Anthropic Messages API +package anthropic + +import ( + "crypto/rand" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "net/http" + "strings" + "time" + + "github.com/ollama/ollama/api" +) + +// Error types matching Anthropic API +type Error struct { + Type string `json:"type"` + Message string `json:"message"` +} + +type ErrorResponse struct { + Type string `json:"type"` // always "error" + Error Error `json:"error"` + RequestID string `json:"request_id,omitempty"` +} + +// NewError creates a new ErrorResponse with the appropriate error type based on HTTP status code +func NewError(code int, message string) ErrorResponse { + var etype string + switch code { + case http.StatusBadRequest: + etype = "invalid_request_error" + case http.StatusUnauthorized: + etype = "authentication_error" + case http.StatusForbidden: + etype = "permission_error" + case http.StatusNotFound: + etype = "not_found_error" + case http.StatusTooManyRequests: + etype = "rate_limit_error" + case http.StatusServiceUnavailable, 529: + etype = "overloaded_error" + default: + etype = "api_error" + } + + return ErrorResponse{ + Type: "error", + Error: Error{Type: etype, Message: message}, + RequestID: generateID("req"), + } +} + +// Request types + +// MessagesRequest represents an Anthropic Messages API request +type MessagesRequest struct { + Model string `json:"model"` + MaxTokens int `json:"max_tokens"` + Messages []MessageParam `json:"messages"` + System any `json:"system,omitempty"` // string or []ContentBlock + Stream bool `json:"stream,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopK *int `json:"top_k,omitempty"` + StopSequences []string `json:"stop_sequences,omitempty"` + Tools []Tool `json:"tools,omitempty"` + ToolChoice *ToolChoice `json:"tool_choice,omitempty"` + Thinking *ThinkingConfig `json:"thinking,omitempty"` + Metadata *Metadata `json:"metadata,omitempty"` +} + +// MessageParam represents a message in the request +type MessageParam struct { + Role string `json:"role"` // "user" or "assistant" + Content any `json:"content"` // string or []ContentBlock +} + +// ContentBlock represents a content block in a message +type ContentBlock struct { + Type string `json:"type"` // text, image, tool_use, tool_result, thinking + + // For text blocks + Text string `json:"text,omitempty"` + + // For image blocks + Source *ImageSource `json:"source,omitempty"` + + // For tool_use blocks + ID string `json:"id,omitempty"` + Name string `json:"name,omitempty"` + Input any `json:"input,omitempty"` + + // For tool_result blocks + ToolUseID string `json:"tool_use_id,omitempty"` + Content any `json:"content,omitempty"` // string or []ContentBlock + IsError bool `json:"is_error,omitempty"` + + // For thinking blocks + Thinking string `json:"thinking,omitempty"` + Signature string `json:"signature,omitempty"` +} + +// ImageSource represents the source of an image +type ImageSource struct { + Type string `json:"type"` // "base64" or "url" + MediaType string `json:"media_type,omitempty"` + Data string `json:"data,omitempty"` + URL string `json:"url,omitempty"` +} + +// Tool represents a tool definition +type Tool struct { + Type string `json:"type,omitempty"` // "custom" for user-defined tools + Name string 
`json:"name"` + Description string `json:"description,omitempty"` + InputSchema json.RawMessage `json:"input_schema,omitempty"` +} + +// ToolChoice controls how the model uses tools +type ToolChoice struct { + Type string `json:"type"` // "auto", "any", "tool", "none" + Name string `json:"name,omitempty"` + DisableParallelToolUse bool `json:"disable_parallel_tool_use,omitempty"` +} + +// ThinkingConfig controls extended thinking +type ThinkingConfig struct { + Type string `json:"type"` // "enabled" or "disabled" + BudgetTokens int `json:"budget_tokens,omitempty"` +} + +// Metadata for the request +type Metadata struct { + UserID string `json:"user_id,omitempty"` +} + +// Response types + +// MessagesResponse represents an Anthropic Messages API response +type MessagesResponse struct { + ID string `json:"id"` + Type string `json:"type"` // "message" + Role string `json:"role"` // "assistant" + Model string `json:"model"` + Content []ContentBlock `json:"content"` + StopReason string `json:"stop_reason,omitempty"` + StopSequence string `json:"stop_sequence,omitempty"` + Usage Usage `json:"usage"` +} + +// Usage contains token usage information +type Usage struct { + InputTokens int `json:"input_tokens"` + OutputTokens int `json:"output_tokens"` +} + +// Streaming event types + +// MessageStartEvent is sent at the start of streaming +type MessageStartEvent struct { + Type string `json:"type"` // "message_start" + Message MessagesResponse `json:"message"` +} + +// ContentBlockStartEvent signals the start of a content block +type ContentBlockStartEvent struct { + Type string `json:"type"` // "content_block_start" + Index int `json:"index"` + ContentBlock ContentBlock `json:"content_block"` +} + +// ContentBlockDeltaEvent contains incremental content updates +type ContentBlockDeltaEvent struct { + Type string `json:"type"` // "content_block_delta" + Index int `json:"index"` + Delta Delta `json:"delta"` +} + +// Delta represents an incremental update +type Delta struct { + Type string `json:"type"` // "text_delta", "input_json_delta", "thinking_delta", "signature_delta" + Text string `json:"text,omitempty"` + PartialJSON string `json:"partial_json,omitempty"` + Thinking string `json:"thinking,omitempty"` + Signature string `json:"signature,omitempty"` +} + +// ContentBlockStopEvent signals the end of a content block +type ContentBlockStopEvent struct { + Type string `json:"type"` // "content_block_stop" + Index int `json:"index"` +} + +// MessageDeltaEvent contains updates to the message +type MessageDeltaEvent struct { + Type string `json:"type"` // "message_delta" + Delta MessageDelta `json:"delta"` + Usage DeltaUsage `json:"usage"` +} + +// MessageDelta contains stop information +type MessageDelta struct { + StopReason string `json:"stop_reason,omitempty"` + StopSequence string `json:"stop_sequence,omitempty"` +} + +// DeltaUsage contains cumulative token usage +type DeltaUsage struct { + OutputTokens int `json:"output_tokens"` +} + +// MessageStopEvent signals the end of the message +type MessageStopEvent struct { + Type string `json:"type"` // "message_stop" +} + +// PingEvent is a keepalive event +type PingEvent struct { + Type string `json:"type"` // "ping" +} + +// StreamErrorEvent is an error during streaming +type StreamErrorEvent struct { + Type string `json:"type"` // "error" + Error Error `json:"error"` +} + +// FromMessagesRequest converts an Anthropic MessagesRequest to an Ollama api.ChatRequest +func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) { + var messages 
[]api.Message + + // Handle system prompt + if r.System != nil { + switch sys := r.System.(type) { + case string: + if sys != "" { + messages = append(messages, api.Message{Role: "system", Content: sys}) + } + case []any: + // System can be an array of content blocks + var content strings.Builder + for _, block := range sys { + if blockMap, ok := block.(map[string]any); ok { + if blockMap["type"] == "text" { + if text, ok := blockMap["text"].(string); ok { + content.WriteString(text) + } + } + } + } + if content.Len() > 0 { + messages = append(messages, api.Message{Role: "system", Content: content.String()}) + } + } + } + + // Convert messages + for _, msg := range r.Messages { + converted, err := convertMessage(msg) + if err != nil { + return nil, err + } + messages = append(messages, converted...) + } + + // Build options + options := make(map[string]any) + + options["num_predict"] = r.MaxTokens + + if r.Temperature != nil { + options["temperature"] = *r.Temperature + } + + if r.TopP != nil { + options["top_p"] = *r.TopP + } + + if r.TopK != nil { + options["top_k"] = *r.TopK + } + + if len(r.StopSequences) > 0 { + options["stop"] = r.StopSequences + } + + // Convert tools + var tools api.Tools + for _, t := range r.Tools { + tool, err := convertTool(t) + if err != nil { + return nil, err + } + tools = append(tools, tool) + } + + // Handle thinking + var think *api.ThinkValue + if r.Thinking != nil && r.Thinking.Type == "enabled" { + think = &api.ThinkValue{Value: true} + } + + stream := r.Stream + + return &api.ChatRequest{ + Model: r.Model, + Messages: messages, + Options: options, + Stream: &stream, + Tools: tools, + Think: think, + }, nil +} + +// convertMessage converts an Anthropic MessageParam to Ollama api.Message(s) +func convertMessage(msg MessageParam) ([]api.Message, error) { + var messages []api.Message + role := strings.ToLower(msg.Role) + + switch content := msg.Content.(type) { + case string: + messages = append(messages, api.Message{Role: role, Content: content}) + + case []any: + // Handle array of content blocks + var textContent strings.Builder + var images []api.ImageData + var toolCalls []api.ToolCall + var thinking string + var toolResults []api.Message + + for _, block := range content { + blockMap, ok := block.(map[string]any) + if !ok { + return nil, errors.New("invalid content block format") + } + + blockType, _ := blockMap["type"].(string) + + switch blockType { + case "text": + if text, ok := blockMap["text"].(string); ok { + textContent.WriteString(text) + } + + case "image": + source, ok := blockMap["source"].(map[string]any) + if !ok { + return nil, errors.New("invalid image source") + } + + sourceType, _ := source["type"].(string) + if sourceType == "base64" { + data, _ := source["data"].(string) + decoded, err := base64.StdEncoding.DecodeString(data) + if err != nil { + return nil, fmt.Errorf("invalid base64 image data: %w", err) + } + images = append(images, decoded) + } + // URL images would need to be fetched - skip for now + + case "tool_use": + id, ok := blockMap["id"].(string) + if !ok { + return nil, errors.New("tool_use block missing required 'id' field") + } + name, ok := blockMap["name"].(string) + if !ok { + return nil, errors.New("tool_use block missing required 'name' field") + } + tc := api.ToolCall{ + ID: id, + Function: api.ToolCallFunction{ + Name: name, + }, + } + if input, ok := blockMap["input"].(map[string]any); ok { + tc.Function.Arguments = api.ToolCallFunctionArguments(input) + } + toolCalls = append(toolCalls, tc) + + case 
"tool_result": + toolUseID, _ := blockMap["tool_use_id"].(string) + var resultContent string + + switch c := blockMap["content"].(type) { + case string: + resultContent = c + case []any: + // Extract text from content blocks + for _, cb := range c { + if cbMap, ok := cb.(map[string]any); ok { + if cbMap["type"] == "text" { + if text, ok := cbMap["text"].(string); ok { + resultContent += text + } + } + } + } + } + + toolResults = append(toolResults, api.Message{ + Role: "tool", + Content: resultContent, + ToolCallID: toolUseID, + }) + + case "thinking": + if t, ok := blockMap["thinking"].(string); ok { + thinking = t + } + } + } + + // Build the main message + if textContent.Len() > 0 || len(images) > 0 || len(toolCalls) > 0 { + m := api.Message{ + Role: role, + Content: textContent.String(), + Images: images, + ToolCalls: toolCalls, + Thinking: thinking, + } + messages = append(messages, m) + } + + // Add tool results as separate messages + messages = append(messages, toolResults...) + + default: + return nil, fmt.Errorf("invalid message content type: %T", content) + } + + return messages, nil +} + +// convertTool converts an Anthropic Tool to an Ollama api.Tool +func convertTool(t Tool) (api.Tool, error) { + var params api.ToolFunctionParameters + if len(t.InputSchema) > 0 { + if err := json.Unmarshal(t.InputSchema, ¶ms); err != nil { + return api.Tool{}, fmt.Errorf("invalid input_schema for tool %q: %w", t.Name, err) + } + } + + return api.Tool{ + Type: "function", + Function: api.ToolFunction{ + Name: t.Name, + Description: t.Description, + Parameters: params, + }, + }, nil +} + +// ToMessagesResponse converts an Ollama api.ChatResponse to an Anthropic MessagesResponse +func ToMessagesResponse(id string, r api.ChatResponse) MessagesResponse { + var content []ContentBlock + + // Add thinking block if present + if r.Message.Thinking != "" { + content = append(content, ContentBlock{ + Type: "thinking", + Thinking: r.Message.Thinking, + }) + } + + // Add text content if present + if r.Message.Content != "" { + content = append(content, ContentBlock{ + Type: "text", + Text: r.Message.Content, + }) + } + + // Add tool use blocks + for _, tc := range r.Message.ToolCalls { + content = append(content, ContentBlock{ + Type: "tool_use", + ID: tc.ID, + Name: tc.Function.Name, + Input: tc.Function.Arguments, + }) + } + + // Map stop reason + stopReason := mapStopReason(r.DoneReason, len(r.Message.ToolCalls) > 0) + + return MessagesResponse{ + ID: id, + Type: "message", + Role: "assistant", + Model: r.Model, + Content: content, + StopReason: stopReason, + Usage: Usage{ + InputTokens: r.Metrics.PromptEvalCount, + OutputTokens: r.Metrics.EvalCount, + }, + } +} + +// mapStopReason converts Ollama done_reason to Anthropic stop_reason +func mapStopReason(reason string, hasToolCalls bool) string { + if hasToolCalls { + return "tool_use" + } + + switch reason { + case "stop": + return "end_turn" + case "length": + return "max_tokens" + default: + if reason != "" { + return "stop_sequence" + } + return "" + } +} + +// StreamConverter manages state for converting Ollama streaming responses to Anthropic format +type StreamConverter struct { + ID string + Model string + firstWrite bool + contentIndex int + inputTokens int + outputTokens int + thinkingStarted bool + thinkingDone bool + textStarted bool + toolCallsSent map[string]bool +} + +// NewStreamConverter creates a new StreamConverter +func NewStreamConverter(id, model string) *StreamConverter { + return &StreamConverter{ + ID: id, + Model: model, + 
firstWrite: true, + toolCallsSent: make(map[string]bool), + } +} + +// StreamEvent represents a streaming event to be sent to the client +type StreamEvent struct { + Event string + Data any +} + +// Process converts an Ollama ChatResponse to Anthropic streaming events +func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent { + var events []StreamEvent + + // First write: emit message_start + if c.firstWrite { + c.firstWrite = false + c.inputTokens = r.Metrics.PromptEvalCount + + events = append(events, StreamEvent{ + Event: "message_start", + Data: MessageStartEvent{ + Type: "message_start", + Message: MessagesResponse{ + ID: c.ID, + Type: "message", + Role: "assistant", + Model: c.Model, + Content: []ContentBlock{}, + Usage: Usage{ + InputTokens: c.inputTokens, + OutputTokens: 0, + }, + }, + }, + }) + } + + // Handle thinking content + if r.Message.Thinking != "" && !c.thinkingDone { + if !c.thinkingStarted { + c.thinkingStarted = true + events = append(events, StreamEvent{ + Event: "content_block_start", + Data: ContentBlockStartEvent{ + Type: "content_block_start", + Index: c.contentIndex, + ContentBlock: ContentBlock{ + Type: "thinking", + Thinking: "", + }, + }, + }) + } + + events = append(events, StreamEvent{ + Event: "content_block_delta", + Data: ContentBlockDeltaEvent{ + Type: "content_block_delta", + Index: c.contentIndex, + Delta: Delta{ + Type: "thinking_delta", + Thinking: r.Message.Thinking, + }, + }, + }) + } + + // Handle text content + if r.Message.Content != "" { + // Close thinking block if it was open + if c.thinkingStarted && !c.thinkingDone { + c.thinkingDone = true + events = append(events, StreamEvent{ + Event: "content_block_stop", + Data: ContentBlockStopEvent{ + Type: "content_block_stop", + Index: c.contentIndex, + }, + }) + c.contentIndex++ + } + + if !c.textStarted { + c.textStarted = true + events = append(events, StreamEvent{ + Event: "content_block_start", + Data: ContentBlockStartEvent{ + Type: "content_block_start", + Index: c.contentIndex, + ContentBlock: ContentBlock{ + Type: "text", + Text: "", + }, + }, + }) + } + + events = append(events, StreamEvent{ + Event: "content_block_delta", + Data: ContentBlockDeltaEvent{ + Type: "content_block_delta", + Index: c.contentIndex, + Delta: Delta{ + Type: "text_delta", + Text: r.Message.Content, + }, + }, + }) + } + + // Handle tool calls + for _, tc := range r.Message.ToolCalls { + if c.toolCallsSent[tc.ID] { + continue + } + + // Close any previous block + if c.textStarted { + events = append(events, StreamEvent{ + Event: "content_block_stop", + Data: ContentBlockStopEvent{ + Type: "content_block_stop", + Index: c.contentIndex, + }, + }) + c.contentIndex++ + c.textStarted = false + } + + // Start tool use block + events = append(events, StreamEvent{ + Event: "content_block_start", + Data: ContentBlockStartEvent{ + Type: "content_block_start", + Index: c.contentIndex, + ContentBlock: ContentBlock{ + Type: "tool_use", + ID: tc.ID, + Name: tc.Function.Name, + Input: map[string]any{}, + }, + }, + }) + + // Send input as JSON delta + argsJSON, _ := json.Marshal(tc.Function.Arguments) + events = append(events, StreamEvent{ + Event: "content_block_delta", + Data: ContentBlockDeltaEvent{ + Type: "content_block_delta", + Index: c.contentIndex, + Delta: Delta{ + Type: "input_json_delta", + PartialJSON: string(argsJSON), + }, + }, + }) + + // Close tool use block + events = append(events, StreamEvent{ + Event: "content_block_stop", + Data: ContentBlockStopEvent{ + Type: "content_block_stop", + Index: 
c.contentIndex,
+			},
+		})
+
+		c.toolCallsSent[tc.ID] = true
+		c.contentIndex++
+	}
+
+	// Handle done
+	if r.Done {
+		// Close any open block
+		if c.textStarted {
+			events = append(events, StreamEvent{
+				Event: "content_block_stop",
+				Data: ContentBlockStopEvent{
+					Type:  "content_block_stop",
+					Index: c.contentIndex,
+				},
+			})
+		} else if c.thinkingStarted && !c.thinkingDone {
+			events = append(events, StreamEvent{
+				Event: "content_block_stop",
+				Data: ContentBlockStopEvent{
+					Type:  "content_block_stop",
+					Index: c.contentIndex,
+				},
+			})
+		}
+
+		c.outputTokens = r.Metrics.EvalCount
+		stopReason := mapStopReason(r.DoneReason, len(c.toolCallsSent) > 0)
+
+		events = append(events, StreamEvent{
+			Event: "message_delta",
+			Data: MessageDeltaEvent{
+				Type: "message_delta",
+				Delta: MessageDelta{
+					StopReason: stopReason,
+				},
+				Usage: DeltaUsage{
+					OutputTokens: c.outputTokens,
+				},
+			},
+		})
+
+		events = append(events, StreamEvent{
+			Event: "message_stop",
+			Data: MessageStopEvent{
+				Type: "message_stop",
+			},
+		})
+	}
+
+	return events
+}
+
+// generateID generates a unique ID with the given prefix using crypto/rand
+func generateID(prefix string) string {
+	b := make([]byte, 12)
+	if _, err := rand.Read(b); err != nil {
+		// Fallback to time-based ID if crypto/rand fails
+		return fmt.Sprintf("%s_%d", prefix, time.Now().UnixNano())
+	}
+	return fmt.Sprintf("%s_%x", prefix, b)
+}
+
+// GenerateMessageID generates a unique message ID
+func GenerateMessageID() string {
+	return generateID("msg")
+}
diff --git a/docs/api/anthropic-compatibility.mdx b/docs/api/anthropic-compatibility.mdx
new file mode 100644
index 000000000..b8953d1d8
--- /dev/null
+++ b/docs/api/anthropic-compatibility.mdx
@@ -0,0 +1,339 @@
+---
+title: Anthropic compatibility
+---
+
+Ollama provides compatibility with the [Anthropic Messages API](https://docs.anthropic.com/en/api/messages) to help connect existing applications to Ollama, including tools like Claude Code.
+
+## Usage
+
+### Environment variables
+
+To use Ollama with tools that expect the Anthropic API (like Claude Code), set these environment variables:
+
+```shell
+export ANTHROPIC_BASE_URL=http://localhost:11434
+export ANTHROPIC_API_KEY=ollama # required but ignored
+```
+
+### Simple `/v1/messages` example
+
+
+
+```python basic.py
+import anthropic
+
+client = anthropic.Anthropic(
+    base_url='http://localhost:11434',
+    api_key='ollama',  # required but ignored
+)
+
+message = client.messages.create(
+    model='llama3.2:3b',
+    max_tokens=1024,
+    messages=[
+        {'role': 'user', 'content': 'Hello, how are you?'}
+    ]
+)
+print(message.content[0].text)
+```
+
+```javascript basic.js
+import Anthropic from "@anthropic-ai/sdk";
+
+const anthropic = new Anthropic({
+  baseURL: "http://localhost:11434",
+  apiKey: "ollama", // required but ignored
+});
+
+const message = await anthropic.messages.create({
+  model: "llama3.2:3b",
+  max_tokens: 1024,
+  messages: [{ role: "user", content: "Hello, how are you?" }],
+});
+
+console.log(message.content[0].text);
+```
+
+```shell basic.sh
+curl -X POST http://localhost:11434/v1/messages \
+-H "Content-Type: application/json" \
+-H "x-api-key: ollama" \
+-H "anthropic-version: 2023-06-01" \
+-d '{
+  "model": "llama3.2:3b",
+  "max_tokens": 1024,
+  "messages": [{ "role": "user", "content": "Hello, how are you?" }]
+}'
+```
+
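+A successful non-streaming call returns a single Anthropic-style message object. The response below is illustrative; the `id`, text, and token counts will differ on each run:
+
+```json
+{
+  "id": "msg_6f1c2a9d8b3e",
+  "type": "message",
+  "role": "assistant",
+  "model": "llama3.2:3b",
+  "content": [{ "type": "text", "text": "I'm doing well, thanks for asking!" }],
+  "stop_reason": "end_turn",
+  "usage": { "input_tokens": 14, "output_tokens": 10 }
+}
+```
+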
+
+### Streaming example
+
+
+
+```python streaming.py
+import anthropic
+
+client = anthropic.Anthropic(
+    base_url='http://localhost:11434',
+    api_key='ollama',
+)
+
+with client.messages.stream(
+    model='llama3.2:3b',
+    max_tokens=1024,
+    messages=[{'role': 'user', 'content': 'Count from 1 to 10'}]
+) as stream:
+    for text in stream.text_stream:
+        print(text, end='', flush=True)
+```
+
+```javascript streaming.js
+import Anthropic from "@anthropic-ai/sdk";
+
+const anthropic = new Anthropic({
+  baseURL: "http://localhost:11434",
+  apiKey: "ollama",
+});
+
+const stream = await anthropic.messages.stream({
+  model: "llama3.2:3b",
+  max_tokens: 1024,
+  messages: [{ role: "user", content: "Count from 1 to 10" }],
+});
+
+for await (const event of stream) {
+  if (
+    event.type === "content_block_delta" &&
+    event.delta.type === "text_delta"
+  ) {
+    process.stdout.write(event.delta.text);
+  }
+}
+```
+
+```shell streaming.sh
+curl -X POST http://localhost:11434/v1/messages \
+-H "Content-Type: application/json" \
+-d '{
+  "model": "llama3.2:3b",
+  "max_tokens": 1024,
+  "stream": true,
+  "messages": [{ "role": "user", "content": "Count from 1 to 10" }]
+}'
+```
+
+
+
+### Tool calling example
+
+
+
+```python tools.py
+import anthropic
+
+client = anthropic.Anthropic(
+    base_url='http://localhost:11434',
+    api_key='ollama',
+)
+
+message = client.messages.create(
+    model='llama3.2:3b',
+    max_tokens=1024,
+    tools=[
+        {
+            'name': 'get_weather',
+            'description': 'Get the current weather in a location',
+            'input_schema': {
+                'type': 'object',
+                'properties': {
+                    'location': {
+                        'type': 'string',
+                        'description': 'The city and state, e.g. San Francisco, CA'
+                    }
+                },
+                'required': ['location']
+            }
+        }
+    ],
+    messages=[{'role': 'user', 'content': "What's the weather in San Francisco?"}]
+)
+
+for block in message.content:
+    if block.type == 'tool_use':
+        print(f'Tool: {block.name}')
+        print(f'Input: {block.input}')
+```
+
+```shell tools.sh
+curl -X POST http://localhost:11434/v1/messages \
+-H "Content-Type: application/json" \
+-d '{
+  "model": "llama3.2:3b",
+  "max_tokens": 1024,
+  "tools": [
+    {
+      "name": "get_weather",
+      "description": "Get the current weather in a location",
+      "input_schema": {
+        "type": "object",
+        "properties": {
+          "location": {
+            "type": "string",
+            "description": "The city and state"
+          }
+        },
+        "required": ["location"]
+      }
+    }
+  ],
+  "messages": [{ "role": "user", "content": "What is the weather in San Francisco?" }]
+}'
+```
+
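+To complete the tool loop, run the requested tool yourself and send its output back as a `tool_result` block that references the `tool_use` ID. A minimal sketch continuing `tools.py` above; it assumes the tool definitions are bound to a `tools` variable, and the weather string stands in for a real tool call:
+
+```python tools_result.py
+# Find the tool_use block the model emitted in tools.py.
+tool_block = next(b for b in message.content if b.type == 'tool_use')
+
+follow_up = client.messages.create(
+    model='llama3.2:3b',
+    max_tokens=1024,
+    tools=tools,  # same tool definitions as above (assumed variable)
+    messages=[
+        {'role': 'user', 'content': "What's the weather in San Francisco?"},
+        # Echo the assistant turn, including its tool_use block.
+        {'role': 'assistant', 'content': message.content},
+        # Supply the tool output, keyed by tool_use_id.
+        {'role': 'user', 'content': [{
+            'type': 'tool_result',
+            'tool_use_id': tool_block.id,
+            'content': '15°C and sunny',  # illustrative result
+        }]},
+    ],
+)
+print(follow_up.content[0].text)
+```
+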
+
+## Using with Claude Code
+
+[Claude Code](https://docs.anthropic.com/en/docs/claude-code) can be configured to use Ollama as its backend:
+
+```shell
+ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model llama3.2:3b
+```
+
+Or set the environment variables in your shell profile:
+
+```shell
+export ANTHROPIC_BASE_URL=http://localhost:11434
+export ANTHROPIC_API_KEY=ollama
+```
+
+Then run Claude Code with any Ollama model:
+
+```shell
+claude --model llama3.2:3b
+claude --model qwen3:8b
+claude --model deepseek-r1:14b
+```
+
+## Endpoints
+
+### `/v1/messages`
+
+#### Supported features
+
+- [x] Messages
+- [x] Streaming
+- [x] System prompts
+- [x] Multi-turn conversations
+- [x] Vision (images)
+- [x] Tools (function calling)
+- [x] Tool results
+- [x] Thinking/extended thinking
+
+#### Supported request fields
+
+- [x] `model`
+- [x] `max_tokens`
+- [x] `messages`
+  - [x] Text `content`
+  - [x] Image `content` (base64)
+  - [x] Array of content blocks
+  - [x] `tool_use` blocks
+  - [x] `tool_result` blocks
+  - [x] `thinking` blocks
+- [x] `system` (string or array)
+- [x] `stream`
+- [x] `temperature`
+- [x] `top_p`
+- [x] `top_k`
+- [x] `stop_sequences`
+- [x] `tools`
+- [x] `thinking`
+- [ ] `tool_choice`
+- [ ] `metadata`
+
+#### Supported response fields
+
+- [x] `id`
+- [x] `type`
+- [x] `role`
+- [x] `model`
+- [x] `content` (text, tool_use, thinking blocks)
+- [x] `stop_reason` (end_turn, max_tokens, tool_use)
+- [x] `usage` (input_tokens, output_tokens)
+
+#### Streaming events
+
+- [x] `message_start`
+- [x] `content_block_start`
+- [x] `content_block_delta` (text_delta, input_json_delta, thinking_delta)
+- [x] `content_block_stop`
+- [x] `message_delta`
+- [x] `message_stop`
+- [x] `ping`
+- [ ] `error` (errors are returned as HTTP responses; see below)
+
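+Events are delivered as Server-Sent Events in the order listed above. An abridged, illustrative stream for a short text reply (IDs, text, and token counts are examples):
+
+```text
+event: message_start
+data: {"type":"message_start","message":{"id":"msg_6f1c2a9d8b3e","type":"message","role":"assistant","model":"llama3.2:3b","content":[],"usage":{"input_tokens":12,"output_tokens":0}}}
+
+event: content_block_start
+data: {"type":"content_block_start","index":0,"content_block":{"type":"text"}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}
+
+event: content_block_stop
+data: {"type":"content_block_stop","index":0}
+
+event: message_delta
+data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":6}}
+
+event: message_stop
+data: {"type":"message_stop"}
+```
+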
+## Models
+
+Before using a model, pull it locally with `ollama pull`:
+
+```shell
+ollama pull llama3.2:3b
+```
+
+### Default model names
+
+For tooling that relies on default Anthropic model names such as `claude-3-5-sonnet`, use `ollama cp` to copy an existing model to that name:
+
+```shell
+ollama cp llama3.2:3b claude-3-5-sonnet
+```
+
+Afterwards, the new model name can be specified in the `model` field:
+
+```shell
+curl http://localhost:11434/v1/messages \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "claude-3-5-sonnet",
+    "max_tokens": 1024,
+    "messages": [
+      {
+        "role": "user",
+        "content": "Hello!"
+      }
+    ]
+  }'
+```
+
+## Differences from the Anthropic API
+
+### Behavior differences
+
+- API key is accepted but not validated
+- `anthropic-version` header is accepted but not used
+- Token counts are approximations based on the underlying model's tokenizer
+
+### Not supported
+
+The following Anthropic API features are not currently supported:
+
+| Feature | Description |
+|---------|-------------|
+| `/v1/messages/count_tokens` | Token counting endpoint |
+| `tool_choice` | Forcing specific tool use or disabling tools |
+| `metadata` | Request metadata (`user_id`) |
+| Prompt caching | `cache_control` blocks for caching prefixes |
+| Batches API | `/v1/messages/batches` for async batch processing |
+| Citations | `citations` content blocks |
+| PDF support | `document` content blocks with PDF files |
+| Server-sent errors | `error` events during streaming (errors are returned as HTTP responses) |
+
+### Partial support
+
+| Feature | Status |
+|---------|--------|
+| Image content | Base64 images supported; URL images not supported |
+| Extended thinking | Basic support; `budget_tokens` accepted but not enforced |
diff --git a/middleware/anthropic.go b/middleware/anthropic.go
new file mode 100644
index 000000000..a5f0ed8d6
--- /dev/null
+++ b/middleware/anthropic.go
@@ -0,0 +1,152 @@
+package middleware
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+
+	"github.com/ollama/ollama/anthropic"
+	"github.com/ollama/ollama/api"
+)
+
+// AnthropicWriter wraps the response writer to transform Ollama responses to Anthropic format
+type AnthropicWriter struct {
+	BaseWriter
+	stream    bool
+	id        string
+	model     string
+	converter *anthropic.StreamConverter
+}
+
+func (w *AnthropicWriter) writeError(data []byte) (int, error) {
+	var serr api.StatusError
+	err := json.Unmarshal(data, &serr)
+	if err != nil {
+		return 0, err
+	}
+
+	w.ResponseWriter.Header().Set("Content-Type", "application/json")
+	err = json.NewEncoder(w.ResponseWriter).Encode(anthropic.NewError(serr.StatusCode, serr.Error()))
+	if err != nil {
+		return 0, err
+	}
+
+	return len(data), nil
+}
+
+func (w *AnthropicWriter) writeEvent(eventType string, data any) error {
+	d, err := json.Marshal(data)
+	if err != nil {
+		return err
+	}
+	_, err = w.ResponseWriter.Write([]byte(fmt.Sprintf("event: %s\ndata: %s\n\n", eventType, d)))
+	if err != nil {
+		return err
+	}
+	if f, ok := w.ResponseWriter.(http.Flusher); ok {
+		f.Flush()
+	}
+	return nil
+}
+
+func (w *AnthropicWriter) writeResponse(data []byte) (int, error) {
+	var chatResponse api.ChatResponse
+	err := json.Unmarshal(data, &chatResponse)
+	if err != nil {
+		return 0, err
+	}
+
+	if w.stream {
+		w.ResponseWriter.Header().Set("Content-Type", "text/event-stream")
+
+		events := w.converter.Process(chatResponse)
+		for _, event := range events {
+			if err := w.writeEvent(event.Event, event.Data); err != nil {
+				return 0, err
+			}
+		}
+		return len(data), nil
+	}
+
+	// Non-streaming response
+	w.ResponseWriter.Header().Set("Content-Type", "application/json")
+	response := anthropic.ToMessagesResponse(w.id, chatResponse)
+	return len(data), json.NewEncoder(w.ResponseWriter).Encode(response)
+}
+
+func (w *AnthropicWriter) Write(data []byte) (int, error) {
+	code := w.ResponseWriter.Status()
+	if code != http.StatusOK {
+		return w.writeError(data)
+	}
+
+	return w.writeResponse(data)
+}
+
+// AnthropicMessagesMiddleware handles Anthropic Messages API requests
+func AnthropicMessagesMiddleware() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		var req anthropic.MessagesRequest
+		err := c.ShouldBindJSON(&req)
+		if err != nil {
+			c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, err.Error()))
+			return
+		}
+
+		// Validate required fields
+		if req.Model == "" {
+			c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, "model is required"))
+			return
+		}
+
+		if req.MaxTokens <= 0 {
+			c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, "max_tokens is required and must be positive"))
+			return
+		}
+
+		if len(req.Messages) == 0 {
+			c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, "messages is required"))
+			return
+		}
+
+		// Convert to internal format
+		chatReq, err := anthropic.FromMessagesRequest(req)
+		if err != nil {
+			c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, err.Error()))
+			return
+		}
+
+		var b bytes.Buffer
+		if err := json.NewEncoder(&b).Encode(chatReq); err != nil {
+			c.AbortWithStatusJSON(http.StatusInternalServerError, anthropic.NewError(http.StatusInternalServerError, err.Error()))
+			return
+		}
+
+		c.Request.Body = io.NopCloser(&b)
+
+		messageID := anthropic.GenerateMessageID()
+
+		w := &AnthropicWriter{
+			BaseWriter: BaseWriter{ResponseWriter: c.Writer},
+			stream:     req.Stream,
+			id:         messageID,
+			model:      req.Model,
+			converter:  anthropic.NewStreamConverter(messageID, req.Model),
+		}
+
+		// Set headers based on streaming mode
+		if req.Stream {
+			c.Writer.Header().Set("Content-Type", "text/event-stream")
+			c.Writer.Header().Set("Cache-Control", "no-cache")
+			c.Writer.Header().Set("Connection", "keep-alive")
+		}
+
+		c.Writer = w
+
+		c.Next()
+	}
+}
diff --git a/server/routes.go b/server/routes.go
index 977a13ff2..8e199bada 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -1544,6 +1544,9 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
 	r.GET("/v1/models/:model", middleware.RetrieveMiddleware(), s.ShowHandler)
 	r.POST("/v1/responses", middleware.ResponsesMiddleware(), s.ChatHandler)
 
+	// Inference (Anthropic compatibility)
+	r.POST("/v1/messages", middleware.AnthropicMessagesMiddleware(), s.ChatHandler)
+
 	if rc != nil {
 		// wrap old with new
 		rs := &registry.Local{