diff --git a/anthropic/anthropic.go b/anthropic/anthropic.go
new file mode 100644
index 000000000..7bf9e98a0
--- /dev/null
+++ b/anthropic/anthropic.go
@@ -0,0 +1,779 @@
+// Package anthropic provides core transformation logic for compatibility with the Anthropic Messages API
+package anthropic
+
+import (
+ "crypto/rand"
+ "encoding/base64"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "net/http"
+ "strings"
+ "time"
+
+ "github.com/ollama/ollama/api"
+)
+
+// Error types matching the Anthropic API
+type Error struct {
+ Type string `json:"type"`
+ Message string `json:"message"`
+}
+
+type ErrorResponse struct {
+ Type string `json:"type"` // always "error"
+ Error Error `json:"error"`
+ RequestID string `json:"request_id,omitempty"`
+}
+
+// NewError creates a new ErrorResponse with the appropriate error type based on HTTP status code
+func NewError(code int, message string) ErrorResponse {
+ var etype string
+ switch code {
+ case http.StatusBadRequest:
+ etype = "invalid_request_error"
+ case http.StatusUnauthorized:
+ etype = "authentication_error"
+ case http.StatusForbidden:
+ etype = "permission_error"
+ case http.StatusNotFound:
+ etype = "not_found_error"
+ case http.StatusTooManyRequests:
+ etype = "rate_limit_error"
+ case http.StatusServiceUnavailable, 529:
+ etype = "overloaded_error"
+ default:
+ etype = "api_error"
+ }
+
+ return ErrorResponse{
+ Type: "error",
+ Error: Error{Type: etype, Message: message},
+ RequestID: generateID("req"),
+ }
+}
+
+// Request types
+
+// MessagesRequest represents an Anthropic Messages API request
+type MessagesRequest struct {
+ Model string `json:"model"`
+ MaxTokens int `json:"max_tokens"`
+ Messages []MessageParam `json:"messages"`
+ System any `json:"system,omitempty"` // string or []ContentBlock
+ Stream bool `json:"stream,omitempty"`
+ Temperature *float64 `json:"temperature,omitempty"`
+ TopP *float64 `json:"top_p,omitempty"`
+ TopK *int `json:"top_k,omitempty"`
+ StopSequences []string `json:"stop_sequences,omitempty"`
+ Tools []Tool `json:"tools,omitempty"`
+ ToolChoice *ToolChoice `json:"tool_choice,omitempty"`
+ Thinking *ThinkingConfig `json:"thinking,omitempty"`
+ Metadata *Metadata `json:"metadata,omitempty"`
+}
+
+// MessageParam represents a message in the request
+type MessageParam struct {
+ Role string `json:"role"` // "user" or "assistant"
+ Content any `json:"content"` // string or []ContentBlock
+}
+
+// ContentBlock represents a content block in a message
+type ContentBlock struct {
+ Type string `json:"type"` // text, image, tool_use, tool_result, thinking
+
+ // For text blocks
+ Text string `json:"text,omitempty"`
+
+ // For image blocks
+ Source *ImageSource `json:"source,omitempty"`
+
+ // For tool_use blocks
+ ID string `json:"id,omitempty"`
+ Name string `json:"name,omitempty"`
+ Input any `json:"input,omitempty"`
+
+ // For tool_result blocks
+ ToolUseID string `json:"tool_use_id,omitempty"`
+ Content any `json:"content,omitempty"` // string or []ContentBlock
+ IsError bool `json:"is_error,omitempty"`
+
+ // For thinking blocks
+ Thinking string `json:"thinking,omitempty"`
+ Signature string `json:"signature,omitempty"`
+}
+
+// ImageSource represents the source of an image
+type ImageSource struct {
+ Type string `json:"type"` // "base64" or "url"
+ MediaType string `json:"media_type,omitempty"`
+ Data string `json:"data,omitempty"`
+ URL string `json:"url,omitempty"`
+}
+
+// Tool represents a tool definition
+type Tool struct {
+ Type string `json:"type,omitempty"` // "custom" for user-defined tools
+ Name string `json:"name"`
+ Description string `json:"description,omitempty"`
+ InputSchema json.RawMessage `json:"input_schema,omitempty"`
+}
+
+// ToolChoice controls how the model uses tools
+type ToolChoice struct {
+ Type string `json:"type"` // "auto", "any", "tool", "none"
+ Name string `json:"name,omitempty"`
+ DisableParallelToolUse bool `json:"disable_parallel_tool_use,omitempty"`
+}
+
+// ThinkingConfig controls extended thinking
+type ThinkingConfig struct {
+ Type string `json:"type"` // "enabled" or "disabled"
+ BudgetTokens int `json:"budget_tokens,omitempty"`
+}
+
+// Metadata for the request
+type Metadata struct {
+ UserID string `json:"user_id,omitempty"`
+}
+
+// Response types
+
+// MessagesResponse represents an Anthropic Messages API response
+type MessagesResponse struct {
+ ID string `json:"id"`
+ Type string `json:"type"` // "message"
+ Role string `json:"role"` // "assistant"
+ Model string `json:"model"`
+ Content []ContentBlock `json:"content"`
+ StopReason string `json:"stop_reason,omitempty"`
+ StopSequence string `json:"stop_sequence,omitempty"`
+ Usage Usage `json:"usage"`
+}
+
+// Usage contains token usage information
+type Usage struct {
+ InputTokens int `json:"input_tokens"`
+ OutputTokens int `json:"output_tokens"`
+}
+
+// Streaming event types
+
+// MessageStartEvent is sent at the start of streaming
+type MessageStartEvent struct {
+ Type string `json:"type"` // "message_start"
+ Message MessagesResponse `json:"message"`
+}
+
+// ContentBlockStartEvent signals the start of a content block
+type ContentBlockStartEvent struct {
+ Type string `json:"type"` // "content_block_start"
+ Index int `json:"index"`
+ ContentBlock ContentBlock `json:"content_block"`
+}
+
+// ContentBlockDeltaEvent contains incremental content updates
+type ContentBlockDeltaEvent struct {
+ Type string `json:"type"` // "content_block_delta"
+ Index int `json:"index"`
+ Delta Delta `json:"delta"`
+}
+
+// Delta represents an incremental update
+type Delta struct {
+ Type string `json:"type"` // "text_delta", "input_json_delta", "thinking_delta", "signature_delta"
+ Text string `json:"text,omitempty"`
+ PartialJSON string `json:"partial_json,omitempty"`
+ Thinking string `json:"thinking,omitempty"`
+ Signature string `json:"signature,omitempty"`
+}
+
+// ContentBlockStopEvent signals the end of a content block
+type ContentBlockStopEvent struct {
+ Type string `json:"type"` // "content_block_stop"
+ Index int `json:"index"`
+}
+
+// MessageDeltaEvent contains updates to the message
+type MessageDeltaEvent struct {
+ Type string `json:"type"` // "message_delta"
+ Delta MessageDelta `json:"delta"`
+ Usage DeltaUsage `json:"usage"`
+}
+
+// MessageDelta contains stop information
+type MessageDelta struct {
+ StopReason string `json:"stop_reason,omitempty"`
+ StopSequence string `json:"stop_sequence,omitempty"`
+}
+
+// DeltaUsage contains cumulative token usage
+type DeltaUsage struct {
+ OutputTokens int `json:"output_tokens"`
+}
+
+// MessageStopEvent signals the end of the message
+type MessageStopEvent struct {
+ Type string `json:"type"` // "message_stop"
+}
+
+// PingEvent is a keepalive event
+type PingEvent struct {
+ Type string `json:"type"` // "ping"
+}
+
+// StreamErrorEvent is an error during streaming
+type StreamErrorEvent struct {
+ Type string `json:"type"` // "error"
+ Error Error `json:"error"`
+}
+
+// FromMessagesRequest converts an Anthropic MessagesRequest to an Ollama api.ChatRequest
+func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) {
+ var messages []api.Message
+
+ // Handle system prompt
+ if r.System != nil {
+ switch sys := r.System.(type) {
+ case string:
+ if sys != "" {
+ messages = append(messages, api.Message{Role: "system", Content: sys})
+ }
+ case []any:
+ // System can be an array of content blocks
+ var content strings.Builder
+ for _, block := range sys {
+ if blockMap, ok := block.(map[string]any); ok {
+ if blockMap["type"] == "text" {
+ if text, ok := blockMap["text"].(string); ok {
+ content.WriteString(text)
+ }
+ }
+ }
+ }
+ if content.Len() > 0 {
+ messages = append(messages, api.Message{Role: "system", Content: content.String()})
+ }
+ }
+ }
+
+ // Convert messages
+ for _, msg := range r.Messages {
+ converted, err := convertMessage(msg)
+ if err != nil {
+ return nil, err
+ }
+ messages = append(messages, converted...)
+ }
+
+ // Build options
+ options := make(map[string]any)
+
+ options["num_predict"] = r.MaxTokens
+
+ if r.Temperature != nil {
+ options["temperature"] = *r.Temperature
+ }
+
+ if r.TopP != nil {
+ options["top_p"] = *r.TopP
+ }
+
+ if r.TopK != nil {
+ options["top_k"] = *r.TopK
+ }
+
+ if len(r.StopSequences) > 0 {
+ options["stop"] = r.StopSequences
+ }
+
+ // Convert tools
+ var tools api.Tools
+ for _, t := range r.Tools {
+ tool, err := convertTool(t)
+ if err != nil {
+ return nil, err
+ }
+ tools = append(tools, tool)
+ }
+
+ // Handle thinking
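+ // budget_tokens has no Ollama equivalent, so it is accepted but not enforced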
+ var think *api.ThinkValue
+ if r.Thinking != nil && r.Thinking.Type == "enabled" {
+ think = &api.ThinkValue{Value: true}
+ }
+
+ stream := r.Stream
+
+ return &api.ChatRequest{
+ Model: r.Model,
+ Messages: messages,
+ Options: options,
+ Stream: &stream,
+ Tools: tools,
+ Think: think,
+ }, nil
+}
+
+// convertMessage converts an Anthropic MessageParam to Ollama api.Message(s)
+func convertMessage(msg MessageParam) ([]api.Message, error) {
+ var messages []api.Message
+ role := strings.ToLower(msg.Role)
+
+ switch content := msg.Content.(type) {
+ case string:
+ messages = append(messages, api.Message{Role: role, Content: content})
+
+ case []any:
+ // Handle array of content blocks
+ var textContent strings.Builder
+ var images []api.ImageData
+ var toolCalls []api.ToolCall
+ var thinking string
+ var toolResults []api.Message
+
+ for _, block := range content {
+ blockMap, ok := block.(map[string]any)
+ if !ok {
+ return nil, errors.New("invalid content block format")
+ }
+
+ blockType, _ := blockMap["type"].(string)
+
+ switch blockType {
+ case "text":
+ if text, ok := blockMap["text"].(string); ok {
+ textContent.WriteString(text)
+ }
+
+ case "image":
+ source, ok := blockMap["source"].(map[string]any)
+ if !ok {
+ return nil, errors.New("invalid image source")
+ }
+
+ sourceType, _ := source["type"].(string)
+ if sourceType == "base64" {
+ data, _ := source["data"].(string)
+ decoded, err := base64.StdEncoding.DecodeString(data)
+ if err != nil {
+ return nil, fmt.Errorf("invalid base64 image data: %w", err)
+ }
+ images = append(images, decoded)
+ }
+ // URL image sources would need to be fetched; they are skipped for now
+
+ case "tool_use":
+ id, ok := blockMap["id"].(string)
+ if !ok {
+ return nil, errors.New("tool_use block missing required 'id' field")
+ }
+ name, ok := blockMap["name"].(string)
+ if !ok {
+ return nil, errors.New("tool_use block missing required 'name' field")
+ }
+ tc := api.ToolCall{
+ ID: id,
+ Function: api.ToolCallFunction{
+ Name: name,
+ },
+ }
+ if input, ok := blockMap["input"].(map[string]any); ok {
+ tc.Function.Arguments = api.ToolCallFunctionArguments(input)
+ }
+ toolCalls = append(toolCalls, tc)
+
+ case "tool_result":
+ toolUseID, _ := blockMap["tool_use_id"].(string)
+ var resultContent string
+
+ switch c := blockMap["content"].(type) {
+ case string:
+ resultContent = c
+ case []any:
+ // Extract text from content blocks
+ for _, cb := range c {
+ if cbMap, ok := cb.(map[string]any); ok {
+ if cbMap["type"] == "text" {
+ if text, ok := cbMap["text"].(string); ok {
+ resultContent += text
+ }
+ }
+ }
+ }
+ }
+
+ toolResults = append(toolResults, api.Message{
+ Role: "tool",
+ Content: resultContent,
+ ToolCallID: toolUseID,
+ })
+
+ case "thinking":
+ if t, ok := blockMap["thinking"].(string); ok {
+ thinking = t
+ }
+ }
+ }
+
+ // Build the main message
+ if textContent.Len() > 0 || len(images) > 0 || len(toolCalls) > 0 || thinking != "" {
+ m := api.Message{
+ Role: role,
+ Content: textContent.String(),
+ Images: images,
+ ToolCalls: toolCalls,
+ Thinking: thinking,
+ }
+ messages = append(messages, m)
+ }
+
+ // Add tool results as separate messages
+ messages = append(messages, toolResults...)
+
+ default:
+ return nil, fmt.Errorf("invalid message content type: %T", content)
+ }
+
+ return messages, nil
+}
+
+// convertTool converts an Anthropic Tool to an Ollama api.Tool
+func convertTool(t Tool) (api.Tool, error) {
+ var params api.ToolFunctionParameters
+ if len(t.InputSchema) > 0 {
+ if err := json.Unmarshal(t.InputSchema, &params); err != nil {
+ return api.Tool{}, fmt.Errorf("invalid input_schema for tool %q: %w", t.Name, err)
+ }
+ }
+
+ return api.Tool{
+ Type: "function",
+ Function: api.ToolFunction{
+ Name: t.Name,
+ Description: t.Description,
+ Parameters: params,
+ },
+ }, nil
+}
+
+// ToMessagesResponse converts an Ollama api.ChatResponse to an Anthropic MessagesResponse
+func ToMessagesResponse(id string, r api.ChatResponse) MessagesResponse {
+ var content []ContentBlock
+
+ // Add thinking block if present
+ if r.Message.Thinking != "" {
+ content = append(content, ContentBlock{
+ Type: "thinking",
+ Thinking: r.Message.Thinking,
+ })
+ }
+
+ // Add text content if present
+ if r.Message.Content != "" {
+ content = append(content, ContentBlock{
+ Type: "text",
+ Text: r.Message.Content,
+ })
+ }
+
+ // Add tool use blocks
+ for _, tc := range r.Message.ToolCalls {
+ content = append(content, ContentBlock{
+ Type: "tool_use",
+ ID: tc.ID,
+ Name: tc.Function.Name,
+ Input: tc.Function.Arguments,
+ })
+ }
+
+ // Map stop reason
+ stopReason := mapStopReason(r.DoneReason, len(r.Message.ToolCalls) > 0)
+
+ return MessagesResponse{
+ ID: id,
+ Type: "message",
+ Role: "assistant",
+ Model: r.Model,
+ Content: content,
+ StopReason: stopReason,
+ Usage: Usage{
+ InputTokens: r.Metrics.PromptEvalCount,
+ OutputTokens: r.Metrics.EvalCount,
+ },
+ }
+}
+
+// mapStopReason converts Ollama done_reason to Anthropic stop_reason
+func mapStopReason(reason string, hasToolCalls bool) string {
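+ // Tool calls take precedence over the underlying done reason.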
+ if hasToolCalls {
+ return "tool_use"
+ }
+
+ switch reason {
+ case "stop":
+ return "end_turn"
+ case "length":
+ return "max_tokens"
+ default:
+ if reason != "" {
+ return "stop_sequence"
+ }
+ return ""
+ }
+}
+
+// StreamConverter manages state for converting Ollama streaming responses to Anthropic format
+type StreamConverter struct {
+ ID string
+ Model string
+ firstWrite bool
+ contentIndex int
+ inputTokens int
+ outputTokens int
+ thinkingStarted bool
+ thinkingDone bool
+ textStarted bool
+ toolCallsSent map[string]bool
+}
+
+// NewStreamConverter creates a new StreamConverter
+func NewStreamConverter(id, model string) *StreamConverter {
+ return &StreamConverter{
+ ID: id,
+ Model: model,
+ firstWrite: true,
+ toolCallsSent: make(map[string]bool),
+ }
+}
+
+// StreamEvent represents a streaming event to be sent to the client
+type StreamEvent struct {
+ Event string
+ Data any
+}
+
+// Process converts an Ollama ChatResponse to Anthropic streaming events
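+// Events follow Anthropic's ordering: message_start once, then each content
+// block bracketed by content_block_start/content_block_stop with delta events
+// in between, and finally message_delta and message_stop once the response is done.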
+func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent {
+ var events []StreamEvent
+
+ // First write: emit message_start
+ if c.firstWrite {
+ c.firstWrite = false
+ c.inputTokens = r.Metrics.PromptEvalCount
+
+ events = append(events, StreamEvent{
+ Event: "message_start",
+ Data: MessageStartEvent{
+ Type: "message_start",
+ Message: MessagesResponse{
+ ID: c.ID,
+ Type: "message",
+ Role: "assistant",
+ Model: c.Model,
+ Content: []ContentBlock{},
+ Usage: Usage{
+ InputTokens: c.inputTokens,
+ OutputTokens: 0,
+ },
+ },
+ },
+ })
+ }
+
+ // Handle thinking content
+ if r.Message.Thinking != "" && !c.thinkingDone {
+ if !c.thinkingStarted {
+ c.thinkingStarted = true
+ events = append(events, StreamEvent{
+ Event: "content_block_start",
+ Data: ContentBlockStartEvent{
+ Type: "content_block_start",
+ Index: c.contentIndex,
+ ContentBlock: ContentBlock{
+ Type: "thinking",
+ Thinking: "",
+ },
+ },
+ })
+ }
+
+ events = append(events, StreamEvent{
+ Event: "content_block_delta",
+ Data: ContentBlockDeltaEvent{
+ Type: "content_block_delta",
+ Index: c.contentIndex,
+ Delta: Delta{
+ Type: "thinking_delta",
+ Thinking: r.Message.Thinking,
+ },
+ },
+ })
+ }
+
+ // Handle text content
+ if r.Message.Content != "" {
+ // Close thinking block if it was open
+ if c.thinkingStarted && !c.thinkingDone {
+ c.thinkingDone = true
+ events = append(events, StreamEvent{
+ Event: "content_block_stop",
+ Data: ContentBlockStopEvent{
+ Type: "content_block_stop",
+ Index: c.contentIndex,
+ },
+ })
+ c.contentIndex++
+ }
+
+ if !c.textStarted {
+ c.textStarted = true
+ events = append(events, StreamEvent{
+ Event: "content_block_start",
+ Data: ContentBlockStartEvent{
+ Type: "content_block_start",
+ Index: c.contentIndex,
+ ContentBlock: ContentBlock{
+ Type: "text",
+ Text: "",
+ },
+ },
+ })
+ }
+
+ events = append(events, StreamEvent{
+ Event: "content_block_delta",
+ Data: ContentBlockDeltaEvent{
+ Type: "content_block_delta",
+ Index: c.contentIndex,
+ Delta: Delta{
+ Type: "text_delta",
+ Text: r.Message.Content,
+ },
+ },
+ })
+ }
+
+ // Handle tool calls
+ for _, tc := range r.Message.ToolCalls {
+ if c.toolCallsSent[tc.ID] {
+ continue
+ }
+
+ // Close any previous block
+ if c.textStarted {
+ events = append(events, StreamEvent{
+ Event: "content_block_stop",
+ Data: ContentBlockStopEvent{
+ Type: "content_block_stop",
+ Index: c.contentIndex,
+ },
+ })
+ c.contentIndex++
+ c.textStarted = false
+ }
+
+ // Start tool use block
+ events = append(events, StreamEvent{
+ Event: "content_block_start",
+ Data: ContentBlockStartEvent{
+ Type: "content_block_start",
+ Index: c.contentIndex,
+ ContentBlock: ContentBlock{
+ Type: "tool_use",
+ ID: tc.ID,
+ Name: tc.Function.Name,
+ Input: map[string]any{},
+ },
+ },
+ })
+
+ // Send input as JSON delta
+ argsJSON, _ := json.Marshal(tc.Function.Arguments)
+ events = append(events, StreamEvent{
+ Event: "content_block_delta",
+ Data: ContentBlockDeltaEvent{
+ Type: "content_block_delta",
+ Index: c.contentIndex,
+ Delta: Delta{
+ Type: "input_json_delta",
+ PartialJSON: string(argsJSON),
+ },
+ },
+ })
+
+ // Close tool use block
+ events = append(events, StreamEvent{
+ Event: "content_block_stop",
+ Data: ContentBlockStopEvent{
+ Type: "content_block_stop",
+ Index: c.contentIndex,
+ },
+ })
+
+ c.toolCallsSent[tc.ID] = true
+ c.contentIndex++
+ }
+
+ // Handle done
+ if r.Done {
+ // Close any open content block (text or thinking)
+ if c.textStarted || (c.thinkingStarted && !c.thinkingDone) {
+ events = append(events, StreamEvent{
+ Event: "content_block_stop",
+ Data: ContentBlockStopEvent{
+ Type: "content_block_stop",
+ Index: c.contentIndex,
+ },
+ })
+ }
+
+ c.outputTokens = r.Metrics.EvalCount
+ stopReason := mapStopReason(r.DoneReason, len(c.toolCallsSent) > 0)
+
+ events = append(events, StreamEvent{
+ Event: "message_delta",
+ Data: MessageDeltaEvent{
+ Type: "message_delta",
+ Delta: MessageDelta{
+ StopReason: stopReason,
+ },
+ Usage: DeltaUsage{
+ OutputTokens: c.outputTokens,
+ },
+ },
+ })
+
+ events = append(events, StreamEvent{
+ Event: "message_stop",
+ Data: MessageStopEvent{
+ Type: "message_stop",
+ },
+ })
+ }
+
+ return events
+}
+
+// generateID generates a unique ID with the given prefix using crypto/rand
+func generateID(prefix string) string {
+ b := make([]byte, 12)
+ if _, err := rand.Read(b); err != nil {
+ // Fallback to time-based ID if crypto/rand fails
+ return fmt.Sprintf("%s_%d", prefix, time.Now().UnixNano())
+ }
+ return fmt.Sprintf("%s_%x", prefix, b)
+}
+
+// GenerateMessageID generates a unique message ID
+func GenerateMessageID() string {
+ return generateID("msg")
+}
diff --git a/docs/api/anthropic-compatibility.mdx b/docs/api/anthropic-compatibility.mdx
new file mode 100644
index 000000000..b8953d1d8
--- /dev/null
+++ b/docs/api/anthropic-compatibility.mdx
@@ -0,0 +1,339 @@
+---
+title: Anthropic compatibility
+---
+
+Ollama provides compatibility with the [Anthropic Messages API](https://docs.anthropic.com/en/api/messages) to help connect existing applications to Ollama, including tools like Claude Code.
+
+## Usage
+
+### Environment variables
+
+To use Ollama with tools that expect the Anthropic API (like Claude Code), set these environment variables:
+
+```shell
+export ANTHROPIC_BASE_URL=http://localhost:11434
+export ANTHROPIC_API_KEY=ollama # required but ignored
+```
+
+### Simple `/v1/messages` example
+
+
+
+```python basic.py
+import anthropic
+
+client = anthropic.Anthropic(
+ base_url='http://localhost:11434',
+ api_key='ollama', # required but ignored
+)
+
+message = client.messages.create(
+ model='llama3.2:3b',
+ max_tokens=1024,
+ messages=[
+ {'role': 'user', 'content': 'Hello, how are you?'}
+ ]
+)
+print(message.content[0].text)
+```
+
+```javascript basic.js
+import Anthropic from "@anthropic-ai/sdk";
+
+const anthropic = new Anthropic({
+ baseURL: "http://localhost:11434",
+ apiKey: "ollama", // required but ignored
+});
+
+const message = await anthropic.messages.create({
+ model: "llama3.2:3b",
+ max_tokens: 1024,
+ messages: [{ role: "user", content: "Hello, how are you?" }],
+});
+
+console.log(message.content[0].text);
+```
+
+```shell basic.sh
+curl -X POST http://localhost:11434/v1/messages \
+-H "Content-Type: application/json" \
+-H "x-api-key: ollama" \
+-H "anthropic-version: 2023-06-01" \
+-d '{
+ "model": "llama3.2:3b",
+ "max_tokens": 1024,
+ "messages": [{ "role": "user", "content": "Hello, how are you?" }]
+}'
+```
+
+
+
+### Streaming example
+
+
+
+```python streaming.py
+import anthropic
+
+client = anthropic.Anthropic(
+ base_url='http://localhost:11434',
+ api_key='ollama',
+)
+
+with client.messages.stream(
+ model='llama3.2:3b',
+ max_tokens=1024,
+ messages=[{'role': 'user', 'content': 'Count from 1 to 10'}]
+) as stream:
+ for text in stream.text_stream:
+ print(text, end='', flush=True)
+```
+
+```javascript streaming.js
+import Anthropic from "@anthropic-ai/sdk";
+
+const anthropic = new Anthropic({
+ baseURL: "http://localhost:11434",
+ apiKey: "ollama",
+});
+
+const stream = await anthropic.messages.stream({
+ model: "llama3.2:3b",
+ max_tokens: 1024,
+ messages: [{ role: "user", content: "Count from 1 to 10" }],
+});
+
+for await (const event of stream) {
+ if (
+ event.type === "content_block_delta" &&
+ event.delta.type === "text_delta"
+ ) {
+ process.stdout.write(event.delta.text);
+ }
+}
+```
+
+```shell streaming.sh
+curl -X POST http://localhost:11434/v1/messages \
+-H "Content-Type: application/json" \
+-d '{
+ "model": "llama3.2:3b",
+ "max_tokens": 1024,
+ "stream": true,
+ "messages": [{ "role": "user", "content": "Count from 1 to 10" }]
+}'
+```
+
+
+
+### Tool calling example
+
+
+
+```python tools.py
+import anthropic
+
+client = anthropic.Anthropic(
+ base_url='http://localhost:11434',
+ api_key='ollama',
+)
+
+message = client.messages.create(
+ model='llama3.2:3b',
+ max_tokens=1024,
+ tools=[
+ {
+ 'name': 'get_weather',
+ 'description': 'Get the current weather in a location',
+ 'input_schema': {
+ 'type': 'object',
+ 'properties': {
+ 'location': {
+ 'type': 'string',
+ 'description': 'The city and state, e.g. San Francisco, CA'
+ }
+ },
+ 'required': ['location']
+ }
+ }
+ ],
+ messages=[{'role': 'user', 'content': "What's the weather in San Francisco?"}]
+)
+
+for block in message.content:
+ if block.type == 'tool_use':
+ print(f'Tool: {block.name}')
+ print(f'Input: {block.input}')
+```
+
+```shell tools.sh
+curl -X POST http://localhost:11434/v1/messages \
+-H "Content-Type: application/json" \
+-d '{
+ "model": "llama3.2:3b",
+ "max_tokens": 1024,
+ "tools": [
+ {
+ "name": "get_weather",
+ "description": "Get the current weather in a location",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state"
+ }
+ },
+ "required": ["location"]
+ }
+ }
+ ],
+ "messages": [{ "role": "user", "content": "What is the weather in San Francisco?" }]
+}'
+```
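+
+To continue a conversation after a tool call, send the tool's output back as a `tool_result` block in a `user` message. Below is a minimal sketch extending the Python example above; the weather value is an illustrative stub rather than a real lookup:
+
+```python tool_result.py
+import anthropic
+
+client = anthropic.Anthropic(
+ base_url='http://localhost:11434',
+ api_key='ollama',
+)
+
+tools = [
+ {
+ 'name': 'get_weather',
+ 'description': 'Get the current weather in a location',
+ 'input_schema': {
+ 'type': 'object',
+ 'properties': {'location': {'type': 'string'}},
+ 'required': ['location']
+ }
+ }
+]
+
+messages = [{'role': 'user', 'content': "What's the weather in San Francisco?"}]
+response = client.messages.create(model='llama3.2:3b', max_tokens=1024, tools=tools, messages=messages)
+
+# Echo the assistant turn, then answer each tool_use block with a tool_result block
+messages.append({'role': 'assistant', 'content': response.content})
+for block in response.content:
+ if block.type == 'tool_use':
+ messages.append({
+ 'role': 'user',
+ 'content': [{
+ 'type': 'tool_result',
+ 'tool_use_id': block.id,
+ 'content': '15 degrees and sunny'  # stub result for illustration
+ }]
+ })
+
+final = client.messages.create(model='llama3.2:3b', max_tokens=1024, tools=tools, messages=messages)
+print(final.content[0].text)
+```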
+
+
+
+## Using with Claude Code
+
+[Claude Code](https://docs.anthropic.com/en/docs/claude-code) can be configured to use Ollama as its backend:
+
+```shell
+ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model llama3.2:3b
+```
+
+Or set the environment variables in your shell profile:
+
+```shell
+export ANTHROPIC_BASE_URL=http://localhost:11434
+export ANTHROPIC_API_KEY=ollama
+```
+
+Then run Claude Code with any Ollama model:
+
+```shell
+claude --model llama3.2:3b
+claude --model qwen3:8b
+claude --model deepseek-r1:14b
+```
+
+## Endpoints
+
+### `/v1/messages`
+
+#### Supported features
+
+- [x] Messages
+- [x] Streaming
+- [x] System prompts
+- [x] Multi-turn conversations
+- [x] Vision (images)
+- [x] Tools (function calling)
+- [x] Tool results
+- [x] Thinking/extended thinking
+
+#### Supported request fields
+
+- [x] `model`
+- [x] `max_tokens`
+- [x] `messages`
+ - [x] Text `content`
+ - [x] Image `content` (base64)
+ - [x] Array of content blocks
+ - [x] `tool_use` blocks
+ - [x] `tool_result` blocks
+ - [x] `thinking` blocks
+- [x] `system` (string or array)
+- [x] `stream`
+- [x] `temperature`
+- [x] `top_p`
+- [x] `top_k`
+- [x] `stop_sequences`
+- [x] `tools`
+- [x] `thinking`
+- [ ] `tool_choice`
+- [ ] `metadata`
+
+#### Supported response fields
+
+- [x] `id`
+- [x] `type`
+- [x] `role`
+- [x] `model`
+- [x] `content` (text, tool_use, thinking blocks)
+- [x] `stop_reason` (end_turn, max_tokens, tool_use)
+- [x] `usage` (input_tokens, output_tokens)
+
+#### Streaming events
+
+- [x] `message_start`
+- [x] `content_block_start`
+- [x] `content_block_delta` (text_delta, input_json_delta, thinking_delta)
+- [x] `content_block_stop`
+- [x] `message_delta`
+- [x] `message_stop`
+- [ ] `ping`
+- [ ] `error`
+
+## Models
+
+Before using a model, pull it locally with `ollama pull`:
+
+```shell
+ollama pull llama3.2:3b
+```
+
+### Default model names
+
+For tooling that relies on default Anthropic model names such as `claude-3-5-sonnet`, use `ollama cp` to copy an existing model to that name:
+
+```shell
+ollama cp llama3.2:3b claude-3-5-sonnet
+```
+
+Afterwards, this new model name can be specified in the `model` field:
+
+```shell
+curl http://localhost:11434/v1/messages \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "claude-3-5-sonnet",
+ "max_tokens": 1024,
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hello!"
+ }
+ ]
+ }'
+```
+
+## Differences from the Anthropic API
+
+### Behavior differences
+
+- API key is accepted but not validated
+- `anthropic-version` header is accepted but not used
+- Token counts come from the underlying model's tokenizer, so they will differ from those of Anthropic's models
+
+### Not supported
+
+The following Anthropic API features are not currently supported:
+
+| Feature | Description |
+|---------|-------------|
+| `/v1/messages/count_tokens` | Token counting endpoint |
+| `tool_choice` | Forcing specific tool use or disabling tools |
+| `metadata` | Request metadata (user_id) |
+| Prompt caching | `cache_control` blocks for caching prefixes |
+| Batches API | `/v1/messages/batches` for async batch processing |
+| Citations | `citations` content blocks |
+| PDF support | `document` content blocks with PDF files |
+| Server-sent errors | `error` events during streaming (errors return HTTP status) |
+
+### Partial support
+
+| Feature | Status |
+|---------|--------|
+| Image content | Base64 images supported; URL images not supported |
+| Extended thinking | Basic support; `budget_tokens` accepted but not enforced |
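+
+For example, enabling thinking on a reasoning-capable model (a minimal sketch; `budget_tokens` is accepted for compatibility but has no effect):
+
+```python thinking.py
+import anthropic
+
+client = anthropic.Anthropic(
+ base_url='http://localhost:11434',
+ api_key='ollama',
+)
+
+message = client.messages.create(
+ model='deepseek-r1:14b',
+ max_tokens=1024,
+ thinking={'type': 'enabled', 'budget_tokens': 1024},  # budget is ignored by Ollama
+ messages=[{'role': 'user', 'content': 'What is 17 * 24?'}]
+)
+
+for block in message.content:
+ if block.type == 'thinking':
+ print('Thinking:', block.thinking)
+ elif block.type == 'text':
+ print(block.text)
+```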
diff --git a/middleware/anthropic.go b/middleware/anthropic.go
new file mode 100644
index 000000000..a5f0ed8d6
--- /dev/null
+++ b/middleware/anthropic.go
@@ -0,0 +1,152 @@
+package middleware
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+
+ "github.com/gin-gonic/gin"
+
+ "github.com/ollama/ollama/anthropic"
+ "github.com/ollama/ollama/api"
+)
+
+// AnthropicWriter wraps the response writer to transform Ollama responses to Anthropic format
+type AnthropicWriter struct {
+ BaseWriter
+ stream bool
+ id string
+ model string
+ converter *anthropic.StreamConverter
+}
+
+func (w *AnthropicWriter) writeError(data []byte) (int, error) {
+ var serr api.StatusError
+ err := json.Unmarshal(data, &serr)
+ if err != nil {
+ return 0, err
+ }
+
+ w.ResponseWriter.Header().Set("Content-Type", "application/json")
+ err = json.NewEncoder(w.ResponseWriter).Encode(anthropic.NewError(serr.StatusCode, serr.Error()))
+ if err != nil {
+ return 0, err
+ }
+
+ return len(data), nil
+}
+
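+// writeEvent marshals data and writes it as a single server-sent event,
+// flushing so the client receives it immediately.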
+func (w *AnthropicWriter) writeEvent(eventType string, data any) error {
+ d, err := json.Marshal(data)
+ if err != nil {
+ return err
+ }
+ _, err = w.ResponseWriter.Write([]byte(fmt.Sprintf("event: %s\ndata: %s\n\n", eventType, d)))
+ if err != nil {
+ return err
+ }
+ if f, ok := w.ResponseWriter.(http.Flusher); ok {
+ f.Flush()
+ }
+ return nil
+}
+
+func (w *AnthropicWriter) writeResponse(data []byte) (int, error) {
+ var chatResponse api.ChatResponse
+ err := json.Unmarshal(data, &chatResponse)
+ if err != nil {
+ return 0, err
+ }
+
+ if w.stream {
+ w.ResponseWriter.Header().Set("Content-Type", "text/event-stream")
+
+ events := w.converter.Process(chatResponse)
+ for _, event := range events {
+ if err := w.writeEvent(event.Event, event.Data); err != nil {
+ return 0, err
+ }
+ }
+ return len(data), nil
+ }
+
+ // Non-streaming response
+ w.ResponseWriter.Header().Set("Content-Type", "application/json")
+ response := anthropic.ToMessagesResponse(w.id, chatResponse)
+ return len(data), json.NewEncoder(w.ResponseWriter).Encode(response)
+}
+
+func (w *AnthropicWriter) Write(data []byte) (int, error) {
+ code := w.ResponseWriter.Status()
+ if code != http.StatusOK {
+ return w.writeError(data)
+ }
+
+ return w.writeResponse(data)
+}
+
+// AnthropicMessagesMiddleware handles Anthropic Messages API requests
+func AnthropicMessagesMiddleware() gin.HandlerFunc {
+ return func(c *gin.Context) {
+ var req anthropic.MessagesRequest
+ err := c.ShouldBindJSON(&req)
+ if err != nil {
+ c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, err.Error()))
+ return
+ }
+
+ // Validate required fields
+ if req.Model == "" {
+ c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, "model is required"))
+ return
+ }
+
+ if req.MaxTokens <= 0 {
+ c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, "max_tokens is required and must be positive"))
+ return
+ }
+
+ if len(req.Messages) == 0 {
+ c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, "messages is required"))
+ return
+ }
+
+ // Convert to internal format
+ chatReq, err := anthropic.FromMessagesRequest(req)
+ if err != nil {
+ c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, err.Error()))
+ return
+ }
+
+ var b bytes.Buffer
+ if err := json.NewEncoder(&b).Encode(chatReq); err != nil {
+ c.AbortWithStatusJSON(http.StatusInternalServerError, anthropic.NewError(http.StatusInternalServerError, err.Error()))
+ return
+ }
+
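+ // Replace the request body with the converted Ollama chat request so the
+ // downstream ChatHandler consumes it transparently.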
+ c.Request.Body = io.NopCloser(&b)
+
+ messageID := anthropic.GenerateMessageID()
+
+ w := &AnthropicWriter{
+ BaseWriter: BaseWriter{ResponseWriter: c.Writer},
+ stream: req.Stream,
+ id: messageID,
+ model: req.Model,
+ converter: anthropic.NewStreamConverter(messageID, req.Model),
+ }
+
+ // Set headers based on streaming mode
+ if req.Stream {
+ c.Writer.Header().Set("Content-Type", "text/event-stream")
+ c.Writer.Header().Set("Cache-Control", "no-cache")
+ c.Writer.Header().Set("Connection", "keep-alive")
+ }
+
+ c.Writer = w
+
+ c.Next()
+ }
+}
diff --git a/server/routes.go b/server/routes.go
index 977a13ff2..8e199bada 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -1544,6 +1544,9 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
r.GET("/v1/models/:model", middleware.RetrieveMiddleware(), s.ShowHandler)
r.POST("/v1/responses", middleware.ResponsesMiddleware(), s.ChatHandler)
+ // Inference (Anthropic compatibility)
+ r.POST("/v1/messages", middleware.AnthropicMessagesMiddleware(), s.ChatHandler)
+
if rc != nil {
// wrap old with new
rs := &registry.Local{