//go:build integration

package integration

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"slices"
	"strings"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/ollama/ollama/api"
	"github.com/openai/openai-go/v3"
	"github.com/openai/openai-go/v3/option"
	"github.com/openai/openai-go/v3/shared"
)

var agenticModels = []string{
	"gpt-oss:20b",
	"gpt-oss:120b",
	"qwen3-coder:30b",
	"qwen3:4b",
	"qwen3:8b",
}

var cloudModels = []string{
	"gpt-oss:120b-cloud",
	"gpt-oss:20b-cloud",
	"qwen3-vl:235b-cloud",
	"qwen3-coder:480b-cloud",
	"kimi-k2-thinking:cloud",
	"kimi-k2:1t-cloud",
}

// validateBashCommand validates a bash command by exact executable match.
// It checks that the command name equals expectedCmd and that every required
// argument appears in the remaining argument string.
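//
// Illustrative expectations:
//
//	validateBashCommand("ls -la /tmp", "ls", []string{"/tmp"}) // nil
//	validateBashCommand("find /tmp", "ls", nil)                // error: wrong command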
func validateBashCommand(cmd string, expectedCmd string, requiredArgs []string) error {
	parts := strings.Fields(cmd)
	if len(parts) == 0 {
		return fmt.Errorf("empty command")
	}

	actualCmd := parts[0]
	if actualCmd != expectedCmd {
		return fmt.Errorf("expected command '%s', got '%s'", expectedCmd, actualCmd)
	}

	cmdStr := strings.Join(parts[1:], " ")
	for _, arg := range requiredArgs {
		if !strings.Contains(cmdStr, arg) {
			return fmt.Errorf("missing required argument: %s", arg)
		}
	}

	return nil
}

// validateBashCommandFlexible validates a bash command with flexible matching.
// It accepts any of the allowed command forms (e.g., find vs ls) and checks
// that each required pattern is present, case-insensitively.
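//
// Illustrative expectation:
//
//	validateBashCommandFlexible("find /tmp -name '*.log'", []string{"find", "ls"}, []string{"/tmp"}) // nil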
func validateBashCommandFlexible(cmd string, allowedCommands []string, requiredPatterns []string) error {
	parts := strings.Fields(cmd)
	if len(parts) == 0 {
		return fmt.Errorf("empty command")
	}

	actualCmd := parts[0]
	if !slices.Contains(allowedCommands, actualCmd) {
		return fmt.Errorf("expected one of commands %v, got '%s'", allowedCommands, actualCmd)
	}

	cmdStr := strings.ToLower(strings.Join(parts[1:], " "))
	for _, pattern := range requiredPatterns {
		if !strings.Contains(cmdStr, strings.ToLower(pattern)) {
			return fmt.Errorf("missing required pattern: %s", pattern)
		}
	}

	return nil
}
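
// The tests below target either a local server started through
// InitServerConnection (the default) or an external OpenAI-compatible
// endpoint. Set OPENAI_BASE_URL together with OLLAMA_API_KEY to run against
// a remote endpoint; set OPENAI_TEST_MODELS to restrict the run to a single
// model. Without OPENAI_BASE_URL, both the local and cloud model lists are
// exercised against the local server.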

func TestOpenAIToolCallingMultiStep(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
	defer cancel()

	var baseURL string
	var apiKey string
	var modelsToTest []string
	var cleanup func()
	var ollamaClient *api.Client // set only for the local-server path; used to pull models

	if openaiBaseURL := os.Getenv("OPENAI_BASE_URL"); openaiBaseURL != "" {
		baseURL = openaiBaseURL
		apiKey = os.Getenv("OLLAMA_API_KEY")
		if apiKey == "" {
			t.Fatal("OLLAMA_API_KEY must be set when using OPENAI_BASE_URL")
		}

		// only test cloud models unless OPENAI_TEST_MODELS is set
		modelsToTest = cloudModels
		if modelsEnv := os.Getenv("OPENAI_TEST_MODELS"); modelsEnv != "" {
			modelsToTest = []string{modelsEnv}
		}
		cleanup = func() {}
	} else {
		client, testEndpoint, cleanupFn := InitServerConnection(ctx, t)
		ollamaClient = client
		cleanup = cleanupFn
		baseURL = fmt.Sprintf("http://%s/v1", testEndpoint)
		apiKey = "ollama"
		modelsToTest = append(agenticModels, cloudModels...)
	}
	t.Cleanup(cleanup)

	opts := []option.RequestOption{
		option.WithBaseURL(baseURL),
		option.WithAPIKey(apiKey),
	}
	openaiClient := openai.NewClient(opts...)

	for _, model := range modelsToTest {
		t.Run(model, func(t *testing.T) {
			testCtx := ctx
			if slices.Contains(cloudModels, model) {
				t.Parallel()
				// Create a new context for parallel tests to avoid cancellation
				var cancel context.CancelFunc
				testCtx, cancel = context.WithTimeout(context.Background(), 10*time.Minute)
				defer cancel()
			}
			if v, ok := minVRAM[model]; ok {
				skipUnderMinVRAM(t, v)
			}

			if ollamaClient != nil {
				if err := PullIfMissing(testCtx, ollamaClient, model); err != nil {
					t.Fatalf("pull failed: %s", err)
				}
			}

			tools := []openai.ChatCompletionToolUnionParam{
				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
					Name:        "list_files",
					Description: openai.Opt("List all files in a directory"),
					Parameters: shared.FunctionParameters{
						"type": "object",
						"properties": map[string]any{
							"path": map[string]any{
								"type":        "string",
								"description": "The directory path to list files from",
							},
						},
						"required": []string{"path"},
					},
				}),
				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
					Name:        "read_file",
					Description: openai.Opt("Read the contents of a file"),
					Parameters: shared.FunctionParameters{
						"type": "object",
						"properties": map[string]any{
							"path": map[string]any{
								"type":        "string",
								"description": "The file path to read",
							},
						},
						"required": []string{"path"},
					},
				}),
			}

			mockFileContents := "line 1\nline 2\nline 3\nline 4\nline 5"
			userContent := "Find the file named 'config.json' in /tmp and read its contents"
			userMessage := openai.UserMessage(userContent)

			messages := []openai.ChatCompletionMessageParamUnion{
				userMessage,
			}
			stepCount := 0
			maxSteps := 10

			// normalizePath tolerates models that return relative paths by
			// prefixing a leading slash, e.g. "tmp" is treated as "/tmp".
			normalizePath := func(path string) string {
				if path != "" && path[0] != '/' {
					return "/" + path
				}
				return path
			}

			expectedSteps := []struct {
				functionName string
				validateArgs func(map[string]any) error
				result       string
			}{
				{
					functionName: "list_files",
					validateArgs: func(args map[string]any) error {
						path, ok := args["path"]
						if !ok {
							return fmt.Errorf("missing required argument 'path'")
						}
						pathStr, ok := path.(string)
						if !ok {
							return fmt.Errorf("expected 'path' to be string, got %T", path)
						}
						normalizedPath := normalizePath(pathStr)
						if normalizedPath != "/tmp" {
							return fmt.Errorf("expected list_files(\"/tmp\"), got list_files(%q)", pathStr)
						}
						return nil
					},
					result: `["config.json", "other.txt", "data.log"]`,
				},
				{
					functionName: "read_file",
					validateArgs: func(args map[string]any) error {
						path, ok := args["path"]
						if !ok {
							return fmt.Errorf("missing required argument 'path'")
						}
						pathStr, ok := path.(string)
						if !ok {
							return fmt.Errorf("expected 'path' to be string, got %T", path)
						}
						normalizedPath := normalizePath(pathStr)
						if normalizedPath != "/tmp/config.json" {
							return fmt.Errorf("expected read_file(\"/tmp/config.json\"), got read_file(%q)", pathStr)
						}
						return nil
					},
					result: mockFileContents,
				},
			}

			for stepCount < maxSteps {
				req := openai.ChatCompletionNewParams{
					Model:       shared.ChatModel(model),
					Messages:    messages,
					Tools:       tools,
					Temperature: openai.Opt(0.0),
				}

				completion, err := openaiClient.Chat.Completions.New(testCtx, req)
				if err != nil {
					t.Fatalf("step %d chat failed: %v", stepCount+1, err)
				}

				if len(completion.Choices) == 0 {
					t.Fatalf("step %d: no choices in response", stepCount+1)
				}

				choice := completion.Choices[0]
				message := choice.Message

				toolCalls := message.ToolCalls
				content := message.Content
				gotToolCall := len(toolCalls) > 0
				var toolCallID string
				if gotToolCall && toolCalls[0].ID != "" {
					toolCallID = toolCalls[0].ID
				}

				// Feed the assistant turn back into the conversation; response
				// tool calls are converted to request params via a JSON
				// round-trip since the SDK uses separate types for responses
				// and requests.
				var assistantMessage openai.ChatCompletionMessageParamUnion
				if gotToolCall {
					toolCallsJSON, err := json.Marshal(toolCalls)
					if err != nil {
						t.Fatalf("step %d: failed to marshal tool calls: %v", stepCount+1, err)
					}
					var toolCallParams []openai.ChatCompletionMessageToolCallUnionParam
					if err := json.Unmarshal(toolCallsJSON, &toolCallParams); err != nil {
						t.Fatalf("step %d: failed to unmarshal tool calls: %v", stepCount+1, err)
					}
					contentUnion := openai.ChatCompletionAssistantMessageParamContentUnion{
						OfString: openai.Opt(content),
					}
					assistantMsg := openai.ChatCompletionAssistantMessageParam{
						Content:   contentUnion,
						ToolCalls: toolCallParams,
					}
					assistantMessage = openai.ChatCompletionMessageParamUnion{
						OfAssistant: &assistantMsg,
					}
				} else {
					assistantMessage = openai.AssistantMessage(content)
				}

				if !gotToolCall && content != "" {
					if stepCount < len(expectedSteps) {
						t.Logf("EXPECTED: Step %d should call '%s'", stepCount+1, expectedSteps[stepCount].functionName)
						t.Logf("ACTUAL: Model stopped with content: %s", content)
						t.Fatalf("model stopped making tool calls after %d steps, expected %d steps. Final response: %s", stepCount, len(expectedSteps), content)
					}
					return
				}

				if !gotToolCall {
					if stepCount < len(expectedSteps) {
						expectedStep := expectedSteps[stepCount]
						t.Logf("EXPECTED: Step %d should call '%s'", stepCount+1, expectedStep.functionName)
						t.Logf("ACTUAL: No tool call, got content: %s", content)
						t.Fatalf("step %d: expected tool call but got none. Response: %s", stepCount+1, content)
					}
					return
				}

				if stepCount >= len(expectedSteps) {
					actualCallJSON, _ := json.MarshalIndent(toolCalls[0], "", " ")
					t.Logf("EXPECTED: All %d steps completed", len(expectedSteps))
					t.Logf("ACTUAL: Extra step %d with tool call:\n%s", stepCount+1, string(actualCallJSON))
					funcName := "unknown"
					if toolCalls[0].Function.Name != "" {
						funcName = toolCalls[0].Function.Name
					}
					t.Fatalf("model made more tool calls than expected. Expected %d steps, got step %d with tool call: %s", len(expectedSteps), stepCount+1, funcName)
				}

				expectedStep := expectedSteps[stepCount]
				firstToolCall := toolCalls[0]
				funcCall := firstToolCall.Function
				if funcCall.Name == "" {
					t.Fatalf("step %d: tool call missing function name", stepCount+1)
				}

				funcName := funcCall.Name

				var args map[string]any
				if funcCall.Arguments != "" {
					if err := json.Unmarshal([]byte(funcCall.Arguments), &args); err != nil {
						t.Fatalf("step %d: failed to parse tool call arguments: %v", stepCount+1, err)
					}
				}

				if funcName != expectedStep.functionName {
					t.Logf("DIFF: Function name mismatch")
					t.Logf(" Expected: %s", expectedStep.functionName)
					t.Logf(" Got: %s", funcName)
					t.Logf(" Arguments: %v", args)
					t.Fatalf("step %d: expected tool call '%s', got '%s'. Arguments: %v", stepCount+1, expectedStep.functionName, funcName, args)
				}

				if err := expectedStep.validateArgs(args); err != nil {
					expectedArgsForDisplay := map[string]any{}
					if expectedStep.functionName == "list_files" {
						expectedArgsForDisplay = map[string]any{"path": "/tmp"}
					} else if expectedStep.functionName == "read_file" {
						expectedArgsForDisplay = map[string]any{"path": "/tmp/config.json"}
					}
					if diff := cmp.Diff(expectedArgsForDisplay, args); diff != "" {
						t.Logf("DIFF: Arguments mismatch for function '%s' (-want +got):\n%s", expectedStep.functionName, diff)
					}
					t.Logf("Error: %v", err)
					t.Fatalf("step %d: tool call '%s' has invalid arguments: %v. Arguments: %v", stepCount+1, expectedStep.functionName, err, args)
				}

				toolMessage := openai.ToolMessage(expectedStep.result, toolCallID)
				messages = append(messages, assistantMessage, toolMessage)
				stepCount++
			}

			if stepCount < len(expectedSteps) {
				t.Fatalf("test exceeded max steps (%d) before completing all expected steps (%d)", maxSteps, len(expectedSteps))
			}
		})
	}
}

func TestOpenAIToolCallingBash(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
	defer cancel()

	var baseURL string
	var apiKey string
	var modelsToTest []string
	var cleanup func()
	var ollamaClient *api.Client

	if openaiBaseURL := os.Getenv("OPENAI_BASE_URL"); openaiBaseURL != "" {
		baseURL = openaiBaseURL
		apiKey = os.Getenv("OLLAMA_API_KEY")
		if apiKey == "" {
			t.Fatal("OLLAMA_API_KEY must be set when using OPENAI_BASE_URL")
		}
		modelsToTest = cloudModels
		if modelsEnv := os.Getenv("OPENAI_TEST_MODELS"); modelsEnv != "" {
			modelsToTest = []string{modelsEnv}
		}
		cleanup = func() {}
	} else {
		client, testEndpoint, cleanupFn := InitServerConnection(ctx, t)
		ollamaClient = client
		cleanup = cleanupFn
		baseURL = fmt.Sprintf("http://%s/v1", testEndpoint)
		apiKey = "ollama"
		modelsToTest = append(agenticModels, cloudModels...)
	}
	t.Cleanup(cleanup)

	opts := []option.RequestOption{
		option.WithBaseURL(baseURL),
		option.WithAPIKey(apiKey),
	}
	openaiClient := openai.NewClient(opts...)

	for _, model := range modelsToTest {
		t.Run(model, func(t *testing.T) {
			testCtx := ctx
			if slices.Contains(cloudModels, model) {
				t.Parallel()
				// Create a new context for parallel tests to avoid cancellation
				var cancel context.CancelFunc
				testCtx, cancel = context.WithTimeout(context.Background(), 10*time.Minute)
				defer cancel()
			}
			if v, ok := minVRAM[model]; ok {
				skipUnderMinVRAM(t, v)
			}

			if ollamaClient != nil {
				if err := PullIfMissing(testCtx, ollamaClient, model); err != nil {
					t.Fatalf("pull failed: %s", err)
				}
			}

			tools := []openai.ChatCompletionToolUnionParam{
				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
					Name:        "execute_bash",
					Description: openai.Opt("Execute a bash/shell command and return stdout, stderr, and exit code"),
					Parameters: shared.FunctionParameters{
						"type": "object",
						"properties": map[string]any{
							"command": map[string]any{
								"type":        "string",
								"description": "The bash command to execute",
							},
							"working_directory": map[string]any{
								"type":        "string",
								"description": "Optional working directory for command execution",
							},
						},
						"required": []string{"command"},
					},
				}),
			}

			userContent := "List all files in /tmp directory"
			userMessage := openai.UserMessage(userContent)

			req := openai.ChatCompletionNewParams{
				Model:       shared.ChatModel(model),
				Messages:    []openai.ChatCompletionMessageParamUnion{userMessage},
				Tools:       tools,
				Temperature: openai.Opt(0.0),
			}

			completion, err := openaiClient.Chat.Completions.New(testCtx, req)
			if err != nil {
				t.Fatalf("chat failed: %v", err)
			}

			if len(completion.Choices) == 0 {
				t.Fatalf("no choices in response")
			}

			choice := completion.Choices[0]
			message := choice.Message

			if len(message.ToolCalls) == 0 {
				finishReason := choice.FinishReason
				if finishReason == "" {
					finishReason = "unknown"
				}
				content := message.Content
				if content == "" {
					content = "(empty)"
				}
				t.Logf("User prompt: %q", userContent)
				t.Logf("Finish reason: %s", finishReason)
				t.Logf("Message content: %q", content)
				t.Logf("Tool calls count: %d", len(message.ToolCalls))
				if messageJSON, err := json.MarshalIndent(message, "", " "); err == nil {
					t.Logf("Full message: %s", string(messageJSON))
				}
				t.Fatalf("expected at least one tool call, got none. Finish reason: %s, Content: %q", finishReason, content)
			}

			firstToolCall := message.ToolCalls[0]
			if firstToolCall.Function.Name != "execute_bash" {
				t.Fatalf("unexpected tool called: got %q want %q", firstToolCall.Function.Name, "execute_bash")
			}

			var args map[string]any
			if firstToolCall.Function.Arguments != "" {
				if err := json.Unmarshal([]byte(firstToolCall.Function.Arguments), &args); err != nil {
					t.Fatalf("failed to parse tool call arguments: %v", err)
				}
			}

			command, ok := args["command"]
			if !ok {
				t.Fatalf("expected tool arguments to include 'command', got: %v", args)
			}

			cmdStr, ok := command.(string)
			if !ok {
				t.Fatalf("expected command to be string, got %T", command)
			}

			if err := validateBashCommand(cmdStr, "ls", []string{"/tmp"}); err != nil {
				t.Errorf("bash command validation failed: %v. Command: %q", err, cmdStr)
			}
		})
	}
}

func TestOpenAIToolCallingBashMultiStep(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
	defer cancel()

	var baseURL string
	var apiKey string
	var modelsToTest []string
	var cleanup func()
	var ollamaClient *api.Client

	if openaiBaseURL := os.Getenv("OPENAI_BASE_URL"); openaiBaseURL != "" {
		baseURL = openaiBaseURL
		apiKey = os.Getenv("OLLAMA_API_KEY")
		if apiKey == "" {
			t.Fatal("OLLAMA_API_KEY must be set when using OPENAI_BASE_URL")
		}
		modelsToTest = cloudModels
		if modelsEnv := os.Getenv("OPENAI_TEST_MODELS"); modelsEnv != "" {
			modelsToTest = []string{modelsEnv}
		}
		cleanup = func() {}
	} else {
		client, testEndpoint, cleanupFn := InitServerConnection(ctx, t)
		ollamaClient = client
		cleanup = cleanupFn
		baseURL = fmt.Sprintf("http://%s/v1", testEndpoint)
		apiKey = "ollama"
		modelsToTest = append(agenticModels, cloudModels...)
	}
	t.Cleanup(cleanup)

	opts := []option.RequestOption{
		option.WithBaseURL(baseURL),
		option.WithAPIKey(apiKey),
	}
	openaiClient := openai.NewClient(opts...)

	for _, model := range modelsToTest {
		t.Run(model, func(t *testing.T) {
			testCtx := ctx
			if slices.Contains(cloudModels, model) {
				t.Parallel()
				// Create a new context for parallel tests to avoid cancellation
				var cancel context.CancelFunc
				testCtx, cancel = context.WithTimeout(context.Background(), 10*time.Minute)
				defer cancel()
			}
			if v, ok := minVRAM[model]; ok {
				skipUnderMinVRAM(t, v)
			}

			if ollamaClient != nil {
				if err := PullIfMissing(testCtx, ollamaClient, model); err != nil {
					t.Fatalf("pull failed: %s", err)
				}
			}

			tools := []openai.ChatCompletionToolUnionParam{
				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
					Name:        "execute_bash",
					Description: openai.Opt("Execute a bash/shell command and return stdout, stderr, and exit code"),
					Parameters: shared.FunctionParameters{
						"type": "object",
						"properties": map[string]any{
							"command": map[string]any{
								"type":        "string",
								"description": "The bash command to execute",
							},
							"working_directory": map[string]any{
								"type":        "string",
								"description": "Optional working directory for command execution",
							},
						},
						"required": []string{"command"},
					},
				}),
			}

			userContent := "Find all log files in /tmp. use the bash tool"
			userMessage := openai.UserMessage(userContent)

			req := openai.ChatCompletionNewParams{
				Model:       shared.ChatModel(model),
				Messages:    []openai.ChatCompletionMessageParamUnion{userMessage},
				Tools:       tools,
				Temperature: openai.Opt(0.0),
			}

			completion, err := openaiClient.Chat.Completions.New(testCtx, req)
			if err != nil {
				t.Fatalf("chat failed: %v", err)
			}

			if len(completion.Choices) == 0 {
				t.Fatalf("no choices in response")
			}

			choice := completion.Choices[0]
			message := choice.Message

			if len(message.ToolCalls) == 0 {
				finishReason := choice.FinishReason
				if finishReason == "" {
					finishReason = "unknown"
				}
				content := message.Content
				if content == "" {
					content = "(empty)"
				}
				t.Logf("User prompt: %q", userContent)
				t.Logf("Finish reason: %s", finishReason)
				t.Logf("Message content: %q", content)
				t.Logf("Tool calls count: %d", len(message.ToolCalls))
				if messageJSON, err := json.MarshalIndent(message, "", " "); err == nil {
					t.Logf("Full message: %s", string(messageJSON))
				}
				t.Fatalf("expected at least one tool call, got none. Finish reason: %s, Content: %q", finishReason, content)
			}

			firstToolCall := message.ToolCalls[0]
			if firstToolCall.Function.Name != "execute_bash" {
				t.Fatalf("unexpected tool called: got %q want %q", firstToolCall.Function.Name, "execute_bash")
			}

			var args map[string]any
			if firstToolCall.Function.Arguments != "" {
				if err := json.Unmarshal([]byte(firstToolCall.Function.Arguments), &args); err != nil {
					t.Fatalf("failed to parse tool call arguments: %v", err)
				}
			}

			command, ok := args["command"]
			if !ok {
				t.Fatalf("expected tool arguments to include 'command', got: %v", args)
			}

			cmdStr, ok := command.(string)
			if !ok {
				t.Fatalf("expected command to be string, got %T", command)
			}

			if err := validateBashCommandFlexible(cmdStr, []string{"find", "ls"}, []string{"/tmp"}); err != nil {
				t.Errorf("bash command validation failed: %v. Command: %q", err, cmdStr)
			}
		})
	}
}

func TestOpenAIToolCallingBashAmpersand(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
	defer cancel()

	var baseURL string
	var apiKey string
	var modelsToTest []string
	var cleanup func()
	var ollamaClient *api.Client

	if openaiBaseURL := os.Getenv("OPENAI_BASE_URL"); openaiBaseURL != "" {
		baseURL = openaiBaseURL
		apiKey = os.Getenv("OLLAMA_API_KEY")
		if apiKey == "" {
			t.Fatal("OLLAMA_API_KEY must be set when using OPENAI_BASE_URL")
		}
		modelsToTest = cloudModels
		if modelsEnv := os.Getenv("OPENAI_TEST_MODELS"); modelsEnv != "" {
			modelsToTest = []string{modelsEnv}
		}
		cleanup = func() {}
	} else {
		client, testEndpoint, cleanupFn := InitServerConnection(ctx, t)
		ollamaClient = client
		cleanup = cleanupFn
		baseURL = fmt.Sprintf("http://%s/v1", testEndpoint)
		apiKey = "ollama"
		modelsToTest = append(agenticModels, cloudModels...)
	}
	t.Cleanup(cleanup)

	opts := []option.RequestOption{
		option.WithBaseURL(baseURL),
		option.WithAPIKey(apiKey),
	}
	openaiClient := openai.NewClient(opts...)

	for _, model := range modelsToTest {
		t.Run(model, func(t *testing.T) {
			testCtx := ctx
			if slices.Contains(cloudModels, model) {
				t.Parallel()
				// Create a new context for parallel tests to avoid cancellation
				var cancel context.CancelFunc
				testCtx, cancel = context.WithTimeout(context.Background(), 10*time.Minute)
				defer cancel()
			}
			if v, ok := minVRAM[model]; ok {
				skipUnderMinVRAM(t, v)
			}

			if ollamaClient != nil {
				if err := PullIfMissing(testCtx, ollamaClient, model); err != nil {
					t.Fatalf("pull failed: %s", err)
				}
			}

			tools := []openai.ChatCompletionToolUnionParam{
				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
					Name:        "execute_bash",
					Description: openai.Opt("Execute a bash/shell command and return stdout, stderr, and exit code"),
					Parameters: shared.FunctionParameters{
						"type": "object",
						"properties": map[string]any{
							"command": map[string]any{
								"type":        "string",
								"description": "The bash command to execute",
							},
							"working_directory": map[string]any{
								"type":        "string",
								"description": "Optional working directory for command execution",
							},
						},
						"required": []string{"command"},
					},
				}),
			}

			userContent := "Echo the text 'A & B' using bash with the bash tool"
			userMessage := openai.UserMessage(userContent)

			req := openai.ChatCompletionNewParams{
				Model:       shared.ChatModel(model),
				Messages:    []openai.ChatCompletionMessageParamUnion{userMessage},
				Tools:       tools,
				Temperature: openai.Opt(0.0),
			}

			completion, err := openaiClient.Chat.Completions.New(testCtx, req)
			if err != nil {
				t.Fatalf("chat failed: %v", err)
			}

			if len(completion.Choices) == 0 {
				t.Fatalf("no choices in response")
			}

			choice := completion.Choices[0]
			message := choice.Message

			if len(message.ToolCalls) == 0 {
				finishReason := choice.FinishReason
				if finishReason == "" {
					finishReason = "unknown"
				}
				content := message.Content
				if content == "" {
					content = "(empty)"
				}
				t.Logf("User prompt: %q", userContent)
				t.Logf("Finish reason: %s", finishReason)
				t.Logf("Message content: %q", content)
				t.Logf("Tool calls count: %d", len(message.ToolCalls))
				if messageJSON, err := json.MarshalIndent(message, "", " "); err == nil {
					t.Logf("Full message: %s", string(messageJSON))
				}
				t.Fatalf("expected at least one tool call, got none. Finish reason: %s, Content: %q", finishReason, content)
			}

			firstToolCall := message.ToolCalls[0]
			if firstToolCall.Function.Name != "execute_bash" {
				t.Fatalf("unexpected tool called: got %q want %q", firstToolCall.Function.Name, "execute_bash")
			}

			var args map[string]any
			if firstToolCall.Function.Arguments != "" {
				if err := json.Unmarshal([]byte(firstToolCall.Function.Arguments), &args); err != nil {
					t.Fatalf("failed to parse tool call arguments: %v", err)
				}
			}

			command, ok := args["command"]
			if !ok {
				t.Fatalf("expected tool arguments to include 'command', got: %v", args)
			}

			cmdStr, ok := command.(string)
			if !ok {
				t.Fatalf("expected command to be string, got %T", command)
			}

			if !strings.Contains(cmdStr, "&") {
				t.Errorf("expected command to contain '&' character for parsing test, got: %q", cmdStr)
			}

			if !strings.Contains(cmdStr, "echo") && !strings.Contains(cmdStr, "printf") {
				t.Errorf("expected command to use echo or printf, got: %q", cmdStr)
			}
		})
	}
}
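
// TestValidateBashCommandHelpers is a small offline sanity check of the
// validator helpers defined at the top of this file. It is illustrative:
// the cases mirror the expectations used by the tool-calling tests above
// and require no running server or model.
func TestValidateBashCommandHelpers(t *testing.T) {
	if err := validateBashCommand("ls -la /tmp", "ls", []string{"/tmp"}); err != nil {
		t.Errorf("expected valid command, got error: %v", err)
	}
	if err := validateBashCommand("find /tmp", "ls", nil); err == nil {
		t.Error("expected command mismatch error, got nil")
	}
	if err := validateBashCommandFlexible("find /tmp -name '*.log'", []string{"find", "ls"}, []string{"/tmp", "log"}); err != nil {
		t.Errorf("expected valid flexible command, got error: %v", err)
	}
	if err := validateBashCommandFlexible("cat /tmp/a.log", []string{"find", "ls"}, nil); err == nil {
		t.Error("expected command mismatch error, got nil")
	}
}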