From 00c96eed1f47493d0b9188f691959710cb264644 Mon Sep 17 00:00:00 2001
From: ParthSareen
Date: Fri, 14 Nov 2025 11:06:01 -0800
Subject: [PATCH] rm cloud test

---
 integration/README.md            |  57 ---
 integration/tools_openai_test.go | 834 ------------------------------
 2 files changed, 891 deletions(-)
 delete mode 100644 integration/tools_openai_test.go

diff --git a/integration/README.md b/integration/README.md
index 5fb171167..0e24ea452 100644
--- a/integration/README.md
+++ b/integration/README.md
@@ -13,60 +13,3 @@ The integration tests have 2 modes of operating.
 
 > [!IMPORTANT]
 > Before running the tests locally without the "test existing" setting, compile ollama from the top of the source tree `go build .` in addition to GPU support with cmake if applicable on your platform. The integration tests expect to find an ollama binary at the top of the tree.
-
-Many tests use a default small model suitable to run on many systems. You can override this default model by setting `OLLAMA_TEST_DEFAULT_MODEL`
-
-## Tool Calling Tests
-
-The tool calling tests are split into two files:
-
-- **`tools_test.go`** - Tests using the native Ollama API (`api.Tool`)
-- **`tools_openai_test.go`** - Tests using the OpenAI-compatible API format
-
-### Running Tool Calling Tests
-
-Run all tool calling tests:
-```bash
-go test -tags=integration -v -run Test.*Tool.* ./integration
-```
-
-Run only OpenAI-compatible tests:
-```bash
-go test -tags=integration -v -run TestOpenAI ./integration
-```
-
-Run only native API tests:
-```bash
-go test -tags=integration -v -run TestAPIToolCalling ./integration
-```
-
-### Parallel Execution
-
-The OpenAI-compatible tests (`tools_openai_test.go`) support parallel execution for cloud models. Run with parallel execution:
-```bash
-go test -tags=integration -v -run TestOpenAI -parallel 3 ./integration
-```
-
-Cloud models (models ending with `-cloud`) will run in parallel, while local models run sequentially. This significantly speeds up test execution when testing against external endpoints.
-
-### Testing Specific Models
-
-To test a specific model, set the `OPENAI_TEST_MODELS` environment variable:
-```bash
-OPENAI_TEST_MODELS="gpt-oss:120b-cloud" go test -tags=integration -v -run TestOpenAI ./integration
-```
-
-### External Endpoints
-
-To test against an external OpenAI-compatible endpoint (e.g., Ollama Cloud):
-```bash
-OPENAI_BASE_URL="https://ollama.com/v1" OLLAMA_API_KEY="your-key" go test -tags=integration -v -run TestOpenAI ./integration
-```
-
-### Environment Variables
-
-The tool calling tests support the following environment variables:
-
-- **`OPENAI_BASE_URL`** - When set, tests will run against an external OpenAI-compatible endpoint instead of a local server. If set, `OLLAMA_API_KEY` must also be provided.
-- **`OLLAMA_API_KEY`** - API key for authenticating with external endpoints (required when `OPENAI_BASE_URL` is set).
-- **`OPENAI_TEST_MODELS`** - Override the default model list and test only the specified model(s). Can be a single model or comma-separated list.
\ No newline at end of file
diff --git a/integration/tools_openai_test.go b/integration/tools_openai_test.go
deleted file mode 100644
index ebabc2bc3..000000000
--- a/integration/tools_openai_test.go
+++ /dev/null
@@ -1,834 +0,0 @@
-//go:build integration
-
-package integration
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"os"
-	"slices"
-	"strings"
-	"testing"
-	"time"
-
-	"github.com/google/go-cmp/cmp"
-	"github.com/ollama/ollama/api"
-	"github.com/openai/openai-go/v3"
-	"github.com/openai/openai-go/v3/option"
-	"github.com/openai/openai-go/v3/shared"
-)
-
-var agenticModels = []string{
-	"gpt-oss:20b",
-	"gpt-oss:120b",
-	"qwen3-coder:30b",
-	"qwen3:4b",
-	"qwen3:8b",
-}
-
-var cloudModels = []string{
-	"gpt-oss:120b-cloud",
-	"gpt-oss:20b-cloud",
-	"qwen3-vl:235b-cloud",
-	"qwen3-coder:480b-cloud",
-	"kimi-k2-thinking:cloud",
-	"kimi-k2:1t-cloud",
-}
-
-// validateBashCommand validates a bash command with flexible matching
-// It checks that the core command matches and required arguments are present
-func validateBashCommand(cmd string, expectedCmd string, requiredArgs []string) error {
-	parts := strings.Fields(cmd)
-	if len(parts) == 0 {
-		return fmt.Errorf("empty command")
-	}
-
-	actualCmd := parts[0]
-	if actualCmd != expectedCmd {
-		return fmt.Errorf("expected command '%s', got '%s'", expectedCmd, actualCmd)
-	}
-
-	cmdStr := strings.Join(parts[1:], " ")
-	for _, arg := range requiredArgs {
-		if !strings.Contains(cmdStr, arg) {
-			return fmt.Errorf("missing required argument: %s", arg)
-		}
-	}
-
-	return nil
-}
-
-// validateBashCommandFlexible validates a bash command with flexible matching
-// It accepts alternative command forms (e.g., find vs ls) and checks required patterns
-func validateBashCommandFlexible(cmd string, allowedCommands []string, requiredPatterns []string) error {
-	parts := strings.Fields(cmd)
-	if len(parts) == 0 {
-		return fmt.Errorf("empty command")
-	}
-
-	actualCmd := parts[0]
-	commandMatched := false
-	for _, allowedCmd := range allowedCommands {
-		if actualCmd == allowedCmd {
-			commandMatched = true
-			break
-		}
-	}
-	if !commandMatched {
-		return fmt.Errorf("expected one of commands %v, got '%s'", allowedCommands, actualCmd)
-	}
-
-	cmdStr := strings.ToLower(strings.Join(parts[1:], " "))
-	for _, pattern := range requiredPatterns {
-		if !strings.Contains(cmdStr, strings.ToLower(pattern)) {
-			return fmt.Errorf("missing required pattern: %s", pattern)
-		}
-	}
-
-	return nil
-}
-
-func TestOpenAIToolCallingMultiStep(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
-	defer cancel()
-
-	var baseURL string
-	var apiKey string
-	var modelsToTest []string
-	var cleanup func()
-
-	if openaiBaseURL := os.Getenv("OPENAI_BASE_URL"); openaiBaseURL != "" {
-		baseURL = openaiBaseURL
-		apiKey = os.Getenv("OLLAMA_API_KEY")
-		if apiKey == "" {
-			t.Fatal("OPENAI_API_KEY must be set when using OPENAI_BASE_URL")
-		}
-
-		// only test cloud models unless OPENAI_TEST_MODELS is set
-		modelsToTest = cloudModels
-		if modelsEnv := os.Getenv("OPENAI_TEST_MODELS"); modelsEnv != "" {
-			modelsToTest = []string{modelsEnv}
-		}
-		cleanup = func() {}
-	} else {
-		_, testEndpoint, cleanupFn := InitServerConnection(ctx, t)
-		cleanup = cleanupFn
-		baseURL = fmt.Sprintf("http://%s/v1", testEndpoint)
-		apiKey = "ollama"
-		modelsToTest = append(agenticModels, cloudModels...)
-	}
-	t.Cleanup(cleanup)
-
-	opts := []option.RequestOption{
-		option.WithBaseURL(baseURL),
-		option.WithAPIKey(apiKey),
-	}
-	openaiClient := openai.NewClient(opts...)
-
-	var ollamaClient *api.Client
-	if baseURL == "" {
-		ollamaClient, _, _ = InitServerConnection(ctx, t)
-	}
-
-	for _, model := range modelsToTest {
-		t.Run(model, func(t *testing.T) {
-			testCtx := ctx
-			if slices.Contains(cloudModels, model) {
-				t.Parallel()
-				// Create a new context for parallel tests to avoid cancellation
-				var cancel context.CancelFunc
-				testCtx, cancel = context.WithTimeout(context.Background(), 10*time.Minute)
-				defer cancel()
-			}
-			if v, ok := minVRAM[model]; ok {
-				skipUnderMinVRAM(t, v)
-			}
-
-			if ollamaClient != nil {
-				if err := PullIfMissing(testCtx, ollamaClient, model); err != nil {
-					t.Fatalf("pull failed %s", err)
-				}
-			}
-
-			tools := []openai.ChatCompletionToolUnionParam{
-				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
-					Name:        "list_files",
-					Description: openai.Opt("List all files in a directory"),
-					Parameters: shared.FunctionParameters{
-						"type": "object",
-						"properties": map[string]any{
-							"path": map[string]any{
-								"type":        "string",
-								"description": "The directory path to list files from",
-							},
-						},
-						"required": []string{"path"},
-					},
-				}),
-				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
-					Name:        "read_file",
-					Description: openai.Opt("Read the contents of a file"),
-					Parameters: shared.FunctionParameters{
-						"type": "object",
-						"properties": map[string]any{
-							"path": map[string]any{
-								"type":        "string",
-								"description": "The file path to read",
-							},
-						},
-						"required": []string{"path"},
-					},
-				}),
-			}
-
-			mockFileContents := "line 1\nline 2\nline 3\nline 4\nline 5"
-			userContent := "Find the file named 'config.json' in /tmp and read its contents"
-			userMessage := openai.UserMessage(userContent)
-
-			messages := []openai.ChatCompletionMessageParamUnion{
-				userMessage,
-			}
-			stepCount := 0
-			maxSteps := 10
-
-			normalizePath := func(path string) string {
-				if path != "" && path[0] != '/' {
-					return "/" + path
-				}
-				return path
-			}
-
-			expectedSteps := []struct {
-				functionName string
-				validateArgs func(map[string]any) error
-				result       string
-			}{
-				{
-					functionName: "list_files",
-					validateArgs: func(args map[string]any) error {
-						path, ok := args["path"]
-						if !ok {
-							return fmt.Errorf("missing required argument 'path'")
-						}
-						pathStr, ok := path.(string)
-						if !ok {
-							return fmt.Errorf("expected 'path' to be string, got %T", path)
-						}
-						normalizedPath := normalizePath(pathStr)
-						if normalizedPath != "/tmp" {
-							return fmt.Errorf("expected list_files(\"/tmp\"), got list_files(%q)", pathStr)
-						}
-						return nil
-					},
-					result: `["config.json", "other.txt", "data.log"]`,
-				},
-				{
-					functionName: "read_file",
-					validateArgs: func(args map[string]any) error {
-						path, ok := args["path"]
-						if !ok {
-							return fmt.Errorf("missing required argument 'path'")
-						}
-						pathStr, ok := path.(string)
-						if !ok {
-							return fmt.Errorf("expected 'path' to be string, got %T", path)
-						}
-						normalizedPath := normalizePath(pathStr)
-						if normalizedPath != "/tmp/config.json" {
-							return fmt.Errorf("expected read_file(\"/tmp/config.json\"), got read_file(%q)", pathStr)
-						}
-						return nil
-					},
-					result: mockFileContents,
-				},
-			}
-
-			for stepCount < maxSteps {
-				req := openai.ChatCompletionNewParams{
-					Model:       shared.ChatModel(model),
-					Messages:    messages,
-					Tools:       tools,
-					Temperature: openai.Opt(0.0),
-				}
-
-				completion, err := openaiClient.Chat.Completions.New(testCtx, req)
-				if err != nil {
-					t.Fatalf("step %d chat failed: %v", stepCount+1, err)
-				}
-
-				if len(completion.Choices) == 0 {
-					t.Fatalf("step %d: no choices in response", stepCount+1)
-				}
-
-				choice := completion.Choices[0]
-				message := choice.Message
-
-				toolCalls := message.ToolCalls
-				content := message.Content
-				gotToolCall := len(toolCalls) > 0
-				var toolCallID string
-				if gotToolCall && toolCalls[0].ID != "" {
-					toolCallID = toolCalls[0].ID
-				}
-
-				var assistantMessage openai.ChatCompletionMessageParamUnion
-				if gotToolCall {
-					toolCallsJSON, err := json.Marshal(toolCalls)
-					if err != nil {
-						t.Fatalf("step %d: failed to marshal tool calls: %v", stepCount+1, err)
-					}
-					var toolCallParams []openai.ChatCompletionMessageToolCallUnionParam
-					if err := json.Unmarshal(toolCallsJSON, &toolCallParams); err != nil {
-						t.Fatalf("step %d: failed to unmarshal tool calls: %v", stepCount+1, err)
-					}
-					contentUnion := openai.ChatCompletionAssistantMessageParamContentUnion{
-						OfString: openai.Opt(content),
-					}
-					assistantMsg := openai.ChatCompletionAssistantMessageParam{
-						Content:   contentUnion,
-						ToolCalls: toolCallParams,
-					}
-					assistantMessage = openai.ChatCompletionMessageParamUnion{
-						OfAssistant: &assistantMsg,
-					}
-				} else {
-					assistantMessage = openai.AssistantMessage(content)
-				}
-
-				if !gotToolCall && content != "" {
-					if stepCount < len(expectedSteps) {
-						t.Logf("EXPECTED: Step %d should call '%s'", stepCount+1, expectedSteps[stepCount].functionName)
-						t.Logf("ACTUAL: Model stopped with content: %s", content)
-						t.Fatalf("model stopped making tool calls after %d steps, expected %d steps. Final response: %s", stepCount, len(expectedSteps), content)
-					}
-					return
-				}
-
-				if !gotToolCall || len(toolCalls) == 0 {
-					if stepCount < len(expectedSteps) {
-						expectedStep := expectedSteps[stepCount]
-						t.Logf("EXPECTED: Step %d should call '%s'", stepCount+1, expectedStep.functionName)
-						t.Logf("ACTUAL: No tool call, got content: %s", content)
-						t.Fatalf("step %d: expected tool call but got none. Response: %s", stepCount+1, content)
-					}
-					return
-				}
-
-				if stepCount >= len(expectedSteps) {
-					actualCallJSON, _ := json.MarshalIndent(toolCalls[0], "", " ")
-					t.Logf("EXPECTED: All %d steps completed", len(expectedSteps))
-					t.Logf("ACTUAL: Extra step %d with tool call:\n%s", stepCount+1, string(actualCallJSON))
-					funcName := "unknown"
-					if toolCalls[0].Function.Name != "" {
-						funcName = toolCalls[0].Function.Name
-					}
-					t.Fatalf("model made more tool calls than expected. Expected %d steps, got step %d with tool call: %s", len(expectedSteps), stepCount+1, funcName)
-				}
-
-				expectedStep := expectedSteps[stepCount]
-				firstToolCall := toolCalls[0]
-				funcCall := firstToolCall.Function
-				if funcCall.Name == "" {
-					t.Fatalf("step %d: tool call missing function name", stepCount+1)
-				}
-
-				funcName := funcCall.Name
-
-				var args map[string]any
-				if funcCall.Arguments != "" {
-					if err := json.Unmarshal([]byte(funcCall.Arguments), &args); err != nil {
-						t.Fatalf("step %d: failed to parse tool call arguments: %v", stepCount+1, err)
-					}
-				}
-
-				if funcName != expectedStep.functionName {
-					t.Logf("DIFF: Function name mismatch")
-					t.Logf(" Expected: %s", expectedStep.functionName)
-					t.Logf(" Got: %s", funcName)
-					t.Logf(" Arguments: %v", args)
-					t.Fatalf("step %d: expected tool call '%s', got '%s'. Arguments: %v", stepCount+1, expectedStep.functionName, funcName, args)
-				}
-
-				if err := expectedStep.validateArgs(args); err != nil {
-					expectedArgsForDisplay := map[string]any{}
-					if expectedStep.functionName == "list_files" {
-						expectedArgsForDisplay = map[string]any{"path": "/tmp"}
-					} else if expectedStep.functionName == "read_file" {
-						expectedArgsForDisplay = map[string]any{"path": "/tmp/config.json"}
-					}
-					if diff := cmp.Diff(expectedArgsForDisplay, args); diff != "" {
-						t.Logf("DIFF: Arguments mismatch for function '%s' (-want +got):\n%s", expectedStep.functionName, diff)
-					}
-					t.Logf("Error: %v", err)
-					t.Fatalf("step %d: tool call '%s' has invalid arguments: %v. Arguments: %v", stepCount+1, expectedStep.functionName, err, args)
-				}
-
-				toolMessage := openai.ToolMessage(expectedStep.result, toolCallID)
-				messages = append(messages, assistantMessage, toolMessage)
-				stepCount++
-			}
-
-			if stepCount < len(expectedSteps) {
-				t.Fatalf("test exceeded max steps (%d) before completing all expected steps (%d)", maxSteps, len(expectedSteps))
-			}
-		})
-	}
-}
-
-func TestOpenAIToolCallingBash(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
-	defer cancel()
-
-	var baseURL string
-	var apiKey string
-	var modelsToTest []string
-	var cleanup func()
-
-	if openaiBaseURL := os.Getenv("OPENAI_BASE_URL"); openaiBaseURL != "" {
-		baseURL = openaiBaseURL
-		apiKey = os.Getenv("OLLAMA_API_KEY")
-		if apiKey == "" {
-			t.Fatal("OPENAI_API_KEY must be set when using OPENAI_BASE_URL")
-		}
-		modelsToTest = cloudModels
-		if modelsEnv := os.Getenv("OPENAI_TEST_MODELS"); modelsEnv != "" {
-			modelsToTest = []string{modelsEnv}
-		}
-		cleanup = func() {}
-	} else {
-		_, testEndpoint, cleanupFn := InitServerConnection(ctx, t)
-		cleanup = cleanupFn
-		baseURL = fmt.Sprintf("http://%s/v1", testEndpoint)
-		apiKey = "ollama"
-		modelsToTest = append(agenticModels, cloudModels...)
-	}
-	t.Cleanup(cleanup)
-
-	opts := []option.RequestOption{
-		option.WithBaseURL(baseURL),
-		option.WithAPIKey(apiKey),
-	}
-	openaiClient := openai.NewClient(opts...)
-
-	var ollamaClient *api.Client
-	if baseURL == "" {
-		ollamaClient, _, _ = InitServerConnection(ctx, t)
-	}
-
-	for _, model := range modelsToTest {
-		t.Run(model, func(t *testing.T) {
-			testCtx := ctx
-			if slices.Contains(cloudModels, model) {
-				t.Parallel()
-				// Create a new context for parallel tests to avoid cancellation
-				var cancel context.CancelFunc
-				testCtx, cancel = context.WithTimeout(context.Background(), 10*time.Minute)
-				defer cancel()
-			}
-			if v, ok := minVRAM[model]; ok {
-				skipUnderMinVRAM(t, v)
-			}
-
-			if ollamaClient != nil {
-				if err := PullIfMissing(testCtx, ollamaClient, model); err != nil {
-					t.Fatalf("pull failed %s", err)
-				}
-			}
-
-			tools := []openai.ChatCompletionToolUnionParam{
-				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
-					Name:        "execute_bash",
-					Description: openai.Opt("Execute a bash/shell command and return stdout, stderr, and exit code"),
-					Parameters: shared.FunctionParameters{
-						"type": "object",
-						"properties": map[string]any{
-							"command": map[string]any{
-								"type":        "string",
-								"description": "The bash command to execute",
-							},
-							"working_directory": map[string]any{
-								"type":        "string",
-								"description": "Optional working directory for command execution",
-							},
-						},
-						"required": []string{"command"},
-					},
-				}),
-			}
-
-			userContent := "List all files in /tmp directory"
-			userMessage := openai.UserMessage(userContent)
-
-			req := openai.ChatCompletionNewParams{
-				Model:       shared.ChatModel(model),
-				Messages:    []openai.ChatCompletionMessageParamUnion{userMessage},
-				Tools:       tools,
-				Temperature: openai.Opt(0.0),
-			}
-
-			completion, err := openaiClient.Chat.Completions.New(testCtx, req)
-			if err != nil {
-				t.Fatalf("chat failed: %v", err)
-			}
-
-			if len(completion.Choices) == 0 {
-				t.Fatalf("no choices in response")
-			}
-
-			choice := completion.Choices[0]
-			message := choice.Message
-
-			if len(message.ToolCalls) == 0 {
-				finishReason := choice.FinishReason
-				if finishReason == "" {
-					finishReason = "unknown"
-				}
-				content := message.Content
-				if content == "" {
-					content = "(empty)"
-				}
-				t.Logf("User prompt: %q", userContent)
-				t.Logf("Finish reason: %s", finishReason)
-				t.Logf("Message content: %q", content)
-				t.Logf("Tool calls count: %d", len(message.ToolCalls))
-				if messageJSON, err := json.MarshalIndent(message, "", " "); err == nil {
-					t.Logf("Full message: %s", string(messageJSON))
-				}
-				t.Fatalf("expected at least one tool call, got none. Finish reason: %s, Content: %q", finishReason, content)
-			}
-
-			firstToolCall := message.ToolCalls[0]
-			if firstToolCall.Function.Name != "execute_bash" {
-				t.Fatalf("unexpected tool called: got %q want %q", firstToolCall.Function.Name, "execute_bash")
-			}
-
-			var args map[string]any
-			if firstToolCall.Function.Arguments != "" {
-				if err := json.Unmarshal([]byte(firstToolCall.Function.Arguments), &args); err != nil {
-					t.Fatalf("failed to parse tool call arguments: %v", err)
-				}
-			}
-
-			command, ok := args["command"]
-			if !ok {
-				t.Fatalf("expected tool arguments to include 'command', got: %v", args)
-			}
-
-			cmdStr, ok := command.(string)
-			if !ok {
-				t.Fatalf("expected command to be string, got %T", command)
-			}
-
-			if err := validateBashCommand(cmdStr, "ls", []string{"/tmp"}); err != nil {
-				t.Errorf("bash command validation failed: %v. Command: %q", err, cmdStr)
-			}
-		})
-	}
-}
-
-func TestOpenAIToolCallingBashMultiStep(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
-	defer cancel()
-
-	var baseURL string
-	var apiKey string
-	var modelsToTest []string
-	var cleanup func()
-
-	if openaiBaseURL := os.Getenv("OPENAI_BASE_URL"); openaiBaseURL != "" {
-		baseURL = openaiBaseURL
-		apiKey = os.Getenv("OLLAMA_API_KEY")
-		if apiKey == "" {
-			t.Fatal("OPENAI_API_KEY must be set when using OPENAI_BASE_URL")
-		}
-		modelsToTest = cloudModels
-		if modelsEnv := os.Getenv("OPENAI_TEST_MODELS"); modelsEnv != "" {
-			modelsToTest = []string{modelsEnv}
-		}
-		cleanup = func() {}
-	} else {
-		_, testEndpoint, cleanupFn := InitServerConnection(ctx, t)
-		cleanup = cleanupFn
-		baseURL = fmt.Sprintf("http://%s/v1", testEndpoint)
-		apiKey = "ollama"
-		modelsToTest = append(agenticModels, cloudModels...)
-	}
-	t.Cleanup(cleanup)
-
-	opts := []option.RequestOption{
-		option.WithBaseURL(baseURL),
-		option.WithAPIKey(apiKey),
-	}
-	openaiClient := openai.NewClient(opts...)
-
-	var ollamaClient *api.Client
-	if baseURL == "" {
-		ollamaClient, _, _ = InitServerConnection(ctx, t)
-	}
-
-	for _, model := range modelsToTest {
-		t.Run(model, func(t *testing.T) {
-			testCtx := ctx
-			if slices.Contains(cloudModels, model) {
-				t.Parallel()
-				// Create a new context for parallel tests to avoid cancellation
-				var cancel context.CancelFunc
-				testCtx, cancel = context.WithTimeout(context.Background(), 10*time.Minute)
-				defer cancel()
-			}
-			if v, ok := minVRAM[model]; ok {
-				skipUnderMinVRAM(t, v)
-			}
-
-			if ollamaClient != nil {
-				if err := PullIfMissing(testCtx, ollamaClient, model); err != nil {
-					t.Fatalf("pull failed %s", err)
-				}
-			}
-
-			tools := []openai.ChatCompletionToolUnionParam{
-				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
-					Name:        "execute_bash",
-					Description: openai.Opt("Execute a bash/shell command and return stdout, stderr, and exit code"),
-					Parameters: shared.FunctionParameters{
-						"type": "object",
-						"properties": map[string]any{
-							"command": map[string]any{
-								"type":        "string",
-								"description": "The bash command to execute",
-							},
-							"working_directory": map[string]any{
-								"type":        "string",
-								"description": "Optional working directory for command execution",
-							},
-						},
-						"required": []string{"command"},
-					},
-				}),
-			}
-
-			userContent := "Find all log files in /tmp. use the bash tool"
-			userMessage := openai.UserMessage(userContent)
-
-			req := openai.ChatCompletionNewParams{
-				Model:       shared.ChatModel(model),
-				Messages:    []openai.ChatCompletionMessageParamUnion{userMessage},
-				Tools:       tools,
-				Temperature: openai.Opt(0.0),
-			}
-
-			completion, err := openaiClient.Chat.Completions.New(testCtx, req)
-			if err != nil {
-				t.Fatalf("chat failed: %v", err)
-			}
-
-			if len(completion.Choices) == 0 {
-				t.Fatalf("no choices in response")
-			}
-
-			choice := completion.Choices[0]
-			message := choice.Message
-
-			if len(message.ToolCalls) == 0 {
-				finishReason := choice.FinishReason
-				if finishReason == "" {
-					finishReason = "unknown"
-				}
-				content := message.Content
-				if content == "" {
-					content = "(empty)"
-				}
-				t.Logf("User prompt: %q", userContent)
-				t.Logf("Finish reason: %s", finishReason)
-				t.Logf("Message content: %q", content)
-				t.Logf("Tool calls count: %d", len(message.ToolCalls))
-				if messageJSON, err := json.MarshalIndent(message, "", " "); err == nil {
-					t.Logf("Full message: %s", string(messageJSON))
-				}
-				t.Fatalf("expected at least one tool call, got none. Finish reason: %s, Content: %q", finishReason, content)
-			}
-
-			firstToolCall := message.ToolCalls[0]
-			if firstToolCall.Function.Name != "execute_bash" {
-				t.Fatalf("unexpected tool called: got %q want %q", firstToolCall.Function.Name, "execute_bash")
-			}
-
-			var args map[string]any
-			if firstToolCall.Function.Arguments != "" {
-				if err := json.Unmarshal([]byte(firstToolCall.Function.Arguments), &args); err != nil {
-					t.Fatalf("failed to parse tool call arguments: %v", err)
-				}
-			}
-
-			command, ok := args["command"]
-			if !ok {
-				t.Fatalf("expected tool arguments to include 'command', got: %v", args)
-			}
-
-			cmdStr, ok := command.(string)
-			if !ok {
-				t.Fatalf("expected command to be string, got %T", command)
-			}
-
-			if err := validateBashCommandFlexible(cmdStr, []string{"find", "ls"}, []string{"/tmp"}); err != nil {
-				t.Errorf("bash command validation failed: %v. Command: %q", err, cmdStr)
-			}
-		})
-	}
-}
-
-func TestOpenAIToolCallingBashAmpersand(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
-	defer cancel()
-
-	var baseURL string
-	var apiKey string
-	var modelsToTest []string
-	var cleanup func()
-
-	if openaiBaseURL := os.Getenv("OPENAI_BASE_URL"); openaiBaseURL != "" {
-		baseURL = openaiBaseURL
-		apiKey = os.Getenv("OLLAMA_API_KEY")
-		if apiKey == "" {
-			t.Fatal("OPENAI_API_KEY must be set when using OPENAI_BASE_URL")
-		}
-		modelsToTest = cloudModels
-		if modelsEnv := os.Getenv("OPENAI_TEST_MODELS"); modelsEnv != "" {
-			modelsToTest = []string{modelsEnv}
-		}
-		cleanup = func() {}
-	} else {
-		_, testEndpoint, cleanupFn := InitServerConnection(ctx, t)
-		cleanup = cleanupFn
-		baseURL = fmt.Sprintf("http://%s/v1", testEndpoint)
-		apiKey = "ollama"
-		modelsToTest = append(agenticModels, cloudModels...)
-	}
-	t.Cleanup(cleanup)
-
-	opts := []option.RequestOption{
-		option.WithBaseURL(baseURL),
-		option.WithAPIKey(apiKey),
-	}
-	openaiClient := openai.NewClient(opts...)
-
-	var ollamaClient *api.Client
-	if baseURL == "" {
-		ollamaClient, _, _ = InitServerConnection(ctx, t)
-	}
-
-	for _, model := range modelsToTest {
-		t.Run(model, func(t *testing.T) {
-			testCtx := ctx
-			if slices.Contains(cloudModels, model) {
-				t.Parallel()
-				// Create a new context for parallel tests to avoid cancellation
-				var cancel context.CancelFunc
-				testCtx, cancel = context.WithTimeout(context.Background(), 10*time.Minute)
-				defer cancel()
-			}
-			if v, ok := minVRAM[model]; ok {
-				skipUnderMinVRAM(t, v)
-			}
-
-			if ollamaClient != nil {
-				if err := PullIfMissing(testCtx, ollamaClient, model); err != nil {
-					t.Fatalf("pull failed %s", err)
-				}
-			}
-
-			tools := []openai.ChatCompletionToolUnionParam{
-				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
-					Name:        "execute_bash",
-					Description: openai.Opt("Execute a bash/shell command and return stdout, stderr, and exit code"),
-					Parameters: shared.FunctionParameters{
-						"type": "object",
-						"properties": map[string]any{
-							"command": map[string]any{
-								"type":        "string",
-								"description": "The bash command to execute",
-							},
-							"working_directory": map[string]any{
-								"type":        "string",
-								"description": "Optional working directory for command execution",
-							},
-						},
-						"required": []string{"command"},
-					},
-				}),
-			}
-
-			userContent := "Echo the text 'A & B' using bash with the bash tool"
-			userMessage := openai.UserMessage(userContent)
-
-			req := openai.ChatCompletionNewParams{
-				Model:       shared.ChatModel(model),
-				Messages:    []openai.ChatCompletionMessageParamUnion{userMessage},
-				Tools:       tools,
-				Temperature: openai.Opt(0.0),
-			}
-
-			completion, err := openaiClient.Chat.Completions.New(testCtx, req)
-			if err != nil {
-				t.Fatalf("chat failed: %v", err)
-			}
-
-			if len(completion.Choices) == 0 {
-				t.Fatalf("no choices in response")
-			}
-
-			choice := completion.Choices[0]
-			message := choice.Message
-
-			if len(message.ToolCalls) == 0 {
-				finishReason := choice.FinishReason
-				if finishReason == "" {
-					finishReason = "unknown"
-				}
-				content := message.Content
-				if content == "" {
-					content = "(empty)"
-				}
-				t.Logf("User prompt: %q", userContent)
-				t.Logf("Finish reason: %s", finishReason)
-				t.Logf("Message content: %q", content)
-				t.Logf("Tool calls count: %d", len(message.ToolCalls))
-				if messageJSON, err := json.MarshalIndent(message, "", " "); err == nil {
-					t.Logf("Full message: %s", string(messageJSON))
-				}
-				t.Fatalf("expected at least one tool call, got none. Finish reason: %s, Content: %q", finishReason, content)
-			}
-
-			firstToolCall := message.ToolCalls[0]
-			if firstToolCall.Function.Name != "execute_bash" {
-				t.Fatalf("unexpected tool called: got %q want %q", firstToolCall.Function.Name, "execute_bash")
-			}
-
-			var args map[string]any
-			if firstToolCall.Function.Arguments != "" {
-				if err := json.Unmarshal([]byte(firstToolCall.Function.Arguments), &args); err != nil {
-					t.Fatalf("failed to parse tool call arguments: %v", err)
-				}
-			}
-
-			command, ok := args["command"]
-			if !ok {
-				t.Fatalf("expected tool arguments to include 'command', got: %v", args)
-			}
-
-			cmdStr, ok := command.(string)
-			if !ok {
-				t.Fatalf("expected command to be string, got %T", command)
-			}
-
-			if !strings.Contains(cmdStr, "&") {
-				t.Errorf("expected command to contain '&' character for parsing test, got: %q", cmdStr)
-			}
-
-			if !strings.Contains(cmdStr, "echo") && !strings.Contains(cmdStr, "printf") {
-				t.Errorf("expected command to use echo or printf, got: %q", cmdStr)
-			}
-		})
-	}
-}