//go:build integration

package integration

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"slices"
	"strings"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/ollama/ollama/api"
	"github.com/openai/openai-go/v3"
	"github.com/openai/openai-go/v3/option"
	"github.com/openai/openai-go/v3/shared"
)

var agenticModels = []string{
	"gpt-oss:20b",
	"gpt-oss:120b",
	"qwen3-coder:30b",
	"qwen3:4b",
	"qwen3:8b",
}

var cloudModels = []string{
	"gpt-oss:120b-cloud",
	"gpt-oss:20b-cloud",
	"qwen3-vl:235b-cloud",
	"qwen3-coder:480b-cloud",
	"kimi-k2-thinking:cloud",
	"kimi-k2:1t-cloud",
}

// validateBashCommand validates a bash command by exact executable match.
// It checks that the command name equals expectedCmd and that every required
// argument appears in the remaining argument string.
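//
// Illustrative expectations:
//
//	validateBashCommand("ls -la /tmp", "ls", []string{"/tmp"}) // nil
//	validateBashCommand("find /tmp", "ls", nil)                // error: wrong command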
func validateBashCommand(cmd string, expectedCmd string, requiredArgs []string) error {
	parts := strings.Fields(cmd)
	if len(parts) == 0 {
		return fmt.Errorf("empty command")
	}

	actualCmd := parts[0]
	if actualCmd != expectedCmd {
		return fmt.Errorf("expected command '%s', got '%s'", expectedCmd, actualCmd)
	}

	cmdStr := strings.Join(parts[1:], " ")
	for _, arg := range requiredArgs {
		if !strings.Contains(cmdStr, arg) {
			return fmt.Errorf("missing required argument: %s", arg)
		}
	}

	return nil
}

// validateBashCommandFlexible validates a bash command with flexible matching.
// It accepts any of the allowed command forms (e.g., find vs ls) and checks
// that each required pattern is present, case-insensitively.
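//
// Illustrative expectation:
//
//	validateBashCommandFlexible("find /tmp -name '*.log'", []string{"find", "ls"}, []string{"/tmp"}) // nil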
func validateBashCommandFlexible(cmd string, allowedCommands []string, requiredPatterns []string) error {
	parts := strings.Fields(cmd)
	if len(parts) == 0 {
		return fmt.Errorf("empty command")
	}

	actualCmd := parts[0]
	if !slices.Contains(allowedCommands, actualCmd) {
		return fmt.Errorf("expected one of commands %v, got '%s'", allowedCommands, actualCmd)
	}

	cmdStr := strings.ToLower(strings.Join(parts[1:], " "))
	for _, pattern := range requiredPatterns {
		if !strings.Contains(cmdStr, strings.ToLower(pattern)) {
			return fmt.Errorf("missing required pattern: %s", pattern)
		}
	}

	return nil
}
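
// The tests below target either a local server started through
// InitServerConnection (the default) or an external OpenAI-compatible
// endpoint. Set OPENAI_BASE_URL together with OLLAMA_API_KEY to run against
// a remote endpoint; set OPENAI_TEST_MODELS to restrict the run to a single
// model. Without OPENAI_BASE_URL, both the local and cloud model lists are
// exercised against the local server.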

func TestOpenAIToolCallingMultiStep(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
	defer cancel()

	var baseURL string
	var apiKey string
	var modelsToTest []string
	var cleanup func()
	var ollamaClient *api.Client // set only for the local-server path; used to pull models

	if openaiBaseURL := os.Getenv("OPENAI_BASE_URL"); openaiBaseURL != "" {
		baseURL = openaiBaseURL
		apiKey = os.Getenv("OLLAMA_API_KEY")
		if apiKey == "" {
			t.Fatal("OLLAMA_API_KEY must be set when using OPENAI_BASE_URL")
		}

		// only test cloud models unless OPENAI_TEST_MODELS is set
		modelsToTest = cloudModels
		if modelsEnv := os.Getenv("OPENAI_TEST_MODELS"); modelsEnv != "" {
			modelsToTest = []string{modelsEnv}
		}
		cleanup = func() {}
	} else {
		client, testEndpoint, cleanupFn := InitServerConnection(ctx, t)
		ollamaClient = client
		cleanup = cleanupFn
		baseURL = fmt.Sprintf("http://%s/v1", testEndpoint)
		apiKey = "ollama"
		modelsToTest = append(agenticModels, cloudModels...)
	}
	t.Cleanup(cleanup)

	opts := []option.RequestOption{
		option.WithBaseURL(baseURL),
		option.WithAPIKey(apiKey),
	}
	openaiClient := openai.NewClient(opts...)

	for _, model := range modelsToTest {
		t.Run(model, func(t *testing.T) {
			testCtx := ctx
			if slices.Contains(cloudModels, model) {
				t.Parallel()
				// Create a new context for parallel tests to avoid cancellation
				var cancel context.CancelFunc
				testCtx, cancel = context.WithTimeout(context.Background(), 10*time.Minute)
				defer cancel()
			}
			if v, ok := minVRAM[model]; ok {
				skipUnderMinVRAM(t, v)
			}

			if ollamaClient != nil {
				if err := PullIfMissing(testCtx, ollamaClient, model); err != nil {
					t.Fatalf("pull failed: %s", err)
				}
			}

			tools := []openai.ChatCompletionToolUnionParam{
				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
					Name:        "list_files",
					Description: openai.Opt("List all files in a directory"),
					Parameters: shared.FunctionParameters{
						"type": "object",
						"properties": map[string]any{
							"path": map[string]any{
								"type":        "string",
								"description": "The directory path to list files from",
							},
						},
						"required": []string{"path"},
					},
				}),
				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
					Name:        "read_file",
					Description: openai.Opt("Read the contents of a file"),
					Parameters: shared.FunctionParameters{
						"type": "object",
						"properties": map[string]any{
							"path": map[string]any{
								"type":        "string",
								"description": "The file path to read",
							},
						},
						"required": []string{"path"},
					},
				}),
			}

			mockFileContents := "line 1\nline 2\nline 3\nline 4\nline 5"
			userContent := "Find the file named 'config.json' in /tmp and read its contents"
			userMessage := openai.UserMessage(userContent)

			messages := []openai.ChatCompletionMessageParamUnion{
				userMessage,
			}
			stepCount := 0
			maxSteps := 10

			// normalizePath tolerates models that return relative paths by
			// prefixing a leading slash, e.g. "tmp" is treated as "/tmp".
			normalizePath := func(path string) string {
				if path != "" && path[0] != '/' {
					return "/" + path
				}
				return path
			}

			expectedSteps := []struct {
				functionName string
				validateArgs func(map[string]any) error
				result       string
			}{
				{
					functionName: "list_files",
					validateArgs: func(args map[string]any) error {
						path, ok := args["path"]
						if !ok {
							return fmt.Errorf("missing required argument 'path'")
						}
						pathStr, ok := path.(string)
						if !ok {
							return fmt.Errorf("expected 'path' to be string, got %T", path)
						}
						normalizedPath := normalizePath(pathStr)
						if normalizedPath != "/tmp" {
							return fmt.Errorf("expected list_files(\"/tmp\"), got list_files(%q)", pathStr)
						}
						return nil
					},
					result: `["config.json", "other.txt", "data.log"]`,
				},
				{
					functionName: "read_file",
					validateArgs: func(args map[string]any) error {
						path, ok := args["path"]
						if !ok {
							return fmt.Errorf("missing required argument 'path'")
						}
						pathStr, ok := path.(string)
						if !ok {
							return fmt.Errorf("expected 'path' to be string, got %T", path)
						}
						normalizedPath := normalizePath(pathStr)
						if normalizedPath != "/tmp/config.json" {
							return fmt.Errorf("expected read_file(\"/tmp/config.json\"), got read_file(%q)", pathStr)
						}
						return nil
					},
					result: mockFileContents,
				},
			}

			for stepCount < maxSteps {
				req := openai.ChatCompletionNewParams{
					Model:       shared.ChatModel(model),
					Messages:    messages,
					Tools:       tools,
					Temperature: openai.Opt(0.0),
				}

				completion, err := openaiClient.Chat.Completions.New(testCtx, req)
				if err != nil {
					t.Fatalf("step %d chat failed: %v", stepCount+1, err)
				}

				if len(completion.Choices) == 0 {
					t.Fatalf("step %d: no choices in response", stepCount+1)
				}

				choice := completion.Choices[0]
				message := choice.Message

				toolCalls := message.ToolCalls
				content := message.Content
				gotToolCall := len(toolCalls) > 0
				var toolCallID string
				if gotToolCall && toolCalls[0].ID != "" {
					toolCallID = toolCalls[0].ID
				}

				// Feed the assistant turn back into the conversation; response
				// tool calls are converted to request params via a JSON
				// round-trip since the SDK uses separate types for responses
				// and requests.
				var assistantMessage openai.ChatCompletionMessageParamUnion
				if gotToolCall {
					toolCallsJSON, err := json.Marshal(toolCalls)
					if err != nil {
						t.Fatalf("step %d: failed to marshal tool calls: %v", stepCount+1, err)
					}
					var toolCallParams []openai.ChatCompletionMessageToolCallUnionParam
					if err := json.Unmarshal(toolCallsJSON, &toolCallParams); err != nil {
						t.Fatalf("step %d: failed to unmarshal tool calls: %v", stepCount+1, err)
					}
					contentUnion := openai.ChatCompletionAssistantMessageParamContentUnion{
						OfString: openai.Opt(content),
					}
					assistantMsg := openai.ChatCompletionAssistantMessageParam{
						Content:   contentUnion,
						ToolCalls: toolCallParams,
					}
					assistantMessage = openai.ChatCompletionMessageParamUnion{
						OfAssistant: &assistantMsg,
					}
				} else {
					assistantMessage = openai.AssistantMessage(content)
				}

				if !gotToolCall && content != "" {
					if stepCount < len(expectedSteps) {
						t.Logf("EXPECTED: Step %d should call '%s'", stepCount+1, expectedSteps[stepCount].functionName)
						t.Logf("ACTUAL: Model stopped with content: %s", content)
						t.Fatalf("model stopped making tool calls after %d steps, expected %d steps. Final response: %s", stepCount, len(expectedSteps), content)
					}
					return
				}

				if !gotToolCall {
					if stepCount < len(expectedSteps) {
						expectedStep := expectedSteps[stepCount]
						t.Logf("EXPECTED: Step %d should call '%s'", stepCount+1, expectedStep.functionName)
						t.Logf("ACTUAL: No tool call, got content: %s", content)
						t.Fatalf("step %d: expected tool call but got none. Response: %s", stepCount+1, content)
					}
					return
				}

				if stepCount >= len(expectedSteps) {
					actualCallJSON, _ := json.MarshalIndent(toolCalls[0], "", " ")
					t.Logf("EXPECTED: All %d steps completed", len(expectedSteps))
					t.Logf("ACTUAL: Extra step %d with tool call:\n%s", stepCount+1, string(actualCallJSON))
					funcName := "unknown"
					if toolCalls[0].Function.Name != "" {
						funcName = toolCalls[0].Function.Name
					}
					t.Fatalf("model made more tool calls than expected. Expected %d steps, got step %d with tool call: %s", len(expectedSteps), stepCount+1, funcName)
				}

				expectedStep := expectedSteps[stepCount]
				firstToolCall := toolCalls[0]
				funcCall := firstToolCall.Function
				if funcCall.Name == "" {
					t.Fatalf("step %d: tool call missing function name", stepCount+1)
				}

				funcName := funcCall.Name

				var args map[string]any
				if funcCall.Arguments != "" {
					if err := json.Unmarshal([]byte(funcCall.Arguments), &args); err != nil {
						t.Fatalf("step %d: failed to parse tool call arguments: %v", stepCount+1, err)
					}
				}

				if funcName != expectedStep.functionName {
					t.Logf("DIFF: Function name mismatch")
					t.Logf(" Expected: %s", expectedStep.functionName)
					t.Logf(" Got: %s", funcName)
					t.Logf(" Arguments: %v", args)
					t.Fatalf("step %d: expected tool call '%s', got '%s'. Arguments: %v", stepCount+1, expectedStep.functionName, funcName, args)
				}

				if err := expectedStep.validateArgs(args); err != nil {
					expectedArgsForDisplay := map[string]any{}
					if expectedStep.functionName == "list_files" {
						expectedArgsForDisplay = map[string]any{"path": "/tmp"}
					} else if expectedStep.functionName == "read_file" {
						expectedArgsForDisplay = map[string]any{"path": "/tmp/config.json"}
					}
					if diff := cmp.Diff(expectedArgsForDisplay, args); diff != "" {
						t.Logf("DIFF: Arguments mismatch for function '%s' (-want +got):\n%s", expectedStep.functionName, diff)
					}
					t.Logf("Error: %v", err)
					t.Fatalf("step %d: tool call '%s' has invalid arguments: %v. Arguments: %v", stepCount+1, expectedStep.functionName, err, args)
				}

				toolMessage := openai.ToolMessage(expectedStep.result, toolCallID)
				messages = append(messages, assistantMessage, toolMessage)
				stepCount++
			}

			if stepCount < len(expectedSteps) {
				t.Fatalf("test exceeded max steps (%d) before completing all expected steps (%d)", maxSteps, len(expectedSteps))
			}
		})
	}
}

func TestOpenAIToolCallingBash(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
	defer cancel()

	var baseURL string
	var apiKey string
	var modelsToTest []string
	var cleanup func()
	var ollamaClient *api.Client

	if openaiBaseURL := os.Getenv("OPENAI_BASE_URL"); openaiBaseURL != "" {
		baseURL = openaiBaseURL
		apiKey = os.Getenv("OLLAMA_API_KEY")
		if apiKey == "" {
			t.Fatal("OLLAMA_API_KEY must be set when using OPENAI_BASE_URL")
		}
		modelsToTest = cloudModels
		if modelsEnv := os.Getenv("OPENAI_TEST_MODELS"); modelsEnv != "" {
			modelsToTest = []string{modelsEnv}
		}
		cleanup = func() {}
	} else {
		client, testEndpoint, cleanupFn := InitServerConnection(ctx, t)
		ollamaClient = client
		cleanup = cleanupFn
		baseURL = fmt.Sprintf("http://%s/v1", testEndpoint)
		apiKey = "ollama"
		modelsToTest = append(agenticModels, cloudModels...)
	}
	t.Cleanup(cleanup)

	opts := []option.RequestOption{
		option.WithBaseURL(baseURL),
		option.WithAPIKey(apiKey),
	}
	openaiClient := openai.NewClient(opts...)

	for _, model := range modelsToTest {
		t.Run(model, func(t *testing.T) {
			testCtx := ctx
			if slices.Contains(cloudModels, model) {
				t.Parallel()
				// Create a new context for parallel tests to avoid cancellation
				var cancel context.CancelFunc
				testCtx, cancel = context.WithTimeout(context.Background(), 10*time.Minute)
				defer cancel()
			}
			if v, ok := minVRAM[model]; ok {
				skipUnderMinVRAM(t, v)
			}

			if ollamaClient != nil {
				if err := PullIfMissing(testCtx, ollamaClient, model); err != nil {
					t.Fatalf("pull failed: %s", err)
				}
			}

			tools := []openai.ChatCompletionToolUnionParam{
				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
					Name:        "execute_bash",
					Description: openai.Opt("Execute a bash/shell command and return stdout, stderr, and exit code"),
					Parameters: shared.FunctionParameters{
						"type": "object",
						"properties": map[string]any{
							"command": map[string]any{
								"type":        "string",
								"description": "The bash command to execute",
							},
							"working_directory": map[string]any{
								"type":        "string",
								"description": "Optional working directory for command execution",
							},
						},
						"required": []string{"command"},
					},
				}),
			}

			userContent := "List all files in /tmp directory"
			userMessage := openai.UserMessage(userContent)

			req := openai.ChatCompletionNewParams{
				Model:       shared.ChatModel(model),
				Messages:    []openai.ChatCompletionMessageParamUnion{userMessage},
				Tools:       tools,
				Temperature: openai.Opt(0.0),
			}

			completion, err := openaiClient.Chat.Completions.New(testCtx, req)
			if err != nil {
				t.Fatalf("chat failed: %v", err)
			}

			if len(completion.Choices) == 0 {
				t.Fatalf("no choices in response")
			}

			choice := completion.Choices[0]
			message := choice.Message

			if len(message.ToolCalls) == 0 {
				finishReason := choice.FinishReason
				if finishReason == "" {
					finishReason = "unknown"
				}
				content := message.Content
				if content == "" {
					content = "(empty)"
				}
				t.Logf("User prompt: %q", userContent)
				t.Logf("Finish reason: %s", finishReason)
				t.Logf("Message content: %q", content)
				t.Logf("Tool calls count: %d", len(message.ToolCalls))
				if messageJSON, err := json.MarshalIndent(message, "", " "); err == nil {
					t.Logf("Full message: %s", string(messageJSON))
				}
				t.Fatalf("expected at least one tool call, got none. Finish reason: %s, Content: %q", finishReason, content)
			}

			firstToolCall := message.ToolCalls[0]
			if firstToolCall.Function.Name != "execute_bash" {
				t.Fatalf("unexpected tool called: got %q want %q", firstToolCall.Function.Name, "execute_bash")
			}

			var args map[string]any
			if firstToolCall.Function.Arguments != "" {
				if err := json.Unmarshal([]byte(firstToolCall.Function.Arguments), &args); err != nil {
					t.Fatalf("failed to parse tool call arguments: %v", err)
				}
			}

			command, ok := args["command"]
			if !ok {
				t.Fatalf("expected tool arguments to include 'command', got: %v", args)
			}

			cmdStr, ok := command.(string)
			if !ok {
				t.Fatalf("expected command to be string, got %T", command)
			}

			if err := validateBashCommand(cmdStr, "ls", []string{"/tmp"}); err != nil {
				t.Errorf("bash command validation failed: %v. Command: %q", err, cmdStr)
			}
		})
	}
}

func TestOpenAIToolCallingBashMultiStep(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
	defer cancel()

	var baseURL string
	var apiKey string
	var modelsToTest []string
	var cleanup func()
	var ollamaClient *api.Client

	if openaiBaseURL := os.Getenv("OPENAI_BASE_URL"); openaiBaseURL != "" {
		baseURL = openaiBaseURL
		apiKey = os.Getenv("OLLAMA_API_KEY")
		if apiKey == "" {
			t.Fatal("OLLAMA_API_KEY must be set when using OPENAI_BASE_URL")
		}
		modelsToTest = cloudModels
		if modelsEnv := os.Getenv("OPENAI_TEST_MODELS"); modelsEnv != "" {
			modelsToTest = []string{modelsEnv}
		}
		cleanup = func() {}
	} else {
		client, testEndpoint, cleanupFn := InitServerConnection(ctx, t)
		ollamaClient = client
		cleanup = cleanupFn
		baseURL = fmt.Sprintf("http://%s/v1", testEndpoint)
		apiKey = "ollama"
		modelsToTest = append(agenticModels, cloudModels...)
	}
	t.Cleanup(cleanup)

	opts := []option.RequestOption{
		option.WithBaseURL(baseURL),
		option.WithAPIKey(apiKey),
	}
	openaiClient := openai.NewClient(opts...)

	for _, model := range modelsToTest {
		t.Run(model, func(t *testing.T) {
			testCtx := ctx
			if slices.Contains(cloudModels, model) {
				t.Parallel()
				// Create a new context for parallel tests to avoid cancellation
				var cancel context.CancelFunc
				testCtx, cancel = context.WithTimeout(context.Background(), 10*time.Minute)
				defer cancel()
			}
			if v, ok := minVRAM[model]; ok {
				skipUnderMinVRAM(t, v)
			}

			if ollamaClient != nil {
				if err := PullIfMissing(testCtx, ollamaClient, model); err != nil {
					t.Fatalf("pull failed: %s", err)
				}
			}

			tools := []openai.ChatCompletionToolUnionParam{
				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
					Name:        "execute_bash",
					Description: openai.Opt("Execute a bash/shell command and return stdout, stderr, and exit code"),
					Parameters: shared.FunctionParameters{
						"type": "object",
						"properties": map[string]any{
							"command": map[string]any{
								"type":        "string",
								"description": "The bash command to execute",
							},
							"working_directory": map[string]any{
								"type":        "string",
								"description": "Optional working directory for command execution",
							},
						},
						"required": []string{"command"},
					},
				}),
			}

			userContent := "Find all log files in /tmp. use the bash tool"
			userMessage := openai.UserMessage(userContent)

			req := openai.ChatCompletionNewParams{
				Model:       shared.ChatModel(model),
				Messages:    []openai.ChatCompletionMessageParamUnion{userMessage},
				Tools:       tools,
				Temperature: openai.Opt(0.0),
			}

			completion, err := openaiClient.Chat.Completions.New(testCtx, req)
			if err != nil {
				t.Fatalf("chat failed: %v", err)
			}

			if len(completion.Choices) == 0 {
				t.Fatalf("no choices in response")
			}

			choice := completion.Choices[0]
			message := choice.Message

			if len(message.ToolCalls) == 0 {
				finishReason := choice.FinishReason
				if finishReason == "" {
					finishReason = "unknown"
				}
				content := message.Content
				if content == "" {
					content = "(empty)"
				}
				t.Logf("User prompt: %q", userContent)
				t.Logf("Finish reason: %s", finishReason)
				t.Logf("Message content: %q", content)
				t.Logf("Tool calls count: %d", len(message.ToolCalls))
				if messageJSON, err := json.MarshalIndent(message, "", " "); err == nil {
					t.Logf("Full message: %s", string(messageJSON))
				}
				t.Fatalf("expected at least one tool call, got none. Finish reason: %s, Content: %q", finishReason, content)
			}

			firstToolCall := message.ToolCalls[0]
			if firstToolCall.Function.Name != "execute_bash" {
				t.Fatalf("unexpected tool called: got %q want %q", firstToolCall.Function.Name, "execute_bash")
			}

			var args map[string]any
			if firstToolCall.Function.Arguments != "" {
				if err := json.Unmarshal([]byte(firstToolCall.Function.Arguments), &args); err != nil {
					t.Fatalf("failed to parse tool call arguments: %v", err)
				}
			}

			command, ok := args["command"]
			if !ok {
				t.Fatalf("expected tool arguments to include 'command', got: %v", args)
			}

			cmdStr, ok := command.(string)
			if !ok {
				t.Fatalf("expected command to be string, got %T", command)
			}

			if err := validateBashCommandFlexible(cmdStr, []string{"find", "ls"}, []string{"/tmp"}); err != nil {
				t.Errorf("bash command validation failed: %v. Command: %q", err, cmdStr)
			}
		})
	}
}

func TestOpenAIToolCallingBashAmpersand(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
	defer cancel()

	var baseURL string
	var apiKey string
	var modelsToTest []string
	var cleanup func()
	var ollamaClient *api.Client

	if openaiBaseURL := os.Getenv("OPENAI_BASE_URL"); openaiBaseURL != "" {
		baseURL = openaiBaseURL
		apiKey = os.Getenv("OLLAMA_API_KEY")
		if apiKey == "" {
			t.Fatal("OLLAMA_API_KEY must be set when using OPENAI_BASE_URL")
		}
		modelsToTest = cloudModels
		if modelsEnv := os.Getenv("OPENAI_TEST_MODELS"); modelsEnv != "" {
			modelsToTest = []string{modelsEnv}
		}
		cleanup = func() {}
	} else {
		client, testEndpoint, cleanupFn := InitServerConnection(ctx, t)
		ollamaClient = client
		cleanup = cleanupFn
		baseURL = fmt.Sprintf("http://%s/v1", testEndpoint)
		apiKey = "ollama"
		modelsToTest = append(agenticModels, cloudModels...)
	}
	t.Cleanup(cleanup)

	opts := []option.RequestOption{
		option.WithBaseURL(baseURL),
		option.WithAPIKey(apiKey),
	}
	openaiClient := openai.NewClient(opts...)

	for _, model := range modelsToTest {
		t.Run(model, func(t *testing.T) {
			testCtx := ctx
			if slices.Contains(cloudModels, model) {
				t.Parallel()
				// Create a new context for parallel tests to avoid cancellation
				var cancel context.CancelFunc
				testCtx, cancel = context.WithTimeout(context.Background(), 10*time.Minute)
				defer cancel()
			}
			if v, ok := minVRAM[model]; ok {
				skipUnderMinVRAM(t, v)
			}

			if ollamaClient != nil {
				if err := PullIfMissing(testCtx, ollamaClient, model); err != nil {
					t.Fatalf("pull failed: %s", err)
				}
			}

			tools := []openai.ChatCompletionToolUnionParam{
				openai.ChatCompletionFunctionTool(shared.FunctionDefinitionParam{
					Name:        "execute_bash",
					Description: openai.Opt("Execute a bash/shell command and return stdout, stderr, and exit code"),
					Parameters: shared.FunctionParameters{
						"type": "object",
						"properties": map[string]any{
							"command": map[string]any{
								"type":        "string",
								"description": "The bash command to execute",
							},
							"working_directory": map[string]any{
								"type":        "string",
								"description": "Optional working directory for command execution",
							},
						},
						"required": []string{"command"},
					},
				}),
			}

			userContent := "Echo the text 'A & B' using bash with the bash tool"
			userMessage := openai.UserMessage(userContent)

			req := openai.ChatCompletionNewParams{
				Model:       shared.ChatModel(model),
				Messages:    []openai.ChatCompletionMessageParamUnion{userMessage},
				Tools:       tools,
				Temperature: openai.Opt(0.0),
			}

			completion, err := openaiClient.Chat.Completions.New(testCtx, req)
			if err != nil {
				t.Fatalf("chat failed: %v", err)
			}

			if len(completion.Choices) == 0 {
				t.Fatalf("no choices in response")
			}

			choice := completion.Choices[0]
			message := choice.Message

			if len(message.ToolCalls) == 0 {
				finishReason := choice.FinishReason
				if finishReason == "" {
					finishReason = "unknown"
				}
				content := message.Content
				if content == "" {
					content = "(empty)"
				}
				t.Logf("User prompt: %q", userContent)
				t.Logf("Finish reason: %s", finishReason)
				t.Logf("Message content: %q", content)
				t.Logf("Tool calls count: %d", len(message.ToolCalls))
				if messageJSON, err := json.MarshalIndent(message, "", " "); err == nil {
					t.Logf("Full message: %s", string(messageJSON))
				}
				t.Fatalf("expected at least one tool call, got none. Finish reason: %s, Content: %q", finishReason, content)
			}

			firstToolCall := message.ToolCalls[0]
			if firstToolCall.Function.Name != "execute_bash" {
				t.Fatalf("unexpected tool called: got %q want %q", firstToolCall.Function.Name, "execute_bash")
			}

			var args map[string]any
			if firstToolCall.Function.Arguments != "" {
				if err := json.Unmarshal([]byte(firstToolCall.Function.Arguments), &args); err != nil {
					t.Fatalf("failed to parse tool call arguments: %v", err)
				}
			}

			command, ok := args["command"]
			if !ok {
				t.Fatalf("expected tool arguments to include 'command', got: %v", args)
			}

			cmdStr, ok := command.(string)
			if !ok {
				t.Fatalf("expected command to be string, got %T", command)
			}

			if !strings.Contains(cmdStr, "&") {
				t.Errorf("expected command to contain '&' character for parsing test, got: %q", cmdStr)
			}

			if !strings.Contains(cmdStr, "echo") && !strings.Contains(cmdStr, "printf") {
				t.Errorf("expected command to use echo or printf, got: %q", cmdStr)
			}
		})
	}
}
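
// TestValidateBashCommandHelpers is a small offline sanity check of the
// validator helpers defined at the top of this file. It is illustrative:
// the cases mirror the expectations used by the tool-calling tests above
// and require no running server or model.
func TestValidateBashCommandHelpers(t *testing.T) {
	if err := validateBashCommand("ls -la /tmp", "ls", []string{"/tmp"}); err != nil {
		t.Errorf("expected valid command, got error: %v", err)
	}
	if err := validateBashCommand("find /tmp", "ls", nil); err == nil {
		t.Error("expected command mismatch error, got nil")
	}
	if err := validateBashCommandFlexible("find /tmp -name '*.log'", []string{"find", "ls"}, []string{"/tmp", "log"}); err != nil {
		t.Errorf("expected valid flexible command, got error: %v", err)
	}
	if err := validateBashCommandFlexible("cat /tmp/a.log", []string{"find", "ls"}, nil); err == nil {
		t.Error("expected command mismatch error, got nil")
	}
}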