package main import ( "context" "encoding/json" "flag" "fmt" "os" "strings" "time" "github.com/ollama/ollama/api" ) func main() { model := flag.String("model", "", "model to evaluate") suite := flag.String("suite", "", "comma-separated list of suites to run (empty runs all)") list := flag.Bool("list", false, "list available suites") verbose := flag.Bool("v", false, "verbose output") timeout := flag.Int("timeout", 60, "timeout per test in seconds") export := flag.String("export", "eval-results.json", "export results to file") flag.Parse() if *list { for _, s := range suites { fmt.Printf("%s (%d tests)\n", s.Name, len(s.Tests)) } return } if *model == "" { fmt.Fprintf(os.Stderr, "error: -model parameter is required\n") os.Exit(1) } client, err := api.ClientFromEnvironment() if err != nil { fmt.Fprintf(os.Stderr, "error: %v\n", err) os.Exit(1) } ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) if err := client.Heartbeat(ctx); err != nil { cancel() fmt.Fprintf(os.Stderr, "error: cannot connect to ollama\n") os.Exit(1) } cancel() selected := suites if *suite != "" { suiteNames := strings.Split(*suite, ",") selected = []Suite{} var notFound []string for _, name := range suiteNames { name = strings.TrimSpace(name) if name == "" { continue } found := false for _, s := range suites { if s.Name == name { selected = append(selected, s) found = true break } } if !found { notFound = append(notFound, name) } } if len(notFound) > 0 { fmt.Fprintf(os.Stderr, "error: suite(s) not found: %s\n", strings.Join(notFound, ", ")) os.Exit(1) } } var results []Result for _, s := range selected { if *verbose { fmt.Printf("\n%s (%d tests)\n", s.Name, len(s.Tests)) } for i, test := range s.Tests { if test.Options == nil { test.Options = map[string]any{"temperature": 0.1} } if test.Check == nil { test.Check = HasResponse() } if *verbose { fmt.Printf(" [%d/%d] %s... ", i+1, len(s.Tests), test.Name) } ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*timeout)*time.Second) result := Run(ctx, client, *model, test) cancel() results = append(results, result) if *verbose { if result.Error != nil { fmt.Printf("ERROR: %v\n", result.Error) } else if result.Passed { fmt.Printf("PASS (%.2fs)", result.Duration.Seconds()) if len(result.Tools) > 0 || result.Thinking { fmt.Printf(" [") if len(result.Tools) > 0 { fmt.Printf("tools: %s", strings.Join(result.Tools, ",")) } if result.Thinking { if len(result.Tools) > 0 { fmt.Printf(", ") } fmt.Printf("thinking") } fmt.Printf("]") } fmt.Println() // Print tool calls with details if len(result.ToolCalls) > 0 { fmt.Printf(" Tool Calls:\n") for _, tc := range result.ToolCalls { argsJSON, _ := json.Marshal(tc.Function.Arguments) fmt.Printf(" - %s: %s\n", tc.Function.Name, string(argsJSON)) } } // Print response if there is one if result.Response != "" { fmt.Printf(" Response: %s\n", result.Response) } } else { fmt.Printf("FAIL (%.2fs)\n", result.Duration.Seconds()) // Print tool calls with details even on failure if len(result.ToolCalls) > 0 { fmt.Printf(" Tool Calls:\n") for _, tc := range result.ToolCalls { argsJSON, _ := json.Marshal(tc.Function.Arguments) fmt.Printf(" - %s: %s\n", tc.Function.Name, string(argsJSON)) } } // Print response even on failure if result.Response != "" { fmt.Printf(" Response: %s\n", result.Response) } } } } } printSummary(results) if *export != "" { if err := writeJSON(*export, results); err != nil { fmt.Fprintf(os.Stderr, "warning: export failed: %v\n", err) } else if *verbose { fmt.Printf("\nResults: %s\n", *export) } } if anyFailed(results) { os.Exit(1) } } func printSummary(results []Result) { var passed, failed, errors int for _, r := range results { if r.Error != nil { errors++ } else if r.Passed { passed++ } else { failed++ } } total := len(results) rate := 0.0 if total > 0 { rate = float64(passed) / float64(total) * 100 } fmt.Printf("\n%d/%d passed (%.1f%%)", passed, total, rate) if errors > 0 { fmt.Printf(", %d errors", errors) } fmt.Println() } func anyFailed(results []Result) bool { for _, r := range results { if !r.Passed || r.Error != nil { return true } } return false } func writeJSON(path string, results []Result) error { f, err := os.Create(path) if err != nil { return err } defer f.Close() enc := json.NewEncoder(f) enc.SetIndent("", " ") return enc.Encode(results) }