Compare commits

..

8 Commits

Author SHA1 Message Date
Grace Guo
fc0cf9a141 i forgot what i did here 2025-11-04 00:22:02 -08:00
Grace Guo
c51ab4aa80 reverse changes 2025-11-03 15:51:48 -08:00
nicole pardal
2a5214930e embeddings: removed redundant TestAPIEmbeddings test (#12863)
This PR removes a redundant test from TestAPIEmbeddings
Contents of this test already exists in embed_test.go and model_arch_test.go
2025-10-30 17:14:01 -07:00
Grace Guo
ab92a8ce43 for instruct and thinking cloud versions 2025-10-30 15:55:42 -07:00
Grace Guo
509d7ff114 streaming and non-streaming 2025-10-30 15:55:42 -07:00
Grace Guo
30357816f9 sat math images 2025-10-30 15:55:42 -07:00
Grace Guo
b123efc8bf added some more tests for qwen3vl 2025-10-30 15:55:42 -07:00
Grace Guo
b0cd31c9c1 some of the tests starting to work 2025-10-30 15:55:42 -07:00
12 changed files with 302 additions and 37 deletions

View File

@@ -1,4 +1,4 @@
package ml
package discover
import (
"os"

View File

@@ -53,7 +53,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
if eval, err := filepath.EvalSymlinks(exe); err == nil {
exe = eval
}
files, err := filepath.Glob(filepath.Join(ml.LibOllamaPath, "*", "*ggml-*"))
files, err := filepath.Glob(filepath.Join(LibOllamaPath, "*", "*ggml-*"))
if err != nil {
slog.Debug("unable to lookup runner library directories", "error", err)
}
@@ -64,7 +64,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
// Our current packaging model places ggml-hip in the main directory
// but keeps rocm in an isolated directory. We have to add it to
// the [LD_LIBRARY_]PATH so ggml-hip will load properly
rocmDir = filepath.Join(ml.LibOllamaPath, "rocm")
rocmDir = filepath.Join(LibOllamaPath, "rocm")
if _, err := os.Stat(rocmDir); err != nil {
rocmDir = ""
}
@@ -95,9 +95,9 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
}
}
if dir == "" {
dirs = []string{ml.LibOllamaPath}
dirs = []string{LibOllamaPath}
} else {
dirs = []string{ml.LibOllamaPath, dir}
dirs = []string{LibOllamaPath, dir}
}
// ROCm can take a long time on some systems, so give it more time before giving up
@@ -249,7 +249,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
libDirs = make(map[string]struct{})
for _, dev := range devices {
dir := dev.LibraryPath[len(dev.LibraryPath)-1]
if dir != ml.LibOllamaPath {
if dir != LibOllamaPath {
libDirs[dir] = struct{}{}
}
}
@@ -339,7 +339,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
devFilter := ml.GetVisibleDevicesEnv(devices)
for dir := range libDirs {
updatedDevices := bootstrapDevices(ctx, []string{ml.LibOllamaPath, dir}, devFilter)
updatedDevices := bootstrapDevices(ctx, []string{LibOllamaPath, dir}, devFilter)
for _, u := range updatedDevices {
for i := range devices {
if u.DeviceID == devices[i].DeviceID && u.PCIID == devices[i].PCIID {

View File

@@ -381,3 +381,30 @@ func TestAPIShowModel(t *testing.T) {
t.Errorf("%s missing modified_at: %#v", modelName, resp)
}
}
// TestAPIEmbeddings issues one embedding request against the first library
// embedding model and verifies that a non-empty vector is returned.
func TestAPIEmbeddings(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
	defer cancel()
	client, _, cleanup := InitServerConnection(ctx, t)
	defer cleanup()

	// Deterministic options so repeated runs exercise the same code path.
	embedReq := api.EmbeddingRequest{
		Model:  libraryEmbedModels[0],
		Prompt: "why is the sky blue?",
		Options: map[string]interface{}{
			"temperature": 0,
			"seed":        123,
		},
	}
	if pullErr := PullIfMissing(ctx, client, embedReq.Model); pullErr != nil {
		t.Fatalf("pull failed %s", pullErr)
	}

	res, embedErr := client.Embeddings(ctx, &embedReq)
	if embedErr != nil {
		t.Fatalf("embeddings call failed %s", embedErr)
	}
	if len(res.Embedding) == 0 {
		t.Errorf("zero length embedding response")
	}
}

259
integration/qwen3vl_test.go Normal file
View File

@@ -0,0 +1,259 @@
//go:build integration
package integration
import (
"context"
"os"
"strings"
"testing"
"time"
"github.com/ollama/ollama/api"
)
// getTestConfig returns the model name and streaming mode for the qwen3vl
// integration tests. Both can be overridden via environment variables:
//   - QWEN3VL_MODEL:  model to run (default "qwen3-vl:235b-cloud")
//   - QWEN3VL_STREAM: set to "false" to disable streaming (default true)
func getTestConfig() (model string, stream bool) {
	model = os.Getenv("QWEN3VL_MODEL")
	if model == "" {
		// Restore the documented default; the previous hard-coded
		// "qwen3vl-thinking-odc-dev" was a leftover dev-only override.
		model = "qwen3-vl:235b-cloud" // default
	}
	streamStr := os.Getenv("QWEN3VL_STREAM")
	stream = streamStr != "false" // default to true
	return model, stream
}
// TestQwen3VL runs a table of chat scenarios against the qwen3vl model:
// plain text, a single image, multiple images, tool calling, and a multi-turn
// conversation combining a tool result with an image. The model and streaming
// mode come from getTestConfig (QWEN3VL_MODEL / QWEN3VL_STREAM env vars).
// Assumes the referenced testdata/*.png|jpeg fixtures exist — TODO confirm
// they are committed alongside this test.
func TestQwen3VL(t *testing.T) {
	model, stream := getTestConfig()

	// Each case provides a conversation plus optional tools and image paths;
	// images are attached to the last user message before the request is sent.
	tests := []struct {
		name     string
		messages []api.Message
		tools    []api.Tool
		images   []string
	}{
		{
			name: "Text-Only Scenario",
			messages: []api.Message{
				{Role: "system", Content: "You are a helpful assistant."},
				{Role: "user", Content: "Write a short haiku about autumn."},
			},
		},
		{
			name: "Single Image Scenario",
			messages: []api.Message{
				{
					Role:    "system",
					Content: "You are a helpful assistant that can see images.",
				},
				{
					Role:    "user",
					Content: "What is in this image?",
				},
			},
			images: []string{"testdata/menu.png"},
		},
		{
			name: "Multiple Images Scenario",
			messages: []api.Message{
				{
					Role:    "system",
					Content: "You are a helpful assistant that can see images.",
				},
				{
					Role:    "user",
					Content: "Use both images to answer the question.",
				},
			},
			images: []string{"testdata/satmath1.png", "testdata/satmath2.png"},
		},
		{
			name: "Tools Scenario",
			messages: []api.Message{
				{
					Role:    "system",
					Content: "You can call tools when needed. Return tool calls when actions are needed.",
				},
				{Role: "user", Content: "What's the weather in San Francisco now?"},
			},
			tools: []api.Tool{
				{
					Type: "function",
					Function: api.ToolFunction{
						Name:        "get_weather",
						Description: "Get current weather for a city.",
						Parameters: api.ToolFunctionParameters{
							Type: "object",
							Properties: map[string]api.ToolProperty{
								"city": {
									Type:        api.PropertyType{"string"},
									Description: "The city to get the weather for",
								},
							},
							Required: []string{"city"},
						},
					},
				},
			},
		},
		{
			// Seeds the history with a completed get_weather round trip
			// (assistant tool call + tool reply), then asks a follow-up that
			// also carries an image, so the model must combine both contexts.
			name: "Multi-Turn Tools With Image",
			messages: []api.Message{
				{Role: "system", Content: "Use tools when actions are required."},
				{Role: "user", Content: "What's the current temperature in San Francisco?"},
				{Role: "assistant", Content: "", ToolCalls: []api.ToolCall{
					{Function: api.ToolCallFunction{
						Name: "get_weather",
						Arguments: api.ToolCallFunctionArguments{
							"city": "San Francisco",
						},
					}},
				}},
				{Role: "tool", ToolName: "get_weather", Content: "Sunny"},
				{Role: "user", Content: "Given that weather, what are the top 10 activities to do in San Francisco? Consider this photo as context."},
			},
			tools: []api.Tool{
				{
					Type: "function",
					Function: api.ToolFunction{
						Name:        "get_weather",
						Description: "Get current weather for a city.",
						Parameters: api.ToolFunctionParameters{
							Type: "object",
							Properties: map[string]api.ToolProperty{
								"city": {
									Type:        api.PropertyType{"string"},
									Description: "The city to get the weather for",
								},
							},
							Required: []string{"city"},
						},
					},
				},
				{
					Type: "function",
					Function: api.ToolFunction{
						Name:        "get_top_10_activities",
						Description: "Get the top 10 activities for a city given the weather.",
						Parameters: api.ToolFunctionParameters{
							Type: "object",
							Properties: map[string]api.ToolProperty{
								"weather": {
									Type:        api.PropertyType{"string"},
									Description: "The weather in the city",
								},
								"city": {
									Type:        api.PropertyType{"string"},
									Description: "The city to get the activities for",
								},
								"image": {
									Type:        api.PropertyType{"base64"},
									Description: "The image of the city",
								},
							},
							Required: []string{"weather", "city", "image"},
						},
					},
				},
			},
			images: []string{"testdata/sf-city.jpeg"},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Load and attach images to last user message
			messages := tt.messages
			if len(tt.images) > 0 {
				var imgs []api.ImageData
				for _, path := range tt.images {
					imgs = append(imgs, loadImageData(t, path))
				}
				// Find last user message and attach images
				for i := len(messages) - 1; i >= 0; i-- {
					if messages[i].Role == "user" {
						messages[i].Images = imgs
						break
					}
				}
			}

			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()

			// Pull/preload model if not using remote server
			if os.Getenv("OLLAMA_TEST_EXISTING") == "" {
				if err := PullIfMissing(ctx, client, model); err != nil {
					t.Fatal(err)
				}
				// Preload to reduce startup latency
				// (best-effort: the error is deliberately ignored).
				_ = client.Generate(ctx, &api.GenerateRequest{Model: model}, func(api.GenerateResponse) error { return nil })
			}

			// Build and execute chat request
			// (fixed seed/temperature for reproducible responses).
			req := &api.ChatRequest{
				Model:    model,
				Messages: messages,
				Tools:    tt.tools,
				Stream:   &stream,
				Options:  map[string]any{"seed": 42, "temperature": 0.0},
			}

			// Accumulate streamed content, thinking text, and tool calls
			// across all response chunks.
			var contentBuf, thinkingBuf strings.Builder
			var toolCalls []api.ToolCall
			err := client.Chat(ctx, req, func(r api.ChatResponse) error {
				contentBuf.WriteString(r.Message.Content)
				thinkingBuf.WriteString(r.Message.Thinking)
				toolCalls = append(toolCalls, r.Message.ToolCalls...)
				return nil
			})
			if err != nil {
				t.Fatalf("chat error: %v", err)
			}

			// Log truncated responses
			logTruncated := func(label, text string) {
				if text != "" {
					if len(text) > 800 {
						text = text[:800] + "... [truncated]"
					}
					t.Logf("%s: %s", label, text)
				}
			}
			logTruncated("Thinking", thinkingBuf.String())
			logTruncated("Content", contentBuf.String())
			if len(toolCalls) > 0 {
				t.Logf("Tool calls: %d", len(toolCalls))
				for i, call := range toolCalls {
					t.Logf("  [%d] %s(%+v)", i, call.Function.Name, call.Function.Arguments)
				}
			}

			// Validate tool calls if tools were provided
			if len(tt.tools) > 0 {
				if len(toolCalls) == 0 {
					t.Fatal("expected at least one tool call, got none")
				}
				if toolCalls[0].Function.Name == "" {
					t.Fatalf("tool call missing function name: %#v", toolCalls[0])
				}
			}
		})
	}
}
// loadImageData reads the file at imagePath and returns its raw bytes,
// failing the test immediately if the file cannot be read.
func loadImageData(t *testing.T, imagePath string) []byte {
	contents, readErr := os.ReadFile(imagePath)
	if readErr == nil {
		return contents
	}
	t.Fatalf("Failed to load image %s: %v", imagePath, readErr)
	return nil
}

BIN
integration/testdata/menu.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 729 KiB

BIN
integration/testdata/satmath1.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
integration/testdata/satmath2.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
integration/testdata/sf-city.jpeg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

View File

@@ -11,9 +11,9 @@ must be recreated with no-alloc set to false before loading data.
ggml/include/ggml-backend.h | 1 +
ggml/src/ggml-backend-impl.h | 16 +++
ggml/src/ggml-backend.cpp | 72 ++++++++++-
ggml/src/ggml-cuda/common.cuh | 58 ++++++++-
ggml/src/ggml-cuda/common.cuh | 48 ++++++-
ggml/src/ggml-cuda/ggml-cuda.cu | 217 ++++++++++++++++++++++++++------
5 files changed, 320 insertions(+), 44 deletions(-)
5 files changed, 310 insertions(+), 44 deletions(-)
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index 2763f2bd6..b3b5b356a 100644
@@ -219,10 +219,10 @@ index 41eef3b5f..c81a2e48a 100644
void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index e0abde542..e98044bd8 100644
index e0abde542..28d6bcd71 100644
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -35,6 +35,41 @@
@@ -35,6 +35,31 @@
#include "vendors/cuda.h"
#endif // defined(GGML_USE_HIP)
@@ -246,25 +246,15 @@ index e0abde542..e98044bd8 100644
+ }
+}
+
+static cudaError_t cudaMemsetAsyncReserve ( void* devPtr, int value, size_t count, cudaStream_t stream = 0 ) {
+ if (!reserving_graph) {
+ return cudaMemsetAsync(devPtr, value, count, stream);
+ } else {
+ return cudaSuccess;
+ }
+}
+
+#undef cudaMemcpyAsync
+#define cudaMemcpyAsync cudaMemcpyAsyncReserve
+#undef cudaMemcpy2DAsync
+#define cudaMemcpy2DAsync cudaMemcpy2DAsyncReserve
+#undef cudaMemsetAsync
+#define cudaMemsetAsync cudaMemsetAsyncReserve
+
#define STRINGIZE_IMPL(...) #__VA_ARGS__
#define STRINGIZE(...) STRINGIZE_IMPL(__VA_ARGS__)
@@ -856,6 +891,9 @@ struct ggml_cuda_pool {
@@ -856,6 +881,9 @@ struct ggml_cuda_pool {
virtual void * alloc(size_t size, size_t * actual_size) = 0;
virtual void free(void * ptr, size_t size) = 0;
@@ -274,7 +264,7 @@ index e0abde542..e98044bd8 100644
};
template<typename T>
@@ -999,11 +1037,11 @@ struct ggml_backend_cuda_context {
@@ -999,11 +1027,11 @@ struct ggml_backend_cuda_context {
// pool
std::unique_ptr<ggml_cuda_pool> pools[GGML_CUDA_MAX_DEVICES];
@@ -288,7 +278,7 @@ index e0abde542..e98044bd8 100644
}
return *pools[device];
}
@@ -1011,4 +1049,20 @@ struct ggml_backend_cuda_context {
@@ -1011,4 +1039,20 @@ struct ggml_backend_cuda_context {
ggml_cuda_pool & pool() {
return pool(device);
}
@@ -310,7 +300,7 @@ index e0abde542..e98044bd8 100644
+ }
};
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index c555cd30f..eb3db0f19 100644
index f4d4a4267..ac70dcac8 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -350,6 +350,8 @@ const ggml_cuda_device_info & ggml_cuda_info() {

View File

@@ -23,7 +23,6 @@ func NewStatusWriter(out *os.File) *StatusWriter {
var errorPrefixes = []string{
"error:",
"CUDA error",
"ROCm error",
"cudaMalloc failed",
"\"ERR\"",
"error loading model",

View File

@@ -55,20 +55,10 @@ static cudaError_t cudaMemcpy2DAsyncReserve ( void* dst, size_t dpitch, const vo
}
}
static cudaError_t cudaMemsetAsyncReserve ( void* devPtr, int value, size_t count, cudaStream_t stream = 0 ) {
if (!reserving_graph) {
return cudaMemsetAsync(devPtr, value, count, stream);
} else {
return cudaSuccess;
}
}
#undef cudaMemcpyAsync
#define cudaMemcpyAsync cudaMemcpyAsyncReserve
#undef cudaMemcpy2DAsync
#define cudaMemcpy2DAsync cudaMemcpy2DAsyncReserve
#undef cudaMemsetAsync
#define cudaMemsetAsync cudaMemsetAsyncReserve
#define STRINGIZE_IMPL(...) #__VA_ARGS__
#define STRINGIZE(...) STRINGIZE_IMPL(__VA_ARGS__)

View File

@@ -361,7 +361,7 @@ func ByLibrary(l []DeviceInfo) [][]DeviceInfo {
}
func LibraryPaths(l []DeviceInfo) []string {
gpuLibs := []string{LibOllamaPath}
var gpuLibs []string
for _, gpu := range l {
for _, dir := range gpu.LibraryPath {
needed := true