Compare commits
4 Commits
grace/qwen
...
v0.12.9
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
392a270261 | ||
|
|
3bee3af6ed | ||
|
|
83537993d7 | ||
|
|
7dd4862a89 |
@@ -53,7 +53,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
if eval, err := filepath.EvalSymlinks(exe); err == nil {
|
||||
exe = eval
|
||||
}
|
||||
files, err := filepath.Glob(filepath.Join(LibOllamaPath, "*", "*ggml-*"))
|
||||
files, err := filepath.Glob(filepath.Join(ml.LibOllamaPath, "*", "*ggml-*"))
|
||||
if err != nil {
|
||||
slog.Debug("unable to lookup runner library directories", "error", err)
|
||||
}
|
||||
@@ -64,7 +64,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
// Our current packaging model places ggml-hip in the main directory
|
||||
// but keeps rocm in an isolated directory. We have to add it to
|
||||
// the [LD_LIBRARY_]PATH so ggml-hip will load properly
|
||||
rocmDir = filepath.Join(LibOllamaPath, "rocm")
|
||||
rocmDir = filepath.Join(ml.LibOllamaPath, "rocm")
|
||||
if _, err := os.Stat(rocmDir); err != nil {
|
||||
rocmDir = ""
|
||||
}
|
||||
@@ -95,9 +95,9 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
}
|
||||
}
|
||||
if dir == "" {
|
||||
dirs = []string{LibOllamaPath}
|
||||
dirs = []string{ml.LibOllamaPath}
|
||||
} else {
|
||||
dirs = []string{LibOllamaPath, dir}
|
||||
dirs = []string{ml.LibOllamaPath, dir}
|
||||
}
|
||||
|
||||
// ROCm can take a long time on some systems, so give it more time before giving up
|
||||
@@ -249,7 +249,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
libDirs = make(map[string]struct{})
|
||||
for _, dev := range devices {
|
||||
dir := dev.LibraryPath[len(dev.LibraryPath)-1]
|
||||
if dir != LibOllamaPath {
|
||||
if dir != ml.LibOllamaPath {
|
||||
libDirs[dir] = struct{}{}
|
||||
}
|
||||
}
|
||||
@@ -339,7 +339,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
devFilter := ml.GetVisibleDevicesEnv(devices)
|
||||
|
||||
for dir := range libDirs {
|
||||
updatedDevices := bootstrapDevices(ctx, []string{LibOllamaPath, dir}, devFilter)
|
||||
updatedDevices := bootstrapDevices(ctx, []string{ml.LibOllamaPath, dir}, devFilter)
|
||||
for _, u := range updatedDevices {
|
||||
for i := range devices {
|
||||
if u.DeviceID == devices[i].DeviceID && u.PCIID == devices[i].PCIID {
|
||||
|
||||
@@ -381,30 +381,3 @@ func TestAPIShowModel(t *testing.T) {
|
||||
t.Errorf("%s missing modified_at: %#v", modelName, resp)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAPIEmbeddings(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
req := api.EmbeddingRequest{
|
||||
Model: libraryEmbedModels[0],
|
||||
Prompt: "why is the sky blue?",
|
||||
Options: map[string]interface{}{
|
||||
"temperature": 0,
|
||||
"seed": 123,
|
||||
},
|
||||
}
|
||||
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatalf("pull failed %s", err)
|
||||
}
|
||||
|
||||
resp, err := client.Embeddings(ctx, &req)
|
||||
if err != nil {
|
||||
t.Fatalf("embeddings call failed %s", err)
|
||||
}
|
||||
if len(resp.Embedding) == 0 {
|
||||
t.Errorf("zero length embedding response")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,259 +0,0 @@
|
||||
//go:build integration
|
||||
|
||||
package integration
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
// getTestConfig resolves the model name and streaming mode for the Qwen3-VL
// tests from the environment.
//
//   - QWEN3VL_MODEL selects the model; when unset or empty the default
//     "qwen3vl-thinking-odc-dev" is used ("qwen3-vl:235b-cloud" was the
//     previous default and is kept here only as a note).
//   - QWEN3VL_STREAM disables streaming only when it is exactly "false";
//     any other value, including unset, leaves streaming enabled.
func getTestConfig() (model string, stream bool) {
	// Start from the defaults and let the environment override them.
	model, stream = "qwen3vl-thinking-odc-dev", true

	if name := os.Getenv("QWEN3VL_MODEL"); name != "" {
		model = name
	}
	if os.Getenv("QWEN3VL_STREAM") == "false" {
		stream = false
	}

	return model, stream
}
|
||||
|
||||
func TestQwen3VL(t *testing.T) {
|
||||
model, stream := getTestConfig()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
messages []api.Message
|
||||
tools []api.Tool
|
||||
images []string
|
||||
}{
|
||||
{
|
||||
name: "Text-Only Scenario",
|
||||
messages: []api.Message{
|
||||
{Role: "system", Content: "You are a helpful assistant."},
|
||||
{Role: "user", Content: "Write a short haiku about autumn."},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Single Image Scenario",
|
||||
messages: []api.Message{
|
||||
{
|
||||
Role: "system",
|
||||
Content: "You are a helpful assistant that can see images.",
|
||||
},
|
||||
{
|
||||
Role: "user",
|
||||
Content: "What is in this image?",
|
||||
},
|
||||
},
|
||||
images: []string{"testdata/menu.png"},
|
||||
},
|
||||
{
|
||||
name: "Multiple Images Scenario",
|
||||
messages: []api.Message{
|
||||
{
|
||||
Role: "system",
|
||||
Content: "You are a helpful assistant that can see images.",
|
||||
},
|
||||
{
|
||||
Role: "user",
|
||||
Content: "Use both images to answer the question.",
|
||||
},
|
||||
},
|
||||
images: []string{"testdata/satmath1.png", "testdata/satmath2.png"},
|
||||
},
|
||||
{
|
||||
name: "Tools Scenario",
|
||||
messages: []api.Message{
|
||||
{
|
||||
Role: "system",
|
||||
Content: "You can call tools when needed. Return tool calls when actions are needed.",
|
||||
},
|
||||
{Role: "user", Content: "What's the weather in San Francisco now?"},
|
||||
},
|
||||
tools: []api.Tool{
|
||||
{
|
||||
Type: "function",
|
||||
Function: api.ToolFunction{
|
||||
Name: "get_weather",
|
||||
Description: "Get current weather for a city.",
|
||||
Parameters: api.ToolFunctionParameters{
|
||||
Type: "object",
|
||||
Properties: map[string]api.ToolProperty{
|
||||
"city": {
|
||||
Type: api.PropertyType{"string"},
|
||||
Description: "The city to get the weather for",
|
||||
},
|
||||
},
|
||||
Required: []string{"city"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Multi-Turn Tools With Image",
|
||||
messages: []api.Message{
|
||||
{Role: "system", Content: "Use tools when actions are required."},
|
||||
{Role: "user", Content: "What's the current temperature in San Francisco?"},
|
||||
{Role: "assistant", Content: "", ToolCalls: []api.ToolCall{
|
||||
{Function: api.ToolCallFunction{
|
||||
Name: "get_weather",
|
||||
Arguments: api.ToolCallFunctionArguments{
|
||||
"city": "San Francisco",
|
||||
},
|
||||
}},
|
||||
}},
|
||||
{Role: "tool", ToolName: "get_weather", Content: "Sunny"},
|
||||
{Role: "user", Content: "Given that weather, what are the top 10 activities to do in San Francisco? Consider this photo as context."},
|
||||
},
|
||||
tools: []api.Tool{
|
||||
{
|
||||
Type: "function",
|
||||
Function: api.ToolFunction{
|
||||
Name: "get_weather",
|
||||
Description: "Get current weather for a city.",
|
||||
Parameters: api.ToolFunctionParameters{
|
||||
Type: "object",
|
||||
Properties: map[string]api.ToolProperty{
|
||||
"city": {
|
||||
Type: api.PropertyType{"string"},
|
||||
Description: "The city to get the weather for",
|
||||
},
|
||||
},
|
||||
Required: []string{"city"},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: "function",
|
||||
Function: api.ToolFunction{
|
||||
Name: "get_top_10_activities",
|
||||
Description: "Get the top 10 activities for a city given the weather.",
|
||||
Parameters: api.ToolFunctionParameters{
|
||||
Type: "object",
|
||||
Properties: map[string]api.ToolProperty{
|
||||
"weather": {
|
||||
Type: api.PropertyType{"string"},
|
||||
Description: "The weather in the city",
|
||||
},
|
||||
"city": {
|
||||
Type: api.PropertyType{"string"},
|
||||
Description: "The city to get the activities for",
|
||||
},
|
||||
"image": {
|
||||
Type: api.PropertyType{"base64"},
|
||||
Description: "The image of the city",
|
||||
},
|
||||
},
|
||||
Required: []string{"weather", "city", "image"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
images: []string{"testdata/sf-city.jpeg"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Load and attach images to last user message
|
||||
messages := tt.messages
|
||||
if len(tt.images) > 0 {
|
||||
var imgs []api.ImageData
|
||||
for _, path := range tt.images {
|
||||
imgs = append(imgs, loadImageData(t, path))
|
||||
}
|
||||
// Find last user message and attach images
|
||||
for i := len(messages) - 1; i >= 0; i-- {
|
||||
if messages[i].Role == "user" {
|
||||
messages[i].Images = imgs
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
// Pull/preload model if not using remote server
|
||||
if os.Getenv("OLLAMA_TEST_EXISTING") == "" {
|
||||
if err := PullIfMissing(ctx, client, model); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
// Preload to reduce startup latency
|
||||
_ = client.Generate(ctx, &api.GenerateRequest{Model: model}, func(api.GenerateResponse) error { return nil })
|
||||
}
|
||||
|
||||
// Build and execute chat request
|
||||
req := &api.ChatRequest{
|
||||
Model: model,
|
||||
Messages: messages,
|
||||
Tools: tt.tools,
|
||||
Stream: &stream,
|
||||
Options: map[string]any{"seed": 42, "temperature": 0.0},
|
||||
}
|
||||
|
||||
var contentBuf, thinkingBuf strings.Builder
|
||||
var toolCalls []api.ToolCall
|
||||
|
||||
err := client.Chat(ctx, req, func(r api.ChatResponse) error {
|
||||
contentBuf.WriteString(r.Message.Content)
|
||||
thinkingBuf.WriteString(r.Message.Thinking)
|
||||
toolCalls = append(toolCalls, r.Message.ToolCalls...)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("chat error: %v", err)
|
||||
}
|
||||
|
||||
// Log truncated responses
|
||||
logTruncated := func(label, text string) {
|
||||
if text != "" {
|
||||
if len(text) > 800 {
|
||||
text = text[:800] + "... [truncated]"
|
||||
}
|
||||
t.Logf("%s: %s", label, text)
|
||||
}
|
||||
}
|
||||
logTruncated("Thinking", thinkingBuf.String())
|
||||
logTruncated("Content", contentBuf.String())
|
||||
|
||||
if len(toolCalls) > 0 {
|
||||
t.Logf("Tool calls: %d", len(toolCalls))
|
||||
for i, call := range toolCalls {
|
||||
t.Logf(" [%d] %s(%+v)", i, call.Function.Name, call.Function.Arguments)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate tool calls if tools were provided
|
||||
if len(tt.tools) > 0 {
|
||||
if len(toolCalls) == 0 {
|
||||
t.Fatal("expected at least one tool call, got none")
|
||||
}
|
||||
if toolCalls[0].Function.Name == "" {
|
||||
t.Fatalf("tool call missing function name: %#v", toolCalls[0])
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// loadImageData reads the file at imagePath and returns its raw bytes.
//
// It is a test helper: if the file cannot be read, the calling test is failed
// immediately via t.Fatalf. t.Helper is called so the failure is reported at
// the caller's line rather than inside this function.
func loadImageData(t *testing.T, imagePath string) []byte {
	t.Helper()

	data, err := os.ReadFile(imagePath)
	if err != nil {
		t.Fatalf("Failed to load image %s: %v", imagePath, err)
	}
	return data
}
|
||||
BIN
integration/testdata/menu.png
vendored
BIN
integration/testdata/menu.png
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 729 KiB |
BIN
integration/testdata/satmath1.png
vendored
BIN
integration/testdata/satmath1.png
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 21 KiB |
BIN
integration/testdata/satmath2.png
vendored
BIN
integration/testdata/satmath2.png
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 22 KiB |
BIN
integration/testdata/sf-city.jpeg
vendored
BIN
integration/testdata/sf-city.jpeg
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 68 KiB |
@@ -11,9 +11,9 @@ must be recreated with no-alloc set to false before loading data.
|
||||
ggml/include/ggml-backend.h | 1 +
|
||||
ggml/src/ggml-backend-impl.h | 16 +++
|
||||
ggml/src/ggml-backend.cpp | 72 ++++++++++-
|
||||
ggml/src/ggml-cuda/common.cuh | 48 ++++++-
|
||||
ggml/src/ggml-cuda/common.cuh | 58 ++++++++-
|
||||
ggml/src/ggml-cuda/ggml-cuda.cu | 217 ++++++++++++++++++++++++++------
|
||||
5 files changed, 310 insertions(+), 44 deletions(-)
|
||||
5 files changed, 320 insertions(+), 44 deletions(-)
|
||||
|
||||
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
|
||||
index 2763f2bd6..b3b5b356a 100644
|
||||
@@ -219,10 +219,10 @@ index 41eef3b5f..c81a2e48a 100644
|
||||
|
||||
void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
|
||||
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
|
||||
index e0abde542..28d6bcd71 100644
|
||||
index e0abde542..e98044bd8 100644
|
||||
--- a/ggml/src/ggml-cuda/common.cuh
|
||||
+++ b/ggml/src/ggml-cuda/common.cuh
|
||||
@@ -35,6 +35,31 @@
|
||||
@@ -35,6 +35,41 @@
|
||||
#include "vendors/cuda.h"
|
||||
#endif // defined(GGML_USE_HIP)
|
||||
|
||||
@@ -246,15 +246,25 @@ index e0abde542..28d6bcd71 100644
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static cudaError_t cudaMemsetAsyncReserve ( void* devPtr, int value, size_t count, cudaStream_t stream = 0 ) {
|
||||
+ if (!reserving_graph) {
|
||||
+ return cudaMemsetAsync(devPtr, value, count, stream);
|
||||
+ } else {
|
||||
+ return cudaSuccess;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+#undef cudaMemcpyAsync
|
||||
+#define cudaMemcpyAsync cudaMemcpyAsyncReserve
|
||||
+#undef cudaMemcpy2DAsync
|
||||
+#define cudaMemcpy2DAsync cudaMemcpy2DAsyncReserve
|
||||
+#undef cudaMemsetAsync
|
||||
+#define cudaMemsetAsync cudaMemsetAsyncReserve
|
||||
+
|
||||
#define STRINGIZE_IMPL(...) #__VA_ARGS__
|
||||
#define STRINGIZE(...) STRINGIZE_IMPL(__VA_ARGS__)
|
||||
|
||||
@@ -856,6 +881,9 @@ struct ggml_cuda_pool {
|
||||
@@ -856,6 +891,9 @@ struct ggml_cuda_pool {
|
||||
|
||||
virtual void * alloc(size_t size, size_t * actual_size) = 0;
|
||||
virtual void free(void * ptr, size_t size) = 0;
|
||||
@@ -264,7 +274,7 @@ index e0abde542..28d6bcd71 100644
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
@@ -999,11 +1027,11 @@ struct ggml_backend_cuda_context {
|
||||
@@ -999,11 +1037,11 @@ struct ggml_backend_cuda_context {
|
||||
// pool
|
||||
std::unique_ptr<ggml_cuda_pool> pools[GGML_CUDA_MAX_DEVICES];
|
||||
|
||||
@@ -278,7 +288,7 @@ index e0abde542..28d6bcd71 100644
|
||||
}
|
||||
return *pools[device];
|
||||
}
|
||||
@@ -1011,4 +1039,20 @@ struct ggml_backend_cuda_context {
|
||||
@@ -1011,4 +1049,20 @@ struct ggml_backend_cuda_context {
|
||||
ggml_cuda_pool & pool() {
|
||||
return pool(device);
|
||||
}
|
||||
@@ -300,7 +310,7 @@ index e0abde542..28d6bcd71 100644
|
||||
+ }
|
||||
};
|
||||
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
index f4d4a4267..ac70dcac8 100644
|
||||
index c555cd30f..eb3db0f19 100644
|
||||
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
@@ -350,6 +350,8 @@ const ggml_cuda_device_info & ggml_cuda_info() {
|
||||
|
||||
@@ -23,6 +23,7 @@ func NewStatusWriter(out *os.File) *StatusWriter {
|
||||
var errorPrefixes = []string{
|
||||
"error:",
|
||||
"CUDA error",
|
||||
"ROCm error",
|
||||
"cudaMalloc failed",
|
||||
"\"ERR\"",
|
||||
"error loading model",
|
||||
|
||||
10
ml/backend/ggml/ggml/src/ggml-cuda/common.cuh
vendored
10
ml/backend/ggml/ggml/src/ggml-cuda/common.cuh
vendored
@@ -55,10 +55,20 @@ static cudaError_t cudaMemcpy2DAsyncReserve ( void* dst, size_t dpitch, const vo
|
||||
}
|
||||
}
|
||||
|
||||
// Drop-in wrapper for cudaMemsetAsync with the same parameters.
// When reserving_graph is set, no memset is issued and cudaSuccess is
// returned; otherwise the call is forwarded to cudaMemsetAsync unchanged.
// This must stay defined before the `#define cudaMemsetAsync
// cudaMemsetAsyncReserve` below, so the call inside resolves to the real
// runtime function rather than recursing into this wrapper.
// NOTE(review): reserving_graph is declared outside this view; presumably it
// is true while graph memory is only being reserved - confirm.
static cudaError_t cudaMemsetAsyncReserve ( void* devPtr, int value, size_t count, cudaStream_t stream = 0 ) {
    if (reserving_graph) {
        return cudaSuccess;
    } else {
        return cudaMemsetAsync(devPtr, value, count, stream);
    }
}
|
||||
|
||||
#undef cudaMemcpyAsync
|
||||
#define cudaMemcpyAsync cudaMemcpyAsyncReserve
|
||||
#undef cudaMemcpy2DAsync
|
||||
#define cudaMemcpy2DAsync cudaMemcpy2DAsyncReserve
|
||||
#undef cudaMemsetAsync
|
||||
#define cudaMemsetAsync cudaMemsetAsyncReserve
|
||||
|
||||
#define STRINGIZE_IMPL(...) #__VA_ARGS__
|
||||
#define STRINGIZE(...) STRINGIZE_IMPL(__VA_ARGS__)
|
||||
|
||||
@@ -361,7 +361,7 @@ func ByLibrary(l []DeviceInfo) [][]DeviceInfo {
|
||||
}
|
||||
|
||||
func LibraryPaths(l []DeviceInfo) []string {
|
||||
var gpuLibs []string
|
||||
gpuLibs := []string{LibOllamaPath}
|
||||
for _, gpu := range l {
|
||||
for _, dir := range gpu.LibraryPath {
|
||||
needed := true
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package discover
|
||||
package ml
|
||||
|
||||
import (
|
||||
"os"
|
||||
Reference in New Issue
Block a user