Merge remote-tracking branch 'upstream/main' into vulkanV3

This commit is contained in:
Inforithmics 2025-08-14 22:11:08 +02:00
commit d71c83f2ba
8 changed files with 52 additions and 32 deletions

View File

@ -22,7 +22,7 @@
"name": "CUDA 12",
"inherits": [ "CUDA" ],
"cacheVariables": {
"CMAKE_CUDA_ARCHITECTURES": "50;60;61;70;75;80;86;87;89;90;90a;120",
"CMAKE_CUDA_ARCHITECTURES": "50-virtual;60-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;90a-virtual;100-virtual;120-virtual",
"CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets -t 2"
}
},
@ -30,14 +30,14 @@
"name": "JetPack 5",
"inherits": [ "CUDA" ],
"cacheVariables": {
"CMAKE_CUDA_ARCHITECTURES": "72;87"
"CMAKE_CUDA_ARCHITECTURES": "72-virtual;87-virtual"
}
},
{
"name": "JetPack 6",
"inherits": [ "CUDA" ],
"cacheVariables": {
"CMAKE_CUDA_ARCHITECTURES": "87"
"CMAKE_CUDA_ARCHITECTURES": "87-virtual"
}
},
{

View File

@ -300,6 +300,8 @@ func GetGPUInfo() GpuInfoList {
var driverMinor int
if cHandles.cudart != nil {
C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
driverMajor = int(cHandles.cudart.driver_major)
driverMinor = int(cHandles.cudart.driver_minor)
} else {
C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
driverMajor = int(cHandles.nvcuda.driver_major)

View File

@ -69,18 +69,15 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
}
int version = 0;
cudartDriverVersion_t driverVersion;
driverVersion.major = 0;
driverVersion.minor = 0;
// Report driver version if we're in verbose mode, ignore errors
ret = (*resp->ch.cudaDriverGetVersion)(&version);
if (ret != CUDART_SUCCESS) {
LOG(resp->ch.verbose, "cudaDriverGetVersion failed: %d\n", ret);
} else {
driverVersion.major = version / 1000;
driverVersion.minor = (version - (driverVersion.major * 1000)) / 10;
LOG(resp->ch.verbose, "CUDA driver version: %d-%d\n", driverVersion.major, driverVersion.minor);
resp->ch.driver_major = version / 1000;
resp->ch.driver_minor = (version - (resp->ch.driver_major * 1000)) / 10;
LOG(resp->ch.verbose, "CUDA driver version: %d-%d\n", resp->ch.driver_major, resp->ch.driver_minor);
}
ret = (*resp->ch.cudaGetDeviceCount)(&resp->num_devices);

View File

@ -29,11 +29,6 @@ typedef struct cudartMemory_st {
size_t used;
} cudartMemory_t;
typedef struct cudartDriverVersion {
int major;
int minor;
} cudartDriverVersion_t;
typedef struct cudaUUID {
unsigned char bytes[16];
} cudaUUID_t;
@ -123,6 +118,8 @@ typedef struct cudaDeviceProp {
typedef struct cudart_handle {
void *handle;
uint16_t verbose;
int driver_major;
int driver_minor;
cudartReturn_t (*cudaSetDevice)(int device);
cudartReturn_t (*cudaDeviceSynchronize)(void);
cudartReturn_t (*cudaDeviceReset)(void);

View File

@ -34,7 +34,11 @@ ollama -v
### AMD GPU install
If you have an AMD GPU, also download and extract the additional ROCm package:
If you have an AMD GPU, **also** download and extract the additional ROCm package:
> [!IMPORTANT]
> The ROCm tgz contains only AMD dependent libraries. You must extract **both** `ollama-linux-amd64.tgz` and `ollama-linux-amd64-rocm.tgz` into the same location.
```shell
curl -L https://ollama.com/download/ollama-linux-amd64-rocm.tgz -o ollama-linux-amd64-rocm.tgz

View File

@ -68,9 +68,9 @@ If you'd like to install or integrate Ollama as a service, a standalone
`ollama-windows-amd64.zip` zip file is available containing only the Ollama CLI
and GPU library dependencies for Nvidia. If you have an AMD GPU, also download
and extract the additional ROCm package `ollama-windows-amd64-rocm.zip` into the
same directory. This allows for embedding Ollama in existing applications, or
running it as a system service via `ollama serve` with tools such as
[NSSM](https://nssm.cc/).
same directory. Both zip files are necessary for a complete AMD installation.
This allows for embedding Ollama in existing applications, or running it as a
system service via `ollama serve` with tools such as [NSSM](https://nssm.cc/).
> [!NOTE]
> If you are upgrading from a prior version, you should remove the old directories first.

View File

@ -4,7 +4,9 @@ package integration
import (
"context"
"fmt"
"log/slog"
"math"
"os"
"strconv"
"sync"
@ -21,7 +23,7 @@ func TestMultiModelConcurrency(t *testing.T) {
var (
req = [2]api.GenerateRequest{
{
Model: "llama3.2:1b",
Model: smol,
Prompt: "why is the ocean blue?",
Stream: &stream,
KeepAlive: &api.Duration{Duration: 10 * time.Second},
@ -30,7 +32,7 @@ func TestMultiModelConcurrency(t *testing.T) {
"temperature": 0.0,
},
}, {
Model: "tinydolphin",
Model: "qwen3:0.6b",
Prompt: "what is the origin of the us thanksgiving holiday?",
Stream: &stream,
KeepAlive: &api.Duration{Duration: 10 * time.Second},
@ -132,16 +134,16 @@ func TestMultiModelStress(t *testing.T) {
size: 2876 * format.MebiByte,
},
{
name: "phi",
size: 2616 * format.MebiByte,
name: "qwen3:0.6b",
size: 1600 * format.MebiByte,
},
{
name: "gemma:2b",
size: 2364 * format.MebiByte,
},
{
name: "stable-code:3b",
size: 2608 * format.MebiByte,
name: "deepseek-r1:1.5b",
size: 2048 * format.MebiByte,
},
{
name: "starcoder2:3b",
@ -149,17 +151,21 @@ func TestMultiModelStress(t *testing.T) {
},
}
mediumModels := []model{
{
name: "qwen3:8b",
size: 6600 * format.MebiByte,
},
{
name: "llama2",
size: 5118 * format.MebiByte,
},
{
name: "mistral",
size: 4620 * format.MebiByte,
name: "deepseek-r1:7b",
size: 5600 * format.MebiByte,
},
{
name: "orca-mini:7b",
size: 5118 * format.MebiByte,
name: "mistral",
size: 4620 * format.MebiByte,
},
{
name: "dolphin-mistral",
@ -254,7 +260,7 @@ func TestMultiModelStress(t *testing.T) {
}
go func() {
for {
time.Sleep(2 * time.Second)
time.Sleep(10 * time.Second)
select {
case <-ctx.Done():
return
@ -265,7 +271,21 @@ func TestMultiModelStress(t *testing.T) {
continue
}
for _, m := range models.Models {
slog.Info("loaded model snapshot", "model", m)
var procStr string
switch {
case m.SizeVRAM == 0:
procStr = "100% CPU"
case m.SizeVRAM == m.Size:
procStr = "100% GPU"
case m.SizeVRAM > m.Size || m.Size == 0:
procStr = "Unknown"
default:
sizeCPU := m.Size - m.SizeVRAM
cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
procStr = fmt.Sprintf("%d%%/%d%%", int(cpuPercent), int(100-cpuPercent))
}
slog.Info("loaded model snapshot", "model", m.Name, "CPU/GPU", procStr, "expires", format.HumanTime(m.ExpiresAt, "Never"))
}
}
}

View File

@ -574,8 +574,8 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
},
},
[][]string{
{"sunlight"},
{"soil", "organic", "earth", "black", "tan"},
{"sunlight", "scattering", "interact"},
{"soil", "organic", "earth", "black", "tan", "chemical", "processes", "pigments", "particles"},
{"england", "english", "massachusetts", "pilgrims", "british"},
{"fourth", "july", "declaration", "independence"},
{"nitrogen", "oxygen", "carbon", "dioxide"},