Merge remote-tracking branch 'upstream/main' into vulkanV3
This commit is contained in:
commit
d71c83f2ba
|
|
@ -22,7 +22,7 @@
|
|||
"name": "CUDA 12",
|
||||
"inherits": [ "CUDA" ],
|
||||
"cacheVariables": {
|
||||
"CMAKE_CUDA_ARCHITECTURES": "50;60;61;70;75;80;86;87;89;90;90a;120",
|
||||
"CMAKE_CUDA_ARCHITECTURES": "50-virtual;60-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;90a-virtual;100-virtual;120-virtual",
|
||||
"CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets -t 2"
|
||||
}
|
||||
},
|
||||
|
|
@ -30,14 +30,14 @@
|
|||
"name": "JetPack 5",
|
||||
"inherits": [ "CUDA" ],
|
||||
"cacheVariables": {
|
||||
"CMAKE_CUDA_ARCHITECTURES": "72;87"
|
||||
"CMAKE_CUDA_ARCHITECTURES": "72-virtual;87-virtual"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "JetPack 6",
|
||||
"inherits": [ "CUDA" ],
|
||||
"cacheVariables": {
|
||||
"CMAKE_CUDA_ARCHITECTURES": "87"
|
||||
"CMAKE_CUDA_ARCHITECTURES": "87-virtual"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -300,6 +300,8 @@ func GetGPUInfo() GpuInfoList {
|
|||
var driverMinor int
|
||||
if cHandles.cudart != nil {
|
||||
C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
|
||||
driverMajor = int(cHandles.cudart.driver_major)
|
||||
driverMinor = int(cHandles.cudart.driver_minor)
|
||||
} else {
|
||||
C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
|
||||
driverMajor = int(cHandles.nvcuda.driver_major)
|
||||
|
|
|
|||
|
|
@ -69,18 +69,15 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
|
|||
}
|
||||
|
||||
int version = 0;
|
||||
cudartDriverVersion_t driverVersion;
|
||||
driverVersion.major = 0;
|
||||
driverVersion.minor = 0;
|
||||
|
||||
// Report driver version if we're in verbose mode, ignore errors
|
||||
ret = (*resp->ch.cudaDriverGetVersion)(&version);
|
||||
if (ret != CUDART_SUCCESS) {
|
||||
LOG(resp->ch.verbose, "cudaDriverGetVersion failed: %d\n", ret);
|
||||
} else {
|
||||
driverVersion.major = version / 1000;
|
||||
driverVersion.minor = (version - (driverVersion.major * 1000)) / 10;
|
||||
LOG(resp->ch.verbose, "CUDA driver version: %d-%d\n", driverVersion.major, driverVersion.minor);
|
||||
resp->ch.driver_major = version / 1000;
|
||||
resp->ch.driver_minor = (version - (resp->ch.driver_major * 1000)) / 10;
|
||||
LOG(resp->ch.verbose, "CUDA driver version: %d-%d\n", resp->ch.driver_major, resp->ch.driver_minor);
|
||||
}
|
||||
|
||||
ret = (*resp->ch.cudaGetDeviceCount)(&resp->num_devices);
|
||||
|
|
|
|||
|
|
@ -29,11 +29,6 @@ typedef struct cudartMemory_st {
|
|||
size_t used;
|
||||
} cudartMemory_t;
|
||||
|
||||
typedef struct cudartDriverVersion {
|
||||
int major;
|
||||
int minor;
|
||||
} cudartDriverVersion_t;
|
||||
|
||||
typedef struct cudaUUID {
|
||||
unsigned char bytes[16];
|
||||
} cudaUUID_t;
|
||||
|
|
@ -123,6 +118,8 @@ typedef struct cudaDeviceProp {
|
|||
typedef struct cudart_handle {
|
||||
void *handle;
|
||||
uint16_t verbose;
|
||||
int driver_major;
|
||||
int driver_minor;
|
||||
cudartReturn_t (*cudaSetDevice)(int device);
|
||||
cudartReturn_t (*cudaDeviceSynchronize)(void);
|
||||
cudartReturn_t (*cudaDeviceReset)(void);
|
||||
|
|
|
|||
|
|
@ -34,7 +34,11 @@ ollama -v
|
|||
|
||||
### AMD GPU install
|
||||
|
||||
If you have an AMD GPU, also download and extract the additional ROCm package:
|
||||
If you have an AMD GPU, **also** download and extract the additional ROCm package:
|
||||
|
||||
> [!IMPORTANT]
|
||||
> The ROCm tgz contains only AMD dependent libraries. You must extract **both** `ollama-linux-amd64.tgz` and `ollama-linux-amd64-rocm.tgz` into the same location.
|
||||
|
||||
|
||||
```shell
|
||||
curl -L https://ollama.com/download/ollama-linux-amd64-rocm.tgz -o ollama-linux-amd64-rocm.tgz
|
||||
|
|
|
|||
|
|
@ -68,9 +68,9 @@ If you'd like to install or integrate Ollama as a service, a standalone
|
|||
`ollama-windows-amd64.zip` zip file is available containing only the Ollama CLI
|
||||
and GPU library dependencies for Nvidia. If you have an AMD GPU, also download
|
||||
and extract the additional ROCm package `ollama-windows-amd64-rocm.zip` into the
|
||||
same directory. This allows for embedding Ollama in existing applications, or
|
||||
running it as a system service via `ollama serve` with tools such as
|
||||
[NSSM](https://nssm.cc/).
|
||||
same directory. Both zip files are necessary for a complete AMD installation.
|
||||
This allows for embedding Ollama in existing applications, or running it as a
|
||||
system service via `ollama serve` with tools such as [NSSM](https://nssm.cc/).
|
||||
|
||||
> [!NOTE]
|
||||
> If you are upgrading from a prior version, you should remove the old directories first.
|
||||
|
|
|
|||
|
|
@ -4,7 +4,9 @@ package integration
|
|||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
|
@ -21,7 +23,7 @@ func TestMultiModelConcurrency(t *testing.T) {
|
|||
var (
|
||||
req = [2]api.GenerateRequest{
|
||||
{
|
||||
Model: "llama3.2:1b",
|
||||
Model: smol,
|
||||
Prompt: "why is the ocean blue?",
|
||||
Stream: &stream,
|
||||
KeepAlive: &api.Duration{Duration: 10 * time.Second},
|
||||
|
|
@ -30,7 +32,7 @@ func TestMultiModelConcurrency(t *testing.T) {
|
|||
"temperature": 0.0,
|
||||
},
|
||||
}, {
|
||||
Model: "tinydolphin",
|
||||
Model: "qwen3:0.6b",
|
||||
Prompt: "what is the origin of the us thanksgiving holiday?",
|
||||
Stream: &stream,
|
||||
KeepAlive: &api.Duration{Duration: 10 * time.Second},
|
||||
|
|
@ -132,16 +134,16 @@ func TestMultiModelStress(t *testing.T) {
|
|||
size: 2876 * format.MebiByte,
|
||||
},
|
||||
{
|
||||
name: "phi",
|
||||
size: 2616 * format.MebiByte,
|
||||
name: "qwen3:0.6b",
|
||||
size: 1600 * format.MebiByte,
|
||||
},
|
||||
{
|
||||
name: "gemma:2b",
|
||||
size: 2364 * format.MebiByte,
|
||||
},
|
||||
{
|
||||
name: "stable-code:3b",
|
||||
size: 2608 * format.MebiByte,
|
||||
name: "deepseek-r1:1.5b",
|
||||
size: 2048 * format.MebiByte,
|
||||
},
|
||||
{
|
||||
name: "starcoder2:3b",
|
||||
|
|
@ -149,17 +151,21 @@ func TestMultiModelStress(t *testing.T) {
|
|||
},
|
||||
}
|
||||
mediumModels := []model{
|
||||
{
|
||||
name: "qwen3:8b",
|
||||
size: 6600 * format.MebiByte,
|
||||
},
|
||||
{
|
||||
name: "llama2",
|
||||
size: 5118 * format.MebiByte,
|
||||
},
|
||||
{
|
||||
name: "mistral",
|
||||
size: 4620 * format.MebiByte,
|
||||
name: "deepseek-r1:7b",
|
||||
size: 5600 * format.MebiByte,
|
||||
},
|
||||
{
|
||||
name: "orca-mini:7b",
|
||||
size: 5118 * format.MebiByte,
|
||||
name: "mistral",
|
||||
size: 4620 * format.MebiByte,
|
||||
},
|
||||
{
|
||||
name: "dolphin-mistral",
|
||||
|
|
@ -254,7 +260,7 @@ func TestMultiModelStress(t *testing.T) {
|
|||
}
|
||||
go func() {
|
||||
for {
|
||||
time.Sleep(2 * time.Second)
|
||||
time.Sleep(10 * time.Second)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
|
|
@ -265,7 +271,21 @@ func TestMultiModelStress(t *testing.T) {
|
|||
continue
|
||||
}
|
||||
for _, m := range models.Models {
|
||||
slog.Info("loaded model snapshot", "model", m)
|
||||
var procStr string
|
||||
switch {
|
||||
case m.SizeVRAM == 0:
|
||||
procStr = "100% CPU"
|
||||
case m.SizeVRAM == m.Size:
|
||||
procStr = "100% GPU"
|
||||
case m.SizeVRAM > m.Size || m.Size == 0:
|
||||
procStr = "Unknown"
|
||||
default:
|
||||
sizeCPU := m.Size - m.SizeVRAM
|
||||
cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
|
||||
procStr = fmt.Sprintf("%d%%/%d%%", int(cpuPercent), int(100-cpuPercent))
|
||||
}
|
||||
|
||||
slog.Info("loaded model snapshot", "model", m.Name, "CPU/GPU", procStr, "expires", format.HumanTime(m.ExpiresAt, "Never"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -574,8 +574,8 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
|
|||
},
|
||||
},
|
||||
[][]string{
|
||||
{"sunlight"},
|
||||
{"soil", "organic", "earth", "black", "tan"},
|
||||
{"sunlight", "scattering", "interact"},
|
||||
{"soil", "organic", "earth", "black", "tan", "chemical", "processes", "pigments", "particles"},
|
||||
{"england", "english", "massachusetts", "pilgrims", "british"},
|
||||
{"fourth", "july", "declaration", "independence"},
|
||||
{"nitrogen", "oxygen", "carbon", "dioxide"},
|
||||
|
|
|
|||
Loading…
Reference in New Issue