diff --git a/CMakePresets.json b/CMakePresets.json
index 82da950bc..0c9b67225 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -22,7 +22,7 @@
       "name": "CUDA 12",
       "inherits": [ "CUDA" ],
       "cacheVariables": {
-        "CMAKE_CUDA_ARCHITECTURES": "50;60;61;70;75;80;86;87;89;90;90a;120",
+        "CMAKE_CUDA_ARCHITECTURES": "50-virtual;60-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;90a-virtual;100-virtual;120-virtual",
         "CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets -t 2"
       }
     },
@@ -30,14 +30,14 @@
       "name": "JetPack 5",
       "inherits": [ "CUDA" ],
       "cacheVariables": {
-        "CMAKE_CUDA_ARCHITECTURES": "72;87"
+        "CMAKE_CUDA_ARCHITECTURES": "72-virtual;87-virtual"
       }
     },
     {
       "name": "JetPack 6",
       "inherits": [ "CUDA" ],
       "cacheVariables": {
-        "CMAKE_CUDA_ARCHITECTURES": "87"
+        "CMAKE_CUDA_ARCHITECTURES": "87-virtual"
       }
     },
     {
diff --git a/discover/gpu.go b/discover/gpu.go
index 123177d3a..0ca0dde8d 100644
--- a/discover/gpu.go
+++ b/discover/gpu.go
@@ -300,6 +300,8 @@ func GetGPUInfo() GpuInfoList {
 				var driverMinor int
 				if cHandles.cudart != nil {
 					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
+					driverMajor = int(cHandles.cudart.driver_major)
+					driverMinor = int(cHandles.cudart.driver_minor)
 				} else {
 					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
 					driverMajor = int(cHandles.nvcuda.driver_major)
diff --git a/discover/gpu_info_cudart.c b/discover/gpu_info_cudart.c
index bc5115bfd..76c17b9d8 100644
--- a/discover/gpu_info_cudart.c
+++ b/discover/gpu_info_cudart.c
@@ -69,18 +69,15 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
   }
 
   int version = 0;
-  cudartDriverVersion_t driverVersion;
-  driverVersion.major = 0;
-  driverVersion.minor = 0;
 
   // Report driver version if we're in verbose mode, ignore errors
   ret = (*resp->ch.cudaDriverGetVersion)(&version);
   if (ret != CUDART_SUCCESS) {
     LOG(resp->ch.verbose, "cudaDriverGetVersion failed: %d\n", ret);
   } else {
-    driverVersion.major = version / 1000;
-    driverVersion.minor = (version - (driverVersion.major * 1000)) / 10;
-    LOG(resp->ch.verbose, "CUDA driver version: %d-%d\n", driverVersion.major, driverVersion.minor);
+    resp->ch.driver_major = version / 1000;
+    resp->ch.driver_minor = (version - (resp->ch.driver_major * 1000)) / 10;
+    LOG(resp->ch.verbose, "CUDA driver version: %d-%d\n", resp->ch.driver_major, resp->ch.driver_minor);
   }
 
   ret = (*resp->ch.cudaGetDeviceCount)(&resp->num_devices);
diff --git a/discover/gpu_info_cudart.h b/discover/gpu_info_cudart.h
index ff0c0af19..893f3f7bd 100644
--- a/discover/gpu_info_cudart.h
+++ b/discover/gpu_info_cudart.h
@@ -29,11 +29,6 @@ typedef struct cudartMemory_st {
   size_t used;
 } cudartMemory_t;
 
-typedef struct cudartDriverVersion {
-  int major;
-  int minor;
-} cudartDriverVersion_t;
-
 typedef struct cudaUUID {
     unsigned char bytes[16];
 } cudaUUID_t;
@@ -123,6 +118,8 @@ typedef struct cudaDeviceProp {
 typedef struct cudart_handle {
   void *handle;
   uint16_t verbose;
+  int driver_major;
+  int driver_minor;
   cudartReturn_t (*cudaSetDevice)(int device);
   cudartReturn_t (*cudaDeviceSynchronize)(void);
   cudartReturn_t (*cudaDeviceReset)(void);
diff --git a/docs/linux.md b/docs/linux.md
index 0c19ef0b4..9a156d1dc 100644
--- a/docs/linux.md
+++ b/docs/linux.md
@@ -34,7 +34,11 @@ ollama -v
 
 ### AMD GPU install
 
-If you have an AMD GPU, also download and extract the additional ROCm package:
+If you have an AMD GPU, **also** download and extract the additional ROCm package:
+
+> [!IMPORTANT]
+> The ROCm tgz contains only the AMD-dependent libraries.  You must extract **both** `ollama-linux-amd64.tgz` and `ollama-linux-amd64-rocm.tgz` into the same location.
+
 
 ```shell
 curl -L https://ollama.com/download/ollama-linux-amd64-rocm.tgz -o ollama-linux-amd64-rocm.tgz
diff --git a/docs/windows.md b/docs/windows.md
index 2e495e49d..eb067ed04 100644
--- a/docs/windows.md
+++ b/docs/windows.md
@@ -68,9 +68,9 @@ If you'd like to install or integrate Ollama as a service, a standalone
 `ollama-windows-amd64.zip` zip file is available containing only the Ollama CLI
 and GPU library dependencies for Nvidia.  If you have an AMD GPU, also download
 and extract the additional ROCm package `ollama-windows-amd64-rocm.zip` into the
-same directory.  This allows for embedding Ollama in existing applications, or
-running it as a system service via `ollama serve` with tools such as
-[NSSM](https://nssm.cc/). 
+same directory.  Both zip files are necessary for a complete AMD installation.
+This allows for embedding Ollama in existing applications, or running it as a
+system service via `ollama serve` with tools such as [NSSM](https://nssm.cc/). 
 
 > [!NOTE]  
 > If you are upgrading from a prior version, you should remove the old directories first.
diff --git a/integration/concurrency_test.go b/integration/concurrency_test.go
index dbf1e6fa3..bb0348ebc 100644
--- a/integration/concurrency_test.go
+++ b/integration/concurrency_test.go
@@ -4,7 +4,9 @@ package integration
 
 import (
 	"context"
+	"fmt"
 	"log/slog"
+	"math"
 	"os"
 	"strconv"
 	"sync"
@@ -21,7 +23,7 @@ func TestMultiModelConcurrency(t *testing.T) {
 	var (
 		req = [2]api.GenerateRequest{
 			{
-				Model:     "llama3.2:1b",
+				Model:     smol,
 				Prompt:    "why is the ocean blue?",
 				Stream:    &stream,
 				KeepAlive: &api.Duration{Duration: 10 * time.Second},
@@ -30,7 +32,7 @@ func TestMultiModelConcurrency(t *testing.T) {
 					"temperature": 0.0,
 				},
 			}, {
-				Model:     "tinydolphin",
+				Model:     "qwen3:0.6b",
 				Prompt:    "what is the origin of the us thanksgiving holiday?",
 				Stream:    &stream,
 				KeepAlive: &api.Duration{Duration: 10 * time.Second},
@@ -132,16 +134,16 @@ func TestMultiModelStress(t *testing.T) {
 			size: 2876 * format.MebiByte,
 		},
 		{
-			name: "phi",
-			size: 2616 * format.MebiByte,
+			name: "qwen3:0.6b",
+			size: 1600 * format.MebiByte,
 		},
 		{
 			name: "gemma:2b",
 			size: 2364 * format.MebiByte,
 		},
 		{
-			name: "stable-code:3b",
-			size: 2608 * format.MebiByte,
+			name: "deepseek-r1:1.5b",
+			size: 2048 * format.MebiByte,
 		},
 		{
 			name: "starcoder2:3b",
@@ -149,17 +151,21 @@ func TestMultiModelStress(t *testing.T) {
 		},
 	}
 	mediumModels := []model{
+		{
+			name: "qwen3:8b",
+			size: 6600 * format.MebiByte,
+		},
 		{
 			name: "llama2",
 			size: 5118 * format.MebiByte,
 		},
 		{
-			name: "mistral",
-			size: 4620 * format.MebiByte,
+			name: "deepseek-r1:7b",
+			size: 5600 * format.MebiByte,
 		},
 		{
-			name: "orca-mini:7b",
-			size: 5118 * format.MebiByte,
+			name: "mistral",
+			size: 4620 * format.MebiByte,
 		},
 		{
 			name: "dolphin-mistral",
@@ -254,7 +260,7 @@ func TestMultiModelStress(t *testing.T) {
 	}
 	go func() {
 		for {
-			time.Sleep(2 * time.Second)
+			time.Sleep(10 * time.Second)
 			select {
 			case <-ctx.Done():
 				return
@@ -265,7 +271,21 @@ func TestMultiModelStress(t *testing.T) {
 					continue
 				}
 				for _, m := range models.Models {
-					slog.Info("loaded model snapshot", "model", m)
+					var procStr string
+					switch {
+					case m.SizeVRAM == 0:
+						procStr = "100% CPU"
+					case m.SizeVRAM == m.Size:
+						procStr = "100% GPU"
+					case m.SizeVRAM > m.Size || m.Size == 0:
+						procStr = "Unknown"
+					default:
+						sizeCPU := m.Size - m.SizeVRAM
+						cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
+						procStr = fmt.Sprintf("%d%%/%d%%", int(cpuPercent), int(100-cpuPercent))
+					}
+
+					slog.Info("loaded model snapshot", "model", m.Name, "CPU/GPU", procStr, "expires", format.HumanTime(m.ExpiresAt, "Never"))
 				}
 			}
 		}
diff --git a/integration/utils_test.go b/integration/utils_test.go
index 727825a41..6375b1f97 100644
--- a/integration/utils_test.go
+++ b/integration/utils_test.go
@@ -574,8 +574,8 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 			},
 		},
 		[][]string{
-			{"sunlight"},
-			{"soil", "organic", "earth", "black", "tan"},
+			{"sunlight", "scattering", "interact"},
+			{"soil", "organic", "earth", "black", "tan", "chemical", "processes", "pigments", "particles"},
 			{"england", "english", "massachusetts", "pilgrims", "british"},
 			{"fourth", "july", "declaration", "independence"},
 			{"nitrogen", "oxygen", "carbon", "dioxide"},