From 3a45922c018781d09c8836888eef308119abb697 Mon Sep 17 00:00:00 2001 From: Inforithmics Date: Thu, 25 Sep 2025 03:22:01 +0200 Subject: [PATCH 1/5] Test if Vulkan device is supported --- discover/gpu.go | 9 ++++ discover/gpu_info_vulkan.c | 33 +++++++++++++++ discover/gpu_info_vulkan.h | 87 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+) diff --git a/discover/gpu.go b/discover/gpu.go index f6152bf0d..2cb77e1e5 100644 --- a/discover/gpu.go +++ b/discover/gpu.go @@ -466,6 +466,15 @@ func GetGPUInfo() GpuInfoList { continue } + if C.vk_device_is_supported(*vHandles.vulkan, C.int(i)) == 0 { + unsupportedGPUs = append(unsupportedGPUs, + UnsupportedGPUInfo{ + GpuInfo: gpuInfo.GpuInfo, + }) + slog.Info(fmt.Sprintf("[%d] Vulkan GPU does not support required Vulkan features. (StorageBuffer16BitAccess)", i)) + continue + } + gpuInfo.TotalMemory = uint64(memInfo.total) gpuInfo.FreeMemory = uint64(memInfo.free) gpuInfo.ID = C.GoString(&memInfo.gpu_id[0]) diff --git a/discover/gpu_info_vulkan.c b/discover/gpu_info_vulkan.c index 65033ad8a..7179ec9a3 100644 --- a/discover/gpu_info_vulkan.c +++ b/discover/gpu_info_vulkan.c @@ -48,6 +48,7 @@ void vk_init(char* vk_lib_path, vk_init_resp_t *resp) { {"vkEnumeratePhysicalDevices", (void *)&resp->ch.vkEnumeratePhysicalDevices}, {"vkGetPhysicalDeviceMemoryProperties2", (void *)&resp->ch.vkGetPhysicalDeviceMemoryProperties2}, {"vkDestroyInstance", (void *)&resp->ch.vkDestroyInstance}, + {"vkGetPhysicalDeviceFeatures2", (void *)&resp->ch.vkGetPhysicalDeviceFeatures2}, {NULL, NULL}, }; @@ -117,6 +118,38 @@ void vk_init(char* vk_lib_path, vk_init_resp_t *resp) { resp->num_devices = deviceCount; } +int vk_device_is_supported(vk_handle_t rh, int i) { + VkInstance instance = rh.vk; + uint32_t deviceCount = rh.num_devices; + + VkPhysicalDevice* devices = malloc(deviceCount * sizeof(VkPhysicalDevice)); + if (devices == NULL) { + return 0; + } + + VkResult result = (*rh.vkEnumeratePhysicalDevices)(instance, 
&deviceCount, devices); + if (result != VK_SUCCESS) { + free(devices); + return 0; + } + + VkPhysicalDeviceVulkan11Features vk11_features = {}; + vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; + vk11_features.pNext = NULL; + + VkPhysicalDeviceFeatures2 device_features2 = {}; + device_features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + device_features2.pNext = &vk11_features; + + // make sure you have the right function pointer from your loader + (*rh.vkGetPhysicalDeviceFeatures2)(devices[i], &device_features2); + + int supported = vk11_features.storageBuffer16BitAccess ? 1 : 0; + + free(devices); + return supported; +} + int vk_check_flash_attention(vk_handle_t rh, int i) { VkInstance instance = rh.vk; uint32_t deviceCount = rh.num_devices; diff --git a/discover/gpu_info_vulkan.h b/discover/gpu_info_vulkan.h index 42e4b1610..26d00d601 100644 --- a/discover/gpu_info_vulkan.h +++ b/discover/gpu_info_vulkan.h @@ -95,6 +95,8 @@ typedef enum VkResult { typedef enum VkStructureType { VK_STRUCTURE_TYPE_APPLICATION_INFO = 0, VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 1, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES = 49, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2 = 1000059000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 = 1000059001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2 = 1000059006, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES = 1000071004, @@ -284,6 +286,87 @@ typedef struct VkPhysicalDeviceIDProperties { VkBool32 deviceLUIDValid; } VkPhysicalDeviceIDProperties; +typedef struct VkPhysicalDeviceFeatures { + VkBool32 robustBufferAccess; + VkBool32 fullDrawIndexUint32; + VkBool32 imageCubeArray; + VkBool32 independentBlend; + VkBool32 geometryShader; + VkBool32 tessellationShader; + VkBool32 sampleRateShading; + VkBool32 dualSrcBlend; + VkBool32 logicOp; + VkBool32 multiDrawIndirect; + VkBool32 drawIndirectFirstInstance; + VkBool32 depthClamp; + VkBool32 depthBiasClamp; + VkBool32 
fillModeNonSolid; + VkBool32 depthBounds; + VkBool32 wideLines; + VkBool32 largePoints; + VkBool32 alphaToOne; + VkBool32 multiViewport; + VkBool32 samplerAnisotropy; + VkBool32 textureCompressionETC2; + VkBool32 textureCompressionASTC_LDR; + VkBool32 textureCompressionBC; + VkBool32 occlusionQueryPrecise; + VkBool32 pipelineStatisticsQuery; + VkBool32 vertexPipelineStoresAndAtomics; + VkBool32 fragmentStoresAndAtomics; + VkBool32 shaderTessellationAndGeometryPointSize; + VkBool32 shaderImageGatherExtended; + VkBool32 shaderStorageImageExtendedFormats; + VkBool32 shaderStorageImageMultisample; + VkBool32 shaderStorageImageReadWithoutFormat; + VkBool32 shaderStorageImageWriteWithoutFormat; + VkBool32 shaderUniformBufferArrayDynamicIndexing; + VkBool32 shaderSampledImageArrayDynamicIndexing; + VkBool32 shaderStorageBufferArrayDynamicIndexing; + VkBool32 shaderStorageImageArrayDynamicIndexing; + VkBool32 shaderClipDistance; + VkBool32 shaderCullDistance; + VkBool32 shaderFloat64; + VkBool32 shaderInt64; + VkBool32 shaderInt16; + VkBool32 shaderResourceResidency; + VkBool32 shaderResourceMinLod; + VkBool32 sparseBinding; + VkBool32 sparseResidencyBuffer; + VkBool32 sparseResidencyImage2D; + VkBool32 sparseResidencyImage3D; + VkBool32 sparseResidency2Samples; + VkBool32 sparseResidency4Samples; + VkBool32 sparseResidency8Samples; + VkBool32 sparseResidency16Samples; + VkBool32 sparseResidencyAliased; + VkBool32 variableMultisampleRate; + VkBool32 inheritedQueries; +} VkPhysicalDeviceFeatures; + +typedef struct VkPhysicalDeviceFeatures2 { + VkStructureType sType; + void* pNext; + VkPhysicalDeviceFeatures features; +} VkPhysicalDeviceFeatures2; + +typedef struct VkPhysicalDeviceVulkan11Features { + VkStructureType sType; + void* pNext; + VkBool32 storageBuffer16BitAccess; + VkBool32 uniformAndStorageBuffer16BitAccess; + VkBool32 storagePushConstant16; + VkBool32 storageInputOutput16; + VkBool32 multiview; + VkBool32 multiviewGeometryShader; + VkBool32 
multiviewTessellationShader; + VkBool32 variablePointersStorageBuffer; + VkBool32 variablePointers; + VkBool32 protectedMemory; + VkBool32 samplerYcbcrConversion; + VkBool32 shaderDrawParameters; +} VkPhysicalDeviceVulkan11Features; + typedef struct VkMemoryType { VkMemoryPropertyFlags propertyFlags; uint32_t heapIndex; @@ -376,6 +459,9 @@ typedef struct { void (*vkDestroyInstance)( VkInstance instance, const VkAllocationCallbacks* pAllocator); + void (*vkGetPhysicalDeviceFeatures2)( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2* pFeatures); } vk_handle_t; typedef struct vk_init_resp @@ -388,6 +474,7 @@ typedef struct vk_init_resp void vk_init(char* vk_lib_path, vk_init_resp_t *resp); void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp); int vk_check_flash_attention(vk_handle_t rh, int i); +int vk_device_is_supported(vk_handle_t rh, int i); void vk_release(vk_handle_t rh); #endif From a7e2d21f598ce218f0a8073bfe17be5df1f46fd1 Mon Sep 17 00:00:00 2001 From: Inforithmics Date: Thu, 25 Sep 2025 06:33:15 +0200 Subject: [PATCH 2/5] vk_check_flash_attention is not needed (coopmat2, coopmat and scalar implementations exist) --- discover/gpu_info_vulkan.c | 28 ---------------------------- discover/gpu_info_vulkan.h | 1 - 2 files changed, 29 deletions(-) diff --git a/discover/gpu_info_vulkan.c b/discover/gpu_info_vulkan.c index 7179ec9a3..0929fdee5 100644 --- a/discover/gpu_info_vulkan.c +++ b/discover/gpu_info_vulkan.c @@ -150,34 +150,6 @@ int vk_device_is_supported(vk_handle_t rh, int i) { return supported; } -int vk_check_flash_attention(vk_handle_t rh, int i) { - VkInstance instance = rh.vk; - uint32_t deviceCount = rh.num_devices; - - VkPhysicalDevice* devices = malloc(deviceCount * sizeof(VkPhysicalDevice)); - if (devices == NULL) { - return 0; - } - - VkResult result = (*rh.vkEnumeratePhysicalDevices)(instance, &deviceCount, devices); - if (result != VK_SUCCESS) { - free(devices); - return 0; - } - - VkPhysicalDeviceProperties properties = {}; - 
(*rh.vkGetPhysicalDeviceProperties)(devices[i], &properties); - - int supports_nv_coopmat2 = is_extension_supported(&rh, devices[i], VK_NV_COOPERATIVE_MATRIX_2_EXTENSION_NAME); - if (!supports_nv_coopmat2) { - free(devices); - return 1; - } - - free(devices); - return 0; -} - void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp) { VkInstance instance = rh.vk; uint32_t deviceCount = rh.num_devices; diff --git a/discover/gpu_info_vulkan.h b/discover/gpu_info_vulkan.h index 26d00d601..3cd8b0b39 100644 --- a/discover/gpu_info_vulkan.h +++ b/discover/gpu_info_vulkan.h @@ -473,7 +473,6 @@ typedef struct vk_init_resp void vk_init(char* vk_lib_path, vk_init_resp_t *resp); void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp); -int vk_check_flash_attention(vk_handle_t rh, int i); int vk_device_is_supported(vk_handle_t rh, int i); void vk_release(vk_handle_t rh); From 05bdfedb56c61f331d263898cdf0d31c20c272e7 Mon Sep 17 00:00:00 2001 From: Inforithmics Date: Thu, 25 Sep 2025 08:23:13 +0200 Subject: [PATCH 3/5] Handle GGML_VK_VISIBLE_DEVICES --- discover/gpu_info_vulkan.c | 112 +++++++++++++++++++++++++++++++++++++ discover/gpu_info_vulkan.h | 3 + 2 files changed, 115 insertions(+) diff --git a/discover/gpu_info_vulkan.c b/discover/gpu_info_vulkan.c index 0929fdee5..8bb0a9d3a 100644 --- a/discover/gpu_info_vulkan.c +++ b/discover/gpu_info_vulkan.c @@ -2,6 +2,85 @@ #include "gpu_info_vulkan.h" #include +#include +#include + +#define INITIAL_ARRAY_SIZE 10 + +// Function to parse an environment variable into a list of int values. +// Returns a pointer to the allocated array, and stores the count in out_count. +// Returns NULL in case of any error. +int* parse_envvar_to_int_list(const char* envvar_name, size_t *out_count) { + char *env_str = getenv(envvar_name); + if (env_str == NULL) { + *out_count = 0; + return NULL; + } + + // Duplicate the string since strtok modifies it. 
+ char *tmp = strdup(env_str); + if (!tmp) { + *out_count = 0; + return NULL; + } + + size_t capacity = INITIAL_ARRAY_SIZE; + size_t count = 0; + int *list = malloc(capacity * sizeof(uint32_t)); + if (!list) { + free(tmp); + *out_count = 0; + return NULL; + } + + char *token = strtok(tmp, ","); + while (token != NULL) { + char *endptr = NULL; + errno = 0; + unsigned long val = strtoul(token, &endptr, 10); + if (errno != 0 || endptr == token) { + free(list); + free(tmp); + *out_count = 0; + return NULL; + } + // Optional: Check trailing characters. + while (*endptr != '\0') { + if (!isspace((unsigned char)*endptr)) { + free(list); + free(tmp); + *out_count = 0; + return NULL; + } + endptr++; + } + if (val > UINT32_MAX) { + free(list); + free(tmp); + *out_count = 0; + return NULL; + } + + // Save the value, reallocating if necessary. + if (count == capacity) { + capacity *= 2; + int *temp = realloc(list, capacity * sizeof(uint32_t)); + if (!temp) { + free(list); + free(tmp); + *out_count = 0; + return NULL; + } + list = temp; + } + list[count++] = (int)val; + token = strtok(NULL, ","); + } + + free(tmp); + *out_count = count; + return list; +} int is_extension_supported(vk_handle_t* rh, VkPhysicalDevice device, char* extension) { VkPhysicalDeviceProperties properties = {}; @@ -112,10 +191,21 @@ void vk_init(char* vk_lib_path, vk_init_resp_t *resp) { return; } + size_t visDevIdCount; + int* visDevIds = parse_envvar_to_int_list("GGML_VK_VISIBLE_DEVICES", &visDevIdCount); + resp->err = NULL; resp->ch.vk = instance; resp->ch.num_devices = deviceCount; resp->num_devices = deviceCount; + + if (visDevIds && visDevIdCount > 0) { + resp->ch.num_visible_devices = visDevIdCount; + resp->ch.visible_devices = visDevIds; + } else { + resp->ch.num_visible_devices = -1; + resp->ch.visible_devices = NULL; + } } int vk_device_is_supported(vk_handle_t rh, int i) { @@ -192,6 +282,24 @@ void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp) { device_props2.pNext = &id_props; 
(*rh.vkGetPhysicalDeviceProperties2)(devices[i], &device_props2); + if (rh.num_visible_devices > 0) { + LOG(rh.verbose, "Checking if device %d is visible\n", i); + int is_visible = 0; + for (uint32_t visDevId = 0; visDevId < rh.num_visible_devices; visDevId++) { + if (i == rh.visible_devices[visDevId]) { + LOG(rh.verbose, "Device %d is visible!\n", i); + is_visible = 1; + break; + } + } + if (!is_visible) { + LOG(rh.verbose, "Device %d is NOT visible!\n", i); + free(devices); + resp->err = strdup("device is hidden with GGML_VK_VISIBLE_DEVICES"); + return; + } + } + VkPhysicalDeviceMemoryBudgetPropertiesEXT physical_device_memory_budget_properties = {}; physical_device_memory_budget_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; physical_device_memory_budget_properties.pNext = NULL; @@ -241,6 +349,10 @@ void vk_release(vk_handle_t rh) { (*rh.vkDestroyInstance)(rh.vk, NULL); UNLOAD_LIBRARY(rh.vk_handle); rh.vk_handle = NULL; + + if (rh.visible_devices) { + free(rh.visible_devices); + } } #endif // __APPLE__ diff --git a/discover/gpu_info_vulkan.h b/discover/gpu_info_vulkan.h index 3cd8b0b39..c249d9855 100644 --- a/discover/gpu_info_vulkan.h +++ b/discover/gpu_info_vulkan.h @@ -434,6 +434,9 @@ typedef struct { VkInstance vk; int num_devices; + int num_visible_devices; + int* visible_devices; + void (*vkGetPhysicalDeviceProperties)( VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); From 82f0c7e6a518a975dc103d75932f9cf1d1285349 Mon Sep 17 00:00:00 2001 From: Inforithmics Date: Thu, 25 Sep 2025 08:47:04 +0200 Subject: [PATCH 4/5] ask for supported first --- discover/gpu.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/discover/gpu.go b/discover/gpu.go index 2cb77e1e5..aba6dae0a 100644 --- a/discover/gpu.go +++ b/discover/gpu.go @@ -459,13 +459,6 @@ func GetGPUInfo() GpuInfoList { index: i, } - C.vk_check_vram(*vHandles.vulkan, C.int(i), &memInfo) - if memInfo.err != nil { - 
slog.Info("error looking up vulkan GPU memory", "error", C.GoString(memInfo.err)) - C.free(unsafe.Pointer(memInfo.err)) - continue - } - if C.vk_device_is_supported(*vHandles.vulkan, C.int(i)) == 0 { unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{ @@ -475,6 +468,13 @@ func GetGPUInfo() GpuInfoList { continue } + C.vk_check_vram(*vHandles.vulkan, C.int(i), &memInfo) + if memInfo.err != nil { + slog.Info("error looking up vulkan GPU memory", "error", C.GoString(memInfo.err)) + C.free(unsafe.Pointer(memInfo.err)) + continue + } + gpuInfo.TotalMemory = uint64(memInfo.total) gpuInfo.FreeMemory = uint64(memInfo.free) gpuInfo.ID = C.GoString(&memInfo.gpu_id[0]) From a7ddd0e2aebb7744eb2ea64d9cc0aef4949acae2 Mon Sep 17 00:00:00 2001 From: Inforithmics Date: Fri, 26 Sep 2025 22:15:58 +0200 Subject: [PATCH 5/5] gofumpt fix --- discover/gpu.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/discover/gpu.go b/discover/gpu.go index aba6dae0a..9cdd48de0 100644 --- a/discover/gpu.go +++ b/discover/gpu.go @@ -486,7 +486,7 @@ func GetGPUInfo() GpuInfoList { gpuInfo.DriverMinor = int(memInfo.minor) // TODO potentially sort on our own algorithm instead of what the underlying GPU library does... - var backend = gpuInfoExistsInOtherBackends(gpuInfo) + backend := gpuInfoExistsInOtherBackends(gpuInfo) if backend != "" { unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{