From c86af47ac0a8788a187c602377fc3911e9ce630f Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Fri, 5 Sep 2025 08:25:03 -0700 Subject: [PATCH] WIP - wire up Vulkan with the new engine based discovery Not a complete implementation - free VRAM is better, but not accurate on windows --- CMakeLists.txt | 12 +- Dockerfile | 20 +- discover/gpu.go | 38 +- discover/gpu_info_vulkan.c | 241 ----------- discover/gpu_info_vulkan.h | 394 ------------------ discover/runner.go | 5 +- discover/types.go | 12 +- llm/server.go | 3 +- .../ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp | 148 ++++++- scripts/build_windows.ps1 | 7 +- 10 files changed, 192 insertions(+), 688 deletions(-) delete mode 100644 discover/gpu_info_vulkan.c delete mode 100644 discover/gpu_info_vulkan.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 29fbd00cd..94114a709 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -114,7 +114,6 @@ if(CMAKE_HIP_COMPILER) target_compile_definitions(ggml-hip PRIVATE GGML_HIP_NO_VMM) - set(OLLAMA_HIP_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/rocm) install(TARGETS ggml-hip RUNTIME_DEPENDENCY_SET rocm RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP @@ -125,13 +124,13 @@ if(CMAKE_HIP_COMPILER) PRE_INCLUDE_REGEXES hipblas rocblas amdhip64 rocsolver amd_comgr hsa-runtime64 rocsparse tinfo rocprofiler-register drm drm_amdgpu numa elf PRE_EXCLUDE_REGEXES ".*" POST_EXCLUDE_REGEXES "system32" - RUNTIME DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP - LIBRARY DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP + RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP + LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP ) foreach(HIP_LIB_BIN_INSTALL_DIR IN ITEMS ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR}) if(EXISTS ${HIP_LIB_BIN_INSTALL_DIR}/rocblas) - install(DIRECTORY ${HIP_LIB_BIN_INSTALL_DIR}/rocblas DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP) + install(DIRECTORY ${HIP_LIB_BIN_INSTALL_DIR}/rocblas DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP) break() endif() endforeach() @@ -141,12 +140,11 @@ endif() find_package(Vulkan) if(Vulkan_FOUND) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-vulkan) - set(OLLAMA_VULKAN_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/vulkan) install(TARGETS ggml-vulkan RUNTIME_DEPENDENCIES PRE_INCLUDE_REGEXES vulkan PRE_EXCLUDE_REGEXES ".*" - RUNTIME DESTINATION ${OLLAMA_VULKAN_INSTALL_DIR} COMPONENT Vulkan - LIBRARY DESTINATION ${OLLAMA_VULKAN_INSTALL_DIR} COMPONENT Vulkan + RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT Vulkan + LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT Vulkan ) endif() diff --git a/Dockerfile b/Dockerfile index aeab5947f..7478fbd95 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ARG ROCMVERSION=6.3.3 ARG JETPACK5VERSION=r35.4.1 ARG JETPACK6VERSION=r36.4.0 ARG CMAKEVERSION=3.31.2 -ARG VULKANVERSION=1.4.313.2 +ARG VULKANVERSION=1.4.321.1 # We require gcc v10 minimum. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64 @@ -88,7 +88,7 @@ FROM base AS rocm-6 ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH ARG PARALLEL RUN --mount=type=cache,target=/root/.ccache \ - cmake --preset 'ROCm 6' \ + cmake --preset 'ROCm 6' -DOLLAMA_RUNNER_DIR="rocm" \ && cmake --build --parallel ${PARALLEL} --preset 'ROCm 6' \ && cmake --install build --component HIP --strip --parallel ${PARALLEL} @@ -100,7 +100,7 @@ COPY CMakeLists.txt CMakePresets.json . COPY ml/backend/ggml/ggml ml/backend/ggml/ggml ARG PARALLEL RUN --mount=type=cache,target=/root/.ccache \ - cmake --preset 'JetPack 5' \ + cmake --preset 'JetPack 5' -DOLLAMA_RUNNER_DIR="cuda_jetpack5" \ && cmake --build --parallel ${PARALLEL} --preset 'JetPack 5' \ && cmake --install build --component CUDA --strip --parallel ${PARALLEL} @@ -112,13 +112,13 @@ COPY CMakeLists.txt CMakePresets.json . COPY ml/backend/ggml/ggml ml/backend/ggml/ggml ARG PARALLEL RUN --mount=type=cache,target=/root/.ccache \ - cmake --preset 'JetPack 6' \ + cmake --preset 'JetPack 6' -DOLLAMA_RUNNER_DIR="cuda_jetpack6" \ && cmake --build --parallel ${PARALLEL} --preset 'JetPack 6' \ && cmake --install build --component CUDA --strip --parallel ${PARALLEL} FROM base AS vulkan RUN --mount=type=cache,target=/root/.ccache \ - cmake --preset 'Vulkan' \ + cmake --preset 'Vulkan' -DOLLAMA_RUNNER_DIR="vulkan" \ && cmake --build --parallel --preset 'Vulkan' \ && cmake --install build --component Vulkan --strip --parallel 8 @@ -140,15 +140,15 @@ RUN --mount=type=cache,target=/root/.cache/go-build \ FROM --platform=linux/amd64 scratch AS amd64 # COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/ COPY --from=cuda-12 dist/lib/ollama /lib/ollama/ -COPY --from=cuda-13 dist/lib/ollama/ /lib/ollama/ -COPY --from=vulkan dist/lib/ollama/vulkan /lib/ollama/vulkan +COPY --from=cuda-13 dist/lib/ollama /lib/ollama/ +COPY --from=vulkan dist/lib/ollama /lib/ollama/ FROM --platform=linux/arm64 scratch AS arm64 # COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/ COPY --from=cuda-12 dist/lib/ollama /lib/ollama/ -COPY --from=cuda-13 dist/lib/ollama/ /lib/ollama/ -COPY --from=jetpack-5 dist/lib/ollama /lib/ollama/cuda_jetpack5 -COPY --from=jetpack-6 dist/lib/ollama /lib/ollama/cuda_jetpack6 +COPY --from=cuda-13 dist/lib/ollama /lib/ollama/ +COPY --from=jetpack-5 dist/lib/ollama /lib/ollama/ +COPY --from=jetpack-6 dist/lib/ollama /lib/ollama/ FROM scratch AS rocm COPY --from=rocm-6 dist/lib/ollama /lib/ollama diff --git a/discover/gpu.go b/discover/gpu.go index 872b06c64..0cae79005 100644 --- a/discover/gpu.go +++ b/discover/gpu.go @@ -71,11 +71,9 @@ func devInfoToInfoList(devs []ml.DeviceInfo) GpuInfoList { } else { info.Compute = fmt.Sprintf("%d.%d", dev.ComputeMajor, dev.ComputeMinor) } + // TODO any special processing of Vulkan devices? resp = append(resp, info) } - for _, gpu := range vulkanGPUs { - resp = append(resp, gpu.GpuInfo) - } if len(resp) == 0 { mem, err := GetCPUMem() if err != nil { @@ -93,18 +91,20 @@ func devInfoToInfoList(devs []ml.DeviceInfo) GpuInfoList { // Given the list of GPUs this instantiation is targeted for, // figure out the visible devices environment variable -// -// # If different libraries are detected, the first one is what we use -// -// TODO once we're purely running on the new runner, this level of device -// filtering will no longer be necessary. Instead the runner can be told which -// of the set of GPUs to utilize and handle filtering itself, instead of relying -// on the env var to hide devices from the underlying GPU libraries func (l GpuInfoList) GetVisibleDevicesEnv() []string { if len(l) == 0 { return nil } - return []string{rocmGetVisibleDevicesEnv(l)} + res := []string{} + envVar := rocmGetVisibleDevicesEnv(l) + if envVar != "" { + res = append(res, envVar) + } + envVar = vkGetVisibleDevicesEnv(l) + if envVar != "" { + res = append(res, envVar) + } + return res } func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string { @@ -134,6 +134,22 @@ func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string { return envVar + strings.Join(ids, ",") } +func vkGetVisibleDevicesEnv(gpuInfo []GpuInfo) string { + ids := []string{} + for _, info := range gpuInfo { + if info.Library != "VULKAN" { + continue + } + ids = append(ids, info.ID) + + } + if len(ids) == 0 { + return "" + } + envVar := "GGML_VK_VISIBLE_DEVICES=" + return envVar + strings.Join(ids, ",") +} + // GetSystemInfo returns the last cached state of the GPUs on the system func GetSystemInfo() SystemInfo { deviceMu.Lock() diff --git a/discover/gpu_info_vulkan.c b/discover/gpu_info_vulkan.c deleted file mode 100644 index 65033ad8a..000000000 --- a/discover/gpu_info_vulkan.c +++ /dev/null @@ -1,241 +0,0 @@ -#ifndef __APPLE__ -#include "gpu_info_vulkan.h" - -#include - -int is_extension_supported(vk_handle_t* rh, VkPhysicalDevice device, char* extension) { - VkPhysicalDeviceProperties properties = {}; - (*rh->vkGetPhysicalDeviceProperties)(device, &properties); - - uint32_t extensionCount; - (*rh->vkEnumerateDeviceExtensionProperties)(device, NULL, &extensionCount, NULL); - - if (extensionCount == 0) { - return 0; - } - - VkExtensionProperties* extensions = malloc(extensionCount * sizeof(VkExtensionProperties)); - if (extensions == NULL) { - return 0; - } - - (*rh->vkEnumerateDeviceExtensionProperties)(device, NULL, &extensionCount, extensions); - - for (int j = 0; j < extensionCount; j++) { - if (strcmp(extensions[j].extensionName, extension) == 0) { - free(extensions); - return 1; - } - } - - free(extensions); - return 0; -} - -void vk_init(char* vk_lib_path, vk_init_resp_t *resp) { - const int buflen = 256; - char buf[buflen + 1]; - int i; - - struct lookup { - char *s; - void **p; - } l[] = { - {"vkGetPhysicalDeviceProperties", (void *)&resp->ch.vkGetPhysicalDeviceProperties}, - {"vkGetPhysicalDeviceProperties2", (void *)&resp->ch.vkGetPhysicalDeviceProperties2}, - {"vkEnumerateDeviceExtensionProperties", (void *)&resp->ch.vkEnumerateDeviceExtensionProperties}, - {"vkCreateInstance", (void *)&resp->ch.vkCreateInstance}, - {"vkEnumeratePhysicalDevices", (void *)&resp->ch.vkEnumeratePhysicalDevices}, - {"vkGetPhysicalDeviceMemoryProperties2", (void *)&resp->ch.vkGetPhysicalDeviceMemoryProperties2}, - {"vkDestroyInstance", (void *)&resp->ch.vkDestroyInstance}, - {NULL, NULL}, - }; - - resp->ch.vk_handle = LOAD_LIBRARY(vk_lib_path, RTLD_LAZY); - if (!resp->ch.vk_handle) { - char *msg = LOAD_ERR(); - LOG(resp->ch.verbose, "library %s load err: %s\n", vk_lib_path, msg); - snprintf(buf, buflen, - "Unable to load %s library to query for Vulkan GPUs: %s", - vk_lib_path, msg); - free(msg); - resp->err = strdup(buf); - return; - } - - for (i = 0; l[i].s != NULL; i++) { - *l[i].p = LOAD_SYMBOL(resp->ch.vk_handle, l[i].s); - if (!*l[i].p) { - char *msg = LOAD_ERR(); - LOG(resp->ch.verbose, "dlerr: %s\n", msg); - UNLOAD_LIBRARY(resp->ch.vk_handle); - resp->ch.vk_handle = NULL; - snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, - msg); - free(msg); - resp->err = strdup(buf); - return; - } - } - - VkInstance instance; - - VkApplicationInfo appInfo = {}; - appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; - appInfo.pNext = NULL; - appInfo.pApplicationName = "Ollama"; - appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0); - appInfo.pEngineName = "No Engine"; - appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0); - appInfo.apiVersion = VK_API_VERSION_1_2; - - VkInstanceCreateInfo createInfo = {}; - createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - createInfo.pNext = NULL; - createInfo.flags = 0; - createInfo.enabledExtensionCount = 1; - const char* extensions[] = { VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME }; - createInfo.ppEnabledExtensionNames = extensions; - createInfo.pApplicationInfo = &appInfo; - - VkResult result = (*resp->ch.vkCreateInstance)(&createInfo, NULL, &instance); - if (result != VK_SUCCESS) { - resp->err = strdup("failed to create instance"); - return; - } - - uint32_t deviceCount; - result = (*resp->ch.vkEnumeratePhysicalDevices)(instance, &deviceCount, NULL); - if (result != VK_SUCCESS) { - resp->err = strdup("failed to enumerate physical devices"); - return; - } - - resp->err = NULL; - resp->ch.vk = instance; - resp->ch.num_devices = deviceCount; - resp->num_devices = deviceCount; -} - -int vk_check_flash_attention(vk_handle_t rh, int i) { - VkInstance instance = rh.vk; - uint32_t deviceCount = rh.num_devices; - - VkPhysicalDevice* devices = malloc(deviceCount * sizeof(VkPhysicalDevice)); - if (devices == NULL) { - return 0; - } - - VkResult result = (*rh.vkEnumeratePhysicalDevices)(instance, &deviceCount, devices); - if (result != VK_SUCCESS) { - free(devices); - return 0; - } - - VkPhysicalDeviceProperties properties = {}; - (*rh.vkGetPhysicalDeviceProperties)(devices[i], &properties); - - int supports_nv_coopmat2 = is_extension_supported(&rh, devices[i], VK_NV_COOPERATIVE_MATRIX_2_EXTENSION_NAME); - if (!supports_nv_coopmat2) { - free(devices); - return 1; - } - - free(devices); - return 0; -} - -void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp) { - VkInstance instance = rh.vk; - uint32_t deviceCount = rh.num_devices; - - VkPhysicalDevice* devices = malloc(deviceCount * sizeof(VkPhysicalDevice)); - if (devices == NULL) { - resp->err = strdup("memory allocation failed for devices array"); - return; - } - - VkResult result = (*rh.vkEnumeratePhysicalDevices)(instance, &deviceCount, devices); - if (result != VK_SUCCESS) { - free(devices); - resp->err = strdup("failed to enumerate physical devices"); - return; - } - - VkPhysicalDeviceProperties properties = {}; - (*rh.vkGetPhysicalDeviceProperties)(devices[i], &properties); - - int supports_budget = is_extension_supported(&rh, devices[i], VK_EXT_MEMORY_BUDGET_EXTENSION_NAME); - if (!supports_budget) { - free(devices); - resp->err = strdup("device does not support memory budget"); - return; - } - - if (properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU) { - free(devices); - resp->err = strdup("device is a CPU"); - return; - } - - VkPhysicalDeviceProperties2 device_props2 = {}; - device_props2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - - VkPhysicalDeviceIDProperties id_props = {}; - id_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; - - device_props2.pNext = &id_props; - (*rh.vkGetPhysicalDeviceProperties2)(devices[i], &device_props2); - - VkPhysicalDeviceMemoryBudgetPropertiesEXT physical_device_memory_budget_properties = {}; - physical_device_memory_budget_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; - physical_device_memory_budget_properties.pNext = NULL; - - VkPhysicalDeviceMemoryProperties2 device_memory_properties = {}; - device_memory_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2; - device_memory_properties.pNext = &physical_device_memory_budget_properties; - - (*rh.vkGetPhysicalDeviceMemoryProperties2)(devices[i], &device_memory_properties); - - VkDeviceSize device_memory_total_size = 0; - VkDeviceSize device_memory_heap_budget = 0; - - for (uint32_t j = 0; j < device_memory_properties.memoryProperties.memoryHeapCount; j++) { - VkMemoryHeap heap = device_memory_properties.memoryProperties.memoryHeaps[j]; - if (heap.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) { - device_memory_total_size += heap.size; - device_memory_heap_budget += physical_device_memory_budget_properties.heapBudget[j]; - } - } - - free(devices); - - resp->err = NULL; - snprintf(&resp->gpu_id[0], GPU_ID_LEN, "%d", i); - resp->gpu_name[GPU_NAME_LEN - 1] = '\0'; - strncpy(&resp->gpu_name[0], properties.deviceName, GPU_NAME_LEN - 1); - resp->gpu_name[GPU_NAME_LEN - 1] = '\0'; - const uint8_t *uuid = id_props.deviceUUID; - snprintf(&resp->gpu_id[0], GPU_ID_LEN, - "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", - uuid[0], uuid[1], uuid[2], uuid[3], - uuid[4], uuid[5], - uuid[6], uuid[7], - uuid[8], uuid[9], - uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15] - ); - resp->total = (uint64_t) device_memory_total_size; - resp->free = (uint64_t) device_memory_heap_budget; - resp->major = VK_API_VERSION_MAJOR(properties.apiVersion); - resp->minor = VK_API_VERSION_MINOR(properties.apiVersion); - resp->patch = VK_API_VERSION_PATCH(properties.apiVersion); -} - -void vk_release(vk_handle_t rh) { - LOG(rh.verbose, "releasing vulkan library\n"); - (*rh.vkDestroyInstance)(rh.vk, NULL); - UNLOAD_LIBRARY(rh.vk_handle); - rh.vk_handle = NULL; -} - -#endif // __APPLE__ diff --git a/discover/gpu_info_vulkan.h b/discover/gpu_info_vulkan.h deleted file mode 100644 index 42e4b1610..000000000 --- a/discover/gpu_info_vulkan.h +++ /dev/null @@ -1,394 +0,0 @@ -#ifndef __APPLE__ -#ifndef __GPU_INFO_VULKAN_H__ -#define __GPU_INFO_VULKAN_H__ - -#include "gpu_info.h" - -#define VK_DEFINE_HANDLE(object) typedef struct object##_T* object; -VK_DEFINE_HANDLE(VkInstance) -VK_DEFINE_HANDLE(VkPhysicalDevice) - -#define VK_MAX_EXTENSION_NAME_SIZE 256U -#define VK_MAX_DESCRIPTION_SIZE 256U -#define VK_LUID_SIZE 8U -#define VK_UUID_SIZE 16U -#define VK_MAX_MEMORY_TYPES 32U -#define VK_MAX_MEMORY_HEAPS 16U -#define VK_MAX_PHYSICAL_DEVICE_NAME_SIZE 256U - -#define VK_MAKE_VERSION(major, minor, patch) \ - ((((uint32_t)(major)) << 22U) | (((uint32_t)(minor)) << 12U) | ((uint32_t)(patch))) - -#define VK_MAKE_API_VERSION(variant, major, minor, patch) \ - ((((uint32_t)(variant)) << 29U) | (((uint32_t)(major)) << 22U) | (((uint32_t)(minor)) << 12U) | ((uint32_t)(patch))) - -#define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0 -#define VK_API_VERSION_1_1 VK_MAKE_API_VERSION(0, 1, 1, 0)// Patch version should always be set to 0 -#define VK_API_VERSION_1_2 VK_MAKE_API_VERSION(0, 1, 2, 0)// Patch version should always be set to 0 -#define VK_API_VERSION_1_3 VK_MAKE_API_VERSION(0, 1, 3, 0)// Patch version should always be set to 0 -#define VK_API_VERSION_MAJOR(version) (((uint32_t)(version) >> 22U) & 0x7FU) -#define VK_API_VERSION_MINOR(version) (((uint32_t)(version) >> 12U) & 0x3FFU) -#define VK_API_VERSION_PATCH(version) ((uint32_t)(version) & 0xFFFU) - -#define VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME "VK_KHR_get_physical_device_properties2" -#define VK_NV_COOPERATIVE_MATRIX_2_EXTENSION_NAME "VK_NV_cooperative_matrix2" -#define VK_EXT_MEMORY_BUDGET_EXTENSION_NAME "VK_EXT_memory_budget" - -typedef uint32_t VkFlags; -typedef uint32_t VkBool32; -typedef uint64_t VkDeviceSize; -typedef uint32_t VkSampleMask; -typedef VkFlags VkSampleCountFlags; -typedef VkFlags VkMemoryPropertyFlags; -typedef VkFlags VkMemoryHeapFlags; -typedef VkFlags VkInstanceCreateFlags; - -typedef enum VkResult { - VK_SUCCESS = 0, - VK_NOT_READY = 1, - VK_TIMEOUT = 2, - VK_EVENT_SET = 3, - VK_EVENT_RESET = 4, - VK_INCOMPLETE = 5, - VK_ERROR_OUT_OF_HOST_MEMORY = -1, - VK_ERROR_OUT_OF_DEVICE_MEMORY = -2, - VK_ERROR_INITIALIZATION_FAILED = -3, - VK_ERROR_DEVICE_LOST = -4, - VK_ERROR_MEMORY_MAP_FAILED = -5, - VK_ERROR_LAYER_NOT_PRESENT = -6, - VK_ERROR_EXTENSION_NOT_PRESENT = -7, - VK_ERROR_FEATURE_NOT_PRESENT = -8, - VK_ERROR_INCOMPATIBLE_DRIVER = -9, - VK_ERROR_TOO_MANY_OBJECTS = -10, - VK_ERROR_FORMAT_NOT_SUPPORTED = -11, - VK_ERROR_FRAGMENTED_POOL = -12, - VK_ERROR_UNKNOWN = -13, - VK_ERROR_OUT_OF_POOL_MEMORY = -1000069000, - VK_ERROR_INVALID_EXTERNAL_HANDLE = -1000072003, - VK_ERROR_FRAGMENTATION = -1000168000, - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS = -1000257000, - VK_PIPELINE_COMPILE_REQUIRED = 1000297000, - VK_ERROR_SURFACE_LOST_KHR = -1000000000, - VK_ERROR_NATIVE_WINDOW_IN_USE_KHR = -1000000001, - VK_SUBOPTIMAL_KHR = 1000001003, - VK_ERROR_OUT_OF_DATE_KHR = -1000001004, - VK_ERROR_INCOMPATIBLE_DISPLAY_KHR = -1000003001, - VK_ERROR_VALIDATION_FAILED_EXT = -1000011001, - VK_ERROR_INVALID_SHADER_NV = -1000012000, - VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR = -1000158000, - VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR = -1000158001, - VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR = -1000158002, - VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR = -1000158003, - VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR = -1000158004, - VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR = -1000158005, - VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT = -1000158006, - VK_ERROR_NOT_PERMITTED_KHR = -1000174001, - VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT = -1000255000, - VK_THREAD_IDLE_KHR = 1000268000, - VK_THREAD_DONE_KHR = 1000268001, - VK_OPERATION_DEFERRED_KHR = 1000268002, - VK_OPERATION_NOT_DEFERRED_KHR = 1000268003, - VK_ERROR_COMPRESSION_EXHAUSTED_EXT = -1000338000, - VK_RESULT_MAX_ENUM = 0x7FFFFFFF -} VkResult; - -typedef enum VkStructureType { - VK_STRUCTURE_TYPE_APPLICATION_INFO = 0, - VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 1, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 = 1000059001, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2 = 1000059006, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES = 1000071004, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT = 1000237000, - VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkStructureType; - -typedef enum VkPhysicalDeviceType { - VK_PHYSICAL_DEVICE_TYPE_OTHER = 0, - VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU = 1, - VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU = 2, - VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU = 3, - VK_PHYSICAL_DEVICE_TYPE_CPU = 4, - VK_PHYSICAL_DEVICE_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkPhysicalDeviceType; - -typedef enum VkSystemAllocationScope { - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND = 0, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT = 1, - VK_SYSTEM_ALLOCATION_SCOPE_CACHE = 2, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE = 3, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE = 4, - VK_SYSTEM_ALLOCATION_SCOPE_MAX_ENUM = 0x7FFFFFFF -} VkSystemAllocationScope; - -typedef enum VkInternalAllocationType { - VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE = 0, - VK_INTERNAL_ALLOCATION_TYPE_NON_EXECUTABLE = 1, - VK_INTERNAL_ALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkInternalAllocationType; - -typedef enum VkMemoryHeapFlagBits { - VK_MEMORY_HEAP_DEVICE_LOCAL_BIT = 0x00000001, - VK_MEMORY_HEAP_MULTI_INSTANCE_BIT = 0x00000002, - VK_MEMORY_HEAP_TILE_MEMORY_BIT_QCOM = 0x00000008, - VK_MEMORY_HEAP_MULTI_INSTANCE_BIT_KHR = VK_MEMORY_HEAP_MULTI_INSTANCE_BIT, - VK_MEMORY_HEAP_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VkMemoryHeapFlagBits; - -typedef struct VkExtensionProperties { - char extensionName[VK_MAX_EXTENSION_NAME_SIZE]; - uint32_t specVersion; -} VkExtensionProperties; - -typedef struct VkPhysicalDeviceLimits { - uint32_t maxImageDimension1D; - uint32_t maxImageDimension2D; - uint32_t maxImageDimension3D; - uint32_t maxImageDimensionCube; - uint32_t maxImageArrayLayers; - uint32_t maxTexelBufferElements; - uint32_t maxUniformBufferRange; - uint32_t maxStorageBufferRange; - uint32_t maxPushConstantsSize; - uint32_t maxMemoryAllocationCount; - uint32_t maxSamplerAllocationCount; - VkDeviceSize bufferImageGranularity; - VkDeviceSize sparseAddressSpaceSize; - uint32_t maxBoundDescriptorSets; - uint32_t maxPerStageDescriptorSamplers; - uint32_t maxPerStageDescriptorUniformBuffers; - uint32_t maxPerStageDescriptorStorageBuffers; - uint32_t maxPerStageDescriptorSampledImages; - uint32_t maxPerStageDescriptorStorageImages; - uint32_t maxPerStageDescriptorInputAttachments; - uint32_t maxPerStageResources; - uint32_t maxDescriptorSetSamplers; - uint32_t maxDescriptorSetUniformBuffers; - uint32_t maxDescriptorSetUniformBuffersDynamic; - uint32_t maxDescriptorSetStorageBuffers; - uint32_t maxDescriptorSetStorageBuffersDynamic; - uint32_t maxDescriptorSetSampledImages; - uint32_t maxDescriptorSetStorageImages; - uint32_t maxDescriptorSetInputAttachments; - uint32_t maxVertexInputAttributes; - uint32_t maxVertexInputBindings; - uint32_t maxVertexInputAttributeOffset; - uint32_t maxVertexInputBindingStride; - uint32_t maxVertexOutputComponents; - uint32_t maxTessellationGenerationLevel; - uint32_t maxTessellationPatchSize; - uint32_t maxTessellationControlPerVertexInputComponents; - uint32_t maxTessellationControlPerVertexOutputComponents; - uint32_t maxTessellationControlPerPatchOutputComponents; - uint32_t maxTessellationControlTotalOutputComponents; - uint32_t maxTessellationEvaluationInputComponents; - uint32_t maxTessellationEvaluationOutputComponents; - uint32_t maxGeometryShaderInvocations; - uint32_t maxGeometryInputComponents; - uint32_t maxGeometryOutputComponents; - uint32_t maxGeometryOutputVertices; - uint32_t maxGeometryTotalOutputComponents; - uint32_t maxFragmentInputComponents; - uint32_t maxFragmentOutputAttachments; - uint32_t maxFragmentDualSrcAttachments; - uint32_t maxFragmentCombinedOutputResources; - uint32_t maxComputeSharedMemorySize; - uint32_t maxComputeWorkGroupCount[3]; - uint32_t maxComputeWorkGroupInvocations; - uint32_t maxComputeWorkGroupSize[3]; - uint32_t subPixelPrecisionBits; - uint32_t subTexelPrecisionBits; - uint32_t mipmapPrecisionBits; - uint32_t maxDrawIndexedIndexValue; - uint32_t maxDrawIndirectCount; - float maxSamplerLodBias; - float maxSamplerAnisotropy; - uint32_t maxViewports; - uint32_t maxViewportDimensions[2]; - float viewportBoundsRange[2]; - uint32_t viewportSubPixelBits; - size_t minMemoryMapAlignment; - VkDeviceSize minTexelBufferOffsetAlignment; - VkDeviceSize minUniformBufferOffsetAlignment; - VkDeviceSize minStorageBufferOffsetAlignment; - int32_t minTexelOffset; - uint32_t maxTexelOffset; - int32_t minTexelGatherOffset; - uint32_t maxTexelGatherOffset; - float minInterpolationOffset; - float maxInterpolationOffset; - uint32_t subPixelInterpolationOffsetBits; - uint32_t maxFramebufferWidth; - uint32_t maxFramebufferHeight; - uint32_t maxFramebufferLayers; - VkSampleCountFlags framebufferColorSampleCounts; - VkSampleCountFlags framebufferDepthSampleCounts; - VkSampleCountFlags framebufferStencilSampleCounts; - VkSampleCountFlags framebufferNoAttachmentsSampleCounts; - uint32_t maxColorAttachments; - VkSampleCountFlags sampledImageColorSampleCounts; - VkSampleCountFlags sampledImageIntegerSampleCounts; - VkSampleCountFlags sampledImageDepthSampleCounts; - VkSampleCountFlags sampledImageStencilSampleCounts; - VkSampleCountFlags storageImageSampleCounts; - uint32_t maxSampleMaskWords; - VkBool32 timestampComputeAndGraphics; - float timestampPeriod; - uint32_t maxClipDistances; - uint32_t maxCullDistances; - uint32_t maxCombinedClipAndCullDistances; - uint32_t discreteQueuePriorities; - float pointSizeRange[2]; - float lineWidthRange[2]; - float pointSizeGranularity; - float lineWidthGranularity; - VkBool32 strictLines; - VkBool32 standardSampleLocations; - VkDeviceSize optimalBufferCopyOffsetAlignment; - VkDeviceSize optimalBufferCopyRowPitchAlignment; - VkDeviceSize nonCoherentAtomSize; -} VkPhysicalDeviceLimits; - -typedef struct VkPhysicalDeviceSparseProperties { - VkBool32 residencyStandard2DBlockShape; - VkBool32 residencyStandard2DMultisampleBlockShape; - VkBool32 residencyStandard3DBlockShape; - VkBool32 residencyAlignedMipSize; - VkBool32 residencyNonResidentStrict; -} VkPhysicalDeviceSparseProperties; - -typedef struct VkPhysicalDeviceProperties { - uint32_t apiVersion; - uint32_t driverVersion; - uint32_t vendorID; - uint32_t deviceID; - uint32_t deviceType; - char deviceName[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; - uint8_t pipelineCacheUUID[VK_UUID_SIZE]; - VkPhysicalDeviceLimits limits; - VkPhysicalDeviceSparseProperties sparseProperties; -} VkPhysicalDeviceProperties; - -typedef struct VkPhysicalDeviceProperties2 { - VkStructureType sType; - void* pNext; - VkPhysicalDeviceProperties properties; -} VkPhysicalDeviceProperties2; - -typedef struct VkPhysicalDeviceIDProperties { - VkStructureType sType; - void* pNext; - uint8_t deviceUUID[VK_UUID_SIZE]; - uint8_t driverUUID[VK_UUID_SIZE]; - uint8_t deviceLUID[VK_LUID_SIZE]; - uint32_t deviceNodeMask; - VkBool32 deviceLUIDValid; -} VkPhysicalDeviceIDProperties; - -typedef struct VkMemoryType { - VkMemoryPropertyFlags propertyFlags; - uint32_t heapIndex; -} VkMemoryType; - -typedef struct VkMemoryHeap { - VkDeviceSize size; - VkMemoryHeapFlags flags; -} VkMemoryHeap; - -typedef struct VkPhysicalDeviceMemoryProperties { - uint32_t memoryTypeCount; - VkMemoryType memoryTypes[VK_MAX_MEMORY_TYPES]; - uint32_t memoryHeapCount; - VkMemoryHeap memoryHeaps[VK_MAX_MEMORY_HEAPS]; -} VkPhysicalDeviceMemoryProperties; - -typedef struct VkPhysicalDeviceMemoryProperties2 { - VkStructureType sType; - void* pNext; - VkPhysicalDeviceMemoryProperties memoryProperties; -} VkPhysicalDeviceMemoryProperties2; - -typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT { - VkStructureType sType; - void* pNext; - VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; - VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; -} VkPhysicalDeviceMemoryBudgetPropertiesEXT; - -typedef struct VkApplicationInfo { - VkStructureType sType; - const void* pNext; - const char* pApplicationName; - uint32_t applicationVersion; - const char* pEngineName; - uint32_t engineVersion; - uint32_t apiVersion; -} VkApplicationInfo; - -typedef struct VkInstanceCreateInfo { - VkStructureType sType; - const void* pNext; - VkInstanceCreateFlags flags; - const VkApplicationInfo* pApplicationInfo; - uint32_t enabledLayerCount; - const char* const* ppEnabledLayerNames; - uint32_t enabledExtensionCount; - const char* const* ppEnabledExtensionNames; -} VkInstanceCreateInfo; - -typedef struct VkAllocationCallbacks { - void* pUserData; - void* (*pfnAllocation)(void* pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope); - void* (*pfnReallocation)(void* pUserData, void* pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope); - void (*pfnFree)(void* pUserData, void* pMemory); - void (*pfnInternalAllocation)(void* pUserData, size_t size, VkInternalAllocationType allocationType, VkSystemAllocationScope allocationScope); - void (*pfnInternalFree)(void* pUserData, size_t size, VkInternalAllocationType allocationType, VkSystemAllocationScope allocationScope); -} VkAllocationCallbacks; - -typedef struct { - void* vk_handle; - uint16_t verbose; - - VkInstance vk; - int num_devices; - - void (*vkGetPhysicalDeviceProperties)( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties* pProperties); - void (*vkGetPhysicalDeviceProperties2)( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties2* pProperties); - VkResult (*vkEnumerateDeviceExtensionProperties)( - VkPhysicalDevice physicalDevice, - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties); - VkResult (*vkCreateInstance)( - const VkInstanceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkInstance* pInstance); - VkResult (*vkEnumeratePhysicalDevices)( - VkInstance instance, - uint32_t* pPhysicalDeviceCount, - VkPhysicalDevice* pPhysicalDevices); - void (*vkGetPhysicalDeviceMemoryProperties2)( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties2* pMemoryProperties); - void (*vkDestroyInstance)( - VkInstance instance, - const VkAllocationCallbacks* pAllocator); -} vk_handle_t; - -typedef struct vk_init_resp -{ - char *err; // If err is non-null handle is invalid - int num_devices; - vk_handle_t ch; -} vk_init_resp_t; - -void vk_init(char* vk_lib_path, vk_init_resp_t *resp); -void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp); -int vk_check_flash_attention(vk_handle_t rh, int i); -void vk_release(vk_handle_t rh); - -#endif -#endif \ No newline at end of file diff --git a/discover/runner.go b/discover/runner.go index 5e4e05f95..c9bbef7f5 100644 --- a/discover/runner.go +++ b/discover/runner.go @@ -92,6 +92,7 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev // are enumerated, but not actually supported. // We run this in serial to avoid potentially initializing a GPU multiple // times concurrently leading to memory contention + // TODO refactor so we group the lib dirs and do serial per version, but parallel for different libs for dir := range libDirs { var dirs []string if dir == "" { @@ -125,8 +126,10 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev } else { envVar = "ROCR_VISIBLE_DEVICES" } - } else { + } else if devices[i].Library == "CUDA" { envVar = "CUDA_VISIBLE_DEVICES" + } else if devices[i].Library == "VULKAN" { + envVar = "GGML_VK_VISIBLE_DEVICES" } extraEnvs := []string{ diff --git a/discover/types.go b/discover/types.go index feb8c08e0..5a9ce1865 100644 --- a/discover/types.go +++ b/discover/types.go @@ -36,10 +36,11 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"? UnreliableFreeMemory bool // GPU information - ID string `json:"gpu_id"` // string to use for selection of this specific GPU - filterID string // AMD Workaround: The numeric ID of the device used to filter out other devices - Name string `json:"name"` // user friendly name if available - Compute string `json:"compute"` // Compute Capability or gfx + ID string `json:"gpu_id"` // string to use for selection of this specific GPU + filterID string // AMD Workaround: The numeric ID of the device used to filter out other devices + Name string `json:"name"` // user friendly name if available + Compute string `json:"compute"` // Compute Capability or gfx + FlashAttention bool `json:"flash_attention"` // is flash attention supported // Driver Information - TODO no need to put this on each GPU DriverMajor int `json:"driver_major,omitempty"` @@ -174,7 +175,8 @@ func (l GpuInfoList) FlashAttentionSupported() bool { supportsFA := gpu.Library == "cpu" || gpu.Name == "Metal" || (gpu.Library == "CUDA" && gpu.DriverMajor >= 7) || - gpu.Library == "HIP" + gpu.Library == "HIP" || + gpu.Library == "VULKAN" if !supportsFA { return false diff --git a/llm/server.go b/llm/server.go index d3438e6c2..8757d4d56 100644 --- a/llm/server.go +++ b/llm/server.go @@ -561,10 +561,11 @@ func (s *llamaServer) Load(ctx context.Context, gpus discover.GpuInfoList, requi // Windows CUDA should not use mmap for best performance // Linux with a model larger than free space, mmap leads to thrashing // For CPU loads we want the memory to be allocated, not FS cache - if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && s.options.UseMMap == nil) || + if (runtime.GOOS == "windows" && gpus[0].Library == "CUDA" && s.options.UseMMap == nil) || (runtime.GOOS == "linux" && systemInfo.System.FreeMemory < s.estimate.TotalSize && s.options.UseMMap == nil) || (gpus[0].Library == "vulkan" && s.options.UseMMap == nil) || (gpus[0].Library == "cpu" && s.options.UseMMap == nil) || + (gpus[0].Library == "VULKAN" && s.options.UseMMap == nil) || (s.options.UseMMap != nil && !*s.options.UseMMap) { s.loadRequest.UseMmap = false } diff --git a/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp index d73cdf176..3b0a0891e 100644 --- a/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -4123,7 +4123,6 @@ static void ggml_vk_instance_init() { } } else { std::vector devices = vk_instance.instance.enumeratePhysicalDevices(); - // If no vulkan devices are found, return early if (devices.empty()) { GGML_LOG_INFO("ggml_vulkan: No devices found.\n"); @@ -10821,14 +10820,90 @@ std::string ggml_backend_vk_get_device_id(int device) { return ggml_vk_get_device_id(dev_idx); } -void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) { - GGML_ASSERT(device < (int) vk_instance.device_indices.size()); +////////////////////////// - vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]]; +struct ggml_backend_vk_device_context { + size_t device; + std::string name; + std::string description; + std::string id; + std::string uuid; + int major; + int minor; + int driver_major; + int driver_minor; + int integrated; + int pci_bus_id; + int pci_device_id; + int pci_domain_id; +}; + +void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size_t * free, size_t * total) { + GGML_ASSERT(ctx->device < (int) vk_instance.device_indices.size()); + + vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[ctx->device]]; vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties(); + vk::PhysicalDeviceProperties2 props2; + vkdev.getProperties2(&props2); - for (const vk::MemoryHeap& heap : memprops.memoryHeaps) { + // Use vendor specific management libraries for best VRAM reporting if available + switch (props2.properties.vendorID) { + case VK_VENDOR_ID_AMD: + if (ggml_hip_mgmt_init() == 0) { + int status = ggml_hip_get_device_memory(ctx->pci_bus_id, ctx->pci_device_id, free, total); + if (status == 0) { + GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total); + ggml_hip_mgmt_release(); + return; + } + ggml_hip_mgmt_release(); + } + break; + case VK_VENDOR_ID_NVIDIA: + if (ggml_nvml_init() == 0) { + int status = ggml_nvml_get_device_memory(ctx->uuid.c_str(), free, total); + if (status == 0) { + GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total); + ggml_nvml_release(); + return; + } + ggml_nvml_release(); + } + break; + } + // else fallback to memory budget if supported + + *total = 0; + *free = 0; + vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props; + vk::PhysicalDeviceMemoryProperties2 memprops2; + memprops2.pNext = &mem_budget_props; + vkdev.getMemoryProperties2(&memprops2); + for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) { + if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) { + *total += memprops2.memoryProperties.memoryHeaps[i].size; + } else if (ctx->integrated) { + // Include shared memory on iGPUs + *total += memprops2.memoryProperties.memoryHeaps[i].size; + } + } + for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) { + if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) { + *free += mem_budget_props.heapBudget[i]; + } else if (ctx->integrated) { + *free += mem_budget_props.heapBudget[i]; + } + } + if (*total > 0 && *free > 0) { + return; + } else if (*total > 0) { + *free = *total; + return; + } + + // else just report the physical memory + for (const vk::MemoryHeap& heap : memprops2.memoryProperties.memoryHeaps) { if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) { *total = heap.size; *free = heap.size; @@ -10837,14 +10912,6 @@ void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total } } -////////////////////////// - -struct ggml_backend_vk_device_context { - size_t device; - std::string name; - std::string description; - std::string id; -}; static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) { ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; @@ -10863,7 +10930,7 @@ static const char * ggml_backend_vk_device_get_id(ggml_backend_dev_t dev) { static void ggml_backend_vk_device_get_memory(ggml_backend_dev_t device, size_t * free, size_t * total) { ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)device->context; - ggml_backend_vk_get_device_memory(ctx->device, free, total); + ggml_backend_vk_get_device_memory(ctx, free, total); } static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) { @@ -10881,6 +10948,7 @@ static enum ggml_backend_dev_type ggml_backend_vk_device_get_type(ggml_backend_d return GGML_BACKEND_DEVICE_TYPE_GPU; } +#define GGML_VULKAN_NAME "VULKAN" static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { props->name = ggml_backend_vk_device_get_name(dev); props->description = ggml_backend_vk_device_get_description(dev); @@ -10893,6 +10961,18 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml /* .buffer_from_host_ptr = */ false, /* .events = */ false, }; + + ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; + props->id = ctx->id.c_str(); + props->compute_major = ctx->major; + props->compute_minor = ctx->minor; + props->driver_major = ctx->driver_major; + props->driver_minor = ctx->driver_minor; + props->integrated = ctx->integrated; + props->pci_bus_id = ctx->pci_bus_id; + props->pci_device_id = ctx->pci_device_id; + props->pci_domain_id = ctx->pci_domain_id; + props->library = GGML_VULKAN_NAME; } static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) { @@ -11296,6 +11376,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, static std::mutex mutex; std::lock_guard lock(mutex); if (!initialized) { + std::vector vk_devices = vk_instance.instance.enumeratePhysicalDevices(); + for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) { ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context; char desc[256]; @@ -11309,6 +11391,44 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, /* .reg = */ reg, /* .context = */ ctx, }); + + // Gather additional information about the device + int dev_idx = vk_instance.device_indices[i]; + vk::PhysicalDeviceProperties props1; + vk_devices[dev_idx].getProperties(&props1); + vk::PhysicalDeviceProperties2 props2; + vk::PhysicalDeviceIDProperties device_id_props; + vk::PhysicalDevicePCIBusInfoPropertiesEXT pci_bus_props; + vk::PhysicalDeviceDriverProperties driver_props; + props2.pNext = &device_id_props; + device_id_props.pNext = &pci_bus_props; + pci_bus_props.pNext = &driver_props; + vk_devices[dev_idx].getProperties2(&props2); + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + oss << "GPU-"; + int byteIdx = 0; + for (int i = 0; i < 16; ++i, ++byteIdx) { + oss << std::setw(2) << static_cast(device_id_props.deviceUUID[i]); + if (byteIdx == 3 || byteIdx == 5 || byteIdx == 7 || byteIdx == 9) { + oss << '-'; + } + } + ctx->uuid = oss.str(); + ctx->pci_bus_id = pci_bus_props.pciBus; + ctx->pci_device_id = pci_bus_props.pciDevice; + ctx->pci_domain_id = pci_bus_props.pciDomain; + ctx->id = std::to_string(i); + if (props1.deviceType == vk::PhysicalDeviceType::eIntegratedGpu) { + ctx->integrated = 1; + } else { + ctx->integrated = 0; + } + ctx->major = 0; + ctx->minor = 0; + // TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string + ctx->driver_major = 0; + ctx->driver_minor = 0; } initialized = true; } diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1 index 3ca25a13c..0a3d7c888 100644 --- a/scripts/build_windows.ps1 +++ b/scripts/build_windows.ps1 @@ -165,12 +165,11 @@ function buildROCm() { $env:HIPCXX="${env:HIP_PATH}\bin\clang++.exe" $env:HIP_PLATFORM="amd" $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" - & cmake --fresh --preset "ROCm 6" -G Ninja ` + & cmake --fresh --preset "ROCm 6" -G Ninja --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="rocm" ` -DCMAKE_C_COMPILER=clang ` -DCMAKE_CXX_COMPILER=clang++ ` -DCMAKE_C_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" ` - -DCMAKE_CXX_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" ` - --install-prefix $script:DIST_DIR + -DCMAKE_CXX_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} $env:HIPCXX="" $env:HIP_PLATFORM="" @@ -186,7 +185,7 @@ function buildROCm() { function buildVulkan(){ if ($env:VULKAN_SDK) { write-host "Building Vulkan backend libraries" - & cmake --fresh --preset Vulkan --install-prefix $script:DIST_DIR + & cmake --fresh --preset Vulkan --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="vulkan" if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} & cmake --build --preset Vulkan --config Release --parallel $script:JOBS if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}