WIP - wire up Vulkan with the new engine based discovery
Not a complete implementation - free VRAM is better, but not accurate on windows
This commit is contained in:
parent
3a8ee62bd5
commit
c86af47ac0
|
|
@ -114,7 +114,6 @@ if(CMAKE_HIP_COMPILER)
|
|||
|
||||
target_compile_definitions(ggml-hip PRIVATE GGML_HIP_NO_VMM)
|
||||
|
||||
set(OLLAMA_HIP_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/rocm)
|
||||
install(TARGETS ggml-hip
|
||||
RUNTIME_DEPENDENCY_SET rocm
|
||||
RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
|
||||
|
|
@ -125,13 +124,13 @@ if(CMAKE_HIP_COMPILER)
|
|||
PRE_INCLUDE_REGEXES hipblas rocblas amdhip64 rocsolver amd_comgr hsa-runtime64 rocsparse tinfo rocprofiler-register drm drm_amdgpu numa elf
|
||||
PRE_EXCLUDE_REGEXES ".*"
|
||||
POST_EXCLUDE_REGEXES "system32"
|
||||
RUNTIME DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP
|
||||
LIBRARY DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP
|
||||
RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
|
||||
LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
|
||||
)
|
||||
|
||||
foreach(HIP_LIB_BIN_INSTALL_DIR IN ITEMS ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR})
|
||||
if(EXISTS ${HIP_LIB_BIN_INSTALL_DIR}/rocblas)
|
||||
install(DIRECTORY ${HIP_LIB_BIN_INSTALL_DIR}/rocblas DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP)
|
||||
install(DIRECTORY ${HIP_LIB_BIN_INSTALL_DIR}/rocblas DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP)
|
||||
break()
|
||||
endif()
|
||||
endforeach()
|
||||
|
|
@ -141,12 +140,11 @@ endif()
|
|||
find_package(Vulkan)
|
||||
if(Vulkan_FOUND)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-vulkan)
|
||||
set(OLLAMA_VULKAN_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/vulkan)
|
||||
install(TARGETS ggml-vulkan
|
||||
RUNTIME_DEPENDENCIES
|
||||
PRE_INCLUDE_REGEXES vulkan
|
||||
PRE_EXCLUDE_REGEXES ".*"
|
||||
RUNTIME DESTINATION ${OLLAMA_VULKAN_INSTALL_DIR} COMPONENT Vulkan
|
||||
LIBRARY DESTINATION ${OLLAMA_VULKAN_INSTALL_DIR} COMPONENT Vulkan
|
||||
RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT Vulkan
|
||||
LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT Vulkan
|
||||
)
|
||||
endif()
|
||||
|
|
|
|||
20
Dockerfile
20
Dockerfile
|
|
@ -7,7 +7,7 @@ ARG ROCMVERSION=6.3.3
|
|||
ARG JETPACK5VERSION=r35.4.1
|
||||
ARG JETPACK6VERSION=r36.4.0
|
||||
ARG CMAKEVERSION=3.31.2
|
||||
ARG VULKANVERSION=1.4.313.2
|
||||
ARG VULKANVERSION=1.4.321.1
|
||||
|
||||
# We require gcc v10 minimum. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
|
||||
FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
|
||||
|
|
@ -88,7 +88,7 @@ FROM base AS rocm-6
|
|||
ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH
|
||||
ARG PARALLEL
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
cmake --preset 'ROCm 6' \
|
||||
cmake --preset 'ROCm 6' -DOLLAMA_RUNNER_DIR="rocm" \
|
||||
&& cmake --build --parallel ${PARALLEL} --preset 'ROCm 6' \
|
||||
&& cmake --install build --component HIP --strip --parallel ${PARALLEL}
|
||||
|
||||
|
|
@ -100,7 +100,7 @@ COPY CMakeLists.txt CMakePresets.json .
|
|||
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
|
||||
ARG PARALLEL
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
cmake --preset 'JetPack 5' \
|
||||
cmake --preset 'JetPack 5' -DOLLAMA_RUNNER_DIR="cuda_jetpack5" \
|
||||
&& cmake --build --parallel ${PARALLEL} --preset 'JetPack 5' \
|
||||
&& cmake --install build --component CUDA --strip --parallel ${PARALLEL}
|
||||
|
||||
|
|
@ -112,13 +112,13 @@ COPY CMakeLists.txt CMakePresets.json .
|
|||
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
|
||||
ARG PARALLEL
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
cmake --preset 'JetPack 6' \
|
||||
cmake --preset 'JetPack 6' -DOLLAMA_RUNNER_DIR="cuda_jetpack6" \
|
||||
&& cmake --build --parallel ${PARALLEL} --preset 'JetPack 6' \
|
||||
&& cmake --install build --component CUDA --strip --parallel ${PARALLEL}
|
||||
|
||||
FROM base AS vulkan
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
cmake --preset 'Vulkan' \
|
||||
cmake --preset 'Vulkan' -DOLLAMA_RUNNER_DIR="vulkan" \
|
||||
&& cmake --build --parallel --preset 'Vulkan' \
|
||||
&& cmake --install build --component Vulkan --strip --parallel 8
|
||||
|
||||
|
|
@ -140,15 +140,15 @@ RUN --mount=type=cache,target=/root/.cache/go-build \
|
|||
FROM --platform=linux/amd64 scratch AS amd64
|
||||
# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
|
||||
COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
|
||||
COPY --from=cuda-13 dist/lib/ollama/ /lib/ollama/
|
||||
COPY --from=vulkan dist/lib/ollama/vulkan /lib/ollama/vulkan
|
||||
COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
|
||||
COPY --from=vulkan dist/lib/ollama /lib/ollama/
|
||||
|
||||
FROM --platform=linux/arm64 scratch AS arm64
|
||||
# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
|
||||
COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
|
||||
COPY --from=cuda-13 dist/lib/ollama/ /lib/ollama/
|
||||
COPY --from=jetpack-5 dist/lib/ollama /lib/ollama/cuda_jetpack5
|
||||
COPY --from=jetpack-6 dist/lib/ollama /lib/ollama/cuda_jetpack6
|
||||
COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
|
||||
COPY --from=jetpack-5 dist/lib/ollama /lib/ollama/
|
||||
COPY --from=jetpack-6 dist/lib/ollama /lib/ollama/
|
||||
|
||||
FROM scratch AS rocm
|
||||
COPY --from=rocm-6 dist/lib/ollama /lib/ollama
|
||||
|
|
|
|||
|
|
@ -71,11 +71,9 @@ func devInfoToInfoList(devs []ml.DeviceInfo) GpuInfoList {
|
|||
} else {
|
||||
info.Compute = fmt.Sprintf("%d.%d", dev.ComputeMajor, dev.ComputeMinor)
|
||||
}
|
||||
// TODO any special processing of Vulkan devices?
|
||||
resp = append(resp, info)
|
||||
}
|
||||
for _, gpu := range vulkanGPUs {
|
||||
resp = append(resp, gpu.GpuInfo)
|
||||
}
|
||||
if len(resp) == 0 {
|
||||
mem, err := GetCPUMem()
|
||||
if err != nil {
|
||||
|
|
@ -93,18 +91,20 @@ func devInfoToInfoList(devs []ml.DeviceInfo) GpuInfoList {
|
|||
|
||||
// Given the list of GPUs this instantiation is targeted for,
|
||||
// figure out the visible devices environment variable
|
||||
//
|
||||
// # If different libraries are detected, the first one is what we use
|
||||
//
|
||||
// TODO once we're purely running on the new runner, this level of device
|
||||
// filtering will no longer be necessary. Instead the runner can be told which
|
||||
// of the set of GPUs to utilize and handle filtering itself, instead of relying
|
||||
// on the env var to hide devices from the underlying GPU libraries
|
||||
func (l GpuInfoList) GetVisibleDevicesEnv() []string {
|
||||
if len(l) == 0 {
|
||||
return nil
|
||||
}
|
||||
return []string{rocmGetVisibleDevicesEnv(l)}
|
||||
res := []string{}
|
||||
envVar := rocmGetVisibleDevicesEnv(l)
|
||||
if envVar != "" {
|
||||
res = append(res, envVar)
|
||||
}
|
||||
envVar = vkGetVisibleDevicesEnv(l)
|
||||
if envVar != "" {
|
||||
res = append(res, envVar)
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
|
||||
|
|
@ -134,6 +134,22 @@ func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
|
|||
return envVar + strings.Join(ids, ",")
|
||||
}
|
||||
|
||||
func vkGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
|
||||
ids := []string{}
|
||||
for _, info := range gpuInfo {
|
||||
if info.Library != "VULKAN" {
|
||||
continue
|
||||
}
|
||||
ids = append(ids, info.ID)
|
||||
|
||||
}
|
||||
if len(ids) == 0 {
|
||||
return ""
|
||||
}
|
||||
envVar := "GGML_VK_VISIBLE_DEVICES="
|
||||
return envVar + strings.Join(ids, ",")
|
||||
}
|
||||
|
||||
// GetSystemInfo returns the last cached state of the GPUs on the system
|
||||
func GetSystemInfo() SystemInfo {
|
||||
deviceMu.Lock()
|
||||
|
|
|
|||
|
|
@ -1,241 +0,0 @@
|
|||
#ifndef __APPLE__
|
||||
#include "gpu_info_vulkan.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
int is_extension_supported(vk_handle_t* rh, VkPhysicalDevice device, char* extension) {
|
||||
VkPhysicalDeviceProperties properties = {};
|
||||
(*rh->vkGetPhysicalDeviceProperties)(device, &properties);
|
||||
|
||||
uint32_t extensionCount;
|
||||
(*rh->vkEnumerateDeviceExtensionProperties)(device, NULL, &extensionCount, NULL);
|
||||
|
||||
if (extensionCount == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
VkExtensionProperties* extensions = malloc(extensionCount * sizeof(VkExtensionProperties));
|
||||
if (extensions == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
(*rh->vkEnumerateDeviceExtensionProperties)(device, NULL, &extensionCount, extensions);
|
||||
|
||||
for (int j = 0; j < extensionCount; j++) {
|
||||
if (strcmp(extensions[j].extensionName, extension) == 0) {
|
||||
free(extensions);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
free(extensions);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vk_init(char* vk_lib_path, vk_init_resp_t *resp) {
|
||||
const int buflen = 256;
|
||||
char buf[buflen + 1];
|
||||
int i;
|
||||
|
||||
struct lookup {
|
||||
char *s;
|
||||
void **p;
|
||||
} l[] = {
|
||||
{"vkGetPhysicalDeviceProperties", (void *)&resp->ch.vkGetPhysicalDeviceProperties},
|
||||
{"vkGetPhysicalDeviceProperties2", (void *)&resp->ch.vkGetPhysicalDeviceProperties2},
|
||||
{"vkEnumerateDeviceExtensionProperties", (void *)&resp->ch.vkEnumerateDeviceExtensionProperties},
|
||||
{"vkCreateInstance", (void *)&resp->ch.vkCreateInstance},
|
||||
{"vkEnumeratePhysicalDevices", (void *)&resp->ch.vkEnumeratePhysicalDevices},
|
||||
{"vkGetPhysicalDeviceMemoryProperties2", (void *)&resp->ch.vkGetPhysicalDeviceMemoryProperties2},
|
||||
{"vkDestroyInstance", (void *)&resp->ch.vkDestroyInstance},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
resp->ch.vk_handle = LOAD_LIBRARY(vk_lib_path, RTLD_LAZY);
|
||||
if (!resp->ch.vk_handle) {
|
||||
char *msg = LOAD_ERR();
|
||||
LOG(resp->ch.verbose, "library %s load err: %s\n", vk_lib_path, msg);
|
||||
snprintf(buf, buflen,
|
||||
"Unable to load %s library to query for Vulkan GPUs: %s",
|
||||
vk_lib_path, msg);
|
||||
free(msg);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; l[i].s != NULL; i++) {
|
||||
*l[i].p = LOAD_SYMBOL(resp->ch.vk_handle, l[i].s);
|
||||
if (!*l[i].p) {
|
||||
char *msg = LOAD_ERR();
|
||||
LOG(resp->ch.verbose, "dlerr: %s\n", msg);
|
||||
UNLOAD_LIBRARY(resp->ch.vk_handle);
|
||||
resp->ch.vk_handle = NULL;
|
||||
snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
|
||||
msg);
|
||||
free(msg);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
VkInstance instance;
|
||||
|
||||
VkApplicationInfo appInfo = {};
|
||||
appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
|
||||
appInfo.pNext = NULL;
|
||||
appInfo.pApplicationName = "Ollama";
|
||||
appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
|
||||
appInfo.pEngineName = "No Engine";
|
||||
appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
|
||||
appInfo.apiVersion = VK_API_VERSION_1_2;
|
||||
|
||||
VkInstanceCreateInfo createInfo = {};
|
||||
createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
|
||||
createInfo.pNext = NULL;
|
||||
createInfo.flags = 0;
|
||||
createInfo.enabledExtensionCount = 1;
|
||||
const char* extensions[] = { VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME };
|
||||
createInfo.ppEnabledExtensionNames = extensions;
|
||||
createInfo.pApplicationInfo = &appInfo;
|
||||
|
||||
VkResult result = (*resp->ch.vkCreateInstance)(&createInfo, NULL, &instance);
|
||||
if (result != VK_SUCCESS) {
|
||||
resp->err = strdup("failed to create instance");
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t deviceCount;
|
||||
result = (*resp->ch.vkEnumeratePhysicalDevices)(instance, &deviceCount, NULL);
|
||||
if (result != VK_SUCCESS) {
|
||||
resp->err = strdup("failed to enumerate physical devices");
|
||||
return;
|
||||
}
|
||||
|
||||
resp->err = NULL;
|
||||
resp->ch.vk = instance;
|
||||
resp->ch.num_devices = deviceCount;
|
||||
resp->num_devices = deviceCount;
|
||||
}
|
||||
|
||||
int vk_check_flash_attention(vk_handle_t rh, int i) {
|
||||
VkInstance instance = rh.vk;
|
||||
uint32_t deviceCount = rh.num_devices;
|
||||
|
||||
VkPhysicalDevice* devices = malloc(deviceCount * sizeof(VkPhysicalDevice));
|
||||
if (devices == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
VkResult result = (*rh.vkEnumeratePhysicalDevices)(instance, &deviceCount, devices);
|
||||
if (result != VK_SUCCESS) {
|
||||
free(devices);
|
||||
return 0;
|
||||
}
|
||||
|
||||
VkPhysicalDeviceProperties properties = {};
|
||||
(*rh.vkGetPhysicalDeviceProperties)(devices[i], &properties);
|
||||
|
||||
int supports_nv_coopmat2 = is_extension_supported(&rh, devices[i], VK_NV_COOPERATIVE_MATRIX_2_EXTENSION_NAME);
|
||||
if (!supports_nv_coopmat2) {
|
||||
free(devices);
|
||||
return 1;
|
||||
}
|
||||
|
||||
free(devices);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp) {
|
||||
VkInstance instance = rh.vk;
|
||||
uint32_t deviceCount = rh.num_devices;
|
||||
|
||||
VkPhysicalDevice* devices = malloc(deviceCount * sizeof(VkPhysicalDevice));
|
||||
if (devices == NULL) {
|
||||
resp->err = strdup("memory allocation failed for devices array");
|
||||
return;
|
||||
}
|
||||
|
||||
VkResult result = (*rh.vkEnumeratePhysicalDevices)(instance, &deviceCount, devices);
|
||||
if (result != VK_SUCCESS) {
|
||||
free(devices);
|
||||
resp->err = strdup("failed to enumerate physical devices");
|
||||
return;
|
||||
}
|
||||
|
||||
VkPhysicalDeviceProperties properties = {};
|
||||
(*rh.vkGetPhysicalDeviceProperties)(devices[i], &properties);
|
||||
|
||||
int supports_budget = is_extension_supported(&rh, devices[i], VK_EXT_MEMORY_BUDGET_EXTENSION_NAME);
|
||||
if (!supports_budget) {
|
||||
free(devices);
|
||||
resp->err = strdup("device does not support memory budget");
|
||||
return;
|
||||
}
|
||||
|
||||
if (properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU) {
|
||||
free(devices);
|
||||
resp->err = strdup("device is a CPU");
|
||||
return;
|
||||
}
|
||||
|
||||
VkPhysicalDeviceProperties2 device_props2 = {};
|
||||
device_props2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
|
||||
|
||||
VkPhysicalDeviceIDProperties id_props = {};
|
||||
id_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
|
||||
|
||||
device_props2.pNext = &id_props;
|
||||
(*rh.vkGetPhysicalDeviceProperties2)(devices[i], &device_props2);
|
||||
|
||||
VkPhysicalDeviceMemoryBudgetPropertiesEXT physical_device_memory_budget_properties = {};
|
||||
physical_device_memory_budget_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
|
||||
physical_device_memory_budget_properties.pNext = NULL;
|
||||
|
||||
VkPhysicalDeviceMemoryProperties2 device_memory_properties = {};
|
||||
device_memory_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
|
||||
device_memory_properties.pNext = &physical_device_memory_budget_properties;
|
||||
|
||||
(*rh.vkGetPhysicalDeviceMemoryProperties2)(devices[i], &device_memory_properties);
|
||||
|
||||
VkDeviceSize device_memory_total_size = 0;
|
||||
VkDeviceSize device_memory_heap_budget = 0;
|
||||
|
||||
for (uint32_t j = 0; j < device_memory_properties.memoryProperties.memoryHeapCount; j++) {
|
||||
VkMemoryHeap heap = device_memory_properties.memoryProperties.memoryHeaps[j];
|
||||
if (heap.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
|
||||
device_memory_total_size += heap.size;
|
||||
device_memory_heap_budget += physical_device_memory_budget_properties.heapBudget[j];
|
||||
}
|
||||
}
|
||||
|
||||
free(devices);
|
||||
|
||||
resp->err = NULL;
|
||||
snprintf(&resp->gpu_id[0], GPU_ID_LEN, "%d", i);
|
||||
resp->gpu_name[GPU_NAME_LEN - 1] = '\0';
|
||||
strncpy(&resp->gpu_name[0], properties.deviceName, GPU_NAME_LEN - 1);
|
||||
resp->gpu_name[GPU_NAME_LEN - 1] = '\0';
|
||||
const uint8_t *uuid = id_props.deviceUUID;
|
||||
snprintf(&resp->gpu_id[0], GPU_ID_LEN,
|
||||
"GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
||||
uuid[0], uuid[1], uuid[2], uuid[3],
|
||||
uuid[4], uuid[5],
|
||||
uuid[6], uuid[7],
|
||||
uuid[8], uuid[9],
|
||||
uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]
|
||||
);
|
||||
resp->total = (uint64_t) device_memory_total_size;
|
||||
resp->free = (uint64_t) device_memory_heap_budget;
|
||||
resp->major = VK_API_VERSION_MAJOR(properties.apiVersion);
|
||||
resp->minor = VK_API_VERSION_MINOR(properties.apiVersion);
|
||||
resp->patch = VK_API_VERSION_PATCH(properties.apiVersion);
|
||||
}
|
||||
|
||||
void vk_release(vk_handle_t rh) {
|
||||
LOG(rh.verbose, "releasing vulkan library\n");
|
||||
(*rh.vkDestroyInstance)(rh.vk, NULL);
|
||||
UNLOAD_LIBRARY(rh.vk_handle);
|
||||
rh.vk_handle = NULL;
|
||||
}
|
||||
|
||||
#endif // __APPLE__
|
||||
|
|
@ -1,394 +0,0 @@
|
|||
#ifndef __APPLE__
|
||||
#ifndef __GPU_INFO_VULKAN_H__
|
||||
#define __GPU_INFO_VULKAN_H__
|
||||
|
||||
#include "gpu_info.h"
|
||||
|
||||
#define VK_DEFINE_HANDLE(object) typedef struct object##_T* object;
|
||||
VK_DEFINE_HANDLE(VkInstance)
|
||||
VK_DEFINE_HANDLE(VkPhysicalDevice)
|
||||
|
||||
#define VK_MAX_EXTENSION_NAME_SIZE 256U
|
||||
#define VK_MAX_DESCRIPTION_SIZE 256U
|
||||
#define VK_LUID_SIZE 8U
|
||||
#define VK_UUID_SIZE 16U
|
||||
#define VK_MAX_MEMORY_TYPES 32U
|
||||
#define VK_MAX_MEMORY_HEAPS 16U
|
||||
#define VK_MAX_PHYSICAL_DEVICE_NAME_SIZE 256U
|
||||
|
||||
#define VK_MAKE_VERSION(major, minor, patch) \
|
||||
((((uint32_t)(major)) << 22U) | (((uint32_t)(minor)) << 12U) | ((uint32_t)(patch)))
|
||||
|
||||
#define VK_MAKE_API_VERSION(variant, major, minor, patch) \
|
||||
((((uint32_t)(variant)) << 29U) | (((uint32_t)(major)) << 22U) | (((uint32_t)(minor)) << 12U) | ((uint32_t)(patch)))
|
||||
|
||||
#define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0
|
||||
#define VK_API_VERSION_1_1 VK_MAKE_API_VERSION(0, 1, 1, 0)// Patch version should always be set to 0
|
||||
#define VK_API_VERSION_1_2 VK_MAKE_API_VERSION(0, 1, 2, 0)// Patch version should always be set to 0
|
||||
#define VK_API_VERSION_1_3 VK_MAKE_API_VERSION(0, 1, 3, 0)// Patch version should always be set to 0
|
||||
#define VK_API_VERSION_MAJOR(version) (((uint32_t)(version) >> 22U) & 0x7FU)
|
||||
#define VK_API_VERSION_MINOR(version) (((uint32_t)(version) >> 12U) & 0x3FFU)
|
||||
#define VK_API_VERSION_PATCH(version) ((uint32_t)(version) & 0xFFFU)
|
||||
|
||||
#define VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME "VK_KHR_get_physical_device_properties2"
|
||||
#define VK_NV_COOPERATIVE_MATRIX_2_EXTENSION_NAME "VK_NV_cooperative_matrix2"
|
||||
#define VK_EXT_MEMORY_BUDGET_EXTENSION_NAME "VK_EXT_memory_budget"
|
||||
|
||||
typedef uint32_t VkFlags;
|
||||
typedef uint32_t VkBool32;
|
||||
typedef uint64_t VkDeviceSize;
|
||||
typedef uint32_t VkSampleMask;
|
||||
typedef VkFlags VkSampleCountFlags;
|
||||
typedef VkFlags VkMemoryPropertyFlags;
|
||||
typedef VkFlags VkMemoryHeapFlags;
|
||||
typedef VkFlags VkInstanceCreateFlags;
|
||||
|
||||
typedef enum VkResult {
|
||||
VK_SUCCESS = 0,
|
||||
VK_NOT_READY = 1,
|
||||
VK_TIMEOUT = 2,
|
||||
VK_EVENT_SET = 3,
|
||||
VK_EVENT_RESET = 4,
|
||||
VK_INCOMPLETE = 5,
|
||||
VK_ERROR_OUT_OF_HOST_MEMORY = -1,
|
||||
VK_ERROR_OUT_OF_DEVICE_MEMORY = -2,
|
||||
VK_ERROR_INITIALIZATION_FAILED = -3,
|
||||
VK_ERROR_DEVICE_LOST = -4,
|
||||
VK_ERROR_MEMORY_MAP_FAILED = -5,
|
||||
VK_ERROR_LAYER_NOT_PRESENT = -6,
|
||||
VK_ERROR_EXTENSION_NOT_PRESENT = -7,
|
||||
VK_ERROR_FEATURE_NOT_PRESENT = -8,
|
||||
VK_ERROR_INCOMPATIBLE_DRIVER = -9,
|
||||
VK_ERROR_TOO_MANY_OBJECTS = -10,
|
||||
VK_ERROR_FORMAT_NOT_SUPPORTED = -11,
|
||||
VK_ERROR_FRAGMENTED_POOL = -12,
|
||||
VK_ERROR_UNKNOWN = -13,
|
||||
VK_ERROR_OUT_OF_POOL_MEMORY = -1000069000,
|
||||
VK_ERROR_INVALID_EXTERNAL_HANDLE = -1000072003,
|
||||
VK_ERROR_FRAGMENTATION = -1000168000,
|
||||
VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS = -1000257000,
|
||||
VK_PIPELINE_COMPILE_REQUIRED = 1000297000,
|
||||
VK_ERROR_SURFACE_LOST_KHR = -1000000000,
|
||||
VK_ERROR_NATIVE_WINDOW_IN_USE_KHR = -1000000001,
|
||||
VK_SUBOPTIMAL_KHR = 1000001003,
|
||||
VK_ERROR_OUT_OF_DATE_KHR = -1000001004,
|
||||
VK_ERROR_INCOMPATIBLE_DISPLAY_KHR = -1000003001,
|
||||
VK_ERROR_VALIDATION_FAILED_EXT = -1000011001,
|
||||
VK_ERROR_INVALID_SHADER_NV = -1000012000,
|
||||
VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR = -1000158000,
|
||||
VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR = -1000158001,
|
||||
VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR = -1000158002,
|
||||
VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR = -1000158003,
|
||||
VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR = -1000158004,
|
||||
VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR = -1000158005,
|
||||
VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT = -1000158006,
|
||||
VK_ERROR_NOT_PERMITTED_KHR = -1000174001,
|
||||
VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT = -1000255000,
|
||||
VK_THREAD_IDLE_KHR = 1000268000,
|
||||
VK_THREAD_DONE_KHR = 1000268001,
|
||||
VK_OPERATION_DEFERRED_KHR = 1000268002,
|
||||
VK_OPERATION_NOT_DEFERRED_KHR = 1000268003,
|
||||
VK_ERROR_COMPRESSION_EXHAUSTED_EXT = -1000338000,
|
||||
VK_RESULT_MAX_ENUM = 0x7FFFFFFF
|
||||
} VkResult;
|
||||
|
||||
typedef enum VkStructureType {
|
||||
VK_STRUCTURE_TYPE_APPLICATION_INFO = 0,
|
||||
VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 1,
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 = 1000059001,
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2 = 1000059006,
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES = 1000071004,
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT = 1000237000,
|
||||
VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF
|
||||
} VkStructureType;
|
||||
|
||||
typedef enum VkPhysicalDeviceType {
|
||||
VK_PHYSICAL_DEVICE_TYPE_OTHER = 0,
|
||||
VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU = 1,
|
||||
VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU = 2,
|
||||
VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU = 3,
|
||||
VK_PHYSICAL_DEVICE_TYPE_CPU = 4,
|
||||
VK_PHYSICAL_DEVICE_TYPE_MAX_ENUM = 0x7FFFFFFF
|
||||
} VkPhysicalDeviceType;
|
||||
|
||||
typedef enum VkSystemAllocationScope {
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND = 0,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT = 1,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_CACHE = 2,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE = 3,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE = 4,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_MAX_ENUM = 0x7FFFFFFF
|
||||
} VkSystemAllocationScope;
|
||||
|
||||
typedef enum VkInternalAllocationType {
|
||||
VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE = 0,
|
||||
VK_INTERNAL_ALLOCATION_TYPE_NON_EXECUTABLE = 1,
|
||||
VK_INTERNAL_ALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF
|
||||
} VkInternalAllocationType;
|
||||
|
||||
typedef enum VkMemoryHeapFlagBits {
|
||||
VK_MEMORY_HEAP_DEVICE_LOCAL_BIT = 0x00000001,
|
||||
VK_MEMORY_HEAP_MULTI_INSTANCE_BIT = 0x00000002,
|
||||
VK_MEMORY_HEAP_TILE_MEMORY_BIT_QCOM = 0x00000008,
|
||||
VK_MEMORY_HEAP_MULTI_INSTANCE_BIT_KHR = VK_MEMORY_HEAP_MULTI_INSTANCE_BIT,
|
||||
VK_MEMORY_HEAP_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
|
||||
} VkMemoryHeapFlagBits;
|
||||
|
||||
typedef struct VkExtensionProperties {
|
||||
char extensionName[VK_MAX_EXTENSION_NAME_SIZE];
|
||||
uint32_t specVersion;
|
||||
} VkExtensionProperties;
|
||||
|
||||
typedef struct VkPhysicalDeviceLimits {
|
||||
uint32_t maxImageDimension1D;
|
||||
uint32_t maxImageDimension2D;
|
||||
uint32_t maxImageDimension3D;
|
||||
uint32_t maxImageDimensionCube;
|
||||
uint32_t maxImageArrayLayers;
|
||||
uint32_t maxTexelBufferElements;
|
||||
uint32_t maxUniformBufferRange;
|
||||
uint32_t maxStorageBufferRange;
|
||||
uint32_t maxPushConstantsSize;
|
||||
uint32_t maxMemoryAllocationCount;
|
||||
uint32_t maxSamplerAllocationCount;
|
||||
VkDeviceSize bufferImageGranularity;
|
||||
VkDeviceSize sparseAddressSpaceSize;
|
||||
uint32_t maxBoundDescriptorSets;
|
||||
uint32_t maxPerStageDescriptorSamplers;
|
||||
uint32_t maxPerStageDescriptorUniformBuffers;
|
||||
uint32_t maxPerStageDescriptorStorageBuffers;
|
||||
uint32_t maxPerStageDescriptorSampledImages;
|
||||
uint32_t maxPerStageDescriptorStorageImages;
|
||||
uint32_t maxPerStageDescriptorInputAttachments;
|
||||
uint32_t maxPerStageResources;
|
||||
uint32_t maxDescriptorSetSamplers;
|
||||
uint32_t maxDescriptorSetUniformBuffers;
|
||||
uint32_t maxDescriptorSetUniformBuffersDynamic;
|
||||
uint32_t maxDescriptorSetStorageBuffers;
|
||||
uint32_t maxDescriptorSetStorageBuffersDynamic;
|
||||
uint32_t maxDescriptorSetSampledImages;
|
||||
uint32_t maxDescriptorSetStorageImages;
|
||||
uint32_t maxDescriptorSetInputAttachments;
|
||||
uint32_t maxVertexInputAttributes;
|
||||
uint32_t maxVertexInputBindings;
|
||||
uint32_t maxVertexInputAttributeOffset;
|
||||
uint32_t maxVertexInputBindingStride;
|
||||
uint32_t maxVertexOutputComponents;
|
||||
uint32_t maxTessellationGenerationLevel;
|
||||
uint32_t maxTessellationPatchSize;
|
||||
uint32_t maxTessellationControlPerVertexInputComponents;
|
||||
uint32_t maxTessellationControlPerVertexOutputComponents;
|
||||
uint32_t maxTessellationControlPerPatchOutputComponents;
|
||||
uint32_t maxTessellationControlTotalOutputComponents;
|
||||
uint32_t maxTessellationEvaluationInputComponents;
|
||||
uint32_t maxTessellationEvaluationOutputComponents;
|
||||
uint32_t maxGeometryShaderInvocations;
|
||||
uint32_t maxGeometryInputComponents;
|
||||
uint32_t maxGeometryOutputComponents;
|
||||
uint32_t maxGeometryOutputVertices;
|
||||
uint32_t maxGeometryTotalOutputComponents;
|
||||
uint32_t maxFragmentInputComponents;
|
||||
uint32_t maxFragmentOutputAttachments;
|
||||
uint32_t maxFragmentDualSrcAttachments;
|
||||
uint32_t maxFragmentCombinedOutputResources;
|
||||
uint32_t maxComputeSharedMemorySize;
|
||||
uint32_t maxComputeWorkGroupCount[3];
|
||||
uint32_t maxComputeWorkGroupInvocations;
|
||||
uint32_t maxComputeWorkGroupSize[3];
|
||||
uint32_t subPixelPrecisionBits;
|
||||
uint32_t subTexelPrecisionBits;
|
||||
uint32_t mipmapPrecisionBits;
|
||||
uint32_t maxDrawIndexedIndexValue;
|
||||
uint32_t maxDrawIndirectCount;
|
||||
float maxSamplerLodBias;
|
||||
float maxSamplerAnisotropy;
|
||||
uint32_t maxViewports;
|
||||
uint32_t maxViewportDimensions[2];
|
||||
float viewportBoundsRange[2];
|
||||
uint32_t viewportSubPixelBits;
|
||||
size_t minMemoryMapAlignment;
|
||||
VkDeviceSize minTexelBufferOffsetAlignment;
|
||||
VkDeviceSize minUniformBufferOffsetAlignment;
|
||||
VkDeviceSize minStorageBufferOffsetAlignment;
|
||||
int32_t minTexelOffset;
|
||||
uint32_t maxTexelOffset;
|
||||
int32_t minTexelGatherOffset;
|
||||
uint32_t maxTexelGatherOffset;
|
||||
float minInterpolationOffset;
|
||||
float maxInterpolationOffset;
|
||||
uint32_t subPixelInterpolationOffsetBits;
|
||||
uint32_t maxFramebufferWidth;
|
||||
uint32_t maxFramebufferHeight;
|
||||
uint32_t maxFramebufferLayers;
|
||||
VkSampleCountFlags framebufferColorSampleCounts;
|
||||
VkSampleCountFlags framebufferDepthSampleCounts;
|
||||
VkSampleCountFlags framebufferStencilSampleCounts;
|
||||
VkSampleCountFlags framebufferNoAttachmentsSampleCounts;
|
||||
uint32_t maxColorAttachments;
|
||||
VkSampleCountFlags sampledImageColorSampleCounts;
|
||||
VkSampleCountFlags sampledImageIntegerSampleCounts;
|
||||
VkSampleCountFlags sampledImageDepthSampleCounts;
|
||||
VkSampleCountFlags sampledImageStencilSampleCounts;
|
||||
VkSampleCountFlags storageImageSampleCounts;
|
||||
uint32_t maxSampleMaskWords;
|
||||
VkBool32 timestampComputeAndGraphics;
|
||||
float timestampPeriod;
|
||||
uint32_t maxClipDistances;
|
||||
uint32_t maxCullDistances;
|
||||
uint32_t maxCombinedClipAndCullDistances;
|
||||
uint32_t discreteQueuePriorities;
|
||||
float pointSizeRange[2];
|
||||
float lineWidthRange[2];
|
||||
float pointSizeGranularity;
|
||||
float lineWidthGranularity;
|
||||
VkBool32 strictLines;
|
||||
VkBool32 standardSampleLocations;
|
||||
VkDeviceSize optimalBufferCopyOffsetAlignment;
|
||||
VkDeviceSize optimalBufferCopyRowPitchAlignment;
|
||||
VkDeviceSize nonCoherentAtomSize;
|
||||
} VkPhysicalDeviceLimits;
|
||||
|
||||
typedef struct VkPhysicalDeviceSparseProperties {
|
||||
VkBool32 residencyStandard2DBlockShape;
|
||||
VkBool32 residencyStandard2DMultisampleBlockShape;
|
||||
VkBool32 residencyStandard3DBlockShape;
|
||||
VkBool32 residencyAlignedMipSize;
|
||||
VkBool32 residencyNonResidentStrict;
|
||||
} VkPhysicalDeviceSparseProperties;
|
||||
|
||||
typedef struct VkPhysicalDeviceProperties {
|
||||
uint32_t apiVersion;
|
||||
uint32_t driverVersion;
|
||||
uint32_t vendorID;
|
||||
uint32_t deviceID;
|
||||
uint32_t deviceType;
|
||||
char deviceName[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
|
||||
uint8_t pipelineCacheUUID[VK_UUID_SIZE];
|
||||
VkPhysicalDeviceLimits limits;
|
||||
VkPhysicalDeviceSparseProperties sparseProperties;
|
||||
} VkPhysicalDeviceProperties;
|
||||
|
||||
typedef struct VkPhysicalDeviceProperties2 {
|
||||
VkStructureType sType;
|
||||
void* pNext;
|
||||
VkPhysicalDeviceProperties properties;
|
||||
} VkPhysicalDeviceProperties2;
|
||||
|
||||
typedef struct VkPhysicalDeviceIDProperties {
|
||||
VkStructureType sType;
|
||||
void* pNext;
|
||||
uint8_t deviceUUID[VK_UUID_SIZE];
|
||||
uint8_t driverUUID[VK_UUID_SIZE];
|
||||
uint8_t deviceLUID[VK_LUID_SIZE];
|
||||
uint32_t deviceNodeMask;
|
||||
VkBool32 deviceLUIDValid;
|
||||
} VkPhysicalDeviceIDProperties;
|
||||
|
||||
typedef struct VkMemoryType {
|
||||
VkMemoryPropertyFlags propertyFlags;
|
||||
uint32_t heapIndex;
|
||||
} VkMemoryType;
|
||||
|
||||
typedef struct VkMemoryHeap {
|
||||
VkDeviceSize size;
|
||||
VkMemoryHeapFlags flags;
|
||||
} VkMemoryHeap;
|
||||
|
||||
typedef struct VkPhysicalDeviceMemoryProperties {
|
||||
uint32_t memoryTypeCount;
|
||||
VkMemoryType memoryTypes[VK_MAX_MEMORY_TYPES];
|
||||
uint32_t memoryHeapCount;
|
||||
VkMemoryHeap memoryHeaps[VK_MAX_MEMORY_HEAPS];
|
||||
} VkPhysicalDeviceMemoryProperties;
|
||||
|
||||
typedef struct VkPhysicalDeviceMemoryProperties2 {
|
||||
VkStructureType sType;
|
||||
void* pNext;
|
||||
VkPhysicalDeviceMemoryProperties memoryProperties;
|
||||
} VkPhysicalDeviceMemoryProperties2;
|
||||
|
||||
typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT {
|
||||
VkStructureType sType;
|
||||
void* pNext;
|
||||
VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS];
|
||||
VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS];
|
||||
} VkPhysicalDeviceMemoryBudgetPropertiesEXT;
|
||||
|
||||
typedef struct VkApplicationInfo {
|
||||
VkStructureType sType;
|
||||
const void* pNext;
|
||||
const char* pApplicationName;
|
||||
uint32_t applicationVersion;
|
||||
const char* pEngineName;
|
||||
uint32_t engineVersion;
|
||||
uint32_t apiVersion;
|
||||
} VkApplicationInfo;
|
||||
|
||||
typedef struct VkInstanceCreateInfo {
|
||||
VkStructureType sType;
|
||||
const void* pNext;
|
||||
VkInstanceCreateFlags flags;
|
||||
const VkApplicationInfo* pApplicationInfo;
|
||||
uint32_t enabledLayerCount;
|
||||
const char* const* ppEnabledLayerNames;
|
||||
uint32_t enabledExtensionCount;
|
||||
const char* const* ppEnabledExtensionNames;
|
||||
} VkInstanceCreateInfo;
|
||||
|
||||
typedef struct VkAllocationCallbacks {
|
||||
void* pUserData;
|
||||
void* (*pfnAllocation)(void* pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope);
|
||||
void* (*pfnReallocation)(void* pUserData, void* pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope);
|
||||
void (*pfnFree)(void* pUserData, void* pMemory);
|
||||
void (*pfnInternalAllocation)(void* pUserData, size_t size, VkInternalAllocationType allocationType, VkSystemAllocationScope allocationScope);
|
||||
void (*pfnInternalFree)(void* pUserData, size_t size, VkInternalAllocationType allocationType, VkSystemAllocationScope allocationScope);
|
||||
} VkAllocationCallbacks;
|
||||
|
||||
typedef struct {
|
||||
void* vk_handle;
|
||||
uint16_t verbose;
|
||||
|
||||
VkInstance vk;
|
||||
int num_devices;
|
||||
|
||||
void (*vkGetPhysicalDeviceProperties)(
|
||||
VkPhysicalDevice physicalDevice,
|
||||
VkPhysicalDeviceProperties* pProperties);
|
||||
void (*vkGetPhysicalDeviceProperties2)(
|
||||
VkPhysicalDevice physicalDevice,
|
||||
VkPhysicalDeviceProperties2* pProperties);
|
||||
VkResult (*vkEnumerateDeviceExtensionProperties)(
|
||||
VkPhysicalDevice physicalDevice,
|
||||
const char* pLayerName,
|
||||
uint32_t* pPropertyCount,
|
||||
VkExtensionProperties* pProperties);
|
||||
VkResult (*vkCreateInstance)(
|
||||
const VkInstanceCreateInfo* pCreateInfo,
|
||||
const VkAllocationCallbacks* pAllocator,
|
||||
VkInstance* pInstance);
|
||||
VkResult (*vkEnumeratePhysicalDevices)(
|
||||
VkInstance instance,
|
||||
uint32_t* pPhysicalDeviceCount,
|
||||
VkPhysicalDevice* pPhysicalDevices);
|
||||
void (*vkGetPhysicalDeviceMemoryProperties2)(
|
||||
VkPhysicalDevice physicalDevice,
|
||||
VkPhysicalDeviceMemoryProperties2* pMemoryProperties);
|
||||
void (*vkDestroyInstance)(
|
||||
VkInstance instance,
|
||||
const VkAllocationCallbacks* pAllocator);
|
||||
} vk_handle_t;
|
||||
|
||||
typedef struct vk_init_resp
|
||||
{
|
||||
char *err; // If err is non-null handle is invalid
|
||||
int num_devices;
|
||||
vk_handle_t ch;
|
||||
} vk_init_resp_t;
|
||||
|
||||
void vk_init(char* vk_lib_path, vk_init_resp_t *resp);
|
||||
void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp);
|
||||
int vk_check_flash_attention(vk_handle_t rh, int i);
|
||||
void vk_release(vk_handle_t rh);
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -92,6 +92,7 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
|
|||
// are enumerated, but not actually supported.
|
||||
// We run this in serial to avoid potentially initializing a GPU multiple
|
||||
// times concurrently leading to memory contention
|
||||
// TODO refactor so we group the lib dirs and do serial per version, but parallel for different libs
|
||||
for dir := range libDirs {
|
||||
var dirs []string
|
||||
if dir == "" {
|
||||
|
|
@ -125,8 +126,10 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
|
|||
} else {
|
||||
envVar = "ROCR_VISIBLE_DEVICES"
|
||||
}
|
||||
} else {
|
||||
} else if devices[i].Library == "CUDA" {
|
||||
envVar = "CUDA_VISIBLE_DEVICES"
|
||||
} else if devices[i].Library == "VULKAN" {
|
||||
envVar = "GGML_VK_VISIBLE_DEVICES"
|
||||
}
|
||||
|
||||
extraEnvs := []string{
|
||||
|
|
|
|||
|
|
@ -36,10 +36,11 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
|
|||
UnreliableFreeMemory bool
|
||||
|
||||
// GPU information
|
||||
ID string `json:"gpu_id"` // string to use for selection of this specific GPU
|
||||
filterID string // AMD Workaround: The numeric ID of the device used to filter out other devices
|
||||
Name string `json:"name"` // user friendly name if available
|
||||
Compute string `json:"compute"` // Compute Capability or gfx
|
||||
ID string `json:"gpu_id"` // string to use for selection of this specific GPU
|
||||
filterID string // AMD Workaround: The numeric ID of the device used to filter out other devices
|
||||
Name string `json:"name"` // user friendly name if available
|
||||
Compute string `json:"compute"` // Compute Capability or gfx
|
||||
FlashAttention bool `json:"flash_attention"` // is flash attention supported
|
||||
|
||||
// Driver Information - TODO no need to put this on each GPU
|
||||
DriverMajor int `json:"driver_major,omitempty"`
|
||||
|
|
@ -174,7 +175,8 @@ func (l GpuInfoList) FlashAttentionSupported() bool {
|
|||
supportsFA := gpu.Library == "cpu" ||
|
||||
gpu.Name == "Metal" ||
|
||||
(gpu.Library == "CUDA" && gpu.DriverMajor >= 7) ||
|
||||
gpu.Library == "HIP"
|
||||
gpu.Library == "HIP" ||
|
||||
gpu.Library == "VULKAN"
|
||||
|
||||
if !supportsFA {
|
||||
return false
|
||||
|
|
|
|||
|
|
@ -561,10 +561,11 @@ func (s *llamaServer) Load(ctx context.Context, gpus discover.GpuInfoList, requi
|
|||
// Windows CUDA should not use mmap for best performance
|
||||
// Linux with a model larger than free space, mmap leads to thrashing
|
||||
// For CPU loads we want the memory to be allocated, not FS cache
|
||||
if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && s.options.UseMMap == nil) ||
|
||||
if (runtime.GOOS == "windows" && gpus[0].Library == "CUDA" && s.options.UseMMap == nil) ||
|
||||
(runtime.GOOS == "linux" && systemInfo.System.FreeMemory < s.estimate.TotalSize && s.options.UseMMap == nil) ||
|
||||
(gpus[0].Library == "vulkan" && s.options.UseMMap == nil) ||
|
||||
(gpus[0].Library == "cpu" && s.options.UseMMap == nil) ||
|
||||
(gpus[0].Library == "VULKAN" && s.options.UseMMap == nil) ||
|
||||
(s.options.UseMMap != nil && !*s.options.UseMMap) {
|
||||
s.loadRequest.UseMmap = false
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4123,7 +4123,6 @@ static void ggml_vk_instance_init() {
|
|||
}
|
||||
} else {
|
||||
std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
|
||||
|
||||
// If no vulkan devices are found, return early
|
||||
if (devices.empty()) {
|
||||
GGML_LOG_INFO("ggml_vulkan: No devices found.\n");
|
||||
|
|
@ -10821,14 +10820,90 @@ std::string ggml_backend_vk_get_device_id(int device) {
|
|||
return ggml_vk_get_device_id(dev_idx);
|
||||
}
|
||||
|
||||
void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
|
||||
GGML_ASSERT(device < (int) vk_instance.device_indices.size());
|
||||
//////////////////////////
|
||||
|
||||
vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
|
||||
struct ggml_backend_vk_device_context {
|
||||
size_t device;
|
||||
std::string name;
|
||||
std::string description;
|
||||
std::string id;
|
||||
std::string uuid;
|
||||
int major;
|
||||
int minor;
|
||||
int driver_major;
|
||||
int driver_minor;
|
||||
int integrated;
|
||||
int pci_bus_id;
|
||||
int pci_device_id;
|
||||
int pci_domain_id;
|
||||
};
|
||||
|
||||
void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size_t * free, size_t * total) {
|
||||
GGML_ASSERT(ctx->device < (int) vk_instance.device_indices.size());
|
||||
|
||||
vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[ctx->device]];
|
||||
|
||||
vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
|
||||
vk::PhysicalDeviceProperties2 props2;
|
||||
vkdev.getProperties2(&props2);
|
||||
|
||||
for (const vk::MemoryHeap& heap : memprops.memoryHeaps) {
|
||||
// Use vendor specific management libraries for best VRAM reporting if available
|
||||
switch (props2.properties.vendorID) {
|
||||
case VK_VENDOR_ID_AMD:
|
||||
if (ggml_hip_mgmt_init() == 0) {
|
||||
int status = ggml_hip_get_device_memory(ctx->pci_bus_id, ctx->pci_device_id, free, total);
|
||||
if (status == 0) {
|
||||
GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total);
|
||||
ggml_hip_mgmt_release();
|
||||
return;
|
||||
}
|
||||
ggml_hip_mgmt_release();
|
||||
}
|
||||
break;
|
||||
case VK_VENDOR_ID_NVIDIA:
|
||||
if (ggml_nvml_init() == 0) {
|
||||
int status = ggml_nvml_get_device_memory(ctx->uuid.c_str(), free, total);
|
||||
if (status == 0) {
|
||||
GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total);
|
||||
ggml_nvml_release();
|
||||
return;
|
||||
}
|
||||
ggml_nvml_release();
|
||||
}
|
||||
break;
|
||||
}
|
||||
// else fallback to memory budget if supported
|
||||
|
||||
*total = 0;
|
||||
*free = 0;
|
||||
vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props;
|
||||
vk::PhysicalDeviceMemoryProperties2 memprops2;
|
||||
memprops2.pNext = &mem_budget_props;
|
||||
vkdev.getMemoryProperties2(&memprops2);
|
||||
for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) {
|
||||
if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
|
||||
*total += memprops2.memoryProperties.memoryHeaps[i].size;
|
||||
} else if (ctx->integrated) {
|
||||
// Include shared memory on iGPUs
|
||||
*total += memprops2.memoryProperties.memoryHeaps[i].size;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) {
|
||||
if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
|
||||
*free += mem_budget_props.heapBudget[i];
|
||||
} else if (ctx->integrated) {
|
||||
*free += mem_budget_props.heapBudget[i];
|
||||
}
|
||||
}
|
||||
if (*total > 0 && *free > 0) {
|
||||
return;
|
||||
} else if (*total > 0) {
|
||||
*free = *total;
|
||||
return;
|
||||
}
|
||||
|
||||
// else just report the physical memory
|
||||
for (const vk::MemoryHeap& heap : memprops2.memoryProperties.memoryHeaps) {
|
||||
if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
|
||||
*total = heap.size;
|
||||
*free = heap.size;
|
||||
|
|
@ -10837,14 +10912,6 @@ void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total
|
|||
}
|
||||
}
|
||||
|
||||
//////////////////////////
|
||||
|
||||
struct ggml_backend_vk_device_context {
|
||||
size_t device;
|
||||
std::string name;
|
||||
std::string description;
|
||||
std::string id;
|
||||
};
|
||||
|
||||
static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
|
||||
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
|
||||
|
|
@ -10863,7 +10930,7 @@ static const char * ggml_backend_vk_device_get_id(ggml_backend_dev_t dev) {
|
|||
|
||||
static void ggml_backend_vk_device_get_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
|
||||
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)device->context;
|
||||
ggml_backend_vk_get_device_memory(ctx->device, free, total);
|
||||
ggml_backend_vk_get_device_memory(ctx, free, total);
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) {
|
||||
|
|
@ -10881,6 +10948,7 @@ static enum ggml_backend_dev_type ggml_backend_vk_device_get_type(ggml_backend_d
|
|||
return GGML_BACKEND_DEVICE_TYPE_GPU;
|
||||
}
|
||||
|
||||
#define GGML_VULKAN_NAME "VULKAN"
|
||||
static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
|
||||
props->name = ggml_backend_vk_device_get_name(dev);
|
||||
props->description = ggml_backend_vk_device_get_description(dev);
|
||||
|
|
@ -10893,6 +10961,18 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
|
|||
/* .buffer_from_host_ptr = */ false,
|
||||
/* .events = */ false,
|
||||
};
|
||||
|
||||
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
|
||||
props->id = ctx->id.c_str();
|
||||
props->compute_major = ctx->major;
|
||||
props->compute_minor = ctx->minor;
|
||||
props->driver_major = ctx->driver_major;
|
||||
props->driver_minor = ctx->driver_minor;
|
||||
props->integrated = ctx->integrated;
|
||||
props->pci_bus_id = ctx->pci_bus_id;
|
||||
props->pci_device_id = ctx->pci_device_id;
|
||||
props->pci_domain_id = ctx->pci_domain_id;
|
||||
props->library = GGML_VULKAN_NAME;
|
||||
}
|
||||
|
||||
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
|
||||
|
|
@ -11296,6 +11376,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
|||
static std::mutex mutex;
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
if (!initialized) {
|
||||
std::vector<vk::PhysicalDevice> vk_devices = vk_instance.instance.enumeratePhysicalDevices();
|
||||
|
||||
for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) {
|
||||
ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context;
|
||||
char desc[256];
|
||||
|
|
@ -11309,6 +11391,44 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
|||
/* .reg = */ reg,
|
||||
/* .context = */ ctx,
|
||||
});
|
||||
|
||||
// Gather additional information about the device
|
||||
int dev_idx = vk_instance.device_indices[i];
|
||||
vk::PhysicalDeviceProperties props1;
|
||||
vk_devices[dev_idx].getProperties(&props1);
|
||||
vk::PhysicalDeviceProperties2 props2;
|
||||
vk::PhysicalDeviceIDProperties device_id_props;
|
||||
vk::PhysicalDevicePCIBusInfoPropertiesEXT pci_bus_props;
|
||||
vk::PhysicalDeviceDriverProperties driver_props;
|
||||
props2.pNext = &device_id_props;
|
||||
device_id_props.pNext = &pci_bus_props;
|
||||
pci_bus_props.pNext = &driver_props;
|
||||
vk_devices[dev_idx].getProperties2(&props2);
|
||||
std::ostringstream oss;
|
||||
oss << std::hex << std::setfill('0');
|
||||
oss << "GPU-";
|
||||
int byteIdx = 0;
|
||||
for (int i = 0; i < 16; ++i, ++byteIdx) {
|
||||
oss << std::setw(2) << static_cast<int>(device_id_props.deviceUUID[i]);
|
||||
if (byteIdx == 3 || byteIdx == 5 || byteIdx == 7 || byteIdx == 9) {
|
||||
oss << '-';
|
||||
}
|
||||
}
|
||||
ctx->uuid = oss.str();
|
||||
ctx->pci_bus_id = pci_bus_props.pciBus;
|
||||
ctx->pci_device_id = pci_bus_props.pciDevice;
|
||||
ctx->pci_domain_id = pci_bus_props.pciDomain;
|
||||
ctx->id = std::to_string(i);
|
||||
if (props1.deviceType == vk::PhysicalDeviceType::eIntegratedGpu) {
|
||||
ctx->integrated = 1;
|
||||
} else {
|
||||
ctx->integrated = 0;
|
||||
}
|
||||
ctx->major = 0;
|
||||
ctx->minor = 0;
|
||||
// TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string
|
||||
ctx->driver_major = 0;
|
||||
ctx->driver_minor = 0;
|
||||
}
|
||||
initialized = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -165,12 +165,11 @@ function buildROCm() {
|
|||
$env:HIPCXX="${env:HIP_PATH}\bin\clang++.exe"
|
||||
$env:HIP_PLATFORM="amd"
|
||||
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
|
||||
& cmake --fresh --preset "ROCm 6" -G Ninja `
|
||||
& cmake --fresh --preset "ROCm 6" -G Ninja --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="rocm" `
|
||||
-DCMAKE_C_COMPILER=clang `
|
||||
-DCMAKE_CXX_COMPILER=clang++ `
|
||||
-DCMAKE_C_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" `
|
||||
-DCMAKE_CXX_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" `
|
||||
--install-prefix $script:DIST_DIR
|
||||
-DCMAKE_CXX_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma"
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
$env:HIPCXX=""
|
||||
$env:HIP_PLATFORM=""
|
||||
|
|
@ -186,7 +185,7 @@ function buildROCm() {
|
|||
function buildVulkan(){
|
||||
if ($env:VULKAN_SDK) {
|
||||
write-host "Building Vulkan backend libraries"
|
||||
& cmake --fresh --preset Vulkan --install-prefix $script:DIST_DIR
|
||||
& cmake --fresh --preset Vulkan --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="vulkan"
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --build --preset Vulkan --config Release --parallel $script:JOBS
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
|
|
|
|||
Loading…
Reference in New Issue