diff --git a/gpu/gpu.go b/gpu/gpu.go
index a55903c51..359c6b5a7 100644
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -29,6 +29,7 @@ type handles struct {
 	cudart *C.cudart_handle_t
 	nvcuda *C.nvcuda_handle_t
 	oneapi *C.oneapi_handle_t
+	vulkan *C.vk_handle_t
 }
 
 const (
@@ -90,6 +91,16 @@ var OneapiLinuxGlobs = []string{
 	"/usr/lib*/libze_intel_gpu.so*",
 }
 
+var VulkanLinuxGlobs = []string{
+	"/usr/lib/x86_64-linux-gnu/libvulkan.so*",
+	"/usr/lib*/libvulkan.so*",
+}
+
+var CapLinuxGlobs = []string{
+	"/usr/lib/x86_64-linux-gnu/libcap.so*",
+	"/usr/lib*/libcap.so*",
+}
+
 // Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
 // Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
 var CudaTegra string = os.Getenv("JETSON_JETPACK")
@@ -104,6 +115,10 @@ func initGPUHandles() *handles {
 	var cudartMgmtPatterns []string
 	var nvcudaMgmtName string
 	var nvcudaMgmtPatterns []string
+	var vulkanMgmtName string
+	var vulkanMgmtPatterns []string
+	var libcapMgmtName string
+	var libcapMgmtPatterns []string
 
 	tmpDir, _ := PayloadsDir()
 	switch runtime.GOOS {
@@ -125,6 +140,12 @@ func initGPUHandles() *handles {
 		// Aligned with driver, we can't carry as payloads
 		nvcudaMgmtName = "libcuda.so*"
 		nvcudaMgmtPatterns = NvcudaLinuxGlobs
+
+		// Vulkan also needs libcap for the CAP_PERFMON check in vk_init
+		vulkanMgmtName = "libvulkan.so*"
+		vulkanMgmtPatterns = VulkanLinuxGlobs
+		libcapMgmtName = "libcap.so*"
+		libcapMgmtPatterns = CapLinuxGlobs
 	default:
 		return gpuHandles
 	}
@@ -152,6 +173,25 @@ func initGPUHandles() *handles {
 		}
 	}
 
+	vulkanLibPaths := FindGPULibs(vulkanMgmtName, vulkanMgmtPatterns)
+
+	var libcapLibPaths []string
+	if runtime.GOOS == "linux" {
+		libcapLibPaths = FindGPULibs(libcapMgmtName, libcapMgmtPatterns)
+	} else {
+		libcapLibPaths = []string{""}
+	}
+
+	if len(vulkanLibPaths) > 0 && len(libcapLibPaths) > 0 {
+		deviceCount, vulkan, vkLibPath, capLibPath := LoadVulkanMgmt(vulkanLibPaths, libcapLibPaths)
+		if vulkan != nil {
+			slog.Debug("detected GPUs", "vulkan_library", vkLibPath, "libcap_library", capLibPath, "count", deviceCount)
+			gpuHandles.vulkan = vulkan
+			gpuHandles.deviceCount = deviceCount
+			return gpuHandles
+		}
+	}
+
 	return gpuHandles
 }
 
@@ -186,7 +226,7 @@ func GetGPUInfo() GpuInfoList {
 	var memInfo C.mem_info_t
 	resp := []GpuInfo{}
 
-	// NVIDIA first
+	// NVIDIA and Vulkan first
 	for i := range gpuHandles.deviceCount {
 		// TODO once we support CPU compilation variants of GPU libraries refine this...
 		if cpuVariant == "" && runtime.GOARCH == "amd64" {
@@ -227,6 +267,32 @@ func GetGPUInfo() GpuInfoList {
 			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 			resp = append(resp, gpuInfo)
 		}
+
+		if gpuHandles.vulkan != nil {
+			gpuInfo := GpuInfo{
+				Library: "vulkan",
+			}
+
+			C.vk_check_vram(*gpuHandles.vulkan, C.int(i), &memInfo)
+			if memInfo.err != nil {
+				slog.Info("error looking up vulkan GPU memory", "error", C.GoString(memInfo.err))
+				C.free(unsafe.Pointer(memInfo.err))
+				continue
+			}
+
+			gpuInfo.TotalMemory = uint64(memInfo.total)
+			gpuInfo.FreeMemory = uint64(memInfo.free)
+			gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
+			gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
+			gpuInfo.MinimumMemory = 0
+			gpuInfo.DependencyPath = depPath
+			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
+			gpuInfo.DriverMajor = int(memInfo.major)
+			gpuInfo.DriverMinor = int(memInfo.minor)
+
+			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
+			resp = append(resp, gpuInfo)
+		}
 	}
 
 	// Then AMD
@@ -379,6 +445,28 @@ func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
 	return 0, nil, ""
 }
 
+func LoadVulkanMgmt(vulkanLibPaths []string, capLibPaths []string) (int, *C.vk_handle_t, string, string) {
+	var resp C.vk_init_resp_t
+	for _, vkLibPath := range vulkanLibPaths {
+		for _, capLibPath := range capLibPaths {
+			vkLib := C.CString(vkLibPath)
+			capLib := C.CString(capLibPath)
+			defer C.free(unsafe.Pointer(vkLib))
+			defer C.free(unsafe.Pointer(capLib))
+
+			C.vk_init(vkLib, capLib, &resp)
+			if resp.err != nil {
+				slog.Debug("Unable to load vulkan", "vulkan_library", vkLibPath, "libcap_library", capLibPath, "error", C.GoString(resp.err))
+				C.free(unsafe.Pointer(resp.err))
+			} else {
+				return int(resp.num_devices), &resp.oh, vkLibPath, capLibPath
+			}
+		}
+	}
+
+	return 0, nil, "", ""
+}
+
 func getVerboseState() C.uint16_t {
 	if envconfig.Debug {
 		return C.uint16_t(1)
@@ -401,6 +489,8 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
 		return rocmGetVisibleDevicesEnv(l)
 	case "oneapi":
 		return oneapiGetVisibleDevicesEnv(l)
+	case "vulkan":
+		return vkGetVisibleDevicesEnv(l)
 	default:
 		slog.Debug("no filter required for library " + l[0].Library)
 		return "", ""
diff --git a/gpu/gpu_vulkan.c b/gpu/gpu_vulkan.c
index 39058cd75..bb45bdf21 100644
--- a/gpu/gpu_vulkan.c
+++ b/gpu/gpu_vulkan.c
@@ -22,18 +22,29 @@ int check_perfmon() {
   if (cap_free(caps) == -1)
     return -1;
 
+#endif
   return 0;
-#else
-  return 0;
-#endif
 }
 
-void vk_init(vk_init_resp_t *resp) {
+void vk_init(char* vk_lib_path, char* cap_lib_path, vk_init_resp_t *resp) {
+  if (!LOAD_LIBRARY(vk_lib_path, RTLD_LAZY)) {
+    resp->err = strdup("Failed to load Vulkan library");
+    return;
+  }
+
+#ifdef __linux__
+  if (!LOAD_LIBRARY(cap_lib_path, RTLD_LAZY)) {
+    resp->err = strdup("Failed to load libcap library");
+    return;
+  }
+
   if (check_perfmon() != 0) {
-    resp->err = "Performance monitoring is not allowed. Please enable CAP_PERFMON or run as root to use Vulkan.";
+    // strdup so the Go caller can free() resp->err unconditionally
+    resp->err = strdup("Performance monitoring is not allowed. Please enable CAP_PERFMON or run as root to use Vulkan.");
     return;
   }
+#endif
 
   VkInstance instance;
 
   VkApplicationInfo appInfo = {};
@@ -123,4 +133,9 @@ void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp) {
   resp->major = VK_API_VERSION_MAJOR(properties.apiVersion);
   resp->minor = VK_API_VERSION_MINOR(properties.apiVersion);
   resp->patch = VK_API_VERSION_PATCH(properties.apiVersion);
 }
+
+// rh is passed by value and owns no heap allocation; only the VkInstance needs destroying
+void vk_free(vk_handle_t rh) {
+  vkDestroyInstance(rh.oh, NULL);
+}
\ No newline at end of file
diff --git a/gpu/gpu_vulkan.h b/gpu/gpu_vulkan.h
index 61ebb1a57..e77ce554e 100644
--- a/gpu/gpu_vulkan.h
+++ b/gpu/gpu_vulkan.h
@@ -13,5 +13,6 @@ typedef struct vk_init_resp
   vk_handle_t oh;
 } vk_init_resp_t;
 
-void vk_init(vk_init_resp_t *resp);
+void vk_init(char* vk_lib_path, char* cap_lib_path, vk_init_resp_t *resp);
 void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp);
+void vk_free(vk_handle_t rh);
diff --git a/gpu/vulkan_common.go b/gpu/vulkan_common.go
new file mode 100644
index 000000000..8d3d15d06
--- /dev/null
+++ b/gpu/vulkan_common.go
@@ -0,0 +1,19 @@
+package gpu
+
+import (
+	"log/slog"
+	"strings"
+)
+
+func vkGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
+	ids := []string{}
+	for _, info := range gpuInfo {
+		if info.Library != "vulkan" {
+			// TODO shouldn't happen if things are wired correctly...
+ slog.Debug("vkGetVisibleDevicesEnv skipping over non-vulkan device", "library", info.Library) + continue + } + ids = append(ids, info.ID) + } + return "GGML_VK_VISIBLE_DEVICES", strings.Join(ids, ",") +}