add support in gpu.go

pufferffish 2024-06-15 05:27:14 +01:00
parent f46b4a6fa2
commit 9c6b049567
4 changed files with 133 additions and 6 deletions


@@ -29,6 +29,7 @@ type handles struct {
cudart *C.cudart_handle_t
nvcuda *C.nvcuda_handle_t
oneapi *C.oneapi_handle_t
vulkan *C.vk_handle_t
}
const (
@@ -90,6 +91,16 @@ var OneapiLinuxGlobs = []string{
"/usr/lib*/libze_intel_gpu.so*",
}
var VulkanLinuxGlobs = []string{
"/usr/lib/x86_64-linux-gnu/libvulkan.so*",
"/usr/lib*/libvulkan.so*",
}
var CapLinuxGlobs = []string{
"/usr/lib/x86_64-linux-gnu/libcap.so*",
"/usr/lib*/libcap.so*",
}
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")
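The Vulkan and libcap globs above feed the same FindGPULibs lookup already used for the CUDA and oneAPI libraries. As a rough standalone sketch, that style of discovery amounts to expanding each pattern with filepath.Glob and keeping whatever exists on disk (findLibs is a hypothetical stand-in, not the real helper):

package main

import (
    "fmt"
    "path/filepath"
)

// findLibs expands each glob pattern and collects the library files
// currently present; overlapping patterns may yield duplicates.
func findLibs(patterns []string) []string {
    var found []string
    for _, pattern := range patterns {
        matches, err := filepath.Glob(pattern)
        if err != nil {
            continue // skip malformed patterns
        }
        found = append(found, matches...)
    }
    return found
}

func main() {
    fmt.Println(findLibs([]string{
        "/usr/lib/x86_64-linux-gnu/libvulkan.so*",
        "/usr/lib*/libvulkan.so*",
    }))
}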
@@ -104,6 +115,10 @@ func initGPUHandles() *handles {
var cudartMgmtPatterns []string
var nvcudaMgmtName string
var nvcudaMgmtPatterns []string
var vulkanMgmtName string
var vulkanMgmtPatterns []string
var libcapMgmtName string
var libcapMgmtPatterns []string
tmpDir, _ := PayloadsDir()
switch runtime.GOOS {
@@ -125,6 +140,12 @@ func initGPUHandles() *handles {
// Aligned with driver, we can't carry as payloads
nvcudaMgmtName = "libcuda.so*"
nvcudaMgmtPatterns = NvcudaLinuxGlobs
// Vulkan also needs libcap
vulkanMgmtName = "libvulkan.so*"
vulkanMgmtPatterns = VulkanLinuxGlobs
libcapMgmtName = "libcap.so*"
libcapMgmtPatterns = CapLinuxGlobs
default:
return gpuHandles
}
@@ -152,6 +173,25 @@ func initGPUHandles() *handles {
}
}
vulkanLibPaths := FindGPULibs(vulkanMgmtName, vulkanMgmtPatterns)
var libcapLibPaths []string
if runtime.GOOS == "linux" {
libcapLibPaths = FindGPULibs(libcapMgmtName, libcapMgmtPatterns)
} else {
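// Dummy entry so the (vulkan, libcap) pair loop in LoadVulkanMgmt still runs;
// vk_init only loads the libcap path on Linux (see the #ifdef __linux__ below).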
libcapLibPaths = []string{"<unused>"}
}
if len(vulkanLibPaths) > 0 && len(libcapLibPaths) > 0 {
deviceCount, vulkan, vkLibPath, capLibPath := LoadVulkanMgmt(vulkanLibPaths, libcapLibPaths)
if vulkan != nil {
slog.Debug("detected GPUs", "library", vkLibPath, capLibPath, "count", deviceCount)
gpuHandles.vulkan = vulkan
gpuHandles.deviceCount = deviceCount
return gpuHandles
}
}
return gpuHandles
}
@@ -186,7 +226,7 @@ func GetGPUInfo() GpuInfoList {
var memInfo C.mem_info_t
resp := []GpuInfo{}
// NVIDIA first
// NVIDIA and Vulkan first
for i := range gpuHandles.deviceCount {
// TODO once we support CPU compilation variants of GPU libraries refine this...
if cpuVariant == "" && runtime.GOARCH == "amd64" {
@@ -227,6 +267,32 @@ func GetGPUInfo() GpuInfoList {
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
resp = append(resp, gpuInfo)
}
if gpuHandles.vulkan != nil {
gpuInfo := GpuInfo{
Library: "vulkan",
}
C.vk_check_vram(*gpuHandles.vulkan, C.int(i), &memInfo)
if memInfo.err != nil {
slog.Info("error looking up vulkan GPU memory", "error", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err))
continue
}
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
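// vk_check_vram reports the Vulkan API version through major/minor (see the C
// side below), so the API version doubles as Compute and DriverMajor/DriverMinor here.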
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
gpuInfo.MinimumMemory = 0
gpuInfo.DependencyPath = depPath
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DriverMajor = int(memInfo.major)
gpuInfo.DriverMinor = int(memInfo.minor)
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
resp = append(resp, gpuInfo)
}
}
// Then AMD
@@ -379,6 +445,29 @@ func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
return 0, nil, ""
}
func LoadVulkanMgmt(vulkanLibPaths []string, capLibPaths []string) (int, *C.vk_handle_t, string, string) {
var resp C.vk_init_resp_t
for _, vkLibPath := range vulkanLibPaths {
for _, capLibPath := range capLibPaths {
vkLib := C.CString(vkLibPath)
capLib := C.CString(capLibPath)
defer C.free(unsafe.Pointer(vkLib))
defer C.free(unsafe.Pointer(capLib))
C.vk_init(vkLib, capLib, &resp)
if resp.err != nil {
slog.Debug("Unable to load vulkan", "library", vkLibPath, "error", C.GoString(resp.err))
slog.Debug("Unable to load libcap", "library", capLibPath, "error", C.GoString(resp.err))
C.free(unsafe.Pointer(resp.err))
} else {
return int(resp.num_devices), &resp.oh, vkLibPath, capLibPath
}
}
}
return 0, nil, "", ""
}
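Note that the deferred C.free calls above only run when LoadVulkanMgmt returns, so the per-iteration C strings accumulate across the nested loops; harmless for a handful of paths, but worth knowing. The search itself is a plain try-every-pair scan with early exit. A minimal standalone sketch of that shape, with a hypothetical probe standing in for vk_init:

package main

import "fmt"

// tryPairs mirrors LoadVulkanMgmt's search order: probe every
// (vulkan, libcap) path combination and stop at the first success.
func tryPairs(vkPaths, capPaths []string, probe func(vkPath, capPath string) bool) (string, string, bool) {
    for _, vkPath := range vkPaths {
        for _, capPath := range capPaths {
            if probe(vkPath, capPath) {
                return vkPath, capPath, true
            }
        }
    }
    return "", "", false
}

func main() {
    vkPath, capPath, ok := tryPairs(
        []string{"/usr/lib/libvulkan.so.1"}, // hypothetical paths
        []string{"/usr/lib/libcap.so.2"},
        func(vkPath, capPath string) bool { return true }, // stand-in probe
    )
    fmt.Println(vkPath, capPath, ok)
}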
func getVerboseState() C.uint16_t {
if envconfig.Debug {
return C.uint16_t(1)
@@ -401,6 +490,8 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
return rocmGetVisibleDevicesEnv(l)
case "oneapi":
return oneapiGetVisibleDevicesEnv(l)
case "vulkan":
return vkGetVisibleDevicesEnv(l)
default:
slog.Debug("no filter required for library " + l[0].Library)
return "", ""


@@ -22,18 +22,28 @@ int check_perfmon() {
if (cap_free(caps) == -1)
return -1;
#endif
return 0;
#else
return 0;
#endif
}
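check_perfmon (only partially visible in the hunk above) uses libcap to verify the process holds CAP_PERFMON before Vulkan is initialized. For reference, the same probe can be done without libcap by reading the CapEff bitmask from /proc/self/status. A standalone sketch (not part of the commit), assuming CAP_PERFMON is capability bit 38 as on Linux 5.8+:

package main

import (
    "bufio"
    "fmt"
    "os"
    "strconv"
    "strings"
)

// capPerfmon is CAP_PERFMON's bit index in the kernel capability masks
// (assumption: Linux 5.8+, where the capability was introduced).
const capPerfmon = 38

// hasPerfmon parses the effective capability mask from /proc/self/status
// and tests the CAP_PERFMON bit, without linking against libcap.
func hasPerfmon() (bool, error) {
    f, err := os.Open("/proc/self/status")
    if err != nil {
        return false, err
    }
    defer f.Close()
    sc := bufio.NewScanner(f)
    for sc.Scan() {
        line := sc.Text()
        if !strings.HasPrefix(line, "CapEff:") {
            continue
        }
        mask, err := strconv.ParseUint(strings.TrimSpace(strings.TrimPrefix(line, "CapEff:")), 16, 64)
        if err != nil {
            return false, err
        }
        return mask&(1<<capPerfmon) != 0, nil
    }
    return false, sc.Err()
}

func main() {
    ok, err := hasPerfmon()
    fmt.Println("CAP_PERFMON:", ok, err)
}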
void vk_init(vk_init_resp_t *resp) {
void vk_init(char* vk_lib_path, char* cap_lib_path, vk_init_resp_t *resp) {
if (!LOAD_LIBRARY(vk_lib_path, RTLD_LAZY)) {
resp->err = "Failed to load Vulkan library";
return;
}
#ifdef __linux__
if (!LOAD_LIBRARY(cap_lib_path, RTLD_LAZY)) {
resp->err = "Failed to load libcap library";
return;
}
if (check_perfmon() != 0) {
resp->err = "Performance monitoring is not allowed. Please enable CAP_PERFMON or run as root to use Vulkan.";
return;
}
#endif
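// Libraries are loaded (and CAP_PERFMON confirmed on Linux); create the Vulkan instance.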
VkInstance instance;
VkApplicationInfo appInfo = {};
@@ -123,4 +133,10 @@ void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp) {
resp->major = VK_API_VERSION_MAJOR(properties.apiVersion);
resp->minor = VK_API_VERSION_MINOR(properties.apiVersion);
resp->patch = VK_API_VERSION_PATCH(properties.apiVersion);
}
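// Tear down the Vulkan instance created by vk_init and release the handle.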
void vk_free(vk_handle_t rh) {
vkDestroyInstance(rh->oh, NULL);
free(rh);
}


@@ -13,5 +13,6 @@ typedef struct vk_init_resp
vk_handle_t oh;
} vk_init_resp_t;
void vk_init(vk_init_resp_t *resp);
void vk_init(char* vk_lib_path, char* cap_lib_path, vk_init_resp_t *resp);
void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp);
void vk_free(vk_handle_t rh);

gpu/vulkan_common.go Normal file

@@ -0,0 +1,19 @@
package gpu
import (
"log/slog"
"strings"
)
func vkGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
ids := []string{}
for _, info := range gpuInfo {
if info.Library != "vulkan" {
// TODO shouldn't happen if things are wired correctly...
slog.Debug("vkGetVisibleDevicesEnv skipping over non-vulkan device", "library", info.Library)
continue
}
ids = append(ids, info.ID)
}
return "GGML_VK_VISIBLE_DEVICES", strings.Join(ids, ",")
}
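For context, the returned pair is a key/value the caller can export into the runner's environment so the GGML Vulkan backend only enumerates the selected devices. A minimal standalone sketch of that hand-off (the runner binary name and the hard-coded IDs are hypothetical, not this commit's wiring):

package main

import (
    "os"
    "os/exec"
)

func main() {
    // As produced by vkGetVisibleDevicesEnv for devices "0" and "2".
    key, val := "GGML_VK_VISIBLE_DEVICES", "0,2"
    cmd := exec.Command("./ollama-runner") // hypothetical runner binary
    cmd.Env = append(os.Environ(), key+"="+val)
    _ = cmd.Start() // error handling elided for brevity
}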