update gpu.go

This commit is contained in:
pufferffish 2024-06-15 12:05:01 +01:00
parent e77ea68e11
commit 18f3f960b0
4 changed files with 441 additions and 228 deletions

View File

@ -24,20 +24,45 @@ import (
"github.com/ollama/ollama/format"
)
type handles struct {
type cudaHandles struct {
deviceCount int
cudart *C.cudart_handle_t
nvcuda *C.nvcuda_handle_t
nvml *C.nvml_handle_t
}
type oneapiHandles struct {
oneapi *C.oneapi_handle_t
vulkan *C.vk_handle_t
deviceCount int
}
type vulkanHandles struct {
vulkan *C.vulkan_handle_t
deviceCount int
}
const (
cudaMinimumMemory = 457 * format.MebiByte
rocmMinimumMemory = 457 * format.MebiByte
// TODO OneAPI minimum memory
)
var gpuMutex sync.Mutex
var (
gpuMutex sync.Mutex
bootstrapped bool
cpuCapability CPUCapability
cpus []CPUInfo
cudaGPUs []CudaGPUInfo
nvcudaLibPath string
cudartLibPath string
oneapiLibPath string
vulkanLibPath string
libcapLibPath string
nvmlLibPath string
rocmGPUs []RocmGPUInfo
oneapiGPUs []OneapiGPUInfo
vulkanGPUs []VulkanGPUInfo
)
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
@ -47,152 +72,133 @@ var RocmComputeMin = 9
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
var CudartLinuxGlobs = []string{
"/usr/local/cuda/lib64/libcudart.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
"/usr/lib/x86_64-linux-gnu/libcudart.so*",
"/usr/lib/wsl/lib/libcudart.so*",
"/usr/lib/wsl/drivers/*/libcudart.so*",
"/opt/cuda/lib64/libcudart.so*",
"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
"/usr/lib/aarch64-linux-gnu/libcudart.so*",
"/usr/local/cuda/lib*/libcudart.so*",
"/usr/lib*/libcudart.so*",
"/usr/local/lib*/libcudart.so*",
}
var CudartWindowsGlobs = []string{
"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}
var NvcudaLinuxGlobs = []string{
"/usr/local/cuda*/targets/*/lib/libcuda.so*",
"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
"/usr/lib/*-linux-gnu/libcuda.so*",
"/usr/lib/wsl/lib/libcuda.so*",
"/usr/lib/wsl/drivers/*/libcuda.so*",
"/opt/cuda/lib*/libcuda.so*",
"/usr/local/cuda/lib*/libcuda.so*",
"/usr/lib*/libcuda.so*",
"/usr/local/lib*/libcuda.so*",
}
var NvcudaWindowsGlobs = []string{
"c:\\windows\\system*\\nvcuda.dll",
}
var OneapiWindowsGlobs = []string{
"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}
var OneapiLinuxGlobs = []string{
"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
"/usr/lib*/libze_intel_gpu.so*",
}
var VulkanLinuxGlobs = []string{
"/usr/lib/x86_64-linux-gnu/libvulkan.so*",
"/usr/lib*/libvulkan.so*",
}
var CapLinuxGlobs = []string{
"/usr/lib/x86_64-linux-gnu/libcap.so*",
"/usr/lib*/libcap.so*",
}
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")
// Note: gpuMutex must already be held
func initGPUHandles() *handles {
func initCudaHandles() *cudaHandles {
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
gpuHandles := &handles{}
var cudartMgmtName string
var cudartMgmtPatterns []string
var nvcudaMgmtName string
var nvcudaMgmtPatterns []string
var vulkanMgmtName string
var vulkanMgmtPatterns []string
var libcapMgmtName string
var libcapMgmtPatterns []string
tmpDir, _ := PayloadsDir()
switch runtime.GOOS {
case "windows":
cudartMgmtName = "cudart64_*.dll"
localAppData := os.Getenv("LOCALAPPDATA")
cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
// Aligned with driver, we can't carry as payloads
nvcudaMgmtName = "nvcuda.dll"
nvcudaMgmtPatterns = NvcudaWindowsGlobs
case "linux":
cudartMgmtName = "libcudart.so*"
if tmpDir != "" {
// TODO - add "payloads" for subprocess
cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
}
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
// Aligned with driver, we can't carry as payloads
nvcudaMgmtName = "libcuda.so*"
nvcudaMgmtPatterns = NvcudaLinuxGlobs
// Vulkan also needs libcap
vulkanMgmtName = "libvulkan.so*"
vulkanMgmtPatterns = VulkanLinuxGlobs
libcapMgmtName = "libcap.so*"
libcapMgmtPatterns = CapLinuxGlobs
default:
return gpuHandles
cHandles := &cudaHandles{}
// Short Circuit if we already know which library to use
if nvmlLibPath != "" {
cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
return cHandles
}
if nvcudaLibPath != "" {
cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
return cHandles
}
if cudartLibPath != "" {
cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
return cHandles
}
slog.Debug("Detecting GPUs")
nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
slog.Debug("searching for GPU discovery libraries for NVIDIA")
var cudartMgmtPatterns []string
// Aligned with driver, we can't carry as payloads
nvcudaMgmtPatterns := NvcudaGlobs
if runtime.GOOS == "windows" {
localAppData := os.Getenv("LOCALAPPDATA")
cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
}
tmpDir, _ := PayloadsDir()
if tmpDir != "" {
// TODO - add "payloads" for subprocess
cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)}
}
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
if len(NvmlGlobs) > 0 {
nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
if len(nvmlLibPaths) > 0 {
nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
if nvml != nil {
slog.Debug("nvidia-ml loaded", "library", libPath)
cHandles.nvml = nvml
nvmlLibPath = libPath
}
}
}
nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
if len(nvcudaLibPaths) > 0 {
deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
if nvcuda != nil {
slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
gpuHandles.nvcuda = nvcuda
gpuHandles.deviceCount = deviceCount
return gpuHandles
cHandles.nvcuda = nvcuda
cHandles.deviceCount = deviceCount
nvcudaLibPath = libPath
return cHandles
}
}
cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
if len(cudartLibPaths) > 0 {
deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
if cudart != nil {
slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
gpuHandles.cudart = cudart
gpuHandles.deviceCount = deviceCount
return gpuHandles
cHandles.cudart = cudart
cHandles.deviceCount = deviceCount
cudartLibPath = libPath
return cHandles
}
}
vulkanLibPaths := FindGPULibs(vulkanMgmtName, vulkanMgmtPatterns)
return cHandles
}
var libcapLibPaths []string
if runtime.GOOS == "linux" {
libcapLibPaths = FindGPULibs(libcapMgmtName, libcapMgmtPatterns)
// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
oHandles := &oneapiHandles{}
// Short Circuit if we already know which library to use
if oneapiLibPath != "" {
oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
return oHandles
}
oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
if len(oneapiLibPaths) > 0 {
oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
}
return oHandles
}
// Note: gpuMutex must already be held
func initVulkanHandles() *vulkanHandles {
vHandles := &vulkanHandles{}
// Short Circuit if we already know which library to use
if vulkanLibPath != "" && libcapLibPath != "" {
vHandles.deviceCount, vHandles.vulkan, _, _ = LoadVulkanMgmt([]string{vulkanLibPath}, []string{libcapLibPath})
return vHandles
}
vulkanPaths := FindGPULibs(VulkanMgmtName, VulkanGlobs)
libcapPaths := FindLibCapLibs()
if len(vulkanPaths) > 0 && len(libcapPaths) > 0 {
vHandles.deviceCount, vHandles.vulkan, vulkanLibPath, libcapLibPath = LoadVulkanMgmt(vulkanPaths, libcapPaths)
}
return vHandles
}
func GetCPUInfo() GpuInfoList {
gpuMutex.Lock()
if !bootstrapped {
gpuMutex.Unlock()
GetGPUInfo()
} else {
libcapLibPaths = []string{"<unused>"}
gpuMutex.Unlock()
}
if len(vulkanLibPaths) > 0 && len(libcapLibPaths) > 0 {
deviceCount, vulkan, vkLibPath, capLibPath := LoadVulkanMgmt(vulkanLibPaths, libcapLibPaths)
if vulkan != nil {
slog.Debug("detected GPUs", "library", vkLibPath, capLibPath, "count", deviceCount)
gpuHandles.vulkan = vulkan
gpuHandles.deviceCount = deviceCount
return gpuHandles
}
}
return gpuHandles
return GpuInfoList{cpus[0].GpuInfo}
}
func GetGPUInfo() GpuInfoList {
@ -200,141 +206,300 @@ func GetGPUInfo() GpuInfoList {
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
gpuMutex.Lock()
defer gpuMutex.Unlock()
gpuHandles := initGPUHandles()
needRefresh := true
var cHandles *cudaHandles
var oHandles *oneapiHandles
var vHandles *vulkanHandles
defer func() {
if gpuHandles.cudart != nil {
C.cudart_release(*gpuHandles.cudart)
if cHandles != nil {
if cHandles.cudart != nil {
C.cudart_release(*cHandles.cudart)
}
if cHandles.nvcuda != nil {
C.nvcuda_release(*cHandles.nvcuda)
}
if cHandles.nvml != nil {
C.nvml_release(*cHandles.nvml)
}
}
if gpuHandles.nvcuda != nil {
C.nvcuda_release(*gpuHandles.nvcuda)
}
if gpuHandles.vulkan != nil {
C.vk_release(*gpuHandles.vulkan)
if oHandles != nil {
if oHandles.oneapi != nil {
// TODO - is this needed?
C.oneapi_release(*oHandles.oneapi)
}
}
}()
// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
cpuVariant := GetCPUVariant()
if cpuVariant == "" && runtime.GOARCH == "amd64" {
slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
}
if !bootstrapped {
slog.Debug("Detecting GPUs")
needRefresh = false
cpuCapability = GetCPUCapability()
var memInfo C.mem_info_t
// On windows we bundle the nvidia library one level above the runner dir
depPath := ""
if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
depPath = filepath.Dir(envconfig.RunnersDir)
}
var memInfo C.mem_info_t
resp := []GpuInfo{}
// NVIDIA and Vulkan first
for i := range gpuHandles.deviceCount {
// TODO once we support CPU compilation variants of GPU libraries refine this...
if cpuVariant == "" && runtime.GOARCH == "amd64" {
continue
mem, err := GetCPUMem()
if err != nil {
slog.Warn("error looking up system memory", "error", err)
}
if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil {
gpuInfo := GpuInfo{
Library: "cuda",
cpus = []CPUInfo{CPUInfo{
GpuInfo: GpuInfo{
memInfo: mem,
Library: "cpu",
Variant: cpuCapability,
ID: "0",
},
}}
// Fallback to CPU mode if we're lacking required vector extensions on x86
if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability)
bootstrapped = true
// No need to do any GPU discovery, since we can't run on them
return GpuInfoList{cpus[0].GpuInfo}
}
// On windows we bundle the nvidia library one level above the runner dir
depPath := ""
if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
depPath = filepath.Dir(envconfig.RunnersDir)
}
// Load ALL libraries
cHandles = initCudaHandles()
// NVIDIA
for i := range cHandles.deviceCount {
if cHandles.cudart != nil || cHandles.nvcuda != nil {
gpuInfo := CudaGPUInfo{
GpuInfo: GpuInfo{
Library: "cuda",
},
index: i,
}
var driverMajor int
var driverMinor int
if cHandles.cudart != nil {
C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
} else {
C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
driverMajor = int(cHandles.nvcuda.driver_major)
driverMinor = int(cHandles.nvcuda.driver_minor)
}
if memInfo.err != nil {
slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err))
continue
}
if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
continue
}
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
gpuInfo.MinimumMemory = cudaMinimumMemory
gpuInfo.DependencyPath = depPath
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DriverMajor = driverMajor
gpuInfo.DriverMinor = driverMinor
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
cudaGPUs = append(cudaGPUs, gpuInfo)
}
var driverMajor int
var driverMinor int
if gpuHandles.cudart != nil {
C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
}
// Intel
oHandles = initOneAPIHandles()
for d := 0; oHandles.oneapi != nil && d < int(oHandles.oneapi.num_drivers); d++ {
if oHandles.oneapi == nil {
// shouldn't happen
slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
continue
}
devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
for i := range devCount {
gpuInfo := OneapiGPUInfo{
GpuInfo: GpuInfo{
Library: "oneapi",
},
driverIndex: d,
gpuIndex: int(i),
}
// TODO - split bootstrapping from updating free memory
C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
// TODO - convert this to MinimumMemory based on testing...
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo.free = C.uint64_t(totalFreeMem)
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
// TODO dependency path?
oneapiGPUs = append(oneapiGPUs, gpuInfo)
}
}
// Vulkan
vHandles = initVulkanHandles()
for i := range vHandles.deviceCount {
if vHandles.vulkan != nil {
gpuInfo := VulkanGPUInfo{
GpuInfo: GpuInfo{
Library: "vulkan",
},
index: i,
}
C.vk_check_vram(*vHandles.vulkan, C.int(i), &memInfo)
if memInfo.err != nil {
slog.Info("error looking up vulkan GPU memory", "error", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err))
continue
}
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
gpuInfo.MinimumMemory = 0
gpuInfo.DependencyPath = depPath
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DriverMajor = int(memInfo.major)
gpuInfo.DriverMinor = int(memInfo.minor)
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
vulkanGPUs = append(vulkanGPUs, gpuInfo)
}
}
rocmGPUs = AMDGetGPUInfo()
bootstrapped = true
}
// For detected GPUs, load library if not loaded
// Refresh free memory usage
if needRefresh {
mem, err := GetCPUMem()
if err != nil {
slog.Warn("error looking up system memory", "error", err)
} else {
slog.Debug("updating system memory data",
slog.Group(
"before",
"total", format.HumanBytes2(cpus[0].TotalMemory),
"free", format.HumanBytes2(cpus[0].FreeMemory),
),
slog.Group(
"now",
"total", format.HumanBytes2(mem.TotalMemory),
"free", format.HumanBytes2(mem.FreeMemory),
),
)
cpus[0].FreeMemory = mem.FreeMemory
}
var memInfo C.mem_info_t
if cHandles == nil && len(cudaGPUs) > 0 {
cHandles = initCudaHandles()
}
for i, gpu := range cudaGPUs {
if cHandles.nvml != nil {
C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
} else if cHandles.cudart != nil {
C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
} else if cHandles.nvcuda != nil {
C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
memInfo.used = memInfo.total - memInfo.free
} else {
C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
driverMajor = int(gpuHandles.nvcuda.driver_major)
driverMinor = int(gpuHandles.nvcuda.driver_minor)
// shouldn't happen
slog.Warn("no valid cuda library loaded to refresh vram usage")
break
}
if memInfo.err != nil {
slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err))
continue
}
if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
if memInfo.free == 0 {
slog.Warn("error looking up nvidia GPU memory")
continue
}
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
gpuInfo.MinimumMemory = cudaMinimumMemory
gpuInfo.DependencyPath = depPath
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DriverMajor = driverMajor
gpuInfo.DriverMinor = driverMinor
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
resp = append(resp, gpuInfo)
slog.Debug("updating cuda memory data",
"gpu", gpu.ID,
"name", gpu.Name,
slog.Group(
"before",
"total", format.HumanBytes2(gpu.TotalMemory),
"free", format.HumanBytes2(gpu.FreeMemory),
),
slog.Group(
"now",
"total", format.HumanBytes2(uint64(memInfo.total)),
"free", format.HumanBytes2(uint64(memInfo.free)),
"used", format.HumanBytes2(uint64(memInfo.used)),
),
)
cudaGPUs[i].FreeMemory = uint64(memInfo.free)
}
if gpuHandles.vulkan != nil {
gpuInfo := GpuInfo{
Library: "vulkan",
}
C.vk_check_vram(*gpuHandles.vulkan, C.int(i), &memInfo)
if memInfo.err != nil {
slog.Info("error looking up vulkan GPU memory", "error", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err))
if oHandles == nil && len(oneapiGPUs) > 0 {
oHandles = initOneAPIHandles()
}
for i, gpu := range oneapiGPUs {
if oHandles.oneapi == nil {
// shouldn't happen
slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
continue
}
C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
// TODO - convert this to MinimumMemory based on testing...
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo.free = C.uint64_t(totalFreeMem)
oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
}
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
gpuInfo.MinimumMemory = 0
gpuInfo.DependencyPath = depPath
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DriverMajor = int(memInfo.major)
gpuInfo.DriverMinor = int(memInfo.minor)
if vHandles == nil && len(vulkanGPUs) > 0 {
vHandles = initVulkanHandles()
}
for i, gpu := range vulkanGPUs {
if vHandles.vulkan == nil {
// shouldn't happen
slog.Warn("nil vulkan handle with device count", "count", oHandles.deviceCount)
continue
}
C.vk_check_vram(*vHandles.vulkan, C.int(gpu.index), &memInfo)
// TODO - convert this to MinimumMemory based on testing...
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo.free = C.uint64_t(totalFreeMem)
vulkanGPUs[i].FreeMemory = uint64(memInfo.free)
}
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
resp = append(resp, gpuInfo)
err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
if err != nil {
slog.Debug("problem refreshing ROCm free memory", "error", err)
}
}
// Then AMD
resp = append(resp, AMDGetGPUInfo()...)
resp := []GpuInfo{}
for _, gpu := range cudaGPUs {
resp = append(resp, gpu.GpuInfo)
}
for _, gpu := range rocmGPUs {
resp = append(resp, gpu.GpuInfo)
}
for _, gpu := range oneapiGPUs {
resp = append(resp, gpu.GpuInfo)
}
for _, gpu := range vulkanGPUs {
resp = append(resp, gpu.GpuInfo)
}
if len(resp) == 0 {
C.cpu_check_ram(&memInfo)
if memInfo.err != nil {
slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err))
return resp
}
gpuInfo := GpuInfo{
Library: "cpu",
Variant: cpuVariant,
}
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
resp = append(resp, gpuInfo)
resp = append(resp, cpus[0].GpuInfo)
}
return resp
}
func GetCPUMem() (memInfo, error) {
var ret memInfo
var info C.mem_info_t
C.cpu_check_ram(&info)
if info.err != nil {
defer C.free(unsafe.Pointer(info.err))
return ret, fmt.Errorf(C.GoString(info.err))
}
ret.FreeMemory = uint64(info.free)
ret.TotalMemory = uint64(info.total)
return ret, nil
}
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
var ldPaths []string
@ -431,8 +596,26 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
return 0, nil, ""
}
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
var resp C.nvml_init_resp_t
resp.ch.verbose = getVerboseState()
for _, libPath := range nvmlLibPaths {
lib := C.CString(libPath)
defer C.free(unsafe.Pointer(lib))
C.nvml_init(lib, &resp)
if resp.err != nil {
slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
C.free(unsafe.Pointer(resp.err))
} else {
return &resp.ch, libPath
}
}
return nil, ""
}
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
var resp C.oneapi_init_resp_t
num_devices := 0
resp.oh.verbose = getVerboseState()
for _, libPath := range oneapiLibPaths {
lib := C.CString(libPath)
@ -442,7 +625,10 @@ func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
C.free(unsafe.Pointer(resp.err))
} else {
return int(resp.num_devices), &resp.oh, libPath
for i := range resp.oh.num_drivers {
num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
}
return num_devices, &resp.oh, libPath
}
}
return 0, nil, ""

View File

@ -43,10 +43,26 @@ var OneapiGlobs = []string{
"/usr/lib*/libze_intel_gpu.so*",
}
var VulkanGlobs = []string{
"/usr/lib/x86_64-linux-gnu/libvulkan.so*",
"/usr/lib*/libvulkan.so*",
}
var capLinuxGlobs = []string{
"/usr/lib/x86_64-linux-gnu/libcap.so*",
"/usr/lib*/libcap.so*",
}
var CudartMgmtName = "libcudart.so*"
var NvcudaMgmtName = "libcuda.so*"
var NvmlMgmtName = "" // not currently wired on linux
var OneapiMgmtName = "libze_intel_gpu.so"
var VulkanMgmtName = "libvulkan.so*"
var libcapMgmtName = "libcap.so*"
func FindLibCapLibs() []string {
return FindGPULibs(libcapMgmtName, capLinuxGlobs)
}
func GetCPUMem() (memInfo, error) {
var mem memInfo

View File

@ -45,6 +45,10 @@ var NvcudaMgmtName = "nvcuda.dll"
var NvmlMgmtName = "nvml.dll"
var OneapiMgmtName = "ze_intel_gpu64.dll"
func FindLibCapLibs() []string {
return []string{"<unused>"}
}
func GetCPUMem() (memInfo, error) {
memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
r1, _, err := globalMemoryStatusExProc.Call(uintptr(unsafe.Pointer(&memStatus)))

View File

@ -62,6 +62,13 @@ type OneapiGPUInfo struct {
}
type OneapiGPUInfoList []OneapiGPUInfo
type VulkanGPUInfo struct {
GpuInfo
index int
}
type VulkanGPUInfoList []VulkanGPUInfo
type GpuInfoList []GpuInfo
// Split up the set of gpu info's by Library and variant