Remove flashattention setting gpu.go

This commit is contained in:
Thomas Stocker 2025-08-09 22:26:54 +02:00 committed by GitHub
parent 42463fbb7f
commit 57270767ac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 3 additions and 6 deletions

View File

@ -309,8 +309,7 @@ func GetGPUInfo() GpuInfoList {
slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err))
continue
}
gpuInfo.FlashAttention = driverMajor >= 7
}
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
@ -394,8 +393,7 @@ func GetGPUInfo() GpuInfoList {
C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
// TODO - convert this to MinimumMemory based on testing...
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo.free = C.uint64_t(totalFreeMem)
gpuInfo.FlashAttention = false
memInfo.free = C.uint64_t(totalFreeMem)
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
@ -424,8 +422,7 @@ func GetGPUInfo() GpuInfoList {
C.free(unsafe.Pointer(memInfo.err))
continue
}
gpuInfo.FlashAttention = (C.vk_check_flash_attention(*vHandles.vulkan, C.int(i)) == 0) // 0 means supported
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])