discover: CPU supports flash attention

We already run flash attention on the CPU when layers are partially offloaded, but we were disabling it when running purely on the CPU, which is unnecessary.
Jesse Gross 2025-08-11 14:45:45 -07:00 committed by Ryan Schumacher
parent 257f0b6daa
commit 8ea0abf658
1 changed file with 2 additions and 1 deletion


@@ -171,7 +171,8 @@ func (si SystemInfo) GetOptimalThreadCount() int {
 // For each GPU, check if it does NOT support flash attention
 func (l GpuInfoList) FlashAttentionSupported() bool {
 	for _, gpu := range l {
-		supportsFA := gpu.Library == "metal" ||
+		supportsFA := gpu.Library == "cpu" ||
+			gpu.Library == "metal" ||
 			(gpu.Library == "cuda" && gpu.DriverMajor >= 7) ||
 			gpu.Library == "rocm"
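For context, here is a minimal sketch of what the full FlashAttentionSupported method plausibly looks like after this change. Only the supportsFA expression and its surrounding lines are confirmed by the diff; the GpuInfo fields shown, the early-return loop body, and the final return true are assumptions inferred from the method name and the visible context.

// Sketch of discover.GpuInfoList.FlashAttentionSupported after this commit.
// The supportsFA expression matches the diff; the rest of the function body
// and the struct fields are assumed from context and may differ from the
// actual source.
package discover

type GpuInfo struct {
	Library     string // backend name: "cpu", "metal", "cuda", "rocm", ...
	DriverMajor int    // CUDA driver major version, used to gate FA on CUDA
}

type GpuInfoList []GpuInfo

// FlashAttentionSupported reports whether every device in the list can run
// flash attention. With this commit, a pure-CPU "device" no longer disables it.
func (l GpuInfoList) FlashAttentionSupported() bool {
	for _, gpu := range l {
		supportsFA := gpu.Library == "cpu" ||
			gpu.Library == "metal" ||
			(gpu.Library == "cuda" && gpu.DriverMajor >= 7) ||
			gpu.Library == "rocm"

		if !supportsFA {
			// Assumed behavior: one unsupported device disables flash
			// attention for the whole list.
			return false
		}
	}
	return true
}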