discover: CPU supports flash attention

We already run flash attention on the CPU when layers are partially offloaded, but we were disabling it when running purely on the CPU, which is unnecessary.
Jesse Gross 2025-08-11 14:45:45 -07:00 committed by Ryan Schumacher
parent 257f0b6daa
commit 8ea0abf658
1 changed file with 2 additions and 1 deletion


@@ -171,7 +171,8 @@ func (si SystemInfo) GetOptimalThreadCount() int {
 // For each GPU, check if it does NOT support flash attention
 func (l GpuInfoList) FlashAttentionSupported() bool {
 	for _, gpu := range l {
-		supportsFA := gpu.Library == "metal" ||
+		supportsFA := gpu.Library == "cpu" ||
+			gpu.Library == "metal" ||
 			(gpu.Library == "cuda" && gpu.DriverMajor >= 7) ||
 			gpu.Library == "rocm"
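For context, here is a minimal sketch of what the full FlashAttentionSupported method plausibly looks like after this change. Only the supportsFA expression and its surrounding lines are confirmed by the diff; the GpuInfo fields shown, the early-return loop body, and the final return true are assumptions inferred from the method name and the visible context.

// Sketch of discover.GpuInfoList.FlashAttentionSupported after this commit.
// The supportsFA expression matches the diff; the rest of the function body
// and the struct fields are assumed from context and may differ from the
// actual source.
package discover

type GpuInfo struct {
	Library     string // backend name: "cpu", "metal", "cuda", "rocm", ...
	DriverMajor int    // CUDA driver major version, used to gate FA on CUDA
}

type GpuInfoList []GpuInfo

// FlashAttentionSupported reports whether every device in the list can run
// flash attention. With this commit, a pure-CPU "device" no longer disables it.
func (l GpuInfoList) FlashAttentionSupported() bool {
	for _, gpu := range l {
		supportsFA := gpu.Library == "cpu" ||
			gpu.Library == "metal" ||
			(gpu.Library == "cuda" && gpu.DriverMajor >= 7) ||
			gpu.Library == "rocm"

		if !supportsFA {
			// Assumed behavior: one unsupported device disables flash
			// attention for the whole list.
			return false
		}
	}
	return true
}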