discover: CPU supports flash attention
We already run flash attention on CPUs when there is partial GPU offloading, but we were disabling it when running on pure CPU, which is unnecessary.
This commit is contained in:
parent
257f0b6daa
commit
8ea0abf658
|
|
@@ -171,7 +171,8 @@ func (si SystemInfo) GetOptimalThreadCount() int {

 // For each GPU, check if it does NOT support flash attention
 func (l GpuInfoList) FlashAttentionSupported() bool {
 	for _, gpu := range l {
-		supportsFA := gpu.Library == "metal" ||
+		supportsFA := gpu.Library == "cpu" ||
+			gpu.Library == "metal" ||
 			(gpu.Library == "cuda" && gpu.DriverMajor >= 7) ||
 			gpu.Library == "rocm"
|
|
|||
Loading…
Reference in New Issue