discover: CPU supports flash attention
We already run flash attention on CPUs when partial offloading is in use, but it was being disabled when running on pure CPU, which is unnecessary.
This commit is contained in:
parent
dbfd7bd027
commit
8f4ec9ab28
|
|
@ -171,7 +171,8 @@ func (si SystemInfo) GetOptimalThreadCount() int {
|
|||
// For each GPU, check if it does NOT support flash attention
|
||||
func (l GpuInfoList) FlashAttentionSupported() bool {
|
||||
for _, gpu := range l {
|
||||
supportsFA := gpu.Library == "metal" ||
|
||||
supportsFA := gpu.Library == "cpu" ||
|
||||
gpu.Library == "metal" ||
|
||||
(gpu.Library == "cuda" && gpu.DriverMajor >= 7) ||
|
||||
gpu.Library == "rocm"
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue