llm: Enable flash attention by default for qwen3 and qwen3moe
parent 55ca827267
commit 0bda72892c
@@ -899,6 +899,8 @@ func (f GGML) SupportsFlashAttention() bool {
 func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
 		"gptoss", "gpt-oss",
+		"qwen3",
+		"qwen3moe",
 	}, f.KV().String("general.architecture"))
 }
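For readers skimming the change, here is a minimal standalone sketch of the allow-list check this diff extends. The helper name flashAttentionDefault and the main function are illustrative assumptions, not ollama API; in the real code the architecture string is read from the model's GGUF metadata via f.KV().String("general.architecture").

package main

import (
	"fmt"
	"slices"
)

// flashAttentionDefault mirrors the allow-list in the diff above:
// flash attention is enabled by default only for architectures on
// the list. Hypothetical standalone helper, for illustration only.
func flashAttentionDefault(arch string) bool {
	return slices.Contains([]string{
		"gptoss", "gpt-oss",
		"qwen3",
		"qwen3moe",
	}, arch)
}

func main() {
	for _, arch := range []string{"qwen3", "qwen3moe", "llama"} {
		fmt.Printf("%-8s flash attention by default: %v\n", arch, flashAttentionDefault(arch))
	}
}

Gating the default behind an explicit architecture list keeps flash attention off where it has not been validated; a new architecture opts in by being appended to the slice, as qwen3 and qwen3moe are here.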