llm: Enable flash attention by default for gemma3
This commit is contained in:
parent
0d713051a2
commit
c3c85aa06c
|
|
@@ -893,6 +893,7 @@ func (f GGML) SupportsFlashAttention() bool {
 // FlashAttention checks if the model should enable flash attention
 func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
+		"gemma3",
 		"gptoss", "gpt-oss",
 		"qwen3",
 		"qwen3moe",
||||||
Loading…
Reference in New Issue