diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go index 3f4374cd0..2eb1dbdde 100644 --- a/fs/ggml/ggml.go +++ b/fs/ggml/ggml.go @@ -175,6 +175,10 @@ func (kv KV) Bools(key string, defaultValue ...[]bool) []bool { } func (kv KV) OllamaEngineRequired() bool { + if kv.Uint("pooling_type") > 0 { + return false + } + return slices.Contains([]string{ "gemma3", "gemma3n", @@ -769,8 +773,7 @@ func (f GGML) SupportsKVCacheType(cacheType string) bool { // SupportsFlashAttention checks if the model supports flash attention func (f GGML) SupportsFlashAttention() bool { - _, isEmbedding := f.KV()[fmt.Sprintf("%s.pooling_type", f.KV().Architecture())] - if isEmbedding { + if f.KV().Uint("pooling_type") > 0 { return false }