From fd959fbf7a6d2b4477585cde809d224b872a676d Mon Sep 17 00:00:00 2001
From: nicole pardal
Date: Wed, 26 Nov 2025 19:42:34 -0800
Subject: [PATCH] convert: support Olmo3 with sliding window attention

Accept the Olmo3ForCausalLM architecture name in the converter, export
the sliding_window and layer_types config fields, and have the olmo
model pick a sliding-window or causal KV cache per layer based on
layer_types.
---
 convert/convert.go         |  2 +-
 convert/convert_olmo.go    | 28 +++++++++++++++++++---------
 model/models/olmo/model.go | 20 +++++++++++++++++++-
 3 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/convert/convert.go b/convert/convert.go
index f5cef5567..cf0b0545d 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -200,7 +200,7 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
 		conv = &qwen25VLModel{}
 	case "Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration":
 		conv = &qwen3VLModel{}
-	case "OlmoForCausalLM", "OLMoForCausalLM", "OLMo3ForCausalLM":
+	case "OlmoForCausalLM", "OLMoForCausalLM", "OLMo3ForCausalLM", "Olmo3ForCausalLM":
 		conv = &olmoModel{}
 	case "BertModel":
 		conv = &bertModel{}
diff --git a/convert/convert_olmo.go b/convert/convert_olmo.go
index 848b39475..f62ba533e 100644
--- a/convert/convert_olmo.go
+++ b/convert/convert_olmo.go
@@ -9,15 +9,17 @@ import (
 
 type olmoModel struct {
 	ModelParameters
-	HiddenSize            uint32  `json:"hidden_size"`
-	NumHiddenLayers       uint32  `json:"num_hidden_layers"`
-	IntermediateSize      uint32  `json:"intermediate_size"`
-	NumAttentionHeads     uint32  `json:"num_attention_heads"`
-	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
-	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-	RMSNormEPS            float32 `json:"rms_norm_eps"`
-	RopeTheta             float32 `json:"rope_theta"`
-	ClampKQV              float32 `json:"f_clamp_kqv"`
+	HiddenSize            uint32   `json:"hidden_size"`
+	NumHiddenLayers       uint32   `json:"num_hidden_layers"`
+	IntermediateSize      uint32   `json:"intermediate_size"`
+	NumAttentionHeads     uint32   `json:"num_attention_heads"`
+	NumKeyValueHeads      uint32   `json:"num_key_value_heads"`
+	MaxPositionEmbeddings uint32   `json:"max_position_embeddings"`
+	RMSNormEPS            float32  `json:"rms_norm_eps"`
+	RopeTheta             float32  `json:"rope_theta"`
+	ClampKQV              float32  `json:"f_clamp_kqv"`
+	SlidingWindow         uint32   `json:"sliding_window"`
+	LayerTypes            []string `json:"layer_types"`
 }
 
 var _ ModelConverter = (*olmoModel)(nil)
@@ -46,6 +48,14 @@ func (p *olmoModel) KV(t *Tokenizer) ggml.KV {
 		kv["olmo.attention.clamp_kqv"] = p.ClampKQV
 	}
 
+	if p.SlidingWindow > 0 {
+		kv["olmo.attention.sliding_window"] = p.SlidingWindow
+	}
+
+	if len(p.LayerTypes) > 0 {
+		kv["olmo.attention.layer_types"] = p.LayerTypes
+	}
+
 	return kv
 }
 
diff --git a/model/models/olmo/model.go b/model/models/olmo/model.go
index 2f891935c..698b9a614 100644
--- a/model/models/olmo/model.go
+++ b/model/models/olmo/model.go
@@ -30,6 +30,8 @@ type Model struct {
 	OutputNorm *nn.RMSNorm `gguf:"output_norm"`
 	Output     *nn.Linear  `gguf:"output,alt:token_embd"`
 
+	layerTypes []string
+
 	Options
 }
 
@@ -69,6 +71,7 @@ func New(c fs.Config) (model.Model, error) {
 	m := Model{
 		TextProcessor: processor,
 		Layers:        make([]Layer, c.Uint("block_count")),
+		layerTypes:    c.Strings("attention.layer_types"),
 		Options: Options{
 			hiddenSize: int(c.Uint("embedding_length")),
 			numHeads:   int(c.Uint("attention.head_count")),
@@ -82,7 +85,14 @@ func New(c fs.Config) (model.Model, error) {
 		},
 	}
 
-	m.Cache = kvcache.NewCausalCache(m.Shift)
+	if slidingWindow := c.Uint("attention.sliding_window"); slidingWindow > 0 {
+		m.Cache = kvcache.NewWrapperCache(
+			kvcache.NewSWACache(int32(slidingWindow), m.Shift),
+			kvcache.NewCausalCache(m.Shift),
+		)
+	} else {
+		m.Cache = kvcache.NewCausalCache(m.Shift)
+	}
 
 	return &m, nil
 }
@@ -170,6 +180,14 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
 	for i, layer := range m.Layers {
 		m.Cache.SetLayer(i)
 
+		if wc, ok := m.Cache.(*kvcache.WrapperCache); ok && len(m.layerTypes) > i {
+			if m.layerTypes[i] == "full_attention" {
+				wc.SetLayerType(1)
+			} else {
+				wc.SetLayerType(0)
+			}
+		}
+
 		var outputs ml.Tensor
 		if i == len(m.Layers)-1 {
 			outputs = batch.Outputs
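
Reviewer note, not part of the patch: NewWrapperCache is given the SWA
cache first and the causal cache second, so wrapper type 0 is the
sliding-window cache and type 1 is the full-attention (causal) cache,
which is what the SetLayerType calls in Forward rely on. Below is a
minimal, self-contained sketch of that per-layer dispatch; the helper
name and the layer_types values are illustrative, not from the patch.

package main

import "fmt"

// cacheTypeForLayer mirrors the dispatch in Forward: layers marked
// "full_attention" use the causal cache (type 1, the second argument to
// NewWrapperCache); other layers use the sliding-window cache (type 0).
// Note the patch leaves the wrapper's current type untouched when
// layer_types is shorter than the layer index; this sketch defaults to 0.
func cacheTypeForLayer(layerTypes []string, i int) int {
	if i < len(layerTypes) && layerTypes[i] == "full_attention" {
		return 1
	}
	return 0
}

func main() {
	// Illustrative values; real ones come from the upstream config's
	// layer_types field, exported here as olmo.attention.layer_types.
	layerTypes := []string{"sliding_attention", "sliding_attention", "full_attention"}
	for i := range layerTypes {
		fmt.Printf("layer %d -> cache type %d\n", i, cacheTypeForLayer(layerTypes, i))
	}
}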
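One behavior worth a second look: because the hook in Forward only calls
SetLayerType when len(m.layerTypes) > i, a missing or truncated
layer_types list leaves each remaining layer on whatever type was set
last (initially the zero value, i.e. the SWA cache) rather than falling
back to full attention.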