From fd959fbf7a6d2b4477585cde809d224b872a676d Mon Sep 17 00:00:00 2001
From: nicole pardal
Date: Wed, 26 Nov 2025 19:42:34 -0800
Subject: [PATCH] convert: support Olmo3 with sliding window attention

Accept the Olmo3ForCausalLM architecture name in the converter, export
the sliding_window and layer_types config fields, and have the olmo
model pick a sliding-window or causal KV cache per layer based on
layer_types.
---
 convert/convert.go         |  2 +-
 convert/convert_olmo.go    | 28 +++++++++++++++++++---------
 model/models/olmo/model.go | 20 +++++++++++++++++++-
 3 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/convert/convert.go b/convert/convert.go
index f5cef5567..cf0b0545d 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -200,7 +200,7 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
 		conv = &qwen25VLModel{}
 	case "Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration":
 		conv = &qwen3VLModel{}
-	case "OlmoForCausalLM", "OLMoForCausalLM", "OLMo3ForCausalLM":
+	case "OlmoForCausalLM", "OLMoForCausalLM", "OLMo3ForCausalLM", "Olmo3ForCausalLM":
 		conv = &olmoModel{}
 	case "BertModel":
 		conv = &bertModel{}
diff --git a/convert/convert_olmo.go b/convert/convert_olmo.go
index 848b39475..f62ba533e 100644
--- a/convert/convert_olmo.go
+++ b/convert/convert_olmo.go
@@ -9,15 +9,17 @@ import (
 
 type olmoModel struct {
 	ModelParameters
-	HiddenSize            uint32  `json:"hidden_size"`
-	NumHiddenLayers       uint32  `json:"num_hidden_layers"`
-	IntermediateSize      uint32  `json:"intermediate_size"`
-	NumAttentionHeads     uint32  `json:"num_attention_heads"`
-	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
-	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-	RMSNormEPS            float32 `json:"rms_norm_eps"`
-	RopeTheta             float32 `json:"rope_theta"`
-	ClampKQV              float32 `json:"f_clamp_kqv"`
+	HiddenSize            uint32   `json:"hidden_size"`
+	NumHiddenLayers       uint32   `json:"num_hidden_layers"`
+	IntermediateSize      uint32   `json:"intermediate_size"`
+	NumAttentionHeads     uint32   `json:"num_attention_heads"`
+	NumKeyValueHeads      uint32   `json:"num_key_value_heads"`
+	MaxPositionEmbeddings uint32   `json:"max_position_embeddings"`
+	RMSNormEPS            float32  `json:"rms_norm_eps"`
+	RopeTheta             float32  `json:"rope_theta"`
+	ClampKQV              float32  `json:"f_clamp_kqv"`
+	SlidingWindow         uint32   `json:"sliding_window"`
+	LayerTypes            []string `json:"layer_types"`
 }
 
 var _ ModelConverter = (*olmoModel)(nil)
@@ -46,6 +48,14 @@ func (p *olmoModel) KV(t *Tokenizer) ggml.KV {
 		kv["olmo.attention.clamp_kqv"] = p.ClampKQV
 	}
 
+	if p.SlidingWindow > 0 {
+		kv["olmo.attention.sliding_window"] = p.SlidingWindow
+	}
+
+	if len(p.LayerTypes) > 0 {
+		kv["olmo.attention.layer_types"] = p.LayerTypes
+	}
+
 	return kv
 }
 
diff --git a/model/models/olmo/model.go b/model/models/olmo/model.go
index 2f891935c..698b9a614 100644
--- a/model/models/olmo/model.go
+++ b/model/models/olmo/model.go
@@ -30,6 +30,8 @@ type Model struct {
 	OutputNorm *nn.RMSNorm `gguf:"output_norm"`
 	Output     *nn.Linear  `gguf:"output,alt:token_embd"`
 
+	layerTypes []string
+
 	Options
 }
 
@@ -69,6 +71,7 @@ func New(c fs.Config) (model.Model, error) {
 	m := Model{
 		TextProcessor: processor,
 		Layers:        make([]Layer, c.Uint("block_count")),
+		layerTypes:    c.Strings("attention.layer_types"),
 		Options: Options{
 			hiddenSize: int(c.Uint("embedding_length")),
 			numHeads:   int(c.Uint("attention.head_count")),
@@ -82,7 +85,14 @@ func New(c fs.Config) (model.Model, error) {
 		},
 	}
 
-	m.Cache = kvcache.NewCausalCache(m.Shift)
+	if slidingWindow := c.Uint("attention.sliding_window"); slidingWindow > 0 {
+		m.Cache = kvcache.NewWrapperCache(
+			kvcache.NewSWACache(int32(slidingWindow), m.Shift),
+			kvcache.NewCausalCache(m.Shift),
+		)
+	} else {
+		m.Cache = kvcache.NewCausalCache(m.Shift)
+	}
 
 	return &m, nil
 }
@@ -170,6 +180,14 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
 	for i, layer := range m.Layers {
 		m.Cache.SetLayer(i)
 
+		if wc, ok := m.Cache.(*kvcache.WrapperCache); ok && len(m.layerTypes) > i {
+			if m.layerTypes[i] == "full_attention" {
+				wc.SetLayerType(1)
+			} else {
+				wc.SetLayerType(0)
+			}
+		}
+
 		var outputs ml.Tensor
 		if i == len(m.Layers)-1 {
 			outputs = batch.Outputs
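
Reviewer note, not part of the patch: NewWrapperCache is given the SWA
cache first and the causal cache second, so wrapper type 0 is the
sliding-window cache and type 1 is the full-attention (causal) cache,
which is what the SetLayerType calls in Forward rely on. Below is a
minimal, self-contained sketch of that per-layer dispatch; the helper
name and the layer_types values are illustrative, not from the patch.

package main

import "fmt"

// cacheTypeForLayer mirrors the dispatch in Forward: layers marked
// "full_attention" use the causal cache (type 1, the second argument to
// NewWrapperCache); other layers use the sliding-window cache (type 0).
// Note the patch leaves the wrapper's current type untouched when
// layer_types is shorter than the layer index; this sketch defaults to 0.
func cacheTypeForLayer(layerTypes []string, i int) int {
	if i < len(layerTypes) && layerTypes[i] == "full_attention" {
		return 1
	}
	return 0
}

func main() {
	// Illustrative values; real ones come from the upstream config's
	// layer_types field, exported here as olmo.attention.layer_types.
	layerTypes := []string{"sliding_attention", "sliding_attention", "full_attention"}
	for i := range layerTypes {
		fmt.Printf("layer %d -> cache type %d\n", i, cacheTypeForLayer(layerTypes, i))
	}
}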
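One behavior worth a second look: because the hook in Forward only calls
SetLayerType when len(m.layerTypes) > i, a missing or truncated
layer_types list leaves each remaining layer on whatever type was set
last (initially the zero value, i.e. the SWA cache) rather than falling
back to full attention.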