updated converter

nicole pardal 2025-11-26 19:42:34 -08:00 committed by ParthSareen
parent bdcf9e811b
commit 7505cd963e
3 changed files with 39 additions and 11 deletions

View File

@@ -200,7 +200,7 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
         conv = &qwen25VLModel{}
     case "Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration":
         conv = &qwen3VLModel{}
-    case "OlmoForCausalLM", "OLMoForCausalLM", "OLMo3ForCausalLM":
+    case "OlmoForCausalLM", "OLMoForCausalLM", "OLMo3ForCausalLM", "Olmo3ForCausalLM":
         conv = &olmoModel{}
     case "BertModel":
         conv = &bertModel{}

View File

@@ -9,15 +9,17 @@ import (
 type olmoModel struct {
     ModelParameters
-    HiddenSize            uint32  `json:"hidden_size"`
-    NumHiddenLayers       uint32  `json:"num_hidden_layers"`
-    IntermediateSize      uint32  `json:"intermediate_size"`
-    NumAttentionHeads     uint32  `json:"num_attention_heads"`
-    NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
-    MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-    RMSNormEPS            float32 `json:"rms_norm_eps"`
-    RopeTheta             float32 `json:"rope_theta"`
-    ClampKQV              float32 `json:"f_clamp_kqv"`
+    HiddenSize            uint32   `json:"hidden_size"`
+    NumHiddenLayers       uint32   `json:"num_hidden_layers"`
+    IntermediateSize      uint32   `json:"intermediate_size"`
+    NumAttentionHeads     uint32   `json:"num_attention_heads"`
+    NumKeyValueHeads      uint32   `json:"num_key_value_heads"`
+    MaxPositionEmbeddings uint32   `json:"max_position_embeddings"`
+    RMSNormEPS            float32  `json:"rms_norm_eps"`
+    RopeTheta             float32  `json:"rope_theta"`
+    ClampKQV              float32  `json:"f_clamp_kqv"`
+    SlidingWindow         uint32   `json:"sliding_window"`
+    LayerTypes            []string `json:"layer_types"`
 }
 var _ ModelConverter = (*olmoModel)(nil)
@@ -46,6 +48,14 @@ func (p *olmoModel) KV(t *Tokenizer) ggml.KV {
         kv["olmo.attention.clamp_kqv"] = p.ClampKQV
     }
+    if p.SlidingWindow > 0 {
+        kv["olmo.attention.sliding_window"] = p.SlidingWindow
+    }
+    if len(p.LayerTypes) > 0 {
+        kv["olmo.attention.layer_types"] = p.LayerTypes
+    }
     return kv
 }
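
Note: a minimal standalone sketch of the mapping this hunk adds, showing which GGUF metadata keys the two optional config.json fields land under. The values are made up, the map stands in for ggml.KV, and the "sliding_attention" label is assumed from the usual Hugging Face convention (this change only checks for "full_attention").

package main

import "fmt"

// Illustrative sketch, not the converter itself: the optional Olmo config
// fields become architecture-prefixed GGUF metadata keys.
func main() {
    slidingWindow := uint32(4096)                                 // hypothetical config.json "sliding_window"
    layerTypes := []string{"sliding_attention", "full_attention"} // hypothetical "layer_types"

    kv := map[string]any{}
    if slidingWindow > 0 {
        kv["olmo.attention.sliding_window"] = slidingWindow
    }
    if len(layerTypes) > 0 {
        kv["olmo.attention.layer_types"] = layerTypes
    }
    fmt.Println(kv)
}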

View File

@@ -30,6 +30,8 @@ type Model struct {
     OutputNorm *nn.RMSNorm `gguf:"output_norm"`
     Output     *nn.Linear  `gguf:"output,alt:token_embd"`
+    layerTypes []string
     Options
 }
@@ -69,6 +71,7 @@ func New(c fs.Config) (model.Model, error) {
     m := Model{
         TextProcessor: processor,
         Layers:        make([]Layer, c.Uint("block_count")),
+        layerTypes:    c.Strings("attention.layer_types"),
         Options: Options{
             hiddenSize: int(c.Uint("embedding_length")),
             numHeads:   int(c.Uint("attention.head_count")),
@@ -82,7 +85,14 @@ func New(c fs.Config) (model.Model, error) {
         },
     }
-    m.Cache = kvcache.NewCausalCache(m.Shift)
+    if slidingWindow := c.Uint("attention.sliding_window"); slidingWindow > 0 {
+        m.Cache = kvcache.NewWrapperCache(
+            kvcache.NewSWACache(int32(slidingWindow), m.Shift),
+            kvcache.NewCausalCache(m.Shift),
+        )
+    } else {
+        m.Cache = kvcache.NewCausalCache(m.Shift)
+    }
     return &m, nil
 }
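
Note: the model side reads these settings back with c.Uint("attention.sliding_window") and c.Strings("attention.layer_types"), which are assumed to resolve against the architecture-prefixed keys written by the converter above. The resolve helper below is hypothetical and only makes that naming assumption explicit.

package main

import "fmt"

// Hypothetical helper: illustrates the assumed prefixing of model-side config
// lookups onto the GGUF keys emitted by the converter ("olmo.attention.*").
func resolve(arch, key string) string { return arch + "." + key }

func main() {
    fmt.Println(resolve("olmo", "attention.sliding_window")) // olmo.attention.sliding_window
    fmt.Println(resolve("olmo", "attention.layer_types"))    // olmo.attention.layer_types
}
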
@@ -170,6 +180,14 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
     for i, layer := range m.Layers {
         m.Cache.SetLayer(i)
+        if wc, ok := m.Cache.(*kvcache.WrapperCache); ok && len(m.layerTypes) > i {
+            if m.layerTypes[i] == "full_attention" {
+                wc.SetLayerType(1)
+            } else {
+                wc.SetLayerType(0)
+            }
+        }
         var outputs ml.Tensor
         if i == len(m.Layers)-1 {
             outputs = batch.Outputs
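
Note: a standalone sketch of the per-layer routing added here, assuming wrapper position 0 is the sliding-window cache and position 1 the full causal cache, matching the order they are passed to kvcache.NewWrapperCache in New above. The helper is illustrative, not ollama's kvcache API.

package main

import "fmt"

// Illustrative only: layers marked "full_attention" go to the full causal
// cache (wrapper position 1); any other layer type uses the sliding-window
// cache (position 0).
func cacheTypeFor(layerTypes []string, i int) int {
    if i < len(layerTypes) && layerTypes[i] == "full_attention" {
        return 1
    }
    return 0
}

func main() {
    layerTypes := []string{"sliding_attention", "sliding_attention", "full_attention"}
    for i := range layerTypes {
        fmt.Printf("layer %d -> wrapper cache %d\n", i, cacheTypeFor(layerTypes, i))
    }
}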