updated converter
parent cfc9729edf
commit fd959fbf7a
@@ -200,7 +200,7 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
 		conv = &qwen25VLModel{}
 	case "Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration":
 		conv = &qwen3VLModel{}
-	case "OlmoForCausalLM", "OLMoForCausalLM", "OLMo3ForCausalLM":
+	case "OlmoForCausalLM", "OLMoForCausalLM", "OLMo3ForCausalLM", "Olmo3ForCausalLM":
 		conv = &olmoModel{}
 	case "BertModel":
 		conv = &bertModel{}
@@ -9,15 +9,17 @@ import (
 type olmoModel struct {
 	ModelParameters
 
-	HiddenSize            uint32  `json:"hidden_size"`
-	NumHiddenLayers       uint32  `json:"num_hidden_layers"`
-	IntermediateSize      uint32  `json:"intermediate_size"`
-	NumAttentionHeads     uint32  `json:"num_attention_heads"`
-	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
-	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-	RMSNormEPS            float32 `json:"rms_norm_eps"`
-	RopeTheta             float32 `json:"rope_theta"`
-	ClampKQV              float32 `json:"f_clamp_kqv"`
+	HiddenSize            uint32   `json:"hidden_size"`
+	NumHiddenLayers       uint32   `json:"num_hidden_layers"`
+	IntermediateSize      uint32   `json:"intermediate_size"`
+	NumAttentionHeads     uint32   `json:"num_attention_heads"`
+	NumKeyValueHeads      uint32   `json:"num_key_value_heads"`
+	MaxPositionEmbeddings uint32   `json:"max_position_embeddings"`
+	RMSNormEPS            float32  `json:"rms_norm_eps"`
+	RopeTheta             float32  `json:"rope_theta"`
+	ClampKQV              float32  `json:"f_clamp_kqv"`
+	SlidingWindow         uint32   `json:"sliding_window"`
+	LayerTypes            []string `json:"layer_types"`
 }
 
 var _ ModelConverter = (*olmoModel)(nil)
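For context (not part of the patch): the two new fields are decoded straight from the upstream HuggingFace config.json through the json struct tags above. A minimal standalone sketch of that decoding, using a hypothetical config fragment and a trimmed copy of the struct:

package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed, illustrative copy of the converter struct: only the two new fields.
type olmoConfig struct {
	SlidingWindow uint32   `json:"sliding_window"`
	LayerTypes    []string `json:"layer_types"`
}

func main() {
	// Hypothetical config.json fragment; real OLMo 3 values may differ.
	raw := []byte(`{"sliding_window": 4096,
		"layer_types": ["sliding_attention", "sliding_attention", "full_attention"]}`)

	var cfg olmoConfig
	if err := json.Unmarshal(raw, &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.SlidingWindow, cfg.LayerTypes)
	// 4096 [sliding_attention sliding_attention full_attention]
}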
@@ -46,6 +48,14 @@ func (p *olmoModel) KV(t *Tokenizer) ggml.KV {
 		kv["olmo.attention.clamp_kqv"] = p.ClampKQV
 	}
 
+	if p.SlidingWindow > 0 {
+		kv["olmo.attention.sliding_window"] = p.SlidingWindow
+	}
+
+	if len(p.LayerTypes) > 0 {
+		kv["olmo.attention.layer_types"] = p.LayerTypes
+	}
+
 	return kv
 }
 
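For context (not part of the patch): the converter emits these values under olmo.attention.sliding_window and olmo.attention.layer_types, while the runtime hunks below read them as c.Uint("attention.sliding_window") and c.Strings("attention.layer_types") — presumably the fs.Config loader supplies the architecture prefix. A minimal standalone sketch of the guarded metadata writes, using a plain map and hypothetical values:

package main

import "fmt"

func main() {
	// Hypothetical values standing in for p.SlidingWindow and p.LayerTypes.
	slidingWindow := uint32(4096)
	layerTypes := []string{"sliding_attention", "full_attention"}

	// Mirrors the pattern in KV(): keys are only emitted when the config sets them.
	kv := map[string]any{}
	if slidingWindow > 0 {
		kv["olmo.attention.sliding_window"] = slidingWindow
	}
	if len(layerTypes) > 0 {
		kv["olmo.attention.layer_types"] = layerTypes
	}
	fmt.Println(kv)
}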
@@ -30,6 +30,8 @@ type Model struct {
 	OutputNorm *nn.RMSNorm `gguf:"output_norm"`
 	Output     *nn.Linear  `gguf:"output,alt:token_embd"`
 
+	layerTypes []string
+
 	Options
 }
 
@@ -69,6 +71,7 @@ func New(c fs.Config) (model.Model, error) {
 	m := Model{
 		TextProcessor: processor,
 		Layers:        make([]Layer, c.Uint("block_count")),
+		layerTypes:    c.Strings("attention.layer_types"),
 		Options: Options{
 			hiddenSize: int(c.Uint("embedding_length")),
 			numHeads:   int(c.Uint("attention.head_count")),
@@ -82,7 +85,14 @@ func New(c fs.Config) (model.Model, error) {
 		},
 	}
 
-	m.Cache = kvcache.NewCausalCache(m.Shift)
+	if slidingWindow := c.Uint("attention.sliding_window"); slidingWindow > 0 {
+		m.Cache = kvcache.NewWrapperCache(
+			kvcache.NewSWACache(int32(slidingWindow), m.Shift),
+			kvcache.NewCausalCache(m.Shift),
+		)
+	} else {
+		m.Cache = kvcache.NewCausalCache(m.Shift)
+	}
 
 	return &m, nil
 }
@@ -170,6 +180,14 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
 	for i, layer := range m.Layers {
 		m.Cache.SetLayer(i)
 
+		if wc, ok := m.Cache.(*kvcache.WrapperCache); ok && len(m.layerTypes) > i {
+			if m.layerTypes[i] == "full_attention" {
+				wc.SetLayerType(1)
+			} else {
+				wc.SetLayerType(0)
+			}
+		}
+
 		var outputs ml.Tensor
 		if i == len(m.Layers)-1 {
 			outputs = batch.Outputs
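A note on the indices passed to SetLayerType (an observation, not part of the patch): the wrapper cache built in New lists the sliding-window cache first and the full causal cache second, so, assuming WrapperCache addresses its children by constructor-argument order, index 0 selects the SWA cache and index 1 the full-attention cache, consistent with the "full_attention" check above. A standalone sketch of that per-layer mapping, with a hypothetical helper and layer pattern:

package main

import "fmt"

// cacheIndexFor mimics the selection above: layers tagged "full_attention"
// use the causal cache (index 1); any other tag, or a layer beyond the
// layer_types list, maps to the sliding-window cache (index 0) here.
func cacheIndexFor(layerTypes []string, layer int) int {
	if layer < len(layerTypes) && layerTypes[layer] == "full_attention" {
		return 1
	}
	return 0
}

func main() {
	// Hypothetical layer_types pattern: every fourth layer is full attention.
	layerTypes := []string{"sliding_attention", "sliding_attention", "sliding_attention", "full_attention"}
	for i := range layerTypes {
		fmt.Printf("layer %d -> cache %d\n", i, cacheIndexFor(layerTypes, i))
	}
}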