fixed converter
This commit is contained in:
parent
b6f769ae60
commit
29a2d6d931
|
|
@ -6,6 +6,16 @@ import (
|
||||||
"github.com/ollama/ollama/fs/ggml"
|
"github.com/ollama/ollama/fs/ggml"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ropeScaling holds the fields of the "rope_scaling" JSON object of a model
// configuration; it is decoded through olmoModel.RopeScaling.
//
// A zero value in a field is treated as "not set" by olmoModel.KV: numeric
// fields are only emitted when they are > 0 and RopeType only when it is
// non-empty.
type ropeScaling struct {
	// Factor is emitted as "olmo2.rope.scaling.factor".
	Factor float32 `json:"factor"`

	// OriginalMaxPositionEmbeds is emitted as
	// "olmo2.rope.scaling.original_context_length".
	OriginalMaxPositionEmbeds uint32 `json:"original_max_position_embeddings"`

	// AttentionFactor is emitted as "olmo2.rope.scaling.attn_factor".
	AttentionFactor float32 `json:"attention_factor"`

	// BetaFast and BetaSlow are decoded from the configuration.
	// NOTE(review): the visible part of KV does not write them to any key —
	// confirm whether that is intentional.
	BetaFast float32 `json:"beta_fast"`
	BetaSlow float32 `json:"beta_slow"`

	// RopeType is emitted as "olmo2.rope.scaling.type".
	RopeType string `json:"rope_type"`

	// ExtrapolationFactor is decoded from the configuration.
	// NOTE(review): not written by the visible part of KV — confirm.
	ExtrapolationFactor float32 `json:"extrapolation_factor"`
}
|
||||||
|
|
||||||
type olmoModel struct {
|
type olmoModel struct {
|
||||||
ModelParameters
|
ModelParameters
|
||||||
|
|
||||||
|
|
@ -17,6 +27,7 @@ type olmoModel struct {
|
||||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||||
RopeTheta float32 `json:"rope_theta"`
|
RopeTheta float32 `json:"rope_theta"`
|
||||||
|
RopeScaling *ropeScaling `json:"rope_scaling"`
|
||||||
ClampKQV float32 `json:"f_clamp_kqv"`
|
ClampKQV float32 `json:"f_clamp_kqv"`
|
||||||
SlidingWindow uint32 `json:"sliding_window"`
|
SlidingWindow uint32 `json:"sliding_window"`
|
||||||
LayerTypes []string `json:"layer_types"`
|
LayerTypes []string `json:"layer_types"`
|
||||||
|
|
@ -26,34 +37,53 @@ var _ ModelConverter = (*olmoModel)(nil)
|
||||||
|
|
||||||
func (p *olmoModel) KV(t *Tokenizer) ggml.KV {
|
func (p *olmoModel) KV(t *Tokenizer) ggml.KV {
|
||||||
kv := p.ModelParameters.KV(t)
|
kv := p.ModelParameters.KV(t)
|
||||||
kv["general.architecture"] = "olmo"
|
kv["general.architecture"] = "olmo2"
|
||||||
kv["olmo.block_count"] = p.NumHiddenLayers
|
kv["olmo2.block_count"] = p.NumHiddenLayers
|
||||||
kv["olmo.context_length"] = p.MaxPositionEmbeddings
|
kv["olmo2.context_length"] = p.MaxPositionEmbeddings
|
||||||
kv["olmo.embedding_length"] = p.HiddenSize
|
kv["olmo2.embedding_length"] = p.HiddenSize
|
||||||
kv["olmo.feed_forward_length"] = p.IntermediateSize
|
kv["olmo2.feed_forward_length"] = p.IntermediateSize
|
||||||
kv["olmo.attention.head_count"] = p.NumAttentionHeads
|
kv["olmo2.attention.head_count"] = p.NumAttentionHeads
|
||||||
kv["olmo.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
kv["olmo2.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||||
|
|
||||||
if p.RopeTheta > 0 {
|
if p.RopeTheta > 0 {
|
||||||
kv["olmo.rope.freq_base"] = p.RopeTheta
|
kv["olmo2.rope.freq_base"] = p.RopeTheta
|
||||||
} else {
|
} else {
|
||||||
kv["olmo.rope.freq_base"] = float32(10000.0)
|
kv["olmo2.rope.freq_base"] = float32(10000.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.RopeScaling != nil {
|
||||||
|
if p.RopeScaling.Factor > 0 {
|
||||||
|
kv["olmo2.rope.scaling.factor"] = p.RopeScaling.Factor
|
||||||
|
}
|
||||||
|
if p.RopeScaling.OriginalMaxPositionEmbeds > 0 {
|
||||||
|
kv["olmo2.rope.scaling.original_context_length"] = p.RopeScaling.OriginalMaxPositionEmbeds
|
||||||
|
}
|
||||||
|
if p.RopeScaling.AttentionFactor > 0 {
|
||||||
|
kv["olmo2.rope.scaling.attn_factor"] = p.RopeScaling.AttentionFactor
|
||||||
|
}
|
||||||
|
if p.RopeScaling.RopeType != "" {
|
||||||
|
kv["olmo2.rope.scaling.type"] = p.RopeScaling.RopeType
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if p.RMSNormEPS > 0 {
|
if p.RMSNormEPS > 0 {
|
||||||
kv["olmo.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
kv["olmo2.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||||
}
|
}
|
||||||
|
|
||||||
if p.ClampKQV > 0 {
|
if p.ClampKQV > 0 {
|
||||||
kv["olmo.attention.clamp_kqv"] = p.ClampKQV
|
kv["olmo2.attention.clamp_kqv"] = p.ClampKQV
|
||||||
}
|
}
|
||||||
|
|
||||||
if p.SlidingWindow > 0 {
|
if p.SlidingWindow > 0 {
|
||||||
kv["olmo.attention.sliding_window"] = p.SlidingWindow
|
kv["olmo2.attention.sliding_window"] = p.SlidingWindow
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(p.LayerTypes) > 0 {
|
if len(p.LayerTypes) > 0 {
|
||||||
kv["olmo.attention.layer_types"] = p.LayerTypes
|
slidingPattern := make([]bool, len(p.LayerTypes))
|
||||||
|
for i, layerType := range p.LayerTypes {
|
||||||
|
slidingPattern[i] = (layerType == "sliding_attention")
|
||||||
|
}
|
||||||
|
kv["olmo2.attention.sliding_window_pattern"] = slidingPattern
|
||||||
}
|
}
|
||||||
|
|
||||||
return kv
|
return kv
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue