This commit is contained in:
cvrunmin 2026-01-06 03:15:24 +01:00 committed by GitHub
commit afebb4aa37
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 145 additions and 30 deletions

View File

@ -146,6 +146,16 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
if envconfig.NewEngine() || f.KV().OllamaEngineRequired() { if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
if len(projectors) == 0 { if len(projectors) == 0 {
textProcessor, err = model.NewTextProcessor(modelPath) textProcessor, err = model.NewTextProcessor(modelPath)
} else if len(projectors) == 1 {
var canMerge bool
canMerge, err = model.CanMergeProjector(modelPath)
if err == nil {
if !canMerge {
err = errors.New("split vision models aren't supported")
} else {
textProcessor, err = model.NewTextProcessor(modelPath)
}
}
} else { } else {
err = errors.New("split vision models aren't supported") err = errors.New("split vision models aren't supported")
} }
@ -479,8 +489,8 @@ type LoadRequest struct {
GPULayers ml.GPULayersList GPULayers ml.GPULayersList
MultiUserCache bool MultiUserCache bool
// Legacy fields - not used with the Ollama engine
ProjectorPath string ProjectorPath string
// Legacy fields - not used with the Ollama engine
MainGPU int MainGPU int
UseMmap bool UseMmap bool
} }

View File

@ -37,6 +37,9 @@ type Model interface {
Backend() ml.Backend Backend() ml.Backend
Config() config Config() config
PostPopulate()
IsOnlineProjectorMergingSupported() bool
} }
// MultimodalProcessor must be implemented by multimodal models. // MultimodalProcessor must be implemented by multimodal models.
@ -90,6 +93,16 @@ func (m *Base) Config() config {
return m.config return m.config
} }
// PostPopulate is a hook invoked after the model's tensor fields have been
// populated from the GGUF file. The Base implementation is a no-op stub;
// individual models override it to perform model-specific fix-ups.
func (m *Base) PostPopulate() {
	// stub. This method can be used for redirecting tensors that
	// have been renamed by convert_hf_to_gguf.py from llama.cpp,
	// or for any other model-specific logic.
}
// IsOnlineProjectorMergingSupported reports whether the model can merge a
// separately-stored vision projector into itself at load time. The Base
// default is false; models that support merging override this to return true.
func (m *Base) IsOnlineProjectorMergingSupported() bool {
	return false
}
var models = make(map[string]func(fs.Config) (Model, error)) var models = make(map[string]func(fs.Config) (Model, error))
// Register registers a model constructor for the given architecture // Register registers a model constructor for the given architecture
@ -115,7 +128,8 @@ func New(modelPath string, params ml.BackendParams) (Model, error) {
base := Base{b: b, config: m.Config()} base := Base{b: b, config: m.Config()}
v := reflect.ValueOf(m) v := reflect.ValueOf(m)
v.Elem().Set(populateFields(base, v.Elem())) v.Elem().Set(PopulateFields(base, v.Elem()))
m.PostPopulate()
return m, nil return m, nil
} }
@ -143,6 +157,25 @@ func NewTextProcessor(s string) (TextProcessor, error) {
return tp, nil return tp, nil
} }
// CanMergeProjector reports whether the model stored at path s supports
// merging a separate vision projector into the main model at load time.
//
// It decodes the GGUF metadata from the file, instantiates the model for
// the declared architecture, and queries it directly. Any failure to open
// or decode the file, or to resolve the architecture, is returned alongside
// a false result.
func CanMergeProjector(s string) (bool, error) {
	f, err := os.Open(s)
	if err != nil {
		return false, err
	}
	defer f.Close()

	// NOTE(review): -1 presumably means "no limit" when decoding tensor
	// metadata — confirm against the fsggml.Decode contract.
	meta, err := fsggml.Decode(f, -1)
	if err != nil {
		return false, err
	}

	mdl, err := modelForArch(meta.KV())
	if err != nil {
		return false, err
	}

	return mdl.IsOnlineProjectorMergingSupported(), nil
}
func modelForArch(c fs.Config) (Model, error) { func modelForArch(c fs.Config) (Model, error) {
arch := c.Architecture() arch := c.Architecture()
if pooling.Type(c.Uint("pooling_type")) != pooling.TypeNone { if pooling.Type(c.Uint("pooling_type")) != pooling.TypeNone {
@ -157,7 +190,7 @@ func modelForArch(c fs.Config) (Model, error) {
return f(c) return f(c)
} }
func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value { func PopulateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
t := v.Type() t := v.Type()
if t.Kind() == reflect.Struct { if t.Kind() == reflect.Struct {
@ -172,7 +205,7 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
// make a copy // make a copy
tagsCopy := tags tagsCopy := tags
if tag := t.Field(i).Tag.Get("gguf"); tag != "" { if tag := t.Field(i).Tag.Get("gguf"); tag != "" {
tagsCopy = append(tagsCopy, parseTag(tag)) tagsCopy = append(tagsCopy, ParseTag(tag))
} }
if tt == reflect.TypeOf((*Base)(nil)).Elem() { if tt == reflect.TypeOf((*Base)(nil)).Elem() {
@ -194,17 +227,27 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
} else if len(childNames) == 0 { } else if len(childNames) == 0 {
// current tag has names but no children, create branches for each name // current tag has names but no children, create branches for each name
for _, name := range names { for _, name := range names {
if name == "" {
// If an empty alternate empty name exists, do not add it into the list
// as Go will create double dots in the name
fullNames = append(fullNames, []string{})
} else {
fullNames = append(fullNames, []string{name}) fullNames = append(fullNames, []string{name})
} }
}
} else { } else {
// merge each name with each child // merge each name with each child
for _, name := range names { for _, name := range names {
for _, childName := range childNames { for _, childName := range childNames {
if name == "" {
fullNames = append(fullNames, childName)
} else {
fullNames = append(fullNames, append([]string{name}, childName...)) fullNames = append(fullNames, append([]string{name}, childName...))
} }
} }
} }
} }
}
return fullNames return fullNames
} }
@ -218,14 +261,14 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
} }
} }
} else if tt.Kind() == reflect.Pointer || tt.Kind() == reflect.Interface { } else if tt.Kind() == reflect.Pointer || tt.Kind() == reflect.Interface {
setPointer(base, vv, tagsCopy) SetPointer(base, vv, tagsCopy)
} else if tt.Kind() == reflect.Slice || tt.Kind() == reflect.Array { } else if tt.Kind() == reflect.Slice || tt.Kind() == reflect.Array {
for i := range vv.Len() { for i := range vv.Len() {
vvv := vv.Index(i) vvv := vv.Index(i)
if vvv.Kind() == reflect.Pointer || vvv.Kind() == reflect.Interface { if vvv.Kind() == reflect.Pointer || vvv.Kind() == reflect.Interface {
setPointer(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)})) SetPointer(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)}))
} else { } else {
vvv.Set(populateFields(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)})...)) vvv.Set(PopulateFields(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)})...))
} }
} }
} }
@ -243,7 +286,7 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
return v return v
} }
func setPointer(base Base, v reflect.Value, tags []Tag) { func SetPointer(base Base, v reflect.Value, tags []Tag) {
vv := v vv := v
if v.Kind() == reflect.Interface { if v.Kind() == reflect.Interface {
if v.IsNil() { if v.IsNil() {
@ -258,7 +301,7 @@ func setPointer(base Base, v reflect.Value, tags []Tag) {
vv = reflect.New(v.Type().Elem()).Elem() vv = reflect.New(v.Type().Elem()).Elem()
} }
if f := populateFields(base, vv, tags...); f.CanAddr() { if f := PopulateFields(base, vv, tags...); f.CanAddr() {
v.Set(f.Addr()) v.Set(f.Addr())
} }
} }
@ -271,7 +314,7 @@ type Tag struct {
alternatives []string alternatives []string
} }
func parseTag(s string) (tag Tag) { func ParseTag(s string) (tag Tag) {
parts := strings.Split(s, ",") parts := strings.Split(s, ",")
if len(parts) > 0 { if len(parts) > 0 {
tag.name = parts[0] tag.name = parts[0]

View File

@ -38,7 +38,7 @@ func TestParseTags(t *testing.T) {
for _, tt := range cases { for _, tt := range cases {
t.Run(tt.value, func(t *testing.T) { t.Run(tt.value, func(t *testing.T) {
got := parseTag(tt.value) got := ParseTag(tt.value)
if diff := cmp.Diff(tt.want, got, cmp.AllowUnexported((Tag{}))); diff != "" { if diff := cmp.Diff(tt.want, got, cmp.AllowUnexported((Tag{}))); diff != "" {
t.Errorf("ParseTags() returned unexpected values (-want +got):\n%s", diff) t.Errorf("ParseTags() returned unexpected values (-want +got):\n%s", diff)
} }
@ -81,7 +81,7 @@ func TestPopulateFields(t *testing.T) {
var m fakeModel var m fakeModel
v := reflect.ValueOf(&m) v := reflect.ValueOf(&m)
v.Elem().Set(populateFields(Base{b: &fakeBackend{ v.Elem().Set(PopulateFields(Base{b: &fakeBackend{
names: []string{ names: []string{
"input.weight", "input.weight",
"blk.0.attn_q.weight", "blk.0.attn_q.weight",
@ -130,7 +130,7 @@ func TestPopulateFieldsAlternateName(t *testing.T) {
var m fakeModel var m fakeModel
v := reflect.ValueOf(&m) v := reflect.ValueOf(&m)
v.Elem().Set(populateFields(Base{b: &fakeBackend{ v.Elem().Set(PopulateFields(Base{b: &fakeBackend{
names: []string{ names: []string{
"input.weight", "input.weight",
"nested.b.weight", "nested.b.weight",
@ -166,7 +166,7 @@ func TestPopulateFieldsPrefixSuffixName(t *testing.T) {
Blocks: make([]fakeBlock, 2), Blocks: make([]fakeBlock, 2),
} }
v := reflect.ValueOf(&m) v := reflect.ValueOf(&m)
v.Elem().Set(populateFields(Base{b: &fakeBackend{ v.Elem().Set(PopulateFields(Base{b: &fakeBackend{
names: []string{ names: []string{
"blk.0.a.weight", "blk.0.a.weight",
"blk.0.b_weight", "blk.0.b_weight",

View File

@ -164,6 +164,10 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
return hiddenState, nil return hiddenState, nil
} }
// IsOnlineProjectorMergingSupported reports that this model supports
// merging a separate vision projector into the model at load time.
func (m *Model) IsOnlineProjectorMergingSupported() bool {
	return true
}
func init() { func init() {
model.Register("gemma3", New) model.Register("gemma3", New)
model.Register("gemma3_embed", newEmbedModel) model.Register("gemma3_embed", newEmbedModel)

View File

@ -76,9 +76,9 @@ type VisionModelOptions struct {
} }
type VisionModel struct { type VisionModel struct {
PatchEmbedding *nn.Conv2D `gguf:"patch_embedding"` PatchEmbedding *nn.Conv2D `gguf:"patch_embedding,alt:patch_embd"`
PositionEmbedding *nn.Embedding `gguf:"position_embedding"` PositionEmbedding *nn.Embedding `gguf:"position_embedding,alt:position_embd"`
PostLayerNorm *nn.LayerNorm `gguf:"post_layernorm"` PostLayerNorm *nn.LayerNorm `gguf:"post_layernorm,alt:post_ln"`
Layers []VisionEncoderLayer `gguf:"blk"` Layers []VisionEncoderLayer `gguf:"blk"`

View File

@ -59,7 +59,7 @@ func New(c fs.Config) (model.Model, error) {
} }
type PatchMerger struct { type PatchMerger struct {
MergingLayer *nn.Linear `gguf:"merging_layer"` MergingLayer *nn.Linear `gguf:"merging_layer,alt:"`
} }
func (pm *PatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, size image.Point, spatialMergeSize int) ml.Tensor { func (pm *PatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, size image.Point, spatialMergeSize int) ml.Tensor {
@ -72,9 +72,9 @@ func (pm *PatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, size ima
} }
type MultiModalProjector struct { type MultiModalProjector struct {
Norm *nn.RMSNorm `gguf:"norm"` Norm *nn.RMSNorm `gguf:"norm,alt:input_norm"`
Linear1 *nn.Linear `gguf:"linear_1"` Linear1 *nn.Linear `gguf:"linear_1,alt:1"`
Linear2 *nn.Linear `gguf:"linear_2"` Linear2 *nn.Linear `gguf:"linear_2,alt:2"`
PatchMerger *PatchMerger `gguf:"patch_merger"` PatchMerger *PatchMerger `gguf:"patch_merger"`
spatialMergeSize int spatialMergeSize int
@ -164,6 +164,10 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
return m.TextModel.Forward(ctx, batch.Inputs, positions, positionsScale, batch.Outputs, batch, m.Cache), nil return m.TextModel.Forward(ctx, batch.Inputs, positions, positionsScale, batch.Outputs, batch, m.Cache), nil
} }
// IsOnlineProjectorMergingSupported reports that this model supports
// merging a separate vision projector into the model at load time.
func (m *Model) IsOnlineProjectorMergingSupported() bool {
	return true
}
func init() { func init() {
model.Register("mistral3", New) model.Register("mistral3", New)
} }

View File

@ -87,8 +87,8 @@ type VisionModelOptions struct {
} }
type VisionModel struct { type VisionModel struct {
PatchEmbedding *nn.Conv2D `gguf:"patch_conv"` PatchEmbedding *nn.Conv2D `gguf:"patch_conv,alt:patch_embd"`
EncoderNorm *nn.RMSNorm `gguf:"encoder_norm"` EncoderNorm *nn.RMSNorm `gguf:"encoder_norm,alt:pre_ln"`
Layers []VisionEncoderLayer `gguf:"blk"` Layers []VisionEncoderLayer `gguf:"blk"`
*VisionModelOptions *VisionModelOptions

View File

@ -3,6 +3,7 @@ package qwen25vl
import ( import (
"bytes" "bytes"
"image" "image"
"reflect"
"slices" "slices"
"github.com/ollama/ollama/fs" "github.com/ollama/ollama/fs"
@ -190,6 +191,28 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
return m.Output.Forward(ctx, hiddenStates), nil return m.Output.Forward(ctx, hiddenStates), nil
} }
// PostPopulate rebinds vision tensors after field population for the case
// where the weights were loaded from a projector-style GGUF whose tensor
// names (mm.*, v.post_ln, v.patch_embd.*) differ from the names declared in
// the struct tags. It only acts on fields that were left unpopulated
// (Weight == nil) and whose alternate-named tensor is present in the backend.
func (m *Model) PostPopulate() {
	if m.VisionModel.PatchMerger.MLP0.Weight == nil {
		// mm.0.weight is used as a sentinel: if it exists, assume all three
		// patch-merger tensors use the projector naming scheme.
		if tensor := m.Base.Backend().Get("mm.0.weight"); tensor != nil {
			// NOTE(review): SetPointer is handed reflect.ValueOf of a pointer
			// field value (not its address); confirm SetPointer tolerates a
			// non-addressable reflect.Value — reflect.Value.Set panics on one.
			model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.MLP0), []model.Tag{model.ParseTag("mm.0")})
			model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.MLP2), []model.Tag{model.ParseTag("mm.2")})
			model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.LNQ), []model.Tag{model.ParseTag("v.post_ln")})
		}
	}
	if m.VisionModel.PatchEmbedding.PatchConv0.Weight == nil {
		// Projector GGUFs store the patch embedding as v.patch_embd.weight
		// (and an optional second slice, v.patch_embd.weight.1).
		if tensor := m.Base.Backend().Get("v.patch_embd.weight"); tensor != nil {
			m.VisionModel.PatchEmbedding.PatchConv0.Weight = tensor
		}
		if tensor := m.Base.Backend().Get("v.patch_embd.weight.1"); tensor != nil {
			m.VisionModel.PatchEmbedding.PatchConv1.Weight = tensor
		}
	}
}
// IsOnlineProjectorMergingSupported reports that this model supports
// merging a separate vision projector into the model at load time.
func (m *Model) IsOnlineProjectorMergingSupported() bool {
	return true
}
func init() { func init() {
model.Register("qwen25vl", New) model.Register("qwen25vl", New)
} }

View File

@ -2,7 +2,9 @@ package qwen3vl
import ( import (
"bytes" "bytes"
"fmt"
"image" "image"
"reflect"
"slices" "slices"
"github.com/ollama/ollama/fs" "github.com/ollama/ollama/fs"
@ -170,6 +172,27 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
return m.Output.Forward(ctx, hiddenStates), nil return m.Output.Forward(ctx, hiddenStates), nil
} }
// PostPopulate rebinds vision tensors after field population for the case
// where the weights were loaded from a projector-style GGUF whose tensor
// names (mm.*, v.post_ln, v.deepstack.*) differ from the names declared in
// the struct tags. It only acts on fields that were left unpopulated
// (Weight == nil) and whose alternate-named tensor is present in the backend.
func (m *Model) PostPopulate() {
	if m.VisionModel.PatchMerger.FC1.Weight == nil {
		// mm.0.weight is used as a sentinel: if it exists, assume all three
		// patch-merger tensors use the projector naming scheme.
		if tensor := m.Base.Backend().Get("mm.0.weight"); tensor != nil {
			// NOTE(review): SetPointer is handed reflect.ValueOf of a pointer
			// field value (not its address); confirm SetPointer tolerates a
			// non-addressable reflect.Value — reflect.Value.Set panics on one.
			model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.FC1), []model.Tag{model.ParseTag("mm.0")})
			model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.FC2), []model.Tag{model.ParseTag("mm.2")})
			model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.Norm), []model.Tag{model.ParseTag("v.post_ln")})
		}
	}
	for i, merger := range m.VisionModel.DeepstackMerger {
		if merger.FC1.Weight != nil {
			continue
		}
		// Bug fix: the tag must carry the concrete deepstack index. The
		// previous code passed the literal "v.deepstack.%d" (unformatted),
		// so the alternate name could never match any tensor.
		name := fmt.Sprintf("v.deepstack.%d", m.VisionModel.deepstackVisualIndexes[i])
		if tensor := m.Base.Backend().Get(name + ".weight"); tensor != nil {
			model.SetPointer(m.Base, reflect.ValueOf(merger), []model.Tag{model.ParseTag(name)})
		}
	}
}
// IsOnlineProjectorMergingSupported reports that this model supports
// merging a separate vision projector into the model at load time.
func (m *Model) IsOnlineProjectorMergingSupported() bool {
	return true
}
func New(c fs.Config) (model.Model, error) { func New(c fs.Config) (model.Model, error) {
m := Model{ m := Model{
TextProcessor: model.NewBytePairEncoding( TextProcessor: model.NewBytePairEncoding(

View File

@ -94,8 +94,8 @@ func (o VisionOptions) headDim() int {
type VisionPatchMerger struct { type VisionPatchMerger struct {
Norm *nn.LayerNorm `gguf:"norm"` Norm *nn.LayerNorm `gguf:"norm"`
FC1 *nn.Linear `gguf:"linear_fc1"` FC1 *nn.Linear `gguf:"linear_fc1,alt:fc.fc1"`
FC2 *nn.Linear `gguf:"linear_fc2"` FC2 *nn.Linear `gguf:"linear_fc2,alt:fc.fc2"`
} }
func (m *VisionPatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, postshuffleNorm bool, opts VisionOptions) ml.Tensor { func (m *VisionPatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, postshuffleNorm bool, opts VisionOptions) ml.Tensor {
@ -241,6 +241,13 @@ func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor, grid *Grid)
// newVisionModel creates a new instance of the Qwen vision model // newVisionModel creates a new instance of the Qwen vision model
func newVisionModel(c fs.Config) *VisionModel { func newVisionModel(c fs.Config) *VisionModel {
deepstackVisualIndexes := c.Ints("vision.deepstack_visual_indexes") deepstackVisualIndexes := c.Ints("vision.deepstack_visual_indexes")
if deepstackVisualIndexes == nil && c.Bools("vision.is_deepstack_layers") != nil {
for i, flag := range c.Bools("vision.is_deepstack_layers") {
if flag {
deepstackVisualIndexes = append(deepstackVisualIndexes, int32(i))
}
}
}
model := &VisionModel{ model := &VisionModel{
Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count", 32)), Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count", 32)),
DeepstackMerger: make([]*VisionPatchMerger, len(deepstackVisualIndexes)), DeepstackMerger: make([]*VisionPatchMerger, len(deepstackVisualIndexes)),

View File

@ -1171,6 +1171,7 @@ func (s *Server) allocModel(
mpath string, mpath string,
params ml.BackendParams, params ml.BackendParams,
loraPath []string, loraPath []string,
projectorPath string,
parallel int, parallel int,
kvCacheType string, kvCacheType string,
kvSize int, kvSize int,
@ -1302,7 +1303,7 @@ func (s *Server) load(w http.ResponseWriter, r *http.Request) {
s.batchSize = req.BatchSize s.batchSize = req.BatchSize
err := s.allocModel(s.modelPath, params, req.LoraPath, req.Parallel, req.KvCacheType, req.KvSize, req.MultiUserCache) err := s.allocModel(s.modelPath, params, req.LoraPath, req.ProjectorPath, req.Parallel, req.KvCacheType, req.KvSize, req.MultiUserCache)
if err != nil { if err != nil {
s.closeModel() s.closeModel()