model: add tensor names in mmproj
commit e68d6054c1
parent 8852220f59
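This commit lets the Ollama engine resolve multimodal projector (mmproj) tensors that were written under llama.cpp-style names: vision and projector fields gain alternate names via the `alt:` part of their `gguf` struct tags, and models get a `PostPopulate` hook for tensors whose names can't be expressed as a simple alternate. Before the diff, a minimal, self-contained sketch of the alternate-name lookup idea; the tag format mirrors what the diff adds, but `parseNames`, `resolve`, and the tensor names in `main` are illustrative stand-ins, not ollama's actual code.

package main

import (
    "fmt"
    "strings"
)

// parseNames splits a tag such as "post_layernorm,alt:post_ln" into the
// primary name followed by any alternates.
func parseNames(tag string) []string {
    var names []string
    for i, part := range strings.Split(tag, ",") {
        if i == 0 {
            names = append(names, part)
            continue
        }
        if alt, ok := strings.CutPrefix(part, "alt:"); ok {
            names = append(names, alt)
        }
    }
    return names
}

// resolve returns the first candidate weight name present in the tensor set.
func resolve(tensors map[string]bool, prefix, tag string) (string, bool) {
    for _, name := range parseNames(tag) {
        full := prefix
        if name != "" { // an empty alternate means "use the prefix alone"
            full = prefix + "." + name
        }
        if tensors[full+".weight"] {
            return full + ".weight", true
        }
    }
    return "", false
}

func main() {
    // Hypothetical tensor set, named the way a converter writes mmproj weights.
    tensors := map[string]bool{"v.post_ln.weight": true}
    fmt.Println(resolve(tensors, "v", "post_layernorm,alt:post_ln"))
    // Output: v.post_ln.weight true
}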
@@ -146,6 +146,16 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
 	if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
 		if len(projectors) == 0 {
 			textProcessor, err = model.NewTextProcessor(modelPath)
+		} else if len(projectors) == 1 {
+			var canMerge bool
+			canMerge, err = model.CanMergeProjector(modelPath)
+			if err == nil {
+				if !canMerge {
+					err = errors.New("split vision models aren't supported")
+				} else {
+					textProcessor, err = model.NewTextProcessor(modelPath)
+				}
+			}
 		} else {
 			err = errors.New("split vision models aren't supported")
 		}
@@ -479,8 +489,8 @@ type LoadRequest struct {
 	GPULayers      ml.GPULayersList
 	MultiUserCache bool
 
-	// Legacy fields - not used with the Ollama engine
 	ProjectorPath string
+	// Legacy fields - not used with the Ollama engine
 	MainGPU int
 	UseMmap bool
 }
@@ -37,6 +37,9 @@ type Model interface {
 
 	Backend() ml.Backend
 	Config() config
+
+	PostPopulate()
+	IsOnlineProjectorMergingSupported() bool
 }
 
 // MultimodalProcessor must be implemented by multimodal models.
@@ -90,6 +93,16 @@ func (m *Base) Config() config {
 	return m.config
 }
 
+func (m *Base) PostPopulate() {
+	// stub. This method can be used for redirecting tensors that
+	// have been renamed by convert_hf_to_gguf.py from llama.cpp,
+	// or for any other model-specific logic.
+}
+
+func (m *Base) IsOnlineProjectorMergingSupported() bool {
+	return false
+}
+
 var models = make(map[string]func(fs.Config) (Model, error))
 
 // Register registers a model constructor for the given architecture
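Note: the stub above is the hook that concrete models override; the qwen25vl and qwen3vl hunks later in this diff do so through model.SetPointer and model.ParseTag. Below is a minimal, self-contained sketch of the pattern; the types, the parameter on PostPopulate (the real hook takes no arguments and reaches the backend through Base), and the tensor names are stand-ins for illustration only.

package main

import "fmt"

type Tensor struct{ Name string }

type Backend struct{ tensors map[string]*Tensor }

// Get returns a named tensor, or nil when the weight is not present.
func (b *Backend) Get(name string) *Tensor { return b.tensors[name] }

type Linear struct{ Weight *Tensor }

type Projector struct {
    FC1 *Linear // normally filled from a primary tag name (hypothetical here)
}

// PostPopulate redirects a field that stayed nil during normal population to a
// fallback tensor name, e.g. one written by convert_hf_to_gguf.py.
func (p *Projector) PostPopulate(b *Backend) {
    if p.FC1 == nil || p.FC1.Weight == nil {
        if t := b.Get("mm.0.weight"); t != nil {
            p.FC1 = &Linear{Weight: t}
        }
    }
}

func main() {
    b := &Backend{tensors: map[string]*Tensor{"mm.0.weight": {Name: "mm.0.weight"}}}
    p := &Projector{}
    p.PostPopulate(b)
    fmt.Println(p.FC1.Weight.Name) // mm.0.weight
}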
@@ -115,7 +128,8 @@ func New(modelPath string, params ml.BackendParams) (Model, error) {
 
 	base := Base{b: b, config: m.Config()}
 	v := reflect.ValueOf(m)
-	v.Elem().Set(populateFields(base, v.Elem()))
+	v.Elem().Set(PopulateFields(base, v.Elem()))
+	m.PostPopulate()
 	return m, nil
 }
 
@@ -143,6 +157,25 @@ func NewTextProcessor(s string) (TextProcessor, error) {
 	return tp, nil
 }
 
+func CanMergeProjector(s string) (bool, error) {
+	r, err := os.Open(s)
+	if err != nil {
+		return false, err
+	}
+	defer r.Close()
+
+	meta, err := fsggml.Decode(r, -1)
+	if err != nil {
+		return false, err
+	}
+
+	m, err := modelForArch(meta.KV())
+	if err != nil {
+		return false, err
+	}
+	return m.IsOnlineProjectorMergingSupported(), nil
+}
+
 func modelForArch(c fs.Config) (Model, error) {
 	arch := c.Architecture()
 	if pooling.Type(c.Uint("pooling_type")) != pooling.TypeNone {
@@ -157,7 +190,7 @@ func modelForArch(c fs.Config) (Model, error) {
 	return f(c)
 }
 
-func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
+func PopulateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
 	t := v.Type()
 
 	if t.Kind() == reflect.Struct {
@@ -172,7 +205,7 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
 		// make a copy
 		tagsCopy := tags
 		if tag := t.Field(i).Tag.Get("gguf"); tag != "" {
-			tagsCopy = append(tagsCopy, parseTag(tag))
+			tagsCopy = append(tagsCopy, ParseTag(tag))
 		}
 
 		if tt == reflect.TypeOf((*Base)(nil)).Elem() {
@@ -194,17 +227,27 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
 		} else if len(childNames) == 0 {
 			// current tag has names but no children, create branches for each name
 			for _, name := range names {
+				if name == "" {
+					// If an empty alternate name exists, do not add it to the list,
+					// as Go will create double dots in the name
+					fullNames = append(fullNames, []string{})
+				} else {
 					fullNames = append(fullNames, []string{name})
+				}
 			}
 		} else {
 			// merge each name with each child
 			for _, name := range names {
 				for _, childName := range childNames {
+					if name == "" {
+						fullNames = append(fullNames, childName)
+					} else {
 						fullNames = append(fullNames, append([]string{name}, childName...))
+					}
 				}
 			}
 		}
 	}
 
 	return fullNames
 }
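Note: the empty-name special case above pairs with the empty `alt:` on mistral3's merging_layer tag later in this diff. Tag segments are joined with dots to form the full tensor name, so an empty alternate has to mean "reuse the parent prefix as-is" rather than contribute an empty segment. A tiny self-contained illustration follows, with joinName as a hypothetical stand-in for the real name-building logic, not ollama's code:

package main

import (
    "fmt"
    "strings"
)

// joinName builds a dotted tensor name from nested tag segments, skipping
// empty segments so an empty alternate collapses onto its parent prefix.
func joinName(segments ...string) string {
    var parts []string
    for _, s := range segments {
        if s != "" {
            parts = append(parts, s)
        }
    }
    return strings.Join(parts, ".")
}

func main() {
    // Naive joining puts a double dot in the name when a segment is empty.
    fmt.Println(strings.Join([]string{"v", "", "weight"}, ".")) // "v..weight"
    // Skipping empty segments keeps the parent prefix intact.
    fmt.Println(joinName("v", "", "weight"))              // "v.weight"
    fmt.Println(joinName("mm", "patch_merger", "weight")) // "mm.patch_merger.weight"
}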
@@ -218,14 +261,14 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
 				}
 			}
 		} else if tt.Kind() == reflect.Pointer || tt.Kind() == reflect.Interface {
-			setPointer(base, vv, tagsCopy)
+			SetPointer(base, vv, tagsCopy)
 		} else if tt.Kind() == reflect.Slice || tt.Kind() == reflect.Array {
 			for i := range vv.Len() {
 				vvv := vv.Index(i)
 				if vvv.Kind() == reflect.Pointer || vvv.Kind() == reflect.Interface {
-					setPointer(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)}))
+					SetPointer(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)}))
 				} else {
-					vvv.Set(populateFields(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)})...))
+					vvv.Set(PopulateFields(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)})...))
 				}
 			}
 		}
@@ -243,7 +286,7 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
 	return v
 }
 
-func setPointer(base Base, v reflect.Value, tags []Tag) {
+func SetPointer(base Base, v reflect.Value, tags []Tag) {
 	vv := v
 	if v.Kind() == reflect.Interface {
 		if v.IsNil() {
@@ -258,7 +301,7 @@ func setPointer(base Base, v reflect.Value, tags []Tag) {
 		vv = reflect.New(v.Type().Elem()).Elem()
 	}
 
-	if f := populateFields(base, vv, tags...); f.CanAddr() {
+	if f := PopulateFields(base, vv, tags...); f.CanAddr() {
 		v.Set(f.Addr())
 	}
 }
@@ -271,7 +314,7 @@ type Tag struct {
 	alternatives []string
 }
 
-func parseTag(s string) (tag Tag) {
+func ParseTag(s string) (tag Tag) {
 	parts := strings.Split(s, ",")
 	if len(parts) > 0 {
 		tag.name = parts[0]
@@ -38,7 +38,7 @@ func TestParseTags(t *testing.T) {
 
 	for _, tt := range cases {
 		t.Run(tt.value, func(t *testing.T) {
-			got := parseTag(tt.value)
+			got := ParseTag(tt.value)
 			if diff := cmp.Diff(tt.want, got, cmp.AllowUnexported((Tag{}))); diff != "" {
 				t.Errorf("ParseTags() returned unexpected values (-want +got):\n%s", diff)
 			}
@@ -81,7 +81,7 @@ func TestPopulateFields(t *testing.T) {
 
 	var m fakeModel
 	v := reflect.ValueOf(&m)
-	v.Elem().Set(populateFields(Base{b: &fakeBackend{
+	v.Elem().Set(PopulateFields(Base{b: &fakeBackend{
 		names: []string{
 			"input.weight",
 			"blk.0.attn_q.weight",
@@ -130,7 +130,7 @@ func TestPopulateFieldsAlternateName(t *testing.T) {
 
 	var m fakeModel
 	v := reflect.ValueOf(&m)
-	v.Elem().Set(populateFields(Base{b: &fakeBackend{
+	v.Elem().Set(PopulateFields(Base{b: &fakeBackend{
 		names: []string{
 			"input.weight",
 			"nested.b.weight",
@@ -166,7 +166,7 @@ func TestPopulateFieldsPrefixSuffixName(t *testing.T) {
 		Blocks: make([]fakeBlock, 2),
 	}
 	v := reflect.ValueOf(&m)
-	v.Elem().Set(populateFields(Base{b: &fakeBackend{
+	v.Elem().Set(PopulateFields(Base{b: &fakeBackend{
 		names: []string{
 			"blk.0.a.weight",
 			"blk.0.b_weight",
@@ -164,6 +164,10 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
 	return hiddenState, nil
 }
 
+func (m *Model) IsOnlineProjectorMergingSupported() bool {
+	return true
+}
+
 func init() {
 	model.Register("gemma3", New)
 	model.Register("gemma3_embed", newEmbedModel)
@@ -76,9 +76,9 @@ type VisionModelOptions struct {
 }
 
 type VisionModel struct {
-	PatchEmbedding    *nn.Conv2D    `gguf:"patch_embedding"`
-	PositionEmbedding *nn.Embedding `gguf:"position_embedding"`
-	PostLayerNorm     *nn.LayerNorm `gguf:"post_layernorm"`
+	PatchEmbedding    *nn.Conv2D    `gguf:"patch_embedding,alt:patch_embd"`
+	PositionEmbedding *nn.Embedding `gguf:"position_embedding,alt:position_embd"`
+	PostLayerNorm     *nn.LayerNorm `gguf:"post_layernorm,alt:post_ln"`
 
 	Layers []VisionEncoderLayer `gguf:"blk"`
 
@@ -59,7 +59,7 @@ func New(c fs.Config) (model.Model, error) {
 }
 
 type PatchMerger struct {
-	MergingLayer *nn.Linear `gguf:"merging_layer"`
+	MergingLayer *nn.Linear `gguf:"merging_layer,alt:"`
 }
 
 func (pm *PatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, size image.Point, spatialMergeSize int) ml.Tensor {
@@ -72,9 +72,9 @@ func (pm *PatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, size ima
 }
 
 type MultiModalProjector struct {
-	Norm    *nn.RMSNorm `gguf:"norm"`
-	Linear1 *nn.Linear  `gguf:"linear_1"`
-	Linear2 *nn.Linear  `gguf:"linear_2"`
+	Norm    *nn.RMSNorm `gguf:"norm,alt:input_norm"`
+	Linear1 *nn.Linear  `gguf:"linear_1,alt:1"`
+	Linear2 *nn.Linear  `gguf:"linear_2,alt:2"`
 	PatchMerger *PatchMerger `gguf:"patch_merger"`
 
 	spatialMergeSize int
@@ -164,6 +164,10 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
 	return m.TextModel.Forward(ctx, batch.Inputs, positions, positionsScale, batch.Outputs, batch, m.Cache), nil
 }
 
+func (m *Model) IsOnlineProjectorMergingSupported() bool {
+	return true
+}
+
 func init() {
 	model.Register("mistral3", New)
 }
@@ -87,8 +87,8 @@ type VisionModelOptions struct {
 }
 
 type VisionModel struct {
-	PatchEmbedding *nn.Conv2D  `gguf:"patch_conv"`
-	EncoderNorm    *nn.RMSNorm `gguf:"encoder_norm"`
+	PatchEmbedding *nn.Conv2D  `gguf:"patch_conv,alt:patch_embd"`
+	EncoderNorm    *nn.RMSNorm `gguf:"encoder_norm,alt:pre_ln"`
 	Layers []VisionEncoderLayer `gguf:"blk"`
 
 	*VisionModelOptions
@@ -3,6 +3,7 @@ package qwen25vl
 import (
 	"bytes"
 	"image"
+	"reflect"
 	"slices"
 
 	"github.com/ollama/ollama/fs"
@@ -190,6 +191,28 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
 	return m.Output.Forward(ctx, hiddenStates), nil
 }
 
+func (m *Model) PostPopulate() {
+	if m.VisionModel.PatchMerger.MLP0.Weight == nil {
+		if tensor := m.Base.Backend().Get("mm.0.weight"); tensor != nil {
+			model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.MLP0), []model.Tag{model.ParseTag("mm.0")})
+			model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.MLP2), []model.Tag{model.ParseTag("mm.2")})
+			model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.LNQ), []model.Tag{model.ParseTag("v.post_ln")})
+		}
+	}
+	if m.VisionModel.PatchEmbedding.PatchConv0.Weight == nil {
+		if tensor := m.Base.Backend().Get("v.patch_embd.weight"); tensor != nil {
+			m.VisionModel.PatchEmbedding.PatchConv0.Weight = tensor
+		}
+		if tensor := m.Base.Backend().Get("v.patch_embd.weight.1"); tensor != nil {
+			m.VisionModel.PatchEmbedding.PatchConv1.Weight = tensor
+		}
+	}
+}
+
+func (m *Model) IsOnlineProjectorMergingSupported() bool {
+	return true
+}
+
 func init() {
 	model.Register("qwen25vl", New)
 }
@@ -2,7 +2,9 @@ package qwen3vl
 
 import (
 	"bytes"
+	"fmt"
 	"image"
+	"reflect"
 	"slices"
 
 	"github.com/ollama/ollama/fs"
@@ -170,6 +172,27 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
 	return m.Output.Forward(ctx, hiddenStates), nil
 }
 
+func (m *Model) PostPopulate() {
+	if m.VisionModel.PatchMerger.FC1.Weight == nil {
+		if tensor := m.Base.Backend().Get("mm.0.weight"); tensor != nil {
+			model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.FC1), []model.Tag{model.ParseTag("mm.0")})
+			model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.FC2), []model.Tag{model.ParseTag("mm.2")})
+			model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.Norm), []model.Tag{model.ParseTag("v.post_ln")})
+		}
+	}
+	for i, deepstacks := range m.VisionModel.DeepstackMerger {
+		if deepstacks.FC1.Weight == nil {
+			if tensor := m.Base.Backend().Get(fmt.Sprintf("v.deepstack.%d.weight", m.VisionModel.deepstackVisualIndexes[i])); tensor != nil {
+				model.SetPointer(m.Base, reflect.ValueOf(deepstacks), []model.Tag{model.ParseTag("v.deepstack.%d")})
+			}
+		}
+	}
+}
+
+func (m *Model) IsOnlineProjectorMergingSupported() bool {
+	return true
+}
+
 func New(c fs.Config) (model.Model, error) {
 	m := Model{
 		TextProcessor: model.NewBytePairEncoding(
@@ -94,8 +94,8 @@ func (o VisionOptions) headDim() int {
 
 type VisionPatchMerger struct {
 	Norm *nn.LayerNorm `gguf:"norm"`
-	FC1  *nn.Linear    `gguf:"linear_fc1"`
-	FC2  *nn.Linear    `gguf:"linear_fc2"`
+	FC1  *nn.Linear    `gguf:"linear_fc1,alt:fc.fc1"`
+	FC2  *nn.Linear    `gguf:"linear_fc2,alt:fc.fc2"`
 }
 
 func (m *VisionPatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, postshuffleNorm bool, opts VisionOptions) ml.Tensor {
@@ -241,6 +241,13 @@ func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor, grid *Grid)
 // newVisionModel creates a new instance of the Qwen vision model
 func newVisionModel(c fs.Config) *VisionModel {
 	deepstackVisualIndexes := c.Ints("vision.deepstack_visual_indexes")
+	if deepstackVisualIndexes == nil && c.Bools("vision.is_deepstack_layers") != nil {
+		for i, flag := range c.Bools("vision.is_deepstack_layers") {
+			if flag {
+				deepstackVisualIndexes = append(deepstackVisualIndexes, int32(i))
+			}
+		}
+	}
 	model := &VisionModel{
 		Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count", 32)),
 		DeepstackMerger: make([]*VisionPatchMerger, len(deepstackVisualIndexes)),
@@ -1171,6 +1171,7 @@ func (s *Server) allocModel(
 	mpath string,
 	params ml.BackendParams,
 	loraPath []string,
+	projectorPath string,
 	parallel int,
 	kvCacheType string,
 	kvSize int,
@@ -1302,7 +1303,7 @@ func (s *Server) load(w http.ResponseWriter, r *http.Request) {
 
 	s.batchSize = req.BatchSize
 
-	err := s.allocModel(s.modelPath, params, req.LoraPath, req.Parallel, req.KvCacheType, req.KvSize, req.MultiUserCache)
+	err := s.allocModel(s.modelPath, params, req.LoraPath, req.ProjectorPath, req.Parallel, req.KvCacheType, req.KvSize, req.MultiUserCache)
 	if err != nil {
 		s.closeModel()