model: add tensor names in mmproj
This commit is contained in:
parent
8852220f59
commit
e68d6054c1
|
|
@ -146,6 +146,16 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
|
||||||
if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
|
if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
|
||||||
if len(projectors) == 0 {
|
if len(projectors) == 0 {
|
||||||
textProcessor, err = model.NewTextProcessor(modelPath)
|
textProcessor, err = model.NewTextProcessor(modelPath)
|
||||||
|
} else if len(projectors) == 1 {
|
||||||
|
var canMerge bool
|
||||||
|
canMerge, err = model.CanMergeProjector(modelPath)
|
||||||
|
if err == nil {
|
||||||
|
if !canMerge {
|
||||||
|
err = errors.New("split vision models aren't supported")
|
||||||
|
} else {
|
||||||
|
textProcessor, err = model.NewTextProcessor(modelPath)
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
err = errors.New("split vision models aren't supported")
|
err = errors.New("split vision models aren't supported")
|
||||||
}
|
}
|
||||||
|
|
@ -479,10 +489,10 @@ type LoadRequest struct {
|
||||||
GPULayers ml.GPULayersList
|
GPULayers ml.GPULayersList
|
||||||
MultiUserCache bool
|
MultiUserCache bool
|
||||||
|
|
||||||
// Legacy fields - not used with the Ollama engine
|
|
||||||
ProjectorPath string
|
ProjectorPath string
|
||||||
MainGPU int
|
// Legacy fields - not used with the Ollama engine
|
||||||
UseMmap bool
|
MainGPU int
|
||||||
|
UseMmap bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type LoadResponse struct {
|
type LoadResponse struct {
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,9 @@ type Model interface {
|
||||||
|
|
||||||
Backend() ml.Backend
|
Backend() ml.Backend
|
||||||
Config() config
|
Config() config
|
||||||
|
|
||||||
|
PostPopulate()
|
||||||
|
IsOnlineProjectorMergingSupported() bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// MultimodalProcessor must be implemented by multimodal models.
|
// MultimodalProcessor must be implemented by multimodal models.
|
||||||
|
|
@ -90,6 +93,16 @@ func (m *Base) Config() config {
|
||||||
return m.config
|
return m.config
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PostPopulate is the no-op Base implementation of the Model interface hook.
// New calls the hook once, right after PopulateFields has run on the model.
func (m *Base) PostPopulate() {
|
||||||
|
// Stub. A model can override this method to redirect tensors that
|
||||||
|
// have been renamed by convert_hf_to_gguf.py from llama.cpp,
|
||||||
|
// or to run any other model-specific logic.
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsOnlineProjectorMergingSupported is the Base implementation of the Model
// interface method and always returns false. CanMergeProjector returns the
// value of this method for the model it resolves from a file.
func (m *Base) IsOnlineProjectorMergingSupported() bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
var models = make(map[string]func(fs.Config) (Model, error))
|
var models = make(map[string]func(fs.Config) (Model, error))
|
||||||
|
|
||||||
// Register registers a model constructor for the given architecture
|
// Register registers a model constructor for the given architecture
|
||||||
|
|
@ -115,7 +128,8 @@ func New(modelPath string, params ml.BackendParams) (Model, error) {
|
||||||
|
|
||||||
base := Base{b: b, config: m.Config()}
|
base := Base{b: b, config: m.Config()}
|
||||||
v := reflect.ValueOf(m)
|
v := reflect.ValueOf(m)
|
||||||
v.Elem().Set(populateFields(base, v.Elem()))
|
v.Elem().Set(PopulateFields(base, v.Elem()))
|
||||||
|
m.PostPopulate()
|
||||||
return m, nil
|
return m, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -143,6 +157,25 @@ func NewTextProcessor(s string) (TextProcessor, error) {
|
||||||
return tp, nil
|
return tp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CanMergeProjector(s string) (bool, error) {
|
||||||
|
r, err := os.Open(s)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
defer r.Close()
|
||||||
|
|
||||||
|
meta, err := fsggml.Decode(r, -1)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
m, err := modelForArch(meta.KV())
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
return m.IsOnlineProjectorMergingSupported(), nil
|
||||||
|
}
|
||||||
|
|
||||||
func modelForArch(c fs.Config) (Model, error) {
|
func modelForArch(c fs.Config) (Model, error) {
|
||||||
arch := c.Architecture()
|
arch := c.Architecture()
|
||||||
if pooling.Type(c.Uint("pooling_type")) != pooling.TypeNone {
|
if pooling.Type(c.Uint("pooling_type")) != pooling.TypeNone {
|
||||||
|
|
@ -157,7 +190,7 @@ func modelForArch(c fs.Config) (Model, error) {
|
||||||
return f(c)
|
return f(c)
|
||||||
}
|
}
|
||||||
|
|
||||||
func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
|
func PopulateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
|
||||||
t := v.Type()
|
t := v.Type()
|
||||||
|
|
||||||
if t.Kind() == reflect.Struct {
|
if t.Kind() == reflect.Struct {
|
||||||
|
|
@ -172,7 +205,7 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
|
||||||
// make a copy
|
// make a copy
|
||||||
tagsCopy := tags
|
tagsCopy := tags
|
||||||
if tag := t.Field(i).Tag.Get("gguf"); tag != "" {
|
if tag := t.Field(i).Tag.Get("gguf"); tag != "" {
|
||||||
tagsCopy = append(tagsCopy, parseTag(tag))
|
tagsCopy = append(tagsCopy, ParseTag(tag))
|
||||||
}
|
}
|
||||||
|
|
||||||
if tt == reflect.TypeOf((*Base)(nil)).Elem() {
|
if tt == reflect.TypeOf((*Base)(nil)).Elem() {
|
||||||
|
|
@ -194,13 +227,23 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
|
||||||
} else if len(childNames) == 0 {
|
} else if len(childNames) == 0 {
|
||||||
// current tag has names but no children, create branches for each name
|
// current tag has names but no children, create branches for each name
|
||||||
for _, name := range names {
|
for _, name := range names {
|
||||||
fullNames = append(fullNames, []string{name})
|
if name == "" {
|
||||||
|
// If an empty alternate name exists, do not add it to the list
|
||||||
|
// as Go will create double dots in the name
|
||||||
|
fullNames = append(fullNames, []string{})
|
||||||
|
} else {
|
||||||
|
fullNames = append(fullNames, []string{name})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// merge each name with each child
|
// merge each name with each child
|
||||||
for _, name := range names {
|
for _, name := range names {
|
||||||
for _, childName := range childNames {
|
for _, childName := range childNames {
|
||||||
fullNames = append(fullNames, append([]string{name}, childName...))
|
if name == "" {
|
||||||
|
fullNames = append(fullNames, childName)
|
||||||
|
} else {
|
||||||
|
fullNames = append(fullNames, append([]string{name}, childName...))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -218,14 +261,14 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if tt.Kind() == reflect.Pointer || tt.Kind() == reflect.Interface {
|
} else if tt.Kind() == reflect.Pointer || tt.Kind() == reflect.Interface {
|
||||||
setPointer(base, vv, tagsCopy)
|
SetPointer(base, vv, tagsCopy)
|
||||||
} else if tt.Kind() == reflect.Slice || tt.Kind() == reflect.Array {
|
} else if tt.Kind() == reflect.Slice || tt.Kind() == reflect.Array {
|
||||||
for i := range vv.Len() {
|
for i := range vv.Len() {
|
||||||
vvv := vv.Index(i)
|
vvv := vv.Index(i)
|
||||||
if vvv.Kind() == reflect.Pointer || vvv.Kind() == reflect.Interface {
|
if vvv.Kind() == reflect.Pointer || vvv.Kind() == reflect.Interface {
|
||||||
setPointer(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)}))
|
SetPointer(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)}))
|
||||||
} else {
|
} else {
|
||||||
vvv.Set(populateFields(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)})...))
|
vvv.Set(PopulateFields(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)})...))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -243,7 +286,7 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
|
||||||
return v
|
return v
|
||||||
}
|
}
|
||||||
|
|
||||||
func setPointer(base Base, v reflect.Value, tags []Tag) {
|
func SetPointer(base Base, v reflect.Value, tags []Tag) {
|
||||||
vv := v
|
vv := v
|
||||||
if v.Kind() == reflect.Interface {
|
if v.Kind() == reflect.Interface {
|
||||||
if v.IsNil() {
|
if v.IsNil() {
|
||||||
|
|
@ -258,7 +301,7 @@ func setPointer(base Base, v reflect.Value, tags []Tag) {
|
||||||
vv = reflect.New(v.Type().Elem()).Elem()
|
vv = reflect.New(v.Type().Elem()).Elem()
|
||||||
}
|
}
|
||||||
|
|
||||||
if f := populateFields(base, vv, tags...); f.CanAddr() {
|
if f := PopulateFields(base, vv, tags...); f.CanAddr() {
|
||||||
v.Set(f.Addr())
|
v.Set(f.Addr())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -271,7 +314,7 @@ type Tag struct {
|
||||||
alternatives []string
|
alternatives []string
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseTag(s string) (tag Tag) {
|
func ParseTag(s string) (tag Tag) {
|
||||||
parts := strings.Split(s, ",")
|
parts := strings.Split(s, ",")
|
||||||
if len(parts) > 0 {
|
if len(parts) > 0 {
|
||||||
tag.name = parts[0]
|
tag.name = parts[0]
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@ func TestParseTags(t *testing.T) {
|
||||||
|
|
||||||
for _, tt := range cases {
|
for _, tt := range cases {
|
||||||
t.Run(tt.value, func(t *testing.T) {
|
t.Run(tt.value, func(t *testing.T) {
|
||||||
got := parseTag(tt.value)
|
got := ParseTag(tt.value)
|
||||||
if diff := cmp.Diff(tt.want, got, cmp.AllowUnexported((Tag{}))); diff != "" {
|
if diff := cmp.Diff(tt.want, got, cmp.AllowUnexported((Tag{}))); diff != "" {
|
||||||
t.Errorf("ParseTags() returned unexpected values (-want +got):\n%s", diff)
|
t.Errorf("ParseTags() returned unexpected values (-want +got):\n%s", diff)
|
||||||
}
|
}
|
||||||
|
|
@ -81,7 +81,7 @@ func TestPopulateFields(t *testing.T) {
|
||||||
|
|
||||||
var m fakeModel
|
var m fakeModel
|
||||||
v := reflect.ValueOf(&m)
|
v := reflect.ValueOf(&m)
|
||||||
v.Elem().Set(populateFields(Base{b: &fakeBackend{
|
v.Elem().Set(PopulateFields(Base{b: &fakeBackend{
|
||||||
names: []string{
|
names: []string{
|
||||||
"input.weight",
|
"input.weight",
|
||||||
"blk.0.attn_q.weight",
|
"blk.0.attn_q.weight",
|
||||||
|
|
@ -130,7 +130,7 @@ func TestPopulateFieldsAlternateName(t *testing.T) {
|
||||||
|
|
||||||
var m fakeModel
|
var m fakeModel
|
||||||
v := reflect.ValueOf(&m)
|
v := reflect.ValueOf(&m)
|
||||||
v.Elem().Set(populateFields(Base{b: &fakeBackend{
|
v.Elem().Set(PopulateFields(Base{b: &fakeBackend{
|
||||||
names: []string{
|
names: []string{
|
||||||
"input.weight",
|
"input.weight",
|
||||||
"nested.b.weight",
|
"nested.b.weight",
|
||||||
|
|
@ -166,7 +166,7 @@ func TestPopulateFieldsPrefixSuffixName(t *testing.T) {
|
||||||
Blocks: make([]fakeBlock, 2),
|
Blocks: make([]fakeBlock, 2),
|
||||||
}
|
}
|
||||||
v := reflect.ValueOf(&m)
|
v := reflect.ValueOf(&m)
|
||||||
v.Elem().Set(populateFields(Base{b: &fakeBackend{
|
v.Elem().Set(PopulateFields(Base{b: &fakeBackend{
|
||||||
names: []string{
|
names: []string{
|
||||||
"blk.0.a.weight",
|
"blk.0.a.weight",
|
||||||
"blk.0.b_weight",
|
"blk.0.b_weight",
|
||||||
|
|
|
||||||
|
|
@ -164,6 +164,10 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
|
||||||
return hiddenState, nil
|
return hiddenState, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsOnlineProjectorMergingSupported always returns true for this model type.
func (m *Model) IsOnlineProjectorMergingSupported() bool {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
model.Register("gemma3", New)
|
model.Register("gemma3", New)
|
||||||
model.Register("gemma3_embed", newEmbedModel)
|
model.Register("gemma3_embed", newEmbedModel)
|
||||||
|
|
|
||||||
|
|
@ -76,9 +76,9 @@ type VisionModelOptions struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type VisionModel struct {
|
type VisionModel struct {
|
||||||
PatchEmbedding *nn.Conv2D `gguf:"patch_embedding"`
|
PatchEmbedding *nn.Conv2D `gguf:"patch_embedding,alt:patch_embd"`
|
||||||
PositionEmbedding *nn.Embedding `gguf:"position_embedding"`
|
PositionEmbedding *nn.Embedding `gguf:"position_embedding,alt:position_embd"`
|
||||||
PostLayerNorm *nn.LayerNorm `gguf:"post_layernorm"`
|
PostLayerNorm *nn.LayerNorm `gguf:"post_layernorm,alt:post_ln"`
|
||||||
|
|
||||||
Layers []VisionEncoderLayer `gguf:"blk"`
|
Layers []VisionEncoderLayer `gguf:"blk"`
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,7 @@ func New(c fs.Config) (model.Model, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
type PatchMerger struct {
|
type PatchMerger struct {
|
||||||
MergingLayer *nn.Linear `gguf:"merging_layer"`
|
MergingLayer *nn.Linear `gguf:"merging_layer,alt:"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pm *PatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, size image.Point, spatialMergeSize int) ml.Tensor {
|
func (pm *PatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, size image.Point, spatialMergeSize int) ml.Tensor {
|
||||||
|
|
@ -72,9 +72,9 @@ func (pm *PatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, size ima
|
||||||
}
|
}
|
||||||
|
|
||||||
type MultiModalProjector struct {
|
type MultiModalProjector struct {
|
||||||
Norm *nn.RMSNorm `gguf:"norm"`
|
Norm *nn.RMSNorm `gguf:"norm,alt:input_norm"`
|
||||||
Linear1 *nn.Linear `gguf:"linear_1"`
|
Linear1 *nn.Linear `gguf:"linear_1,alt:1"`
|
||||||
Linear2 *nn.Linear `gguf:"linear_2"`
|
Linear2 *nn.Linear `gguf:"linear_2,alt:2"`
|
||||||
PatchMerger *PatchMerger `gguf:"patch_merger"`
|
PatchMerger *PatchMerger `gguf:"patch_merger"`
|
||||||
|
|
||||||
spatialMergeSize int
|
spatialMergeSize int
|
||||||
|
|
@ -164,6 +164,10 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
|
||||||
return m.TextModel.Forward(ctx, batch.Inputs, positions, positionsScale, batch.Outputs, batch, m.Cache), nil
|
return m.TextModel.Forward(ctx, batch.Inputs, positions, positionsScale, batch.Outputs, batch, m.Cache), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsOnlineProjectorMergingSupported always returns true for this model type.
func (m *Model) IsOnlineProjectorMergingSupported() bool {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
model.Register("mistral3", New)
|
model.Register("mistral3", New)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -87,8 +87,8 @@ type VisionModelOptions struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type VisionModel struct {
|
type VisionModel struct {
|
||||||
PatchEmbedding *nn.Conv2D `gguf:"patch_conv"`
|
PatchEmbedding *nn.Conv2D `gguf:"patch_conv,alt:patch_embd"`
|
||||||
EncoderNorm *nn.RMSNorm `gguf:"encoder_norm"`
|
EncoderNorm *nn.RMSNorm `gguf:"encoder_norm,alt:pre_ln"`
|
||||||
Layers []VisionEncoderLayer `gguf:"blk"`
|
Layers []VisionEncoderLayer `gguf:"blk"`
|
||||||
|
|
||||||
*VisionModelOptions
|
*VisionModelOptions
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package qwen25vl
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"image"
|
"image"
|
||||||
|
"reflect"
|
||||||
"slices"
|
"slices"
|
||||||
|
|
||||||
"github.com/ollama/ollama/fs"
|
"github.com/ollama/ollama/fs"
|
||||||
|
|
@ -190,6 +191,28 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
|
||||||
return m.Output.Forward(ctx, hiddenStates), nil
|
return m.Output.Forward(ctx, hiddenStates), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *Model) PostPopulate() {
|
||||||
|
if m.VisionModel.PatchMerger.MLP0.Weight == nil {
|
||||||
|
if tensor := m.Base.Backend().Get("mm.0.weight"); tensor != nil {
|
||||||
|
model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.MLP0), []model.Tag{model.ParseTag("mm.0")})
|
||||||
|
model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.MLP2), []model.Tag{model.ParseTag("mm.2")})
|
||||||
|
model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.LNQ), []model.Tag{model.ParseTag("v.post_ln")})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if m.VisionModel.PatchEmbedding.PatchConv0.Weight == nil {
|
||||||
|
if tensor := m.Base.Backend().Get("v.patch_embd.weight"); tensor != nil {
|
||||||
|
m.VisionModel.PatchEmbedding.PatchConv0.Weight = tensor
|
||||||
|
}
|
||||||
|
if tensor := m.Base.Backend().Get("v.patch_embd.weight.1"); tensor != nil {
|
||||||
|
m.VisionModel.PatchEmbedding.PatchConv1.Weight = tensor
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsOnlineProjectorMergingSupported always returns true for this model type.
func (m *Model) IsOnlineProjectorMergingSupported() bool {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
model.Register("qwen25vl", New)
|
model.Register("qwen25vl", New)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,9 @@ package qwen3vl
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"fmt"
|
||||||
"image"
|
"image"
|
||||||
|
"reflect"
|
||||||
"slices"
|
"slices"
|
||||||
|
|
||||||
"github.com/ollama/ollama/fs"
|
"github.com/ollama/ollama/fs"
|
||||||
|
|
@ -170,6 +172,27 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
|
||||||
return m.Output.Forward(ctx, hiddenStates), nil
|
return m.Output.Forward(ctx, hiddenStates), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *Model) PostPopulate() {
|
||||||
|
if m.VisionModel.PatchMerger.FC1.Weight == nil {
|
||||||
|
if tensor := m.Base.Backend().Get("mm.0.weight"); tensor != nil {
|
||||||
|
model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.FC1), []model.Tag{model.ParseTag("mm.0")})
|
||||||
|
model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.FC2), []model.Tag{model.ParseTag("mm.2")})
|
||||||
|
model.SetPointer(m.Base, reflect.ValueOf(m.VisionModel.PatchMerger.Norm), []model.Tag{model.ParseTag("v.post_ln")})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i, deepstacks := range m.VisionModel.DeepstackMerger {
|
||||||
|
if deepstacks.FC1.Weight == nil {
|
||||||
|
if tensor := m.Base.Backend().Get(fmt.Sprintf("v.deepstack.%d.weight", m.VisionModel.deepstackVisualIndexes[i])); tensor != nil {
|
||||||
|
model.SetPointer(m.Base, reflect.ValueOf(deepstacks), []model.Tag{model.ParseTag("v.deepstack.%d")})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsOnlineProjectorMergingSupported always returns true for this model type.
func (m *Model) IsOnlineProjectorMergingSupported() bool {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
func New(c fs.Config) (model.Model, error) {
|
func New(c fs.Config) (model.Model, error) {
|
||||||
m := Model{
|
m := Model{
|
||||||
TextProcessor: model.NewBytePairEncoding(
|
TextProcessor: model.NewBytePairEncoding(
|
||||||
|
|
|
||||||
|
|
@ -94,8 +94,8 @@ func (o VisionOptions) headDim() int {
|
||||||
|
|
||||||
type VisionPatchMerger struct {
|
type VisionPatchMerger struct {
|
||||||
Norm *nn.LayerNorm `gguf:"norm"`
|
Norm *nn.LayerNorm `gguf:"norm"`
|
||||||
FC1 *nn.Linear `gguf:"linear_fc1"`
|
FC1 *nn.Linear `gguf:"linear_fc1,alt:fc.fc1"`
|
||||||
FC2 *nn.Linear `gguf:"linear_fc2"`
|
FC2 *nn.Linear `gguf:"linear_fc2,alt:fc.fc2"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *VisionPatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, postshuffleNorm bool, opts VisionOptions) ml.Tensor {
|
func (m *VisionPatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, postshuffleNorm bool, opts VisionOptions) ml.Tensor {
|
||||||
|
|
@ -241,6 +241,13 @@ func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor, grid *Grid)
|
||||||
// newVisionModel creates a new instance of the Qwen vision model
|
// newVisionModel creates a new instance of the Qwen vision model
|
||||||
func newVisionModel(c fs.Config) *VisionModel {
|
func newVisionModel(c fs.Config) *VisionModel {
|
||||||
deepstackVisualIndexes := c.Ints("vision.deepstack_visual_indexes")
|
deepstackVisualIndexes := c.Ints("vision.deepstack_visual_indexes")
|
||||||
|
if deepstackVisualIndexes == nil && c.Bools("vision.is_deepstack_layers") != nil {
|
||||||
|
for i, flag := range c.Bools("vision.is_deepstack_layers") {
|
||||||
|
if flag {
|
||||||
|
deepstackVisualIndexes = append(deepstackVisualIndexes, int32(i))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
model := &VisionModel{
|
model := &VisionModel{
|
||||||
Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count", 32)),
|
Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count", 32)),
|
||||||
DeepstackMerger: make([]*VisionPatchMerger, len(deepstackVisualIndexes)),
|
DeepstackMerger: make([]*VisionPatchMerger, len(deepstackVisualIndexes)),
|
||||||
|
|
|
||||||
|
|
@ -1171,6 +1171,7 @@ func (s *Server) allocModel(
|
||||||
mpath string,
|
mpath string,
|
||||||
params ml.BackendParams,
|
params ml.BackendParams,
|
||||||
loraPath []string,
|
loraPath []string,
|
||||||
|
projectorPath string,
|
||||||
parallel int,
|
parallel int,
|
||||||
kvCacheType string,
|
kvCacheType string,
|
||||||
kvSize int,
|
kvSize int,
|
||||||
|
|
@ -1302,7 +1303,7 @@ func (s *Server) load(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|
||||||
s.batchSize = req.BatchSize
|
s.batchSize = req.BatchSize
|
||||||
|
|
||||||
err := s.allocModel(s.modelPath, params, req.LoraPath, req.Parallel, req.KvCacheType, req.KvSize, req.MultiUserCache)
|
err := s.allocModel(s.modelPath, params, req.LoraPath, req.ProjectorPath, req.Parallel, req.KvCacheType, req.KvSize, req.MultiUserCache)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
s.closeModel()
|
s.closeModel()
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue