server: more sanity checks when loading split gguf

This commit is contained in:
cvrunmin 2025-11-28 15:00:42 +08:00
parent fa9f1eef9d
commit 469ac5b638
5 changed files with 40 additions and 4 deletions

View File

@ -124,7 +124,7 @@ type ollamaServer struct {
// It collects array values for arrays with a size less than or equal to
// maxArraySize. If maxArraySize is 0, the default value of 1024 is used. If
// the maxArraySize is negative, all arrays are collected.
func LoadModel(model string, extraModels []string, maxArraySize int) (*ggml.MetaGGML, error) {
func LoadModel(model string, extraModels []string, maxArraySize int, reliefSplitConstrain bool) (*ggml.MetaGGML, error) {
if _, err := os.Stat(model); err != nil {
return nil, err
}
@ -140,7 +140,11 @@ func LoadModel(model string, extraModels []string, maxArraySize int) (*ggml.Meta
return nil, err
}
if ggml1.KV().GGUFSplitInfo() != nil {
if ggml1.KV().GGUFSplitInfo().No != 0 {
return nil, errors.New("not the first split of model")
}
loadedGgml := []ggml.GGML{*ggml1}
visitedSplitNo := []uint16{ggml1.KV().GGUFSplitInfo().No}
for i := range extraModels {
extraModel := extraModels[i]
f, err := os.Open(extraModel)
@ -153,8 +157,23 @@ func LoadModel(model string, extraModels []string, maxArraySize int) (*ggml.Meta
if err != nil {
return nil, err
}
if ggml1.KV().GGUFSplitInfo() == nil {
return nil, errors.New("non-split gguf in extra model paths while main model path is split gguf")
}
visitedSplitNo = append(visitedSplitNo, ggml1.KV().GGUFSplitInfo().No)
loadedGgml = append(loadedGgml, *ggml1)
}
if !reliefSplitConstrain {
if len(visitedSplitNo) != int(ggml1.KV().GGUFSplitInfo().Count) {
return nil, errors.New("mismatch split gguf count")
}
slices.Sort(visitedSplitNo)
for i := 0; i < len(visitedSplitNo)-1; i++ {
if visitedSplitNo[i] != visitedSplitNo[i+1]-1 {
return nil, errors.New("repeated or skipped split found")
}
}
}
metaggml := ggml.MakeMetaGGML(loadedGgml, append([]string{model}, extraModels...))
return &metaggml, nil
} else {

View File

@ -133,7 +133,11 @@ func New(modelPath string, extraModelPaths []string, params ml.BackendParams) (m
}
var meta fsggml.MetaGGML
if smallmeta.KV().GGUFSplitInfo() != nil {
if smallmeta.KV().GGUFSplitInfo().No != 0 {
return nil, errors.New("not the first split of model")
}
loadedGgml := []fsggml.GGML{*smallmeta}
visitedSplitNo := []uint16{smallmeta.KV().GGUFSplitInfo().No}
for i := range extraModelPaths {
extraModel := extraModelPaths[i]
f, err := os.Open(extraModel)
@ -146,8 +150,21 @@ func New(modelPath string, extraModelPaths []string, params ml.BackendParams) (m
if err != nil {
return nil, err
}
if smallmeta.KV().GGUFSplitInfo() == nil {
return nil, errors.New("non-split gguf in extra model paths while main model path is split gguf")
}
visitedSplitNo = append(visitedSplitNo, smallmeta.KV().GGUFSplitInfo().No)
loadedGgml = append(loadedGgml, *smallmeta)
}
if len(visitedSplitNo) != int(smallmeta.KV().GGUFSplitInfo().Count) {
return nil, errors.New("mismatch split gguf count")
}
slices.Sort(visitedSplitNo)
for i := 0; i < len(visitedSplitNo)-1; i++ {
if visitedSplitNo[i] != visitedSplitNo[i+1]-1 {
return nil, errors.New("repeated or skipped split found")
}
}
meta = fsggml.MakeMetaGGML(loadedGgml, append([]string{modelPath}, extraModelPaths...))
} else {
meta = fsggml.MakeMetaGGML([]fsggml.GGML{*smallmeta}, []string{modelPath})

View File

@ -1187,7 +1187,7 @@ func getModelData(digest string, verbose bool) (ggml.KV, ggml.ForeignTensors, er
if verbose {
maxArraySize = -1
}
data, err := llm.LoadModel(digest, make([]string, 0), maxArraySize)
data, err := llm.LoadModel(digest, make([]string, 0), maxArraySize, true)
if err != nil {
return nil, make(ggml.ForeignTensors, 0), err
}

View File

@ -196,7 +196,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
// Load model for fitting
logutil.Trace("loading model metadata", "model", pending.model.ModelPath)
ggml, err := llm.LoadModel(pending.model.ModelPath, pending.model.ExtraModelPaths, 1024)
ggml, err := llm.LoadModel(pending.model.ModelPath, pending.model.ExtraModelPaths, 1024, false)
if err != nil {
pending.errCh <- err
break

View File

@ -132,7 +132,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, vra
})
model := &Model{Name: modelName, ModelPath: p}
f, err := llm.LoadModel(model.ModelPath, make([]string, 0), 0)
f, err := llm.LoadModel(model.ModelPath, make([]string, 0), 0, true)
if err != nil {
t.Fatal(err)
}