From 469ac5b63843974a204d076bf982a99cdf56ce38 Mon Sep 17 00:00:00 2001 From: cvrunmin Date: Fri, 28 Nov 2025 15:00:42 +0800 Subject: [PATCH] server: more sanity check when loading split gguf --- llm/server.go | 21 ++++++++++++++++++++- ml/backend/ggml/ggml.go | 17 +++++++++++++++++ server/routes.go | 2 +- server/sched.go | 2 +- server/sched_test.go | 2 +- 5 files changed, 40 insertions(+), 4 deletions(-) diff --git a/llm/server.go b/llm/server.go index e3decfc72..7eaa53c98 100644 --- a/llm/server.go +++ b/llm/server.go @@ -124,7 +124,7 @@ type ollamaServer struct { // It collects array values for arrays with a size less than or equal to // maxArraySize. If maxArraySize is 0, the default value of 1024 is used. If // the maxArraySize is negative, all arrays are collected. -func LoadModel(model string, extraModels []string, maxArraySize int) (*ggml.MetaGGML, error) { +func LoadModel(model string, extraModels []string, maxArraySize int, reliefSplitConstrain bool) (*ggml.MetaGGML, error) { if _, err := os.Stat(model); err != nil { return nil, err } @@ -140,7 +140,11 @@ func LoadModel(model string, extraModels []string, maxArraySize int) (*ggml.Meta return nil, err } if ggml1.KV().GGUFSplitInfo() != nil { + if ggml1.KV().GGUFSplitInfo().No != 0 { + return nil, errors.New("not the first split of model") + } loadedGgml := []ggml.GGML{*ggml1} + visitedSplitNo := []uint16{ggml1.KV().GGUFSplitInfo().No} for i := range extraModels { extraModel := extraModels[i] f, err := os.Open(extraModel) @@ -153,8 +157,23 @@ func LoadModel(model string, extraModels []string, maxArraySize int) (*ggml.Meta if err != nil { return nil, err } + if ggml1.KV().GGUFSplitInfo() == nil { + return nil, errors.New("non-split gguf in extra model paths while main model path is split gguf") + } + visitedSplitNo = append(visitedSplitNo, ggml1.KV().GGUFSplitInfo().No) loadedGgml = append(loadedGgml, *ggml1) } + if !reliefSplitConstrain { + if len(visitedSplitNo) != int(ggml1.KV().GGUFSplitInfo().Count) { + return nil, errors.New("mismatch split gguf count") + } + slices.Sort(visitedSplitNo) + for i := 0; i < len(visitedSplitNo)-1; i++ { + if visitedSplitNo[i] != visitedSplitNo[i+1]-1 { + return nil, errors.New("repeated or skipped split found") + } + } + } metaggml := ggml.MakeMetaGGML(loadedGgml, append([]string{model}, extraModels...)) return &metaggml, nil } else { diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go index 33f683dfd..dcace4968 100644 --- a/ml/backend/ggml/ggml.go +++ b/ml/backend/ggml/ggml.go @@ -133,7 +133,11 @@ func New(modelPath string, extraModelPaths []string, params ml.BackendParams) (m } var meta fsggml.MetaGGML if smallmeta.KV().GGUFSplitInfo() != nil { + if smallmeta.KV().GGUFSplitInfo().No != 0 { + return nil, errors.New("not the first split of model") + } loadedGgml := []fsggml.GGML{*smallmeta} + visitedSplitNo := []uint16{smallmeta.KV().GGUFSplitInfo().No} for i := range extraModelPaths { extraModel := extraModelPaths[i] f, err := os.Open(extraModel) @@ -146,8 +150,21 @@ func New(modelPath string, extraModelPaths []string, params ml.BackendParams) (m if err != nil { return nil, err } + if smallmeta.KV().GGUFSplitInfo() == nil { + return nil, errors.New("non-split gguf in extra model paths while main model path is split gguf") + } + visitedSplitNo = append(visitedSplitNo, smallmeta.KV().GGUFSplitInfo().No) loadedGgml = append(loadedGgml, *smallmeta) } + if len(visitedSplitNo) != int(smallmeta.KV().GGUFSplitInfo().Count) { + return nil, errors.New("mismatch split gguf count") + } + slices.Sort(visitedSplitNo) + for i := 0; i < len(visitedSplitNo)-1; i++ { + if visitedSplitNo[i] != visitedSplitNo[i+1]-1 { + return nil, errors.New("repeated or skipped split found") + } + } meta = fsggml.MakeMetaGGML(loadedGgml, append([]string{modelPath}, extraModelPaths...)) } else { meta = fsggml.MakeMetaGGML([]fsggml.GGML{*smallmeta}, []string{modelPath}) diff --git a/server/routes.go b/server/routes.go index a2b2810ca..cc14a9a17 100644 --- a/server/routes.go +++ b/server/routes.go @@ -1187,7 +1187,7 @@ func getModelData(digest string, verbose bool) (ggml.KV, ggml.ForeignTensors, er if verbose { maxArraySize = -1 } - data, err := llm.LoadModel(digest, make([]string, 0), maxArraySize) + data, err := llm.LoadModel(digest, make([]string, 0), maxArraySize, true) if err != nil { return nil, make(ggml.ForeignTensors, 0), err } diff --git a/server/sched.go b/server/sched.go index 11f702d75..814cc7828 100644 --- a/server/sched.go +++ b/server/sched.go @@ -196,7 +196,7 @@ func (s *Scheduler) processPending(ctx context.Context) { // Load model for fitting logutil.Trace("loading model metadata", "model", pending.model.ModelPath) - ggml, err := llm.LoadModel(pending.model.ModelPath, pending.model.ExtraModelPaths, 1024) + ggml, err := llm.LoadModel(pending.model.ModelPath, pending.model.ExtraModelPaths, 1024, false) if err != nil { pending.errCh <- err break diff --git a/server/sched_test.go b/server/sched_test.go index 69546242c..f18575080 100644 --- a/server/sched_test.go +++ b/server/sched_test.go @@ -132,7 +132,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, vra }) model := &Model{Name: modelName, ModelPath: p} - f, err := llm.LoadModel(model.ModelPath, make([]string, 0), 0) + f, err := llm.LoadModel(model.ModelPath, make([]string, 0), 0, true) if err != nil { t.Fatal(err) }