server: more sanity checks when loading split gguf
This commit is contained in:
parent
fa9f1eef9d
commit
469ac5b638
|
|
@ -124,7 +124,7 @@ type ollamaServer struct {
|
|||
// It collects array values for arrays with a size less than or equal to
|
||||
// maxArraySize. If maxArraySize is 0, the default value of 1024 is used. If
|
||||
// maxArraySize is negative, all arrays are collected. If reliefSplitConstrain
// is true, the split count and split numbering checks are skipped.
|
||||
func LoadModel(model string, extraModels []string, maxArraySize int) (*ggml.MetaGGML, error) {
|
||||
func LoadModel(model string, extraModels []string, maxArraySize int, reliefSplitConstrain bool) (*ggml.MetaGGML, error) {
|
||||
if _, err := os.Stat(model); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
@ -140,7 +140,11 @@ func LoadModel(model string, extraModels []string, maxArraySize int) (*ggml.Meta
|
|||
return nil, err
|
||||
}
|
||||
if ggml1.KV().GGUFSplitInfo() != nil {
|
||||
if ggml1.KV().GGUFSplitInfo().No != 0 {
|
||||
return nil, errors.New("not the first split of model")
|
||||
}
|
||||
loadedGgml := []ggml.GGML{*ggml1}
|
||||
visitedSplitNo := []uint16{ggml1.KV().GGUFSplitInfo().No}
|
||||
for i := range extraModels {
|
||||
extraModel := extraModels[i]
|
||||
f, err := os.Open(extraModel)
|
||||
|
|
@ -153,8 +157,23 @@ func LoadModel(model string, extraModels []string, maxArraySize int) (*ggml.Meta
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if ggml1.KV().GGUFSplitInfo() == nil {
|
||||
return nil, errors.New("non-split gguf in extra model paths while main model path is split gguf")
|
||||
}
|
||||
visitedSplitNo = append(visitedSplitNo, ggml1.KV().GGUFSplitInfo().No)
|
||||
loadedGgml = append(loadedGgml, *ggml1)
|
||||
}
|
||||
if !reliefSplitConstrain {
|
||||
if len(visitedSplitNo) != int(ggml1.KV().GGUFSplitInfo().Count) {
|
||||
return nil, errors.New("mismatch split gguf count")
|
||||
}
|
||||
slices.Sort(visitedSplitNo)
|
||||
for i := 0; i < len(visitedSplitNo)-1; i++ {
|
||||
if visitedSplitNo[i] != visitedSplitNo[i+1]-1 {
|
||||
return nil, errors.New("repeated or skipped split found")
|
||||
}
|
||||
}
|
||||
}
|
||||
metaggml := ggml.MakeMetaGGML(loadedGgml, append([]string{model}, extraModels...))
|
||||
return &metaggml, nil
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -133,7 +133,11 @@ func New(modelPath string, extraModelPaths []string, params ml.BackendParams) (m
|
|||
}
|
||||
var meta fsggml.MetaGGML
|
||||
if smallmeta.KV().GGUFSplitInfo() != nil {
|
||||
if smallmeta.KV().GGUFSplitInfo().No != 0 {
|
||||
return nil, errors.New("not the first split of model")
|
||||
}
|
||||
loadedGgml := []fsggml.GGML{*smallmeta}
|
||||
visitedSplitNo := []uint16{smallmeta.KV().GGUFSplitInfo().No}
|
||||
for i := range extraModelPaths {
|
||||
extraModel := extraModelPaths[i]
|
||||
f, err := os.Open(extraModel)
|
||||
|
|
@ -146,8 +150,21 @@ func New(modelPath string, extraModelPaths []string, params ml.BackendParams) (m
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if smallmeta.KV().GGUFSplitInfo() == nil {
|
||||
return nil, errors.New("non-split gguf in extra model paths while main model path is split gguf")
|
||||
}
|
||||
visitedSplitNo = append(visitedSplitNo, smallmeta.KV().GGUFSplitInfo().No)
|
||||
loadedGgml = append(loadedGgml, *smallmeta)
|
||||
}
|
||||
if len(visitedSplitNo) != int(smallmeta.KV().GGUFSplitInfo().Count) {
|
||||
return nil, errors.New("mismatch split gguf count")
|
||||
}
|
||||
slices.Sort(visitedSplitNo)
|
||||
for i := 0; i < len(visitedSplitNo)-1; i++ {
|
||||
if visitedSplitNo[i] != visitedSplitNo[i+1]-1 {
|
||||
return nil, errors.New("repeated or skipped split found")
|
||||
}
|
||||
}
|
||||
meta = fsggml.MakeMetaGGML(loadedGgml, append([]string{modelPath}, extraModelPaths...))
|
||||
} else {
|
||||
meta = fsggml.MakeMetaGGML([]fsggml.GGML{*smallmeta}, []string{modelPath})
|
||||
|
|
|
|||
|
|
@ -1187,7 +1187,7 @@ func getModelData(digest string, verbose bool) (ggml.KV, ggml.ForeignTensors, er
|
|||
if verbose {
|
||||
maxArraySize = -1
|
||||
}
|
||||
data, err := llm.LoadModel(digest, make([]string, 0), maxArraySize)
|
||||
data, err := llm.LoadModel(digest, make([]string, 0), maxArraySize, true)
|
||||
if err != nil {
|
||||
return nil, make(ggml.ForeignTensors, 0), err
|
||||
}
|
||||
|
|
|
|||
|
|
@ -196,7 +196,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
|||
|
||||
// Load model for fitting
|
||||
logutil.Trace("loading model metadata", "model", pending.model.ModelPath)
|
||||
ggml, err := llm.LoadModel(pending.model.ModelPath, pending.model.ExtraModelPaths, 1024)
|
||||
ggml, err := llm.LoadModel(pending.model.ModelPath, pending.model.ExtraModelPaths, 1024, false)
|
||||
if err != nil {
|
||||
pending.errCh <- err
|
||||
break
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, vra
|
|||
})
|
||||
|
||||
model := &Model{Name: modelName, ModelPath: p}
|
||||
f, err := llm.LoadModel(model.ModelPath, make([]string, 0), 0)
|
||||
f, err := llm.LoadModel(model.ModelPath, make([]string, 0), 0, true)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue