Merge remote-tracking branch 'upstream/main' into vulkanV3
@@ -149,7 +149,11 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 	var textProcessor model.TextProcessor
 	var err error
 	if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
-		textProcessor, err = model.NewTextProcessor(modelPath)
+		if len(projectors) == 0 {
+			textProcessor, err = model.NewTextProcessor(modelPath)
+		} else {
+			err = errors.New("split vision models aren't supported")
+		}
 		if err != nil {
 			// To prepare for opt-out mode, instead of treating this as an error, we fallback to the old runner
 			slog.Debug("model not yet supported by Ollama engine, switching to compatibility mode", "model", modelPath, "error", err)
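This hunk only tries the new Ollama engine when no split vision projector is present, and the debug log below it downgrades failure to a fallback rather than an error. A minimal, self-contained sketch of that try-then-fall-back shape, using hypothetical loadNewEngine/loadLegacy helpers rather than Ollama's actual API:

package main

import (
	"errors"
	"fmt"
	"log/slog"
)

// loadNewEngine is an illustrative stand-in for the new engine path; like
// the hunk above, it refuses split vision models (extra projector files).
func loadNewEngine(modelPath string, projectors []string) (string, error) {
	if len(projectors) != 0 {
		return "", errors.New("split vision models aren't supported")
	}
	return "new-engine:" + modelPath, nil
}

// loadLegacy is an illustrative stand-in for the compatibility runner.
func loadLegacy(modelPath string) string {
	return "legacy:" + modelPath
}

// loadRunner downgrades the error to a debug log and falls back, mirroring
// the opt-out behavior described in the diff's comment.
func loadRunner(modelPath string, projectors []string) string {
	runner, err := loadNewEngine(modelPath, projectors)
	if err != nil {
		slog.Debug("model not yet supported, switching to compatibility mode",
			"model", modelPath, "error", err)
		return loadLegacy(modelPath)
	}
	return runner
}

func main() {
	fmt.Println(loadRunner("llama3.gguf", nil))                    // new-engine path
	fmt.Println(loadRunner("llava.gguf", []string{"mmproj.gguf"})) // falls back
}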
@@ -162,11 +166,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 		}
 	}
 
-	newEstimates := textProcessor != nil && envconfig.NewMemoryEstimates()
-	if newEstimates {
-		slog.Info("enabling new memory estimates")
-	}
-
 	// Verify the requested context size is <= the model training size
 	trainCtx := f.KV().ContextLength()
 	if opts.NumCtx > int(trainCtx) && trainCtx > 0 {
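This hunk drops the envconfig.NewMemoryEstimates gate entirely, and its trailing context lines stop at the training-context check; the body of that branch lies outside the diff. A sketch of one plausible handling, assuming the code merely warns (checkContext is illustrative, not a function from the codebase):

package main

import "log/slog"

// checkContext is a hypothetical helper: a trainCtx of 0 means the training
// size is unknown, so no comparison is made in that case.
func checkContext(numCtx int, trainCtx uint64) {
	if trainCtx > 0 && numCtx > int(trainCtx) {
		slog.Warn("requested context length exceeds model training context",
			"requested", numCtx, "training", trainCtx)
	}
}

func main() {
	checkContext(16384, 8192) // warns: request exceeds training context
	checkContext(2048, 8192)  // within bounds, silent
	checkContext(16384, 0)    // training size unknown, silent
}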
@@ -434,7 +433,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 		}
 	}()
 
-	if newEstimates {
+	if textProcessor != nil {
 		return &ollamaServer{llmServer: s}, nil
 	} else {
 		return &llamaServer{llmServer: s, ggml: f}, nil
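With the newEstimates flag removed above, this last hunk keys the return-type decision on textProcessor != nil instead: both variants wrap the same embedded base and come back through one constructor. A hedged sketch of that shape, with newEngineServer/legacyServer as illustrative stand-ins for ollamaServer/llamaServer:

package main

import "fmt"

// base holds what both server variants share; all names below are
// illustrative stand-ins, not Ollama's types.
type base struct{ model string }

// newEngineServer stands in for ollamaServer.
type newEngineServer struct{ base }

// legacyServer stands in for llamaServer; meta stands in for the
// retained *ggml.GGML handle.
type legacyServer struct {
	base
	meta string
}

type server interface{ Describe() string }

func (s *newEngineServer) Describe() string { return "new engine: " + s.model }

func (s *legacyServer) Describe() string { return "legacy (" + s.meta + "): " + s.model }

// newServer mirrors the hunk's decision: a loaded text processor selects
// the new-engine wrapper, otherwise the legacy wrapper keeps the metadata.
func newServer(model string, hasTextProcessor bool) server {
	b := base{model: model}
	if hasTextProcessor {
		return &newEngineServer{base: b}
	}
	return &legacyServer{base: b, meta: "ggml"}
}

func main() {
	fmt.Println(newServer("llama3.gguf", true).Describe())
	fmt.Println(newServer("llava.gguf", false).Describe())
}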