Add OLLAMA_NUM_GPU default

marx161-cmd 2026-01-01 21:18:53 +01:00
parent 598c08d22b
commit 7e2aac6255
2 changed files with 20 additions and 1 deletion


@@ -901,7 +901,7 @@ func DefaultOptions() Options {
 			// options set when the model is loaded
 			NumCtx:    int(envconfig.ContextLength()),
 			NumBatch:  512,
-			NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
+			NumGPU:    envconfig.DefaultNumGPU(), // -1 lets the scheduler decide, 0 forces CPU-only
 			NumThread: 0,  // let the runtime decide
 			UseMMap:   nil,
 		},
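With this change the loader default for NumGPU is no longer hard-coded to -1: DefaultOptions() asks envconfig.DefaultNumGPU() each time it is called, so the value tracks the OLLAMA_NUM_GPU environment variable. A minimal sketch of the intended behavior, assuming the module path and DefaultOptions() signature match upstream Ollama (the demo program itself is illustrative, not part of this commit):

package main

import (
	"fmt"
	"os"

	"github.com/ollama/ollama/api"
)

func main() {
	// Unset: DefaultNumGPU() falls back to -1, so the scheduler keeps
	// deciding how many layers fit on the GPU (the previous behavior).
	os.Unsetenv("OLLAMA_NUM_GPU")
	fmt.Println(api.DefaultOptions().NumGPU) // -1

	// Set to 0: loads default to CPU-only; a request can still
	// override num_gpu in its own options.
	os.Setenv("OLLAMA_NUM_GPU", "0")
	fmt.Println(api.DefaultOptions().NumGPU) // 0
}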


@@ -248,6 +248,20 @@ var (
 	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
 )
 
+func Int(key string, defaultValue int) func() int {
+	return func() int {
+		if s := Var(key); s != "" {
+			if n, err := strconv.ParseInt(s, 10, 64); err != nil {
+				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
+			} else {
+				return int(n)
+			}
+		}
+		return defaultValue
+	}
+}
+
 func Uint64(key string, defaultValue uint64) func() uint64 {
 	return func() uint64 {
 		if s := Var(key); s != "" {
@@ -265,6 +279,10 @@ func Uint64(key string, defaultValue uint64) func() uint64 {
 // Set aside VRAM per GPU
 var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
 
+// DefaultNumGPU sets a default number of layers to place on GPU.
+// -1 lets the scheduler decide, 0 forces CPU-only.
+var DefaultNumGPU = Int("OLLAMA_NUM_GPU", -1)
+
 type EnvVar struct {
 	Name  string
 	Value any
@@ -287,6 +305,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_NOHISTORY":       {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
 		"OLLAMA_NOPRUNE":         {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
 		"OLLAMA_NUM_PARALLEL":    {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
+		"OLLAMA_NUM_GPU":         {"OLLAMA_NUM_GPU", DefaultNumGPU(), "Default number of layers to place on GPU (-1 auto, 0 CPU-only)"},
 		"OLLAMA_ORIGINS":         {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
 		"OLLAMA_SCHED_SPREAD":    {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},