From 7e2aac6255cb80e0a347f7022dc025e315d23f77 Mon Sep 17 00:00:00 2001
From: marx161-cmd <222994158+marx161-cmd@users.noreply.github.com>
Date: Thu, 1 Jan 2026 21:18:53 +0100
Subject: [PATCH] Add OLLAMA_NUM_GPU default

---
 api/types.go        |  2 +-
 envconfig/config.go | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/api/types.go b/api/types.go
index 63b898975..86a29289d 100644
--- a/api/types.go
+++ b/api/types.go
@@ -901,7 +901,7 @@ func DefaultOptions() Options {
 		// options set when the model is loaded
 		NumCtx:    int(envconfig.ContextLength()),
 		NumBatch:  512,
-		NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
+		NumGPU:    envconfig.DefaultNumGPU(), // -1 lets the scheduler decide, 0 forces CPU-only
 		NumThread: 0, // let the runtime decide
 		UseMMap:   nil,
 	},
diff --git a/envconfig/config.go b/envconfig/config.go
index 238e5e6e1..47a7e7818 100644
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -248,6 +248,20 @@ var (
 	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
 )
 
+func Int(key string, defaultValue int) func() int {
+	return func() int {
+		if s := Var(key); s != "" {
+			if n, err := strconv.ParseInt(s, 10, 64); err != nil {
+				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
+			} else {
+				return int(n)
+			}
+		}
+
+		return defaultValue
+	}
+}
+
 func Uint64(key string, defaultValue uint64) func() uint64 {
 	return func() uint64 {
 		if s := Var(key); s != "" {
@@ -265,6 +279,10 @@ func Uint64(key string, defaultValue uint64) func() uint64 {
 // Set aside VRAM per GPU
 var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
 
+// DefaultNumGPU sets a default number of layers to place on GPU.
+// -1 lets the scheduler decide, 0 forces CPU-only.
+var DefaultNumGPU = Int("OLLAMA_NUM_GPU", -1)
+
 type EnvVar struct {
 	Name        string
 	Value       any
@@ -287,6 +305,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_NOHISTORY":       {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
 		"OLLAMA_NOPRUNE":         {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
 		"OLLAMA_NUM_PARALLEL":    {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
+		"OLLAMA_NUM_GPU":         {"OLLAMA_NUM_GPU", DefaultNumGPU(), "Default number of layers to place on GPU (-1 auto, 0 CPU-only)"},
 		"OLLAMA_ORIGINS":         {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
 		"OLLAMA_SCHED_SPREAD":    {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},