Add OLLAMA_NUM_GPU default

parent 598c08d22b
commit 7e2aac6255
@@ -901,7 +901,7 @@ func DefaultOptions() Options {
 			// options set when the model is loaded
 			NumCtx:    int(envconfig.ContextLength()),
 			NumBatch:  512,
-			NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
+			NumGPU:    envconfig.DefaultNumGPU(), // -1 lets the scheduler decide, 0 forces CPU-only
 			NumThread: 0, // let the runtime decide
 			UseMMap:   nil,
 		},
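
The replaced comment encodes three regimes for NumGPU. As a hedged illustration only (resolveGPULayers and its parameters are hypothetical names, not code from this commit), the option could be interpreted like so:

package main

import "fmt"

// resolveGPULayers sketches the three regimes of the NumGPU option:
// a negative value defers to the scheduler's VRAM-based estimate, zero
// forces CPU-only inference, and a positive value is an explicit layer
// count capped at the model's total layer count.
func resolveGPULayers(numGPU, modelLayers, fitLayers int) int {
	switch {
	case numGPU < 0:
		return fitLayers // -1: let the scheduler decide
	case numGPU == 0:
		return 0 // 0: force CPU-only
	default:
		return min(numGPU, modelLayers) // explicit override
	}
}

func main() {
	fmt.Println(resolveGPULayers(-1, 33, 28)) // 28: scheduler's estimate
	fmt.Println(resolveGPULayers(0, 33, 28))  // 0: CPU-only
	fmt.Println(resolveGPULayers(40, 33, 28)) // 33: capped at model layers
}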
@@ -248,6 +248,20 @@ var (
 	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
 )
 
+func Int(key string, defaultValue int) func() int {
+	return func() int {
+		if s := Var(key); s != "" {
+			if n, err := strconv.ParseInt(s, 10, 64); err != nil {
+				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
+			} else {
+				return int(n)
+			}
+		}
+
+		return defaultValue
+	}
+}
+
 func Uint64(key string, defaultValue uint64) func() uint64 {
 	return func() uint64 {
 		if s := Var(key); s != "" {
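
A note on the helper's shape: Int returns a closure rather than a plain int, so the environment is consulted each time the accessor is called instead of once at package initialization. A minimal standalone sketch of the same pattern (using os.Getenv and strconv.Atoi in place of the package's Var and slog plumbing):

package main

import (
	"fmt"
	"os"
	"strconv"
)

// intVar mimics the envconfig pattern: package-level vars hold accessors,
// and the environment is re-read on every call.
func intVar(key string, defaultValue int) func() int {
	return func() int {
		if s := os.Getenv(key); s != "" {
			if n, err := strconv.Atoi(s); err == nil {
				return n
			}
			// invalid value: fall through to the default, as Int does
		}
		return defaultValue
	}
}

var numGPU = intVar("OLLAMA_NUM_GPU", -1)

func main() {
	fmt.Println(numGPU()) // -1 when unset
	os.Setenv("OLLAMA_NUM_GPU", "0")
	fmt.Println(numGPU()) // 0: picked up without re-initialization
}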
@@ -265,6 +279,10 @@ func Uint64(key string, defaultValue uint64) func() uint64 {
 // Set aside VRAM per GPU
 var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
 
+// DefaultNumGPU sets a default number of layers to place on GPU.
+// -1 lets the scheduler decide, 0 forces CPU-only.
+var DefaultNumGPU = Int("OLLAMA_NUM_GPU", -1)
+
 type EnvVar struct {
 	Name  string
 	Value any
@@ -287,6 +305,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_NOHISTORY":       {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
 		"OLLAMA_NOPRUNE":         {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
 		"OLLAMA_NUM_PARALLEL":    {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
+		"OLLAMA_NUM_GPU":         {"OLLAMA_NUM_GPU", DefaultNumGPU(), "Default number of layers to place on GPU (-1 auto, 0 CPU-only)"},
 		"OLLAMA_ORIGINS":         {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
 		"OLLAMA_SCHED_SPREAD":    {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
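
Registering the variable in AsMap also makes it visible wherever the registry is enumerated, e.g. when the server logs its effective configuration at startup. A hedged usage sketch, assuming the repository's envconfig import path:

package main

import (
	"fmt"

	"github.com/ollama/ollama/envconfig"
)

func main() {
	// Print every registered setting with its effective value;
	// OLLAMA_NUM_GPU now appears alongside the existing entries.
	for key, v := range envconfig.AsMap() {
		fmt.Printf("%s=%v\n", key, v.Value)
	}
}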