log: warn if user overrides detected (#13088)

Many failed GPU discovery issues recently can be traced to incorrect override settings.
This extra logging should help quickly spot these and guide users to try unsetting them first.
This commit is contained in:
Daniel Hiltgen 2025-11-14 14:36:28 -08:00 committed by GitHub
parent ce29f695b4
commit 72ff5b9d8c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 25 additions and 0 deletions

View File

@ -65,6 +65,10 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
} }
slog.Info("discovering available GPUs...") slog.Info("discovering available GPUs...")
// Warn if any user-overrides are set which could lead to incorrect GPU discovery
overrideWarnings()
requested := envconfig.LLMLibrary() requested := envconfig.LLMLibrary()
jetpack := cudaJetpack() jetpack := cudaJetpack()
@ -449,3 +453,24 @@ func bootstrapDevices(ctx context.Context, ollamaLibDirs []string, extraEnvs map
return devices return devices
} }
func overrideWarnings() {
anyFound := false
m := envconfig.AsMap()
for _, k := range []string{
"CUDA_VISIBLE_DEVICES",
"HIP_VISIBLE_DEVICES",
"ROCR_VISIBLE_DEVICES",
"GGML_VK_VISIBLE_DEVICES",
"GPU_DEVICE_ORDINAL",
"HSA_OVERRIDE_GFX_VERSION",
} {
if e, found := m[k]; found && e.Value != "" {
anyFound = true
slog.Warn("user override visible devices", k, e.Value)
}
}
if anyFound {
slog.Warn("if GPUs are not correctly discovered, unset and try again")
}
}