From b47aa7e75a3249f61c9d8e4219945ad4c7f55854 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 7 Jul 2025 13:10:14 -0700 Subject: [PATCH] ggml: Use assigned layers when reporting loading stats Reporting params.NumGPULayers can be misleading because it is the requested number of layers, not the actual number that is loaded. While they are often the same, there are cases where they might mismatch, such as if the GPU backend is missing. --- ml/backend/ggml/ggml.go | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go index 7d6831eed..243476891 100644 --- a/ml/backend/ggml/ggml.go +++ b/ml/backend/ggml/ggml.go @@ -356,23 +356,25 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) { } // Mimic llama runner logs summarizing layers and memory - slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", max(0, params.NumGPULayers-1))) gpuLayers := 0 - switch C.ggml_backend_dev_type(output.d) { - case 0: // CPU - slog.Info("offloading output layer to CPU") - case 1: // GPU - slog.Info("offloading output layer to GPU") - gpuLayers++ - case 2: // ACCEL - slog.Info("offloading output layer to ACCEL") - } for _, layer := range layers { - if C.ggml_backend_dev_type(layer.d) == 1 { + if C.ggml_backend_dev_type(layer.d) == C.GGML_BACKEND_DEVICE_TYPE_GPU { gpuLayers++ } } + slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", gpuLayers)) + + switch C.ggml_backend_dev_type(output.d) { + case C.GGML_BACKEND_DEVICE_TYPE_CPU: + slog.Info("offloading output layer to CPU") + case C.GGML_BACKEND_DEVICE_TYPE_GPU: + slog.Info("offloading output layer to GPU") + gpuLayers++ + case C.GGML_BACKEND_DEVICE_TYPE_ACCEL: + slog.Info("offloading output layer to ACCEL") + } slog.Info(fmt.Sprintf("offloaded %d/%d layers to GPU", gpuLayers, len(layers)+1)) + for bs := range maps.Values(bbs) { slog.Info("model weights", "buffer", C.GoString(C.ggml_backend_buffer_name(bs)), "size", format.HumanBytes2(uint64(C.ggml_backend_buffer_get_size(bs)))) }