From c2e440879aea4b3000cd250165d536c0c56f9936 Mon Sep 17 00:00:00 2001
From: Vadim Grinco
Date: Sun, 16 Mar 2025 10:52:49 +0100
Subject: [PATCH] Applied 04-disable-mmap-vulkan.patch

From: https://github.com/whyvl/ollama-vulkan/issues/7#issuecomment-2660836871

Signed-off-by: Vadim Grinco
---
 llm/server.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llm/server.go b/llm/server.go
index c6f117125..537cc1e1a 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -207,10 +207,12 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 	}
 
 	// Windows CUDA should not use mmap for best performance
+	// Vulkan should not use mmap because of double allocation (VRAM + RAM)
 	// Linux with a model larger than free space, mmap leads to thrashing
 	// For CPU loads we want the memory to be allocated, not FS cache
 	if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == nil) ||
 		(runtime.GOOS == "linux" && systemFreeMemory < estimate.TotalSize && opts.UseMMap == nil) ||
+		(gpus[0].Library == "vulkan" && opts.UseMMap == nil) ||
 		(gpus[0].Library == "cpu" && opts.UseMMap == nil) ||
 		(opts.UseMMap != nil && !*opts.UseMMap) {
 		params = append(params, "--no-mmap")