From cee4922649680c5e7a435c805b981c687b69ce82 Mon Sep 17 00:00:00 2001 From: nicole pardal Date: Fri, 21 Nov 2025 16:29:56 -0800 Subject: [PATCH] llm: set batch size equal to context length for nomic-bert models --- llm/server.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llm/server.go b/llm/server.go index 4eaa88df0..dca1a19d3 100644 --- a/llm/server.go +++ b/llm/server.go @@ -170,6 +170,11 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st opts.NumBatch = min(opts.NumBatch, opts.NumCtx) + if f.KV().Architecture() == "nomic-bert" { + opts.NumBatch = opts.NumCtx + slog.Debug("nomic-bert model detected, setting batch size equal to context length", "num_batch", opts.NumBatch, "num_ctx", opts.NumCtx) + } + loadRequest := LoadRequest{LoraPath: adapters, KvSize: opts.NumCtx * numParallel, BatchSize: opts.NumBatch, Parallel: numParallel, MultiUserCache: envconfig.MultiUserCache()} defaultThreads := systemInfo.ThreadCount