ggml: Disable unused pipeline parallelism

We're not currently using pipeline parallelism, even in cases where we
could. Disabling it improves generation performance by 10-30% with multiple GPUs.
This commit is contained in:
Jesse Gross 2025-07-10 16:55:34 -07:00 committed by Ryan Schumacher
parent 39cec5338a
commit 015e39a8be
No known key found for this signature in database
1 changed file with 1 addition and 1 deletion

View File

@@ -418,7 +418,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
(*C.ggml_backend_buffer_type_t)(unsafe.Pointer(&schedBufts[0])),
C.int(len(schedBackends)),
C.size_t(maxGraphNodes),
-C._Bool(len(gpus) > 1 && slices.Contains(gpus, output.d)),
+C._Bool(false),
C._Bool(false),
),
schedBackends: schedBackends,