From 7689aded24f76cb02e7bb1c34ad5110a135a9e4b Mon Sep 17 00:00:00 2001
From: Shalini Salomi Bodapati
Date: Thu, 11 Dec 2025 07:37:33 -0600
Subject: [PATCH] ggml-cpu: Enable Matrix Math Accelerator for Power10

Adding -mcpu=power10 improves matrix multiplication performance when
running Ollama on PowerPC-based hardware. The -mcpu=power10 flag needs
to be added for llamafile.go, so that the PowerPC-optimized code (using
Matrix Multiply Assist, MMA) for llamafile_sgemm is enabled and
available in the ollama binary.

This change adds the -mcpu=power10 flag when building with the build
tag ppc64le.power10, which enables MMA optimizations in the ollama
binary. The -mcpu=power9 flag is added when building with the build tag
ppc64le.power9, which enables VSX optimizations in the ollama binary.

When building on a Power10 machine, use: go build --tags ppc64le.power10
When building on a Power9 machine, use:  go build --tags ppc64le.power9

Performance impact: improved performance on Power10 chips for Q4_0,
Q8_0, FP32, and BF16 models.

Inference time with ollama run llama3:8b (Q4_0 model), about 30% less
time for a 50-word summarization of a prompt with 512 tokens:
  with MMA enabled   : 6.05 sec
  without MMA (base) : 8.45 sec

Signed-off-by: Shalini Salomi Bodapati
---
 .../src/ggml-cpu/llamafile/llamafile_ppc64le_power10.go | 7 +++++++
 .../src/ggml-cpu/llamafile/llamafile_ppc64le_power9.go  | 7 +++++++
 2 files changed, 14 insertions(+)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile_ppc64le_power10.go
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile_ppc64le_power9.go

diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile_ppc64le_power10.go b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile_ppc64le_power10.go
new file mode 100644
index 000000000..0aaec68b2
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile_ppc64le_power10.go
@@ -0,0 +1,7 @@
+// +build ppc64le.power10
+
+package llamafile
+
+// #cgo CXXFLAGS: -std=c++17 -mcpu=power10
+// #cgo CPPFLAGS: -I${SRCDIR}/.. -I${SRCDIR}/../.. -I${SRCDIR}/../../../include
+import "C"
diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile_ppc64le_power9.go b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile_ppc64le_power9.go
new file mode 100644
index 000000000..6bdd69780
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile_ppc64le_power9.go
@@ -0,0 +1,7 @@
+// +build ppc64le.power9
+
+package llamafile
+
+// #cgo CXXFLAGS: -std=c++17 -mcpu=power9
+// #cgo CPPFLAGS: -I${SRCDIR}/.. -I${SRCDIR}/../.. -I${SRCDIR}/../../../include
+import "C"
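
Note on the build constraints (a sketch, not part of the patch above): the new
files use the legacy "// +build" constraint syntax. On Go 1.17 and later
toolchains, gofmt normally pairs that with the newer "//go:build" form. A
minimal sketch of the power10 file in that style, assuming the dotted custom
tag name ppc64le.power10 is kept unchanged and accepted by the newer syntax,
could look like:

    //go:build ppc64le.power10
    // +build ppc64le.power10

    // Sketch only: same package and cgo flags as in the patch, restated
    // with the newer //go:build constraint alongside the legacy line.
    package llamafile

    // #cgo CXXFLAGS: -std=c++17 -mcpu=power10
    // #cgo CPPFLAGS: -I${SRCDIR}/.. -I${SRCDIR}/../.. -I${SRCDIR}/../../../include
    import "C"

The build invocation stays the same as described in the commit message, e.g.
go build --tags ppc64le.power10 on a Power10 machine.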