ml/backend/ggml: fix crash on dlopen for non-AVX systems (#8976)

readme: add Lunary to observability community integrations (#8975)
2025-02-10 09:52:12 -08:00 · 2025-02-09 22:08:46 -08:00
3 changed files with 65 additions and 9 deletions
--- a/README.md
+++ b/README.md
@@ -551,7 +551,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.

 ### Observability
-
+- [Lunary](https://lunary.ai/docs/integrations/ollama) is the leading open-source LLM observability platform. It provides a variety of enterprise-grade features such as real-time analytics, prompt templates management, PII masking, and comprehensive agent tracing.
 - [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
 - [HoneyHive](https://docs.honeyhive.ai/integrations/ollama) is an AI observability and evaluation platform for AI agents. Use HoneyHive to evaluate agent performance, interrogate failures, and monitor quality in production.
 - [Langfuse](https://langfuse.com/docs/integrations/ollama) is an open source LLM observability platform that enables teams to collaboratively monitor, evaluate and debug AI applications.
--- a/llama/patches/0016-remove-sgemm-global-variables.patch
+++ b/llama/patches/0016-remove-sgemm-global-variables.patch
@@ -0,0 +1,55 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: jmorganca <jmorganca@gmail.com>
+Date: Sun, 9 Feb 2025 17:22:15 -0800
+Subject: [PATCH] remove sgemm global variables
+
+removes the 'iq4nlt' global variable in sgemm.cpp that causes
+a runtime crash when calling dlopen on ggml-cpu libraries as
+its initialization depends on AVX instructions the host machine
+may not have
+---
+ ggml/src/ggml-cpu/llamafile/sgemm.cpp | 17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
+index 8fce576c..3f260ce5 100644
+--- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp
+++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
+@@ -279,14 +279,6 @@ template <> inline __m256bh load(const float *p) {
+ }
+ #endif
+ 
+-////////////////////////////////////////////////////////////////////////////////////////////////////
+-// CONSTANTS
+-
+-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
+-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
+-static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
+-#endif
+-
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+ // FLOATING POINT MATRIX MULTIPLICATION
+ 
+@@ -613,6 +605,14 @@ class tinyBLAS_Q0_AVX {
+                     TC *C, int64_t ldc,
+                     int ith, int nth)
+         : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
++        const int8_t kvalues_iq4nl[16] = {
++            -127, -104, -83, -65,
++            -49,  -35,  -22, -10,
++              1,   13,   25,  38,
++             53,   69,   89, 113
++        };
++
++        iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
+     }
+ 
+     void matmul(int64_t m, int64_t n) {
+@@ -1037,6 +1037,7 @@ class tinyBLAS_Q0_AVX {
+     const int64_t ldc;
+     const int ith;
+     const int nth;
++    __m128i iq4nlt;
+ };
+ #endif // __AVX__
+ 
--- a/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/sgemm.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/sgemm.cpp
@@ -279,14 +279,6 @@ template <> inline __m256bh load(const float *p) {
 }
 #endif

-////////////////////////////////////////////////////////////////////////////////////////////////////
-// CONSTANTS
-
-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
-static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
-#endif
-
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // FLOATING POINT MATRIX MULTIPLICATION

@@ -613,6 +605,14 @@ class tinyBLAS_Q0_AVX {
                    TC *C, int64_t ldc,
                    int ith, int nth)
        : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
+        const int8_t kvalues_iq4nl[16] = {
+            -127, -104, -83, -65,
+            -49,  -35,  -22, -10,
+              1,   13,   25,  38,
+             53,   69,   89, 113
+        };
+
+        iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
    }

    void matmul(int64_t m, int64_t n) {
@@ -1037,6 +1037,7 @@ class tinyBLAS_Q0_AVX {
    const int64_t ldc;
    const int ith;
    const int nth;
+    __m128i iq4nlt;
 };
 #endif // __AVX__
Author	SHA1	Message	Date
Jeffrey Morgan	f4711da7bd	ml/backend/ggml: fix crash on dlopen for non-AVX systems (#8976)	2025-02-10 09:52:12 -08:00
Hugues Chocart	38117fba83	readme: add Lunary to observability community integrations (#8975)	2025-02-09 22:08:46 -08:00