From d4e0da08907f7611e1a2d9bda319bb30cd4ff029 Mon Sep 17 00:00:00 2001
From: Thomas Stocker <thomas.stocker@gmail.com>
Date: Fri, 7 Nov 2025 00:52:56 +0100
Subject: [PATCH] Remove unnecessary macOS 13 and lower patches (#12656)

* Remove unnecessary macOS 13 patch

* Remove unnecessary macOS version guard patch

* rename patches

* remove macOS 13 patch again

* rename files
---
 .../0018-BF16-macos-version-guard.patch       | 28 -------------------
 ...ch => 0018-ggml-Add-batch-size-hint.patch} |  0
 ...fix-mtmd-audio.cpp-build-on-windows.patch} |  0
 ...ble-ggml-blas-on-macos-v13-and-older.patch | 25 -----------------
 ...de.patch => 0020-ggml-No-alloc-mode.patch} |  0
 ...h => 0021-decode-disable-output_all.patch} |  0
 ...ml-Enable-resetting-backend-devices.patch} |  0
 ...den-uncaught-exception-registration.patch} |  0
 ... => 0024-GPU-discovery-enhancements.patch} |  0
 ...ML-fallback-for-unified-memory-GPUs.patch} |  0
 ...=> 0026-report-LoadLibrary-failures.patch} |  0
 ...patch => 0027-interleave-multi-rope.patch} |  0
 ...Add-memory-detection-using-DXGI-PDH.patch} |  0
 .../ggml/ggml/src/ggml-blas/ggml-blas.cpp     |  5 ----
 .../ggml/src/ggml-metal/ggml-metal-context.m  |  7 +----
 15 files changed, 1 insertion(+), 64 deletions(-)
 delete mode 100644 llama/patches/0018-BF16-macos-version-guard.patch
 rename llama/patches/{0019-ggml-Add-batch-size-hint.patch => 0018-ggml-Add-batch-size-hint.patch} (100%)
 rename llama/patches/{0021-fix-mtmd-audio.cpp-build-on-windows.patch => 0019-fix-mtmd-audio.cpp-build-on-windows.patch} (100%)
 delete mode 100644 llama/patches/0020-Disable-ggml-blas-on-macos-v13-and-older.patch
 rename llama/patches/{0022-ggml-No-alloc-mode.patch => 0020-ggml-No-alloc-mode.patch} (100%)
 rename llama/patches/{0023-decode-disable-output_all.patch => 0021-decode-disable-output_all.patch} (100%)
 rename llama/patches/{0024-ggml-Enable-resetting-backend-devices.patch => 0022-ggml-Enable-resetting-backend-devices.patch} (100%)
 rename llama/patches/{0025-harden-uncaught-exception-registration.patch => 0023-harden-uncaught-exception-registration.patch} (100%)
 rename llama/patches/{0026-GPU-discovery-enhancements.patch => 0024-GPU-discovery-enhancements.patch} (100%)
 rename llama/patches/{0027-NVML-fallback-for-unified-memory-GPUs.patch => 0025-NVML-fallback-for-unified-memory-GPUs.patch} (100%)
 rename llama/patches/{0028-report-LoadLibrary-failures.patch => 0026-report-LoadLibrary-failures.patch} (100%)
 rename llama/patches/{0029-interleave-multi-rope.patch => 0027-interleave-multi-rope.patch} (100%)
 rename llama/patches/{0030-Add-memory-detection-using-DXGI-PDH.patch => 0028-Add-memory-detection-using-DXGI-PDH.patch} (100%)

diff --git a/llama/patches/0018-BF16-macos-version-guard.patch b/llama/patches/0018-BF16-macos-version-guard.patch
deleted file mode 100644
index f209c8021..000000000
--- a/llama/patches/0018-BF16-macos-version-guard.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Daniel Hiltgen <daniel@ollama.com>
-Date: Wed, 30 Jul 2025 08:43:46 -0700
-Subject: [PATCH] BF16 macos version guard
-
-Only enable BF16 on supported MacOS versions (v14+)
----
- ggml/src/ggml-metal/ggml-metal-context.m | 7 ++++++-
- 1 file changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/ggml/src/ggml-metal/ggml-metal-context.m b/ggml/src/ggml-metal/ggml-metal-context.m
-index 052efb7ac..b47dc7879 100644
---- a/ggml/src/ggml-metal/ggml-metal-context.m
-+++ b/ggml/src/ggml-metal/ggml-metal-context.m
-@@ -125,7 +125,12 @@ ggml_metal_t ggml_metal_init(ggml_metal_device_t dev) {
- 
-     res->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
- 
--    res->use_bfloat      = props_dev->has_bfloat;
-+    if (@available(macOS 14.0, *)) {
-+        res->use_bfloat = props_dev->has_bfloat;
-+    } else {
-+        res->use_bfloat = false;
-+    }
-+
-     res->use_fusion      = getenv("GGML_METAL_FUSION_DISABLE") == nil;
-     res->use_concurrency = getenv("GGML_METAL_CONCURRENCY_DISABLE") == nil;
- 
diff --git a/llama/patches/0019-ggml-Add-batch-size-hint.patch b/llama/patches/0018-ggml-Add-batch-size-hint.patch
similarity index 100%
rename from llama/patches/0019-ggml-Add-batch-size-hint.patch
rename to llama/patches/0018-ggml-Add-batch-size-hint.patch
diff --git a/llama/patches/0021-fix-mtmd-audio.cpp-build-on-windows.patch b/llama/patches/0019-fix-mtmd-audio.cpp-build-on-windows.patch
similarity index 100%
rename from llama/patches/0021-fix-mtmd-audio.cpp-build-on-windows.patch
rename to llama/patches/0019-fix-mtmd-audio.cpp-build-on-windows.patch
diff --git a/llama/patches/0020-Disable-ggml-blas-on-macos-v13-and-older.patch b/llama/patches/0020-Disable-ggml-blas-on-macos-v13-and-older.patch
deleted file mode 100644
index 9fbc0b604..000000000
--- a/llama/patches/0020-Disable-ggml-blas-on-macos-v13-and-older.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Daniel Hiltgen <daniel@ollama.com>
-Date: Sun, 3 Aug 2025 10:00:20 -0700
-Subject: [PATCH] Disable ggml-blas on macos v13 and older
-
----
- ggml/src/ggml-blas/ggml-blas.cpp | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/ggml/src/ggml-blas/ggml-blas.cpp b/ggml/src/ggml-blas/ggml-blas.cpp
-index 88d088952..6a38a51a2 100644
---- a/ggml/src/ggml-blas/ggml-blas.cpp
-+++ b/ggml/src/ggml-blas/ggml-blas.cpp
-@@ -507,6 +507,11 @@ static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
- };
- 
- ggml_backend_reg_t ggml_backend_blas_reg(void) {
-+    // MacOS prior to v14 does not include cblas_sgemm - disable this backend if it isn't available
-+    if (&cblas_sgemm == NULL) {
-+        GGML_LOG_INFO("Disabling ggml-blas backend on old MacOS version\n");
-+        return NULL;
-+    }
-     static struct ggml_backend_reg ggml_backend_blas_reg = {
-         /* .api_version = */ GGML_BACKEND_API_VERSION,
-         /* .iface       = */ ggml_backend_blas_reg_i,
diff --git a/llama/patches/0022-ggml-No-alloc-mode.patch b/llama/patches/0020-ggml-No-alloc-mode.patch
similarity index 100%
rename from llama/patches/0022-ggml-No-alloc-mode.patch
rename to llama/patches/0020-ggml-No-alloc-mode.patch
diff --git a/llama/patches/0023-decode-disable-output_all.patch b/llama/patches/0021-decode-disable-output_all.patch
similarity index 100%
rename from llama/patches/0023-decode-disable-output_all.patch
rename to llama/patches/0021-decode-disable-output_all.patch
diff --git a/llama/patches/0024-ggml-Enable-resetting-backend-devices.patch b/llama/patches/0022-ggml-Enable-resetting-backend-devices.patch
similarity index 100%
rename from llama/patches/0024-ggml-Enable-resetting-backend-devices.patch
rename to llama/patches/0022-ggml-Enable-resetting-backend-devices.patch
diff --git a/llama/patches/0025-harden-uncaught-exception-registration.patch b/llama/patches/0023-harden-uncaught-exception-registration.patch
similarity index 100%
rename from llama/patches/0025-harden-uncaught-exception-registration.patch
rename to llama/patches/0023-harden-uncaught-exception-registration.patch
diff --git a/llama/patches/0026-GPU-discovery-enhancements.patch b/llama/patches/0024-GPU-discovery-enhancements.patch
similarity index 100%
rename from llama/patches/0026-GPU-discovery-enhancements.patch
rename to llama/patches/0024-GPU-discovery-enhancements.patch
diff --git a/llama/patches/0027-NVML-fallback-for-unified-memory-GPUs.patch b/llama/patches/0025-NVML-fallback-for-unified-memory-GPUs.patch
similarity index 100%
rename from llama/patches/0027-NVML-fallback-for-unified-memory-GPUs.patch
rename to llama/patches/0025-NVML-fallback-for-unified-memory-GPUs.patch
diff --git a/llama/patches/0028-report-LoadLibrary-failures.patch b/llama/patches/0026-report-LoadLibrary-failures.patch
similarity index 100%
rename from llama/patches/0028-report-LoadLibrary-failures.patch
rename to llama/patches/0026-report-LoadLibrary-failures.patch
diff --git a/llama/patches/0029-interleave-multi-rope.patch b/llama/patches/0027-interleave-multi-rope.patch
similarity index 100%
rename from llama/patches/0029-interleave-multi-rope.patch
rename to llama/patches/0027-interleave-multi-rope.patch
diff --git a/llama/patches/0030-Add-memory-detection-using-DXGI-PDH.patch b/llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch
similarity index 100%
rename from llama/patches/0030-Add-memory-detection-using-DXGI-PDH.patch
rename to llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch
diff --git a/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp b/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp
index 6a38a51a2..88d088952 100644
--- a/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp
@@ -507,11 +507,6 @@ static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
 };
 
 ggml_backend_reg_t ggml_backend_blas_reg(void) {
-    // MacOS prior to v14 does not include cblas_sgemm - disable this backend if it isn't available
-    if (&cblas_sgemm == NULL) {
-        GGML_LOG_INFO("Disabling ggml-blas backend on old MacOS version\n");
-        return NULL;
-    }
     static struct ggml_backend_reg ggml_backend_blas_reg = {
         /* .api_version = */ GGML_BACKEND_API_VERSION,
         /* .iface       = */ ggml_backend_blas_reg_i,
diff --git a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-context.m b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-context.m
index b47dc7879..052efb7ac 100644
--- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-context.m
+++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-context.m
@@ -125,12 +125,7 @@ ggml_metal_t ggml_metal_init(ggml_metal_device_t dev) {
 
     res->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
 
-    if (@available(macOS 14.0, *)) {
-        res->use_bfloat = props_dev->has_bfloat;
-    } else {
-        res->use_bfloat = false;
-    }
-
+    res->use_bfloat      = props_dev->has_bfloat;
     res->use_fusion      = getenv("GGML_METAL_FUSION_DISABLE") == nil;
     res->use_concurrency = getenv("GGML_METAL_CONCURRENCY_DISABLE") == nil;