From d4e0da08907f7611e1a2d9bda319bb30cd4ff029 Mon Sep 17 00:00:00 2001 From: Thomas Stocker Date: Fri, 7 Nov 2025 00:52:56 +0100 Subject: [PATCH] Remove unnecessary macOS 13 and lower patches (#12656) * Remove unnecessary macOS 13 patch * Remove unnecessary macOS version guard patch * rename patches * remove macOS 13 patch again * rename files --- .../0018-BF16-macos-version-guard.patch | 28 ------------------- ...ch => 0018-ggml-Add-batch-size-hint.patch} | 0 ...fix-mtmd-audio.cpp-build-on-windows.patch} | 0 ...ble-ggml-blas-on-macos-v13-and-older.patch | 25 ----------------- ...de.patch => 0020-ggml-No-alloc-mode.patch} | 0 ...h => 0021-decode-disable-output_all.patch} | 0 ...ml-Enable-resetting-backend-devices.patch} | 0 ...den-uncaught-exception-registration.patch} | 0 ... => 0024-GPU-discovery-enhancements.patch} | 0 ...ML-fallback-for-unified-memory-GPUs.patch} | 0 ...=> 0026-report-LoadLibrary-failures.patch} | 0 ...patch => 0027-interleave-multi-rope.patch} | 0 ...Add-memory-detection-using-DXGI-PDH.patch} | 0 .../ggml/ggml/src/ggml-blas/ggml-blas.cpp | 5 ---- .../ggml/src/ggml-metal/ggml-metal-context.m | 7 +---- 15 files changed, 1 insertion(+), 64 deletions(-) delete mode 100644 llama/patches/0018-BF16-macos-version-guard.patch rename llama/patches/{0019-ggml-Add-batch-size-hint.patch => 0018-ggml-Add-batch-size-hint.patch} (100%) rename llama/patches/{0021-fix-mtmd-audio.cpp-build-on-windows.patch => 0019-fix-mtmd-audio.cpp-build-on-windows.patch} (100%) delete mode 100644 llama/patches/0020-Disable-ggml-blas-on-macos-v13-and-older.patch rename llama/patches/{0022-ggml-No-alloc-mode.patch => 0020-ggml-No-alloc-mode.patch} (100%) rename llama/patches/{0023-decode-disable-output_all.patch => 0021-decode-disable-output_all.patch} (100%) rename llama/patches/{0024-ggml-Enable-resetting-backend-devices.patch => 0022-ggml-Enable-resetting-backend-devices.patch} (100%) rename llama/patches/{0025-harden-uncaught-exception-registration.patch => 
0023-harden-uncaught-exception-registration.patch} (100%) rename llama/patches/{0026-GPU-discovery-enhancements.patch => 0024-GPU-discovery-enhancements.patch} (100%) rename llama/patches/{0027-NVML-fallback-for-unified-memory-GPUs.patch => 0025-NVML-fallback-for-unified-memory-GPUs.patch} (100%) rename llama/patches/{0028-report-LoadLibrary-failures.patch => 0026-report-LoadLibrary-failures.patch} (100%) rename llama/patches/{0029-interleave-multi-rope.patch => 0027-interleave-multi-rope.patch} (100%) rename llama/patches/{0030-Add-memory-detection-using-DXGI-PDH.patch => 0028-Add-memory-detection-using-DXGI-PDH.patch} (100%) diff --git a/llama/patches/0018-BF16-macos-version-guard.patch b/llama/patches/0018-BF16-macos-version-guard.patch deleted file mode 100644 index f209c8021..000000000 --- a/llama/patches/0018-BF16-macos-version-guard.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Daniel Hiltgen -Date: Wed, 30 Jul 2025 08:43:46 -0700 -Subject: [PATCH] BF16 macos version guard - -Only enable BF16 on supported MacOS versions (v14+) ---- - ggml/src/ggml-metal/ggml-metal-context.m | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/ggml/src/ggml-metal/ggml-metal-context.m b/ggml/src/ggml-metal/ggml-metal-context.m -index 052efb7ac..b47dc7879 100644 ---- a/ggml/src/ggml-metal/ggml-metal-context.m -+++ b/ggml/src/ggml-metal/ggml-metal-context.m -@@ -125,7 +125,12 @@ ggml_metal_t ggml_metal_init(ggml_metal_device_t dev) { - - res->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); - -- res->use_bfloat = props_dev->has_bfloat; -+ if (@available(macOS 14.0, *)) { -+ res->use_bfloat = props_dev->has_bfloat; -+ } else { -+ res->use_bfloat = false; -+ } -+ - res->use_fusion = getenv("GGML_METAL_FUSION_DISABLE") == nil; - res->use_concurrency = getenv("GGML_METAL_CONCURRENCY_DISABLE") == nil; - diff --git a/llama/patches/0019-ggml-Add-batch-size-hint.patch 
b/llama/patches/0018-ggml-Add-batch-size-hint.patch similarity index 100% rename from llama/patches/0019-ggml-Add-batch-size-hint.patch rename to llama/patches/0018-ggml-Add-batch-size-hint.patch diff --git a/llama/patches/0021-fix-mtmd-audio.cpp-build-on-windows.patch b/llama/patches/0019-fix-mtmd-audio.cpp-build-on-windows.patch similarity index 100% rename from llama/patches/0021-fix-mtmd-audio.cpp-build-on-windows.patch rename to llama/patches/0019-fix-mtmd-audio.cpp-build-on-windows.patch diff --git a/llama/patches/0020-Disable-ggml-blas-on-macos-v13-and-older.patch b/llama/patches/0020-Disable-ggml-blas-on-macos-v13-and-older.patch deleted file mode 100644 index 9fbc0b604..000000000 --- a/llama/patches/0020-Disable-ggml-blas-on-macos-v13-and-older.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Daniel Hiltgen -Date: Sun, 3 Aug 2025 10:00:20 -0700 -Subject: [PATCH] Disable ggml-blas on macos v13 and older - ---- - ggml/src/ggml-blas/ggml-blas.cpp | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/ggml/src/ggml-blas/ggml-blas.cpp b/ggml/src/ggml-blas/ggml-blas.cpp -index 88d088952..6a38a51a2 100644 ---- a/ggml/src/ggml-blas/ggml-blas.cpp -+++ b/ggml/src/ggml-blas/ggml-blas.cpp -@@ -507,6 +507,11 @@ static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = { - }; - - ggml_backend_reg_t ggml_backend_blas_reg(void) { -+ // MacOS prior to v14 does not include cblas_sgemm - disable this backend if it isn't available -+ if (&cblas_sgemm == NULL) { -+ GGML_LOG_INFO("Disabling ggml-blas backend on old MacOS version\n"); -+ return NULL; -+ } - static struct ggml_backend_reg ggml_backend_blas_reg = { - /* .api_version = */ GGML_BACKEND_API_VERSION, - /* .iface = */ ggml_backend_blas_reg_i, diff --git a/llama/patches/0022-ggml-No-alloc-mode.patch b/llama/patches/0020-ggml-No-alloc-mode.patch similarity index 100% rename from llama/patches/0022-ggml-No-alloc-mode.patch rename to 
llama/patches/0020-ggml-No-alloc-mode.patch diff --git a/llama/patches/0023-decode-disable-output_all.patch b/llama/patches/0021-decode-disable-output_all.patch similarity index 100% rename from llama/patches/0023-decode-disable-output_all.patch rename to llama/patches/0021-decode-disable-output_all.patch diff --git a/llama/patches/0024-ggml-Enable-resetting-backend-devices.patch b/llama/patches/0022-ggml-Enable-resetting-backend-devices.patch similarity index 100% rename from llama/patches/0024-ggml-Enable-resetting-backend-devices.patch rename to llama/patches/0022-ggml-Enable-resetting-backend-devices.patch diff --git a/llama/patches/0025-harden-uncaught-exception-registration.patch b/llama/patches/0023-harden-uncaught-exception-registration.patch similarity index 100% rename from llama/patches/0025-harden-uncaught-exception-registration.patch rename to llama/patches/0023-harden-uncaught-exception-registration.patch diff --git a/llama/patches/0026-GPU-discovery-enhancements.patch b/llama/patches/0024-GPU-discovery-enhancements.patch similarity index 100% rename from llama/patches/0026-GPU-discovery-enhancements.patch rename to llama/patches/0024-GPU-discovery-enhancements.patch diff --git a/llama/patches/0027-NVML-fallback-for-unified-memory-GPUs.patch b/llama/patches/0025-NVML-fallback-for-unified-memory-GPUs.patch similarity index 100% rename from llama/patches/0027-NVML-fallback-for-unified-memory-GPUs.patch rename to llama/patches/0025-NVML-fallback-for-unified-memory-GPUs.patch diff --git a/llama/patches/0028-report-LoadLibrary-failures.patch b/llama/patches/0026-report-LoadLibrary-failures.patch similarity index 100% rename from llama/patches/0028-report-LoadLibrary-failures.patch rename to llama/patches/0026-report-LoadLibrary-failures.patch diff --git a/llama/patches/0029-interleave-multi-rope.patch b/llama/patches/0027-interleave-multi-rope.patch similarity index 100% rename from llama/patches/0029-interleave-multi-rope.patch rename to 
llama/patches/0027-interleave-multi-rope.patch diff --git a/llama/patches/0030-Add-memory-detection-using-DXGI-PDH.patch b/llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch similarity index 100% rename from llama/patches/0030-Add-memory-detection-using-DXGI-PDH.patch rename to llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch diff --git a/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp b/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp index 6a38a51a2..88d088952 100644 --- a/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp +++ b/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp @@ -507,11 +507,6 @@ static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = { }; ggml_backend_reg_t ggml_backend_blas_reg(void) { - // MacOS prior to v14 does not include cblas_sgemm - disable this backend if it isn't available - if (&cblas_sgemm == NULL) { - GGML_LOG_INFO("Disabling ggml-blas backend on old MacOS version\n"); - return NULL; - } static struct ggml_backend_reg ggml_backend_blas_reg = { /* .api_version = */ GGML_BACKEND_API_VERSION, /* .iface = */ ggml_backend_blas_reg_i, diff --git a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-context.m b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-context.m index b47dc7879..052efb7ac 100644 --- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-context.m +++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-context.m @@ -125,12 +125,7 @@ ggml_metal_t ggml_metal_init(ggml_metal_device_t dev) { res->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); - if (@available(macOS 14.0, *)) { - res->use_bfloat = props_dev->has_bfloat; - } else { - res->use_bfloat = false; - } - + res->use_bfloat = props_dev->has_bfloat; res->use_fusion = getenv("GGML_METAL_FUSION_DISABLE") == nil; res->use_concurrency = getenv("GGML_METAL_CONCURRENCY_DISABLE") == nil;