Update to B7618

inforithmics 2026-01-03 12:54:46 +01:00
parent 47a8e00686
commit 7f25eb0038
5 changed files with 6 additions and 6 deletions

@@ -1,6 +1,6 @@
 UPSTREAM=https://github.com/ggml-org/llama.cpp.git
 WORKDIR=llama/vendor
-FETCH_HEAD=18ddaea2aecf7fbfe7acab77465808f3cf6200d3
+FETCH_HEAD=9dba9f5352308894bfb8786fcfe7c284168ff8f5
 
 .PHONY: help
 help:

llama/build-info.cpp

@@ -1,4 +1,4 @@
 int LLAMA_BUILD_NUMBER = 0;
-char const *LLAMA_COMMIT = "18ddaea2aecf7fbfe7acab77465808f3cf6200d3";
+char const *LLAMA_COMMIT = "9dba9f5352308894bfb8786fcfe7c284168ff8f5";
 char const *LLAMA_COMPILER = "";
 char const *LLAMA_BUILD_TARGET = "";

@@ -73,7 +73,7 @@ index 303278397..7d1733adb 100644
 {
 GGML_ABORT("fatal error");
 diff --git a/ggml/src/ggml-cuda/argsort.cu b/ggml/src/ggml-cuda/argsort.cu
-index da9652c3b..b82be371c 100644
+index 99669200f..5e261fd2d 100644
 --- a/ggml/src/ggml-cuda/argsort.cu
 +++ b/ggml/src/ggml-cuda/argsort.cu
 @@ -168,13 +168,107 @@ static void argsort_f32_i32_cuda_bitonic(const float * x,

@@ -29,8 +29,8 @@ static void argsort_f32_i32_cuda_cub(ggml_cuda_pool & pool,
                                      const int nrows,
                                      ggml_sort_order order,
                                      cudaStream_t stream) {
-    ggml_cuda_pool_alloc<int> temp_indices_alloc(pool, ncols * nrows);
-    ggml_cuda_pool_alloc<float> temp_keys_alloc(pool, ncols * nrows);
+    ggml_cuda_pool_alloc<int> temp_indices_alloc(pool, ((size_t) ncols) * nrows);
+    ggml_cuda_pool_alloc<float> temp_keys_alloc(pool, ((size_t) ncols) * nrows);
     ggml_cuda_pool_alloc<int> offsets_alloc(pool, nrows + 1);
     int * temp_indices = temp_indices_alloc.get();
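
The argsort hunk above guards the pool-allocation size against 32-bit overflow: with two int operands, ncols * nrows is evaluated in 32-bit arithmetic and can wrap before it ever reaches ggml_cuda_pool_alloc, whereas casting one operand to size_t promotes the whole product to 64-bit. A minimal standalone sketch, not ggml code, with made-up sizes chosen to exceed INT32_MAX:

// Standalone illustration of the overflow the cast guards against (hypothetical sizes).
#include <cstdio>
#include <cstddef>

int main() {
    const int ncols = 100000;
    const int nrows = 30000;   // ncols * nrows = 3'000'000'000 > INT32_MAX

    // Casting one operand to size_t first makes the multiplication 64-bit,
    // so the element count reaches the allocator unclipped.
    const size_t n_elements = ((size_t) ncols) * nrows;

    printf("%zu elements\n", n_elements);   // prints 3000000000
    return 0;
}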

@@ -918,7 +918,7 @@ void launch_fattn(
         blocks_num.y = 1;
         blocks_num.z = 1;
-        dst_tmp_meta.alloc(blocks_num.x*ncols * (2*2 + DV) * sizeof(float));
+        dst_tmp_meta.alloc(((size_t) blocks_num.x) * ncols * (2 + DV/2));
     } else {
         const int ntiles_KQ = (K->ne[1] + nbatch_fa - 1) / nbatch_fa; // Max. number of parallel blocks limited by tensor size.
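
The launch_fattn hunk applies the same idea to a three-factor product. Because * is left-associative, casting only the leftmost factor is enough: ((size_t) a) * b * c is evaluated as (((size_t) a) * b) * c, so every intermediate result is already 64-bit. A small standalone check, not the ggml code; the values are hypothetical and chosen so a 32-bit product would wrap:

// Standalone sketch: left-associativity keeps the whole multiplication chain in size_t.
#include <cstdio>
#include <cstddef>

int main() {
    const int nblocks   = 70000;  // hypothetical stand-in for blocks_num.x
    const int ncols     = 64;
    const int elem_rows = 514;    // e.g. 2 + DV/2 with DV = 1024

    // Evaluated as (((size_t) nblocks) * ncols) * elem_rows; no 32-bit intermediate.
    const size_t n = ((size_t) nblocks) * ncols * elem_rows;

    printf("%zu\n", n);  // 2302720000, which does not fit in a signed 32-bit int
    return 0;
}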