Update to B7618
parent 47a8e00686
commit 7f25eb0038
@@ -1,6 +1,6 @@
 UPSTREAM=https://github.com/ggml-org/llama.cpp.git
 WORKDIR=llama/vendor
-FETCH_HEAD=18ddaea2aecf7fbfe7acab77465808f3cf6200d3
+FETCH_HEAD=9dba9f5352308894bfb8786fcfe7c284168ff8f5
 
 .PHONY: help
 help:

@@ -1,4 +1,4 @@
 int LLAMA_BUILD_NUMBER = 0;
-char const *LLAMA_COMMIT = "18ddaea2aecf7fbfe7acab77465808f3cf6200d3";
+char const *LLAMA_COMMIT = "9dba9f5352308894bfb8786fcfe7c284168ff8f5";
 char const *LLAMA_COMPILER = "";
 char const *LLAMA_BUILD_TARGET = "";

@@ -73,7 +73,7 @@ index 303278397..7d1733adb 100644
 {
     GGML_ABORT("fatal error");
 diff --git a/ggml/src/ggml-cuda/argsort.cu b/ggml/src/ggml-cuda/argsort.cu
-index da9652c3b..b82be371c 100644
+index 99669200f..5e261fd2d 100644
 --- a/ggml/src/ggml-cuda/argsort.cu
 +++ b/ggml/src/ggml-cuda/argsort.cu
 @@ -168,13 +168,107 @@ static void argsort_f32_i32_cuda_bitonic(const float * x,

@@ -29,8 +29,8 @@ static void argsort_f32_i32_cuda_cub(ggml_cuda_pool & pool,
                                      const int nrows,
                                      ggml_sort_order order,
                                      cudaStream_t stream) {
-    ggml_cuda_pool_alloc<int> temp_indices_alloc(pool, ncols * nrows);
-    ggml_cuda_pool_alloc<float> temp_keys_alloc(pool, ncols * nrows);
+    ggml_cuda_pool_alloc<int> temp_indices_alloc(pool, ((size_t) ncols) * nrows);
+    ggml_cuda_pool_alloc<float> temp_keys_alloc(pool, ((size_t) ncols) * nrows);
     ggml_cuda_pool_alloc<int> offsets_alloc(pool, nrows + 1);
 
     int * temp_indices = temp_indices_alloc.get();
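
Note: the only change in this hunk is the allocation-size arithmetic. Both ncols and nrows are int, so ncols * nrows is evaluated in 32-bit arithmetic and can overflow before the result ever reaches the allocator's size_t parameter; casting one operand first forces the whole multiply into size_t. A minimal standalone sketch of the failure mode (the 70000 values are hypothetical, chosen only so the product exceeds INT_MAX):

    #include <cstdio>
    #include <cstddef>

    int main() {
        const int ncols = 70000;  // hypothetical: the product with nrows
        const int nrows = 70000;  // exceeds INT_MAX (2147483647)

        // int * int is computed in 32 bits; the overflow is undefined
        // behavior and in practice wraps before the widening to size_t.
        const size_t bad = (size_t) (ncols * nrows);

        // Widening one operand first makes the whole multiply 64-bit,
        // mirroring the ((size_t) ncols) * nrows change above.
        const size_t good = ((size_t) ncols) * nrows;

        printf("bad  = %zu\n", bad);   // typically 605032704 (wrapped)
        printf("good = %zu\n", good);  // 4900000000
        return 0;
    }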

@@ -918,7 +918,7 @@ void launch_fattn(
         blocks_num.y = 1;
         blocks_num.z = 1;
 
-        dst_tmp_meta.alloc(blocks_num.x*ncols * (2*2 + DV) * sizeof(float));
+        dst_tmp_meta.alloc(((size_t) blocks_num.x) * ncols * (2 + DV/2));
     } else {
         const int ntiles_KQ = (K->ne[1] + nbatch_fa - 1) / nbatch_fa; // Max. number of parallel blocks limited by tensor size.
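
Note: two things change on the replaced line. The first operand is widened to size_t for the same overflow reason as in argsort.cu, and the size goes from (2*2 + DV) * sizeof(float) bytes to a bare (2 + DV/2) count, which reads as if dst_tmp_meta is now allocated in two-float (float2-sized) elements rather than bytes; that element type is an inference, since it is not visible in this hunk. For even DV the two totals agree, which this sketch checks (DV = 128 is just an illustrative head size):

    #include <cassert>
    #include <cstddef>

    struct float2_t { float x, y; };  // stand-in for CUDA's float2

    int main() {
        const size_t DV = 128;  // hypothetical V head size; any even value works

        const size_t old_bytes = (2*2 + DV) * sizeof(float);  // old expression: bytes
        const size_t new_count = 2 + DV/2;                    // new expression: elements

        // (4 + DV) * 4 == (2 + DV/2) * 8 whenever DV is even.
        assert(old_bytes == new_count * sizeof(float2_t));
        return 0;
    }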