update patches
parent e5209778f1
commit f5d663e370

@@ -24,7 +24,7 @@ problem.
 9 files changed, 21 insertions(+), 2 deletions(-)

 diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
-index b30b4cb3..0ce73a99 100644
+index b1050ad5..e8694e5c 100644
 --- a/ggml/src/ggml-backend.cpp
 +++ b/ggml/src/ggml-backend.cpp
 @@ -107,7 +107,6 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
@@ -43,7 +43,7 @@ index b30b4cb3..0ce73a99 100644
 }

 static void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
-@@ -1871,6 +1871,11 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
+@@ -1879,6 +1879,11 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {

 static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
 ggml_aligned_free(buffer->context, buffer->size);
@@ -55,7 +55,7 @@ index b30b4cb3..0ce73a99 100644
 }

 static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
-@@ -1918,7 +1923,7 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = {
+@@ -1926,7 +1931,7 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = {
 };

 static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_from_ptr_i = {
@@ -65,10 +65,10 @@ index b30b4cb3..0ce73a99 100644
 /* .init_tensor = */ NULL, // no initialization required
 /* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
 diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp
-index e2617b06..242e50a7 100644
+index c0ea2600..6c3398da 100755
 --- a/ggml/src/ggml-cann/ggml-cann.cpp
 +++ b/ggml/src/ggml-cann/ggml-cann.cpp
-@@ -800,6 +800,7 @@ static void ggml_backend_cann_buffer_free_buffer(
+@@ -801,6 +801,7 @@ static void ggml_backend_cann_buffer_free_buffer(
 ggml_backend_cann_buffer_context* ctx =
 (ggml_backend_cann_buffer_context*)buffer->context;
 delete ctx;
@@ -76,7 +76,7 @@ index e2617b06..242e50a7 100644
 }

 /**
-@@ -1472,6 +1473,7 @@ static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buf
+@@ -1473,6 +1474,7 @@ static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buf
 */
 static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
 ACL_CHECK(aclrtFreeHost(buffer->context));
@@ -85,7 +85,7 @@ index e2617b06..242e50a7 100644

 /**
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index b4b85abc..cb0d8528 100644
+index 2a6f7f10..ec031650 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
 @@ -534,6 +534,7 @@ struct ggml_backend_cuda_buffer_context {
@@ -104,7 +104,7 @@ index b4b85abc..cb0d8528 100644
 }

 static void * ggml_backend_cuda_split_buffer_get_base(ggml_backend_buffer_t buffer) {
-@@ -1067,6 +1069,7 @@ static const char * ggml_backend_cuda_host_buffer_type_name(ggml_backend_buffer_
+@@ -1071,6 +1073,7 @@ static bool ggml_backend_buft_is_cuda_host(ggml_backend_buffer_type_t buft) {

 static void ggml_backend_cuda_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
 CUDA_CHECK(cudaFreeHost(buffer->context));
@@ -125,10 +125,10 @@ index 50579227..2799a0a5 100644

 static void * ggml_backend_kompute_buffer_get_base(ggml_backend_buffer_t buffer) {
 diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
-index 576f9581..1b56f858 100644
+index bc93bc63..fd3a9d1b 100644
 --- a/ggml/src/ggml-metal/ggml-metal.m
 +++ b/ggml/src/ggml-metal/ggml-metal.m
-@@ -5214,6 +5214,7 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
+@@ -5272,6 +5272,7 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
 }

 free(ctx);
@@ -137,10 +137,10 @@ index 576f9581..1b56f858 100644

 static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) {
 diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp
-index 05a2f4e6..392cc18d 100644
+index 80a36438..6abb0ab2 100644
 --- a/ggml/src/ggml-opencl/ggml-opencl.cpp
 +++ b/ggml/src/ggml-opencl/ggml-opencl.cpp
-@@ -1940,6 +1940,7 @@ struct ggml_backend_opencl_buffer_context {
+@@ -2366,6 +2366,7 @@ struct ggml_backend_opencl_buffer_context {
 static void ggml_backend_opencl_buffer_free_buffer(ggml_backend_buffer_t buffer) {
 ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
 delete ctx;
@@ -161,10 +161,10 @@ index 4f0abb5a..de1ec184 100644

 static void * ggml_backend_rpc_buffer_get_base(ggml_backend_buffer_t buffer) {
 diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
-index 0ea72994..ae3a3c33 100644
+index 78513114..0dabdfe7 100644
 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp
 +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
-@@ -320,6 +320,7 @@ ggml_backend_sycl_buffer_free_buffer(ggml_backend_buffer_t buffer) try {
+@@ -331,6 +331,7 @@ ggml_backend_sycl_buffer_free_buffer(ggml_backend_buffer_t buffer) try {
 ggml_sycl_set_device(ctx->device);

 delete ctx;
@@ -172,7 +172,7 @@ index 0ea72994..ae3a3c33 100644
 }
 catch (sycl::exception const &exc) {
 std::cerr << exc.what() << "Exception caught at file:" << __FILE__
-@@ -765,6 +766,7 @@ struct ggml_backend_sycl_split_buffer_context {
+@@ -791,6 +792,7 @@ struct ggml_backend_sycl_split_buffer_context {
 static void ggml_backend_sycl_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
 ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
 delete ctx;
@@ -180,7 +180,7 @@ index 0ea72994..ae3a3c33 100644
 }

 static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buffer) {
-@@ -1099,6 +1101,7 @@ static const char * ggml_backend_sycl_host_buffer_type_name(ggml_backend_buffer_
+@@ -1133,6 +1135,7 @@ static const char * ggml_backend_sycl_host_buffer_type_name(ggml_backend_buffer_

 static void ggml_backend_sycl_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
 ggml_sycl_host_free(buffer->context);
@@ -189,10 +189,10 @@ index 0ea72994..ae3a3c33 100644

 static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
 diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-index e2b357fd..68768029 100644
+index 3e43b03b..01776f3d 100644
 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
 +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-@@ -8962,6 +8962,7 @@ static void ggml_backend_vk_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+@@ -9272,6 +9272,7 @@ static void ggml_backend_vk_buffer_free_buffer(ggml_backend_buffer_t buffer) {
 ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
 ggml_vk_destroy_buffer(ctx->dev_buffer);
 delete ctx;
@@ -200,7 +200,7 @@ index e2b357fd..68768029 100644
 }

 static void * ggml_backend_vk_buffer_get_base(ggml_backend_buffer_t buffer) {
-@@ -9105,6 +9106,7 @@ static const char * ggml_backend_vk_host_buffer_name(ggml_backend_buffer_t buffe
+@@ -9415,6 +9416,7 @@ static const char * ggml_backend_vk_host_buffer_name(ggml_backend_buffer_t buffe
 static void ggml_backend_vk_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
 VK_LOG_MEMORY("ggml_backend_vk_host_buffer_free_buffer()");
 ggml_vk_host_free(vk_instance.devices[0], buffer->context);

@@ -10,7 +10,7 @@ logs instead of throwing an error
 1 file changed, 3 insertions(+), 11 deletions(-)

 diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
-index 9389ca80..806c1b3d 100644
+index ba2e1864..0d7ad157 100644
 --- a/src/llama-vocab.cpp
 +++ b/src/llama-vocab.cpp
 @@ -1503,16 +1503,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {

@@ -11,10 +11,10 @@ instead of forcing one or the error
 1 file changed, 3 insertions(+), 3 deletions(-)

 diff --git a/src/llama-context.cpp b/src/llama-context.cpp
-index 62246c10..dca22d8b 100644
+index c29fe7e4..148d1132 100644
 --- a/src/llama-context.cpp
 +++ b/src/llama-context.cpp
-@@ -901,7 +901,7 @@ int llama_context::decode(llama_batch & inp_batch) {
+@@ -952,7 +952,7 @@ int llama_context::decode(llama_batch & inp_batch) {
 int64_t n_outputs_all = 0;

 // count outputs
@@ -23,7 +23,7 @@ index 62246c10..dca22d8b 100644
 for (uint32_t i = 0; i < n_tokens_all; ++i) {
 n_outputs_all += batch.logits[i] != 0;
 }
-@@ -982,7 +982,7 @@ int llama_context::decode(llama_batch & inp_batch) {
+@@ -1083,7 +1083,7 @@ int llama_context::decode(llama_batch & inp_batch) {
 // ggml_graph_dump_dot(gf, NULL, "llama.dot");
 //}

@@ -32,7 +32,7 @@ index 62246c10..dca22d8b 100644
 auto * t_embd = cparams.embeddings ? res->get_embd() : nullptr;

 if (t_embd && res->get_embd_pooled()) {
-@@ -1151,7 +1151,7 @@ int32_t llama_context::output_reserve(int32_t n_outputs) {
+@@ -1244,7 +1244,7 @@ int32_t llama_context::output_reserve(int32_t n_outputs) {
 const auto n_embd = hparams.n_embd;

 // TODO: use a per-batch flag for logits presence instead

@@ -10,10 +10,10 @@ filesystems for paths that include wide characters
 1 file changed, 39 insertions(+)

 diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
-index 41ba45a7..cdd8ca44 100644
+index c25bacc1..b3f92814 100644
 --- a/tools/mtmd/clip.cpp
 +++ b/tools/mtmd/clip.cpp
-@@ -31,6 +31,19 @@
+@@ -28,6 +28,19 @@
 #include <numeric>
 #include <functional>

@@ -33,7 +33,7 @@ index 41ba45a7..cdd8ca44 100644
 struct clip_logger_state g_logger_state = {GGML_LOG_LEVEL_CONT, clip_log_callback_default, NULL};

 enum ffn_op_type {
-@@ -2190,7 +2203,29 @@ struct clip_model_loader {
+@@ -2552,7 +2565,29 @@ struct clip_model_loader {
 {
 std::vector<uint8_t> read_buf;

@@ -63,7 +63,7 @@ index 41ba45a7..cdd8ca44 100644
 if (!fin) {
 throw std::runtime_error(string_format("%s: failed to open %s\n", __func__, fname.c_str()));
 }
-@@ -2217,7 +2252,11 @@ struct clip_model_loader {
+@@ -2579,7 +2614,11 @@ struct clip_model_loader {
 ggml_backend_tensor_set(cur, read_buf.data(), 0, num_bytes);
 }
 }

@@ -15,7 +15,7 @@ adds support for the Solar Pro architecture
 7 files changed, 248 insertions(+)

 diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
-index f2bc8ca7..5ab3f572 100644
+index c0590e10..6d9f0719 100644
 --- a/src/llama-arch.cpp
 +++ b/src/llama-arch.cpp
 @@ -69,6 +69,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
@@ -34,7 +34,7 @@ index f2bc8ca7..5ab3f572 100644
 { LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
 { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },

-@@ -1502,6 +1504,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
+@@ -1508,6 +1510,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
 },
 },
@@ -59,7 +59,7 @@ index f2bc8ca7..5ab3f572 100644
 {
 LLM_ARCH_WAVTOKENIZER_DEC,
 {
-@@ -1680,6 +1700,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
+@@ -1686,6 +1706,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
 {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
 // this tensor is loaded for T5, but never used
 {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
@@ -68,7 +68,7 @@ index f2bc8ca7..5ab3f572 100644
 {LLM_TENSOR_POS_NET_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
 {LLM_TENSOR_POS_NET_NORM1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
 diff --git a/src/llama-arch.h b/src/llama-arch.h
-index 41a023da..525c1b7d 100644
+index 930cb4ec..591bc14e 100644
 --- a/src/llama-arch.h
 +++ b/src/llama-arch.h
 @@ -73,6 +73,7 @@ enum llm_arch {
@@ -87,7 +87,7 @@ index 41a023da..525c1b7d 100644
 LLM_KV_ATTENTION_KEY_LENGTH_MLA,
 LLM_KV_ATTENTION_VALUE_LENGTH_MLA,

-@@ -346,6 +348,7 @@ enum llm_tensor {
+@@ -348,6 +350,7 @@ enum llm_tensor {
 LLM_TENSOR_ENC_OUTPUT_NORM,
 LLM_TENSOR_CLS,
 LLM_TENSOR_CLS_OUT,
@@ -96,10 +96,10 @@ index 41a023da..525c1b7d 100644
 LLM_TENSOR_CONVNEXT_DW,
 LLM_TENSOR_CONVNEXT_NORM,
 diff --git a/src/llama-hparams.cpp b/src/llama-hparams.cpp
-index 90dfe7a7..8a667960 100644
+index 1499eb08..aa7a4b23 100644
 --- a/src/llama-hparams.cpp
 +++ b/src/llama-hparams.cpp
-@@ -70,6 +70,14 @@ uint32_t llama_hparams::n_embd_v_s() const {
+@@ -86,6 +86,14 @@ uint32_t llama_hparams::n_embd_v_s() const {
 return ssm_d_state * ssm_d_inner;
 }

@@ -113,12 +113,12 @@ index 90dfe7a7..8a667960 100644
 +
 bool llama_hparams::is_swa(uint32_t il) const {
 if (il < n_layer) {
-return n_swa > 0 && n_swa_pattern > 0 && il % n_swa_pattern < (n_swa_pattern - 1);
+return swa_layers[il];
 diff --git a/src/llama-hparams.h b/src/llama-hparams.h
-index 7ee6a5b7..48dce407 100644
+index b2bcb8b0..347d239d 100644
 --- a/src/llama-hparams.h
 +++ b/src/llama-hparams.h
-@@ -55,6 +55,8 @@ struct llama_hparams {
+@@ -59,6 +59,8 @@ struct llama_hparams {
 std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_kv_arr;
 std::array<uint32_t, LLAMA_MAX_LAYERS> n_ff_arr;

@@ -127,7 +127,7 @@ index 7ee6a5b7..48dce407 100644
 uint32_t n_layer_dense_lead = 0;
 uint32_t n_lora_q = 0;
 uint32_t n_lora_kv = 0;
-@@ -154,6 +156,9 @@ struct llama_hparams {
+@@ -186,6 +188,9 @@ struct llama_hparams {
 // dimension of the recurrent state embeddings
 uint32_t n_embd_v_s() const;

@@ -138,7 +138,7 @@ index 7ee6a5b7..48dce407 100644
 };

 diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
-index 4cce5166..7f6617fa 100644
+index ddb1b036..f4a6c2cd 100644
 --- a/src/llama-model-loader.cpp
 +++ b/src/llama-model-loader.cpp
 @@ -439,6 +439,7 @@ namespace GGUFMeta {
@@ -150,10 +150,10 @@ index 4cce5166..7f6617fa 100644
 llama_model_loader::llama_model_loader(
 const std::string & fname,
 diff --git a/src/llama-model.cpp b/src/llama-model.cpp
-index 3a4e72a3..831b68c0 100644
+index afef8487..c042546c 100644
 --- a/src/llama-model.cpp
 +++ b/src/llama-model.cpp
-@@ -1402,6 +1402,21 @@ void llama_model::load_hparams(llama_model_loader & ml) {
+@@ -1417,6 +1417,21 @@ void llama_model::load_hparams(llama_model_loader & ml) {
 default: type = LLM_TYPE_UNKNOWN;
 }
 } break;
@@ -175,7 +175,7 @@ index 3a4e72a3..831b68c0 100644
 case LLM_ARCH_WAVTOKENIZER_DEC:
 {
 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
-@@ -3774,6 +3789,34 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
+@@ -3797,6 +3812,34 @@ bool llama_model::load_tensors(llama_model_loader & ml) {

 layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);

@@ -210,7 +210,7 @@ index 3a4e72a3..831b68c0 100644
 layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
 layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0);
 layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
-@@ -12397,6 +12440,165 @@ struct llm_build_chameleon : public llm_graph_context {
+@@ -12721,6 +12764,165 @@ struct llm_build_chameleon : public llm_graph_context {
 }
 };

@@ -270,7 +270,7 @@ index 3a4e72a3..831b68c0 100644
 + // self-attention
 + {
 + // rope freq factors for llama3; may return nullptr for llama2 and other models
-+ ggml_tensor * rope_factors = model.get_rope_factors(n_ctx_per_seq, il);
++ ggml_tensor * rope_factors = model.get_rope_factors(cparams, il);
 +
 + // compute Q and K and RoPE them
 + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur);
@@ -376,7 +376,7 @@ index 3a4e72a3..831b68c0 100644
 struct llm_build_wavtokenizer_dec : public llm_graph_context {
 llm_build_wavtokenizer_dec(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) {
 ggml_tensor * cur;
-@@ -13157,6 +13359,10 @@ llm_graph_result_ptr llama_model::build_graph(
+@@ -13515,6 +13717,10 @@ llm_graph_result_ptr llama_model::build_graph(
 {
 llm = std::make_unique<llm_build_chameleon>(*this, params, gf);
 } break;
@@ -387,7 +387,7 @@ index 3a4e72a3..831b68c0 100644
 case LLM_ARCH_WAVTOKENIZER_DEC:
 {
 llm = std::make_unique<llm_build_wavtokenizer_dec>(*this, params, gf);
-@@ -13301,6 +13507,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
+@@ -13663,6 +13869,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
 case LLM_ARCH_GRANITE:
 case LLM_ARCH_GRANITE_MOE:
 case LLM_ARCH_CHAMELEON:
@@ -396,7 +396,7 @@ index 3a4e72a3..831b68c0 100644
 return LLAMA_ROPE_TYPE_NORM;

 diff --git a/src/llama-model.h b/src/llama-model.h
-index 6bdec263..43746c7d 100644
+index cbea2cb3..43e7fcda 100644
 --- a/src/llama-model.h
 +++ b/src/llama-model.h
 @@ -65,6 +65,7 @@ enum llm_type {

@@ -12,7 +12,7 @@ regex
 2 files changed, 22 insertions(+), 1 deletion(-)

 diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
-index 806c1b3d..10f34d33 100644
+index 0d7ad157..d007039f 100644
 --- a/src/llama-vocab.cpp
 +++ b/src/llama-vocab.cpp
 @@ -298,7 +298,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {

@@ -0,0 +1,22 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: jmorganca <jmorganca@gmail.com>
+Date: Tue, 8 Apr 2025 19:43:40 -0700
+Subject: [PATCH] maintain ordering for rules for grammar
+
+---
+ common/json-schema-to-grammar.cpp | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
+index d38a74f9..2a8aeca6 100644
+--- a/common/json-schema-to-grammar.cpp
++++ b/common/json-schema-to-grammar.cpp
+@@ -350,7 +350,7 @@ private:
+ friend std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options);
+ std::function<json(const std::string &)> _fetch_json;
+ bool _dotall;
+- std::map<std::string, std::string> _rules;
++ std::unordered_map<std::string, std::string> _rules;
+ std::unordered_map<std::string, json> _refs;
+ std::unordered_set<std::string> _refs_being_resolved;
+ std::vector<std::string> _errors;

@@ -8,10 +8,10 @@ Subject: [PATCH] add phony target ggml-cpu for all cpu variants
 1 file changed, 2 insertions(+)

 diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
-index ddea5ad3..45918bf6 100644
+index 7dcb031f..770e18bc 100644
 --- a/ggml/src/CMakeLists.txt
 +++ b/ggml/src/CMakeLists.txt
-@@ -279,6 +279,7 @@ function(ggml_add_cpu_backend_variant tag_name)
+@@ -282,6 +282,7 @@ function(ggml_add_cpu_backend_variant tag_name)
 endforeach()

 ggml_add_cpu_backend_variant_impl(${tag_name})
@@ -19,11 +19,11 @@ index ddea5ad3..45918bf6 100644
 endfunction()

 ggml_add_backend(CPU)
-@@ -287,6 +288,7 @@ if (GGML_CPU_ALL_VARIANTS)
+@@ -290,6 +291,7 @@ if (GGML_CPU_ALL_VARIANTS)
 if (NOT GGML_BACKEND_DL)
 message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
 endif()
 + add_custom_target(ggml-cpu)
-ggml_add_cpu_backend_variant(x64)
-ggml_add_cpu_backend_variant(sse42 SSE42)
-ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
+if (GGML_SYSTEM_ARCH STREQUAL "x86")
+ggml_add_cpu_backend_variant(x64)
+ggml_add_cpu_backend_variant(sse42 SSE42)

@@ -0,0 +1,25 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: jmorganca <jmorganca@gmail.com>
+Date: Thu, 1 May 2025 15:05:08 -0700
+Subject: [PATCH] remove amx
+
+disable amx as it reduces performance on some systems
+---
+ ggml/src/CMakeLists.txt | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
+index 770e18bc..62f3dbf6 100644
+--- a/ggml/src/CMakeLists.txt
++++ b/ggml/src/CMakeLists.txt
+@@ -300,10 +300,6 @@ if (GGML_CPU_ALL_VARIANTS)
+ ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
+ ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
+ ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
+- if (NOT MSVC)
+- # MSVC doesn't support AMX
+- ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
+- endif()
+ else()
+ message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported on ${GGML_SYSTEM_ARCH}")
+ endif()

@@ -25,10 +25,10 @@ index 79ee2020..3efb22f0 100644
 // get ith C string from array with given key_id
 GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int64_t key_id, size_t i);
 diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
-index 381a9c7d..e45b453d 100644
+index a0a318a2..b3326b94 100644
 --- a/ggml/src/gguf.cpp
 +++ b/ggml/src/gguf.cpp
-@@ -777,10 +777,14 @@ enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id
+@@ -794,10 +794,14 @@ enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id

 const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id) {
 GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
@@ -44,7 +44,7 @@ index 381a9c7d..e45b453d 100644
 const char * gguf_get_arr_str(const struct gguf_context * ctx, int64_t key_id, size_t i) {
 GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
 GGML_ASSERT(ctx->kv[key_id].get_type() == GGUF_TYPE_STRING);
-@@ -874,7 +878,6 @@ const char * gguf_get_val_str(const struct gguf_context * ctx, int64_t key_id) {
+@@ -891,7 +895,6 @@ const char * gguf_get_val_str(const struct gguf_context * ctx, int64_t key_id) {
 const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id) {
 GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
 GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
@@ -53,7 +53,7 @@ index 381a9c7d..e45b453d 100644
 }

 diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
-index 10f34d33..9f5fd57b 100644
+index d007039f..4a6c3ad6 100644
 --- a/src/llama-vocab.cpp
 +++ b/src/llama-vocab.cpp
 @@ -1469,9 +1469,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {

@@ -1,25 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: jmorganca <jmorganca@gmail.com>
-Date: Thu, 1 May 2025 15:05:08 -0700
-Subject: [PATCH] remove amx
-
-disable amx as it reduces performance on some systems
----
- ggml/src/CMakeLists.txt | 4 ----
- 1 file changed, 4 deletions(-)
-
-diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
-index 45918bf6..0beaed86 100644
---- a/ggml/src/CMakeLists.txt
-+++ b/ggml/src/CMakeLists.txt
-@@ -296,10 +296,6 @@ if (GGML_CPU_ALL_VARIANTS)
- ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
- ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
- ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
-- if (NOT MSVC)
-- # MSVC doesn't support AMX
-- ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
-- endif()
- elseif (GGML_CPU)
- ggml_add_cpu_backend_variant_impl("")
- endif()

@@ -8,7 +8,7 @@ Subject: [PATCH] ollama debug tensor
 1 file changed, 6 insertions(+)

 diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
-index a30e67f2..2462d2b8 100644
+index c7426df2..23441678 100644
 --- a/ggml/src/ggml-cpu/ggml-cpu.c
 +++ b/ggml/src/ggml-cpu/ggml-cpu.c
 @@ -15,6 +15,8 @@
@@ -20,7 +20,7 @@ index a30e67f2..2462d2b8 100644
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
 #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
-@@ -2841,6 +2843,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
+@@ -2873,6 +2875,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {

 ggml_compute_forward(&params, node);


@@ -10,7 +10,7 @@ Subject: [PATCH] add ollama vocab for grammar support
 3 files changed, 58 insertions(+), 9 deletions(-)

 diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp
-index 973b47ae..60d58236 100644
+index bed706bb..b51cee09 100644
 --- a/src/llama-grammar.cpp
 +++ b/src/llama-grammar.cpp
 @@ -907,6 +907,7 @@ llama_grammar_candidates llama_grammar_reject_candidates_for_stack(
@@ -90,7 +90,7 @@ index 973b47ae..60d58236 100644

 if (grammar.awaiting_trigger) {
 if (std::find(grammar.trigger_tokens.begin(), grammar.trigger_tokens.end(), token) != grammar.trigger_tokens.end()) {
-@@ -1191,13 +1200,14 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token
+@@ -1201,13 +1210,14 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token
 }
 }

@@ -107,7 +107,7 @@ index 973b47ae..60d58236 100644
 }

 llama_grammar_accept_str(grammar, piece);
-@@ -1217,3 +1227,28 @@ void llama_grammar_accept_str(struct llama_grammar & grammar, const std::string
+@@ -1227,3 +1237,28 @@ void llama_grammar_accept_str(struct llama_grammar & grammar, const std::string
 throw std::runtime_error("Unexpected empty grammar stack after accepting piece: " + piece);
 }
 }
@@ -184,7 +184,7 @@ index f8c291de..2a3a62db 100644
 const char * grammar_root,
 bool lazy,
 diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
-index 804b11e0..15a10ca8 100644
+index bfbf5fa2..11f93f42 100644
 --- a/src/llama-sampling.cpp
 +++ b/src/llama-sampling.cpp
 @@ -1466,7 +1466,7 @@ static void llama_sampler_grammar_reset(struct llama_sampler * smpl) {

@@ -10,10 +10,10 @@ Subject: [PATCH] add argsort and cuda copy for i32
 3 files changed, 192 insertions(+), 2 deletions(-)

 diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
-index becdae07..7a44b6cf 100644
+index 08facb6d..aa5cf56b 100644
 --- a/ggml/src/ggml-cpu/ops.cpp
 +++ b/ggml/src/ggml-cpu/ops.cpp
-@@ -6890,6 +6890,45 @@ static void ggml_compute_forward_argsort_f32(
+@@ -6925,6 +6925,45 @@ static void ggml_compute_forward_argsort_f32(
 }
 }

@@ -59,7 +59,7 @@ index becdae07..7a44b6cf 100644
 void ggml_compute_forward_argsort(
 const ggml_compute_params * params,
 ggml_tensor * dst) {
-@@ -6901,6 +6940,10 @@ void ggml_compute_forward_argsort(
+@@ -6936,6 +6975,10 @@ void ggml_compute_forward_argsort(
 {
 ggml_compute_forward_argsort_f32(params, dst);
 } break;
@@ -195,10 +195,10 @@ index 607ded85..53b02634 100644
 + }
 }
 diff --git a/ggml/src/ggml-cuda/cpy.cu b/ggml/src/ggml-cuda/cpy.cu
-index 2d46176e..47383486 100644
+index 2c55d214..90d95d32 100644
 --- a/ggml/src/ggml-cuda/cpy.cu
 +++ b/ggml/src/ggml-cuda/cpy.cu
-@@ -38,6 +38,13 @@ static __device__ void cpy_1_f16_f32(const char * cxi, char * cdsti) {
+@@ -41,6 +41,13 @@ static __device__ void cpy_1_f16_f32(const char * cxi, char * cdsti) {
 *dsti = *xi;
 }

@@ -212,7 +212,7 @@ index 2d46176e..47383486 100644
 template <cpy_kernel_t cpy_1>
 static __global__ void cpy_f32_f16(const char * cx, char * cdst_direct, const int ne,
 const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
-@@ -68,6 +75,44 @@ static __global__ void cpy_f32_f16(const char * cx, char * cdst_direct, const in
+@@ -71,6 +78,44 @@ static __global__ void cpy_f32_f16(const char * cx, char * cdst_direct, const in
 cpy_1(cx + x_offset, cdst + dst_offset);
 }

@@ -257,7 +257,7 @@ index 2d46176e..47383486 100644
 static __device__ void cpy_blck_f32_q8_0(const char * cxi, char * cdsti) {
 const float * xi = (const float *) cxi;
 block_q8_0 * dsti = (block_q8_0 *) cdsti;
-@@ -631,6 +676,8 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
+@@ -643,6 +688,8 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
 ggml_cpy_f16_f16_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
 } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {
 ggml_cpy_f16_f32_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
@@ -266,7 +266,7 @@ index 2d46176e..47383486 100644
 } else {
 GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
 ggml_type_name(src0->type), ggml_type_name(src1->type));
-@@ -686,6 +733,8 @@ void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1) {
+@@ -698,6 +745,8 @@ void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1) {
 return (void*) cpy_f32_f16<cpy_1_f32_f16>;
 } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {
 return (void*) cpy_f32_f16<cpy_1_f16_f32>;

@@ -134,10 +134,10 @@ index 5fd379f6..04812990 100644

 static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
 diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
-index 0ce73a99..be335e8c 100644
+index e8694e5c..36f11537 100644
 --- a/ggml/src/ggml-backend.cpp
 +++ b/ggml/src/ggml-backend.cpp
-@@ -1629,6 +1629,16 @@ size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backe
+@@ -1637,6 +1637,16 @@ size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backe
 return ggml_gallocr_get_buffer_size(sched->galloc, backend_index);
 }


@@ -24,10 +24,10 @@ index 74e46716..a880df33 100644
 size_t memory_total;
 enum ggml_backend_dev_type type;
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index cb0d8528..4c829153 100644
+index ec031650..8d5edd04 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
-@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
+@@ -2893,6 +2893,7 @@ struct ggml_backend_cuda_device_context {
 int device;
 std::string name;
 std::string description;
@@ -35,7 +35,7 @@ index cb0d8528..4c829153 100644
 };

 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
-@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
+@@ -2905,6 +2906,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
 return ctx->description.c_str();
 }

@@ -47,7 +47,7 @@ index cb0d8528..4c829153 100644
 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
 ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
 ggml_cuda_set_device(ctx->device);
-@@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
+@@ -2919,6 +2925,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
 props->name = ggml_backend_cuda_device_get_name(dev);
 props->description = ggml_backend_cuda_device_get_description(dev);
@@ -55,7 +55,7 @@ index cb0d8528..4c829153 100644
 props->type = ggml_backend_cuda_device_get_type(dev);
 ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);

-@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
+@@ -3473,6 +3480,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
 CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
 dev_ctx->description = prop.name;

@@ -89,10 +89,10 @@ index cb0d8528..4c829153 100644
 /* .iface = */ ggml_backend_cuda_device_interface,
 /* .reg = */ &reg,
 diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
-index 1b56f858..ee4f2dcb 100644
+index fd3a9d1b..884bde80 100644
 --- a/ggml/src/ggml-metal/ggml-metal.m
 +++ b/ggml/src/ggml-metal/ggml-metal.m
-@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
+@@ -5761,6 +5761,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
 static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
 props->name = ggml_backend_metal_device_get_name(dev);
 props->description = ggml_backend_metal_device_get_description(dev);