update patches

inforithmics 2025-12-26 16:42:31 +01:00
parent 18fdcc94e5
commit c7d1f258aa
20 changed files with 100 additions and 110 deletions

View File

@@ -1,6 +1,6 @@
UPSTREAM=https://github.com/ggml-org/llama.cpp.git
WORKDIR=llama/vendor
-FETCH_HEAD=ec98e2002
+FETCH_HEAD=85c40c9b02941ebf1add1469af75f1796d513ef4
.PHONY: help
help:

View File

@@ -64,7 +64,7 @@ index 8547ecc84..9f37ca70c 100644
/* .init_tensor = */ NULL, // no initialization required
/* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp
index da624c587..efc63e092 100644
index e90759f98..91421a51d 100644
--- a/ggml/src/ggml-cann/ggml-cann.cpp
+++ b/ggml/src/ggml-cann/ggml-cann.cpp
@@ -831,6 +831,7 @@ static bool ggml_backend_buffer_is_cann(ggml_backend_buffer_t buffer) {
@@ -84,7 +84,7 @@ index da624c587..efc63e092 100644
/**
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index ab0f6fe9c..6519af435 100644
index 55fa2e6a7..58eaf45b4 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -583,6 +583,7 @@ struct ggml_backend_cuda_buffer_context {
@@ -132,10 +132,10 @@ index 70bf6f3d9..f2b7fe692 100644
static void * ggml_backend_metal_buffer_private_get_base(ggml_backend_buffer_t buffer) {
diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp
index 0d37587f6..ff373d413 100644
index 639715537..84d9f93f3 100644
--- a/ggml/src/ggml-opencl/ggml-opencl.cpp
+++ b/ggml/src/ggml-opencl/ggml-opencl.cpp
@@ -3417,6 +3417,7 @@ struct ggml_backend_opencl_buffer_context {
@@ -3421,6 +3421,7 @@ struct ggml_backend_opencl_buffer_context {
static void ggml_backend_opencl_buffer_free_buffer(ggml_backend_buffer_t buffer) {
ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
delete ctx;
@@ -144,7 +144,7 @@ index 0d37587f6..ff373d413 100644
static void * ggml_backend_opencl_buffer_get_base(ggml_backend_buffer_t buffer) {
diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp
index 18a45d2d9..89041805e 100644
index e7890a5ee..d1f38235a 100644
--- a/ggml/src/ggml-rpc/ggml-rpc.cpp
+++ b/ggml/src/ggml-rpc/ggml-rpc.cpp
@@ -556,6 +556,7 @@ static void ggml_backend_rpc_buffer_free_buffer(ggml_backend_buffer_t buffer) {
@@ -184,10 +184,10 @@ index e996d98be..84b679315 100644
static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 34ec09d40..120191ca0 100644
index 1459b2608..8ca9e4403 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -12365,6 +12365,7 @@ static void ggml_backend_vk_buffer_free_buffer(ggml_backend_buffer_t buffer) {
@@ -12447,6 +12447,7 @@ static void ggml_backend_vk_buffer_free_buffer(ggml_backend_buffer_t buffer) {
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
ggml_vk_destroy_buffer(ctx->dev_buffer);
delete ctx;
@@ -195,7 +195,7 @@ index 34ec09d40..120191ca0 100644
}
static void * ggml_backend_vk_buffer_get_base(ggml_backend_buffer_t buffer) {
@@ -12508,6 +12509,7 @@ static const char * ggml_backend_vk_host_buffer_name(ggml_backend_buffer_t buffe
@@ -12590,6 +12591,7 @@ static const char * ggml_backend_vk_host_buffer_name(ggml_backend_buffer_t buffe
static void ggml_backend_vk_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
VK_LOG_MEMORY("ggml_backend_vk_host_buffer_free_buffer()");
ggml_vk_host_free(vk_instance.devices[0], buffer->context);

View File

@@ -10,7 +10,7 @@ logs instead of throwing an error
1 file changed, 3 insertions(+), 11 deletions(-)
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 7b01a2edf..63250cdf1 100644
index cd4092ca0..af2276960 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1825,16 +1825,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
@@ -31,7 +31,7 @@ index 7b01a2edf..63250cdf1 100644
pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
} else if (
tokenizer_pre == "llama3" ||
@@ -2015,7 +2006,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
@@ -2016,7 +2007,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
pre_type = LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2;
clean_spaces = false;
} else {

View File

@@ -10,7 +10,7 @@ filesystems for paths that include wide characters
1 file changed, 39 insertions(+)
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
index 35e3aef0a..84a3796b5 100644
index 3ba0823de..11a248963 100644
--- a/tools/mtmd/clip.cpp
+++ b/tools/mtmd/clip.cpp
@@ -24,6 +24,19 @@
@@ -33,7 +33,7 @@ index 35e3aef0a..84a3796b5 100644
struct clip_logger_state g_logger_state = {clip_log_callback_default, NULL};
//#define CLIP_DEBUG_FUNCTIONS
@@ -1619,7 +1632,29 @@ struct clip_model_loader {
@@ -1678,7 +1691,29 @@ struct clip_model_loader {
{
std::vector<uint8_t> read_buf;
@@ -63,7 +63,7 @@ index 35e3aef0a..84a3796b5 100644
if (!fin) {
throw std::runtime_error(string_format("%s: failed to open %s\n", __func__, fname.c_str()));
}
@@ -1646,7 +1681,11 @@ struct clip_model_loader {
@@ -1705,7 +1740,11 @@ struct clip_model_loader {
ggml_backend_tensor_set(cur, read_buf.data(), 0, num_bytes);
}
}

View File

@@ -19,10 +19,10 @@ adds support for the Solar Pro architecture
create mode 100644 src/models/solar.cpp
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4192af7c0..bd44d73e7 100644
index 1e155534b..159f429e8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -125,6 +125,7 @@ add_library(llama
@@ -127,6 +127,7 @@ add_library(llama
models/seed-oss.cpp
models/smallthinker.cpp
models/smollm3.cpp
@@ -31,10 +31,10 @@ index 4192af7c0..bd44d73e7 100644
models/starcoder.cpp
models/starcoder2.cpp
diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
index 8caf80afc..2ce8ffec0 100644
index 75013d8d3..22b30bfcc 100644
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@@ -87,6 +87,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
@@ -88,6 +88,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
{ LLM_ARCH_GRANITE_MOE, "granitemoe" },
{ LLM_ARCH_GRANITE_HYBRID, "granitehybrid" },
{ LLM_ARCH_CHAMELEON, "chameleon" },
@@ -42,7 +42,7 @@ index 8caf80afc..2ce8ffec0 100644
{ LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
{ LLM_ARCH_PLM, "plm" },
{ LLM_ARCH_BAILINGMOE, "bailingmoe" },
@@ -208,6 +209,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
@@ -212,6 +213,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
{ LLM_KV_ATTENTION_OUTPUT_SCALE, "%s.attention.output_scale" },
{ LLM_KV_ATTENTION_TEMPERATURE_LENGTH, "%s.attention.temperature_length" },
{ LLM_KV_ATTENTION_TEMPERATURE_SCALE, "%s.attention.temperature_scale" },
@@ -50,7 +50,7 @@ index 8caf80afc..2ce8ffec0 100644
{ LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
{ LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
@@ -339,6 +341,7 @@ static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
@@ -344,6 +346,7 @@ static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
{ LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
{ LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
@@ -58,7 +58,7 @@ index 8caf80afc..2ce8ffec0 100644
{ LLM_TENSOR_POS_EMBD, "position_embd" },
{ LLM_TENSOR_FFN_ACT, "blk.%d.ffn.act" },
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
@@ -2176,6 +2179,22 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
@@ -2217,6 +2220,22 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
return {
LLM_TENSOR_TOKEN_EMBD,
};
@@ -81,7 +81,7 @@ index 8caf80afc..2ce8ffec0 100644
default:
GGML_ABORT("unknown architecture for tensor mapping");
}
@@ -2344,6 +2363,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
@@ -2385,6 +2404,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
{LLM_TENSOR_LAUREL_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
// this tensor is loaded for T5, but never used
{LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
@@ -90,10 +90,10 @@ index 8caf80afc..2ce8ffec0 100644
{LLM_TENSOR_POS_NET_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
{LLM_TENSOR_POS_NET_NORM1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
diff --git a/src/llama-arch.h b/src/llama-arch.h
index 6cbf9b1f8..14d461c76 100644
index 27bdedc83..06c903bb6 100644
--- a/src/llama-arch.h
+++ b/src/llama-arch.h
@@ -91,6 +91,7 @@ enum llm_arch {
@@ -92,6 +92,7 @@ enum llm_arch {
LLM_ARCH_GRANITE_MOE,
LLM_ARCH_GRANITE_HYBRID,
LLM_ARCH_CHAMELEON,
@@ -101,7 +101,7 @@ index 6cbf9b1f8..14d461c76 100644
LLM_ARCH_WAVTOKENIZER_DEC,
LLM_ARCH_PLM,
LLM_ARCH_BAILINGMOE,
@@ -212,6 +213,7 @@ enum llm_kv {
@@ -216,6 +217,7 @@ enum llm_kv {
LLM_KV_ATTENTION_OUTPUT_SCALE,
LLM_KV_ATTENTION_TEMPERATURE_LENGTH,
LLM_KV_ATTENTION_TEMPERATURE_SCALE,
@@ -109,7 +109,7 @@ index 6cbf9b1f8..14d461c76 100644
LLM_KV_ATTENTION_KEY_LENGTH_MLA,
LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
@@ -465,6 +467,7 @@ enum llm_tensor {
@@ -470,6 +472,7 @@ enum llm_tensor {
LLM_TENSOR_ENC_OUTPUT_NORM,
LLM_TENSOR_CLS,
LLM_TENSOR_CLS_OUT,
@@ -137,7 +137,7 @@ index fe1fa4341..aabff2f06 100644
if (il < n_layer) {
return swa_layers[il];
diff --git a/src/llama-hparams.h b/src/llama-hparams.h
index f6e95b5d2..c6e673276 100644
index 42def73f0..d3c53b5f2 100644
--- a/src/llama-hparams.h
+++ b/src/llama-hparams.h
@@ -65,6 +65,8 @@ struct llama_hparams {
@@ -149,7 +149,7 @@ index f6e95b5d2..c6e673276 100644
uint32_t n_layer_dense_lead = 0;
uint32_t n_lora_q = 0;
uint32_t n_lora_kv = 0;
@@ -259,6 +261,9 @@ struct llama_hparams {
@@ -260,6 +262,9 @@ struct llama_hparams {
uint32_t n_pos_per_embd() const;
@@ -160,10 +160,10 @@ index f6e95b5d2..c6e673276 100644
bool has_kv(uint32_t il) const;
diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
index ca2ea2461..8916a6242 100644
index 5003b4fbf..243b296b5 100644
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -466,7 +466,7 @@ namespace GGUFMeta {
@@ -489,7 +489,7 @@ namespace GGUFMeta {
template bool llama_model_loader::get_key_or_arr<std::array<int, 4>>(enum llm_kv kid, std::array<int, 4> & result, uint32_t n, bool required);
template bool llama_model_loader::get_key_or_arr<std::array<uint32_t, 512>>(enum llm_kv kid, std::array<uint32_t, 512> & result, uint32_t n, bool required);
template bool llama_model_loader::get_key_or_arr<std::array<float, 512>>(enum llm_kv kid, std::array<float, 512> & result, uint32_t n, bool required);
@@ -173,10 +173,10 @@ index ca2ea2461..8916a6242 100644
llama_model_loader::llama_model_loader(
const std::string & fname,
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index ae8207ee1..00cd579e0 100644
index 69075742c..bdee9b6e6 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1995,6 +1995,21 @@ void llama_model::load_hparams(llama_model_loader & ml) {
@@ -2028,6 +2028,21 @@ void llama_model::load_hparams(llama_model_loader & ml) {
default: type = LLM_TYPE_UNKNOWN;
}
} break;
@@ -198,7 +198,7 @@ index ae8207ee1..00cd579e0 100644
case LLM_ARCH_WAVTOKENIZER_DEC:
{
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
@@ -5429,6 +5444,34 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
@@ -5510,6 +5525,34 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
@@ -233,7 +233,7 @@ index ae8207ee1..00cd579e0 100644
layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0);
layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
@@ -7534,6 +7577,10 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
@@ -7664,6 +7707,10 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
{
llm = std::make_unique<llm_build_chameleon>(*this, params);
} break;
@@ -244,7 +244,7 @@ index ae8207ee1..00cd579e0 100644
case LLM_ARCH_WAVTOKENIZER_DEC:
{
llm = std::make_unique<llm_build_wavtokenizer_dec>(*this, params);
@@ -7798,6 +7845,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
@@ -7932,6 +7979,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
case LLM_ARCH_GRANITE_MOE:
case LLM_ARCH_GRANITE_HYBRID:
case LLM_ARCH_CHAMELEON:
@@ -253,10 +253,10 @@ index ae8207ee1..00cd579e0 100644
case LLM_ARCH_NEO_BERT:
case LLM_ARCH_SMOLLM3:
diff --git a/src/llama-model.h b/src/llama-model.h
index c6eb95318..b378b23ec 100644
index 9c00eec75..858af51bb 100644
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -76,6 +76,7 @@ enum llm_type {
@@ -79,6 +79,7 @@ enum llm_type {
LLM_TYPE_15B,
LLM_TYPE_16B,
LLM_TYPE_20B,
@@ -264,7 +264,7 @@ index c6eb95318..b378b23ec 100644
LLM_TYPE_26B,
LLM_TYPE_27B,
LLM_TYPE_30B,
@@ -405,6 +406,8 @@ struct llama_layer {
@@ -409,6 +410,8 @@ struct llama_layer {
struct ggml_tensor * ffn_act_beta = nullptr;
struct ggml_tensor * ffn_act_eps = nullptr;
@@ -274,10 +274,10 @@ index c6eb95318..b378b23ec 100644
struct llama_layer_convnext convnext;
diff --git a/src/models/models.h b/src/models/models.h
index ffb36acc6..6d84a185d 100644
index dd0e286ed..40f61b59d 100644
--- a/src/models/models.h
+++ b/src/models/models.h
@@ -515,6 +515,11 @@ struct llm_build_smollm3 : public llm_graph_context {
@@ -525,6 +525,11 @@ struct llm_build_smollm3 : public llm_graph_context {
llm_build_smollm3(const llama_model & model, const llm_graph_params & params);
};

View File

@@ -12,7 +12,7 @@ regex
2 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 63250cdf1..dd86a1745 100644
index af2276960..e05314272 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -299,7 +299,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {

View File

@@ -8,7 +8,7 @@ Subject: [PATCH] add phony target ggml-cpu for all cpu variants
1 file changed, 2 insertions(+)
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 4c04c3300..f4747f262 100644
index 262d78a4c..76cb339ca 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -345,6 +345,7 @@ function(ggml_add_cpu_backend_variant tag_name)

View File

@@ -9,7 +9,7 @@ disable amx as it reduces performance on some systems
1 file changed, 4 deletions(-)
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index f4747f262..d55aed348 100644
index 76cb339ca..676fb5b5e 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -365,10 +365,6 @@ if (GGML_CPU_ALL_VARIANTS)

View File

@@ -53,7 +53,7 @@ index b165d8bdc..f91d4faba 100644
}
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index dd86a1745..d63ce9c84 100644
index e05314272..325ef9843 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1781,9 +1781,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {

View File

@@ -8,7 +8,7 @@ Subject: [PATCH] ollama debug tensor
1 file changed, 6 insertions(+)
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
index a59b51893..53891a91f 100644
index f7ba1fe31..f700f74db 100644
--- a/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -15,6 +15,8 @@

View File

@@ -183,10 +183,10 @@ index a4c978ac1..5c0da4049 100644
const char * grammar_root,
bool lazy,
diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index 3f4a729bc..38a30ea05 100644
index d96f619ae..237eb8655 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -1561,7 +1561,7 @@ static void llama_sampler_grammar_reset(struct llama_sampler * smpl) {
@@ -1577,7 +1577,7 @@ static void llama_sampler_grammar_reset(struct llama_sampler * smpl) {
trigger_patterns_c.push_back(trigger_pattern.pattern.c_str());
}
@@ -195,7 +195,7 @@ index 3f4a729bc..38a30ea05 100644
ctx->grammar->lazy, trigger_patterns_c.data(), trigger_patterns_c.size(),
ctx->grammar->trigger_tokens.data(), ctx->grammar->trigger_tokens.size());
@@ -1639,9 +1639,9 @@ static struct llama_sampler * llama_sampler_init_grammar_impl(
@@ -1655,9 +1655,9 @@ static struct llama_sampler * llama_sampler_init_grammar_impl(
trigger_pattern += ")[\\s\\S]*";
std::array<const char *, 1> tmp_trigger_patterns = { trigger_pattern.c_str() };

View File

@@ -22,7 +22,7 @@ index a7ebe5dcd..03557bb31 100644
size_t memory_total;
// device type
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 6519af435..c9d3a2b03 100644
index 58eaf45b4..693d5dd7c 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -189,6 +189,51 @@ static int ggml_cuda_parse_id(char devName[]) {
@@ -108,7 +108,7 @@ index 6519af435..c9d3a2b03 100644
std::string device_name(prop.name);
if (device_name == "NVIDIA GeForce MX450") {
turing_devices_without_mma.push_back({ id, device_name });
@@ -4110,6 +4157,7 @@ struct ggml_backend_cuda_device_context {
@@ -4120,6 +4167,7 @@ struct ggml_backend_cuda_device_context {
std::string name;
std::string description;
std::string pci_bus_id;
@@ -116,7 +116,7 @@ index 6519af435..c9d3a2b03 100644
};
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -4198,6 +4246,11 @@ static bool ggml_backend_cuda_get_available_uma_memory(long * available_memory_k
@@ -4208,6 +4256,11 @@ static bool ggml_backend_cuda_get_available_uma_memory(long * available_memory_k
}
#endif // defined(__linux__)
@@ -128,7 +128,7 @@ index 6519af435..c9d3a2b03 100644
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
ggml_cuda_set_device(ctx->device);
@@ -4238,6 +4291,7 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
@@ -4248,6 +4301,7 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
props->name = ggml_backend_cuda_device_get_name(dev);
props->description = ggml_backend_cuda_device_get_description(dev);
@@ -136,7 +136,7 @@ index 6519af435..c9d3a2b03 100644
props->type = ggml_backend_cuda_device_get_type(dev);
props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
@@ -4834,6 +4888,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
@@ -4844,6 +4898,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
cudaDeviceProp prop;
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
dev_ctx->description = prop.name;

View File

@@ -10,7 +10,7 @@ Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
2 files changed, 13 insertions(+)
diff --git a/tools/mtmd/mtmd.cpp b/tools/mtmd/mtmd.cpp
index 2638fe4fc..c4e905a4e 100644
index b9c4fa909..3b47aed0e 100644
--- a/tools/mtmd/mtmd.cpp
+++ b/tools/mtmd/mtmd.cpp
@@ -87,6 +87,16 @@ enum mtmd_slice_tmpl {

View File

@@ -8,7 +8,7 @@ Subject: [PATCH] no power throttling win32 with gnuc
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
index 53891a91f..8d4851312 100644
index f700f74db..5581dd0ae 100644
--- a/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -2479,7 +2479,7 @@ static bool ggml_thread_apply_priority(int32_t prio) {

View File

@@ -178,7 +178,7 @@ index f4713a421..92ba577a5 100644
static const struct ggml_backend_i ggml_backend_cpu_i = {
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index c9d3a2b03..25548629d 100644
index 693d5dd7c..ed33f8f20 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2901,7 +2901,7 @@ static void ggml_backend_cuda_synchronize(ggml_backend_t backend) {
@@ -241,7 +241,7 @@ index c9d3a2b03..25548629d 100644
}
if (!use_cuda_graph) {
@@ -3742,7 +3752,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
@@ -3752,7 +3762,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
}
}
@@ -250,7 +250,7 @@ index c9d3a2b03..25548629d 100644
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
ggml_cuda_set_device(cuda_ctx->device);
@@ -3780,7 +3790,7 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
@@ -3790,7 +3800,7 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
if (use_cuda_graph) {
cuda_graph_update_required = is_cuda_graph_update_required(cuda_ctx, cgraph);
@ -278,10 +278,10 @@ index 8fc1c2fb5..ba95b4acc 100644
static void ggml_backend_metal_graph_optimize(ggml_backend_t backend, ggml_cgraph * cgraph) {
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 120191ca0..5349bce24 100644
index 8ca9e4403..3990d67cf 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -13099,7 +13099,7 @@ static uint32_t ggml_vk_fuse_multi_add(ggml_backend_vk_context * ctx, const stru
@@ -13216,7 +13216,7 @@ static uint32_t ggml_vk_fuse_multi_add(ggml_backend_vk_context * ctx, const stru
return num_adds;
}
@@ -290,7 +290,7 @@ index 120191ca0..5349bce24 100644
VK_LOG_DEBUG("ggml_backend_vk_graph_compute(" << cgraph->n_nodes << " nodes)");
ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
@@ -13334,6 +13334,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
@@ -13482,6 +13482,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
return GGML_STATUS_SUCCESS;
UNUSED(backend);

View File

@@ -8,7 +8,7 @@ Subject: [PATCH] fix mtmd-audio.cpp build on windows
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/mtmd/mtmd-audio.cpp b/tools/mtmd/mtmd-audio.cpp
index f68829a61..2024d3d37 100644
index e99101184..a66d36f84 100644
--- a/tools/mtmd/mtmd-audio.cpp
+++ b/tools/mtmd/mtmd-audio.cpp
@@ -1,6 +1,6 @@

View File

@@ -226,7 +226,7 @@ index 498186a7c..7746e8b92 100644
void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index 9fcb2f9fd..e800ee8f6 100644
index 62e618850..dac9cfcdf 100644
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -37,6 +37,41 @@
@ -271,7 +271,7 @@ index 9fcb2f9fd..e800ee8f6 100644
#define STRINGIZE_IMPL(...) #__VA_ARGS__
#define STRINGIZE(...) STRINGIZE_IMPL(__VA_ARGS__)
@@ -941,6 +976,9 @@ struct ggml_cuda_pool {
@@ -976,6 +1011,9 @@ struct ggml_cuda_pool {
virtual void * alloc(size_t size, size_t * actual_size) = 0;
virtual void free(void * ptr, size_t size) = 0;
@ -281,7 +281,7 @@ index 9fcb2f9fd..e800ee8f6 100644
};
template<typename T>
@@ -1232,11 +1270,15 @@ struct ggml_backend_cuda_context {
@@ -1267,11 +1305,15 @@ struct ggml_backend_cuda_context {
// pool
std::unique_ptr<ggml_cuda_pool> pools[GGML_CUDA_MAX_DEVICES][GGML_CUDA_MAX_STREAMS];
@ -299,7 +299,7 @@ index 9fcb2f9fd..e800ee8f6 100644
}
return *pools[device][curr_stream_no];
}
@@ -1244,6 +1286,22 @@ struct ggml_backend_cuda_context {
@@ -1279,6 +1321,22 @@ struct ggml_backend_cuda_context {
ggml_cuda_pool & pool() {
return pool(device);
}
@ -323,7 +323,7 @@ index 9fcb2f9fd..e800ee8f6 100644
struct ggml_cuda_mm_fusion_args_host {
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 25548629d..eeaae3fe4 100644
index ed33f8f20..3a7fd31e0 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -365,6 +365,8 @@ const ggml_cuda_device_info & ggml_cuda_info() {
@ -567,7 +567,7 @@ index 25548629d..eeaae3fe4 100644
};
ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device) {
@@ -3274,6 +3338,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
@@ -3284,6 +3348,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph,
bool & graph_evaluated_or_captured, bool & use_cuda_graph, bool & cuda_graph_update_required) {
@ -575,7 +575,7 @@ index 25548629d..eeaae3fe4 100644
// flag used to determine whether it is an integrated_gpu
const bool integrated = ggml_cuda_info().devices[cuda_ctx->device].integrated;
@@ -3410,6 +3475,10 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
@@ -3420,6 +3485,10 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
continue;
}
@ -586,7 +586,7 @@ index 25548629d..eeaae3fe4 100644
// start of fusion operations
static bool disable_fusion = (getenv("GGML_CUDA_DISABLE_FUSION") != nullptr);
@@ -3754,6 +3823,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
@@ -3764,6 +3833,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph, int batch_size) {
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
@ -594,7 +594,7 @@ index 25548629d..eeaae3fe4 100644
ggml_cuda_set_device(cuda_ctx->device);
@@ -3829,6 +3899,77 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
@@ -3839,6 +3909,77 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
return GGML_STATUS_SUCCESS;
}
@ -672,7 +672,7 @@ index 25548629d..eeaae3fe4 100644
static void ggml_backend_cuda_event_record(ggml_backend_t backend, ggml_backend_event_t event) {
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
@@ -4097,6 +4238,9 @@ static const ggml_backend_i ggml_backend_cuda_interface = {
@@ -4107,6 +4248,9 @@ static const ggml_backend_i ggml_backend_cuda_interface = {
/* .event_record = */ ggml_backend_cuda_event_record,
/* .event_wait = */ ggml_backend_cuda_event_wait,
/* .graph_optimize = */ ggml_backend_cuda_graph_optimize,

View File

@@ -8,10 +8,10 @@ Subject: [PATCH] decode: disable output_all
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 8786d4ee3..9e6998272 100644
index 015ebae71..e346e7231 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1051,8 +1051,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
@@ -1050,8 +1050,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
const int64_t n_vocab = vocab.n_tokens();
const int64_t n_embd = hparams.n_embd_inp();

View File

@@ -62,7 +62,7 @@ index 7746e8b92..189e97170 100644
GGML_ASSERT(device);
return device->iface.get_buffer_type(device);
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index eeaae3fe4..6852d2e20 100644
index 3a7fd31e0..cfe21ebc7 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -113,6 +113,11 @@ int ggml_cuda_get_device() {
@@ -77,7 +77,7 @@ index eeaae3fe4..6852d2e20 100644
static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device) {
ggml_cuda_set_device(device);
cudaError_t err;
@@ -4448,7 +4453,10 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
@@ -4458,7 +4463,10 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
props->id = ggml_backend_cuda_device_get_id(dev);
props->type = ggml_backend_cuda_device_get_type(dev);
props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
@ -89,7 +89,7 @@ index eeaae3fe4..6852d2e20 100644
bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
#ifdef GGML_CUDA_NO_PEER_COPY
@@ -4908,6 +4916,11 @@ static void ggml_backend_cuda_device_event_synchronize(ggml_backend_dev_t dev, g
@@ -4918,6 +4926,11 @@ static void ggml_backend_cuda_device_event_synchronize(ggml_backend_dev_t dev, g
CUDA_CHECK(cudaEventSynchronize((cudaEvent_t)event->context));
}
@ -101,7 +101,7 @@ index eeaae3fe4..6852d2e20 100644
static const ggml_backend_device_i ggml_backend_cuda_device_interface = {
/* .get_name = */ ggml_backend_cuda_device_get_name,
/* .get_description = */ ggml_backend_cuda_device_get_description,
@@ -4924,6 +4937,7 @@ static const ggml_backend_device_i ggml_backend_cuda_device_interface = {
@@ -4934,6 +4947,7 @@ static const ggml_backend_device_i ggml_backend_cuda_device_interface = {
/* .event_new = */ ggml_backend_cuda_device_event_new,
/* .event_free = */ ggml_backend_cuda_device_event_free,
/* .event_synchronize = */ ggml_backend_cuda_device_event_synchronize,
@ -122,10 +122,10 @@ index 951a88d56..4e162258d 100644
#define cudaError_t hipError_t
#define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
diff --git a/src/llama.cpp b/src/llama.cpp
index f69964b6d..759152b76 100644
index 1e18637e3..ad0f45812 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -921,10 +921,12 @@ static struct llama_model * llama_model_load_from_file_impl(
@@ -934,10 +934,12 @@ static struct llama_model * llama_model_load_from_file_impl(
for (auto * dev : model->devices) {
ggml_backend_dev_props props;
ggml_backend_dev_get_props(dev, &props);

View File

@@ -3,16 +3,6 @@ From: Daniel Hiltgen <daniel@ollama.com>
Date: Tue, 26 Aug 2025 12:48:29 -0700
Subject: [PATCH] GPU discovery enhancements
Expose more information about the devices through backend props, and leverage
management libraries for more accurate VRAM usage reporting if available.
vulkan: get GPU ID (ollama v0.11.5)
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
Vulkan PCI and Memory
fix vulkan PCI ID and ID handling
---
ggml/include/ggml-backend.h | 6 +
ggml/src/CMakeLists.txt | 2 +
@@ -45,7 +35,7 @@ index 92ca32a4b..6ad583f09 100644
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index d55aed348..99ae293cc 100644
index 676fb5b5e..6283c2d30 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -205,6 +205,8 @@ add_library(ggml-base
@@ -58,7 +48,7 @@ index d55aed348..99ae293cc 100644
set_target_properties(ggml-base PROPERTIES
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 6852d2e20..334a30135 100644
index cfe21ebc7..53ce7827c 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -267,6 +267,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
@@ -90,7 +80,7 @@ index 6852d2e20..334a30135 100644
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
ggml_cuda_parse_uuid(prop, id).c_str());
@@ -4317,6 +4332,11 @@ struct ggml_backend_cuda_device_context {
@@ -4327,6 +4342,11 @@ struct ggml_backend_cuda_device_context {
std::string description;
std::string pci_bus_id;
std::string id;
@@ -102,7 +92,7 @@ index 6852d2e20..334a30135 100644
};
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -4413,6 +4433,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
@@ -4423,6 +4443,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
ggml_cuda_set_device(ctx->device);
@@ -131,7 +121,7 @@ index 6852d2e20..334a30135 100644
CUDA_CHECK(cudaMemGetInfo(free, total));
// ref: https://github.com/ggml-org/llama.cpp/pull/17368
@@ -4445,6 +4487,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
@@ -4455,6 +4497,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
return GGML_BACKEND_DEVICE_TYPE_GPU;
}
@@ -139,7 +129,7 @@ index 6852d2e20..334a30135 100644
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
@@ -4458,6 +4501,19 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
@@ -4468,6 +4511,19 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
// If you need the memory data, call ggml_backend_dev_memory() explicitly.
props->memory_total = props->memory_free = 0;
@ -159,7 +149,7 @@ index 6852d2e20..334a30135 100644
bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
#ifdef GGML_CUDA_NO_PEER_COPY
bool events = false;
@@ -5047,6 +5103,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
@@ -5057,6 +5113,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
std::lock_guard<std::mutex> lock(mutex);
if (!initialized) {
ggml_backend_cuda_reg_context * ctx = new ggml_backend_cuda_reg_context;
@@ -167,7 +157,7 @@ index 6852d2e20..334a30135 100644
for (int i = 0; i < ggml_cuda_info().device_count; i++) {
ggml_backend_cuda_device_context * dev_ctx = new ggml_backend_cuda_device_context;
@@ -5062,6 +5119,14 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
@@ -5072,6 +5129,14 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
dev_ctx->pci_bus_id = pci_bus_id;
@@ -243,7 +233,7 @@ index ba95b4acc..f6f8f7a10 100644
/* .async = */ true,
/* .host_buffer = */ false,
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 5349bce24..0103fd03a 100644
index 3990d67cf..f3e65990d 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -236,6 +236,7 @@ class vk_memory_logger;
@ -254,7 +244,7 @@ index 5349bce24..0103fd03a 100644
static constexpr uint32_t mul_mat_vec_max_cols = 8;
static constexpr uint32_t p021_max_gqa_ratio = 8;
@@ -12350,6 +12351,29 @@ static void ggml_vk_get_device_description(int device, char * description, size_
@@ -12432,6 +12433,29 @@ static void ggml_vk_get_device_description(int device, char * description, size_
snprintf(description, description_size, "%s", props.deviceName.data());
}
@ -284,7 +274,7 @@ index 5349bce24..0103fd03a 100644
// backend interface
#define UNUSED GGML_UNUSED
@@ -13628,15 +13652,72 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
@@ -13828,15 +13852,72 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
ggml_vk_get_device_description(dev_idx, description, description_size);
}
@ -361,7 +351,7 @@ index 5349bce24..0103fd03a 100644
if (membudget_supported) {
memprops.pNext = &budgetprops;
@@ -13688,8 +13769,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
@@ -13888,8 +13969,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
}
}
@ -376,7 +366,7 @@ index 5349bce24..0103fd03a 100644
}
vk::PhysicalDeviceProperties2 props = {};
@@ -13706,19 +13792,24 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
@@ -13906,19 +13992,24 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
char pci_bus_id[16] = {};
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%x", pci_domain, pci_bus, pci_device, pci_function);
@ -410,7 +400,7 @@ index 5349bce24..0103fd03a 100644
static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
@@ -13730,9 +13821,14 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de
@@ -13930,9 +14021,14 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de
return ctx->description.c_str();
}
@ -426,7 +416,7 @@ index 5349bce24..0103fd03a 100644
}
static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) {
@@ -13756,8 +13852,9 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
@@ -13956,8 +14052,9 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
props->name = ggml_backend_vk_device_get_name(dev);
props->description = ggml_backend_vk_device_get_description(dev);
@ -436,10 +426,10 @@ index 5349bce24..0103fd03a 100644
+ props->device_id = ctx->pci_id.empty() ? nullptr : ctx->pci_id.c_str();
ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
props->caps = {
/* .async = */ false,
@@ -13765,6 +13862,13 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
/* .async = */ true,
@@ -13965,6 +14062,13 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
/* .buffer_from_host_ptr = */ false,
/* .events = */ false,
/* .events = */ true,
};
+
+ props->compute_major = ctx->major;
@ -451,7 +441,7 @@ index 5349bce24..0103fd03a 100644
}
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
@@ -14331,6 +14435,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
@@ -14573,6 +14677,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
static std::mutex mutex;
std::lock_guard<std::mutex> lock(mutex);
if (!initialized) {
@ -460,7 +450,7 @@ index 5349bce24..0103fd03a 100644
for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) {
ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context;
char desc[256];
@@ -14339,12 +14445,41 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
@@ -14581,12 +14687,41 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
ctx->name = GGML_VK_NAME + std::to_string(i);
ctx->description = desc;
ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu;