skip quantizing per_layer_token_embd (#11207)

This tensor isn't compatible with CUDA when quantized to q4_K, so skip it.
This commit is contained in:
Michael Yang 2025-06-26 21:49:35 -07:00 committed by Ryan Schumacher
parent 59112600d1
commit 8f2099306f
No known key found for this signature in database
1 changed file with 2 additions and 0 deletions

View File

@ -231,6 +231,8 @@ func newType(t *fsggml.Tensor, kv fsggml.KV, qs *quantizeState, ftype fsggml.Fil
// do not quantize relative position bias (T5)
quantize = quantize && !strings.Contains(name, "attn_rel_b.weight")
quantize = quantize && !strings.Contains(name, "per_layer_token_embd.weight")
newType := fsggml.TensorType(t.Kind)
if quantize {
// get more optimal quantization type based on the tensor shape, layer, etc.