skip quantizing per_layer_token_embd (#11207)
This tensor isn't compatible with CUDA when quantized to q4_K, so skip it.
parent 11ffc36157
commit d0b32def60
@@ -231,6 +231,8 @@ func newType(t *fsggml.Tensor, kv fsggml.KV, qs *quantizeState, ftype fsggml.Fil
 	// do not quantize relative position bias (T5)
 	quantize = quantize && !strings.Contains(name, "attn_rel_b.weight")
 
+	quantize = quantize && !strings.Contains(name, "per_layer_token_embd.weight")
+
 	newType := fsggml.TensorType(t.Kind)
 	if quantize {
 		// get more optimal quantization type based on the tensor shape, layer, etc.
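For context, here is a minimal standalone sketch of the name-based filter this hunk extends. The shouldQuantize helper and the example tensor names are hypothetical and only illustrate the pattern; the real code lives inside newType, whose full fsggml-based signature and surrounding logic are not shown here.

package main

import (
	"fmt"
	"strings"
)

// shouldQuantize is a hypothetical helper showing the pattern from the diff:
// tensors whose names match known-problematic substrings keep their original
// type instead of being quantized (e.g. to q4_K).
func shouldQuantize(name string) bool {
	quantize := true

	// do not quantize relative position bias (T5)
	quantize = quantize && !strings.Contains(name, "attn_rel_b.weight")

	// skip per_layer_token_embd: not compatible with CUDA when quantized to q4_K
	quantize = quantize && !strings.Contains(name, "per_layer_token_embd.weight")

	return quantize
}

func main() {
	// Example tensor names (hypothetical) to show which ones are skipped.
	for _, name := range []string{
		"blk.0.attn_q.weight",
		"per_layer_token_embd.weight",
		"enc.blk.0.attn_rel_b.weight",
	} {
		fmt.Printf("%s -> quantize=%v\n", name, shouldQuantize(name))
	}
}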