fixed pretokenizer

This commit is contained in:
nicole pardal
2025-12-09 10:02:17 -08:00
committed by ParthSareen
parent d8bf6a5dee
commit 2c147bc780

View File

@@ -58,7 +58,7 @@ func New(c fs.Config) (model.Model, error) {
var pretokenizers []string
if c.String("tokenizer.ggml.pre") != "default" {
pretokenizers = []string{
"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+`,
}
}
processor := model.NewBytePairEncoding(&vocabulary, pretokenizers...)