From 03abdb4969be7135e2baf39effde82a006e3809e Mon Sep 17 00:00:00 2001 From: nicole pardal Date: Tue, 9 Dec 2025 10:02:17 -0800 Subject: [PATCH] fixed pretokenizer --- model/models/olmo/model.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/models/olmo/model.go b/model/models/olmo/model.go index 13ed0cc59..66ae1b7eb 100644 --- a/model/models/olmo/model.go +++ b/model/models/olmo/model.go @@ -58,7 +58,7 @@ func New(c fs.Config) (model.Model, error) { var pretokenizers []string if c.String("tokenizer.ggml.pre") != "default" { pretokenizers = []string{ - "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", + `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+`, } } processor := model.NewBytePairEncoding(&vocabulary, pretokenizers...)