prototyping

This commit is contained in:
ParthSareen
2025-03-25 15:00:14 -07:00
parent 1fd9967558
commit 5ec6bb52a0
11 changed files with 1647 additions and 13 deletions

View File

@@ -32,6 +32,7 @@ type TextProcessor interface {
Encode(s string, addSpecial bool) ([]int32, error)
Decode([]int32) (string, error)
Is(int32, Special) bool
Vocab() *Vocabulary
}
type Vocabulary struct {

View File

@@ -53,6 +53,10 @@ func (spm SentencePieceModel) Is(id int32, special Special) bool {
return spm.vocab.Is(id, special)
}
// Vocab returns the vocabulary held by the model. The returned pointer is the
// model's own vocab field, not a copy.
func (spm SentencePieceModel) Vocab() *Vocabulary {
return spm.vocab
}
func (spm *SentencePieceModel) split(s string) iter.Seq[string] {
return func(yield func(string) bool) {
for m, _ := spm.pre.FindStringMatch(s); m != nil; m, _ = spm.pre.FindNextMatch(m) {