Files
ollama/sample/state_machine.go
2025-01-21 17:48:12 -08:00

159 lines
3.9 KiB
Go

package sample
import (
"fmt"
"github.com/ollama/ollama/model"
)
type token []int32
type Node struct {
State JSONState
TransitionEdges map[*Node][]token
}
func NewNode(state JSONState) *Node {
return &Node{
State: state,
TransitionEdges: make(map[*Node][]token),
}
}
var (
startToken token
endToken token
stringToken token
objectKeyToken token
tabToken token
spaceToken token
newlineToken token
newlineSpace token
commaToken token
commaToken2 token
commaToken3 token
colonToken token
colonToken2 token
)
func initTokens(proc model.TextProcessor) error {
var err error
startToken, err = proc.Encode("{")
if err != nil {
return err
}
endToken, err = proc.Encode("}")
if err != nil {
return err
}
stringToken, err = proc.Encode("\"")
if err != nil {
return err
}
objectKeyToken, err = proc.Encode("\"")
if err != nil {
return err
}
tabToken, err = proc.Encode("\t")
if err != nil {
return err
}
spaceToken, err = proc.Encode(" ")
if err != nil {
return err
}
newlineToken, err = proc.Encode("\n")
if err != nil {
return err
}
newlineSpace, err = proc.Encode(" \n")
if err != nil {
return err
}
// TODO: figure out how to encode colon correctly
colonToken, err = proc.Encode("\":")
if err != nil {
return err
}
fmt.Println("colonToken", colonToken)
colonToken2, err = proc.Encode(":")
if err != nil {
return err
}
commaToken, err = proc.Encode(",")
if err != nil {
return err
}
commaToken2, err = proc.Encode("\",")
if err != nil {
return err
}
fmt.Println("commaToken2", commaToken2)
commaToken3, err = proc.Encode("\",\"")
if err != nil {
return err
}
return nil
}
func buildStateMachine(proc model.TextProcessor) (*Node, error) {
if err := initTokens(proc); err != nil {
return nil, err
}
startNode := NewNode(StateStart)
objectNode := NewNode(StateInObject)
objectKeyNode := NewNode(StateInObjectKey)
objectKeyEndNode := NewNode(StateInObjectKeyEnd)
stringNode := NewNode(StateInString)
intNode := NewNode(StateInInt)
commaNode := NewNode(StateInComma)
colonNode := NewNode(StateInColon)
stringEndNode := NewNode(StateInStringEnd)
endNode := NewNode(StateEnd)
terminateNode := NewNode(StateTerminate)
sentinelToken := token([]int32{-1})
intSentinelToken := token([]int32{-2})
startNode.TransitionEdges[objectNode] = []token{startToken}
objectNode.TransitionEdges[objectKeyNode] = []token{stringToken}
// objectNode.TransitionEdges[objectNode] = []token{newlineToken}
// objectNode.TransitionEdges[objectNode] = []token{spaceToken}
objectKeyNode.TransitionEdges[objectKeyNode] = []token{sentinelToken}
objectKeyNode.TransitionEdges[colonNode] = []token{colonToken, colonToken2}
// characterize end of object key
objectKeyNode.TransitionEdges[objectKeyEndNode] = []token{stringToken}
objectKeyEndNode.TransitionEdges[colonNode] = []token{colonToken}
// objectKeyNode.TransitionEdges[intNode] = []token{sentinelToken}
intNode.TransitionEdges[intNode] = []token{intSentinelToken}
intNode.TransitionEdges[commaNode] = []token{commaToken, commaToken2}
intNode.TransitionEdges[terminateNode] = []token{endToken}
commaNode.TransitionEdges[objectKeyNode] = []token{newlineToken}
colonNode.TransitionEdges[stringNode] = []token{stringToken}
colonNode.TransitionEdges[intNode] = []token{intSentinelToken}
stringNode.TransitionEdges[stringNode] = []token{sentinelToken}
stringNode.TransitionEdges[stringEndNode] = []token{stringToken}
// "\""," Case
stringNode.TransitionEdges[commaNode] = []token{commaToken2}
// "\"",\"" Case
stringNode.TransitionEdges[objectKeyNode] = []token{commaToken3}
stringEndNode.TransitionEdges[commaNode] = []token{commaToken, commaToken2}
stringEndNode.TransitionEdges[terminateNode] = []token{endToken}
endNode.TransitionEdges[terminateNode] = []token{endToken}
terminateNode.TransitionEdges[terminateNode] = []token{}
return startNode, nil
}