69 lines
1.4 KiB
Go
69 lines
1.4 KiB
Go
package runner
|
|
|
|
import (
|
|
"strings"
|
|
)
|
|
|
|
// findStop reports whether sequence contains any of the given stop strings.
// Stops are tried in slice order, so the stop returned is the first entry of
// stops that occurs anywhere in sequence, not necessarily the one that
// appears earliest within sequence. When nothing matches it returns false
// and the empty string.
func findStop(sequence string, stops []string) (bool, string) {
	for idx := range stops {
		candidate := stops[idx]
		if !strings.Contains(sequence, candidate) {
			continue
		}
		return true, candidate
	}

	return false, ""
}
|
|
|
|
// containsStopSuffix reports whether sequence ends with a non-empty prefix
// of any stop string (the full stop string counts as a prefix of itself).
// An empty stop has no non-empty prefix and therefore never matches.
func containsStopSuffix(sequence string, stops []string) bool {
	for _, candidate := range stops {
		// Try every prefix length of the candidate, longest first; the
		// result is the same regardless of the order they are tried in.
		for n := len(candidate); n > 0; n-- {
			prefix := candidate[:n]
			if strings.HasSuffix(sequence, prefix) {
				return true
			}
		}
	}

	return false
}
|
|
|
|
// truncateStop cuts sequence at the first occurrence of stop, discarding the
// stop string and everything after it. It returns the remaining leading part
// together with true when stop was found; otherwise it returns sequence
// unchanged together with false.
func truncateStop(sequence string, stop string) (string, bool) {
	head, _, found := strings.Cut(sequence, stop)
	return head, found
}
|
|
|
|
// incompleteUnicode reports whether token ends with the beginning of a
// multi-byte UTF-8 character whose remaining bytes are missing.
//
// It scans backwards over at most the last four bytes, skipping continuation
// bytes, until it reaches a byte that is not a continuation byte. If that
// byte is a 2-, 3- or 4-byte lead, the character is incomplete when fewer
// bytes follow it than the lead announces. Any other byte (ASCII or invalid),
// an empty token, or a tail made only of continuation bytes yields false.
func incompleteUnicode(token string) bool {
	for back := 1; back <= 4 && back <= len(token); back++ {
		b := token[len(token)-back]

		switch {
		case b&0xc0 == 0x80:
			// 10xxxxxx: continuation byte, keep looking for the lead byte
			continue
		case b&0xe0 == 0xc0:
			// 110xxxxx: lead byte of a 2-byte character
			return back < 2
		case b&0xf0 == 0xe0:
			// 1110xxxx: lead byte of a 3-byte character
			return back < 3
		case b&0xf8 == 0xf0:
			// 11110xxx: lead byte of a 4-byte character
			return back < 4
		default:
			// 1-byte character or invalid byte
			return false
		}
	}

	// empty token, or nothing but continuation bytes in the inspected tail
	return false
}
|