package uniseg
import "unicode/utf8"
// States of the sentence break state machine. They are passed to and returned
// from sbTransitions and transitionSentenceBreakState.
const (
// sbAny is the neutral state; it is also what the machine falls back to
// when no table entry applies.
sbAny = iota
// sbCR is entered when a prCR code point has been consumed.
sbCR
// sbParaSep is entered after a paragraph separator property (e.g. prSep or
// prLF) has been consumed.
sbParaSep
// sbATerm is entered from sbAny on a prATerm code point.
sbATerm
// sbUpper is entered on a prUpper code point.
sbUpper
// sbLower is entered on a prLower code point.
sbLower
// sbSB7 is entered when a prATerm follows sbUpper or sbLower.
sbSB7
// sbSB8Close is entered on prClose following sbATerm, sbSB7, or sbSB8Close.
sbSB8Close
// sbSB8Sp is entered on prSp following sbATerm, sbSB7, sbSB8Close, or
// sbSB8Sp itself.
sbSB8Sp
// sbSTerm is entered on a prSTerm code point.
sbSTerm
// sbSB8aClose is entered on prClose following sbSTerm or sbSB8aClose.
sbSB8aClose
// sbSB8aSp is entered on prSp following sbSTerm, sbSB8aClose, or sbSB8aSp
// itself.
sbSB8aSp
)
// sbTransitions is the transition table of the sentence break state machine.
//
// Given the current state (one of the sb* constants) and the property of the
// next code point (one of the pr* constants), it returns the follow-up state,
// whether a sentence boundary lies in front of that code point, and a number
// identifying the rule that produced the transition. The caller compares rule
// numbers to pick between competing transitions, a lower number winning. The
// numbers appear to be the UAX #29 rule numbers times ten (81 standing for
// SB8a), with 9990 used for the generic transitions out of sbAny.
//
// If the table has no entry for the (state, prop) pair, -1, false, -1 is
// returned so that the caller can retry with less specific inputs.
func sbTransitions(state, prop int) (newState int, sentenceBreak bool, rule int) {
	switch uint64(state) | uint64(prop)<<32 {
	// SB3: CR × LF.
	case sbAny | prCR<<32:
		return sbCR, false, 9990
	case sbCR | prLF<<32:
		return sbParaSep, false, 30

	// SB4: ParaSep ÷.
	case sbAny | prSep<<32,
		sbAny | prLF<<32:
		return sbParaSep, false, 9990
	case sbParaSep | prAny<<32,
		sbCR | prAny<<32:
		return sbAny, true, 40

	// SB6: ATerm × Numeric.
	case sbAny | prATerm<<32:
		return sbATerm, false, 9990
	case sbATerm | prNumeric<<32,
		sbSB7 | prNumeric<<32: // sbSB7 also ends in an ATerm.
		return sbAny, false, 60

	// SB7: (Upper | Lower) ATerm × Upper.
	case sbAny | prUpper<<32:
		return sbUpper, false, 9990
	case sbAny | prLower<<32:
		return sbLower, false, 9990
	case sbUpper | prATerm<<32,
		sbLower | prATerm<<32:
		return sbSB7, false, 70
	case sbSB7 | prUpper<<32:
		return sbUpper, false, 70

	// SB8a: SATerm Close* Sp* × (SContinue | SATerm).
	case sbAny | prSTerm<<32:
		return sbSTerm, false, 9990
	case sbATerm | prSContinue<<32,
		sbSB7 | prSContinue<<32,
		sbSB8Close | prSContinue<<32,
		sbSB8Sp | prSContinue<<32,
		sbSTerm | prSContinue<<32,
		sbSB8aClose | prSContinue<<32,
		sbSB8aSp | prSContinue<<32:
		return sbAny, false, 81
	case sbATerm | prATerm<<32,
		sbSB7 | prATerm<<32,
		sbSB8Close | prATerm<<32,
		sbSB8Sp | prATerm<<32,
		sbSTerm | prATerm<<32,
		sbSB8aClose | prATerm<<32,
		sbSB8aSp | prATerm<<32:
		return sbATerm, false, 81
	case sbATerm | prSTerm<<32,
		sbSB7 | prSTerm<<32,
		sbSB8Close | prSTerm<<32,
		sbSB8Sp | prSTerm<<32,
		sbSTerm | prSTerm<<32,
		sbSB8aClose | prSTerm<<32,
		sbSB8aSp | prSTerm<<32:
		return sbSTerm, false, 81

	// SB9: SATerm Close* × (Close | Sp | ParaSep).
	case sbATerm | prClose<<32,
		sbSB7 | prClose<<32,
		sbSB8Close | prClose<<32:
		return sbSB8Close, false, 90
	case sbATerm | prSp<<32,
		sbSB7 | prSp<<32,
		sbSB8Close | prSp<<32:
		return sbSB8Sp, false, 90
	case sbSTerm | prClose<<32,
		sbSB8aClose | prClose<<32:
		return sbSB8aClose, false, 90
	case sbSTerm | prSp<<32,
		sbSB8aClose | prSp<<32:
		return sbSB8aSp, false, 90
	case sbATerm | prSep<<32,
		sbATerm | prLF<<32,
		sbSB7 | prSep<<32,
		sbSB7 | prLF<<32,
		sbSB8Close | prSep<<32,
		sbSB8Close | prLF<<32,
		sbSTerm | prSep<<32,
		sbSTerm | prLF<<32,
		sbSB8aClose | prSep<<32,
		sbSB8aClose | prLF<<32:
		return sbParaSep, false, 90
	// A CR must lead to sbCR rather than sbParaSep: only sbCR has the
	// "× LF" entry, so going to sbParaSep would split a following CR LF pair
	// (violating SB3).
	case sbATerm | prCR<<32,
		sbSB7 | prCR<<32,
		sbSB8Close | prCR<<32,
		sbSTerm | prCR<<32,
		sbSB8aClose | prCR<<32:
		return sbCR, false, 90

	// SB10: SATerm Close* Sp* × (Sp | ParaSep).
	case sbSB8Sp | prSp<<32:
		return sbSB8Sp, false, 100
	case sbSB8aSp | prSp<<32:
		return sbSB8aSp, false, 100
	// The sbSB8aSp entries mirror the sbSB8Sp ones. Without them, the
	// caller's fallback would pick up the SB11 break for "STerm Sp ParaSep".
	case sbSB8Sp | prSep<<32,
		sbSB8Sp | prLF<<32,
		sbSB8aSp | prSep<<32,
		sbSB8aSp | prLF<<32:
		return sbParaSep, false, 100
	case sbSB8Sp | prCR<<32,
		sbSB8aSp | prCR<<32: // See the note on CR under SB9.
		return sbCR, false, 100

	// SB11: SATerm Close* Sp* ParaSep? ÷.
	case sbATerm | prAny<<32,
		sbSB7 | prAny<<32,
		sbSB8Close | prAny<<32,
		sbSB8Sp | prAny<<32,
		sbSTerm | prAny<<32,
		sbSB8aClose | prAny<<32,
		sbSB8aSp | prAny<<32:
		return sbAny, true, 110

	default:
		// No entry for this pair.
		return -1, false, -1
	}
}
// transitionSentenceBreakState advances the sentence break state machine by
// one code point.
//
// state is the current state (an sb* constant, or a negative value before any
// code point has been processed) and r is the next code point. The function
// returns the new state and whether a sentence boundary lies in front of r.
//
// b and str supply the text used for look-ahead; b is used when it is non-nil,
// str otherwise. They are presumably the remainder of the input following r
// (confirm against the callers). Neither is modified for the caller.
func transitionSentenceBreakState(state int, r rune, b []byte, str string) (newState int, sentenceBreak bool) {
	nextProperty := property(sentenceBreakCodePoints, r)

	// Extend and Format code points leave the state untouched, except right
	// after a paragraph separator / CR or at the very start (negative state),
	// where a boundary is reported and the machine restarts in sbAny.
	if nextProperty == prExtend || nextProperty == prFormat {
		if state == sbParaSep || state == sbCR || state < 0 {
			return sbAny, true
		}
		return state, false
	}

	// Look up the exact (state, property) pair first.
	var rule int
	newState, sentenceBreak, rule = sbTransitions(state, nextProperty)
	if newState < 0 {
		// No exact entry: combine the "any property" transition for this state
		// with the "any state" transition for this property.
		anyPropState, anyPropBreak, anyPropRule := sbTransitions(state, prAny)
		anyStateState, anyStateBreak, anyStateRule := sbTransitions(sbAny, nextProperty)
		switch {
		case anyPropState >= 0 && anyStateState >= 0:
			// Both exist: the state comes from the property-specific entry,
			// the break decision from whichever rule has the lower number.
			newState, sentenceBreak, rule = anyStateState, anyStateBreak, anyStateRule
			if anyPropRule < anyStateRule {
				sentenceBreak, rule = anyPropBreak, anyPropRule
			}
		case anyPropState >= 0:
			newState, sentenceBreak, rule = anyPropState, anyPropBreak, anyPropRule
		case anyStateState >= 0:
			newState, sentenceBreak, rule = anyStateState, anyStateBreak, anyStateRule
		default:
			// Nothing applies at all.
			newState, sentenceBreak, rule = sbAny, false, 9990
		}
	}

	// After an ATerm-based prefix, if no rule numbered 80 or lower applied,
	// scan ahead: when the first OLetter/Upper/Lower/Sep/CR/LF/ATerm/STerm
	// code point found is a Lower, there is no boundary here.
	if rule > 80 && (state == sbATerm || state == sbSB8Close || state == sbSB8Sp || state == sbSB7) {
		for nextProperty != prOLetter &&
			nextProperty != prUpper &&
			nextProperty != prLower &&
			nextProperty != prSep &&
			nextProperty != prCR &&
			nextProperty != prLF &&
			nextProperty != prATerm &&
			nextProperty != prSTerm {
			var length int
			if b != nil {
				r, length = utf8.DecodeRune(b)
				b = b[length:]
			} else {
				r, length = utf8.DecodeRuneInString(str)
				str = str[length:]
			}
			// A width of 0 is the only reliable end-of-input signal. Comparing
			// r with utf8.RuneError would also stop at a literal U+FFFD or an
			// invalid byte, which must be skipped like any other code point.
			if length == 0 {
				break
			}
			nextProperty = property(sentenceBreakCodePoints, r)
		}
		if nextProperty == prLower {
			return sbLower, false
		}
	}

	return newState, sentenceBreak
}
The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.