package uniseg
import "unicode/utf8"
// States of the word break state machine. They are consumed and produced by
// wbTransitions and transitionWordBreakState. The values are sequential so
// that every state fits below wbZWJBit.
const (
	wbAny = iota // Generic state; also used as the "any state" wildcard in table lookups.
	wbCR
	wbLF
	wbNewline
	wbWSegSpace
	wbHebrewLetter
	wbALetter
	wbWB7  // Entered by transitionWordBreakState when its look-ahead after a letter succeeds.
	wbWB7c // Entered by transitionWordBreakState when its look-ahead after a Hebrew letter and a double quote succeeds.
	wbNumeric
	wbWB11 // Entered by transitionWordBreakState when its look-ahead after a numeric succeeds.
	wbKatakana
	wbExtendNumLet
	wbOddRI
	wbEvenRI
)

// wbZWJBit is a flag that transitionWordBreakState ORs into a state after it
// has seen a zero-width joiner. It is the first bit above the largest state
// value (wbEvenRI == 14), so it can be set and cleared without disturbing the
// state itself.
const wbZWJBit = 1 << 4
// wbTransitions is the transition table of the word break state machine. Given
// the current state and the property of the next code point, it returns the
// state to move to, whether there is a word break in front of that code point,
// and the number of the rule the entry belongs to.
//
// The table only contains exact (state, prop) entries plus the wildcard
// entries keyed by wbAny or prAny; it performs no wildcard matching itself.
// When there is no entry for the given pair, it returns (-1, false, -1) and
// the caller (transitionWordBreakState) retries with the wildcards.
//
// Lower rule numbers win when the caller has to merge two wildcard entries.
// NOTE(review): the numbers look like the UAX #29 rule names with the letter
// suffix mapped to a digit (30 = WB3, 31 = WB3a, 131 = WB13a, 9990 = WB999) —
// confirm against the specification before relying on that.
func wbTransitions(state, prop int) (newState int, wordBreak bool, rule int) {
	// Both inputs are packed into one 64-bit key: state in the lower half,
	// property in the upper half. Every case constant is built the same way.
	key := uint64(state) | uint64(prop)<<32

	switch key {
	// Line terminators: break in front of them ...
	case wbAny | prNewline<<32:
		return wbNewline, true, 32
	case wbAny | prCR<<32:
		return wbCR, true, 32
	case wbAny | prLF<<32:
		return wbLF, true, 32

	// ... and behind them ...
	case wbNewline | prAny<<32,
		wbCR | prAny<<32,
		wbLF | prAny<<32:
		return wbAny, true, 31

	// ... except between CR and LF.
	case wbCR | prLF<<32:
		return wbLF, false, 30

	// Runs of segment spaces stay together.
	case wbAny | prWSegSpace<<32:
		return wbWSegSpace, true, 9990
	case wbWSegSpace | prWSegSpace<<32:
		return wbWSegSpace, false, 34

	// Letters: entering a letter state, and letter followed by letter.
	case wbAny | prALetter<<32:
		return wbALetter, true, 9990
	case wbAny | prHebrewLetter<<32:
		return wbHebrewLetter, true, 9990
	case wbALetter | prALetter<<32,
		wbHebrewLetter | prALetter<<32:
		return wbALetter, false, 50
	case wbALetter | prHebrewLetter<<32,
		wbHebrewLetter | prHebrewLetter<<32:
		return wbHebrewLetter, false, 50

	// Leaving wbWB7 (that state is only entered by transitionWordBreakState).
	case wbWB7 | prALetter<<32:
		return wbALetter, false, 70
	case wbWB7 | prHebrewLetter<<32:
		return wbHebrewLetter, false, 70

	// Hebrew letter followed by a single quote.
	case wbHebrewLetter | prSingleQuote<<32:
		return wbAny, false, 71

	// Leaving wbWB7c (that state is only entered by transitionWordBreakState).
	case wbWB7c | prHebrewLetter<<32:
		return wbHebrewLetter, false, 73

	// Numerics: entering the numeric state, and digit followed by digit.
	case wbAny | prNumeric<<32:
		return wbNumeric, true, 9990
	case wbNumeric | prNumeric<<32:
		return wbNumeric, false, 80

	// Letter followed by digit.
	case wbALetter | prNumeric<<32,
		wbHebrewLetter | prNumeric<<32:
		return wbNumeric, false, 90

	// Digit followed by letter.
	case wbNumeric | prALetter<<32:
		return wbALetter, false, 100
	case wbNumeric | prHebrewLetter<<32:
		return wbHebrewLetter, false, 100

	// Leaving wbWB11 (that state is only entered by transitionWordBreakState).
	case wbWB11 | prNumeric<<32:
		return wbNumeric, false, 110

	// Katakana runs.
	case wbAny | prKatakana<<32:
		return wbKatakana, true, 9990
	case wbKatakana | prKatakana<<32:
		return wbKatakana, false, 130

	// ExtendNumLet attaches to a preceding letter, digit, Katakana, or
	// ExtendNumLet ...
	case wbAny | prExtendNumLet<<32:
		return wbExtendNumLet, true, 9990
	case wbALetter | prExtendNumLet<<32,
		wbHebrewLetter | prExtendNumLet<<32,
		wbNumeric | prExtendNumLet<<32,
		wbKatakana | prExtendNumLet<<32,
		wbExtendNumLet | prExtendNumLet<<32:
		return wbExtendNumLet, false, 131

	// ... and to a following letter, digit, or Katakana.
	case wbExtendNumLet | prALetter<<32:
		return wbALetter, false, 132
	case wbExtendNumLet | prHebrewLetter<<32:
		return wbHebrewLetter, false, 132
	case wbExtendNumLet | prNumeric<<32:
		return wbNumeric, false, 132
	case wbExtendNumLet | prKatakana<<32:
		return wbKatakana, false, 132
	}

	// No entry for this pair.
	return -1, false, -1
}
// transitionWordBreakState advances the word break state machine by one code
// point. It takes the current state and the next rune r and returns the new
// state plus whether there is a word break in front of r.
//
// A negative state is accepted and handled like "no particular previous
// state". Some decisions need to know what follows r; for those the function
// decodes further runes from b if b is non-nil, and from str otherwise, so the
// one in use is expected to hold the text that comes after r. Both are only
// read through local copies.
//
// NOTE(review): the rule labels in the comments below (WB4, WB6, ...) are the
// UAX #29 word boundary rules this logic appears to implement — verify against
// the specification.
func transitionWordBreakState(state int, r rune, b []byte, str string) (newState int, wordBreak bool) {
	nextProperty := property(workBreakCodePoints, r)

	// Ignorable code points (ZWJ, Extend, Format) never cause a break on their
	// own, except directly after a line terminator (WB3a beats WB4).
	afterLineBreak := state == wbNewline || state == wbCR || state == wbLF
	switch {
	case nextProperty == prZWJ:
		// Remember the joiner by flagging the state.
		switch {
		case afterLineBreak:
			return wbAny | wbZWJBit, true
		case state < 0:
			return wbAny | wbZWJBit, false
		}
		return state | wbZWJBit, false

	case nextProperty == prExtend || nextProperty == prFormat:
		switch {
		case afterLineBreak:
			return wbAny, true
		case state == wbWSegSpace, state == wbAny|wbZWJBit, state < 0:
			// No break, but the previous context is dropped as well.
			return wbAny, false
		}
		return state, false

	case nextProperty == prExtendedPictographic && state >= 0 && state&wbZWJBit != 0:
		// A pictograph directly after a ZWJ stays attached (WB3c).
		return wbAny, false
	}

	// From here on the ZWJ flag has served its purpose.
	if state >= 0 {
		state &^= wbZWJBit
	}

	// Look up the exact table entry first.
	var rule int
	newState, wordBreak, rule = wbTransitions(state, nextProperty)
	if newState < 0 {
		// No exact entry: fall back to the two wildcard entries.
		stateOnly, stateOnlyBreak, stateOnlyRule := wbTransitions(state, prAny)
		propOnly, propOnlyBreak, propOnlyRule := wbTransitions(wbAny, nextProperty)
		switch {
		case stateOnly >= 0 && propOnly >= 0:
			// Both exist. The target state comes from the property entry, the
			// break decision from whichever entry has the lower rule number.
			newState, wordBreak, rule = propOnly, propOnlyBreak, propOnlyRule
			if stateOnlyRule < propOnlyRule {
				wordBreak, rule = stateOnlyBreak, stateOnlyRule
			}
		case stateOnly >= 0:
			newState, wordBreak, rule = stateOnly, stateOnlyBreak, stateOnlyRule
		case propOnly >= 0:
			newState, wordBreak, rule = propOnly, propOnlyBreak, propOnlyRule
		default:
			// Nothing applies: break everywhere else (WB999).
			newState, wordBreak, rule = wbAny, true, 9990
		}
	}

	prevIsLetter := state == wbALetter || state == wbHebrewLetter
	nextIsMidLetter := nextProperty == prMidLetter ||
		nextProperty == prMidNumLet ||
		nextProperty == prSingleQuote

	// Some rules depend on the first non-ignorable code point after r. Find
	// its property, but only when one of those rules can apply at all.
	// farProperty stays -1 if it is not needed, the text ends, or decoding
	// yields utf8.RuneError.
	farProperty := -1
	if rule > 60 &&
		(prevIsLetter || state == wbNumeric) &&
		(nextIsMidLetter || nextProperty == prDoubleQuote || nextProperty == prMidNum) {
		for {
			var (
				ahead rune
				size  int
			)
			if b != nil {
				ahead, size = utf8.DecodeRune(b)
				b = b[size:]
			} else {
				ahead, size = utf8.DecodeRuneInString(str)
				str = str[size:]
			}
			if ahead == utf8.RuneError {
				break
			}
			// Extend, Format, and ZWJ are skipped; anything else ends the search.
			if prop := property(workBreakCodePoints, ahead); prop != prExtend && prop != prFormat && prop != prZWJ {
				farProperty = prop
				break
			}
		}
	}

	switch {
	case rule > 60 && prevIsLetter && nextIsMidLetter &&
		(farProperty == prALetter || farProperty == prHebrewLetter):
		// Letter, mid-letter punctuation, letter (WB6).
		return wbWB7, false

	case rule > 72 && state == wbHebrewLetter &&
		nextProperty == prDoubleQuote && farProperty == prHebrewLetter:
		// Hebrew letter, double quote, Hebrew letter (WB7b).
		return wbWB7c, false

	case rule > 120 && state == wbNumeric &&
		(nextProperty == prMidNum || nextProperty == prMidNumLet || nextProperty == prSingleQuote) &&
		farProperty == prNumeric:
		// Digit, mid-number punctuation, digit (WB12).
		return wbWB11, false
	}

	// Regional indicators are grouped in pairs (WB15/WB16): no break inside a
	// pair, a break in front of every odd one.
	if newState == wbAny && nextProperty == prRegionalIndicator {
		if state == wbOddRI {
			return wbEvenRI, false
		}
		return wbOddRI, true
	}

	return newState, wordBreak
}
The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.