package colltab
import (
"unicode/utf8"
"golang.org/x/text/unicode/norm"
)
// Table bundles the lookup data used to turn input text into collation
// elements.
type Table struct {
// Index is the trie consulted first for every lookup.
Index Trie
// ExpandElem stores expansions: the entry at an expansion index holds a
// count and is followed by that many raw element values.
ExpandElem []uint32
// ContractTries supplies the scanners used while matching contractions.
ContractTries ContractTrieSet
// ContractElem holds the raw element values selected by contraction matching.
ContractElem []uint32
// MaxContractLen is not read by the code in this part of the file.
// NOTE(review): presumably the length of the longest contraction - confirm.
MaxContractLen int
// VariableTop is the value reported by Top.
VariableTop uint32
}
// AppendNext looks up the collation elements for the start of b, appends them
// to w, and returns the extended slice together with the size of the input
// that was processed.
func (t *Table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
	in := source{bytes: b}
	res, n = t.appendNext(w, in)
	return res, n
}
// AppendNextString is the string counterpart of AppendNext: it looks up the
// collation elements for the start of s, appends them to w, and returns the
// extended slice together with the size of the input that was processed.
func (t *Table) AppendNextString(w []Elem, s string) (res []Elem, n int) {
	in := source{str: s}
	res, n = t.appendNext(w, in)
	return res, n
}
// Start is a placeholder: it does no work and always panics with
// "not implemented". The arguments p and b are ignored.
func (t *Table ) Start (p int , b []byte ) int {
panic ("not implemented" )
}
// StartString is a placeholder: it does no work and always panics with
// "not implemented". The arguments p and s are ignored.
func (t *Table ) StartString (p int , s string ) int {
panic ("not implemented" )
}
// Domain is a placeholder: it does no work and always panics with
// "not implemented".
func (t *Table ) Domain () []string {
panic ("not implemented" )
}
// Top returns the table's VariableTop value.
func (t *Table ) Top () uint32 {
return t .VariableTop
}
// source wraps one piece of input given either as a string or as a byte
// slice. Its methods use the byte slice whenever bytes is non-nil and fall
// back to str otherwise.
type source struct {
str string // input in string form; consulted when bytes is nil
bytes []byte // input in byte form; takes precedence when non-nil
}
// lookup queries t's index for the beginning of src and returns the element
// and size the index reports. The byte form of the input is used when it is
// non-nil; otherwise the string form is used.
func (src *source) lookup(t *Table) (ce Elem, sz int) {
	if src.bytes != nil {
		return t.Index.lookup(src.bytes)
	}
	return t.Index.lookupString(src.str)
}
// tail drops the first sz bytes from whichever form of the input is active:
// the byte slice when it is non-nil, the string otherwise.
func (src *source) tail(sz int) {
	if src.bytes != nil {
		src.bytes = src.bytes[sz:]
		return
	}
	src.str = src.str[sz:]
}
// nfd normalizes the first end bytes of the input to NFD and returns the
// result. The output is appended to buf after resetting it to zero length, so
// any previous contents of buf are overwritten.
func (src *source) nfd(buf []byte, end int) []byte {
	out := buf[:0]
	if src.bytes != nil {
		return norm.NFD.Append(out, src.bytes[:end]...)
	}
	return norm.NFD.AppendString(out, src.str[:end])
}
// rune decodes the first UTF-8 sequence of the input and returns the rune and
// its width in bytes, exactly as the utf8 decoding functions report them.
func (src *source) rune() (r rune, sz int) {
	if src.bytes != nil {
		return utf8.DecodeRune(src.bytes)
	}
	return utf8.DecodeRuneInString(src.str)
}
// properties returns the normalization properties, under form f, that the norm
// package reports for the beginning of the input.
func (src *source) properties(f norm.Form) norm.Properties {
	if src.bytes != nil {
		return f.Properties(src.bytes)
	}
	return f.PropertiesString(src.str)
}
// appendNext appends to w the collation elements for the beginning of src and
// returns the extended slice together with the size of the input it used.
//
// The element found in the index decides how the input is handled:
//   - ceNormal: the element is appended as is, except that a zero element is
//     replaced (see below);
//   - ceExpansionIndex: the expansion it refers to is appended;
//   - ceContractionIndex: the rest of the input is matched against the
//     contraction data and the size of the match is added to the result;
//   - ceDecompose: the NFKD decomposition is processed recursively and the
//     tertiary weights of the produced elements are rewritten.
func (t *Table) appendNext(w []Elem, src source) (res []Elem, n int) {
	ce, sz := src.lookup(t)

	switch ce.ctype() {
	case ceNormal:
		if ce == 0 {
			r, _ := src.rune()
			const (
				hangulSize  = 3
				firstHangul = 0xAC00
				lastHangul  = 0xD7A3
			)
			if firstHangul <= r && r <= lastHangul {
				// Runes in the Hangul range are decomposed with NFD and
				// every piece of the decomposition is looked up on its own.
				// The size returned is the one from the initial lookup.
				consumed := sz
				var scratch [16]byte
				pieces := src.nfd(scratch[:0], hangulSize)
				for len(pieces) > 0 {
					var width int
					ce, width = t.Index.lookup(pieces)
					w = append(w, ce)
					pieces = pieces[width:]
				}
				return w, consumed
			}
			// Any other rune with a zero element gets an implicit element
			// derived from the rune value.
			ce = makeImplicitCE(implicitPrimary(r))
		}
		w = append(w, ce)

	case ceExpansionIndex:
		w = t.appendExpansion(w, ce)

	case ceContractionIndex:
		// Matching continues on the input that follows the part already
		// looked up; src is a local copy, so trimming it is safe.
		src.tail(sz)
		var matched int
		if src.bytes != nil {
			w, matched = t.matchContraction(w, ce, src.bytes)
		} else {
			w, matched = t.matchContractionString(w, ce, src.str)
		}
		sz += matched

	case ceDecompose:
		first, second := splitDecompose(ce)
		start := len(w)
		decomp := src.properties(norm.NFKD).Decomposition()
		for len(decomp) > 0 {
			var used int
			w, used = t.appendNext(w, source{bytes: decomp})
			decomp = decomp[used:]
		}
		// The first new element gets the first tertiary value, the second
		// one the second value, and all remaining ones maxTertiary.
		// NOTE(review): indexing w[start] assumes the decomposition produced
		// at least one element.
		w[start] = w[start].updateTertiary(first)
		if next := start + 1; next < len(w) {
			w[next] = w[next].updateTertiary(second)
			for k := next + 1; k < len(w); k++ {
				w[k] = w[k].updateTertiary(maxTertiary)
			}
		}
	}
	return w, sz
}
// appendExpansion appends to w the elements of the expansion that ce refers
// to. The index decoded from ce points at a count in t.ExpandElem; the values
// that follow the count are converted to Elem and appended in order.
func (t *Table) appendExpansion(w []Elem, ce Elem) []Elem {
	start := splitExpandIndex(ce)
	count := int(t.ExpandElem[start])
	start++ // step over the count to the first stored value
	for _, raw := range t.ExpandElem[start : start+count] {
		w = append(w, Elem(raw))
	}
	return w
}
// matchContraction continues a contraction lookup that began with ce. It scans
// suffix (the input that follows the part the caller already looked up) with a
// scanner obtained from t.ContractTries, appends the resulting collation
// elements to w, and returns the extended slice plus the second value reported
// by scan.result(). The caller adds that value to the size it has consumed.
// NOTE(review): presumably that value is the number of bytes of suffix that
// were matched - confirm against the scanner implementation.
func (t *Table ) matchContraction (w []Elem , ce Elem , suffix []byte ) ([]Elem , int ) {
index , n , offset := splitContractIndex (ce )
scan := t .ContractTries .scanner (index , n , suffix )
// buf collects input bytes that were stepped over between matched parts;
// bufp marks how much of buf is re-processed after the match is emitted.
buf := [norm .MaxSegmentSize ]byte {}
bufp := 0
p := scan .scan (0 )
// The extended matching below only runs when the scanner is not done and the
// byte at p is non-ASCII (>= utf8.RuneSelf).
if !scan .done && p < len (suffix ) && suffix [p ] >= utf8 .RuneSelf {
// p0 marks where the stepped-over bytes that are not yet saved begin.
p0 := p
bufn := 0
// NOTE: this local shadows the builtin type name rune inside the function.
rune := norm .NFD .Properties (suffix [p :])
p += rune .Size ()
// Continue only if the rune that was just stepped over has a non-zero LeadCCC.
if rune .LeadCCC () != 0 {
prevCC := rune .TrailCCC ()
// Restrict the scanner's input to the text before the next NFD boundary,
// when FirstBoundary finds one.
if end := norm .NFD .FirstBoundary (suffix [p :]); end != -1 {
scan .s = suffix [:p +end ]
}
for p < len (suffix ) && !scan .done && suffix [p ] >= utf8 .RuneSelf {
rune = norm .NFD .Properties (suffix [p :])
// Stop at a rune whose LeadCCC is zero or does not exceed the TrailCCC of
// the previous rune.
if ccc := rune .LeadCCC (); ccc == 0 || prevCC >= ccc {
break
}
prevCC = rune .TrailCCC ()
if pp := scan .scan (p ); pp != p {
// The scanner advanced from p: save the bytes in suffix[p0:p] that were
// stepped over so they can be processed later.
bufn += copy (buf [bufn :], suffix [p0 :p ])
// NOTE(review): pindex equal to the new position presumably means a
// match ends here, making the saved bytes final - confirm.
if scan .pindex == pp {
bufp = bufn
}
p , p0 = pp , pp
} else {
// No progress by the scanner: step over this rune and try the next one.
p += rune .Size ()
}
}
}
}
// Fetch the element selected by the scan from ContractElem, using the offset
// decoded from ce, and append it directly or as an expansion.
i , n := scan .result ()
ce = Elem (t .ContractElem [i +offset ])
if ce .ctype () == ceNormal {
w = append (w , ce )
} else {
w = t .appendExpansion (w , ce )
}
// Run the saved bytes through appendNext so they contribute their own
// elements after the contraction.
for b , p := buf [:bufp ], 0 ; len (b ) > 0 ; b = b [p :] {
w , p = t .appendNext (w , source {bytes : b })
}
return w , n
}
// matchContractionString is the string counterpart of matchContraction. It
// continues a contraction lookup that began with ce by scanning suffix (the
// input that follows the part the caller already looked up) with a scanner
// obtained from t.ContractTries, appends the resulting collation elements to
// w, and returns the extended slice plus the second value reported by
// scan.result(). The caller adds that value to the size it has consumed.
// NOTE(review): presumably that value is the number of bytes of suffix that
// were matched - confirm against the scanner implementation.
func (t *Table ) matchContractionString (w []Elem , ce Elem , suffix string ) ([]Elem , int ) {
index , n , offset := splitContractIndex (ce )
scan := t .ContractTries .scannerString (index , n , suffix )
// buf collects input bytes that were stepped over between matched parts;
// bufp marks how much of buf is re-processed after the match is emitted.
buf := [norm .MaxSegmentSize ]byte {}
bufp := 0
p := scan .scan (0 )
// The extended matching below only runs when the scanner is not done and the
// byte at p is non-ASCII (>= utf8.RuneSelf).
if !scan .done && p < len (suffix ) && suffix [p ] >= utf8 .RuneSelf {
// p0 marks where the stepped-over bytes that are not yet saved begin.
p0 := p
bufn := 0
// NOTE: this local shadows the builtin type name rune inside the function.
rune := norm .NFD .PropertiesString (suffix [p :])
p += rune .Size ()
// Continue only if the rune that was just stepped over has a non-zero LeadCCC.
if rune .LeadCCC () != 0 {
prevCC := rune .TrailCCC ()
// Restrict the scanner's input to the text before the next NFD boundary,
// when FirstBoundaryInString finds one.
if end := norm .NFD .FirstBoundaryInString (suffix [p :]); end != -1 {
scan .s = suffix [:p +end ]
}
for p < len (suffix ) && !scan .done && suffix [p ] >= utf8 .RuneSelf {
rune = norm .NFD .PropertiesString (suffix [p :])
// Stop at a rune whose LeadCCC is zero or does not exceed the TrailCCC of
// the previous rune.
if ccc := rune .LeadCCC (); ccc == 0 || prevCC >= ccc {
break
}
prevCC = rune .TrailCCC ()
if pp := scan .scan (p ); pp != p {
// The scanner advanced from p: save the bytes in suffix[p0:p] that were
// stepped over so they can be processed later.
bufn += copy (buf [bufn :], suffix [p0 :p ])
// NOTE(review): pindex equal to the new position presumably means a
// match ends here, making the saved bytes final - confirm.
if scan .pindex == pp {
bufp = bufn
}
p , p0 = pp , pp
} else {
// No progress by the scanner: step over this rune and try the next one.
p += rune .Size ()
}
}
}
}
// Fetch the element selected by the scan from ContractElem, using the offset
// decoded from ce, and append it directly or as an expansion.
i , n := scan .result ()
ce = Elem (t .ContractElem [i +offset ])
if ce .ctype () == ceNormal {
w = append (w , ce )
} else {
w = t .appendExpansion (w , ce )
}
// Run the saved bytes through appendNext so they contribute their own
// elements after the contraction. The saved bytes live in buf, so the byte
// form of source is used here even though the input was a string.
for b , p := buf [:bufp ], 0 ; len (b ) > 0 ; b = b [p :] {
w , p = t .appendNext (w , source {bytes : b })
}
return w , n
}
// The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64)
// Golds is a Go 101 project developed by Tapir Liu.
// PRs and bug reports are welcome and can be submitted to the issue list.
// Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.