// Copyright 2012 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package colltabimport// For a description of ContractTrieSet, see text/collate/build/contract.go.typeContractTrieSet []struct{ L, H, N, I uint8 }// ctScanner is used to match a trie to an input sequence.// A contraction may match a non-contiguous sequence of bytes in an input string.// For example, if there is a contraction for <a, combining_ring>, it should match// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does// not block combining_ring.// ctScanner does not automatically skip over non-blocking non-starters, but rather// retains the state of the last match and leaves it up to the user to continue// the match at the appropriate points.type ctScanner struct { states ContractTrieSet s []byte n int index int pindex int done bool}type ctScannerString struct { states ContractTrieSet s string n int index int pindex int done bool}func ( ContractTrieSet) (, int, []byte) ctScanner {returnctScanner{s: , states: [:], n: }}func ( ContractTrieSet) (, int, string) ctScannerString {returnctScannerString{s: , states: [:], n: }}// result returns the offset i and bytes consumed p so far. If no suffix// matched, i and p will be 0.func ( *ctScanner) () (, int) {return .index, .pindex}func ( *ctScannerString) () (, int) {return .index, .pindex}const ( final = 0 noIndex = 0xFF)// scan matches the longest suffix at the current location in the input// and returns the number of bytes consumed.func ( *ctScanner) ( int) int { := // the p at the rune start := .s , := .states, .nfor := 0; < && < len(); { := [] := []// TODO: a significant number of contractions are of a form that // cannot match discontiguous UTF-8 in a normalized string. We could let // a negative value of e.n mean that we can set s.done = true and avoid // the need for additional matches.if >= .L {if .L == { ++if .I != noIndex { .index = int(.I) .pindex = }if .N != final { , , = 0, [int(.H)+:], int(.N)if >= len() || utf8.RuneStart([]) { .states, .n, = , , } } else { .done = truereturn }continue } elseif .N == final && <= .H { ++ .done = true .index = int(-.L) + int(.I) .pindex = return } } ++ }return}// scan is a verbatim copy of ctScanner.scan.func ( *ctScannerString) ( int) int { := // the p at the rune start := .s , := .states, .nfor := 0; < && < len(); { := [] := []// TODO: a significant number of contractions are of a form that // cannot match discontiguous UTF-8 in a normalized string. We could let // a negative value of e.n mean that we can set s.done = true and avoid // the need for additional matches.if >= .L {if .L == { ++if .I != noIndex { .index = int(.I) .pindex = }if .N != final { , , = 0, [int(.H)+:], int(.N)if >= len() || utf8.RuneStart([]) { .states, .n, = , , } } else { .done = truereturn }continue } elseif .N == final && <= .H { ++ .done = true .index = int(-.L) + int(.I) .pindex = return } } ++ }return}
The pages are generated with Goldsv0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.