Source File
iter.go
Belonging Package
golang.org/x/text/internal/colltab
// Copyright 2015 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package colltab// An Iter incrementally converts chunks of the input text to collation// elements, while ensuring that the collation elements are in normalized order// (that is, they are in the order as if the input text were normalized first).type Iter struct {Weighter WeighterElems []Elem// N is the number of elements in Elems that will not be reordered on// subsequent iterations, N <= len(Elems).N intbytes []bytestr string// Because the Elems buffer may contain collation elements that are needed// for look-ahead, we need two positions in the text (bytes or str): one for// the end position in the text for the current iteration and one for the// start of the next call to appendNext.pEnd int // end position in text corresponding to N.pNext int // pEnd <= pNext.}// Reset sets the position in the current input text to p and discards any// results obtained so far.func ( *Iter) ( int) {.Elems = .Elems[:0].N = 0.pEnd =.pNext =}// Len returns the length of the input text.func ( *Iter) () int {if .bytes != nil {return len(.bytes)}return len(.str)}// Discard removes the collation elements up to N.func ( *Iter) () {// TODO: change this such that only modifiers following starters will have// to be copied..Elems = .Elems[:copy(.Elems, .Elems[.N:])].N = 0}// End returns the end position of the input text for which Next has returned// results.func ( *Iter) () int {return .pEnd}// SetInput resets i to input s.func ( *Iter) ( []byte) {.bytes =.str = "".Reset(0)}// SetInputString resets i to input s.func ( *Iter) ( string) {.str =.bytes = nil.Reset(0)}func ( *Iter) () bool {return .pNext >= len(.str) && .pNext >= len(.bytes)}func ( *Iter) () bool {if .done() {return false}var intif .bytes == nil {.Elems, = .Weighter.AppendNextString(.Elems, .str[.pNext:])} else {.Elems, = .Weighter.AppendNext(.Elems, .bytes[.pNext:])}if == 0 {= 1}.pNext +=return true}// Next appends Elems to the internal array. On each iteration, it will either// add starters or modifiers. In the majority of cases, an Elem with a primary// value > 0 will have a CCC of 0. The CCC values of collation elements are also// used to detect if the input string was not normalized and to adjust the// result accordingly.func ( *Iter) () bool {if .N == len(.Elems) && !.appendNext() {return false}// Check if the current segment starts with a starter.:= .Elems[len(.Elems)-1].CCC()if == 0 {.N = len(.Elems).pEnd = .pNextreturn true} else if .Elems[.N].CCC() == 0 {// set i.N to only cover part of i.Elems for which prevCCC == 0 and// use rest for the next call to next.for .N++; .N < len(.Elems) && .Elems[.N].CCC() == 0; .N++ {}.pEnd = .pNextreturn true}// The current (partial) segment starts with modifiers. We need to collect// all successive modifiers to ensure that they are normalized.for {:= len(.Elems).pEnd = .pNextif !.appendNext() {break}if := .Elems[].CCC(); == 0 || len(.Elems)-.N > maxCombiningCharacters {// Leave the starter for the next iteration. This ensures that we// do not return sequences of collation elements that cross two// segments.//// TODO: handle large number of combining characters by fully// normalizing the input segment before iteration. This ensures// results are consistent across the text repo..N =return true} else if < {.doNorm(, ) // should be rare, never occurs for NFD and FCC.} else {=}}:= len(.Elems) != .N.N = len(.Elems)return}// nextNoNorm is the same as next, but does not "normalize" the collation// elements.func ( *Iter) () bool {// TODO: remove this function. Using this instead of next does not seem// to improve performance in any significant way. We retain this until// later for evaluation purposes.if .done() {return false}.appendNext().N = len(.Elems)return true}const maxCombiningCharacters = 30// doNorm reorders the collation elements in i.Elems.// It assumes that blocks of collation elements added with appendNext// either start and end with the same CCC or start with CCC == 0.// This allows for a single insertion point for the entire block.// The correctness of this assumption is verified in builder.go.func ( *Iter) ( int, uint8) {:= len(.Elems):=for --; > .N && < .Elems[-1].CCC(); -- {}.Elems = append(.Elems, .Elems[:]...)copy(.Elems[:], .Elems[:]).Elems = .Elems[:]}
![]() |
The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds. |