// Copyright 2014 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package colltabimport ()// NewNumericWeighter wraps w to replace individual digits to sort based on their// numeric value.//// Weighter w must have a free primary weight after the primary weight for 9.// If this is not the case, numeric value will sort at the same primary level// as the first primary sorting after 9.func ( Weighter) Weighter { := func( string) Elem { , := .AppendNextString(nil, )return [0] } := ("9")// Numbers should order before zero, but the DUCET has no room for this. // TODO: move before zero once we use fractional collation elements. , := MakeElem(.Primary()+1, .Secondary(), int(.Tertiary()), 0)return &numericWeighter{Weighter: ,// We assume that w sorts digits of different kinds in order of numeric // value and that the tertiary weight order is preserved. // // TODO: evaluate whether it is worth basing the ranges on the Elem // encoding itself once the move to fractional weights is complete.zero: ("0"),zeroSpecialLo: ("0"), // U+FF10 FULLWIDTH DIGIT ZEROzeroSpecialHi: ("₀"), // U+2080 SUBSCRIPT ZEROnine: ,nineSpecialHi: ("₉"), // U+2089 SUBSCRIPT NINEnumberStart: , }}// A numericWeighter translates a stream of digits into a stream of weights// representing the numeric value.type numericWeighter struct {Weighter// The Elems below all demarcate boundaries of specific ranges. With the // current element encoding digits are in two ranges: normal (default // tertiary value) and special. For most languages, digits have collation // elements in the normal range. // // Note: the range tests are very specific for the element encoding used by // this implementation. The tests in collate_test.go are designed to fail // if this code is not updated when an encoding has changed. 
zero Elem// normal digit zero zeroSpecialLo Elem// special digit zero, low tertiary value zeroSpecialHi Elem// special digit zero, high tertiary value nine Elem// normal digit nine nineSpecialHi Elem// special digit nine numberStart Elem}// AppendNext calls the namesake of the underlying weigher, but replaces single// digits with weights representing their value.func ( *numericWeighter) ( []Elem, []byte) ( []Elem, int) { , = .Weighter.AppendNext(, ) := numberConverter{elems: ,w: ,b: , } , := .checkNextDigit()if ! {return , }// ce might have been grown already, so take it instead of buf. .init(, len(), )for < len() { , := .Weighter.AppendNext(.elems, [:]) .b = += if !.update() {break } }return .result(), }// AppendNextString calls the namesake of the underlying weigher, but replaces// single digits with weights representing their value.func ( *numericWeighter) ( []Elem, string) ( []Elem, int) { , = .Weighter.AppendNextString(, ) := numberConverter{elems: ,w: ,s: , } , := .checkNextDigit()if ! {return , } .init(, len(), )for < len() { , := .Weighter.AppendNextString(.elems, [:]) .s = += if !.update() {break } }return .result(), }type numberConverter struct { w *numericWeighter elems []Elem nDigits int lenIndex int s string// set if the input was of type string b []byte// set if the input was of type []byte}// init completes initialization of a numberConverter and prepares it for adding// more digits. elems is assumed to have a digit starting at oldLen.func ( *numberConverter) ( []Elem, int, bool) {// Insert a marker indicating the start of a number and a placeholder // for the number of digits.if { = append([:], .w.numberStart, 0) } else { = append(, 0, 0)copy([+2:], [:]) [] = .w.numberStart [+1] = 0 .nDigits = 1 } .elems = .lenIndex = + 1}// checkNextDigit reports whether bufNew adds a single digit relative to the old// buffer. 
If it does, it also reports whether this digit is zero.func ( *numberConverter) ( []Elem) (, bool) {iflen(.elems) >= len() {returnfalse, false } := [len(.elems)]if < .w.zeroSpecialLo || .w.nine < {// Not a number.returnfalse, false }if < .w.zero {if > .w.nineSpecialHi {// Not a number.returnfalse, false }if !.isDigit() {returnfalse, false } = <= .w.zeroSpecialHi } else {// This is the common case if we encounter a digit. = == .w.zero }// Test the remaining added collation elements have a zero primary value.if := len() - len(.elems); > 1 {for := len(.elems) + 1; < len(); ++ {if [].Primary() != 0 {returnfalse, false } }// In some rare cases, collation elements will encode runes in // unicode.No as a digit. For example Ethiopic digits (U+1369 - U+1371) // are not in Nd. Also some digits that clearly belong in unicode.No, // like U+0C78 TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR, have // collation elements indistinguishable from normal digits. // Unfortunately, this means we need to make this check for nearly all // non-Latin digits. // // TODO: check the performance impact and find something better if it is // an issue.if !.isDigit() {returnfalse, false } }return , true}func ( *numberConverter) () bool {if .b != nil { , := utf8.DecodeRune(.b)returnunicode.In(, unicode.Nd) } , := utf8.DecodeRuneInString(.s)returnunicode.In(, unicode.Nd)}// We currently support a maximum of about 2M digits (the number of primary// values). Such numbers will compare correctly against small numbers, but their// comparison against other large numbers is undefined.//// TODO: define a proper fallback, such as comparing large numbers textually or// actually allowing numbers of unlimited length.//// TODO: cap this to a lower number (like 100) and maybe allow a larger number// in an option?const maxDigits = 1<<maxPrimaryBits - 1func ( *numberConverter) ( []Elem) bool { , := .checkNextDigit()if .nDigits == 0 && {returntrue } .elems = if ! 
{returnfalse } .nDigits++return .nDigits < maxDigits}// result fills in the length element for the digit sequence and returns the// completed collation elements.func ( *numberConverter) () []Elem { , := MakeElem(.nDigits, defaultSecondary, defaultTertiary, 0) .elems[.lenIndex] = return .elems}
The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.