// Copyright 2012 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package colltabimport ()// Level identifies the collation comparison level.// The primary level corresponds to the basic sorting of text.// The secondary level corresponds to accents and related linguistic elements.// The tertiary level corresponds to casing and related concepts.// The quaternary level is derived from the other levels by the// various algorithms for handling variable elements.typeLevelintconst (PrimaryLevel = iotaSecondaryTertiaryQuaternaryIdentityNumLevels)const ( defaultSecondary = 0x20 defaultTertiary = 0x2 maxTertiary = 0x1FMaxQuaternary = 0x1FFFFF// 21 bits.)// Elem is a representation of a collation element. This API provides ways to encode// and decode Elems. Implementations of collation tables may use values greater// or equal to PrivateUse for their own purposes. However, these should never be// returned by AppendNext.typeElemuint32const ( maxCE Elem = 0xAFFFFFFFPrivateUse = minContract minContract = 0xC0000000 maxContract = 0xDFFFFFFF minExpand = 0xE0000000 maxExpand = 0xEFFFFFFF minDecomp = 0xF0000000)type ceType intconst ( ceNormal ceType = iota// ceNormal includes implicits (ce == 0) ceContractionIndex // rune can be a start of a contraction ceExpansionIndex // rune expands into a sequence of collation elements ceDecompose // rune expands using NFKC decomposition)func ( Elem) () ceType {if <= maxCE {returnceNormal }if <= maxContract {returnceContractionIndex } else {if <= maxExpand {returnceExpansionIndex }returnceDecompose }panic("should not reach here")returnceType(-1)}// For normal collation elements, we assume that a collation element either has// a primary or non-default secondary value, not both.// Collation elements with a primary value are of the form//// 01pppppp pppppppp ppppppp0 ssssssss// - p* is primary collation value// - s* is the secondary collation value// 00pppppp pppppppp ppppppps sssttttt, where// - p* is primary collation value// - s* offset of secondary from default value.// - t* is the tertiary collation value// 100ttttt cccccccc pppppppp pppppppp// - t* is the tertiar collation value// - c* is the canonical combining class// - p* is the primary collation value//// Collation elements with a secondary value are of the form//// 1010cccc ccccssss ssssssss tttttttt, where// - c* is the canonical combining class// - s* is the secondary collation value// - t* is the tertiary collation value// 11qqqqqq qqqqqqqq qqqqqqq0 00000000// - q* quaternary valueconst ( ceTypeMask = 0xC0000000 ceTypeMaskExt = 0xE0000000 ceIgnoreMask = 0xF00FFFFF ceType1 = 0x40000000 ceType2 = 0x00000000 ceType3or4 = 0x80000000 ceType4 = 0xA0000000 ceTypeQ = 0xC0000000Ignore = ceType4 firstNonPrimary = 0x80000000 lastSpecialPrimary = 0xA0000000 secondaryMask = 0x80000000 hasTertiaryMask = 0x40000000 primaryValueMask = 0x3FFFFE00 maxPrimaryBits = 21 compactPrimaryBits = 16 maxSecondaryBits = 12 maxTertiaryBits = 8 maxCCCBits = 8 maxSecondaryCompactBits = 8 maxSecondaryDiffBits = 4 maxTertiaryCompactBits = 5 primaryShift = 9 compactSecondaryShift = 5 minCompactSecondary = defaultSecondary - 4)func makeImplicitCE( int) Elem {returnceType1 | Elem(<<primaryShift) | defaultSecondary}// MakeElem returns an Elem for the given values. It will return an error// if the given combination of values is invalid.func (, , int, uint8) (Elem, error) {if := ; >= 1<<maxPrimaryBits || < 0 {return0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", , 1<<maxPrimaryBits) }if := ; >= 1<<maxSecondaryBits || < 0 {return0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", , 1<<maxSecondaryBits) }if := ; >= 1<<maxTertiaryBits || < 0 {return0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", , 1<<maxTertiaryBits) } := Elem(0)if != 0 {if != 0 {if >= 1<<compactPrimaryBits {return0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", , 1<<compactPrimaryBits) }if != defaultSecondary {return0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", , ) } = Elem( << (compactPrimaryBits + maxCCCBits)) |= Elem() << compactPrimaryBits |= Elem() |= ceType3or4 } elseif == defaultTertiary {if >= 1<<maxSecondaryCompactBits {return0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", , 1<<maxSecondaryCompactBits) } = Elem(<<(maxSecondaryCompactBits+1) + ) |= ceType1 } else { := - defaultSecondary + maxSecondaryDiffBitsif >= 1<<maxSecondaryDiffBits || < 0 {return0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", , , 1<<maxSecondaryDiffBits) }if >= 1<<maxTertiaryCompactBits {return0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", , 1<<maxTertiaryCompactBits) } = Elem(<<maxSecondaryDiffBits + ) = <<maxTertiaryCompactBits + Elem() } } else { = Elem(<<maxTertiaryBits + ) += Elem() << (maxSecondaryBits + maxTertiaryBits) |= ceType4 }return , nil}// MakeQuaternary returns an Elem with the given quaternary value.func ( int) Elem {returnceTypeQ | Elem(<<primaryShift)}// Mask sets weights for any level smaller than l to 0.// The resulting Elem can be used to test for equality with// other Elems to which the same mask has been applied.func ( Elem) ( Level) uint32 {return0}// CCC returns the canonical combining class associated with the underlying character,// if applicable, or 0 otherwise.func ( Elem) () uint8 {if &ceType3or4 != 0 {if &ceType4 == ceType3or4 {returnuint8( >> 16) }returnuint8( >> 20) }return0}// Primary returns the primary collation weight for ce.func ( Elem) () int {if >= firstNonPrimary {if > lastSpecialPrimary {return0 }returnint(uint16()) }returnint(&primaryValueMask) >> primaryShift}// Secondary returns the secondary collation weight for ce.func ( Elem) () int {switch & ceTypeMask {caseceType1:returnint(uint8())caseceType2:returnminCompactSecondary + int((>>compactSecondaryShift)&0xF)caseceType3or4:if < ceType4 {returndefaultSecondary }returnint(>>8) & 0xFFFcaseceTypeQ:return0 }panic("should not reach here")}// Tertiary returns the tertiary collation weight for ce.func ( Elem) () uint8 {if &hasTertiaryMask == 0 {if &ceType3or4 == 0 {returnuint8( & 0x1F) }if &ceType4 == ceType4 {returnuint8() }returnuint8(>>24) & 0x1F// type 2 } elseif &ceTypeMask == ceType1 {returndefaultTertiary }// ce is a quaternary value.return0}func ( Elem) ( uint8) Elem {if &ceTypeMask == ceType1 {// convert to type 4 := & primaryValueMask |= Elem(uint8()-minCompactSecondary) << compactSecondaryShift = } elseif &ceTypeMaskExt == ceType3or4 { &= ^Elem(maxTertiary << 24)return | (Elem() << 24) } else {// type 2 or 4 &= ^Elem(maxTertiary) }return | Elem()}// Quaternary returns the quaternary value if explicitly specified,// 0 if ce == Ignore, or MaxQuaternary otherwise.// Quaternary values are used only for shifted variants.func ( Elem) () int {if &ceTypeMask == ceTypeQ {returnint(&primaryValueMask) >> primaryShift } elseif &ceIgnoreMask == Ignore {return0 }returnMaxQuaternary}// Weight returns the collation weight for the given level.func ( Elem) ( Level) int {switch {casePrimary:return .Primary()caseSecondary:return .Secondary()caseTertiary:returnint(.Tertiary())caseQuaternary:return .Quaternary() }return0// return 0 (ignore) for undefined levels.}// For contractions, collation elements are of the form// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where// - n* is the size of the first node in the contraction trie.// - i* is the index of the first node in the contraction trie.// - b* is the offset into the contraction collation element table.//// See contract.go for details on the contraction trie.const ( maxNBits = 4 maxTrieIndexBits = 12 maxContractOffsetBits = 13)func splitContractIndex( Elem) (, , int) { = int( & (1<<maxNBits - 1)) >>= maxNBits = int( & (1<<maxTrieIndexBits - 1)) >>= maxTrieIndexBits = int( & (1<<maxContractOffsetBits - 1))return}// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,// where b* is the index into the expansion sequence table.const maxExpandIndexBits = 16func splitExpandIndex( Elem) ( int) {returnint(uint16())}// Some runes can be expanded using NFKD decomposition. Instead of storing the full// sequence of collation elements, we decompose the rune and lookup the collation// elements for each rune in the decomposition and modify the tertiary weights.// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where// - v* is the replacement tertiary weight for the first rune,// - w* is the replacement tertiary weight for the second rune,//// Tertiary weights of subsequent runes should be replaced with maxTertiary.// See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.func splitDecompose( Elem) (, uint8) {returnuint8(), uint8( >> 8)}const (// These constants were taken from https://www.unicode.org/versions/Unicode6.0.0/ch12.pdf. minUnified rune = 0x4E00 maxUnified = 0x9FFF minCompatibility = 0xF900 maxCompatibility = 0xFAFF minRare = 0x3400 maxRare = 0x4DBF)const ( commonUnifiedOffset = 0x10000 rareUnifiedOffset = 0x20000// largest rune in common is U+FAFF otherOffset = 0x50000// largest rune in rare is U+2FA1D illegalOffset = otherOffset + int(unicode.MaxRune) maxPrimary = illegalOffset + 1)// implicitPrimary returns the primary weight for the given rune// for which there is no entry for the rune in the collation table.// We take a different approach from the one specified in// https://unicode.org/reports/tr10/#Implicit_Weights,// but preserve the resulting relative ordering of the runes.func implicitPrimary( rune) int {ifunicode.Is(unicode.Ideographic, ) {if >= minUnified && <= maxUnified {// The most common case for CJK.returnint() + commonUnifiedOffset }if >= minCompatibility && <= maxCompatibility {// This will typically not hit. The DUCET explicitly specifies mappings // for all characters that do not decompose.returnint() + commonUnifiedOffset }returnint() + rareUnifiedOffset }returnint() + otherOffset}
The pages are generated with Goldsv0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.