// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// TODO: remove hard-coded versions when we have implemented fractional weights.
// The current implementation is incompatible with later CLDR versions.
//go:generate go run maketables.go -cldr=23 -unicode=6.2.0

// Package collate contains types for comparing and sorting Unicode strings
// according to a given collation order.
package collate // import "golang.org/x/text/collate"

import (
	"bytes"
	"strings"

	"golang.org/x/text/internal/colltab"
	"golang.org/x/text/language"
)

// Collator provides functionality for comparing strings for a given
// collation order.
type Collator struct {
	options

	sorter sorter

	// _iter holds the two iterators used to walk the inputs of a comparison;
	// access them through iter.
	_iter [2]iter
}

// iter returns a pointer to the i-th internal iterator (i must be 0 or 1).
func (c *Collator) iter(i int) *iter {
	// TODO: evaluate performance for making the second iterator optional.
	return &c._iter[i]
}

// Supported returns the list of languages for which collating differs from its parent.
//
// The returned slice is a copy; callers may modify it freely.
func Supported() []language.Tag {
	// TODO: use language.Coverage instead.

	t := make([]language.Tag, len(tags))
	copy(t, tags)
	return t
}

// init parses the comma-separated availableLocales list into tags.
func init() {
	ids := strings.Split(availableLocales, ",")
	tags = make([]language.Tag, len(ids))
	for i, s := range ids {
		tags[i] = language.Raw.MustParse(s)
	}
}

// tags holds the parsed form of availableLocales; it is populated by init.
var tags []language.Tag

// New returns a new Collator initialized for the given locale.
func New(t language.Tag, o ...Option) *Collator {
	index := colltab.MatchLang(t, tags)
	c := newCollator(getTable(locales[index]))

	// Set options from the user-supplied tag.
	c.setFromTag(t)

	// Set the user-supplied options.
	c.setOptions(o)

	c.init()
	return c
}

// NewFromTable returns a new Collator for the given Weighter.
func NewFromTable(w colltab.Weighter, o ...Option) *Collator {
	c := newCollator(w)
	c.setOptions(o)
	c.init()
	return c
}

// init finalizes the Collator after all options have been applied: it wraps
// the weighter in a numeric weighter when the numeric option is set and binds
// both internal iterators to the resulting weighter.
func (c *Collator) init() {
	if c.numeric {
		c.t = colltab.NewNumericWeighter(c.t)
	}
	c._iter[0].init(c)
	c._iter[1].init(c)
}

// Buffer holds keys generated by Key and KeyFromString.
type Buffer struct {
	buf [4096]byte
	key []byte
}

// init points key at the embedded array the first time the Buffer is used.
func (b *Buffer) init() {
	if b.key == nil {
		b.key = b.buf[:0]
	}
}

// Reset clears the buffer from previous results generated by Key and
// KeyFromString.
func (b *Buffer) Reset() {
	b.key = b.key[:0]
}

// Compare returns an integer comparing the two byte slices.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
func (c *Collator) Compare(a, b []byte) int {
	// TODO: skip identical prefixes once we have a fast way to detect if a rune is
	// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
	c.iter(0).SetInput(a)
	c.iter(1).SetInput(b)
	if res := c.compare(); res != 0 {
		return res
	}
	// All compared levels are equal; fall back to a bytewise comparison
	// unless the identity level is ignored.
	if !c.ignore[colltab.Identity] {
		return bytes.Compare(a, b)
	}
	return 0
}

// CompareString returns an integer comparing the two strings.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
func (c *Collator) CompareString(a, b string) int {
	// TODO: skip identical prefixes once we have a fast way to detect if a rune is
	// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
	c.iter(0).SetInputString(a)
	c.iter(1).SetInputString(b)
	if res := c.compare(); res != 0 {
		return res
	}
	// All compared levels are equal; fall back to a plain string comparison
	// unless the identity level is ignored.
	if !c.ignore[colltab.Identity] {
		if a < b {
			return -1
		} else if a > b {
			return 1
		}
	}
	return 0
}

// compareLevel compares a and b on a single level. It resets the cursor of
// both iterators and then repeatedly pulls one weight from each using f.
// It returns -1 or +1 at the first differing pair and 0 once both iterators
// yield 0 at the same time.
func compareLevel(f func(i *iter) int, a, b *iter) int {
	a.pce = 0
	b.pce = 0
	for {
		va := f(a)
		vb := f(b)
		if va != vb {
			if va < vb {
				return -1
			}
			return 1
		} else if va == 0 {
			break
		}
	}
	return 0
}

// compare compares the inputs previously set on iter(0) and iter(1) level by
// level, honoring the ignore, backwards and caseLevel options. It returns
// -1, 0 or +1.
func (c *Collator) compare() int {
	ia, ib := c.iter(0), c.iter(1)
	// Process primary level.
	// TODO: handle shifted. Until then the primary level is not compared
	// when alternate == altShifted.
	if c.alternate != altShifted {
		// TODO: implement script reordering
		if res := compareLevel((*iter).nextPrimary, ia, ib); res != 0 {
			return res
		}
	}
	if !c.ignore[colltab.Secondary] {
		f := (*iter).nextSecondary
		if c.backwards {
			f = (*iter).prevSecondary
		}
		if res := compareLevel(f, ia, ib); res != 0 {
			return res
		}
	}
	// TODO: special case handling (Danish?)
	if !c.ignore[colltab.Tertiary] || c.caseLevel {
		if res := compareLevel((*iter).nextTertiary, ia, ib); res != 0 {
			return res
		}
		if !c.ignore[colltab.Quaternary] {
			if res := compareLevel((*iter).nextQuaternary, ia, ib); res != 0 {
				return res
			}
		}
	}
	return 0
}

// Key returns the collation key for str.
// Passing the buffer buf may avoid memory allocations.
// The returned slice will point to an allocation in Buffer and will remain
// valid until the next call to buf.Reset().
func (c *Collator) Key(buf *Buffer, str []byte) []byte {
	// See https://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
	buf.init()
	return c.key(buf, c.getColElems(str))
}

// KeyFromString returns the collation key for str.
// Passing the buffer buf may avoid memory allocations.
// The returned slice will point to an allocation in Buffer and will remain
// valid until the next call to buf.Reset().
func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
	// See https://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
	buf.init()
	return c.key(buf, c.getColElemsString(str))
}

// key post-processes the collation elements w according to the alternate
// setting, appends their encoded form to buf and returns only the newly
// appended portion of buf.key.
func (c *Collator) key(buf *Buffer, w []colltab.Elem) []byte {
	processWeights(c.alternate, c.t.Top(), w)
	kn := len(buf.key)
	c.keyFromElems(buf, w)
	return buf.key[kn:]
}

// getColElems runs iterator 0 over str until it is exhausted and returns the
// collation elements it accumulated. The result aliases the iterator's
// storage.
func (c *Collator) getColElems(str []byte) []colltab.Elem {
	i := c.iter(0)
	i.SetInput(str)
	for i.Next() {
	}
	return i.Elems
}

// getColElemsString is the string counterpart of getColElems.
func (c *Collator) getColElemsString(str string) []colltab.Elem {
	i := c.iter(0)
	i.SetInputString(str)
	for i.Next() {
	}
	return i.Elems
}

// iter pairs a colltab.Iter with a fixed-size backing array for its elements
// and a cursor used by the per-level next/prev methods.
type iter struct {
	wa [512]colltab.Elem

	colltab.Iter
	pce int // index of the next element to inspect on the current level
}

// init binds the iterator to the Collator's weighter and points Elems at the
// embedded backing array.
func (i *iter) init(c *Collator) {
	i.Weighter = c.t
	i.Elems = i.wa[:0]
}

// nextPrimary returns the next non-zero primary weight, calling Next to
// obtain more elements as needed. It returns 0 once Next reports false.
func (i *iter) nextPrimary() int {
	for {
		for ; i.pce < i.N; i.pce++ {
			if v := i.Elems[i.pce].Primary(); v != 0 {
				i.pce++
				return v
			}
		}
		if !i.Next() {
			return 0
		}
	}
}

// nextSecondary returns the next non-zero secondary weight in Elems, or 0 if
// none remain.
func (i *iter) nextSecondary() int {
	for ; i.pce < len(i.Elems); i.pce++ {
		if v := i.Elems[i.pce].Secondary(); v != 0 {
			i.pce++
			return v
		}
	}
	return 0
}

// prevSecondary is like nextSecondary but walks Elems from the end towards
// the start.
func (i *iter) prevSecondary() int {
	for ; i.pce < len(i.Elems); i.pce++ {
		if v := i.Elems[len(i.Elems)-i.pce-1].Secondary(); v != 0 {
			i.pce++
			return v
		}
	}
	return 0
}

// nextTertiary returns the next non-zero tertiary weight in Elems, or 0 if
// none remain.
func (i *iter) nextTertiary() int {
	for ; i.pce < len(i.Elems); i.pce++ {
		if v := i.Elems[i.pce].Tertiary(); v != 0 {
			i.pce++
			return int(v)
		}
	}
	return 0
}

// nextQuaternary returns the next non-zero quaternary weight in Elems, or 0
// if none remain.
func (i *iter) nextQuaternary() int {
	for ; i.pce < len(i.Elems); i.pce++ {
		if v := i.Elems[i.pce].Quaternary(); v != 0 {
			i.pce++
			return v
		}
	}
	return 0
}

// appendPrimary appends the big-endian encoding of p to key and returns the
// extended slice. Values up to 0x7FFF take two bytes; larger values take
// three bytes with the high bit of the first byte set.
func appendPrimary(key []byte, p int) []byte {
	// Convert to variable length encoding; supports up to 23 bits.
	if p <= 0x7FFF {
		key = append(key, uint8(p>>8), uint8(p))
	} else {
		key = append(key, uint8(p>>16)|0x80, uint8(p>>8), uint8(p))
	}
	return key
}

// keyFromElems converts the weights ws to a compact sequence of bytes.
// The result will be appended to the byte buffer in buf.
func (c *Collator) keyFromElems(buf *Buffer, ws []colltab.Elem) {
	for _, v := range ws {
		if w := v.Primary(); w > 0 {
			buf.key = appendPrimary(buf.key, w)
		}
	}
	if !c.ignore[colltab.Secondary] {
		buf.key = append(buf.key, 0, 0)
		// TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
		if !c.backwards {
			for _, v := range ws {
				if w := v.Secondary(); w > 0 {
					buf.key = append(buf.key, uint8(w>>8), uint8(w))
				}
			}
		} else {
			for i := len(ws) - 1; i >= 0; i-- {
				if w := ws[i].Secondary(); w > 0 {
					buf.key = append(buf.key, uint8(w>>8), uint8(w))
				}
			}
		}
	} else if c.caseLevel {
		buf.key = append(buf.key, 0, 0)
	}
	if !c.ignore[colltab.Tertiary] || c.caseLevel {
		buf.key = append(buf.key, 0, 0)
		for _, v := range ws {
			if w := v.Tertiary(); w > 0 {
				buf.key = append(buf.key, uint8(w))
			}
		}
		// Derive the quaternary weights from the options and other levels.
		// Note that we represent MaxQuaternary as 0xFF. The first byte of the
		// representation of a primary weight is always smaller than 0xFF,
		// so using this single byte value will compare correctly.
		if !c.ignore[colltab.Quaternary] && c.alternate >= altShifted {
			if c.alternate == altShiftTrimmed {
				// Remember where the last non-maximal weight ended so that
				// the trailing run of 0xFF bytes can be cut off afterwards.
				lastNonFFFF := len(buf.key)
				buf.key = append(buf.key, 0)
				for _, v := range ws {
					if w := v.Quaternary(); w == colltab.MaxQuaternary {
						buf.key = append(buf.key, 0xFF)
					} else if w > 0 {
						buf.key = appendPrimary(buf.key, w)
						lastNonFFFF = len(buf.key)
					}
				}
				buf.key = buf.key[:lastNonFFFF]
			} else {
				buf.key = append(buf.key, 0)
				for _, v := range ws {
					if w := v.Quaternary(); w == colltab.MaxQuaternary {
						buf.key = append(buf.key, 0xFF)
					} else if w > 0 {
						buf.key = appendPrimary(buf.key, w)
					}
				}
			}
		}
	}
}

// processWeights rewrites the elements of wa in place according to the
// alternate handling vw. Elements whose primary weight is non-zero and at
// most top are treated as variable:
//
//   - altShifted, altShiftTrimmed: a variable element is replaced by
//     colltab.MakeQuaternary of its primary weight, and elements with a zero
//     primary weight that follow it are replaced by colltab.Ignore.
//   - altBlanked: a variable element, and elements with a zero primary weight
//     that follow it, are replaced by colltab.Ignore.
//
// For any other value of vw, wa is left untouched.
func processWeights(vw alternateHandling, top uint32, wa []colltab.Elem) {
	ignore := false
	vtop := int(top)
	switch vw {
	case altShifted, altShiftTrimmed:
		for i := range wa {
			if p := wa[i].Primary(); p <= vtop && p != 0 {
				wa[i] = colltab.MakeQuaternary(p)
				ignore = true
			} else if p == 0 {
				if ignore {
					wa[i] = colltab.Ignore
				}
			} else {
				ignore = false
			}
		}
	case altBlanked:
		for i := range wa {
			if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) {
				wa[i] = colltab.Ignore
				ignore = true
			} else {
				ignore = false
			}
		}
	}
}