// Package hashprobe provides implementations of probing tables for various // data types. // // Probing tables are specialized hash tables supporting only a single // "probing" operation which behave like a "lookup or insert". When a key // is probed, either its value is retrieved if it already existed in the table, // or it is inserted and assigned its index in the insert sequence as value. // // Values are represented as signed 32 bits integers, which means that probing // tables defined in this package may contain at most 2^31-1 entries. // // Probing tables have a method named Probe with the following signature: // // func (t *Int64Table) Probe(keys []int64, values []int32) int { // ... // } // // The method takes an array of keys to probe as first argument, an array of // values where the indexes of each key will be written as second argument, and // returns the number of keys that were inserted during the call. // // Applications that need to determine which keys were inserted can capture the // length of the probing table prior to the call, and scan the list of values // looking for indexes greater or equal to the length of the table before the // call.
package hashprobe import ( cryptoRand ) const ( // Number of probes tested per iteration. This parameter balances between // the amount of memory allocated on the stack to hold the computed hashes // of the keys being probed, and amortizing the baseline cost of the probing // algorithm. // // The larger the value, the more memory is required, but lower the baseline // cost will be. // // We chose a value that is somewhat large, resulting in reserving 2KiB of // stack but mostly erasing the baseline cost. probesPerLoop = 256 ) var ( prngSeed [8]byte prngMutex sync.Mutex prngSource rand.Source64 ) func init() { , := cryptoRand.Read(prngSeed[:]) if != nil { panic("cannot seed random number generator from system source: " + .Error()) } := int64(binary.LittleEndian.Uint64(prngSeed[:])) prngSource = rand.NewSource().(rand.Source64) } func tableSizeAndMaxLen(, int, float64) (, int) { := int(math.Ceil((1 / ) * float64())) = nextPowerOf2(( + ( - 1)) / ) = int(math.Ceil( * float64(*))) return } func nextPowerOf2( int) int { return 1 << (64 - bits.LeadingZeros64(uint64(-1))) } func randSeed() uintptr { prngMutex.Lock() defer prngMutex.Unlock() return uintptr(prngSource.Uint64()) } type Int32Table struct{ table32 } func ( int, float64) *Int32Table { return &Int32Table{makeTable32(, )} } func ( *Int32Table) () { .reset() } func ( *Int32Table) () int { return .len } func ( *Int32Table) () int { return .size() } func ( *Int32Table) (, []int32) int { return .probe(unsafecast.Slice[uint32](), ) } func ( *Int32Table) ( sparse.Int32Array, []int32) int { return .probeArray(.Uint32Array(), ) } type Float32Table struct{ table32 } func ( int, float64) *Float32Table { return &Float32Table{makeTable32(, )} } func ( *Float32Table) () { .reset() } func ( *Float32Table) () int { return .len } func ( *Float32Table) () int { return .size() } func ( *Float32Table) ( []float32, []int32) int { return .probe(unsafecast.Slice[uint32](), ) } func ( *Float32Table) ( sparse.Float32Array, []int32) int { return .probeArray(.Uint32Array(), ) } type Uint32Table struct{ table32 } func ( int, float64) *Uint32Table { return &Uint32Table{makeTable32(, )} } func ( *Uint32Table) () { .reset() } func ( *Uint32Table) () int { return .len } func ( *Uint32Table) () int { return .size() } func ( *Uint32Table) ( []uint32, []int32) int { return .probe(, ) } func ( *Uint32Table) ( sparse.Uint32Array, []int32) int { return .probeArray(, ) } // table32 is the generic implementation of probing tables for 32 bit types. // // The table uses the following memory layout: // // [group 0][group 1][...][group N] // // Each group contains up to 7 key/value pairs, and is exactly 64 bytes in size, // which allows it to fit within a single cache line, and ensures that probes // can be performed with a single memory load per key. // // Groups fill up by appending new entries to the keys and values arrays. When a // group is full, the probe checks the next group. // // https://en.wikipedia.org/wiki/Linear_probing type table32 struct { len int maxLen int maxLoad float64 seed uintptr table []table32Group } const table32GroupSize = 7 type table32Group struct { keys [table32GroupSize]uint32 values [table32GroupSize]uint32 bits uint32 _ uint32 } func makeTable32( int, float64) ( table32) { if < 0 || > 1 { panic("max load of probing table must be a value between 0 and 1") } if < table32GroupSize { = table32GroupSize } .init(, ) return } func ( *table32) () int { return table32GroupSize * len(.table) } func ( *table32) ( int, float64) { , := tableSizeAndMaxLen(table32GroupSize, , ) * = table32{ maxLen: , maxLoad: , seed: randSeed(), table: make([]table32Group, ), } } func ( *table32) ( int) { := table32{} .init(, .maxLoad) .len = .len := make([]uintptr, table32GroupSize) := uintptr(len(.table)) - 1 for := range .table { := &.table[] := bits.OnesCount32(.bits) if aeshash.Enabled() { aeshash.MultiHash32([:], .keys[:], .seed) } else { wyhash.MultiHash32([:], .keys[:], .seed) } for , := range [:] { for { := &.table[&] if := bits.OnesCount32(.bits); < table32GroupSize { .bits = (.bits << 1) | 1 .keys[] = .keys[] .values[] = .values[] break } ++ } } } * = } func ( *table32) () { .len = 0 for := range .table { .table[] = table32Group{} } } func ( *table32) ( []uint32, []int32) int { return .probeArray(sparse.MakeUint32Array(), ) } func ( *table32) ( sparse.Uint32Array, []int32) int { := .Len() if := .len + ; > .maxLen { .grow() } var [probesPerLoop]uintptr var = .len var = aeshash.Enabled() _ = [:] for := 0; < ; { := len() + := len() if > { = = - } := .Slice(, ) := [::] := [::] if { aeshash.MultiHashUint32Array(, , .seed) } else { wyhash.MultiHashUint32Array(, , .seed) } .len = multiProbe32(.table, .len, , , ) = } return .len - } func multiProbe32Default( []table32Group, int, []uintptr, sparse.Uint32Array, []int32) int { := uintptr(len()) - 1 for , := range { := .Index() for { := &[&] := table32GroupSize := int32(0) for , := range .keys { if == { = break } } if := bits.OnesCount32(.bits); < { = int32(.values[]) } else { if == table32GroupSize { ++ continue } = int32() .bits = (.bits << 1) | 1 .keys[] = .values[] = uint32() ++ } [] = break } } return } type Int64Table struct{ table64 } func ( int, float64) *Int64Table { return &Int64Table{makeTable64(, )} } func ( *Int64Table) () { .reset() } func ( *Int64Table) () int { return .len } func ( *Int64Table) () int { return .size() } func ( *Int64Table) ( []int64, []int32) int { return .probe(unsafecast.Slice[uint64](), ) } func ( *Int64Table) ( sparse.Int64Array, []int32) int { return .probeArray(.Uint64Array(), ) } type Float64Table struct{ table64 } func ( int, float64) *Float64Table { return &Float64Table{makeTable64(, )} } func ( *Float64Table) () { .reset() } func ( *Float64Table) () int { return .len } func ( *Float64Table) () int { return .size() } func ( *Float64Table) ( []float64, []int32) int { return .probe(unsafecast.Slice[uint64](), ) } func ( *Float64Table) ( sparse.Float64Array, []int32) int { return .probeArray(.Uint64Array(), ) } type Uint64Table struct{ table64 } func ( int, float64) *Uint64Table { return &Uint64Table{makeTable64(, )} } func ( *Uint64Table) () { .reset() } func ( *Uint64Table) () int { return .len } func ( *Uint64Table) () int { return .size() } func ( *Uint64Table) ( []uint64, []int32) int { return .probe(, ) } func ( *Uint64Table) ( sparse.Uint64Array, []int32) int { return .probeArray(, ) } // table64 is the generic implementation of probing tables for 64 bit types. // // The table uses a layout similar to the one documented on the table for 32 bit // keys (see table32). Each group holds up to 4 key/value pairs (instead of 7 // like table32) so that each group fits in a single CPU cache line. This table // version has a bit lower memory density, with ~23% of table memory being used // for padding. // // Technically we could hold up to 5 entries per group and still fit within the // 64 bytes of a CPU cache line; on x86 platforms, AVX2 registers can only hold // four 64 bit values, we would need twice as many instructions per probe if the // groups were holding 5 values. The trade off of memory for compute efficiency // appeared to be the right choice at the time. type table64 struct { len int maxLen int maxLoad float64 seed uintptr table []table64Group } const table64GroupSize = 4 type table64Group struct { keys [table64GroupSize]uint64 values [table64GroupSize]uint32 bits uint32 _ uint32 _ uint32 _ uint32 } func makeTable64( int, float64) ( table64) { if < 0 || > 1 { panic("max load of probing table must be a value between 0 and 1") } if < table64GroupSize { = table64GroupSize } .init(, ) return } func ( *table64) () int { return table64GroupSize * len(.table) } func ( *table64) ( int, float64) { , := tableSizeAndMaxLen(table64GroupSize, , ) * = table64{ maxLen: , maxLoad: , seed: randSeed(), table: make([]table64Group, ), } } func ( *table64) ( int) { := table64{} .init(, .maxLoad) .len = .len := make([]uintptr, table64GroupSize) := uintptr(len(.table)) - 1 for := range .table { := &.table[] := bits.OnesCount32(.bits) if aeshash.Enabled() { aeshash.MultiHash64([:], .keys[:], .seed) } else { wyhash.MultiHash64([:], .keys[:], .seed) } for , := range [:] { for { := &.table[&] if := bits.OnesCount32(.bits); < table64GroupSize { .bits = (.bits << 1) | 1 .keys[] = .keys[] .values[] = .values[] break } ++ } } } * = } func ( *table64) () { .len = 0 for := range .table { .table[] = table64Group{} } } func ( *table64) ( []uint64, []int32) int { return .probeArray(sparse.MakeUint64Array(), ) } func ( *table64) ( sparse.Uint64Array, []int32) int { := .Len() if := .len + ; > .maxLen { .grow() } var [probesPerLoop]uintptr var = .len var = aeshash.Enabled() _ = [:] for := 0; < ; { := len() + := len() if > { = = - } := .Slice(, ) := [::] := [::] if { aeshash.MultiHashUint64Array(, , .seed) } else { wyhash.MultiHashUint64Array(, , .seed) } .len = multiProbe64(.table, .len, , , ) = } return .len - } func multiProbe64Default( []table64Group, int, []uintptr, sparse.Uint64Array, []int32) int { := uintptr(len()) - 1 for , := range { := .Index() for { := &[&] := table64GroupSize := int32(0) for , := range .keys { if == { = break } } if := bits.OnesCount32(.bits); < { = int32(.values[]) } else { if == table64GroupSize { ++ continue } = int32() .bits = (.bits << 1) | 1 .keys[] = .values[] = uint32() ++ } [] = break } } return } type Uint128Table struct{ table128 } func ( int, float64) *Uint128Table { return &Uint128Table{makeTable128(, )} } func ( *Uint128Table) () { .reset() } func ( *Uint128Table) () int { return .len } func ( *Uint128Table) () int { return .cap } func ( *Uint128Table) ( [][16]byte, []int32) int { return .probe(, ) } func ( *Uint128Table) ( sparse.Uint128Array, []int32) int { return .probeArray(, ) } // table128 is the generic implementation of probing tables for 128 bit types. // // This table uses the following memory layout: // // [key A][key B][...][value A][value B][...] // // The table stores values as their actual value plus one, and uses zero as a // sentinel to determine whether a slot is occupied. A linear probing strategy // is used to resolve conflicts. This approach results in at most two memory // loads for every four keys being tested, since the location of a key and its // corresponding value will not be contiguous on the same CPU cache line, but // a cache line can hold four 16 byte keys. type table128 struct { len int cap int maxLen int maxLoad float64 seed uintptr table []byte } func makeTable128( int, float64) ( table128) { if < 0 || > 1 { panic("max load of probing table must be a value between 0 and 1") } if < 8 { = 8 } .init(, ) return } func ( *table128) ( int, float64) { , := tableSizeAndMaxLen(1, , ) * = table128{ cap: , maxLen: , maxLoad: , seed: randSeed(), table: make([]byte, 16*+4*), } } func ( *table128) () ( [][16]byte, []int32) { := .cap * 16 return unsafecast.Slice[[16]byte](.table[:]), unsafecast.Slice[int32](.table[:]) } func ( *table128) ( int) { := table128{} .init(, .maxLoad) .len = .len , := .kv() := make([]uintptr, probesPerLoop) := aeshash.Enabled() _ = [:len()] for := 0; < len(); { := len() + := len() if > len() { = len() = len() - } := [::] := [::] := [::] if { aeshash.MultiHash128(, , .seed) } else { wyhash.MultiHash128(, , .seed) } .insert(, , ) = } * = } func ( *table128) ( []uintptr, [][16]byte, []int32) { , := .kv() := uintptr(.cap) - 1 for , := range { for { := & := [] if == 0 { [] = [] [] = [] break } ++ } } } func ( *table128) () { .len = 0 for := range .table { .table[] = 0 } } func ( *table128) ( [][16]byte, []int32) int { return .probeArray(sparse.MakeUint128Array(), ) } func ( *table128) ( sparse.Uint128Array, []int32) int { := .Len() if := .len + ; > .maxLen { .grow() } var [probesPerLoop]uintptr var = .len var = aeshash.Enabled() _ = [:] for := 0; < ; { := len() + := len() if > { = = - } := .Slice(, ) := [::] := [::] if { aeshash.MultiHashUint128Array(, , .seed) } else { wyhash.MultiHashUint128Array(, , .seed) } .len = multiProbe128(.table, .cap, .len, , , ) = } return .len - } func multiProbe128Default( []byte, , int, []uintptr, sparse.Uint128Array, []int32) int { := uintptr() - 1 := uintptr() * 16 := unsafecast.Slice[[16]byte]([:]) := unsafecast.Slice[int32]([:]) for , := range { := .Index() for { := & := [] if == 0 { [] = int32() ++ [] = [] = int32() break } if == [] { [] = - 1 break } ++ } } return }