package z
import (
"fmt"
"math"
"os"
"reflect"
"strings"
"unsafe"
"github.com/dgraph-io/ristretto/v2/z/simd"
)
var (
pageSize = os .Getpagesize ()
maxKeys = (pageSize / 16 ) - 1
oneThird = int (float64 (maxKeys ) / 3 )
)
const (
absoluteMax = uint64 (math .MaxUint64 - 1 )
minSize = 1 << 20
)
type Tree struct {
buffer *Buffer
data []byte
nextPage uint64
freePage uint64
stats TreeStats
}
func (t *Tree ) initRootNode () {
t .newNode (0 )
t .Set (absoluteMax , 0 )
}
func NewTree (tag string ) *Tree {
const defaultTag = "tree"
if tag == "" {
tag = defaultTag
}
t := &Tree {buffer : NewBuffer (minSize , tag )}
t .Reset ()
return t
}
func NewTreePersistent (path string ) (*Tree , error ) {
t := &Tree {}
var err error
t .buffer , err = NewBufferPersistent (path , minSize )
if err != nil {
return nil , err
}
t .buffer .offset = uint64 (len (t .buffer .buf ))
t .data = t .buffer .Bytes ()
root := t .node (1 )
isInitialized := root .pageID () != 0
if !isInitialized {
t .nextPage = 1
t .freePage = 0
t .initRootNode ()
} else {
t .reinit ()
}
return t , nil
}
func (t *Tree ) reinit () {
t .nextPage = 1
for int (t .nextPage )*pageSize < len (t .data ) {
n := t .node (t .nextPage )
if n .pageID () == 0 {
break
}
t .nextPage ++
}
maxPageId := t .nextPage - 1
tailPages := make ([]bool , maxPageId )
t .Iterate (func (n node ) {
i := n .pageID () - 1
tailPages [i ] = true
if n .isLeaf () {
t .stats .NumLeafKeys += n .numKeys ()
}
})
pointedPages := make ([]uint64 , 0 )
for i , isTail := range tailPages {
if !isTail {
pageId := uint64 (i ) + 1
if nextPageId := t .node (pageId ).uint64 (0 ); nextPageId != 0 {
pointedPages = append (pointedPages , nextPageId )
}
t .stats .NumPagesFree ++
}
}
for _ , pageId := range pointedPages {
i := pageId - 1
tailPages [i ] = true
}
for i , isTail := range tailPages {
if !isTail {
pageId := uint64 (i ) + 1
t .freePage = pageId
break
}
}
}
func (t *Tree ) Reset () {
Memclr (t .buffer .buf )
t .buffer .Reset ()
t .buffer .AllocateOffset (minSize )
t .data = t .buffer .Bytes ()
t .stats = TreeStats {}
t .nextPage = 1
t .freePage = 0
t .initRootNode ()
}
func (t *Tree ) Close () error {
if t == nil {
return nil
}
return t .buffer .Release ()
}
type TreeStats struct {
Allocated int
Bytes int
NumLeafKeys int
NumPages int
NumPagesFree int
Occupancy float64
PageSize int
}
func (t *Tree ) Stats () TreeStats {
numPages := int (t .nextPage - 1 )
out := TreeStats {
Bytes : numPages * pageSize ,
Allocated : len (t .data ),
NumLeafKeys : t .stats .NumLeafKeys ,
NumPages : numPages ,
NumPagesFree : t .stats .NumPagesFree ,
PageSize : pageSize ,
}
out .Occupancy = 100.0 * float64 (out .NumLeafKeys ) / float64 (maxKeys *numPages )
return out
}
func BytesToUint64Slice (b []byte ) []uint64 {
if len (b ) == 0 {
return nil
}
var u64s []uint64
hdr := (*reflect .SliceHeader )(unsafe .Pointer (&u64s ))
hdr .Len = len (b ) / 8
hdr .Cap = hdr .Len
hdr .Data = uintptr (unsafe .Pointer (&b [0 ]))
return u64s
}
func (t *Tree ) newNode (bit uint64 ) node {
var pageId uint64
if t .freePage > 0 {
pageId = t .freePage
t .stats .NumPagesFree --
} else {
pageId = t .nextPage
t .nextPage ++
offset := int (pageId ) * pageSize
reqSize := offset + pageSize
if reqSize > len (t .data ) {
t .buffer .AllocateOffset (reqSize - len (t .data ))
t .data = t .buffer .Bytes ()
}
}
n := t .node (pageId )
if t .freePage > 0 {
t .freePage = n .uint64 (0 )
}
zeroOut (n )
n .setBit (bit )
n .setAt (keyOffset (maxKeys ), pageId )
return n
}
func getNode(data []byte ) node {
return node (BytesToUint64Slice (data ))
}
func zeroOut(data []uint64 ) {
for i := 0 ; i < len (data ); i ++ {
data [i ] = 0
}
}
func (t *Tree ) node (pid uint64 ) node {
if pid == 0 {
return nil
}
start := pageSize * int (pid )
return getNode (t .data [start : start +pageSize ])
}
func (t *Tree ) Set (k , v uint64 ) {
if k == math .MaxUint64 || k == 0 {
panic ("Error setting zero or MaxUint64" )
}
root := t .set (1 , k , v )
if root .isFull () {
right := t .split (1 )
left := t .newNode (root .bits ())
root = t .node (1 )
copy (left [:keyOffset (maxKeys )], root )
left .setNumKeys (root .numKeys ())
zeroOut (root [:keyOffset (maxKeys )])
root .setNumKeys (0 )
root .set (left .maxKey (), left .pageID ())
root .set (right .maxKey (), right .pageID ())
}
}
func (t *Tree ) set (pid , k , v uint64 ) node {
n := t .node (pid )
if n .isLeaf () {
t .stats .NumLeafKeys += n .set (k , v )
return n
}
idx := n .search (k )
if idx >= maxKeys {
panic ("search returned index >= maxKeys" )
}
if n .key (idx ) == 0 {
n .setAt (keyOffset (idx ), k )
n .setNumKeys (n .numKeys () + 1 )
}
child := t .node (n .val (idx ))
if child == nil {
child = t .newNode (bitLeaf )
n = t .node (pid )
n .setAt (valOffset (idx ), child .pageID ())
}
child = t .set (child .pageID (), k , v )
n = t .node (pid )
if child .isFull () {
nn := t .split (child .pageID ())
n = t .node (pid )
child = t .node (n .uint64 (valOffset (idx )))
n .set (child .maxKey (), child .pageID ())
n .set (nn .maxKey (), nn .pageID ())
}
return n
}
func (t *Tree ) Get (k uint64 ) uint64 {
if k == math .MaxUint64 || k == 0 {
panic ("Does not support getting MaxUint64/Zero" )
}
root := t .node (1 )
return t .get (root , k )
}
func (t *Tree ) get (n node , k uint64 ) uint64 {
if n .isLeaf () {
return n .get (k )
}
idx := n .search (k )
if idx == n .numKeys () || n .key (idx ) == 0 {
return 0
}
child := t .node (n .uint64 (valOffset (idx )))
assert (child != nil )
return t .get (child , k )
}
func (t *Tree ) DeleteBelow (ts uint64 ) {
root := t .node (1 )
t .stats .NumLeafKeys = 0
t .compact (root , ts )
assert (root .numKeys () >= 1 )
}
func (t *Tree ) compact (n node , ts uint64 ) int {
if n .isLeaf () {
numKeys := n .compact (ts )
t .stats .NumLeafKeys += n .numKeys ()
return numKeys
}
N := n .numKeys ()
for i := 0 ; i < N ; i ++ {
assert (n .key (i ) > 0 )
childID := n .uint64 (valOffset (i ))
child := t .node (childID )
if rem := t .compact (child , ts ); rem == 0 && i < N -1 {
t .stats .NumLeafKeys -= child .numKeys ()
child .setAt (0 , t .freePage )
t .freePage = childID
n .setAt (valOffset (i ), 0 )
t .stats .NumPagesFree ++
}
}
return n .compact (1 )
}
func (t *Tree ) iterate (n node , fn func (node )) {
fn (n )
if n .isLeaf () {
return
}
for i := 0 ; i < maxKeys ; i ++ {
if n .key (i ) == 0 {
return
}
childID := n .uint64 (valOffset (i ))
assert (childID > 0 )
child := t .node (childID )
t .iterate (child , fn )
}
}
func (t *Tree ) Iterate (fn func (node )) {
root := t .node (1 )
t .iterate (root , fn )
}
func (t *Tree ) IterateKV (f func (key , val uint64 ) (newVal uint64 )) {
t .Iterate (func (n node ) {
if !n .isLeaf () {
return
}
for i := 0 ; i < n .numKeys (); i ++ {
key := n .key (i )
val := n .val (i )
if val == 0 {
continue
}
newVal := f (key , val )
if newVal != 0 {
n .setAt (valOffset (i ), newVal )
}
}
})
}
func (t *Tree ) print (n node , parentID uint64 ) {
n .print (parentID )
if n .isLeaf () {
return
}
pid := n .pageID ()
for i := 0 ; i < maxKeys ; i ++ {
if n .key (i ) == 0 {
return
}
childID := n .uint64 (valOffset (i ))
child := t .node (childID )
t .print (child , pid )
}
}
func (t *Tree ) Print () {
root := t .node (1 )
t .print (root , 0 )
}
func (t *Tree ) split (pid uint64 ) node {
n := t .node (pid )
if !n .isFull () {
panic ("This should be called only when n is full" )
}
nn := t .newNode (n .bits ())
n = t .node (pid )
rightHalf := n [keyOffset (maxKeys /2 ):keyOffset (maxKeys )]
copy (nn , rightHalf )
nn .setNumKeys (maxKeys - maxKeys /2 )
zeroOut (rightHalf )
n .setNumKeys (maxKeys / 2 )
return nn
}
func (t *Tree ) shareWithSiblingXXX (n node , idx int ) bool {
if idx == 0 {
return false
}
left := t .node (n .val (idx - 1 ))
ns := left .numKeys ()
if ns >= maxKeys /2 {
return false
}
right := t .node (n .val (idx ))
copied := copy (left [keyOffset (ns ):], right [:keyOffset (oneThird )])
copied /= 2
left .setNumKeys (ns + copied )
n .setAt (keyOffset (idx -1 ), left .maxKey ())
until := copy (right , right [keyOffset (oneThird ):keyOffset (maxKeys )])
right .setNumKeys (until / 2 )
zeroOut (right [until :keyOffset (maxKeys )])
return true
}
type node []uint64
func (n node ) uint64 (start int ) uint64 { return n [start ] }
func keyOffset(i int ) int { return 2 * i }
func valOffset(i int ) int { return 2 *i + 1 }
func (n node ) numKeys () int { return int (n .uint64 (valOffset (maxKeys )) & 0xFFFFFFFF ) }
func (n node ) pageID () uint64 { return n .uint64 (keyOffset (maxKeys )) }
func (n node ) key (i int ) uint64 { return n .uint64 (keyOffset (i )) }
func (n node ) val (i int ) uint64 { return n .uint64 (valOffset (i )) }
func (n node ) data (i int ) []uint64 { return n [keyOffset (i ):keyOffset (i +1 )] }
func (n node ) setAt (start int , k uint64 ) {
n [start ] = k
}
func (n node ) setNumKeys (num int ) {
idx := valOffset (maxKeys )
val := n [idx ]
val &= 0xFFFFFFFF00000000
val |= uint64 (num )
n [idx ] = val
}
func (n node ) moveRight (lo int ) {
hi := n .numKeys ()
assert (hi != maxKeys )
copy (n [keyOffset (lo +1 ):keyOffset (hi +1 )], n [keyOffset (lo ):keyOffset (hi )])
}
const (
bitLeaf = uint64 (1 << 63 )
)
func (n node ) setBit (b uint64 ) {
vo := valOffset (maxKeys )
val := n [vo ]
val &= 0xFFFFFFFF
val |= b
n [vo ] = val
}
func (n node ) bits () uint64 {
return n .val (maxKeys ) & 0xFF00000000000000
}
func (n node ) isLeaf () bool {
return n .bits ()&bitLeaf > 0
}
func (n node ) isFull () bool {
return n .numKeys () == maxKeys
}
func (n node ) search (k uint64 ) int {
N := n .numKeys ()
if N < 4 {
for i := 0 ; i < N ; i ++ {
if ki := n .key (i ); ki >= k {
return i
}
}
return N
}
return int (simd .Search (n [:2 *N ], k ))
}
func (n node ) maxKey () uint64 {
idx := n .numKeys ()
if idx > 0 {
idx --
}
return n .key (idx )
}
func (n node ) compact (lo uint64 ) int {
N := n .numKeys ()
mk := n .maxKey ()
var left , right int
for right = 0 ; right < N ; right ++ {
if n .val (right ) < lo && n .key (right ) < mk {
continue
}
if left != right {
copy (n .data (left ), n .data (right ))
}
left ++
}
zeroOut (n [keyOffset (left ):keyOffset (right )])
n .setNumKeys (left )
if left == 1 && n .key (0 ) == mk && n .val (0 ) < lo {
return 0
}
return left
}
func (n node ) get (k uint64 ) uint64 {
idx := n .search (k )
if idx == n .numKeys () {
return 0
}
if ki := n .key (idx ); ki == k {
return n .val (idx )
}
return 0
}
func (n node ) set (k , v uint64 ) (numAdded int ) {
idx := n .search (k )
ki := n .key (idx )
if n .numKeys () == maxKeys {
assert (ki == k )
}
if ki > k {
n .moveRight (idx )
}
if ki != k {
n .setNumKeys (n .numKeys () + 1 )
numAdded = 1
}
if ki == 0 || ki >= k {
n .setAt (keyOffset (idx ), k )
n .setAt (valOffset (idx ), v )
return
}
panic ("shouldn't reach here" )
}
func (n node ) iterate (fn func (node , int )) {
for i := 0 ; i < maxKeys ; i ++ {
if k := n .key (i ); k > 0 {
fn (n , i )
} else {
break
}
}
}
func (n node ) print (parentID uint64 ) {
var keys []string
n .iterate (func (n node , i int ) {
keys = append (keys , fmt .Sprintf ("%d" , n .key (i )))
})
if len (keys ) > 8 {
copy (keys [4 :], keys [len (keys )-4 :])
keys [3 ] = "..."
keys = keys [:8 ]
}
fmt .Printf ("%d Child of: %d num keys: %d keys: %s\n" ,
n .pageID (), parentID , n .numKeys (), strings .Join (keys , " " ))
}
The pages are generated with Golds v0.8.4 . (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu .
PR and bug reports are welcome and can be submitted to the issue list .
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds .