package bbolt
import (
"bytes"
"fmt"
"sort"
"unsafe"
)
type node struct {
bucket *Bucket
isLeaf bool
unbalanced bool
spilled bool
key []byte
pgid pgid
parent *node
children nodes
inodes inodes
}
func (n *node ) root () *node {
if n .parent == nil {
return n
}
return n .parent .root ()
}
func (n *node ) minKeys () int {
if n .isLeaf {
return 1
}
return 2
}
func (n *node ) size () int {
sz , elsz := pageHeaderSize , n .pageElementSize ()
for i := 0 ; i < len (n .inodes ); i ++ {
item := &n .inodes [i ]
sz += elsz + uintptr (len (item .key )) + uintptr (len (item .value ))
}
return int (sz )
}
func (n *node ) sizeLessThan (v uintptr ) bool {
sz , elsz := pageHeaderSize , n .pageElementSize ()
for i := 0 ; i < len (n .inodes ); i ++ {
item := &n .inodes [i ]
sz += elsz + uintptr (len (item .key )) + uintptr (len (item .value ))
if sz >= v {
return false
}
}
return true
}
func (n *node ) pageElementSize () uintptr {
if n .isLeaf {
return leafPageElementSize
}
return branchPageElementSize
}
func (n *node ) childAt (index int ) *node {
if n .isLeaf {
panic (fmt .Sprintf ("invalid childAt(%d) on a leaf node" , index ))
}
return n .bucket .node (n .inodes [index ].pgid , n )
}
func (n *node ) childIndex (child *node ) int {
index := sort .Search (len (n .inodes ), func (i int ) bool { return bytes .Compare (n .inodes [i ].key , child .key ) != -1 })
return index
}
func (n *node ) numChildren () int {
return len (n .inodes )
}
func (n *node ) nextSibling () *node {
if n .parent == nil {
return nil
}
index := n .parent .childIndex (n )
if index >= n .parent .numChildren ()-1 {
return nil
}
return n .parent .childAt (index + 1 )
}
func (n *node ) prevSibling () *node {
if n .parent == nil {
return nil
}
index := n .parent .childIndex (n )
if index == 0 {
return nil
}
return n .parent .childAt (index - 1 )
}
func (n *node ) put (oldKey , newKey , value []byte , pgid pgid , flags uint32 ) {
if pgid >= n .bucket .tx .meta .pgid {
panic (fmt .Sprintf ("pgid (%d) above high water mark (%d)" , pgid , n .bucket .tx .meta .pgid ))
} else if len (oldKey ) <= 0 {
panic ("put: zero-length old key" )
} else if len (newKey ) <= 0 {
panic ("put: zero-length new key" )
}
index := sort .Search (len (n .inodes ), func (i int ) bool { return bytes .Compare (n .inodes [i ].key , oldKey ) != -1 })
exact := (len (n .inodes ) > 0 && index < len (n .inodes ) && bytes .Equal (n .inodes [index ].key , oldKey ))
if !exact {
n .inodes = append (n .inodes , inode {})
copy (n .inodes [index +1 :], n .inodes [index :])
}
inode := &n .inodes [index ]
inode .flags = flags
inode .key = newKey
inode .value = value
inode .pgid = pgid
_assert (len (inode .key ) > 0 , "put: zero-length inode key" )
}
func (n *node ) del (key []byte ) {
index := sort .Search (len (n .inodes ), func (i int ) bool { return bytes .Compare (n .inodes [i ].key , key ) != -1 })
if index >= len (n .inodes ) || !bytes .Equal (n .inodes [index ].key , key ) {
return
}
n .inodes = append (n .inodes [:index ], n .inodes [index +1 :]...)
n .unbalanced = true
}
func (n *node ) read (p *page ) {
n .pgid = p .id
n .isLeaf = ((p .flags & leafPageFlag ) != 0 )
n .inodes = make (inodes , int (p .count ))
for i := 0 ; i < int (p .count ); i ++ {
inode := &n .inodes [i ]
if n .isLeaf {
elem := p .leafPageElement (uint16 (i ))
inode .flags = elem .flags
inode .key = elem .key ()
inode .value = elem .value ()
} else {
elem := p .branchPageElement (uint16 (i ))
inode .pgid = elem .pgid
inode .key = elem .key ()
}
_assert (len (inode .key ) > 0 , "read: zero-length inode key" )
}
if len (n .inodes ) > 0 {
n .key = n .inodes [0 ].key
_assert (len (n .key ) > 0 , "read: zero-length node key" )
} else {
n .key = nil
}
}
func (n *node ) write (p *page ) {
if n .isLeaf {
p .flags |= leafPageFlag
} else {
p .flags |= branchPageFlag
}
if len (n .inodes ) >= 0xFFFF {
panic (fmt .Sprintf ("inode overflow: %d (pgid=%d)" , len (n .inodes ), p .id ))
}
p .count = uint16 (len (n .inodes ))
if p .count == 0 {
return
}
off := unsafe .Sizeof (*p ) + n .pageElementSize ()*uintptr (len (n .inodes ))
for i , item := range n .inodes {
_assert (len (item .key ) > 0 , "write: zero-length inode key" )
sz := len (item .key ) + len (item .value )
b := unsafeByteSlice (unsafe .Pointer (p ), off , 0 , sz )
off += uintptr (sz )
if n .isLeaf {
elem := p .leafPageElement (uint16 (i ))
elem .pos = uint32 (uintptr (unsafe .Pointer (&b [0 ])) - uintptr (unsafe .Pointer (elem )))
elem .flags = item .flags
elem .ksize = uint32 (len (item .key ))
elem .vsize = uint32 (len (item .value ))
} else {
elem := p .branchPageElement (uint16 (i ))
elem .pos = uint32 (uintptr (unsafe .Pointer (&b [0 ])) - uintptr (unsafe .Pointer (elem )))
elem .ksize = uint32 (len (item .key ))
elem .pgid = item .pgid
_assert (elem .pgid != p .id , "write: circular dependency occurred" )
}
l := copy (b , item .key )
copy (b [l :], item .value )
}
}
func (n *node ) split (pageSize uintptr ) []*node {
var nodes []*node
node := n
for {
a , b := node .splitTwo (pageSize )
nodes = append (nodes , a )
if b == nil {
break
}
node = b
}
return nodes
}
func (n *node ) splitTwo (pageSize uintptr ) (*node , *node ) {
if len (n .inodes ) <= (minKeysPerPage *2 ) || n .sizeLessThan (pageSize ) {
return n , nil
}
var fillPercent = n .bucket .FillPercent
if fillPercent < minFillPercent {
fillPercent = minFillPercent
} else if fillPercent > maxFillPercent {
fillPercent = maxFillPercent
}
threshold := int (float64 (pageSize ) * fillPercent )
splitIndex , _ := n .splitIndex (threshold )
if n .parent == nil {
n .parent = &node {bucket : n .bucket , children : []*node {n }}
}
next := &node {bucket : n .bucket , isLeaf : n .isLeaf , parent : n .parent }
n .parent .children = append (n .parent .children , next )
next .inodes = n .inodes [splitIndex :]
n .inodes = n .inodes [:splitIndex ]
n .bucket .tx .stats .Split ++
return n , next
}
func (n *node ) splitIndex (threshold int ) (index , sz uintptr ) {
sz = pageHeaderSize
for i := 0 ; i < len (n .inodes )-minKeysPerPage ; i ++ {
index = uintptr (i )
inode := n .inodes [i ]
elsize := n .pageElementSize () + uintptr (len (inode .key )) + uintptr (len (inode .value ))
if index >= minKeysPerPage && sz +elsize > uintptr (threshold ) {
break
}
sz += elsize
}
return
}
func (n *node ) spill () error {
var tx = n .bucket .tx
if n .spilled {
return nil
}
sort .Sort (n .children )
for i := 0 ; i < len (n .children ); i ++ {
if err := n .children [i ].spill (); err != nil {
return err
}
}
n .children = nil
var nodes = n .split (uintptr (tx .db .pageSize ))
for _ , node := range nodes {
if node .pgid > 0 {
tx .db .freelist .free (tx .meta .txid , tx .page (node .pgid ))
node .pgid = 0
}
p , err := tx .allocate ((node .size () + tx .db .pageSize - 1 ) / tx .db .pageSize )
if err != nil {
return err
}
if p .id >= tx .meta .pgid {
panic (fmt .Sprintf ("pgid (%d) above high water mark (%d)" , p .id , tx .meta .pgid ))
}
node .pgid = p .id
node .write (p )
node .spilled = true
if node .parent != nil {
var key = node .key
if key == nil {
key = node .inodes [0 ].key
}
node .parent .put (key , node .inodes [0 ].key , nil , node .pgid , 0 )
node .key = node .inodes [0 ].key
_assert (len (node .key ) > 0 , "spill: zero-length node key" )
}
tx .stats .Spill ++
}
if n .parent != nil && n .parent .pgid == 0 {
n .children = nil
return n .parent .spill ()
}
return nil
}
func (n *node ) rebalance () {
if !n .unbalanced {
return
}
n .unbalanced = false
n .bucket .tx .stats .Rebalance ++
var threshold = n .bucket .tx .db .pageSize / 4
if n .size () > threshold && len (n .inodes ) > n .minKeys () {
return
}
if n .parent == nil {
if !n .isLeaf && len (n .inodes ) == 1 {
child := n .bucket .node (n .inodes [0 ].pgid , n )
n .isLeaf = child .isLeaf
n .inodes = child .inodes [:]
n .children = child .children
for _ , inode := range n .inodes {
if child , ok := n .bucket .nodes [inode .pgid ]; ok {
child .parent = n
}
}
child .parent = nil
delete (n .bucket .nodes , child .pgid )
child .free ()
}
return
}
if n .numChildren () == 0 {
n .parent .del (n .key )
n .parent .removeChild (n )
delete (n .bucket .nodes , n .pgid )
n .free ()
n .parent .rebalance ()
return
}
_assert (n .parent .numChildren () > 1 , "parent must have at least 2 children" )
var target *node
var useNextSibling = (n .parent .childIndex (n ) == 0 )
if useNextSibling {
target = n .nextSibling ()
} else {
target = n .prevSibling ()
}
if useNextSibling {
for _ , inode := range target .inodes {
if child , ok := n .bucket .nodes [inode .pgid ]; ok {
child .parent .removeChild (child )
child .parent = n
child .parent .children = append (child .parent .children , child )
}
}
n .inodes = append (n .inodes , target .inodes ...)
n .parent .del (target .key )
n .parent .removeChild (target )
delete (n .bucket .nodes , target .pgid )
target .free ()
} else {
for _ , inode := range n .inodes {
if child , ok := n .bucket .nodes [inode .pgid ]; ok {
child .parent .removeChild (child )
child .parent = target
child .parent .children = append (child .parent .children , child )
}
}
target .inodes = append (target .inodes , n .inodes ...)
n .parent .del (n .key )
n .parent .removeChild (n )
delete (n .bucket .nodes , n .pgid )
n .free ()
}
n .parent .rebalance ()
}
func (n *node ) removeChild (target *node ) {
for i , child := range n .children {
if child == target {
n .children = append (n .children [:i ], n .children [i +1 :]...)
return
}
}
}
func (n *node ) dereference () {
if n .key != nil {
key := make ([]byte , len (n .key ))
copy (key , n .key )
n .key = key
_assert (n .pgid == 0 || len (n .key ) > 0 , "dereference: zero-length node key on existing node" )
}
for i := range n .inodes {
inode := &n .inodes [i ]
key := make ([]byte , len (inode .key ))
copy (key , inode .key )
inode .key = key
_assert (len (inode .key ) > 0 , "dereference: zero-length inode key" )
value := make ([]byte , len (inode .value ))
copy (value , inode .value )
inode .value = value
}
for _ , child := range n .children {
child .dereference ()
}
n .bucket .tx .stats .NodeDeref ++
}
func (n *node ) free () {
if n .pgid != 0 {
n .bucket .tx .db .freelist .free (n .bucket .tx .meta .txid , n .bucket .tx .page (n .pgid ))
n .pgid = 0
}
}
type nodes []*node
func (s nodes ) Len () int { return len (s ) }
func (s nodes ) Swap (i , j int ) { s [i ], s [j ] = s [j ], s [i ] }
func (s nodes ) Less (i , j int ) bool {
return bytes .Compare (s [i ].inodes [0 ].key , s [j ].inodes [0 ].key ) == -1
}
type inode struct {
flags uint32
pgid pgid
key []byte
value []byte
}
type inodes []inode
The pages are generated with Golds v0.8.2 . (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu .
PR and bug reports are welcome and can be submitted to the issue list .
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds .