package array
import (
"fmt"
"sync/atomic"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/bitutil"
"github.com/apache/arrow-go/v18/arrow/memory"
"github.com/apache/arrow-go/v18/internal/json"
)
const (
// minBuilderCapacity has the value 1 << 5 (32). It is not referenced by
// any code visible in this part of the file.
// NOTE(review): presumably a lower bound on the capacity used by concrete
// builders; confirm against its callers.
minBuilderCapacity = 1 << 5
)
// Builder is the interface returned by NewBuilder for every supported arrow
// data type. The unexported methods (init, resize, newData) mean that only
// types declared in this package can satisfy it.
type Builder interface {
// Builders can be populated by unmarshalling JSON.
json .Unmarshaler
// Type returns the arrow data type associated with this builder.
Type () arrow .DataType
// Retain increases the reference count by 1.
Retain ()
// Release is the counterpart of Retain.
// NOTE(review): presumably decrements the reference count and frees the
// builder's memory at zero; the implementation is not visible here.
Release ()
// Len returns the number of elements currently in the builder.
Len () int
// Cap returns the builder's current capacity, in elements.
Cap () int
// NullN returns the number of null elements recorded by the builder.
NullN () int
// AppendNull appends a single null element.
AppendNull ()
// AppendNulls appends n null elements.
AppendNulls (n int )
// AppendEmptyValue appends a single empty value.
// NOTE(review): presumably the zero value of the builder's type; confirm
// per implementation.
AppendEmptyValue ()
// AppendEmptyValues appends n empty values.
AppendEmptyValues (n int )
// AppendValueFromString appends a value given in string form, returning
// an error on failure.
AppendValueFromString (string ) error
// Reserve takes an element count n.
// NOTE(review): presumably ensures room for n more elements; confirm per
// implementation.
Reserve (n int )
// Resize takes an element count n.
// NOTE(review): presumably adjusts the allocated capacity to n elements;
// confirm per implementation.
Resize (n int )
// NewArray returns an arrow.Array.
// NOTE(review): presumably built from the appended values, resetting the
// builder afterwards; confirm per implementation.
NewArray () arrow .Array
// IsNull reports whether the element at index i is null.
IsNull (i int ) bool
// SetNull marks the element at index i as null.
SetNull (i int )
// UnsafeAppendBoolToBitmap appends one validity flag to the validity
// bitmap (true means valid).
UnsafeAppendBoolToBitmap (bool )
// init prepares the builder for the given capacity, in elements.
init(capacity int )
// resize changes the capacity to newBits elements; the init callback is
// used when nothing has been allocated yet.
resize(newBits int , init func (int ))
// UnmarshalOne takes a JSON decoder and returns an error.
// NOTE(review): presumably decodes a single value and appends it.
UnmarshalOne (*json .Decoder ) error
// Unmarshal takes a JSON decoder and returns an error.
// NOTE(review): presumably decodes and appends a sequence of values.
Unmarshal (*json .Decoder ) error
// newData returns a *Data.
// NOTE(review): presumably the array data assembled from the builder's
// buffers; confirm per implementation.
newData() *Data
}
// builder holds the state shared by the methods in this file: a reference
// count, the allocator, the validity bitmap and the element counters.
type builder struct {
// refCount is the reference count; Retain adds 1 to it.
refCount atomic .Int64
// mem is the allocator used to create nullBitmap.
mem memory .Allocator
// nullBitmap is the validity bitmap: a set bit marks a valid element and a
// cleared bit marks a null one. It is nil until init or reserve creates it,
// and again after reset.
nullBitmap *memory .Buffer
// nulls is the number of null elements appended so far.
nulls int
// length is the number of elements appended so far.
length int
// capacity is the number of elements the builder is currently sized for,
// as recorded by init and resize.
capacity int
}
// Retain increases the reference count by 1. The increment is performed
// atomically on the builder's refCount field.
func (b *builder) Retain() {
	b.refCount.Add(1)
}
// Len returns the number of elements appended to the builder so far.
func (b *builder) Len() int {
	return b.length
}
// Cap returns the builder's current capacity, in elements, as last recorded
// by init or resize.
func (b *builder) Cap() int {
	return b.capacity
}
// NullN returns the number of null elements recorded by the builder.
func (b *builder) NullN() int {
	return b.nulls
}
// IsNull reports whether the element at index i is null, i.e. whether its
// bit in the validity bitmap is cleared. An empty bitmap always yields false.
// The index is not range-checked here.
func (b *builder) IsNull(i int) bool {
	if b.nullBitmap.Len() == 0 {
		return false
	}
	return bitutil.BitIsNotSet(b.nullBitmap.Bytes(), i)
}
// SetNull marks the previously appended element at index i as null by
// clearing its bit in the validity bitmap.
//
// The null counter is kept in sync: it is incremented only when the element
// was valid beforehand, so calling SetNull repeatedly on the same index does
// not inflate NullN.
//
// It panics with "arrow/array: index out of range" when i is negative or not
// less than Len().
func (b *builder) SetNull(i int) {
	if i < 0 || i >= b.length {
		panic("arrow/array: index out of range")
	}
	bits := b.nullBitmap.Bytes()
	if bitutil.BitIsNotSet(bits, i) {
		// Already null: nothing to clear and nothing to count.
		return
	}
	bitutil.ClearBit(bits, i)
	// Valid -> null transition; without this NullN would under-report.
	b.nulls++
}
// init allocates a fresh validity bitmap large enough for capacity elements
// (rounded up to whole bytes), records the capacity, and zeroes the whole
// underlying buffer.
func (b *builder) init(capacity int) {
	b.nullBitmap = memory.NewResizableBuffer(b.mem)
	b.nullBitmap.Resize(bitutil.CeilByte(capacity) / 8)
	b.capacity = capacity
	memory.Set(b.nullBitmap.Buf(), 0)
}
// reset releases the validity bitmap, if one is held, and returns the null
// count, length and capacity to zero.
func (b *builder) reset() {
	if bitmap := b.nullBitmap; bitmap != nil {
		bitmap.Release()
		b.nullBitmap = nil
	}

	b.nulls, b.length, b.capacity = 0, 0, 0
}
// resize changes the builder's capacity to newBits elements.
//
// When no validity bitmap exists yet, the work is delegated entirely to the
// supplied init callback. Otherwise the bitmap is resized to hold newBits
// bits, any newly exposed tail of the underlying buffer is zeroed, and, when
// shrinking below the current length, the length is truncated and the null
// count is recomputed from the bits that remain.
func (b *builder) resize(newBits int, init func(int)) {
	bitmap := b.nullBitmap
	if bitmap == nil {
		init(newBits)
		return
	}

	wantBytes := bitutil.CeilByte(newBits) / 8
	prevBytes := bitmap.Len()
	bitmap.Resize(wantBytes)
	b.capacity = newBits

	if wantBytes > prevBytes {
		// Growing: clear everything past the previously used bytes.
		memory.Set(bitmap.Buf()[prevBytes:], 0)
	}

	if b.length <= newBits {
		return
	}
	// Shrinking below the current length drops elements; recount the nulls.
	b.length = newBits
	b.nulls = newBits - bitutil.CountSetBits(bitmap.Buf(), 0, newBits)
}
// reserve makes room for elements additional entries. When the current
// capacity is insufficient, the resize callback is invoked with the required
// total rounded up to the next power of two. Afterwards a validity bitmap
// buffer is created if the builder still has none.
func (b *builder) reserve(elements int, resize func(int)) {
	if needed := b.length + elements; needed > b.capacity {
		resize(bitutil.NextPowerOf2(needed))
	}

	if b.nullBitmap != nil {
		return
	}
	b.nullBitmap = memory.NewResizableBuffer(b.mem)
}
// unsafeAppendBoolsToBitmap appends one validity bit per entry of valid
// (true = valid, false = null), updating the null count and the length.
//
// When valid is empty, length elements are appended as valid instead. No
// capacity check is made: the bitmap must already be large enough.
func (b *builder) unsafeAppendBoolsToBitmap(valid []bool, length int) {
	if len(valid) == 0 {
		b.unsafeSetValid(length)
		return
	}

	byteIdx := b.length / 8
	bitIdx := byte(b.length % 8)
	bitmap := b.nullBitmap.Bytes()
	// Bits are accumulated in a local byte and flushed once per bitmap byte.
	pending := bitmap[byteIdx]

	for _, isValid := range valid {
		if bitIdx == 8 {
			// The current byte is full: write it back and load the next one.
			bitIdx = 0
			bitmap[byteIdx] = pending
			byteIdx++
			pending = bitmap[byteIdx]
		}

		if !isValid {
			pending &= bitutil.FlippedBitMask[bitIdx]
			b.nulls++
		} else {
			pending |= bitutil.BitMask[bitIdx]
		}
		bitIdx++
	}

	if bitIdx != 0 {
		// Flush the last, possibly partial, byte.
		bitmap[byteIdx] = pending
	}
	b.length += len(valid)
}
// unsafeSetValid appends length elements that are all valid by setting their
// bits in the validity bitmap. It works in three phases: single bits up to
// the next byte boundary, whole bytes filled with 0xff, and single bits for
// whatever remains. No capacity check is made.
func (b *builder) unsafeSetValid(length int) {
	// Number of bits needed to reach the next byte boundary, capped at length.
	head := min(8-(b.length%8), length)
	if head == 8 {
		// Already byte-aligned with at least a full byte to write.
		head = 0
	}

	bitmap := b.nullBitmap.Bytes()
	aligned := b.length + head
	for i := b.length; i < aligned; i++ {
		bitutil.SetBit(bitmap, i)
	}

	// Whole bytes are filled in one call.
	firstByte := aligned / 8
	wholeBytes := (length - head) / 8
	memory.Set(bitmap[firstByte:firstByte+wholeBytes], 0xff)

	// Remaining bits that do not fill a whole byte.
	total := b.length + length
	for i := aligned + (wholeBytes * 8); i < total; i++ {
		bitutil.SetBit(bitmap, i)
	}

	b.length = total
}
// UnsafeAppendBoolToBitmap appends a single validity flag. A valid element
// gets its bit set in the validity bitmap; a null element only increments
// the null count and leaves the bit untouched. No capacity check is made.
func (b *builder) UnsafeAppendBoolToBitmap(isValid bool) {
	if !isValid {
		b.nulls++
	} else {
		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	}
	b.length++
}
// NewBuilder returns a Builder for dtype that allocates from mem, selecting
// the concrete builder constructor by dtype.ID().
//
// It panics when:
//   - the type ID has no case in the switch ("unsupported builder");
//   - an EXTENSION dtype implements neither CustomExtensionBuilder nor
//     arrow.ExtensionType ("invalid extension type");
//   - a case using an unchecked type assertion receives a dtype whose
//     concrete type does not match its ID (runtime assertion panic).
func NewBuilder (mem memory .Allocator , dtype arrow .DataType ) Builder {
switch dtype .ID () {
// Types whose builder needs nothing beyond the allocator.
case arrow .NULL :
return NewNullBuilder (mem )
case arrow .BOOL :
return NewBooleanBuilder (mem )
case arrow .UINT8 :
return NewUint8Builder (mem )
case arrow .INT8 :
return NewInt8Builder (mem )
case arrow .UINT16 :
return NewUint16Builder (mem )
case arrow .INT16 :
return NewInt16Builder (mem )
case arrow .UINT32 :
return NewUint32Builder (mem )
case arrow .INT32 :
return NewInt32Builder (mem )
case arrow .UINT64 :
return NewUint64Builder (mem )
case arrow .INT64 :
return NewInt64Builder (mem )
case arrow .FLOAT16 :
return NewFloat16Builder (mem )
case arrow .FLOAT32 :
return NewFloat32Builder (mem )
case arrow .FLOAT64 :
return NewFloat64Builder (mem )
case arrow .STRING :
return NewStringBuilder (mem )
case arrow .LARGE_STRING :
return NewLargeStringBuilder (mem )
// Both binary variants share one constructor, told apart by the type passed.
case arrow .BINARY :
return NewBinaryBuilder (mem , arrow .BinaryTypes .Binary )
case arrow .LARGE_BINARY :
return NewBinaryBuilder (mem , arrow .BinaryTypes .LargeBinary )
case arrow .FIXED_SIZE_BINARY :
typ := dtype .(*arrow .FixedSizeBinaryType )
return NewFixedSizeBinaryBuilder (mem , typ )
case arrow .DATE32 :
return NewDate32Builder (mem )
case arrow .DATE64 :
return NewDate64Builder (mem )
case arrow .TIMESTAMP :
typ := dtype .(*arrow .TimestampType )
return NewTimestampBuilder (mem , typ )
case arrow .TIME32 :
typ := dtype .(*arrow .Time32Type )
return NewTime32Builder (mem , typ )
case arrow .TIME64 :
typ := dtype .(*arrow .Time64Type )
return NewTime64Builder (mem , typ )
case arrow .INTERVAL_MONTHS :
return NewMonthIntervalBuilder (mem )
case arrow .INTERVAL_DAY_TIME :
return NewDayTimeIntervalBuilder (mem )
case arrow .INTERVAL_MONTH_DAY_NANO :
return NewMonthDayNanoIntervalBuilder (mem )
// The decimal cases use checked assertions: on a mismatch the case ends
// without returning and control reaches the "unsupported builder" panic
// after the switch.
case arrow .DECIMAL32 :
if typ , ok := dtype .(*arrow .Decimal32Type ); ok {
return NewDecimal32Builder (mem , typ )
}
case arrow .DECIMAL64 :
if typ , ok := dtype .(*arrow .Decimal64Type ); ok {
return NewDecimal64Builder (mem , typ )
}
case arrow .DECIMAL128 :
if typ , ok := dtype .(*arrow .Decimal128Type ); ok {
return NewDecimal128Builder (mem , typ )
}
case arrow .DECIMAL256 :
if typ , ok := dtype .(*arrow .Decimal256Type ); ok {
return NewDecimal256Builder (mem , typ )
}
// Nested and parameterised types: the concrete type is obtained with an
// unchecked assertion and handed (or its element field) to the constructor.
case arrow .LIST :
typ := dtype .(*arrow .ListType )
return NewListBuilderWithField (mem , typ .ElemField ())
case arrow .STRUCT :
typ := dtype .(*arrow .StructType )
return NewStructBuilder (mem , typ )
case arrow .SPARSE_UNION :
typ := dtype .(*arrow .SparseUnionType )
return NewSparseUnionBuilder (mem , typ )
case arrow .DENSE_UNION :
typ := dtype .(*arrow .DenseUnionType )
return NewDenseUnionBuilder (mem , typ )
case arrow .DICTIONARY :
typ := dtype .(*arrow .DictionaryType )
return NewDictionaryBuilder (mem , typ )
case arrow .LARGE_LIST :
typ := dtype .(*arrow .LargeListType )
return NewLargeListBuilderWithField (mem , typ .ElemField ())
case arrow .MAP :
typ := dtype .(*arrow .MapType )
return NewMapBuilderWithType (mem , typ )
case arrow .LIST_VIEW :
typ := dtype .(*arrow .ListViewType )
return NewListViewBuilderWithField (mem , typ .ElemField ())
case arrow .LARGE_LIST_VIEW :
typ := dtype .(*arrow .LargeListViewType )
return NewLargeListViewBuilderWithField (mem , typ .ElemField ())
case arrow .EXTENSION :
// A type that supplies its own builder takes precedence over the generic
// extension builder.
if custom , ok := dtype .(CustomExtensionBuilder ); ok {
return custom .NewBuilder (mem )
}
if typ , ok := dtype .(arrow .ExtensionType ); ok {
return NewExtensionBuilder (mem , typ )
}
panic (fmt .Errorf ("arrow/array: invalid extension type: %T" , dtype ))
case arrow .FIXED_SIZE_LIST :
typ := dtype .(*arrow .FixedSizeListType )
return NewFixedSizeListBuilderWithField (mem , typ .Len (), typ .ElemField ())
case arrow .DURATION :
typ := dtype .(*arrow .DurationType )
return NewDurationBuilder (mem , typ )
case arrow .RUN_END_ENCODED :
typ := dtype .(*arrow .RunEndEncodedType )
return NewRunEndEncodedBuilder (mem , typ .RunEnds (), typ .Encoded ())
case arrow .BINARY_VIEW :
return NewBinaryViewBuilder (mem )
case arrow .STRING_VIEW :
return NewStringViewBuilder (mem )
}
// Reached for unhandled type IDs and for decimal IDs whose concrete type
// did not match.
panic (fmt .Errorf ("arrow/array: unsupported builder for %T" , dtype ))
}
// The pages are generated with Golds v0.8.2 (GOOS=linux GOARCH=amd64).
// Golds is a Go 101 project developed by Tapir Liu.
// PR and bug reports are welcome and can be submitted to the issue list.
// Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.