package parquet
import (
"encoding/binary"
"io"
"reflect"
"strings"
"time"
"unsafe"
"github.com/apache/arrow-go/v18/arrow"
format "github.com/apache/arrow-go/v18/parquet/internal/gen-go/parquet"
)
// Constants used when decoding and sizing Int96 values.
const (
// julianUnixEpoch is the Julian day number subtracted from an Int96's stored
// day count so that the result is relative to the Unix epoch.
julianUnixEpoch int64 = 2440588
// nanosPerDay is the number of nanoseconds in one 24-hour day.
nanosPerDay int64 = 3600 * 24 * 1000 * 1000 * 1000
// Int96SizeBytes is the number of bytes occupied by a single Int96 value.
Int96SizeBytes int = 12
)
// Package-level traits values and the in-memory sizes of the byte-array types.
var (
// Int96Traits is the zero-size helper whose methods size and reinterpret
// buffers of Int96 values.
Int96Traits int96Traits
// ByteArrayTraits is the corresponding helper for ByteArray values.
ByteArrayTraits byteArrayTraits
// FixedLenByteArrayTraits is the corresponding helper for FixedLenByteArray values.
FixedLenByteArrayTraits fixedLenByteArrayTraits
// ByteArraySizeBytes is the size reflect reports for a ByteArray value itself
// (the slice header), not the length of the data it refers to.
ByteArraySizeBytes int = int (reflect .TypeOf (ByteArray {}).Size ())
// FixedLenByteArraySizeBytes is the size reflect reports for a
// FixedLenByteArray value itself (the slice header).
FixedLenByteArraySizeBytes int = int (reflect .TypeOf (FixedLenByteArray {}).Size ())
)
// ReaderAtSeeker combines io.ReaderAt and io.Seeker: a source that supports
// both reads at explicit offsets and seeking.
type ReaderAtSeeker interface {
io .ReaderAt
io .Seeker
}
// NewInt96 builds an Int96 from three 32-bit words. Each word is written in
// little-endian byte order: v[0] fills bytes 0-3, v[1] bytes 4-7 and v[2]
// bytes 8-11.
func NewInt96(v [3]uint32) (out Int96) {
	const wordSize = 4
	for i, word := range v {
		binary.LittleEndian.PutUint32(out[i*wordSize:], word)
	}
	return out
}
type Int96 [12 ]byte
func (i96 *Int96 ) SetNanoSeconds (nanos int64 ) {
binary .LittleEndian .PutUint64 (i96 [:8 ], uint64 (nanos ))
}
func (i96 Int96 ) String () string {
return i96 .ToTime ().String ()
}
func (i96 Int96 ) ToTime () time .Time {
nanos := binary .LittleEndian .Uint64 (i96 [:8 ])
jdays := binary .LittleEndian .Uint32 (i96 [8 :])
nanos = (uint64 (jdays )-uint64 (julianUnixEpoch ))*uint64 (nanosPerDay ) + nanos
t := time .Unix (0 , int64 (nanos ))
return t .UTC ()
}
// int96Traits is a zero-size type whose methods size and reinterpret buffers
// of Int96 values.
type int96Traits struct{}

// BytesRequired returns the number of bytes needed to hold n Int96 values.
func (int96Traits) BytesRequired(n int) int {
	return n * Int96SizeBytes
}

// CastFromBytes reinterprets b as a slice of Int96 without copying. The result
// shares memory with b and has len(b)/Int96SizeBytes elements; any trailing
// bytes that do not make up a whole element are left out.
func (int96Traits) CastFromBytes(b []byte) []Int96 {
	count := len(b) / Int96SizeBytes
	first := (*Int96)(unsafe.Pointer(unsafe.SliceData(b)))
	return unsafe.Slice(first, count)
}

// CastToBytes reinterprets b as a byte slice without copying. The result
// shares memory with b and has len(b)*Int96SizeBytes bytes.
func (int96Traits) CastToBytes(b []Int96) []byte {
	size := len(b) * Int96SizeBytes
	first := (*byte)(unsafe.Pointer(unsafe.SliceData(b)))
	return unsafe.Slice(first, size)
}
// ByteArray is a byte slice with convenience accessors.
type ByteArray []byte

// Len returns the number of bytes in b.
func (b ByteArray) Len() int {
	return len(b)
}

// String returns the bytes of b viewed as a string without copying them. The
// result shares memory with b, so b must not be modified while the string is
// still in use.
func (b ByteArray) String() string {
	// unsafe.String is the supported way to build a zero-copy string; it avoids
	// relying on the slice and string headers sharing a layout.
	return unsafe.String(unsafe.SliceData(b), len(b))
}

// Bytes returns b as a plain []byte that shares the same backing array.
func (b ByteArray) Bytes() []byte {
	return b
}
// byteArrayTraits is a zero-size type whose methods size and reinterpret
// buffers of ByteArray values.
type byteArrayTraits struct{}

// BytesRequired returns the number of bytes needed to hold n ByteArray values
// themselves (ByteArraySizeBytes each), not the data they refer to.
func (byteArrayTraits) BytesRequired(n int) int {
	return n * ByteArraySizeBytes
}

// CastFromBytes reinterprets the memory of b as a slice of ByteArray values
// without copying. The result has len(b)/ByteArraySizeBytes elements; trailing
// bytes that do not make up a whole element are left out.
//
// NOTE(review): the bytes are read as ByteArray slice headers, so b presumably
// must hold valid headers — confirm against callers.
func (byteArrayTraits) CastFromBytes(b []byte) []ByteArray {
	count := len(b) / ByteArraySizeBytes
	first := (*ByteArray)(unsafe.Pointer(unsafe.SliceData(b)))
	return unsafe.Slice(first, count)
}
// FixedLenByteArray is a byte slice with convenience accessors. The type
// itself does not enforce any particular length.
type FixedLenByteArray []byte

// Len returns the number of bytes in b.
func (b FixedLenByteArray) Len() int {
	return len(b)
}

// String returns the bytes of b viewed as a string without copying them. The
// result shares memory with b, so b must not be modified while the string is
// still in use.
func (b FixedLenByteArray) String() string {
	// unsafe.String is the supported way to build a zero-copy string; it avoids
	// relying on the slice and string headers sharing a layout.
	return unsafe.String(unsafe.SliceData(b), len(b))
}

// Bytes returns b as a plain []byte that shares the same backing array.
func (b FixedLenByteArray) Bytes() []byte {
	return b
}
// fixedLenByteArrayTraits is a zero-size type whose methods size and
// reinterpret buffers of FixedLenByteArray values.
type fixedLenByteArrayTraits struct{}

// BytesRequired returns the number of bytes needed to hold n FixedLenByteArray
// values themselves (FixedLenByteArraySizeBytes each), not the data they refer
// to.
func (fixedLenByteArrayTraits) BytesRequired(n int) int {
	return n * FixedLenByteArraySizeBytes
}

// CastFromBytes reinterprets the memory of b as a slice of FixedLenByteArray
// values without copying. The result has len(b)/FixedLenByteArraySizeBytes
// elements; trailing bytes that do not make up a whole element are left out.
//
// NOTE(review): the bytes are read as slice headers, so b presumably must hold
// valid headers — confirm against callers.
func (fixedLenByteArrayTraits) CastFromBytes(b []byte) []FixedLenByteArray {
	count := len(b) / FixedLenByteArraySizeBytes
	first := (*FixedLenByteArray)(unsafe.Pointer(unsafe.SliceData(b)))
	return unsafe.Slice(first, count)
}
// Package-level type definitions, most of them thin wrappers around types from
// the generated format package.
type (
// Type is a column value type; it wraps format.Type.
Type format .Type
// Cipher identifies an encryption cipher (see AesGcm and AesCtr).
Cipher int
// ColumnOrder is a pointer to a format.ColumnOrder.
ColumnOrder *format .ColumnOrder
// Version identifies a format version (see V1_0, V2_4, V2_6).
Version int8
// DataPageVersion identifies a data page version (see DataPageV1, DataPageV2).
DataPageVersion int8
// Encoding is a value encoding; it wraps format.Encoding.
Encoding format .Encoding
// Repetition is a field repetition kind; it wraps format.FieldRepetitionType.
Repetition format .FieldRepetitionType
// ColumnPath is the list of name components that make up a column's path.
ColumnPath []string
)
// String joins the path components with "." separators. A nil or empty path
// yields the empty string.
func (c ColumnPath) String() string {
	// strings.Join already returns "" for a nil or empty slice, so no separate
	// guard is needed.
	return strings.Join(c, ".")
}
// Extend returns a new ColumnPath consisting of the components of c followed
// by s. The receiver is not modified and the result does not share its backing
// array.
func (c ColumnPath) Extend(s string) ColumnPath {
	// Allocate exactly one extra slot so that neither append reallocates.
	extended := make(ColumnPath, 0, len(c)+1)
	extended = append(extended, c...)
	extended = append(extended, s)
	return extended
}
// ColumnPathFromString splits s on "." and returns the pieces as a ColumnPath.
// An empty string yields a path with a single empty component, as
// strings.Split does.
func ColumnPathFromString(s string) ColumnPath {
	parts := strings.Split(s, ".")
	return ColumnPath(parts)
}
// Cipher values, numbered from zero by iota. The names indicate AES in GCM and
// CTR modes respectively.
const (
// AesGcm has value 0.
AesGcm Cipher = iota
// AesCtr has value 1.
AesCtr
)
// Version values, numbered from zero by iota.
const (
// V1_0 has value 0.
V1_0 Version = iota
// V2_4 has value 1.
V2_4
// V2_6 has value 2.
V2_6
// V2_LATEST is an alias for the newest version listed here, currently V2_6.
V2_LATEST = V2_6
)
// DataPageVersion values, numbered from zero by iota.
const (
// DataPageV1 has value 0.
DataPageV1 DataPageVersion = iota
// DataPageV2 has value 1.
DataPageV2
)
// String returns the name of the encoding as reported by the underlying
// format.Encoding value.
func (e Encoding) String() string {
	underlying := format.Encoding(e)
	return underlying.String()
}
// Lookup tables that expose the format package's enum values under this
// package's own types.
var (
// Types holds one Type per format.Type constant, plus Undefined, a sentinel
// set to one past Type_FIXED_LEN_BYTE_ARRAY.
Types = struct {
Boolean Type
Int32 Type
Int64 Type
Int96 Type
Float Type
Double Type
ByteArray Type
FixedLenByteArray Type
Undefined Type
}{
Boolean : Type (format .Type_BOOLEAN ),
Int32 : Type (format .Type_INT32 ),
Int64 : Type (format .Type_INT64 ),
Int96 : Type (format .Type_INT96 ),
Float : Type (format .Type_FLOAT ),
Double : Type (format .Type_DOUBLE ),
ByteArray : Type (format .Type_BYTE_ARRAY ),
FixedLenByteArray : Type (format .Type_FIXED_LEN_BYTE_ARRAY ),
Undefined : Type (format .Type_FIXED_LEN_BYTE_ARRAY + 1 ),
}
// Encodings holds one Encoding per format.Encoding constant listed below.
Encodings = struct {
Plain Encoding
PlainDict Encoding
RLE Encoding
RLEDict Encoding
BitPacked Encoding
DeltaByteArray Encoding
DeltaBinaryPacked Encoding
DeltaLengthByteArray Encoding
ByteStreamSplit Encoding
}{
Plain : Encoding (format .Encoding_PLAIN ),
PlainDict : Encoding (format .Encoding_PLAIN_DICTIONARY ),
RLE : Encoding (format .Encoding_RLE ),
RLEDict : Encoding (format .Encoding_RLE_DICTIONARY ),
BitPacked : Encoding (format .Encoding_BIT_PACKED ),
DeltaByteArray : Encoding (format .Encoding_DELTA_BYTE_ARRAY ),
DeltaBinaryPacked : Encoding (format .Encoding_DELTA_BINARY_PACKED ),
DeltaLengthByteArray : Encoding (format .Encoding_DELTA_LENGTH_BYTE_ARRAY ),
ByteStreamSplit : Encoding (format .Encoding_BYTE_STREAM_SPLIT ),
}
// ColumnOrders holds the two ColumnOrder values used by this package:
// Undefined is a freshly constructed format.ColumnOrder, and TypeDefinedOrder
// is one whose TYPE_ORDER field is set.
ColumnOrders = struct {
Undefined ColumnOrder
TypeDefinedOrder ColumnOrder
}{
Undefined : format .NewColumnOrder (),
TypeDefinedOrder : &format .ColumnOrder {TYPE_ORDER : format .NewTypeDefinedOrder ()},
}
// DefaultColumnOrder is ColumnOrders.TypeDefinedOrder.
DefaultColumnOrder = ColumnOrders .TypeDefinedOrder
// Repetitions holds one Repetition per format.FieldRepetitionType constant,
// plus Undefined, a sentinel set to one past FieldRepetitionType_REPEATED.
Repetitions = struct {
Required Repetition
Optional Repetition
Repeated Repetition
Undefined Repetition
}{
Required : Repetition (format .FieldRepetitionType_REQUIRED ),
Optional : Repetition (format .FieldRepetitionType_OPTIONAL ),
Repeated : Repetition (format .FieldRepetitionType_REPEATED ),
Undefined : Repetition (format .FieldRepetitionType_REPEATED + 1 ),
}
)
// String returns "UNDEFINED" for Types.Undefined and otherwise the name
// reported by the underlying format.Type value.
func (t Type) String() string {
	if t == Types.Undefined {
		return "UNDEFINED"
	}
	return format.Type(t).String()
}
// String returns the lower-cased name reported by the underlying
// format.FieldRepetitionType value.
func (r Repetition) String() string {
	name := format.FieldRepetitionType(r).String()
	return strings.ToLower(name)
}
// ByteSize returns the in-memory size in bytes of one value of type t.
// Boolean reports 1; ByteArray and FixedLenByteArray report the size of the
// value itself (ByteArraySizeBytes / FixedLenByteArraySizeBytes), not of the
// data it refers to.
//
// It panics for any Type without a case below, including Types.Undefined.
func (t Type) ByteSize() int {
	var size int
	switch t {
	case Types.Boolean:
		size = 1
	case Types.Int32:
		size = arrow.Int32SizeBytes
	case Types.Int64:
		size = arrow.Int64SizeBytes
	case Types.Int96:
		size = Int96SizeBytes
	case Types.Float:
		size = arrow.Float32SizeBytes
	case Types.Double:
		size = arrow.Float64SizeBytes
	case Types.ByteArray:
		size = ByteArraySizeBytes
	case Types.FixedLenByteArray:
		size = FixedLenByteArraySizeBytes
	default:
		panic("no bytesize info for type")
	}
	return size
}
// ColumnTypes is the type constraint listing the Go types accepted by
// GetColumnType: bool, int32, int64, float32, float64, Int96, ByteArray and
// FixedLenByteArray.
type ColumnTypes interface {
bool | int32 | int64 | float32 | float64 | Int96 | ByteArray | FixedLenByteArray
}
// GetColumnType returns the Type from Types that corresponds to the Go type T.
//
// Every type in the ColumnTypes constraint has a case below; the default
// branch panics with "unknown column type" if none matches.
func GetColumnType[T ColumnTypes]() Type {
	var (
		zero T
		kind Type
	)
	// Dispatch on the dynamic type of T's zero value.
	switch any(zero).(type) {
	case bool:
		kind = Types.Boolean
	case int32:
		kind = Types.Int32
	case int64:
		kind = Types.Int64
	case float32:
		kind = Types.Float
	case float64:
		kind = Types.Double
	case Int96:
		kind = Types.Int96
	case ByteArray:
		kind = Types.ByteArray
	case FixedLenByteArray:
		kind = Types.FixedLenByteArray
	default:
		panic("unknown column type")
	}
	return kind
}
// The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64)
// Golds is a Go 101 project developed by Tapir Liu.
// PR and bug reports are welcome and can be submitted to the issue list.
// Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.