package parquet
import (
"reflect"
"sort"
"unicode"
"unicode/utf8"
"github.com/parquet-go/parquet-go/compress"
"github.com/parquet-go/parquet-go/deprecated"
"github.com/parquet-go/parquet-go/encoding"
"github.com/parquet-go/parquet-go/format"
)
// Node is the interface implemented by the schema nodes in this file:
// leafNode, Group, and the wrapper types that embed another Node and
// override a subset of its methods.
type Node interface {
// ID returns the node's integer identifier. leafNode and Group return 0;
// FieldID wraps a node to report a caller-chosen value.
ID () int
// String returns a textual representation of the node.
String () string
// Type returns the node's Type (Group returns groupType{}).
Type () Type
// Optional, Repeated and Required report the node's repetition. Every
// implementation in this file sets exactly one of the three to true.
Optional () bool
Repeated () bool
Required () bool
// Leaf reports whether the node is a leaf: true for leafNode, false for
// Group.
Leaf () bool
// Fields returns the node's child fields. leafNode returns nil; Group
// returns its entries sorted by name.
Fields () []Field
// Encoding returns the encoding attached to the node, or nil when none
// has been set (see Encoded).
Encoding () encoding .Encoding
// Compression returns the compression codec attached to the node, or nil
// when none has been set (see Compressed).
Compression () compress .Codec
// GoType returns the Go type corresponding to the node.
GoType () reflect .Type
}
// Field is a Node that is a named member of a parent node, as returned by
// Node.Fields.
type Field interface {
Node
// Name returns the name of the field within its parent.
Name () string
// Value returns the value of this field found within base. For the
// groupField implementation in this file, base is indexed as a map keyed
// by the field name (after unwrapping an interface value).
Value (base reflect .Value ) reflect .Value
}
// Encoded returns a node that wraps the given leaf node and reports encoding
// from its Encoding method.
//
// It panics if node is not a leaf, or if encoding is non-nil and canEncode
// rejects it for the kind of the node's type. A nil encoding is accepted
// without validation.
func Encoded(node Node, encoding encoding.Encoding) Node {
	if !node.Leaf() {
		panic("cannot add encoding to a non-leaf node")
	}
	if encoding != nil {
		// Only a concrete encoding can be validated against the node's kind.
		if kind := node.Type().Kind(); !canEncode(encoding, kind) {
			panic("cannot apply " + encoding.Encoding().String() + " to node of type " + kind.String())
		}
	}
	wrapped := &encodedNode{Node: node, encoding: encoding}
	return wrapped
}
// encodedNode decorates a Node with an explicit encoding; every other method
// is forwarded to the embedded Node.
type encodedNode struct {
	Node
	encoding encoding.Encoding
}

// Encoding returns the encoding recorded when the wrapper was created.
func (enc *encodedNode) Encoding() encoding.Encoding {
	return enc.encoding
}
// Compressed returns a node that wraps the given leaf node and reports codec
// from its Compression method.
//
// It panics if node is not a leaf.
func Compressed(node Node, codec compress.Codec) Node {
	if !node.Leaf() {
		panic("cannot add compression codec to a non-leaf node")
	}
	wrapped := &compressedNode{Node: node, codec: codec}
	return wrapped
}
// compressedNode decorates a Node with an explicit compression codec; every
// other method is forwarded to the embedded Node.
type compressedNode struct {
	Node
	codec compress.Codec
}

// Compression returns the codec recorded when the wrapper was created.
func (c *compressedNode) Compression() compress.Codec {
	return c.codec
}
// Optional wraps node so that it reports itself as optional (and neither
// repeated nor required).
func Optional(node Node) Node { return &optionalNode{node} }

// optionalNode overrides the repetition and Go type of the embedded Node;
// every other method is forwarded unchanged.
type optionalNode struct{ Node }

func (opt *optionalNode) Optional() bool { return true }
func (opt *optionalNode) Repeated() bool { return false }
func (opt *optionalNode) Required() bool { return false }

// GoType returns a pointer to the wrapped node's Go type.
//
// reflect.PointerTo is used in place of the deprecated reflect.PtrTo; the two
// return the same type.
func (opt *optionalNode) GoType() reflect.Type {
	return reflect.PointerTo(opt.Node.GoType())
}
// FieldID wraps node so that its ID method reports id.
func FieldID(node Node, id int) Node {
	wrapped := &fieldIDNode{Node: node, id: id}
	return wrapped
}

// fieldIDNode decorates a Node with an explicit identifier; every other
// method is forwarded to the embedded Node.
type fieldIDNode struct {
	Node
	id int
}

// ID returns the identifier recorded when the wrapper was created.
func (n *fieldIDNode) ID() int {
	return n.id
}
// Repeated wraps node so that it reports itself as repeated (and neither
// optional nor required).
func Repeated(node Node) Node {
	return &repeatedNode{Node: node}
}

// repeatedNode overrides the repetition and Go type of the embedded Node;
// every other method is forwarded unchanged.
type repeatedNode struct {
	Node
}

func (r *repeatedNode) Optional() bool { return false }
func (r *repeatedNode) Repeated() bool { return true }
func (r *repeatedNode) Required() bool { return false }

// GoType returns a slice of the wrapped node's Go type.
func (r *repeatedNode) GoType() reflect.Type {
	elem := r.Node.GoType()
	return reflect.SliceOf(elem)
}
// Required wraps node so that it reports itself as required (and neither
// optional nor repeated).
func Required(node Node) Node {
	return &requiredNode{Node: node}
}

// requiredNode overrides the repetition of the embedded Node; every other
// method is forwarded unchanged.
type requiredNode struct {
	Node
}

func (r *requiredNode) Optional() bool { return false }
func (r *requiredNode) Repeated() bool { return false }
func (r *requiredNode) Required() bool { return true }

// GoType returns the wrapped node's Go type as is.
func (r *requiredNode) GoType() reflect.Type {
	return r.Node.GoType()
}
// node is an empty struct type. No methods or uses of it are visible in this
// part of the file.
type node struct {}
// Leaf returns a leaf node of the given type. The node is required, has no
// fields, and reports a zero ID, nil encoding and nil compression codec.
func Leaf(typ Type) Node {
	leaf := new(leafNode)
	leaf.typ = typ
	return leaf
}

// leafNode is the base Node implementation for leaves; it only stores the
// node's Type.
type leafNode struct {
	typ Type
}

func (leaf *leafNode) ID() int { return 0 }

func (leaf *leafNode) String() string { return sprint("", leaf) }

func (leaf *leafNode) Type() Type { return leaf.typ }

func (leaf *leafNode) Optional() bool { return false }

func (leaf *leafNode) Repeated() bool { return false }

func (leaf *leafNode) Required() bool { return true }

func (leaf *leafNode) Leaf() bool { return true }

func (leaf *leafNode) Fields() []Field { return nil }

func (leaf *leafNode) Encoding() encoding.Encoding { return nil }

func (leaf *leafNode) Compression() compress.Codec { return nil }

// GoType derives the Go type from the leaf's parquet type via goTypeOfLeaf.
func (leaf *leafNode) GoType() reflect.Type { return goTypeOfLeaf(leaf) }
// repetitionTypes holds one addressable copy of each repetition type so that
// fieldRepetitionTypePtrOf can return pointers into this array.
// NOTE(review): the array is indexed by the format constants themselves, so
// this presumably relies on format.Required, format.Optional and
// format.Repeated being 0, 1 and 2 — confirm against the format package.
var repetitionTypes = [...]format .FieldRepetitionType {
0 : format .Required ,
1 : format .Optional ,
2 : format .Repeated ,
}
// fieldRepetitionTypePtrOf returns a pointer to the repetitionTypes entry
// matching the node's repetition, checking Required, then Optional, then
// Repeated. It returns nil when the node reports none of them.
func fieldRepetitionTypePtrOf(node Node) *format.FieldRepetitionType {
	if node.Required() {
		return &repetitionTypes[format.Required]
	}
	if node.Optional() {
		return &repetitionTypes[format.Optional]
	}
	if node.Repeated() {
		return &repetitionTypes[format.Repeated]
	}
	return nil
}
// fieldRepetitionTypeOf returns the repetition type of node: Optional is
// checked first, then Repeated, and Required is the fallback.
func fieldRepetitionTypeOf(node Node) format.FieldRepetitionType {
	if node.Optional() {
		return format.Optional
	}
	if node.Repeated() {
		return format.Repeated
	}
	return format.Required
}
// applyFieldRepetitionType returns the levels that result from applying the
// repetition type t to the given levels: Optional increments the definition
// level, Repeated increments both levels, and any other value leaves them
// unchanged.
func applyFieldRepetitionType(t format.FieldRepetitionType, repetitionLevel, definitionLevel byte) (byte, byte) {
	if t == format.Repeated {
		repetitionLevel++
	}
	if t == format.Optional || t == format.Repeated {
		definitionLevel++
	}
	return repetitionLevel, definitionLevel
}
// Group is a non-leaf Node whose children are the entries of the map, keyed
// by field name. A Group is required, has a zero ID, and carries no encoding
// or compression codec.
type Group map[string]Node

func (g Group) ID() int { return 0 }

func (g Group) String() string { return sprint("", g) }

func (g Group) Type() Type { return groupType{} }

func (g Group) Optional() bool { return false }

func (g Group) Repeated() bool { return false }

func (g Group) Required() bool { return true }

func (g Group) Leaf() bool { return false }

// Fields returns the group's children as Field values ordered by ascending
// name. Sorting makes the result independent of map iteration order.
func (g Group) Fields() []Field {
	names := make([]string, 0, len(g))
	for name := range g {
		names = append(names, name)
	}
	sort.Strings(names)

	// The Field values point into entries, so it is allocated at its final
	// size and never appended to.
	entries := make([]groupField, len(names))
	fields := make([]Field, len(names))
	for i, name := range names {
		entries[i] = groupField{Node: g[name], name: name}
		fields[i] = &entries[i]
	}
	return fields
}

func (g Group) Encoding() encoding.Encoding { return nil }

func (g Group) Compression() compress.Codec { return nil }

// GoType derives a struct type from the group's fields via goTypeOfGroup.
func (g Group) GoType() reflect.Type { return goTypeOfGroup(g) }
// groupField is the Field implementation returned by Group.Fields: a child
// node paired with the name it has in the group.
type groupField struct {
	Node
	name string
}

// Name returns the key under which the field is stored in its Group.
func (f *groupField) Name() string {
	return f.name
}

// Value looks up the field's name in base, which is indexed as a map.
//
// When base is an interface value it is unwrapped first; a nil interface, or
// an interface holding a nil pointer, yields the zero reflect.Value.
func (f *groupField) Value(base reflect.Value) reflect.Value {
	if base.Kind() == reflect.Interface {
		if base.IsNil() {
			return reflect.Value{}
		}
		base = base.Elem()
		if base.Kind() == reflect.Pointer && base.IsNil() {
			return reflect.Value{}
		}
	}
	// The key is built from a pointer to the field's name rather than from
	// the string value itself.
	key := reflect.ValueOf(&f.name).Elem()
	return base.MapIndex(key)
}
// goTypeOf returns the Go type for node according to its repetition:
// optional nodes map to a pointer, repeated nodes to a slice, and all other
// nodes to the underlying leaf or group type.
func goTypeOf(node Node) reflect.Type {
	if node.Optional() {
		return goTypeOfOptional(node)
	}
	if node.Repeated() {
		return goTypeOfRepeated(node)
	}
	return goTypeOfRequired(node)
}
// goTypeOfOptional returns a pointer to the required Go type of node.
//
// reflect.PointerTo is used in place of the deprecated reflect.PtrTo; the two
// return the same type.
func goTypeOfOptional(node Node) reflect.Type {
	return reflect.PointerTo(goTypeOfRequired(node))
}
// goTypeOfRepeated returns a slice of the required Go type of node.
func goTypeOfRepeated(node Node) reflect.Type {
	elem := goTypeOfRequired(node)
	return reflect.SliceOf(elem)
}
// goTypeOfRequired returns the Go type of node ignoring its repetition: the
// leaf type for leaves, a struct type for groups.
func goTypeOfRequired(node Node) reflect.Type {
	if !node.Leaf() {
		return goTypeOfGroup(node)
	}
	return goTypeOfLeaf(node)
}
// goTypeOfLeaf returns the Go type for a leaf node.
//
// If the node's Type has a GoType method, its result is returned. Otherwise
// the type is chosen from the Kind of the node's Type; fixed-length byte
// arrays become a byte array of the type's Length. It panics on a kind
// outside the listed ones.
func goTypeOfLeaf(node Node) reflect.Type {
	type goTyper interface {
		GoType() reflect.Type
	}

	typ := node.Type()
	if custom, ok := typ.(goTyper); ok {
		return custom.GoType()
	}

	switch typ.Kind() {
	case Boolean:
		return reflect.TypeOf(false)
	case Int32:
		return reflect.TypeOf(int32(0))
	case Int64:
		return reflect.TypeOf(int64(0))
	case Int96:
		return reflect.TypeOf(deprecated.Int96{})
	case Float:
		return reflect.TypeOf(float32(0))
	case Double:
		return reflect.TypeOf(float64(0))
	case ByteArray:
		return reflect.TypeOf([]byte(nil))
	case FixedLenByteArray:
		elem := reflect.TypeOf(byte(0))
		return reflect.ArrayOf(typ.Length(), elem)
	}
	panic("BUG: parquet type returned an unsupported kind")
}
// goTypeOfGroup returns a struct type with one field per child of node, in
// the order returned by node.Fields. Each struct field is named after the
// child, with its first letter upper-cased, and typed with the child's
// GoType.
func goTypeOfGroup(node Node) reflect.Type {
	children := node.Fields()
	structFields := make([]reflect.StructField, 0, len(children))
	for _, child := range children {
		structFields = append(structFields, reflect.StructField{
			Name: exportedStructFieldName(child.Name()),
			Type: child.GoType(),
		})
	}
	return reflect.StructOf(structFields)
}
// exportedStructFieldName returns name with its first rune converted to
// upper case. The first rune is decoded as UTF-8, so an empty name or an
// invalid leading byte yields a leading U+FFFD.
func exportedStructFieldName(name string) string {
	first, width := utf8.DecodeRuneInString(name)
	return string(unicode.ToUpper(first)) + name[width:]
}
// isList reports whether the node's type has a logical type whose List
// member is set.
func isList(node Node) bool {
	if lt := node.Type().LogicalType(); lt != nil {
		return lt.List != nil
	}
	return false
}
// isMap reports whether the node's type has a logical type whose Map member
// is set.
func isMap(node Node) bool {
	if lt := node.Type().LogicalType(); lt != nil {
		return lt.Map != nil
	}
	return false
}
// numLeafColumnsOf returns the number of leaf nodes under node, converted
// with makeColumnIndex.
func numLeafColumnsOf(node Node) int16 {
	count := numLeafColumns(node, 0)
	return makeColumnIndex(count)
}
// numLeafColumns returns columnIndex plus the number of leaves found in the
// tree rooted at node, visiting children in Fields order.
func numLeafColumns(node Node, columnIndex int) int {
	if node.Leaf() {
		return columnIndex + 1
	}
	children := node.Fields()
	for i := range children {
		columnIndex = numLeafColumns(children[i], columnIndex)
	}
	return columnIndex
}
// listElementOf returns the element node of a list-shaped node: the field
// named "element", or failing that "item", inside the node's "list" field.
//
// It panics when node is a leaf, has no "list" field, or the "list" field
// has neither child.
func listElementOf(node Node) Node {
	const malformed = "node with logical type LIST is not composed of a repeated .list.element"

	if node.Leaf() {
		panic(malformed)
	}
	list := fieldByName(node, "list")
	if list == nil {
		panic(malformed)
	}
	// "element" takes precedence over "item" when both are present.
	for _, name := range [...]string{"element", "item"} {
		if elem := fieldByName(list, name); elem != nil {
			return elem
		}
	}
	panic(malformed)
}
// mapKeyValueOf returns the key/value group of a map-shaped node.
//
// node must be a non-leaf that is required or optional. Its "key_value"
// field is tried first, then "map"; the first one that is a repeated
// non-leaf with both a "key" and a "value" field, where "key" is required,
// is returned. It panics when no such field exists.
func mapKeyValueOf(node Node) Node {
	if !node.Leaf() && (node.Required() || node.Optional()) {
		for _, groupName := range []string{"key_value", "map"} {
			keyValue := fieldByName(node, groupName)
			if keyValue == nil || keyValue.Leaf() || !keyValue.Repeated() {
				continue
			}
			key := fieldByName(keyValue, "key")
			value := fieldByName(keyValue, "value")
			if key != nil && value != nil && key.Required() {
				return keyValue
			}
		}
	}
	panic("node with logical type MAP is not composed of a repeated .key_value group (or .map group) with key and value fields")
}
// encodingOf returns the encoding to use for node: the node's own encoding
// when it has one, otherwise DeltaLengthByteArray for nodes whose type kind
// is ByteArray and Plain for everything else.
func encodingOf(node Node) encoding.Encoding {
	enc := node.Encoding()
	isByteArray := node.Type().Kind() == ByteArray
	switch {
	case enc != nil:
		return enc
	case isByteArray:
		return &DeltaLengthByteArray
	default:
		return &Plain
	}
}
// forEachNodeOf calls do for node and then, recursively, for every
// descendant, passing each node together with its name. The root is
// reported under the given name; descendants under their field names. A
// parent is always visited before its children.
func forEachNodeOf(name string, node Node, do func(string, Node)) {
	do(name, node)
	children := node.Fields()
	for i := range children {
		child := children[i]
		forEachNodeOf(child.Name(), child, do)
	}
}
// fieldByName returns the first field of node whose name equals name, or
// nil when there is none.
func fieldByName(node Node, name string) Field {
	children := node.Fields()
	for i := range children {
		if children[i].Name() == name {
			return children[i]
		}
	}
	return nil
}
// nodesAreEqual reports whether the two nodes are both leaves or both
// groups, and equal according to leafNodesAreEqual or groupNodesAreEqual
// respectively.
func nodesAreEqual(node1, node2 Node) bool {
	leaf1, leaf2 := node1.Leaf(), node2.Leaf()
	if leaf1 != leaf2 {
		return false
	}
	if leaf1 {
		return leafNodesAreEqual(node1, node2)
	}
	return groupNodesAreEqual(node1, node2)
}
// typesAreEqual reports whether the two types have the same kind, the same
// length, and deeply equal logical types.
func typesAreEqual(type1, type2 Type) bool {
	if type1.Kind() != type2.Kind() {
		return false
	}
	if type1.Length() != type2.Length() {
		return false
	}
	return reflect.DeepEqual(type1.LogicalType(), type2.LogicalType())
}
// repetitionsAreEqual reports whether the two nodes agree on both Optional
// and Repeated.
func repetitionsAreEqual(node1, node2 Node) bool {
	if node1.Optional() != node2.Optional() {
		return false
	}
	return node1.Repeated() == node2.Repeated()
}
// leafNodesAreEqual reports whether the two nodes have equal types and equal
// repetitions.
func leafNodesAreEqual(node1, node2 Node) bool {
	if !typesAreEqual(node1.Type(), node2.Type()) {
		return false
	}
	return repetitionsAreEqual(node1, node2)
}
// groupNodesAreEqual reports whether the two nodes have the same repetition
// and the same number of fields, and whether fields at the same position
// have equal names and are themselves equal according to nodesAreEqual.
func groupNodesAreEqual(node1, node2 Node) bool {
	left, right := node1.Fields(), node2.Fields()
	if len(left) != len(right) || !repetitionsAreEqual(node1, node2) {
		return false
	}
	for i, l := range left {
		r := right[i]
		if l.Name() != r.Name() || !nodesAreEqual(l, r) {
			return false
		}
	}
	return true
}
The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.