package parquet
import (
"crypto/rand"
"unicode/utf8"
format "github.com/apache/arrow-go/v18/parquet/internal/gen-go/parquet"
)
// Defaults and sizes used by the encryption and decryption property
// constructors in this file.
const (
// DefaultEncryptionAlgorithm is the cipher NewFileEncryptionProperties uses
// unless WithAlg overrides it.
DefaultEncryptionAlgorithm = AesGcm
// MaximalAadMetadataLength is presumably an upper bound on AAD metadata size;
// it is not referenced in the visible part of this file — TODO confirm against its users.
MaximalAadMetadataLength int32 = 256
// DefaultEncryptedFooter is the initial footer-encryption setting of
// NewFileEncryptionProperties; WithPlaintextFooter turns it off.
DefaultEncryptedFooter = true
// DefaultCheckSignature is the initial footer-integrity-check setting of
// NewFileDecryptionProperties; DisableFooterSignatureVerification turns it off.
DefaultCheckSignature = true
// DefaultAllowPlaintextFiles is the initial plaintext-allowed setting of
// NewFileDecryptionProperties; WithPlaintextAllowed turns it on.
DefaultAllowPlaintextFiles = false
// AadFileUniqueLength is the number of random bytes NewFileEncryptionProperties
// generates for the file-unique part of the AAD.
AadFileUniqueLength int32 = 8
)
// ColumnPathToDecryptionPropsMap maps a column path to the decryption
// properties for that column.
type ColumnPathToDecryptionPropsMap map [string ]*ColumnDecryptionProperties
// ColumnPathToEncryptionPropsMap maps a column path to the encryption
// properties for that column.
type ColumnPathToEncryptionPropsMap map [string ]*ColumnEncryptionProperties
// ColumnEncryptionProperties describes how a single column is encrypted:
// which column it applies to, the column key (if any) and its metadata, and
// whether the properties have already been attached to a file.
type ColumnEncryptionProperties struct {
	columnPath             string
	encrypted              bool
	encryptedWithFooterKey bool
	key                    string
	keyMetadata            string
	utilized               bool
}

// ColumnPath returns the path of the column these properties belong to.
func (ce *ColumnEncryptionProperties) ColumnPath() string { return ce.columnPath }

// IsEncrypted reports whether the column is marked as encrypted.
func (ce *ColumnEncryptionProperties) IsEncrypted() bool {
	return ce.encrypted
}

// IsEncryptedWithFooterKey reports whether the column is encrypted with the
// footer key rather than a key of its own.
func (ce *ColumnEncryptionProperties) IsEncryptedWithFooterKey() bool { return ce.encryptedWithFooterKey }

// Key returns the column key; it is empty when no key was set or after
// WipeOutEncryptionKey.
func (ce *ColumnEncryptionProperties) Key() string {
	return ce.key
}

// KeyMetadata returns the key metadata stored alongside the column key.
func (ce *ColumnEncryptionProperties) KeyMetadata() string {
	return ce.keyMetadata
}

// WipeOutEncryptionKey clears the stored column key.
func (ce *ColumnEncryptionProperties) WipeOutEncryptionKey() {
	ce.key = ""
}

// IsUtilized reports whether these properties have been marked as used.
// Properties without a column key always report false, regardless of
// SetUtilized.
func (ce *ColumnEncryptionProperties) IsUtilized() bool {
	return ce.key != "" && ce.utilized
}

// SetUtilized marks these properties as used.
func (ce *ColumnEncryptionProperties) SetUtilized() { ce.utilized = true }
// Clone builds fresh properties for the same column path, carrying over the
// key and key metadata. The result comes from NewColumnEncryptionProperties,
// so it starts out not utilized.
func (ce *ColumnEncryptionProperties) Clone() *ColumnEncryptionProperties {
	opts := []ColumnEncryptOption{
		WithKey(ce.key),
		WithKeyMetadata(ce.keyMetadata),
	}
	return NewColumnEncryptionProperties(ce.columnPath, opts...)
}
// colEncryptConfig accumulates the settings that ColumnEncryptOption values
// apply before a ColumnEncryptionProperties is built.
type colEncryptConfig struct {
	key         string
	keyMetadata string
	encrypted   bool
}

// ColumnEncryptOption configures column encryption properties.
type ColumnEncryptOption func(*colEncryptConfig)

// WithKey sets the column key. An empty key leaves the configuration
// untouched.
func WithKey(key string) ColumnEncryptOption {
	return func(c *colEncryptConfig) {
		if key == "" {
			return
		}
		c.key = key
	}
}

// WithKeyMetadata sets the key metadata, overwriting any earlier value
// (including with an empty string).
func WithKeyMetadata(keyMeta string) ColumnEncryptOption {
	return func(c *colEncryptConfig) { c.keyMetadata = keyMeta }
}

// WithKeyID stores keyID as the key metadata. It panics as soon as it is
// called (not when the option is applied) if keyID is not valid UTF-8.
func WithKeyID(keyID string) ColumnEncryptOption {
	if utf8.ValidString(keyID) {
		return WithKeyMetadata(keyID)
	}
	panic("parquet: key id should be UTF8 encoded")
}
// NewColumnEncryptionProperties builds encryption properties for the column
// called name. The column is always marked encrypted; when no key is supplied
// through the options it is flagged as encrypted with the footer key.
func NewColumnEncryptionProperties(name string, opts ...ColumnEncryptOption) *ColumnEncryptionProperties {
	cfg := colEncryptConfig{encrypted: true}
	for _, apply := range opts {
		apply(&cfg)
	}

	props := new(ColumnEncryptionProperties)
	props.columnPath = name
	props.encrypted = cfg.encrypted
	// No column key means the footer key is used for this column.
	props.encryptedWithFooterKey = cfg.encrypted && cfg.key == ""
	props.key = cfg.key
	props.keyMetadata = cfg.keyMetadata
	props.utilized = false
	return props
}
// ColumnDecryptionProperties holds the decryption key for a single column and
// tracks whether the properties have been attached to a file.
type ColumnDecryptionProperties struct {
	columnPath string
	key        string
	utilized   bool
}

// NewColumnDecryptionProperties builds decryption properties for the given
// column path, applying opts in order. The result starts out not utilized.
func NewColumnDecryptionProperties(column string, opts ...ColumnDecryptOption) *ColumnDecryptionProperties {
	cfg := columnDecryptConfig{}
	for _, apply := range opts {
		apply(&cfg)
	}
	props := new(ColumnDecryptionProperties)
	props.columnPath = column
	props.key = cfg.key
	props.utilized = false
	return props
}

// ColumnPath returns the path of the column these properties belong to.
func (cd *ColumnDecryptionProperties) ColumnPath() string {
	return cd.columnPath
}

// Key returns the column decryption key; it is empty when none was set or
// after WipeOutDecryptionKey.
func (cd *ColumnDecryptionProperties) Key() string {
	return cd.key
}

// IsUtilized reports whether SetUtilized has been called.
func (cd *ColumnDecryptionProperties) IsUtilized() bool {
	return cd.utilized
}

// SetUtilized marks these properties as used.
func (cd *ColumnDecryptionProperties) SetUtilized() {
	cd.utilized = true
}

// WipeOutDecryptionKey clears the stored key.
func (cd *ColumnDecryptionProperties) WipeOutDecryptionKey() {
	cd.key = ""
}

// Clone returns new, not-utilized properties with the same column path and
// key.
func (cd *ColumnDecryptionProperties) Clone() *ColumnDecryptionProperties {
	keyOpt := WithDecryptKey(cd.key)
	return NewColumnDecryptionProperties(cd.columnPath, keyOpt)
}

// columnDecryptConfig accumulates the settings applied by ColumnDecryptOption
// values.
type columnDecryptConfig struct {
	key string
}

// ColumnDecryptOption configures column decryption properties.
type ColumnDecryptOption func(*columnDecryptConfig)

// WithDecryptKey sets the column decryption key. An empty key leaves the
// configuration untouched.
func WithDecryptKey(key string) ColumnDecryptOption {
	return func(cfg *columnDecryptConfig) {
		if key == "" {
			return
		}
		cfg.key = key
	}
}
// AADPrefixVerifier is a caller-supplied check that receives a string through
// Verify. An implementation is installed with WithPrefixVerifier and kept in
// FileDecryptionProperties.Verifier.
// NOTE(review): Verify returns nothing, so a failed check is presumably
// signalled by panicking — confirm against the call site.
type AADPrefixVerifier interface {
Verify (string )
}
// DecryptionKeyRetriever returns a key for the given key metadata bytes. An
// implementation is installed with WithKeyRetriever and kept in
// FileDecryptionProperties.KeyRetriever.
type DecryptionKeyRetriever interface {
GetKey (keyMetadata []byte ) string
}
// FileDecryptionProperties bundles everything configured for decrypting one
// file: the footer key, the AAD prefix, per-column properties, and the
// optional verifier and key retriever callbacks. Instances are built with
// NewFileDecryptionProperties or Clone.
type FileDecryptionProperties struct {
// footerKey is the explicitly supplied footer key ("" if none).
footerKey string
// aadPrefix is the explicitly supplied AAD prefix ("" if none).
aadPrefix string
// checkPlaintextFooterIntegrity starts as DefaultCheckSignature.
checkPlaintextFooterIntegrity bool
// plaintextAllowed starts as DefaultAllowPlaintextFiles.
plaintextAllowed bool
// utilized is set by SetUtilized and reported by IsUtilized.
utilized bool
// columnDecryptProps holds the per-column properties, keyed by column path.
columnDecryptProps ColumnPathToDecryptionPropsMap
// Verifier is the optional AAD prefix verifier (nil if none was configured).
Verifier AADPrefixVerifier
// KeyRetriever is the optional key retriever (nil if none was configured).
KeyRetriever DecryptionKeyRetriever
}
// NewFileDecryptionProperties builds file decryption properties. It starts
// from DefaultCheckSignature and DefaultAllowPlaintextFiles, then applies
// opts in order. The result starts out not utilized.
func NewFileDecryptionProperties(opts ...FileDecryptionOption) *FileDecryptionProperties {
	cfg := fileDecryptConfig{
		checkFooterIntegrity: DefaultCheckSignature,
		plaintextAllowed:     DefaultAllowPlaintextFiles,
	}
	for _, apply := range opts {
		apply(&cfg)
	}

	props := new(FileDecryptionProperties)
	props.footerKey = cfg.footerKey
	props.aadPrefix = cfg.aadPrefix
	props.checkPlaintextFooterIntegrity = cfg.checkFooterIntegrity
	props.plaintextAllowed = cfg.plaintextAllowed
	props.columnDecryptProps = cfg.colDecrypt
	props.Verifier = cfg.verifier
	props.KeyRetriever = cfg.retriever
	props.utilized = false
	return props
}
// ColumnKey returns the decryption key registered for the column at path, or
// "" when the path has no entry or its entry is nil.
func (fd *FileDecryptionProperties) ColumnKey(path string) string {
	// A missing path yields the nil zero value, so one check covers both cases.
	props := fd.columnDecryptProps[path]
	if props == nil {
		return ""
	}
	return props.Key()
}
// FooterKey returns the configured footer key ("" if none).
func (fd *FileDecryptionProperties) FooterKey() string {
	return fd.footerKey
}

// AadPrefix returns the configured AAD prefix ("" if none).
func (fd *FileDecryptionProperties) AadPrefix() string {
	return fd.aadPrefix
}

// PlaintextFooterIntegrity reports whether the plaintext footer integrity
// check is enabled.
func (fd *FileDecryptionProperties) PlaintextFooterIntegrity() bool { return fd.checkPlaintextFooterIntegrity }

// PlaintextFilesAllowed reports whether plaintext files are allowed.
func (fd *FileDecryptionProperties) PlaintextFilesAllowed() bool {
	return fd.plaintextAllowed
}

// SetUtilized marks these properties as used.
func (fd *FileDecryptionProperties) SetUtilized() {
	fd.utilized = true
}
// IsUtilized reports whether these properties have been marked as used.
// Properties with no footer key, no column properties and no AAD prefix
// always report false.
func (fd *FileDecryptionProperties) IsUtilized() bool {
	hasSecrets := fd.footerKey != "" || len(fd.columnDecryptProps) != 0 || fd.aadPrefix != ""
	return hasSecrets && fd.utilized
}
// WipeOutDecryptionKeys clears the footer key and then the key of every
// column entry.
func (fd *FileDecryptionProperties) WipeOutDecryptionKeys() {
	fd.footerKey = ""
	for _, colProps := range fd.columnDecryptProps {
		colProps.WipeOutDecryptionKey()
	}
}
// Clone returns a not-utilized copy of these properties. Every column entry
// is cloned; the verifier and key retriever are shared with the original.
// If newAadPrefix is empty the current AAD prefix is kept.
func (fd *FileDecryptionProperties) Clone(newAadPrefix string) *FileDecryptionProperties {
	footerKey := fd.footerKey

	columns := make(ColumnPathToDecryptionPropsMap)
	for path, colProps := range fd.columnDecryptProps {
		columns[path] = colProps.Clone()
	}

	prefix := newAadPrefix
	if prefix == "" {
		prefix = fd.aadPrefix
	}

	clone := new(FileDecryptionProperties)
	clone.footerKey = footerKey
	clone.aadPrefix = prefix
	clone.checkPlaintextFooterIntegrity = fd.checkPlaintextFooterIntegrity
	clone.plaintextAllowed = fd.plaintextAllowed
	clone.columnDecryptProps = columns
	clone.Verifier = fd.Verifier
	clone.KeyRetriever = fd.KeyRetriever
	clone.utilized = false
	return clone
}
// fileDecryptConfig accumulates the settings applied by FileDecryptionOption
// values; NewFileDecryptionProperties copies it into the final properties.
type fileDecryptConfig struct {
footerKey string
aadPrefix string
verifier AADPrefixVerifier
colDecrypt ColumnPathToDecryptionPropsMap
retriever DecryptionKeyRetriever
// checkFooterIntegrity starts as DefaultCheckSignature.
checkFooterIntegrity bool
// plaintextAllowed starts as DefaultAllowPlaintextFiles.
plaintextAllowed bool
}
// FileDecryptionOption configures the properties built by
// NewFileDecryptionProperties.
type FileDecryptionOption func (*fileDecryptConfig )
// WithFooterKey sets the footer key. An empty key leaves the configuration
// untouched.
func WithFooterKey(key string) FileDecryptionOption {
	return func(cfg *fileDecryptConfig) {
		if key == "" {
			return
		}
		cfg.footerKey = key
	}
}
// WithPrefixVerifier installs the AAD prefix verifier. A nil verifier leaves
// the configuration untouched.
func WithPrefixVerifier(verifier AADPrefixVerifier) FileDecryptionOption {
	return func(cfg *fileDecryptConfig) {
		if verifier == nil {
			return
		}
		cfg.verifier = verifier
	}
}
// WithColumnKeys installs the per-column decryption properties and marks each
// entry as utilized. An empty map is ignored. The option panics if column
// properties were already configured, or if any entry is already utilized.
func WithColumnKeys(decrypt ColumnPathToDecryptionPropsMap) FileDecryptionOption {
	return func(cfg *fileDecryptConfig) {
		switch {
		case len(decrypt) == 0:
			return
		case len(cfg.colDecrypt) != 0:
			panic("column properties already set")
		}

		for _, colProps := range decrypt {
			if colProps.IsUtilized() {
				panic("parquet: column properties utilized in another file")
			}
			colProps.SetUtilized()
		}
		cfg.colDecrypt = decrypt
	}
}
// WithKeyRetriever installs the key retriever. A nil retriever leaves the
// configuration untouched.
func WithKeyRetriever(retriever DecryptionKeyRetriever) FileDecryptionOption {
	return func(cfg *fileDecryptConfig) {
		if retriever == nil {
			return
		}
		cfg.retriever = retriever
	}
}
// DisableFooterSignatureVerification turns off the footer integrity check,
// which is otherwise initialised from DefaultCheckSignature.
func DisableFooterSignatureVerification() FileDecryptionOption {
	return func(c *fileDecryptConfig) { c.checkFooterIntegrity = false }
}
// WithPlaintextAllowed turns on the plaintext-allowed setting, which is
// otherwise initialised from DefaultAllowPlaintextFiles.
func WithPlaintextAllowed() FileDecryptionOption {
	return func(c *fileDecryptConfig) { c.plaintextAllowed = true }
}
// WithDecryptAadPrefix sets the AAD prefix. An empty prefix leaves the
// configuration untouched.
func WithDecryptAadPrefix(prefix string) FileDecryptionOption {
	return func(cfg *fileDecryptConfig) {
		if prefix == "" {
			return
		}
		cfg.aadPrefix = prefix
	}
}
// Algorithm describes the cipher and AAD settings of an encrypted file. It
// converts to and from the thrift representation via ToThrift and
// AlgorithmFromThrift.
type Algorithm struct {
// Algo is the cipher in use.
Algo Cipher
Aad struct {
// AadPrefix is the AAD prefix; NewFileEncryptionProperties fills it only
// when a prefix is configured and stored in the file.
AadPrefix []byte
// AadFileUnique holds the random file-unique AAD bytes.
AadFileUnique []byte
// SupplyAadPrefix is set by NewFileEncryptionProperties when a prefix is
// configured but not stored in the file.
SupplyAadPrefix bool
}
}
// ToThrift converts the algorithm into its thrift representation. AesGcm
// fills the AES_GCM_V1 member; every other cipher value fills AES_GCM_CTR_V1.
func (e Algorithm) ToThrift() *format.EncryptionAlgorithm {
	out := new(format.EncryptionAlgorithm)
	switch e.Algo {
	case AesGcm:
		out.AES_GCM_V1 = &format.AesGcmV1{
			AadPrefix:       e.Aad.AadPrefix,
			AadFileUnique:   e.Aad.AadFileUnique,
			SupplyAadPrefix: &e.Aad.SupplyAadPrefix,
		}
	default:
		out.AES_GCM_CTR_V1 = &format.AesGcmCtrV1{
			AadPrefix:       e.Aad.AadPrefix,
			AadFileUnique:   e.Aad.AadFileUnique,
			SupplyAadPrefix: &e.Aad.SupplyAadPrefix,
		}
	}
	return out
}
// AlgorithmFromThrift converts the thrift encryption algorithm into an
// Algorithm. If the AES_GCM_V1 member is set the result uses AesGcm,
// otherwise the AES_GCM_CTR_V1 member is read and the result uses AesCtr.
//
// SupplyAadPrefix is a pointer in the thrift struct and may be unset; an
// unset flag is treated as false instead of being dereferenced.
//
// It panics with a descriptive message when neither algorithm member is set,
// since the signature leaves no way to return an error.
func AlgorithmFromThrift(enc *format.EncryptionAlgorithm) (ret Algorithm) {
	if enc.IsSetAES_GCM_V1() {
		gcm := enc.AES_GCM_V1
		ret.Algo = AesGcm
		ret.Aad.AadFileUnique = gcm.AadFileUnique
		ret.Aad.AadPrefix = gcm.AadPrefix
		if gcm.SupplyAadPrefix != nil {
			ret.Aad.SupplyAadPrefix = *gcm.SupplyAadPrefix
		}
		return ret
	}

	ctr := enc.AES_GCM_CTR_V1
	if ctr == nil {
		// Previously this surfaced as an anonymous nil-pointer dereference.
		panic("parquet: encryption algorithm has neither AES_GCM_V1 nor AES_GCM_CTR_V1 set")
	}
	ret.Algo = AesCtr
	ret.Aad.AadFileUnique = ctr.AadFileUnique
	ret.Aad.AadPrefix = ctr.AadPrefix
	if ctr.SupplyAadPrefix != nil {
		ret.Aad.SupplyAadPrefix = *ctr.SupplyAadPrefix
	}
	return ret
}
// FileEncryptionProperties bundles everything configured for encrypting one
// file. Instances are built with NewFileEncryptionProperties or Clone.
type FileEncryptionProperties struct {
// alg is the cipher and AAD description returned by Algorithm.
alg Algorithm
footerKey string
footerKeyMetadata string
// encryptedFooter starts as DefaultEncryptedFooter; WithPlaintextFooter clears it.
encryptedFooter bool
// fileAad is the AAD prefix (if any) followed by the random file-unique bytes.
fileAad string
// utilized is set by SetUtilized and reported by IsUtilized.
utilized bool
// storeAadPrefixInFile records whether the AAD prefix is written into the file.
storeAadPrefixInFile bool
aadPrefix string
// encryptedCols holds the per-column properties, keyed by column path.
encryptedCols ColumnPathToEncryptionPropsMap
}
// EncryptedFooter reports whether the footer is encrypted.
func (fe *FileEncryptionProperties) EncryptedFooter() bool {
	return fe.encryptedFooter
}

// Algorithm returns a copy of the cipher and AAD description.
func (fe *FileEncryptionProperties) Algorithm() Algorithm {
	return fe.alg
}

// FooterKey returns the footer key; it is empty after WipeOutEncryptionKeys.
func (fe *FileEncryptionProperties) FooterKey() string {
	return fe.footerKey
}

// FooterKeyMetadata returns the metadata stored for the footer key.
func (fe *FileEncryptionProperties) FooterKeyMetadata() string {
	return fe.footerKeyMetadata
}

// FileAad returns the file AAD: the AAD prefix (if any) followed by the
// file-unique bytes.
func (fe *FileEncryptionProperties) FileAad() string {
	return fe.fileAad
}

// IsUtilized reports whether SetUtilized has been called.
func (fe *FileEncryptionProperties) IsUtilized() bool {
	return fe.utilized
}

// SetUtilized marks these properties as used.
func (fe *FileEncryptionProperties) SetUtilized() {
	fe.utilized = true
}

// EncryptedColumns returns the per-column encryption properties map itself,
// not a copy.
func (fe *FileEncryptionProperties) EncryptedColumns() ColumnPathToEncryptionPropsMap { return fe.encryptedCols }
// ColumnEncryptionProperties returns the encryption properties for the column
// at path. When no per-column properties are configured at all, it returns
// fresh keyless properties for path. Otherwise it returns the configured
// entry, or nil if path has none.
func (fe *FileEncryptionProperties) ColumnEncryptionProperties(path string) *ColumnEncryptionProperties {
	if len(fe.encryptedCols) == 0 {
		return NewColumnEncryptionProperties(path)
	}
	// A missing path yields the nil zero value of the map's pointer type.
	return fe.encryptedCols[path]
}
// Clone builds new file encryption properties with the same footer key,
// cipher, footer key metadata, footer mode and AAD storage mode, cloning
// every column entry. If newAadPrefix is empty the current AAD prefix is
// kept. The result goes through NewFileEncryptionProperties, so it gets new
// file-unique AAD bytes and starts out not utilized.
func (fe *FileEncryptionProperties) Clone(newAadPrefix string) *FileEncryptionProperties {
	footerKey := fe.footerKey

	columns := make(ColumnPathToEncryptionPropsMap)
	for path, colProps := range fe.encryptedCols {
		columns[path] = colProps.Clone()
	}

	prefix := newAadPrefix
	if prefix == "" {
		prefix = fe.aadPrefix
	}

	opts := []EncryptOption{
		WithAlg(fe.alg.Algo),
		WithFooterKeyMetadata(fe.footerKeyMetadata),
		WithAadPrefix(prefix),
		WithEncryptedColumns(columns),
	}
	if !fe.encryptedFooter {
		opts = append(opts, WithPlaintextFooter())
	}
	// Must follow WithAadPrefix, which switches prefix storage on.
	if !fe.storeAadPrefixInFile {
		opts = append(opts, DisableAadPrefixStorage())
	}
	return NewFileEncryptionProperties(footerKey, opts...)
}
// WipeOutEncryptionKeys clears the footer key and then the key of every
// column entry.
func (fe *FileEncryptionProperties) WipeOutEncryptionKeys() {
	fe.footerKey = ""
	for _, colProps := range fe.encryptedCols {
		colProps.WipeOutEncryptionKey()
	}
}
// configEncrypt accumulates the settings applied by EncryptOption values;
// NewFileEncryptionProperties copies it into the final properties.
type configEncrypt struct {
// cipher starts as DefaultEncryptionAlgorithm.
cipher Cipher
// encryptFooter starts as DefaultEncryptedFooter.
encryptFooter bool
keyMetadata string
aadprefix string
// storeAadPrefixInFile is switched on by WithAadPrefix and off by DisableAadPrefixStorage.
storeAadPrefixInFile bool
encryptedCols ColumnPathToEncryptionPropsMap
}
// EncryptOption configures the properties built by NewFileEncryptionProperties.
type EncryptOption func (*configEncrypt )
// WithPlaintextFooter turns footer encryption off.
func WithPlaintextFooter() EncryptOption {
	return func(c *configEncrypt) { c.encryptFooter = false }
}
// WithAlg selects the cipher, replacing DefaultEncryptionAlgorithm.
func WithAlg(cipher Cipher) EncryptOption {
	return func(c *configEncrypt) { c.cipher = cipher }
}
// WithFooterKeyID stores key as the footer key metadata. It panics as soon as
// it is called (not when the option is applied) if key is not valid UTF-8.
func WithFooterKeyID(key string) EncryptOption {
	if utf8.ValidString(key) {
		return WithFooterKeyMetadata(key)
	}
	panic("parquet: footer key id should be UTF8 encoded")
}
// WithFooterKeyMetadata sets the footer key metadata. An empty value leaves
// the configuration untouched.
func WithFooterKeyMetadata(keyMeta string) EncryptOption {
	return func(cfg *configEncrypt) {
		if keyMeta == "" {
			return
		}
		cfg.keyMetadata = keyMeta
	}
}
// WithAadPrefix sets the AAD prefix and switches on storing it in the file.
// An empty prefix leaves the configuration untouched. Because the option
// enables storage, DisableAadPrefixStorage only takes effect when it is
// applied after this option.
func WithAadPrefix(aadPrefix string) EncryptOption {
	return func(cfg *configEncrypt) {
		if aadPrefix == "" {
			return
		}
		cfg.aadprefix = aadPrefix
		cfg.storeAadPrefixInFile = true
	}
}
// DisableAadPrefixStorage switches off storing the AAD prefix in the file.
func DisableAadPrefixStorage() EncryptOption {
	return func(c *configEncrypt) { c.storeAadPrefixInFile = false }
}
// WithEncryptedColumns installs the per-column encryption properties and
// marks each entry as utilized. If the map is empty when this function is
// called, the returned option does nothing. Otherwise the option panics if
// column properties were already configured, or if any entry is already
// utilized.
func WithEncryptedColumns(encrypted ColumnPathToEncryptionPropsMap) EncryptOption {
	if len(encrypted) == 0 {
		return func(*configEncrypt) {}
	}

	return func(cfg *configEncrypt) {
		if len(cfg.encryptedCols) != 0 {
			panic("column properties already set")
		}
		for _, colProps := range encrypted {
			if colProps.IsUtilized() {
				panic("column properties utilized in another file")
			}
			colProps.SetUtilized()
		}
		cfg.encryptedCols = encrypted
	}
}
// NewFileEncryptionProperties builds file encryption properties around
// footerKey. It starts from DefaultEncryptionAlgorithm and
// DefaultEncryptedFooter, then applies opts in order.
//
// Each call generates AadFileUniqueLength random bytes with crypto/rand and
// panics if that read fails. The file AAD is the AAD prefix (if any)
// followed by those bytes. When a prefix is configured, it is either copied
// into the algorithm description (prefix storage enabled) or the algorithm
// is flagged with SupplyAadPrefix (prefix storage disabled).
func NewFileEncryptionProperties(footerKey string, opts ...EncryptOption) *FileEncryptionProperties {
	cfg := configEncrypt{
		cipher:        DefaultEncryptionAlgorithm,
		encryptFooter: DefaultEncryptedFooter,
	}
	for _, apply := range opts {
		apply(&cfg)
	}

	var fileUnique [AadFileUniqueLength]uint8
	if _, err := rand.Read(fileUnique[:]); err != nil {
		panic(err)
	}

	hasPrefix := cfg.aadprefix != ""

	props := &FileEncryptionProperties{
		footerKey:            footerKey,
		footerKeyMetadata:    cfg.keyMetadata,
		encryptedFooter:      cfg.encryptFooter,
		aadPrefix:            cfg.aadprefix,
		storeAadPrefixInFile: cfg.storeAadPrefixInFile,
		encryptedCols:        cfg.encryptedCols,
		utilized:             false,
		// With no prefix this is just the file-unique bytes.
		fileAad: cfg.aadprefix + string(fileUnique[:]),
	}

	props.alg.Algo = cfg.cipher
	props.alg.Aad.AadFileUnique = fileUnique[:]
	// A prefix that is not stored in the file must be supplied when reading.
	props.alg.Aad.SupplyAadPrefix = hasPrefix && !cfg.storeAadPrefixInFile
	if hasPrefix && cfg.storeAadPrefixInFile {
		props.alg.Aad.AadPrefix = []byte(cfg.aadprefix)
	}
	return props
}
The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.