Source File
compress.go
Belonging Package
github.com/apache/arrow-go/v18/parquet/compress
// Licensed to the Apache Software Foundation (ASF) under one// or more contributor license agreements. See the NOTICE file// distributed with this work for additional information// regarding copyright ownership. The ASF licenses this file// to you under the Apache License, Version 2.0 (the// "License"); you may not use this file except in compliance// with the License. You may obtain a copy of the License at//// http://www.apache.org/licenses/LICENSE-2.0//// Unless required by applicable law or agreed to in writing, software// distributed under the License is distributed on an "AS IS" BASIS,// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.// See the License for the specific language governing permissions and// limitations under the License.// Package compress contains the interfaces and implementations for handling compression/decompression// of parquet data at the column levels.package compressimport ()// Compression is an alias to the thrift compression codec enum type for easy usetype Compression parquet.CompressionCodecfunc ( Compression) () string {return parquet.CompressionCodec().String()}func ( Compression) () ([]byte, error) {return parquet.CompressionCodec().MarshalText()}func ( *Compression) ( []byte) error {return (*parquet.CompressionCodec)().UnmarshalText()}// DefaultCompressionLevel will use flate.DefaultCompression since many of the compression libraries// use that to denote "use the default".const DefaultCompressionLevel = flate.DefaultCompression// Codecs is a useful struct to provide namespaced enum values to use for specifying the compression type to use// which make for easy internal swapping between them and the thrift enum since they are initialized to the same// constant values.var Codecs = struct {Uncompressed CompressionSnappy CompressionGzip Compression// LZO is unsupported in this library since LZO license is incompatible with Apache LicenseLzo CompressionBrotli Compression// LZ4 unsupported in this library due 
to problematic issues between the Hadoop LZ4 spec vs regular lz4// see: http://mail-archives.apache.org/mod_mbox/arrow-dev/202007.mbox/%3CCAAri41v24xuA8MGHLDvgSnE+7AAgOhiEukemW_oPNHMvfMmrWw@mail.gmail.com%3ELz4 CompressionZstd CompressionLz4Raw Compression}{Uncompressed: Compression(parquet.CompressionCodec_UNCOMPRESSED),Snappy: Compression(parquet.CompressionCodec_SNAPPY),Gzip: Compression(parquet.CompressionCodec_GZIP),Lzo: Compression(parquet.CompressionCodec_LZO),Brotli: Compression(parquet.CompressionCodec_BROTLI),Lz4: Compression(parquet.CompressionCodec_LZ4),Zstd: Compression(parquet.CompressionCodec_ZSTD),Lz4Raw: Compression(parquet.CompressionCodec_LZ4_RAW),}// Codec is an interface which is implemented for each compression type in order to make the interactions easy to// implement. Most consumers won't be calling GetCodec directly.type Codec interface {// Encode encodes a block of data given by src and returns the compressed block. dst should be either nil// or sized large enough to fit the compressed block (use CompressBound to allocate). dst and src should not// overlap since some of the compression types don't allow it.//// The returned slice will be one of the following:// 1. If dst was nil or dst was too small to fit the compressed data, it will be a newly allocated slice// 2. 
If dst was large enough to fit the compressed data (depending on the compression algorithm it might// be required to be at least CompressBound length) then it might be a slice of dst.Encode(dst, src []byte) []byte// EncodeLevel is like Encode, but specifies a particular encoding level instead of the default.EncodeLevel(dst, src []byte, level int) []byte// CompressBound returns the boundary of maximum size of compressed data under the chosen codec.CompressBound(int64) int64// Decode is for decoding a single block rather than a stream, like with Encode, dst must be either nil or// sized large enough to accommodate the uncompressed data and should not overlap with src.//// the returned slice *might* be a slice of dst.Decode(dst, src []byte) []byte}// StreamingCodec is an interface that may be implemented for compression codecs that expose a streaming API.type StreamingCodec interface {// NewReader provides a reader that wraps a stream with compressed data to stream the uncompressed dataNewReader(io.Reader) io.ReadCloser// NewWriter provides a wrapper around a write stream to compress data before writing it.NewWriter(io.Writer) io.WriteCloser// NewWriterLevel is like NewWriter but allows specifying the compression levelNewWriterLevel(io.Writer, int) (io.WriteCloser, error)}var codecs = map[Compression]Codec{}// RegisterCodec adds or overrides a codec implementation for a given compression algorithm.// The intended use case is within the init() section of a package. For example,//// // inside a custom codec package, say czstd//// func init() {// RegisterCodec(compress.Codecs.Zstd, czstdCodec{})// }//// type czstdCodec struct{} // implementing Codec interface using CGO based ZSTD wrapper//// And user of the custom codec can import the above package like below,//// package main//// import _ "package/path/to/czstd"func ( Compression, Codec) {codecs[] =}type nocodec struct{}func (nocodec) ( io.Reader) io.ReadCloser {, := .(io.ReadCloser)if ! 
{return io.NopCloser()}return}func (nocodec) (, []byte) []byte {if != nil {copy(, )}return}type writerNopCloser struct {io.Writer}func (writerNopCloser) () error {return nil}func (nocodec) (, []byte) []byte {copy(, )return}func (nocodec) (, []byte, int) []byte {copy(, )return}func (nocodec) ( io.Writer) io.WriteCloser {, := .(io.WriteCloser)if ! {return writerNopCloser{}}return}func ( nocodec) ( io.Writer, int) (io.WriteCloser, error) {return .NewWriter(), nil}func (nocodec) ( int64) int64 { return }func init() {codecs[Codecs.Uncompressed] = nocodec{}}// GetCodec returns a Codec interface for the requested Compression typefunc ( Compression) (Codec, error) {, := codecs[]if ! {return nil, fmt.Errorf("compression for %s unimplemented", .String())}return , nil}
The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PRs and bug reports are welcome and can be submitted to the issue list. Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.