// Package huff0 provides fast huffman encoding as used in zstd.
//
// See README.md at https://github.com/klauspost/compress/tree/master/huff0 for details.
package huff0

import (
	"errors"
	"fmt"
	"math"
	"math/bits"
	"sync"

	"github.com/klauspost/compress/fse"
)

const (
	maxSymbolValue = 255

	// zstandard limits tablelog to 11, see:
	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#huffman-tree-description
	tableLogMax     = 11
	tableLogDefault = 11
	minTablelog     = 5
	huffNodesLen    = 512

	// BlockSizeMax is maximum input size for a single block uncompressed.
	BlockSizeMax = 1<<18 - 1
)

var (
	// ErrIncompressible is returned when input is judged to be too hard to compress.
	ErrIncompressible = errors.New("input is not compressible")

	// ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
	ErrUseRLE = errors.New("input is single value repeated")

	// ErrTooBig is return if input is too large for a single block.
	ErrTooBig = errors.New("input too big")

	// ErrMaxDecodedSizeExceeded is return if input is too large for a single block.
	ErrMaxDecodedSizeExceeded = errors.New("maximum output size exceeded")
)

// ReusePolicy controls whether a previously generated compression table
// may be reused for subsequent blocks.
type ReusePolicy uint8

const (
	// ReusePolicyAllow will allow reuse if it produces smaller output.
	ReusePolicyAllow ReusePolicy = iota

	// ReusePolicyPrefer will re-use aggressively if possible.
	// This will not check if a new table will produce smaller output,
	// except if the current table is impossible to use or
	// compressed output is bigger than input.
	ReusePolicyPrefer

	// ReusePolicyNone will disable re-use of tables.
	// This is slightly faster than ReusePolicyAllow but may produce larger output.
	ReusePolicyNone

	// ReusePolicyMust must allow reuse and produce smaller output.
	ReusePolicyMust
)

// Scratch holds buffers and per-block parameters that are reused between
// compression/decompression calls to avoid allocations.
type Scratch struct {
	count [maxSymbolValue + 1]uint32

	// Per block parameters.
	// These can be used to override compression parameters of the block.
	// Do not touch, unless you know what you are doing.

	// Out is output buffer.
	// If the scratch is re-used before the caller is done processing the output,
	// set this field to nil.
	// Otherwise the output buffer will be re-used for next Compression/Decompression step
	// and allocation will be avoided.
	Out []byte

	// OutTable will contain the table data only, if a new table has been generated.
	// Slice of the returned data.
	OutTable []byte

	// OutData will contain the compressed data.
	// Slice of the returned data.
	OutData []byte

	// MaxDecodedSize will set the maximum allowed output size.
	// This value will automatically be set to BlockSizeMax if not set.
	// Decoders will return ErrMaxDecodedSizeExceeded if this limit is exceeded.
	MaxDecodedSize int

	srcLen int

	// MaxSymbolValue will override the maximum symbol value of the next block.
	MaxSymbolValue uint8

	// TableLog will attempt to override the tablelog for the next block.
	// Must be <= 11 and >= 5.
	TableLog uint8

	// Reuse will specify the reuse policy
	Reuse ReusePolicy

	// WantLogLess allows to specify a log 2 reduction that should at least be achieved,
	// otherwise the block will be returned as incompressible.
	// The reduction should then at least be (input size >> WantLogLess)
	// If WantLogLess == 0 any improvement will do.
	WantLogLess uint8

	symbolLen      uint16 // Length of active part of the symbol table.
	maxCount       int    // count of the most probable symbol
	clearCount     bool   // clear count
	actualTableLog uint8  // Selected tablelog.
	prevTableLog   uint8  // Tablelog for previous table
	prevTable      cTable // Table used for previous compression.
	cTable         cTable // compression table
	dt             dTable // decompression table
	nodes          []nodeElt
	tmpOut         [4][]byte
	fse            *fse.Scratch
	decPool        sync.Pool // *[4][256]byte buffers.
	huffWeight     [maxSymbolValue + 1]byte
}

// TransferCTable will transfer the previously used compression table.
func ( *Scratch) ( *Scratch) { if cap(.prevTable) < len(.prevTable) { .prevTable = make(cTable, 0, maxSymbolValue+1) } .prevTable = .prevTable[:len(.prevTable)] copy(.prevTable, .prevTable) .prevTableLog = .prevTableLog } func ( *Scratch) ( []byte) (*Scratch, error) { if len() > BlockSizeMax { return nil, ErrTooBig } if == nil { = &Scratch{} } if .MaxSymbolValue == 0 { .MaxSymbolValue = maxSymbolValue } if .TableLog == 0 { .TableLog = tableLogDefault } if .TableLog > tableLogMax || .TableLog < minTablelog { return nil, fmt.Errorf(" invalid tableLog %d (%d -> %d)", .TableLog, minTablelog, tableLogMax) } if .MaxDecodedSize <= 0 || .MaxDecodedSize > BlockSizeMax { .MaxDecodedSize = BlockSizeMax } if .clearCount && .maxCount == 0 { for := range .count { .count[] = 0 } .clearCount = false } if cap(.Out) == 0 { .Out = make([]byte, 0, len()) } .Out = .Out[:0] .OutTable = nil .OutData = nil if cap(.nodes) < huffNodesLen+1 { .nodes = make([]nodeElt, 0, huffNodesLen+1) } .nodes = .nodes[:0] if .fse == nil { .fse = &fse.Scratch{} } .srcLen = len() return , nil } type cTable []cTableEntry func ( cTable) ( *Scratch) error { var ( // precomputed conversion table [tableLogMax + 1]byte = .actualTableLog // last weight is not saved. = uint8(.symbolLen - 1) = .huffWeight[:256] ) const ( = 6 ) // convert to weight [0] = 0 for := uint8(1); < +1; ++ { [] = + 1 - } // Acquire histogram for FSE. := .fse.Histogram() = [:256] for := range [:16] { [] = 0 } for := uint8(0); < ; ++ { := [[].nBits] & 15 [] = []++ } // FSE compress if feasible. if >= 2 { := uint32(0) := uint8(0) for , := range [:16] { if == 0 { continue } = byte() if > { = } } .fse.HistogramFinished(, int()) .fse.TableLog = , := fse.Compress([:], .fse) if == nil && len() < int(.symbolLen>>1) { .Out = append(.Out, uint8(len())) .Out = append(.Out, ...) 
return nil } // Unable to compress (RLE/uncompressible) } // write raw values as 4-bits (max : 15) if > (256 - 128) { // should not happen : likely means source cannot be compressed return ErrIncompressible } := .Out // special case, pack weights 4 bits/weight. = append(, 128|(-1)) // be sure it doesn't cause msan issue in final combination [] = 0 for := uint16(0); < uint16(); += 2 { = append(, ([]<<4)|[+1]) } .Out = return nil } func ( cTable) ( *Scratch) ( int, error) { var ( // precomputed conversion table [tableLogMax + 1]byte = .actualTableLog // last weight is not saved. = uint8(.symbolLen - 1) = .huffWeight[:256] ) const ( = 6 ) // convert to weight [0] = 0 for := uint8(1); < +1; ++ { [] = + 1 - } // Acquire histogram for FSE. := .fse.Histogram() = [:256] for := range [:16] { [] = 0 } for := uint8(0); < ; ++ { := [[].nBits] & 15 [] = []++ } // FSE compress if feasible. if >= 2 { := uint32(0) := uint8(0) for , := range [:16] { if == 0 { continue } = byte() if > { = } } .fse.HistogramFinished(, int()) .fse.TableLog = , := fse.Compress([:], .fse) if == nil && len() < int(.symbolLen>>1) { += 1 + len() return , nil } // Unable to compress (RLE/uncompressible) } // write raw values as 4-bits (max : 15) if > (256 - 128) { // should not happen : likely means source cannot be compressed return 0, ErrIncompressible } // special case, pack weights 4 bits/weight. += 1 + int(/2) return , nil } // estimateSize returns the estimated size in bytes of the input represented in the // histogram supplied. func ( cTable) ( []uint32) int { := uint32(7) for , := range [:len()] { += uint32(.nBits) * [] } return int( >> 3) } // minSize returns the minimum possible size considering the shannon limit. func ( *Scratch) ( int) int { := float64(7) := float64() for , := range .count[:.symbolLen] { := float64() if > 0 { += math.Log2(/) * } } return int() >> 3 } func highBit32( uint32) ( uint32) { return uint32(bits.Len32() - 1) }