// Package huff0 provides fast huffman encoding as used in zstd.
//
// See README.md at https://github.com/klauspost/compress/tree/master/huff0 for details.
package huff0

import (
	"errors"
	"fmt"
	"math"
	"math/bits"
	"sync"

	"github.com/klauspost/compress/fse"
)

const (
	maxSymbolValue = 255

	// zstandard limits tablelog to 11, see:
	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#huffman-tree-description
	tableLogMax     = 11
	tableLogDefault = 11
	minTablelog     = 5
	huffNodesLen    = 512

	// BlockSizeMax is maximum input size for a single block uncompressed.
	BlockSizeMax = 1<<18 - 1
)

var (
	// ErrIncompressible is returned when input is judged to be too hard to compress.
	ErrIncompressible = errors.New("input is not compressible")

	// ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
	ErrUseRLE = errors.New("input is single value repeated")

	// ErrTooBig is returned if input is too large for a single block.
	ErrTooBig = errors.New("input too big")

	// ErrMaxDecodedSizeExceeded is returned if output size exceeds the configured maximum.
	ErrMaxDecodedSizeExceeded = errors.New("maximum output size exceeded")
)

// ReusePolicy controls how compression tables from previous blocks may be reused.
type ReusePolicy uint8

const (
	// ReusePolicyAllow will allow reuse if it produces smaller output.
	ReusePolicyAllow ReusePolicy = iota

	// ReusePolicyPrefer will re-use aggressively if possible.
	// This will not check if a new table will produce smaller output,
	// except if the current table is impossible to use or
	// compressed output is bigger than input.
	ReusePolicyPrefer

	// ReusePolicyNone will disable re-use of tables.
	// This is slightly faster than ReusePolicyAllow but may produce larger output.
	ReusePolicyNone

	// ReusePolicyMust must allow reuse and produce smaller output.
	ReusePolicyMust
)

// Scratch provides temporary storage for compression and decompression.
// A zero-value Scratch is ready for use; exported fields may be set to
// override per-block parameters before calling compression/decompression.
type Scratch struct {
	count [maxSymbolValue + 1]uint32

	// Per block parameters.
	// These can be used to override compression parameters of the block.
	// Do not touch, unless you know what you are doing.

	// Out is output buffer.
	// If the scratch is re-used before the caller is done processing the output,
	// set this field to nil.
	// Otherwise the output buffer will be re-used for next Compression/Decompression step
	// and allocation will be avoided.
	Out []byte

	// OutTable will contain the table data only, if a new table has been generated.
	// Slice of the returned data.
	OutTable []byte

	// OutData will contain the compressed data.
	// Slice of the returned data.
	OutData []byte

	// MaxDecodedSize will set the maximum allowed output size.
	// This value will automatically be set to BlockSizeMax if not set.
	// Decoders will return ErrMaxDecodedSizeExceeded if this limit is exceeded.
	MaxDecodedSize int

	srcLen int

	// MaxSymbolValue will override the maximum symbol value of the next block.
	MaxSymbolValue uint8

	// TableLog will attempt to override the tablelog for the next block.
	// Must be <= 11 and >= 5.
	TableLog uint8

	// Reuse will specify the reuse policy
	Reuse ReusePolicy

	// WantLogLess allows to specify a log 2 reduction that should at least be achieved,
	// otherwise the block will be returned as incompressible.
	// The reduction should then at least be (input size >> WantLogLess)
	// If WantLogLess == 0 any improvement will do.
	WantLogLess uint8

	symbolLen      uint16 // Length of active part of the symbol table.
	maxCount       int    // count of the most probable symbol
	clearCount     bool   // clear count
	actualTableLog uint8  // Selected tablelog.
	prevTableLog   uint8  // Tablelog for previous table
	prevTable      cTable // Table used for previous compression.
	cTable         cTable // compression table
	dt             dTable // decompression table
	nodes          []nodeElt
	tmpOut         [4][]byte
	fse            *fse.Scratch
	decPool        sync.Pool // *[4][256]byte buffers.
	huffWeight     [maxSymbolValue + 1]byte
}

// TransferCTable will transfer the previously used compression table.
func (s *Scratch) TransferCTable(src *Scratch) {
	if cap(s.prevTable) < len(src.prevTable) {
		s.prevTable = make(cTable, 0, maxSymbolValue+1)
	}
	s.prevTable = s.prevTable[:len(src.prevTable)]
	copy(s.prevTable, src.prevTable)
	s.prevTableLog = src.prevTableLog
}

// prepare validates per-block parameters, applies defaults and (re)allocates
// scratch buffers so s is ready to process the supplied input.
// It returns the (possibly newly allocated) Scratch, or an error if the input
// is too large or the requested TableLog is out of range.
func (s *Scratch) prepare(in []byte) (*Scratch, error) {
	if len(in) > BlockSizeMax {
		return nil, ErrTooBig
	}
	if s == nil {
		s = &Scratch{}
	}
	if s.MaxSymbolValue == 0 {
		s.MaxSymbolValue = maxSymbolValue
	}
	if s.TableLog == 0 {
		s.TableLog = tableLogDefault
	}
	if s.TableLog > tableLogMax || s.TableLog < minTablelog {
		return nil, fmt.Errorf("invalid tableLog %d (%d -> %d)", s.TableLog, minTablelog, tableLogMax)
	}
	if s.MaxDecodedSize <= 0 || s.MaxDecodedSize > BlockSizeMax {
		s.MaxDecodedSize = BlockSizeMax
	}
	if s.clearCount && s.maxCount == 0 {
		for i := range s.count {
			s.count[i] = 0
		}
		s.clearCount = false
	}
	if cap(s.Out) == 0 {
		s.Out = make([]byte, 0, len(in))
	}
	s.Out = s.Out[:0]

	s.OutTable = nil
	s.OutData = nil
	if cap(s.nodes) < huffNodesLen+1 {
		s.nodes = make([]nodeElt, 0, huffNodesLen+1)
	}
	s.nodes = s.nodes[:0]
	if s.fse == nil {
		s.fse = &fse.Scratch{}
	}
	s.srcLen = len(in)

	return s, nil
}

type cTable []cTableEntry

// write serializes the huffman table to s.Out as weights, FSE-compressed when
// that is smaller, otherwise packed as raw 4-bit values.
// Returns ErrIncompressible if the table cannot be represented.
func (c cTable) write(s *Scratch) error {
	var (
		// precomputed conversion table
		bitsToWeight [tableLogMax + 1]byte
		huffLog      = s.actualTableLog
		// last weight is not saved.
		maxSymbolValue = uint8(s.symbolLen - 1)
		huffWeight     = s.huffWeight[:256]
	)
	const (
		maxFSETableLog = 6
	)
	// convert to weight
	bitsToWeight[0] = 0
	for n := uint8(1); n < huffLog+1; n++ {
		bitsToWeight[n] = huffLog + 1 - n
	}

	// Acquire histogram for FSE.
	hist := s.fse.Histogram()
	hist = hist[:256]
	for i := range hist[:16] {
		hist[i] = 0
	}
	for n := uint8(0); n < maxSymbolValue; n++ {
		v := bitsToWeight[c[n].nBits] & 15
		huffWeight[n] = v
		hist[v]++
	}

	// FSE compress if feasible.
	if maxSymbolValue >= 2 {
		huffMaxCnt := uint32(0)
		huffMax := uint8(0)
		for i, v := range hist[:16] {
			if v == 0 {
				continue
			}
			huffMax = byte(i)
			if v > huffMaxCnt {
				huffMaxCnt = v
			}
		}
		s.fse.HistogramFinished(huffMax, int(huffMaxCnt))
		s.fse.TableLog = maxFSETableLog
		b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse)
		if err == nil && len(b) < int(s.symbolLen>>1) {
			s.Out = append(s.Out, uint8(len(b)))
			s.Out = append(s.Out, b...)
			return nil
		}
		// Unable to compress (RLE/uncompressible)
	}
	// write raw values as 4-bits (max : 15)
	if maxSymbolValue > (256 - 128) {
		// should not happen : likely means source cannot be compressed
		return ErrIncompressible
	}
	op := s.Out
	// special case, pack weights 4 bits/weight.
	op = append(op, 128|(maxSymbolValue-1))
	// be sure it doesn't cause msan issue in final combination
	huffWeight[maxSymbolValue] = 0
	for n := uint16(0); n < uint16(maxSymbolValue); n += 2 {
		op = append(op, (huffWeight[n]<<4)|huffWeight[n+1])
	}
	s.Out = op
	return nil
}

// estTableSize estimates the serialized size of the huffman table in bytes,
// mirroring the logic of write without emitting any output.
func (c cTable) estTableSize(s *Scratch) (sz int, err error) {
	var (
		// precomputed conversion table
		bitsToWeight [tableLogMax + 1]byte
		huffLog      = s.actualTableLog
		// last weight is not saved.
		maxSymbolValue = uint8(s.symbolLen - 1)
		huffWeight     = s.huffWeight[:256]
	)
	const (
		maxFSETableLog = 6
	)
	// convert to weight
	bitsToWeight[0] = 0
	for n := uint8(1); n < huffLog+1; n++ {
		bitsToWeight[n] = huffLog + 1 - n
	}

	// Acquire histogram for FSE.
	hist := s.fse.Histogram()
	hist = hist[:256]
	for i := range hist[:16] {
		hist[i] = 0
	}
	for n := uint8(0); n < maxSymbolValue; n++ {
		v := bitsToWeight[c[n].nBits] & 15
		huffWeight[n] = v
		hist[v]++
	}

	// FSE compress if feasible.
	if maxSymbolValue >= 2 {
		huffMaxCnt := uint32(0)
		huffMax := uint8(0)
		for i, v := range hist[:16] {
			if v == 0 {
				continue
			}
			huffMax = byte(i)
			if v > huffMaxCnt {
				huffMaxCnt = v
			}
		}
		s.fse.HistogramFinished(huffMax, int(huffMaxCnt))
		s.fse.TableLog = maxFSETableLog
		b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse)
		if err == nil && len(b) < int(s.symbolLen>>1) {
			sz += 1 + len(b)
			return sz, nil
		}
		// Unable to compress (RLE/uncompressible)
	}
	// write raw values as 4-bits (max : 15)
	if maxSymbolValue > (256 - 128) {
		// should not happen : likely means source cannot be compressed
		return 0, ErrIncompressible
	}
	// special case, pack weights 4 bits/weight.
	sz += 1 + int(maxSymbolValue/2)
	return sz, nil
}

// estimateSize returns the estimated size in bytes of the input represented in the
// histogram supplied.
func (c cTable) estimateSize(hist []uint32) int {
	nbBits := uint32(7)
	for i, v := range c[:len(hist)] {
		nbBits += uint32(v.nBits) * hist[i]
	}
	return int(nbBits >> 3)
}

// minSize returns the minimum possible size considering the shannon limit.
func (s *Scratch) minSize(total int) int {
	nbBits := float64(7)
	fTotal := float64(total)
	for _, v := range s.count[:s.symbolLen] {
		n := float64(v)
		if n > 0 {
			nbBits += math.Log2(fTotal/n) * n
		}
	}
	return int(nbBits) >> 3
}

// highBit32 returns the index of the highest set bit of v.
// The result is undefined for v == 0 (returns the two's complement of 1 cast to uint32).
func highBit32(v uint32) (n uint32) {
	return uint32(bits.Len32(v) - 1)
}
The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.