// Copyright 2009 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.
// Package flate implements the DEFLATE compressed data format, described in// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file// formats.
package flateimport ()const ( maxCodeLen = 16// max length of Huffman code maxCodeLenMask = 15// mask for max length of Huffman code// The next three numbers come from the RFC section 3.2.7, with the // additional proviso in section 3.2.5 which implies that distance codes // 30 and 31 should never occur in compressed data. maxNumLit = 286 maxNumDist = 30 numCodes = 19// number of codes in Huffman meta-code debugDecode = false)// Value of length - 3 and extra bits.type lengthExtra struct { length, extra uint8}var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}}var bitMask32 = [32]uint32{0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,} // up to 32 bits// Initialize the fixedHuffmanDecoder only once upon first use.var fixedOnce sync.Oncevar fixedHuffmanDecoder huffmanDecoder// A CorruptInputError reports the presence of corrupt input at a given offset.typeCorruptInputError = flate.CorruptInputError// An InternalError reports an error in the flate code itself.typeInternalErrorstringfunc ( InternalError) () string { return"flate: internal error: " + string() }// A ReadError reports an error encountered while reading input.//// Deprecated: No longer returned.typeReadError = flate.ReadError// A WriteError reports an error encountered while writing output.//// Deprecated: No longer returned.typeWriteError = flate.WriteError// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to// to switch to a new underlying Reader. This permits reusing a ReadCloser// instead of allocating a new one.typeResetterinterface {// Reset discards any buffered data and resets the Resetter as if it was // newly initialized with the given reader.Reset(r io.Reader, dict []byte) error}// The data structure for decoding Huffman tables is based on that of// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),// For codes smaller than the table width, there are multiple entries// (each combination of trailing bits has the same value). For codes// larger than the table width, the table contains a link to an overflow// table. The width of each entry in the link table is the maximum code// size minus the chunk width.//// Note that you can do a lookup in the table even without all bits// filled. Since the extra bits are zero, and the DEFLATE Huffman codes// have the property that shorter codes come before longer ones, the// bit length estimate in the result is a lower bound on the actual// number of bits.//// See the following:// http://www.gzip.org/algorithm.txt// chunk & 15 is number of bits// chunk >> 4 is value, including table linkconst ( huffmanChunkBits = 9 huffmanNumChunks = 1 << huffmanChunkBits huffmanCountMask = 15 huffmanValueShift = 4)type huffmanDecoder struct { maxRead int// the maximum number of bits we can read and not overread chunks *[huffmanNumChunks]uint16// chunks as described above links [][]uint16// overflow links linkMask uint32// mask the width of the link table}// Initialize Huffman decoding tables from array of code lengths.// Following this function, h is guaranteed to be initialized into a complete// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a// degenerate case where the tree has only a single symbol with length 1. Empty// trees are permitted.func ( *huffmanDecoder) ( []int) bool {// Sanity enables additional runtime tests during Huffman // table construction. It's intended to be used during // development to supplement the currently ad-hoc unit tests.const = falseif .chunks == nil { .chunks = new([huffmanNumChunks]uint16) }if .maxRead != 0 { * = huffmanDecoder{chunks: .chunks, links: .links} }// Count number of codes of each length, // compute maxRead and max length.var [maxCodeLen]intvar , intfor , := range {if == 0 {continue }if == 0 || < { = }if > { = } [&maxCodeLenMask]++ }// Empty tree. The decompressor.huffSym function will fail later if the tree // is used. Technically, an empty tree is only valid for the HDIST tree and // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree // is guaranteed to fail since it will attempt to use the tree to decode the // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is // guaranteed to fail later since the compressed data section must be // composed of at least one symbol (the end-of-block marker).if == 0 {returntrue } := 0var [maxCodeLen]intfor := ; <= ; ++ { <<= 1 [&maxCodeLenMask] = += [&maxCodeLenMask] }// Check that the coding is complete (i.e., that we've // assigned all 2-to-the-max possible bit sequences). // Exception: To be compatible with zlib, we also need to // accept degenerate single-code codings. See also // TestDegenerateHuffmanCoding.if != 1<<uint() && !( == 1 && == 1) {ifdebugDecode {fmt.Println("coding failed, code, max:", , , == 1<<uint(), == 1 && == 1, "(one should be true)") }returnfalse } .maxRead = := .chunks[:]for := range { [] = 0 }if > huffmanChunkBits { := 1 << (uint() - huffmanChunkBits) .linkMask = uint32( - 1)// create link tables := [huffmanChunkBits+1] >> 1ifcap(.links) < huffmanNumChunks- { .links = make([][]uint16, huffmanNumChunks-) } else { .links = .links[:huffmanNumChunks-] }for := uint(); < huffmanNumChunks; ++ { := int(bits.Reverse16(uint16())) >>= uint(16 - huffmanChunkBits) := - uint()if && .chunks[] != 0 {panic("impossible: overwriting existing chunk") } .chunks[] = uint16(<<huffmanValueShift | (huffmanChunkBits + 1))ifcap(.links[]) < { .links[] = make([]uint16, ) } else { .links[] = .links[][:] } } } else { .links = .links[:0] }for , := range {if == 0 {continue } := [] []++ := uint16(<<huffmanValueShift | ) := int(bits.Reverse16(uint16())) >>= uint(16 - )if <= huffmanChunkBits {for := ; < len(.chunks); += 1 << uint() {// We should never need to overwrite // an existing chunk. Also, 0 is // never a valid chunk, because the // lower 4 "count" bits should be // between 1 and 15.if && .chunks[] != 0 {panic("impossible: overwriting existing chunk") } .chunks[] = } } else { := & (huffmanNumChunks - 1)if && .chunks[]&huffmanCountMask != huffmanChunkBits+1 {// Longer codes should have been // associated with a link table above.panic("impossible: not an indirect chunk") } := .chunks[] >> huffmanValueShift := .links[] >>= huffmanChunkBitsfor := ; < len(); += 1 << uint(-huffmanChunkBits) {if && [] != 0 {panic("impossible: overwriting existing chunk") } [] = } } }if {// Above we've sanity checked that we never overwrote // an existing entry. Here we additionally check that // we filled the tables completely.for , := range .chunks {if == 0 {// As an exception, in the degenerate // single-code case, we allow odd // chunks to be missing.if == 1 && %2 == 1 {continue }panic("impossible: missing chunk") } }for , := range .links {for , := range {if == 0 {panic("impossible: missing chunk") } } } }returntrue}// Reader is the actual read interface needed by NewReader.// If the passed in io.Reader does not also have ReadByte,// the NewReader will introduce its own buffering.typeReaderinterface {io.Readerio.ByteReader}type step uint8const ( copyData step = iota + 1 nextBlock huffmanBytesBuffer huffmanBytesReader huffmanBufioReader huffmanStringsReader huffmanGenericReader)// flushMode tells decompressor when to return datatype flushMode uint8const ( syncFlush flushMode = iota// return data after sync flush block partialFlush // return data after each block)// Decompress state.type decompressor struct {// Input source. r Reader roffset int64// Huffman decoders for literal/length, distance. h1, h2 huffmanDecoder// Length arrays used to define Huffman codes. bits *[maxNumLit + maxNumDist]int codebits *[numCodes]int// Output history, buffer. dict dictDecoder// Next step in the decompression, // and decompression state. step step stepState int err error toRead []byte hl, hd *huffmanDecoder copyLen int copyDist int// Temporary buffer (avoids repeated allocation). buf [4]byte// Input bits, in top of b. b uint32 nb uint final bool flushMode flushMode}func ( *decompressor) () {for .nb < 1+2 {if .err = .moreBits(); .err != nil {return } } .final = .b&1 == 1 .b >>= 1 := .b & 3 .b >>= 2 .nb -= 1 + 2switch {case0: .dataBlock()ifdebugDecode {fmt.Println("stored block") }case1:// compressed, fixed Huffman tables .hl = &fixedHuffmanDecoder .hd = nil .huffmanBlockDecoder()ifdebugDecode {fmt.Println("predefinied huffman block") }case2:// compressed, dynamic Huffman tablesif .err = .readHuffman(); .err != nil {break } .hl = &.h1 .hd = &.h2 .huffmanBlockDecoder()ifdebugDecode {fmt.Println("dynamic huffman block") }default:// 3 is reserved.ifdebugDecode {fmt.Println("reserved data block encountered") } .err = CorruptInputError(.roffset) }}func ( *decompressor) ( []byte) (int, error) {for {iflen(.toRead) > 0 { := copy(, .toRead) .toRead = .toRead[:]iflen(.toRead) == 0 {return , .err }return , nil }if .err != nil {return0, .err } .doStep()if .err != nil && len(.toRead) == 0 { .toRead = .dict.readFlush() // Flush what's left in case of error } }}// WriteTo implements the io.WriteTo interface for io.Copy and friends.func ( *decompressor) ( io.Writer) (int64, error) { := int64(0) := falsefor {iflen(.toRead) > 0 { , := .Write(.toRead) += int64()if != nil { .err = return , }if != len(.toRead) {return , io.ErrShortWrite } .toRead = .toRead[:0] }if .err != nil && {if .err == io.EOF {return , nil }return , .err }if .err == nil { .doStep() }iflen(.toRead) == 0 && .err != nil && ! { .toRead = .dict.readFlush() // Flush what's left in case of error = true } }}func ( *decompressor) () error {if .err == io.EOF {returnnil }return .err}// RFC 1951 section 3.2.7.// Compression with dynamic Huffman codesvar codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}func ( *decompressor) () error {// HLIT[5], HDIST[5], HCLEN[4].for .nb < 5+5+4 {if := .moreBits(); != nil {return } } := int(.b&0x1F) + 257if > maxNumLit {ifdebugDecode {fmt.Println("nlit > maxNumLit", ) }returnCorruptInputError(.roffset) } .b >>= 5 := int(.b&0x1F) + 1if > maxNumDist {ifdebugDecode {fmt.Println("ndist > maxNumDist", ) }returnCorruptInputError(.roffset) } .b >>= 5 := int(.b&0xF) + 4// numCodes is 19, so nclen is always valid. .b >>= 4 .nb -= 5 + 5 + 4// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.for := 0; < ; ++ {for .nb < 3 {if := .moreBits(); != nil {return } } .codebits[codeOrder[]] = int(.b & 0x7) .b >>= 3 .nb -= 3 }for := ; < len(codeOrder); ++ { .codebits[codeOrder[]] = 0 }if !.h1.init(.codebits[0:]) {ifdebugDecode {fmt.Println("init codebits failed") }returnCorruptInputError(.roffset) }// HLIT + 257 code lengths, HDIST + 1 code lengths, // using the code length Huffman code.for , := 0, +; < ; { , := .huffSym(&.h1)if != nil {return }if < 16 {// Actual length. .bits[] = ++continue }// Repeat previous length or zero.varintvaruintvarintswitch {default:returnInternalError("unexpected length code")case16: = 3 = 2if == 0 {ifdebugDecode {fmt.Println("i==0") }returnCorruptInputError(.roffset) } = .bits[-1]case17: = 3 = 3 = 0case18: = 11 = 7 = 0 }for .nb < {if := .moreBits(); != nil {ifdebugDecode {fmt.Println("morebits:", ) }return } } += int(.b & uint32(1<<(®SizeMaskUint32)-1)) .b >>= & regSizeMaskUint32 .nb -= if + > {ifdebugDecode {fmt.Println("i+rep > n", , , ) }returnCorruptInputError(.roffset) }for := 0; < ; ++ { .bits[] = ++ } }if !.h1.init(.bits[0:]) || !.h2.init(.bits[:+]) {ifdebugDecode {fmt.Println("init2 failed") }returnCorruptInputError(.roffset) }// As an optimization, we can initialize the maxRead bits to read at a time // for the HLIT tree to the length of the EOB marker since we know that // every block must terminate with one. This preserves the property that // we never read any extra bytes after the end of the DEFLATE stream.if .h1.maxRead < .bits[endBlockMarker] { .h1.maxRead = .bits[endBlockMarker] }if !.final {// If not the final block, the smallest block possible is // a predefined table, BTYPE=01, with a single EOB marker. // This will take up 3 + 7 bits. .h1.maxRead += 10 }returnnil}// Copy a single uncompressed data block from input to output.func ( *decompressor) () {// Uncompressed. // Discard current half-byte. := (.nb) & 7 .nb -= .b >>= := .nb >> 3// Unfilled values will be overwritten. .buf[0] = uint8(.b) .buf[1] = uint8(.b >> 8) .buf[2] = uint8(.b >> 16) .buf[3] = uint8(.b >> 24) .roffset += int64() .nb, .b = 0, 0// Length then ones-complement of length. , := io.ReadFull(.r, .buf[:4]) .roffset += int64()if != nil { .err = noEOF()return } := uint16(.buf[0]) | uint16(.buf[1])<<8 := uint16(.buf[2]) | uint16(.buf[3])<<8if != ^ {ifdebugDecode { := ^fmt.Println("uint16(nn) != uint16(^n)", , ) } .err = CorruptInputError(.roffset)return }if == 0 {if .flushMode == syncFlush { .toRead = .dict.readFlush() } .finishBlock()return } .copyLen = int() .copyData()}// copyData copies f.copyLen bytes from the underlying reader into f.hist.// It pauses for reads when f.hist is full.func ( *decompressor) () { := .dict.writeSlice()iflen() > .copyLen { = [:.copyLen] } , := io.ReadFull(.r, ) .roffset += int64() .copyLen -= .dict.writeMark()if != nil { .err = noEOF()return }if .dict.availWrite() == 0 || .copyLen > 0 { .toRead = .dict.readFlush() .step = copyDatareturn } .finishBlock()}func ( *decompressor) () {if .final {if .dict.availRead() > 0 { .toRead = .dict.readFlush() } .err = io.EOF } elseif .flushMode == partialFlush && .dict.availRead() > 0 { .toRead = .dict.readFlush() } .step = nextBlock}func ( *decompressor) () {switch .step {casecopyData: .copyData()casenextBlock: .nextBlock()casehuffmanBytesBuffer: .huffmanBytesBuffer()casehuffmanBytesReader: .huffmanBytesReader()casehuffmanBufioReader: .huffmanBufioReader()casehuffmanStringsReader: .huffmanStringsReader()casehuffmanGenericReader: .huffmanGenericReader()default:panic("BUG: unexpected step state") }}// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.func noEOF( error) error {if == io.EOF {returnio.ErrUnexpectedEOF }return}func ( *decompressor) () error { , := .r.ReadByte()if != nil {returnnoEOF() } .roffset++ .b |= uint32() << (.nb & regSizeMaskUint32) .nb += 8returnnil}// Read the next Huffman-encoded symbol from f according to h.func ( *decompressor) ( *huffmanDecoder) (int, error) {// Since a huffmanDecoder can be empty or be composed of a degenerate tree // with single element, huffSym must error on these two edge cases. In both // cases, the chunks slice will be 0 for the invalid sequence, leading it // satisfy the n == 0 check below. := uint(.maxRead)// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. , := .nb, .bfor {for < { , := .r.ReadByte()if != nil { .b = .nb = return0, noEOF() } .roffset++ |= uint32() << ( & regSizeMaskUint32) += 8 } := .chunks[&(huffmanNumChunks-1)] = uint( & huffmanCountMask)if > huffmanChunkBits { = .links[>>huffmanValueShift][(>>huffmanChunkBits)&.linkMask] = uint( & huffmanCountMask) }if <= {if == 0 { .b = .nb = ifdebugDecode {fmt.Println("huffsym: n==0") } .err = CorruptInputError(.roffset)return0, .err } .b = >> ( & regSizeMaskUint32) .nb = - returnint( >> huffmanValueShift), nil } }}func makeReader( io.Reader) Reader {if , := .(Reader); {return }returnbufio.NewReader()}func fixedHuffmanDecoderInit() {fixedOnce.Do(func() {// These come from the RFC section 3.2.6.var [288]intfor := 0; < 144; ++ { [] = 8 }for := 144; < 256; ++ { [] = 9 }for := 256; < 280; ++ { [] = 7 }for := 280; < 288; ++ { [] = 8 }fixedHuffmanDecoder.init([:]) })}func ( *decompressor) ( io.Reader, []byte) error { * = decompressor{r: makeReader(),bits: .bits,codebits: .codebits,h1: .h1,h2: .h2,dict: .dict,step: nextBlock, } .dict.init(maxMatchOffset, )returnnil}typeReaderOptfunc(*decompressor)// WithPartialBlock tells decompressor to return after each block,// so it can read data written with partial flushfunc () ReaderOpt {returnfunc( *decompressor) { .flushMode = partialFlush }}// WithDict initializes the reader with a preset dictionaryfunc ( []byte) ReaderOpt {returnfunc( *decompressor) { .dict.init(maxMatchOffset, ) }}// NewReaderOpts returns new reader with provided optionsfunc ( io.Reader, ...ReaderOpt) io.ReadCloser {fixedHuffmanDecoderInit()vardecompressor .r = makeReader() .bits = new([maxNumLit + maxNumDist]int) .codebits = new([numCodes]int) .step = nextBlock .dict.init(maxMatchOffset, nil)for , := range { (&) }return &}// NewReader returns a new ReadCloser that can be used// to read the uncompressed version of r.// If r does not also implement io.ByteReader,// the decompressor may read more data than necessary from r.// It is the caller's responsibility to call Close on the ReadCloser// when finished reading.//// The ReadCloser returned by NewReader also implements Resetter.func ( io.Reader) io.ReadCloser {returnNewReaderOpts()}// NewReaderDict is like NewReader but initializes the reader// with a preset dictionary. The returned Reader behaves as if// the uncompressed data stream started with the given dictionary,// which has already been read. NewReaderDict is typically used// to read data compressed by NewWriterDict.//// The ReadCloser returned by NewReader also implements Resetter.func ( io.Reader, []byte) io.ReadCloser {returnNewReaderOpts(, WithDict())}
The pages are generated with Goldsv0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.