package matchfinderimport ()// M0 is an implementation of the MatchFinder interface based// on the algorithm used by snappy, but modified to be more like the algorithm// used by compression level 0 of the brotli reference implementation.//// It has a maximum block size of 65536 bytes.typeM0struct {// Lazy turns on "lazy matching," for higher compression but less speed. Lazy bool MaxDistance int MaxLength int}func (M0) () {}const ( m0HashLen = 5 m0TableBits = 14 m0TableSize = 1 << m0TableBits m0Shift = 32 - m0TableBits// m0TableMask is redundant, but helps the compiler eliminate bounds // checks. m0TableMask = m0TableSize - 1)func ( M0) ( uint64) uint64 { := ( << (64 - 8*m0HashLen)) * hashMul64return >> (64 - m0TableBits)}// FindMatches looks for matches in src, appends them to dst, and returns dst.// src must not be longer than 65536 bytes.func ( M0) ( []Match, []byte) []Match {const = 16 - 1const = 1 + 1 + iflen() < { = append(, Match{Unmatched: len(), })return }iflen() > 65536 {panic("block too long") }var [m0TableSize]uint16// sLimit is when to stop looking for offset/length copies. The inputMargin // lets us use a fast path for emitLiteral in the main loop, while we are // looking for copies. := len() - // nextEmit is where in src the next emitLiteral should start from. := 0// The encoded form must start with a literal, as there are no previous // bytes to copy, so we start looking for hash matches at s == 1. := 1 := .hash(binary.LittleEndian.Uint64([:]))for {// Copied from the C++ snappy implementation: // // Heuristic match skipping: If 32 bytes are scanned with no matches // found, start looking only at every other byte. If 32 more bytes are // scanned (or skipped), look at every third byte, etc.. When a match // is found, immediately go back to looking at every byte. This is a // small loss (~5% performance, ~0.1% density) for compressible data // due to more bookkeeping, but for non-compressible data (such as // JPEG) it's a huge win since the compressor quickly "realizes" the // data is incompressible and doesn't bother looking for matches // everywhere. // // The "skip" variable keeps track of how many bytes there are since // the last match; dividing it by 32 (ie. right-shifting by five) gives // the number of bytes to move ahead for each iteration. := 32 := := 0for { = := >> 5 = + += if > {goto } = int([&m0TableMask]) [&m0TableMask] = uint16() = .hash(binary.LittleEndian.Uint64([:]))if .MaxDistance != 0 && - > .MaxDistance {continue }ifbinary.LittleEndian.Uint32([:]) == binary.LittleEndian.Uint32([:]) {break } }// Invariant: we have a 4-byte match at s. := = extendMatch(, +4, +4) := if .Lazy && +1 < { := + 1 := .hash(binary.LittleEndian.Uint64([:])) := int([&m0TableMask]) [&m0TableMask] = uint16() := trueif .MaxDistance != 0 && - > .MaxDistance { = false }if && binary.LittleEndian.Uint32([:]) == binary.LittleEndian.Uint32([:]) { := extendMatch(, +4, +4)if - > -+1 { = = = } } }if .MaxLength != 0 && - > .MaxLength { = + .MaxLength } = append(, Match{Unmatched: - ,Length: - ,Distance: - , }) = if >= {goto }if .Lazy {// If lazy matching is enabled, we update the hash table for // every byte in the match.for := + 2; < -1; ++ { := binary.LittleEndian.Uint64([:]) [.hash()&m0TableMask] = uint16() } }// We could immediately start working at s now, but to improve // compression we first update the hash table at s-1 and at s. := binary.LittleEndian.Uint64([-1:]) := .hash( >> 0) [&m0TableMask] = uint16( - 1) = .hash( >> 8) }:if < len() { = append(, Match{Unmatched: len() - , }) }return}
The pages are generated with Goldsv0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.