package lz4block

import (
	"encoding/binary"
	"math/bits"
	"sync"

	"github.com/pierrec/lz4/v4/internal/lz4errors"
)

const (
	// The following constants are used to set up the compression algorithm.
	minMatch   = 4  // the minimum size of a match sequence (4 bytes)
	winSizeLog = 16 // LZ4 64Kb window size limit
	winSize    = 1 << winSizeLog
	winMask    = winSize - 1 // 64Kb window of previous data for dependent blocks

	// hashLog determines the size of the hash table used to quickly find a previous match position.
	// Its value influences the compression speed and memory usage, the lower the faster,
	// but at the expense of the compression ratio.
	// 16 seems to be the best compromise for fast compression.
	hashLog = 16
	htSize  = 1 << hashLog

	mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes.
)

func recoverBlock(e *error) {
	if r := recover(); r != nil && *e == nil {
		*e = lz4errors.ErrInvalidSourceShortBuffer
	}
}

// blockHash hashes the lower 6 bytes into a value < htSize.
func blockHash(x uint64) uint32 {
	const prime6bytes = 227718039650203
	return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog))
}

// CompressBlockBound returns the maximum size of a compressed block of size n.
func CompressBlockBound(n int) int {
	return n + n/255 + 16
}

// UncompressBlock decompresses the block src into dst, using the optional
// dictionary dict, and returns the number of bytes written to dst.
func UncompressBlock(src, dst, dict []byte) (int, error) {
	if len(src) == 0 {
		return 0, nil
	}
	if di := decodeBlock(dst, src, dict); di >= 0 {
		return di, nil
	}
	return 0, lz4errors.ErrInvalidSourceShortBuffer
}
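// Usage sketch (illustrative only; exampleBlockRoundTrip is a hypothetical
// helper, not part of the package API): round-tripping one block through
// CompressBlock and UncompressBlock. It shows how CompressBlockBound sizes
// the destination buffer and how a zero return from CompressBlock signals
// incompressible input.
func exampleBlockRoundTrip(data []byte) ([]byte, error) {
	buf := make([]byte, CompressBlockBound(len(data)))
	n, err := CompressBlock(data, buf)
	if err != nil {
		return nil, err
	}
	if n == 0 {
		// Incompressible input: callers are expected to store the block raw.
		return append([]byte(nil), data...), nil
	}
	out := make([]byte, len(data))
	if _, err := UncompressBlock(buf[:n], out, nil); err != nil {
		return nil, err
	}
	return out, nil
}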
type Compressor struct {
	// Offsets are at most 64kiB, so we can store only the lower 16 bits of
	// match positions: effectively, an offset from some 64kiB block boundary.
	//
	// When we retrieve such an offset, we interpret it as relative to the last
	// block boundary si &^ 0xffff, or the one before, (si &^ 0xffff) - 0x10000,
	// depending on which of these is inside the current window. If a table
	// entry was generated more than 64kiB back in the input, we find out by
	// inspecting the input stream.
	table [htSize]uint16

	// Bitmap indicating which positions in the table are in use.
	// This allows us to quickly reset the table for reuse,
	// without having to zero everything.
	inUse [htSize / 32]uint32
}

// get returns the position of a presumptive match for the hash h.
// The match may be a false positive due to a hash collision or an old entry.
// If si < winSize, the return value may be negative.
func (c *Compressor) get(h uint32, si int) int {
	h &= htSize - 1
	i := 0
	if c.inUse[h/32]&(1<<(h%32)) != 0 {
		i = int(c.table[h])
	}
	i += si &^ winMask
	if i >= si {
		// Try the previous 64kiB block (negative when in the first block).
		i -= winSize
	}
	return i
}

func (c *Compressor) put(h uint32, si int) {
	h &= htSize - 1
	c.table[h] = uint16(si)
	c.inUse[h/32] |= 1 << (h % 32)
}

func (c *Compressor) reset() { c.inUse = [htSize / 32]uint32{} }

var compressorPool = sync.Pool{New: func() interface{} { return new(Compressor) }}

// CompressBlock compresses src into dst using a pooled Compressor.
func CompressBlock(src, dst []byte) (int, error) {
	c := compressorPool.Get().(*Compressor)
	n, err := c.CompressBlock(src, dst)
	compressorPool.Put(c)
	return n, err
}

func (c *Compressor) CompressBlock(src, dst []byte) (int, error) {
	// Zero out the reused table to avoid non-deterministic output (issue #65).
	c.reset()

	// Return 0, nil only if the destination buffer size is < CompressBlockBound.
	isNotCompressible := len(dst) < CompressBlockBound(len(src))

	// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
	// This significantly speeds up incompressible data and usually has very small impact on compression.
	// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
	const adaptSkipLog = 7

	// si: Current position of the search.
	// anchor: Position of the current literals.
	var si, di, anchor int
	sn := len(src) - mfLimit
	if sn <= 0 {
		goto lastLiterals
	}

	// Fast scan strategy: the hash table only stores the last 4-byte sequences.
	for si < sn {
		// Hash the next 6 bytes (sequence)...
		match := binary.LittleEndian.Uint64(src[si:])
		h := blockHash(match)
		h2 := blockHash(match >> 8)

		// We check a match at s, s+1 and s+2 and pick the first one we get.
		// Checking 3 only requires us to load the source once.
		ref := c.get(h, si)
		ref2 := c.get(h2, si+1)
		c.put(h, si)
		c.put(h2, si+1)

		offset := si - ref

		if offset <= 0 || offset >= winSize || uint32(match) != binary.LittleEndian.Uint32(src[ref:]) {
			// No match. Start calculating another hash.
			// The processor can usually do this out-of-order.
			h = blockHash(match >> 16)
			ref3 := c.get(h, si+2)

			// Check the second match at si+1.
			si += 1
			offset = si - ref2

			if offset <= 0 || offset >= winSize || uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
				// No match. Check the third match at si+2.
				si += 1
				offset = si - ref3
				c.put(h, si)

				if offset <= 0 || offset >= winSize || uint32(match>>16) != binary.LittleEndian.Uint32(src[ref3:]) {
					// Skip one extra byte (at si+3) before we check 3 matches again.
					si += 2 + (si-anchor)>>adaptSkipLog
					continue
				}
			}
		}

		// Match found.
		lLen := si - anchor // Literal length.
		// We already matched 4 bytes.
		mLen := 4

		// Extend backwards if we can, reducing literals.
		tOff := si - offset - 1
		for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] {
			si--
			tOff--
			lLen--
			mLen++
		}

		// Add the match length, so we continue searching at the end.
		// Use mLen to store the offset base.
		si, mLen = si+mLen, si+minMatch

		// Find the longest match by comparing in batches of 8 bytes.
		for si+8 <= sn {
			x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
			if x == 0 {
				si += 8
			} else {
				// Stop at the first non-zero byte.
				si += bits.TrailingZeros64(x) >> 3
				break
			}
		}

		mLen = si - mLen
		if di >= len(dst) {
			return 0, lz4errors.ErrInvalidSourceShortBuffer
		}
		if mLen < 0xF {
			dst[di] = byte(mLen)
		} else {
			dst[di] = 0xF
		}

		// Encode literals length.
		if lLen < 0xF {
			dst[di] |= byte(lLen << 4)
		} else {
			dst[di] |= 0xF0
			di++
			l := lLen - 0xF
			for ; l >= 0xFF && di < len(dst); l -= 0xFF {
				dst[di] = 0xFF
				di++
			}
			if di >= len(dst) {
				return 0, lz4errors.ErrInvalidSourceShortBuffer
			}
			dst[di] = byte(l)
		}
		di++

		// Literals.
		if di+lLen > len(dst) {
			return 0, lz4errors.ErrInvalidSourceShortBuffer
		}
		copy(dst[di:di+lLen], src[anchor:anchor+lLen])
		di += lLen + 2
		anchor = si

		// Encode offset.
		if di > len(dst) {
			return 0, lz4errors.ErrInvalidSourceShortBuffer
		}
		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)

		// Encode match length part 2.
		if mLen >= 0xF {
			for mLen -= 0xF; mLen >= 0xFF && di < len(dst); mLen -= 0xFF {
				dst[di] = 0xFF
				di++
			}
			if di >= len(dst) {
				return 0, lz4errors.ErrInvalidSourceShortBuffer
			}
			dst[di] = byte(mLen)
			di++
		}
		// Check if we can load the next values.
		if si >= sn {
			break
		}
		// Hash match end-2.
		h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
		c.put(h, si-2)
	}

lastLiterals:
	if isNotCompressible && anchor == 0 {
		// Incompressible.
		return 0, nil
	}

	// Last literals.
	if di >= len(dst) {
		return 0, lz4errors.ErrInvalidSourceShortBuffer
	}
	lLen := len(src) - anchor
	if lLen < 0xF {
		dst[di] = byte(lLen << 4)
	} else {
		dst[di] = 0xF0
		di++
		for lLen -= 0xF; lLen >= 0xFF && di < len(dst); lLen -= 0xFF {
			dst[di] = 0xFF
			di++
		}
		if di >= len(dst) {
			return 0, lz4errors.ErrInvalidSourceShortBuffer
		}
		dst[di] = byte(lLen)
	}
	di++

	// Write the last literals.
	if isNotCompressible && di >= anchor {
		// Incompressible.
		return 0, nil
	}
	if di+len(src)-anchor > len(dst) {
		return 0, lz4errors.ErrInvalidSourceShortBuffer
	}
	di += copy(dst[di:di+len(src)-anchor], src[anchor:])
	return di, nil
}
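// Sketch of the sequence encoding used above (illustrative only; the two
// helpers below are hypothetical and not part of the package). An LZ4
// sequence starts with a token byte whose high nibble is the literal length
// and whose low nibble is the match length minus minMatch; each nibble
// saturates at 0xF, with the remainder spilled into 0xFF continuation bytes,
// mirroring the "Encode literals length" and "Encode match length part 2"
// steps above.
func exampleToken(lLen, mLen int) byte {
	// Assumes mLen >= minMatch, as every emitted match is.
	l, m := lLen, mLen-minMatch
	if l > 0xF {
		l = 0xF
	}
	if m > 0xF {
		m = 0xF
	}
	return byte(l<<4 | m)
}

// exampleLengthTail appends the continuation bytes for a length field whose
// 4-bit nibble saturated (n >= 0xF), matching the loops above.
func exampleLengthTail(dst []byte, n int) []byte {
	for n -= 0xF; n >= 0xFF; n -= 0xFF {
		dst = append(dst, 0xFF)
	}
	return append(dst, byte(n))
}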
// blockHashHC hashes 4 bytes into a value < winSize.
func blockHashHC(x uint32) uint32 {
	const hasher uint32 = 2654435761 // Knuth multiplicative hash.
	return x * hasher >> (32 - winSizeLog)
}

type CompressorHC struct {
	// hashTable: stores the last position found for a given hash.
	// chainTable: stores previous positions for a given hash.
	hashTable, chainTable [htSize]int
	needsReset            bool
}

var compressorHCPool = sync.Pool{New: func() interface{} { return new(CompressorHC) }}

// CompressBlockHC compresses src into dst using a pooled CompressorHC.
func CompressBlockHC(src, dst []byte, depth CompressionLevel) (int, error) {
	c := compressorHCPool.Get().(*CompressorHC)
	n, err := c.CompressBlock(src, dst, depth)
	compressorHCPool.Put(c)
	return n, err
}

func (c *CompressorHC) CompressBlock(src, dst []byte, depth CompressionLevel) (_ int, err error) {
	if c.needsReset {
		// Zero out the reused tables to avoid non-deterministic output (issue #65).
		c.hashTable = [htSize]int{}
		c.chainTable = [htSize]int{}
	}
	c.needsReset = true // Only false on the first call.

	defer recoverBlock(&err)

	// Return 0, nil only if the destination buffer size is < CompressBlockBound.
	isNotCompressible := len(dst) < CompressBlockBound(len(src))

	// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
	// This significantly speeds up incompressible data and usually has very small impact on compression.
	// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
	const adaptSkipLog = 7

	var si, di, anchor int
	sn := len(src) - mfLimit
	if sn <= 0 {
		goto lastLiterals
	}

	if depth == 0 {
		depth = winSize
	}

	for si < sn {
		// Hash the next 4 bytes (sequence).
		match := binary.LittleEndian.Uint32(src[si:])
		h := blockHashHC(match)

		// Follow the chain until out of window and keep the longest match.
		mLen := 0
		offset := 0
		for next, try := c.hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next, try = c.chainTable[next&winMask], try-1 {
			// The first (mLen==0) or next byte (mLen>=minMatch) at the current match length
			// must match to improve on the match length.
			if src[next+mLen] != src[si+mLen] {
				continue
			}
			ml := 0
			// Compare the current position with a previous one with the same hash.
			for ml < sn-si {
				x := binary.LittleEndian.Uint64(src[next+ml:]) ^ binary.LittleEndian.Uint64(src[si+ml:])
				if x == 0 {
					ml += 8
				} else {
					// Stop at the first non-zero byte.
					ml += bits.TrailingZeros64(x) >> 3
					break
				}
			}
			if ml < minMatch || ml <= mLen {
				// Match too small (< minMatch) or smaller than the current match.
				continue
			}
			// Found a longer match, keep its position and length.
			mLen = ml
			offset = si - next
			// Try another previous position with the same hash.
		}
		c.chainTable[si&winMask] = c.hashTable[h]
		c.hashTable[h] = si

		// No match found.
		if mLen == 0 {
			si += 1 + (si-anchor)>>adaptSkipLog
			continue
		}

		// Match found.
		// Update the hash/chain tables with overlapping bytes:
		// si is already hashed, add everything from si+1 up to the match length.
		winStart := si + 1
		if ws := si + mLen - winSize; ws > winStart {
			winStart = ws
		}
		for si, ml := winStart, si+mLen; si < ml; {
			match >>= 8
			match |= uint32(src[si+3]) << 24
			h := blockHashHC(match)
			c.chainTable[si&winMask] = c.hashTable[h]
			c.hashTable[h] = si
			si++
		}

		lLen := si - anchor
		si += mLen
		mLen -= minMatch // The match length does not include minMatch.

		if mLen < 0xF {
			dst[di] = byte(mLen)
		} else {
			dst[di] = 0xF
		}

		// Encode literals length.
		if lLen < 0xF {
			dst[di] |= byte(lLen << 4)
		} else {
			dst[di] |= 0xF0
			di++
			l := lLen - 0xF
			for ; l >= 0xFF; l -= 0xFF {
				dst[di] = 0xFF
				di++
			}
			dst[di] = byte(l)
		}
		di++

		// Literals.
		copy(dst[di:di+lLen], src[anchor:anchor+lLen])
		di += lLen
		anchor = si

		// Encode offset.
		di += 2
		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)

		// Encode match length part 2.
		if mLen >= 0xF {
			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
				dst[di] = 0xFF
				di++
			}
			dst[di] = byte(mLen)
			di++
		}
	}

	if isNotCompressible && anchor == 0 {
		// Incompressible.
		return 0, nil
	}

	// Last literals.
lastLiterals:
	lLen := len(src) - anchor
	if lLen < 0xF {
		dst[di] = byte(lLen << 4)
	} else {
		dst[di] = 0xF0
		di++
		lLen -= 0xF
		for ; lLen >= 0xFF; lLen -= 0xFF {
			dst[di] = 0xFF
			di++
		}
		dst[di] = byte(lLen)
	}
	di++

	// Write the last literals.
	if isNotCompressible && di >= anchor {
		// Incompressible.
		return 0, nil
	}
	di += copy(dst[di:di+len(src)-anchor], src[anchor:])
	return di, nil
}
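// Usage sketch for the high-compression path (illustrative only; the helper
// name and the chosen level are hypothetical): CompressBlockHC follows the
// hash chain for up to depth candidate positions per input position, so
// larger levels trade speed for ratio, while a zero CompressionLevel searches
// the whole 64 KiB window (see the depth == 0 branch above).
func exampleCompressHC(data []byte) ([]byte, error) {
	buf := make([]byte, CompressBlockBound(len(data)))
	n, err := CompressBlockHC(data, buf, CompressionLevel(16))
	if err != nil {
		return nil, err
	}
	return buf[:n], nil
}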