// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s2

import (
	"bytes"
	"fmt"
	"math/bits"
)

// hash4 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4(u uint64, h uint8) uint32 {
	const prime4bytes = 2654435761
	return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
}

// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash5(u uint64, h uint8) uint32 {
	const prime5bytes = 889523592379
	return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63))
}

// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
	const prime7bytes = 58295818150454627
	return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
}

// hash8 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash8(u uint64, h uint8) uint32 {
	const prime8bytes = 0xcf1bbcdcb7a56463
	return uint32((u * prime8bytes) >> ((64 - h) & 63))
}
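
// exampleHashIndexing is an illustrative sketch added by the editor; it is
// not part of the upstream file. It shows how the multiplicative hashes above
// are used: hashN(v, bits) always returns an index below 1<<bits, so the
// result can index a fixed-size table directly. The table sizes shown match
// the long/short tables used by encodeBlockBetterGo below.
func exampleHashIndexing(v uint64) (long, short uint32) {
	const lTableBits = 17 // long matches: hash of the low 7 bytes
	const sTableBits = 14 // short matches: hash of the low 4 bytes
	long = hash7(v, lTableBits)  // always < 1<<17
	short = hash4(v, sTableBits) // always < 1<<14
	return long, short
}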

// encodeBlockBetterGo encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterGo(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 17
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 14
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint32
	var sTable [maxSTableSize]uint32

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We initialize repeat to 0, so we never match on first attempt
	repeat := 0

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = s + (s-nextEmit)>>7 + 1
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			valLong := load64(src, candidateL)
			valShort := load64(src, candidateS)

			// If long matches at least 8 bytes, use that.
			if cv == valLong {
				break
			}
			if cv == valShort {
				candidateL = candidateS
				break
			}

			// Check repeat at offset checkRep.
			const checkRep = 1
			// Minimum length of a repeat. Tested with various values.
			// While 4-5 offers improvements in some, 6 reduces
			// regressions significantly.
			const wantRepeatBytes = 6
			const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
			if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + wantRepeatBytes + checkRep
				s += wantRepeatBytes + checkRep
				for s < len(src) {
					if len(src)-s < 8 {
						if src[candidate] == src[s] {
							s++
							candidate++
							continue
						}
						break
					}
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], repeat, s-base)
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}
				// Index in-between
				index0 := base + 1
				index1 := s - 2

				for index0 < index1 {
					cv0 := load64(src, index0)
					cv1 := load64(src, index1)
					lTable[hash7(cv0, lTableBits)] = uint32(index0)
					sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

					lTable[hash7(cv1, lTableBits)] = uint32(index1)
					sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
					index0 += 2
					index1 -= 2
				}

				cv = load64(src, s)
				continue
			}

			// Long likely matches 7, so take that.
			if uint32(cv) == uint32(valLong) {
				break
			}

			// Check our short candidate
			if uint32(cv) == uint32(valShort) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[candidateL] == src[s] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal or worse to the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		if repeat == offset {
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		// lTable could be postponed, but very minor difference.
		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return
}
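
// roundTripBetter is an illustrative sketch added by the editor; it is not
// part of the upstream file. Callers do not invoke encodeBlockBetterGo
// directly: the exported EncodeBetter writes the varint length header that
// this block encoder assumes has already been written, then runs a block
// encoder such as the one above (or its assembly equivalent, where present).
func roundTripBetter(data []byte) ([]byte, error) {
	compressed := EncodeBetter(nil, data) // "better" mode: more compression, slower
	return Decode(nil, compressed)        // should return the original data
}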

// encodeBlockBetterSnappyGo encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterSnappyGo(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 16
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 14
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint32
	var sTable [maxSTableSize]uint32

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We initialize repeat to 0, so we never match on first attempt
	repeat := 0
	const maxSkip = 100

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = min(s+(s-nextEmit)>>7+1, s+maxSkip)

			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			if uint32(cv) == load32(src, candidateL) {
				break
			}

			// Check our short candidate
			if uint32(cv) == load32(src, candidateS) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[candidateL] == src[s] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal or worse to the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		d += emitCopyNoRepeat(dst[d:], offset, s-base)
		repeat = offset
		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return
}
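
// encodeSnappyCompat is an illustrative sketch added by the editor; it is
// not part of the upstream file. The *SnappyGo variants above emit only
// plain copies via emitCopyNoRepeat (no S2 repeat codes), so the output
// stays within the Snappy bitstream format and can be read by any standard
// Snappy decoder. EncodeSnappyBetter is the exported wrapper for this mode.
func encodeSnappyCompat(data []byte) []byte {
	return EncodeSnappyBetter(nil, data)
}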

func encodeBlockBetterGo64K(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Initialize the hash tables.
	// Use smaller tables for smaller blocks
	const (
		// Long hash matches.
		lTableBits    = 16
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 13
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint16
	var sTable [maxSTableSize]uint16

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We initialize repeat to 0, so we never match on first attempt
	repeat := 0

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = s + (s-nextEmit)>>6 + 1
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint16(s)
			sTable[hashS] = uint16(s)

			valLong := load64(src, candidateL)
			valShort := load64(src, candidateS)

			// If long matches at least 8 bytes, use that.
			if cv == valLong {
				break
			}
			if cv == valShort {
				candidateL = candidateS
				break
			}

			// Check repeat at offset checkRep.
			const checkRep = 1
			// Minimum length of a repeat. Tested with various values.
			// While 4-5 offers improvements in some, 6 reduces
			// regressions significantly.
			const wantRepeatBytes = 6
			const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
			if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + wantRepeatBytes + checkRep
				s += wantRepeatBytes + checkRep
				for s < len(src) {
					if len(src)-s < 8 {
						if src[candidate] == src[s] {
							s++
							candidate++
							continue
						}
						break
					}
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], repeat, s-base)
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}
				// Index in-between
				index0 := base + 1
				index1 := s - 2

				for index0 < index1 {
					cv0 := load64(src, index0)
					cv1 := load64(src, index1)
					lTable[hash7(cv0, lTableBits)] = uint16(index0)
					sTable[hash4(cv0>>8, sTableBits)] = uint16(index0 + 1)

					lTable[hash7(cv1, lTableBits)] = uint16(index1)
					sTable[hash4(cv1>>8, sTableBits)] = uint16(index1 + 1)
					index0 += 2
					index1 -= 2
				}

				cv = load64(src, s)
				continue
			}

			// Long likely matches 7, so take that.
			if uint32(cv) == uint32(valLong) {
				break
			}

			// Check our short candidate
			if uint32(cv) == uint32(valShort) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint16(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[candidateL] == src[s] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		if repeat == offset {
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint16(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint16(index0 + 1)

		// lTable could be postponed, but very minor difference.
		lTable[hash7(cv1, lTableBits)] = uint16(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint16(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint16(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint16(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return
}

// encodeBlockBetterSnappyGo64K encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterSnappyGo64K(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Initialize the hash tables.
	// Use smaller tables for smaller blocks
	const (
		// Long hash matches.
		lTableBits    = 15
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 13
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint16
	var sTable [maxSTableSize]uint16

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	const maxSkip = 100

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = min(s+(s-nextEmit)>>6+1, s+maxSkip)

			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint16(s)
			sTable[hashS] = uint16(s)

			if uint32(cv) == load32(src, candidateL) {
				break
			}

			// Check our short candidate
			if uint32(cv) == load32(src, candidateS) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint16(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[candidateL] == src[s] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		d += emitCopyNoRepeat(dst[d:], offset, s-base)
		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint16(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint16(index0 + 1)

		lTable[hash7(cv1, lTableBits)] = uint16(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint16(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint16(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint16(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return
}
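
// positionsFitUint16 is an illustrative sketch added by the editor; it is
// not part of the upstream file. It documents why the 64K variants above can
// store candidate positions as uint16: with len(src) at most 64 KiB, every
// position s satisfies s <= 65535, so uint16(s) is lossless. This halves the
// table memory versus the uint32 tables and improves cache locality.
func positionsFitUint16(src []byte) bool {
	return len(src)-1 <= int(^uint16(0)) // highest position fits in 16 bits
}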

// encodeBlockBetterDict encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterDict(dst, src []byte, dict *Dict) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 17
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 14
		maxSTableSize = 1 << sTableBits

		maxAhead = 8 // maximum bytes ahead without checking sLimit

		debug = false
	)

	sLimit := len(src) - inputMargin
	if sLimit > MaxDictSrcOffset-maxAhead {
		sLimit = MaxDictSrcOffset - maxAhead
	}
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	dict.initBetter()

	var lTable [maxLTableSize]uint32
	var sTable [maxSTableSize]uint32

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// Unlike the non-dict variants, matching can start at s == 0, since the
	// first bytes may match the dictionary instead of requiring a literal.
	s := 0
	cv := load64(src, s)

	// repeat is initialized from the dictionary's stored repeat offset, so
	// the first repeat check can match against the dict.
	repeat := len(dict.dict) - dict.repeat

	// While in dict
searchDict:
	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = s + (s-nextEmit)>>7 + 1
			if nextS > sLimit {
				break searchDict
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			dictL := int(dict.betterTableLong[hashL])
			dictS := int(dict.betterTableShort[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			valLong := load64(src, candidateL)
			valShort := load64(src, candidateS)

			// If long matches at least 8 bytes, use that.
			if s != 0 {
				if cv == valLong {
					goto emitMatch
				}
				if cv == valShort {
					candidateL = candidateS
					goto emitMatch
				}
			}

			// Check dict repeat.
			if repeat >= s+4 {
				candidate := len(dict.dict) - repeat + s
				if candidate > 0 && uint32(cv) == load32(dict.dict, candidate) {
					// Extend back
					base := s
					for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
						i--
						base--
					}
					d += emitLiteral(dst[d:], src[nextEmit:base])
					if debug && nextEmit != base {
						fmt.Println("emitted ", base-nextEmit, "literals")
					}
					s += 4
					candidate += 4
					for candidate < len(dict.dict)-8 && s <= len(src)-8 {
						if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
							s += bits.TrailingZeros64(diff) >> 3
							break
						}
						s += 8
						candidate += 8
					}
					d += emitRepeat(dst[d:], repeat, s-base)
					if debug {
						fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
					}
					nextEmit = s
					if s >= sLimit {
						break searchDict
					}
					// Index in-between
					index0 := base + 1
					index1 := s - 2

					cv = load64(src, s)
					for index0 < index1 {
						cv0 := load64(src, index0)
						cv1 := load64(src, index1)
						lTable[hash7(cv0, lTableBits)] = uint32(index0)
						sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

						lTable[hash7(cv1, lTableBits)] = uint32(index1)
						sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
						index0 += 2
						index1 -= 2
					}
					continue
				}
			}
			// Don't try to find match at s==0
			if s == 0 {
				cv = load64(src, nextS)
				s = nextS
				continue
			}

			// Long likely matches 7, so take that.
			if uint32(cv) == uint32(valLong) {
				goto emitMatch
			}

			// Long dict...
			if uint32(cv) == load32(dict.dict, dictL) {
				candidateL = dictL
				goto emitDict
			}

			// Check our short candidate
			if uint32(cv) == uint32(valShort) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					goto emitMatch
				}
				// Use our short candidate.
				candidateL = candidateS
				goto emitMatch
			}
			if uint32(cv) == load32(dict.dict, dictS) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					goto emitMatch
				}
				candidateL = dictS
				goto emitDict
			}
			cv = load64(src, nextS)
			s = nextS
		}
	emitDict:
		{
			if debug {
				if load32(dict.dict, candidateL) != load32(src, s) {
					panic("dict emit mismatch")
				}
			}
			// Extend backwards.
			// The top bytes will be rechecked to get the full match.
			for candidateL > 0 && s > nextEmit && dict.dict[candidateL-1] == src[s-1] {
				candidateL--
				s--
			}

			// Bail if we exceed the maximum size.
			if d+(s-nextEmit) > dstLimit {
				return 0
			}

			// A 4-byte match has been found. We'll later see if more than 4 bytes
			// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
			// them as literal bytes.

			d += emitLiteral(dst[d:], src[nextEmit:s])
			if debug && nextEmit != s {
				fmt.Println("emitted ", s-nextEmit, "literals")
			}
			{
				// Invariant: we have a 4-byte match at s, and no need to emit any
				// literal bytes prior to s.
				base := s
				offset := s + (len(dict.dict)) - candidateL

				// Extend the 4-byte match as long as possible.
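				// Editor's note (not in the upstream file): the dictionary is
				// addressed as if it sat immediately before src, so a match at
				// dict position candidateL is encoded with offset
				// s + len(dict.dict) - candidateL from the current position.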
				s += 4
				candidateL += 4
				for s <= len(src)-8 && len(dict.dict)-candidateL >= 8 {
					if diff := load64(src, s) ^ load64(dict.dict, candidateL); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidateL += 8
				}

				if repeat == offset {
					if debug {
						fmt.Println("emitted dict repeat, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL)
					}
					d += emitRepeat(dst[d:], offset, s-base)
				} else {
					if debug {
						fmt.Println("emitted dict copy, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL)
					}
					// Matches longer than 64 are split.
					if s <= sLimit || s-base < 8 {
						d += emitCopy(dst[d:], offset, s-base)
					} else {
						// Split to ensure we don't start a copy within next block.
						d += emitCopy(dst[d:], offset, 4)
						d += emitRepeat(dst[d:], offset, s-base-4)
					}
					repeat = offset
				}
				if false {
					// Validate match.
					if s <= candidateL {
						panic("s <= candidate")
					}
					a := src[base:s]
					b := dict.dict[base-repeat : base-repeat+(s-base)]
					if !bytes.Equal(a, b) {
						panic("mismatch")
					}
				}

				nextEmit = s
				if s >= sLimit {
					break searchDict
				}

				if d > dstLimit {
					// Do we have space for more, if not bail.
					return 0
				}

				// Index short & long
				index0 := base + 1
				index1 := s - 2

				cv0 := load64(src, index0)
				cv1 := load64(src, index1)
				lTable[hash7(cv0, lTableBits)] = uint32(index0)
				sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

				lTable[hash7(cv1, lTableBits)] = uint32(index1)
				sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
				index0 += 1
				index1 -= 1
				cv = load64(src, s)

				// index every second long in between.
				for index0 < index1 {
					lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
					lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
					index0 += 2
					index1 -= 2
				}
			}
			continue
		}
	emitMatch:

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[candidateL] == src[s] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal or worse to the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		if debug && nextEmit != base {
			fmt.Println("emitted ", base-nextEmit, "literals")
		}
		if repeat == offset {
			if debug {
				fmt.Println("emitted match repeat, length", s-base, "offset:", offset, "s:", s)
			}
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			if debug {
				fmt.Println("emitted match copy, length", s-base, "offset:", offset, "s:", s)
			}
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

	// Search without dict:
	if repeat > s {
		repeat = 0
	}

	// No more dict
	sLimit = len(src) - inputMargin
	if s >= sLimit {
		goto emitRemainder
	}
	cv = load64(src, s)
	if debug {
		fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
	}
	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = s + (s-nextEmit)>>7 + 1
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			valLong := load64(src, candidateL)
			valShort := load64(src, candidateS)

			// If long matches at least 8 bytes, use that.
			if cv == valLong {
				break
			}
			if cv == valShort {
				candidateL = candidateS
				break
			}

			// Check repeat at offset checkRep.
			const checkRep = 1
			// Minimum length of a repeat. Tested with various values.
			// While 4-5 offers improvements in some, 6 reduces
			// regressions significantly.
			const wantRepeatBytes = 6
			const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
			if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + wantRepeatBytes + checkRep
				s += wantRepeatBytes + checkRep
				for s < len(src) {
					if len(src)-s < 8 {
						if src[candidate] == src[s] {
							s++
							candidate++
							continue
						}
						break
					}
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
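				// Editor's note (not in the upstream file): emitRepeat encodes
				// "copy with the same offset as the previous copy", which is
				// cheaper than re-encoding the offset; this whole repeat path
				// is currently disabled by the `if false &&` guard above.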
				d += emitRepeat(dst[d:], repeat, s-base)
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}
				// Index in-between
				index0 := base + 1
				index1 := s - 2

				for index0 < index1 {
					cv0 := load64(src, index0)
					cv1 := load64(src, index1)
					lTable[hash7(cv0, lTableBits)] = uint32(index0)
					sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

					lTable[hash7(cv1, lTableBits)] = uint32(index1)
					sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
					index0 += 2
					index1 -= 2
				}

				cv = load64(src, s)
				continue
			}

			// Long likely matches 7, so take that.
			if uint32(cv) == uint32(valLong) {
				break
			}

			// Check our short candidate
			if uint32(cv) == uint32(valShort) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[candidateL] == src[s] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal or worse to the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		if repeat == offset {
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return
}
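
// dictRoundTrip is an illustrative sketch added by the editor; it is not
// part of the upstream file, and the Dict methods used here (MakeDict,
// EncodeBetter, Decode) are assumptions about the package's dictionary API
// in dict.go. It shows how encodeBlockBetterDict is reached through the
// public surface rather than being called directly.
func dictRoundTrip(dictData, payload []byte) ([]byte, error) {
	dict := MakeDict(dictData, nil) // nil: no preferred starting offset
	if dict == nil {
		return nil, nil // dictionary unusable (e.g. too small or too large)
	}
	compressed := dict.EncodeBetter(nil, payload) // uses encodeBlockBetterDict on pure-Go paths
	return dict.Decode(nil, compressed)           // dictionary-aware decode
}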