// Copyright 2016 The Snappy-Go Authors. All rights reserved.// Copyright (c) 2019 Klaus Post. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package s2import ()// encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It// assumes that the varint-encoded length of the decompressed bytes has already// been written.//// It also assumes that://// len(dst) >= MaxEncodedLen(len(src)) &&// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSizefunc encodeBlockBest(, []byte, *Dict) ( int) {// Initialize the hash tables.const (// Long hash matches. = 19 = 1 << // Short hash matches. = 16 = 1 << = 8 + 2 = false )// sLimit is when to stop looking for offset/length copies. The inputMargin // lets us use a fast path for emitLiteral in the main loop, while we are // looking for copies. := len() - iflen() < minNonLiteralBlockSize {return0 } := len() - if > MaxDictSrcOffset- { = MaxDictSrcOffset - }var []uint64var []uint64// Bail if we can't compress to at least this. := len() - 5// nextEmit is where in src the next emitLiteral should start from. := 0// The encoded form must start with a literal, as there are no previous // bytes to copy, so we start looking for hash matches at s == 1. := 1 := 1if != nil { .initBest() = 0 = len(.dict) - .repeat } := load64(, )// We search for a repeat at -1, but don't output repeats when nextEmit == 0const = 0xffffffff := func( uint64) int {returnint( & ) } := func( uint64) int {returnint( >> 32) }const = 64for {typestruct {intintintint , bool }varfor {// Next src position to check := (-)>>8 + 1if > { = + } else { += }if > {goto }if != nil && >= MaxDictSrcOffset { = nilif > { = math.MinInt32 } } := hash8(, ) := hash4(, ) := [] := [] := func( ) int {// Matches that are longer forward are penalized since we must emit it as a literal. := . - .if == . {// If we do not have to emit literals, we save 1 byte ++ } := . - .if . {return - emitRepeatSize(, .) }return - emitCopySize(, .) } := func(, int, uint32, bool) {if . != 0 && .-. == - {// Don't retest if we have the same offset.return {: , : } }ifload32(, ) != {return {: , : } } := {: , : , : 4 + , : } += 4for < len() {iflen()- < 8 {if [] == [.] { .++ ++continue }break }if := load64(, ) ^ load64(, .); != 0 { . += bits.TrailingZeros64() >> 3break } += 8 . += 8 } . -= . = ()if . <= -. {// Eliminate if no savings, we might find a better one. . = 0 }return } := func(, int, uint32, bool) {if >= MaxDictSrcOffset {return {: , : } }// Calculate offset as if in continuous array with s := -len(.dict) + if . != 0 && .-. == - && ! {// Don't retest if we have the same offset.return {: , : } }ifload32(.dict, ) != {return {: , : } } := {: , : , : 4 + , : , : true} += 4if ! {for < && . < len(.dict) {iflen()- < 8 || len(.dict)-. < 8 {if [] == .dict[.] { .++ ++continue }break }if := load64(, ) ^ load64(.dict, .); != 0 { . += bits.TrailingZeros64() >> 3break } += 8 . += 8 } } else {for < len() && . < len(.dict) {iflen()- < 8 || len(.dict)-. < 8 {if [] == .dict[.] { .++ ++continue }break }if := load64(, ) ^ load64(.dict, .); != 0 { . += bits.TrailingZeros64() >> 3break } += 8 . += 8 } } . -= . = ()if . <= -. {// Eliminate if no savings, we might find a better one. . = 0 }return } := func(, ) {if . == 0 {return }if . == 0 {return } := . + . := . + .if >= {return }return }if > 0 { = (((), , uint32(), false), ((), , uint32(), false)) = (, ((), , uint32(), false)) = (, ((), , uint32(), false)) }if != nil { := .bestTableLong[] := .bestTableShort[] = (, (int(&0xffff), , uint32(), false)) = (, (int(>>16), , uint32(), false)) = (, (int(&0xffff), , uint32(), false)) = (, (int(>>16), , uint32(), false)) } {if ( == nil || <= ) && > 0 { = (, (-+1, +1, uint32(>>8), true)) } elseif - < -4 && != nil { := len(.dict) - ( - ) = (, (, , uint32(), true)) ++ = (, (, +1, uint32(>>8), true)) }if . > 0 { := hash4(>>8, )// s+1 := [] := + 1 := load64(, ) := hash8(, ) := [] = (, ((), , uint32(), false)) = (, ((), , uint32(), false)) = (, ((), , uint32(), false)) = (, ((), , uint32(), false))// Dict at + 1if != nil { := .bestTableLong[] := .bestTableShort[] = (, (int(&0xffff), , uint32(), false)) = (, (int(&0xffff), , uint32(), false)) }// s+2iftrue { := hash4(>>8, ) = [] ++ = load64(, ) := hash8(, ) = []if ( == nil || <= ) && > 0 {// Repeat at + 2 = (, (-, , uint32(), true)) } elseif - > 4 && != nil { := len(.dict) - ( - ) = (, (, , uint32(), true)) } = (, ((), , uint32(), false)) = (, ((), , uint32(), false)) = (, ((), , uint32(), false)) = (, ((), , uint32(), false))// Dict at +2 // Very small gainif != nil { := .bestTableLong[] := .bestTableShort[] = (, (int(&0xffff), , uint32(), false)) = (, (int(&0xffff), , uint32(), false)) } }// Search for a match at best match end, see if that is better. // Allow some bytes at the beginning to mismatch. // Sweet spot is around 1-2 bytes, but depends on input. // The skipped bytes are tested in Extend backwards, // and still picked up as part of the match if they do.const = 2const = 1if := . + . - ; < { := . + - := . - // Load initial values = load64(, )// Grab candidates... := [hash8(load64(, ), )]if := () - ; > 0 { = (, (, , uint32(), false)) }if := () - ; > 0 { = (, (, , uint32(), false)) }// Disabled: Extremely small gainiffalse { = [hash4(load64(, ), )]if := () - ; > 0 { = (, (, , uint32(), false)) }if := () - ; > 0 { = (, (, , uint32(), false)) } } } } }// Update table [] = uint64() | <<32 [] = uint64() | <<32if . > 0 {break } = load64(, ) = }// Extend backwards, not needed for repeats... = .if !. && !. {for . > 0 && > && [.-1] == [-1] { .-- .++ -- } }iffalse && . >= {panic(fmt.Errorf("t %d >= s %d", ., )) }// Bail if we exceed the maximum size.if +(-) > {return0 } := := - . += .if > 65535 && - <= 5 && !. {// Bail if the match is equal or worse to the encoding. = . + 1if >= {goto } = load64(, )continue }if && != {fmt.Println("EMIT", -, "literals. base-after:", ) } += emitLiteral([:], [:])if . {if > 0 || . {if {fmt.Println("REPEAT, length", ., "offset:", , "s-after:", , "dict:", ., "best:", ) }// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. += emitRepeat([:], , .) } else {// First match without dict cannot be a repeat.if {fmt.Println("COPY, length", ., "offset:", , "s-after:", , "dict:", ., "best:", ) } += emitCopy([:], , .) } } else {if {fmt.Println("COPY, length", ., "offset:", , "s-after:", , "dict:", ., "best:", ) } += emitCopy([:], , .) } = = if >= {goto }if > {// Do we have space for more, if not bail.return0 }// Fill tables...for := . + 1; < ; ++ { := load64(, ) := hash8(, ) := hash4(, ) [] = uint64() | []<<32 [] = uint64() | []<<32 } = load64(, ) }:if < len() {// Bail if we exceed the maximum size.if +len()- > {return0 }if && != {fmt.Println("emitted ", len()-, "literals") } += emitLiteral([:], [:]) }return}// encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It// assumes that the varint-encoded length of the decompressed bytes has already// been written.//// It also assumes that://// len(dst) >= MaxEncodedLen(len(src)) &&// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSizefunc encodeBlockBestSnappy(, []byte) ( int) {// Initialize the hash tables.const (// Long hash matches. = 19 = 1 << // Short hash matches. = 16 = 1 << = 8 + 2 )// sLimit is when to stop looking for offset/length copies. The inputMargin // lets us use a fast path for emitLiteral in the main loop, while we are // looking for copies. := len() - iflen() < minNonLiteralBlockSize {return0 }var []uint64var []uint64// Bail if we can't compress to at least this. := len() - 5// nextEmit is where in src the next emitLiteral should start from. := 0// The encoded form must start with a literal, as there are no previous // bytes to copy, so we start looking for hash matches at s == 1. := 1 := load64(, )// We search for a repeat at -1, but don't output repeats when nextEmit == 0 := 1const = 0xffffffff := func( uint64) int {returnint( & ) } := func( uint64) int {returnint( >> 32) }const = 64for {typestruct {intintintint }varfor {// Next src position to check := (-)>>8 + 1if > { = + } else { += }if > {goto } := hash8(, ) := hash4(, ) := [] := [] := func( ) int {// Matches that are longer forward are penalized since we must emit it as a literal. := . - .if == . {// If we do not have to emit literals, we save 1 byte ++ } := . - .return - emitCopyNoRepeatSize(, .) } := func(, int, uint32) {if . != 0 && .-. == - {// Don't retest if we have the same offset.return {: , : } }ifload32(, ) != {return {: , : } } := {: , : , : 4 + } += 4for <= {if := load64(, ) ^ load64(, .); != 0 { . += bits.TrailingZeros64() >> 3break } += 8 . += 8 } . -= . = ()if . <= -. {// Eliminate if no savings, we might find a better one. . = 0 }return } := func(, ) {if . == 0 {return }if . == 0 {return } := . + . := . + .if >= {return }return } = (((), , uint32()), ((), , uint32())) = (, ((), , uint32())) = (, ((), , uint32())) { = (, (-+1, +1, uint32(>>8)))if . > 0 {// s+1 := [hash4(>>8, )] := + 1 := load64(, ) := [hash8(, )] = (, ((), , uint32())) = (, ((), , uint32())) = (, ((), , uint32())) = (, ((), , uint32()))// Repeat at + 2 = (, (-+1, +1, uint32(>>8)))// s+2iftrue { = [hash4(>>8, )] ++ = load64(, ) = [hash8(, )] = (, ((), , uint32())) = (, ((), , uint32())) = (, ((), , uint32())) = (, ((), , uint32())) }// Search for a match at best match end, see if that is better.if := . + .; < { := . := .// Load initial values = load64(, )// Search for mismatch := [hash8(load64(, ), )]//next := sTable[hash4(load64(src, sAt), sTableBits)]if := () - ; > 0 { = (, (, , uint32())) }if := () - ; > 0 { = (, (, , uint32())) } } } }// Update table [] = uint64() | <<32 [] = uint64() | <<32if . > 0 {break } = load64(, ) = }// Extend backwards, not needed for repeats... = .iftrue {for . > 0 && > && [.-1] == [-1] { .-- .++ -- } }iffalse && . >= {panic(fmt.Errorf("t %d >= s %d", ., )) }// Bail if we exceed the maximum size.if +(-) > {return0 } := := - . += .if > 65535 && - <= 5 {// Bail if the match is equal or worse to the encoding. = . + 1if >= {goto } = load64(, )continue } += emitLiteral([:], [:]) += emitCopyNoRepeat([:], , .) = = if >= {goto }if > {// Do we have space for more, if not bail.return0 }// Fill tables...for := . + 1; < ; ++ { := load64(, ) := hash8(, ) := hash4(, ) [] = uint64() | []<<32 [] = uint64() | []<<32 } = load64(, ) }:if < len() {// Bail if we exceed the maximum size.if +len()- > {return0 } += emitLiteral([:], [:]) }return}// emitCopySize returns the size to encode the offset+length//// It assumes that://// 1 <= offset && offset <= math.MaxUint32// 4 <= length && length <= 1 << 24func emitCopySize(, int) int {if >= 65536 { := 0if > 64 { -= 64if >= 4 {// Emit remaining as repeatsreturn5 + emitRepeatSize(, ) } = 5 }if == 0 {return }return + 5 }// Offset no more than 2 bytes.if > 64 {if < 2048 {// Emit 8 bytes, then rest as repeats...return2 + emitRepeatSize(, -8) }// Emit remaining as repeats, at least 4 bytes remain.return3 + emitRepeatSize(, -60) }if >= 12 || >= 2048 {return3 }// Emit the remaining copy, encoded as 2 bytes.return2}// emitCopyNoRepeatSize returns the size to encode the offset+length//// It assumes that://// 1 <= offset && offset <= math.MaxUint32// 4 <= length && length <= 1 << 24func emitCopyNoRepeatSize(, int) int {if >= 65536 {return5 + 5*(/64) }// Offset no more than 2 bytes.if > 64 {// Emit remaining as repeats, at least 4 bytes remain.return3 + 3*(/60) }if >= 12 || >= 2048 {return3 }// Emit the remaining copy, encoded as 2 bytes.return2}// emitRepeatSize returns the number of bytes required to encode a repeat.// Length must be at least 4 and < 1<<24func emitRepeatSize(, int) int {// Repeat offset, make length cheaperif <= 4+4 || ( < 8+4 && < 2048) {return2 }if < (1<<8)+4+4 {return3 }if < (1<<16)+(1<<8)+4 {return4 }const = (1 << 24) - 1 -= (1 << 16) - 4 := 0if > { = - + 4 }if > 0 {return5 + (, ) }return5}
The pages are generated with Goldsv0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.