// Copyright (c) 2022+ Klaus Post. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package s2import ()const (// MinDictSize is the minimum dictionary size when repeat has been read.MinDictSize = 16// MaxDictSize is the maximum dictionary size when repeat has been read.MaxDictSize = 65536// MaxDictSrcOffset is the maximum offset where a dictionary entry can start.MaxDictSrcOffset = 65535)// Dict contains a dictionary that can be used for encoding and decoding s2typeDictstruct { dict []byte repeat int// Repeat as index of dict fast, better, best sync.Once fastTable *[1 << 14]uint16 betterTableShort *[1 << 14]uint16 betterTableLong *[1 << 17]uint16 bestTableShort *[1 << 16]uint32 bestTableLong *[1 << 19]uint32}// NewDict will read a dictionary.// It will return nil if the dictionary is invalid.func ( []byte) *Dict {iflen() == 0 {returnnil }varDict// Repeat is the first value of the dict , := binary.Uvarint()if <= 0 {returnnil } = [:] .dict = ifcap(.dict) < len(.dict)+16 { .dict = append(make([]byte, 0, len(.dict)+16), .dict...) }iflen() < MinDictSize || len() > MaxDictSize {returnnil } .repeat = int()if .repeat > len() {returnnil }return &}// Bytes will return a serialized version of the dictionary.// The output can be sent to NewDict.func ( *Dict) () []byte { := make([]byte, binary.MaxVarintLen16+len(.dict))returnappend([:binary.PutUvarint(, uint64(.repeat))], .dict...)}// MakeDict will create a dictionary.// 'data' must be at least MinDictSize.// If data is longer than MaxDictSize only the last MaxDictSize bytes will be used.// If searchStart is set the start repeat value will be set to the last// match of this content.// If no matches are found, it will attempt to find shorter matches.// This content should match the typical start of a block.// If at least 4 bytes cannot be matched, repeat is set to start of block.func ( []byte, []byte) *Dict {iflen() == 0 {returnnil }iflen() > MaxDictSize { = [len()-MaxDictSize:] }varDict := .dict = ifcap(.dict) < len(.dict)+16 { .dict = append(make([]byte, 0, len(.dict)+16), .dict...) }iflen() < MinDictSize {returnnil }// Find the longest match possible, last entry if multiple.for := len(); > 4; -- {if := bytes.LastIndex(, [:]); >= 0 && <= len()-8 { .repeat = break } }return &}// MakeDictManual will create a dictionary.// 'data' must be at least MinDictSize and less than or equal to MaxDictSize.// A manual first repeat index into data must be provided.// It must be less than len(data)-8.func ( []byte, uint16) *Dict {iflen() < MinDictSize || int() >= len()-8 || len() > MaxDictSize {returnnil }varDict := .dict = ifcap(.dict) < len(.dict)+16 { .dict = append(make([]byte, 0, len(.dict)+16), .dict...) } .repeat = int()return &}// Encode returns the encoded form of src. The returned slice may be a sub-// slice of dst if dst was large enough to hold the entire encoded block.// Otherwise, a newly allocated slice will be returned.//// The dst and src must not overlap. It is valid to pass a nil dst.//// The blocks will require the same amount of memory to decode as encoding,// and does not make for concurrent decoding.// Also note that blocks do not contain CRC information, so corruption may be undetected.//// If you need to encode larger amounts of data, consider using// the streaming interface which gives all of these features.func ( *Dict) (, []byte) []byte {if := MaxEncodedLen(len()); < 0 {panic(ErrTooLarge) } elseifcap() < { = make([]byte, ) } else { = [:] }// The block starts with the varint-encoded length of the decompressed bytes. := binary.PutUvarint(, uint64(len()))iflen() == 0 {return [:] }iflen() < minNonLiteralBlockSize { += emitLiteral([:], )return [:] } := encodeBlockDictGo([:], , )if > 0 { += return [:] }// Not compressible += emitLiteral([:], )return [:]}// EncodeBetter returns the encoded form of src. The returned slice may be a sub-// slice of dst if dst was large enough to hold the entire encoded block.// Otherwise, a newly allocated slice will be returned.//// EncodeBetter compresses better than Encode but typically with a// 10-40% speed decrease on both compression and decompression.//// The dst and src must not overlap. It is valid to pass a nil dst.//// The blocks will require the same amount of memory to decode as encoding,// and does not make for concurrent decoding.// Also note that blocks do not contain CRC information, so corruption may be undetected.//// If you need to encode larger amounts of data, consider using// the streaming interface which gives all of these features.func ( *Dict) (, []byte) []byte {if := MaxEncodedLen(len()); < 0 {panic(ErrTooLarge) } elseiflen() < { = make([]byte, ) }// The block starts with the varint-encoded length of the decompressed bytes. := binary.PutUvarint(, uint64(len()))iflen() == 0 {return [:] }iflen() < minNonLiteralBlockSize { += emitLiteral([:], )return [:] } := encodeBlockBetterDict([:], , )if > 0 { += return [:] }// Not compressible += emitLiteral([:], )return [:]}// EncodeBest returns the encoded form of src. The returned slice may be a sub-// slice of dst if dst was large enough to hold the entire encoded block.// Otherwise, a newly allocated slice will be returned.//// EncodeBest compresses as good as reasonably possible but with a// big speed decrease.//// The dst and src must not overlap. It is valid to pass a nil dst.//// The blocks will require the same amount of memory to decode as encoding,// and does not make for concurrent decoding.// Also note that blocks do not contain CRC information, so corruption may be undetected.//// If you need to encode larger amounts of data, consider using// the streaming interface which gives all of these features.func ( *Dict) (, []byte) []byte {if := MaxEncodedLen(len()); < 0 {panic(ErrTooLarge) } elseiflen() < { = make([]byte, ) }// The block starts with the varint-encoded length of the decompressed bytes. := binary.PutUvarint(, uint64(len()))iflen() == 0 {return [:] }iflen() < minNonLiteralBlockSize { += emitLiteral([:], )return [:] } := encodeBlockBest([:], , )if > 0 { += return [:] }// Not compressible += emitLiteral([:], )return [:]}// Decode returns the decoded form of src. The returned slice may be a sub-// slice of dst if dst was large enough to hold the entire decoded block.// Otherwise, a newly allocated slice will be returned.//// The dst and src must not overlap. It is valid to pass a nil dst.func ( *Dict) (, []byte) ([]byte, error) { , , := decodedLen()if != nil {returnnil, }if <= cap() { = [:] } else { = make([]byte, ) }ifs2DecodeDict(, [:], ) != 0 {returnnil, ErrCorrupt }return , nil}func ( *Dict) () { .fast.Do(func() {const ( = 14 = 1 << )var []uint16// We stop so any entry of length 8 can always be read.for := 0; < len(.dict)-8-2; += 3 { := load64(.dict, ) := hash6(, ) := hash6(>>8, ) := hash6(>>16, ) [] = uint16() [] = uint16( + 1) [] = uint16( + 2) } .fastTable = & })}func ( *Dict) () { .better.Do(func() {const (// Long hash matches. = 17 = 1 << // Short hash matches. = 14 = 1 << )var []uint16var []uint16// We stop so any entry of length 8 can always be read.for := 0; < len(.dict)-8; ++ { := load64(.dict, ) [hash7(, )] = uint16() [hash4(, )] = uint16() } .betterTableShort = & .betterTableLong = & })}func ( *Dict) () { .best.Do(func() {const (// Long hash matches. = 19 = 1 << // Short hash matches. = 16 = 1 << )var []uint32var []uint32// We stop so any entry of length 8 can always be read.for := 0; < len(.dict)-8; ++ { := load64(.dict, ) := hash8(, ) := hash4(, ) := [] := [] [] = uint32() | <<16 [] = uint32() | <<16 } .bestTableShort = & .bestTableLong = & })}
The pages are generated with Goldsv0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.