// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	betterLongTableBits = 19                       // Bits used in the long match table
	betterLongTableSize = 1 << betterLongTableBits // Size of the table
	betterLongLen       = 8                        // Bytes used for table hash

	// Note: Increasing the short table bits or making the hash shorter
	// can actually lead to compression degradation since it will 'steal' more from the
	// long match table and match offsets are quite big.
	// This greatly depends on the type of input.
	betterShortTableBits = 13                        // Bits used in the short match table
	betterShortTableSize = 1 << betterShortTableBits // Size of the table
	betterShortLen       = 5                         // Bytes used for table hash

	betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
	betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard

	betterShortTableShardCnt  = 1 << (betterShortTableBits - dictShardBits)     // Number of shards in the table
	betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
)

// prevEntry holds the latest offset stored for a long-table hash as well as
// the offset it replaced, forming a hash chain of length 2.
type prevEntry struct {
	offset int32
	prev   int32
}

// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
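// This is the encoder selected for the SpeedBetterCompression level.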
type betterFastEncoder struct {
	fastBase
	table     [betterShortTableSize]tableEntry
	longTable [betterLongTableSize]prevEntry
}

// betterFastEncoderDict is a betterFastEncoder that can be seeded from a
// dictionary. The dict* tables hold the state generated from the dictionary
// content, and the shard dirty flags track what must be restored on Reset.
type betterFastEncoderDict struct {
	betterFastEncoder
	dictTable            []tableEntry
	dictLongTable        []prevEntry
	shortTableShardDirty [betterShortTableShardCnt]bool
	longTableShardDirty  [betterLongTableShardCnt]bool
	allDirty             bool
}

// Encode improves compression...
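// It uses the short and long hash tables described on betterFastEncoder and
// emits sequences into blk, keeping the two most recent offsets for repeat matches.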
func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			e.table = [betterShortTableSize]tableEntry{}
			e.longTable = [betterLongTableSize]prevEntry{}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.cur = e.maxMatchOff
		break
	}
	// Add block to history
	s := e.addBlock(src)
	blk.size = len(src)

	// Check RLE first
	if len(src) > zstdMinMatch {
		ml := matchLen(src[1:], src)
		if ml == len(src)-1 {
			blk.literals = append(blk.literals, src[0])
			blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3})
			return
		}
	}

	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched, index0 int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
			index0 = s + 1

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)

						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff2) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)

						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t >e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// Try to find a better match by searching for a long match at the end of the current best match
		if s+matched < sLimit {
			// Allow some bytes at the beginning to mismatch.
			// Sweet spot is around 3 bytes, but depends on input.
			// The skipped bytes are tested in Extend backwards,
			// and still picked up as part of the match if they do.
			const skipBeginning = 3

			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
			s2 := s + skipBeginning
			cv := load3232(src, s2)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched + skipBeginning
			if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					s = s2
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched + skipBeginning
				if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						s = s2
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		off := index0 + e.cur
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
			index0 += 2
			off += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
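// This encoder has no specialized no-history path; it pre-sizes the history
// buffer and calls Encode.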
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	e.ensureHist(len(src))
	e.Encode(blk, src)
}

// Encode improves compression...
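// It mirrors betterFastEncoder.Encode, but marks every table shard it writes
// as dirty so Reset can restore only those shards from the dictionary tables.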
func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			e.allDirty = true
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.allDirty = true
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched, index0 int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)
			index0 = s + 1

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)

						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff2) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)

						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t >e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				e.markLongShardDirty(nextHashL)
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// Try to find a better match by searching for a long match at the end of the current best match
		if s+matched < sLimit {
			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched
				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		off := index0 + e.cur
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.markLongShardDirty(h0)
			h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
			e.markShortShardDirty(h1)
			index0 += 2
			off += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// ResetDict will reset and set a dictionary if not nil
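// betterFastEncoder does not support dictionaries, so a non-nil dict panics.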
func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("betterFastEncoder: Reset with dict")
	}
}

// ResetDict will reset and set a dictionary if not nil
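// The dictionary content is hashed into dictTable and dictLongTable once per
// dictionary ID; subsequent Resets only restore the table shards that were
// marked dirty while encoding (or everything if most shards are dirty).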
func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}
	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		end := int32(len(d.content)) - 8 + e.maxMatchOff
		for i := e.maxMatchOff; i < end; i += 4 {
			const hashLog = betterShortTableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hashLen(cv, hashLog, betterShortLen)      // 0 -> 4
			nextHash1 := hashLen(cv>>8, hashLog, betterShortLen)  // 1 -> 5
			nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6
			nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
			e.dictTable[nextHash3] = tableEntry{
				val:    uint32(cv >> 24),
				offset: i + 3,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Init or copy dict table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]prevEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			h := hashLen(cv, betterLongTableBits, betterLongLen)
			e.dictLongTable[h] = prevEntry{
				offset: e.maxMatchOff,
				prev:   e.dictLongTable[h].offset,
			}

			end := int32(len(d.content)) - 8 + e.maxMatchOff
			off := 8 // First to read
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[off]) << 56)
				h := hashLen(cv, betterLongTableBits, betterLongLen)
				e.dictLongTable[h] = prevEntry{
					offset: i,
					prev:   e.dictLongTable[h].offset,
				}
				off++
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Reset table to initial state
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.shortTableShardDirty {
				if e.shortTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterShortTableShardCnt
		const shardSize = betterShortTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.table[:], e.dictTable)
			for i := range e.shortTableShardDirty {
				e.shortTableShardDirty[i] = false
			}
		} else {
			for i := range e.shortTableShardDirty {
				if !e.shortTableShardDirty[i] {
					continue
				}

				copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
				e.shortTableShardDirty[i] = false
			}
		}
	}
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.longTableShardDirty {
				if e.longTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterLongTableShardCnt
		const shardSize = betterLongTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.longTable[:], e.dictLongTable)
			for i := range e.longTableShardDirty {
				e.longTableShardDirty[i] = false
			}
		} else {
			for i := range e.longTableShardDirty {
				if !e.longTableShardDirty[i] {
					continue
				}

				copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
				e.longTableShardDirty[i] = false
			}
		}
	}
	e.cur = e.maxMatchOff
	e.allDirty = false
}

func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
}

func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
	e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
}