// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s2

import (
	"bytes"
	"fmt"
	"math/bits"
)

// hash4 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4(u uint64, h uint8) uint32 {
	const prime4bytes = 2654435761
	return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
}

// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash5(u uint64, h uint8) uint32 {
	const prime5bytes = 889523592379
	return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63))
}

// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
	const prime7bytes = 58295818150454627
	return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
}

// hash8 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash8(u uint64, h uint8) uint32 {
	const prime8bytes = 0xcf1bbcdcb7a56463
	return uint32((u * prime8bytes) >> ((64 - h) & 63))
}
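
// All four hashes use the same multiplicative scheme: shift the bytes of
// interest to the top of a 64-bit word, multiply by a large odd constant,
// and keep the top h bits. A minimal sketch of how the encoders below use
// them (the names here are illustrative, not part of the package):
//
//	const tableBits = 14               // table with 1<<14 entries
//	var table [1 << tableBits]uint32
//	cv := load64(src, s)               // 8 bytes starting at src[s]
//	h := hash4(cv, tableBits)          // hash of the low 4 bytes of cv
//	candidate := int(table[h])         // last position with this hash
//	table[h] = uint32(s)               // record the current position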

// encodeBlockBetterGo encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterGo(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 17
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 14
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint32
	var sTable [maxSTableSize]uint32

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We initialize repeat to 0, so we never match on first attempt
	repeat := 0

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
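			// The step grows by one for every 128 bytes scanned without a
			// match, so nearly-incompressible input is skipped through
			// progressively faster.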
			nextS = s + (s-nextEmit)>>7 + 1
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			valLong := load64(src, candidateL)
			valShort := load64(src, candidateS)

			// If long matches at least 8 bytes, use that.
			if cv == valLong {
				break
			}
			if cv == valShort {
				candidateL = candidateS
				break
			}

			// Check repeat at offset checkRep.
			const checkRep = 1
			// Minimum length of a repeat. Tested with various values.
			// While 4-5 offers improvements in some, 6 reduces
			// regressions significantly.
			const wantRepeatBytes = 6
			const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
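			// Note: this repeat probe is disabled (`false &&` below); the
			// block is kept for reference and easy re-enabling.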
			if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + wantRepeatBytes + checkRep
				s += wantRepeatBytes + checkRep
				for s < len(src) {
					if len(src)-s < 8 {
						if src[s] == src[candidate] {
							s++
							candidate++
							continue
						}
						break
					}
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], repeat, s-base)
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}
				// Index in-between
				index0 := base + 1
				index1 := s - 2

				for index0 < index1 {
					cv0 := load64(src, index0)
					cv1 := load64(src, index1)
					lTable[hash7(cv0, lTableBits)] = uint32(index0)
					sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

					lTable[hash7(cv1, lTableBits)] = uint32(index1)
					sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
					index0 += 2
					index1 -= 2
				}

				cv = load64(src, s)
				continue
			}

			// Long likely matches 7, so take that.
			if uint32(cv) == uint32(valLong) {
				break
			}

			// Check our short candidate
			if uint32(cv) == uint32(valShort) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

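		// A copy with an offset above 65535 needs a five-byte encoding, so a
		// match of five bytes or fewer is only worth taking when it continues
		// the current repeat.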
		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal to or worse than the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		if repeat == offset {
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		// lTable could be postponed, but very minor difference.
		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}

// encodeBlockBetterSnappyGo encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterSnappyGo(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 16
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 14
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint32
	var sTable [maxSTableSize]uint32

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We initialize repeat to 0, so we never match on first attempt
	repeat := 0
	const maxSkip = 100
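	// maxSkip caps how far a single probe may jump ahead, bounding the
	// worst-case scan over incompressible input.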

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = min(s+(s-nextEmit)>>7+1, s+maxSkip)

			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			if uint32(cv) == load32(src, candidateL) {
				break
			}

			// Check our short candidate
			if uint32(cv) == load32(src, candidateS) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal to or worse than the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

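		// The Snappy format has no repeat codes, so every copy carries an
		// explicit offset.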
		d += emitLiteral(dst[d:], src[nextEmit:base])
		d += emitCopyNoRepeat(dst[d:], offset, s-base)
		repeat = offset

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}

// encodeBlockBetterGo64K encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= 64<<10
func encodeBlockBetterGo64K(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}
	// Initialize the hash tables.
	// Use smaller tables for smaller blocks
	const (
		// Long hash matches.
		lTableBits    = 16
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 13
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint16
	var sTable [maxSTableSize]uint16
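	// Blocks are at most 64 KiB here, so positions fit in uint16 and the
	// tables use half the memory of the generic version.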

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We initialize repeat to 0, so we never match on first attempt
	repeat := 0

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = s + (s-nextEmit)>>6 + 1
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint16(s)
			sTable[hashS] = uint16(s)

			valLong := load64(src, candidateL)
			valShort := load64(src, candidateS)

			// If long matches at least 8 bytes, use that.
			if cv == valLong {
				break
			}
			if cv == valShort {
				candidateL = candidateS
				break
			}

			// Check repeat at offset checkRep.
			const checkRep = 1
			// Minimum length of a repeat. Tested with various values.
			// While 4-5 offers improvements in some, 6 reduces
			// regressions significantly.
			const wantRepeatBytes = 6
			const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
			if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + wantRepeatBytes + checkRep
				s += wantRepeatBytes + checkRep
				for s < len(src) {
					if len(src)-s < 8 {
						if src[s] == src[candidate] {
							s++
							candidate++
							continue
						}
						break
					}
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], repeat, s-base)
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}
				// Index in-between
				index0 := base + 1
				index1 := s - 2

				for index0 < index1 {
					cv0 := load64(src, index0)
					cv1 := load64(src, index1)
					lTable[hash7(cv0, lTableBits)] = uint16(index0)
					sTable[hash4(cv0>>8, sTableBits)] = uint16(index0 + 1)

					lTable[hash7(cv1, lTableBits)] = uint16(index1)
					sTable[hash4(cv1>>8, sTableBits)] = uint16(index1 + 1)
					index0 += 2
					index1 -= 2
				}

				cv = load64(src, s)
				continue
			}

			// Long likely matches 7, so take that.
			if uint32(cv) == uint32(valLong) {
				break
			}

			// Check our short candidate
			if uint32(cv) == uint32(valShort) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint16(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

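		// No long-offset bail needed here: within a 64 KiB block every offset
		// fits the cheaper copy encodings.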
		d += emitLiteral(dst[d:], src[nextEmit:base])
		if repeat == offset {
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint16(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint16(index0 + 1)

		// lTable could be postponed, but very minor difference.
		lTable[hash7(cv1, lTableBits)] = uint16(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint16(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint16(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint16(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}

// encodeBlockBetterSnappyGo64K encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= 64<<10
func encodeBlockBetterSnappyGo64K(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Initialize the hash tables.
	// Use smaller tables for smaller blocks
	const (
		// Long hash matches.
		lTableBits    = 15
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 13
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint16
	var sTable [maxSTableSize]uint16

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	const maxSkip = 100

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = min(s+(s-nextEmit)>>6+1, s+maxSkip)

			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint16(s)
			sTable[hashS] = uint16(s)

			if uint32(cv) == load32(src, candidateL) {
				break
			}

			// Check our short candidate
			if uint32(cv) == load32(src, candidateS) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint16(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		d += emitCopyNoRepeat(dst[d:], offset, s-base)

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint16(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint16(index0 + 1)

		lTable[hash7(cv1, lTableBits)] = uint16(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint16(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint16(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint16(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}

// encodeBlockBetterDict encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterDict(dst, src []byte, dict *Dict) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 17
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 14
		maxSTableSize = 1 << sTableBits

		maxAhead = 8 // maximum bytes ahead without checking sLimit

		debug = false
	)

	sLimit := len(src) - inputMargin
	if sLimit > MaxDictSrcOffset-maxAhead {
		sLimit = MaxDictSrcOffset - maxAhead
	}
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	dict.initBetter()
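	// This prepares dict.betterTableLong and dict.betterTableShort, which
	// are probed alongside the block-local tables below.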

	var lTable [maxLTableSize]uint32
	var sTable [maxSTableSize]uint32

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// Matching can start in the dictionary, so we begin looking for hash
	// matches at s == 0.
	s := 0
	cv := load64(src, s)

	// Seed repeat with the dictionary's stored repeat offset, measured back
	// from the end of the dict.
	repeat := len(dict.dict) - dict.repeat

	// While in dict
searchDict:
	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = s + (s-nextEmit)>>7 + 1
			if nextS > sLimit {
				break searchDict
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			dictL := int(dict.betterTableLong[hashL])
			dictS := int(dict.betterTableShort[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			valLong := load64(src, candidateL)
			valShort := load64(src, candidateS)

			// If long matches at least 8 bytes, use that.
			if s != 0 {
				if cv == valLong {
					goto matchFull
				}
				if cv == valShort {
					candidateL = candidateS
					goto matchFull
				}
			}

			// Check dict repeat.
			if repeat >= s+4 {
				candidate := len(dict.dict) - repeat + s
				if candidate > 0 && uint32(cv) == load32(dict.dict, candidate) {
					// Extend back
					base := s
					for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
						i--
						base--
					}
					d += emitLiteral(dst[d:], src[nextEmit:base])
					if debug && nextEmit != base {
						fmt.Println("emitted ", base-nextEmit, "literals")
					}
					s += 4
					candidate += 4
					for candidate < len(dict.dict)-8 && s <= len(src)-8 {
						if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
							s += bits.TrailingZeros64(diff) >> 3
							break
						}
						s += 8
						candidate += 8
					}
					d += emitRepeat(dst[d:], repeat, s-base)
					if debug {
						fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
					}
					nextEmit = s
					if s >= sLimit {
						break searchDict
					}
					// Index in-between
					index0 := base + 1
					index1 := s - 2

					cv = load64(src, s)
					for index0 < index1 {
						cv0 := load64(src, index0)
						cv1 := load64(src, index1)
						lTable[hash7(cv0, lTableBits)] = uint32(index0)
						sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

						lTable[hash7(cv1, lTableBits)] = uint32(index1)
						sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
						index0 += 2
						index1 -= 2
					}
					continue
				}
			}
			// Don't try to find match at s==0
			if s == 0 {
				cv = load64(src, nextS)
				s = nextS
				continue
			}

			// Long likely matches 7, so take that.
			if uint32(cv) == uint32(valLong) {
				goto matchFull
			}

			// Long dict...
			if uint32(cv) == load32(dict.dict, dictL) {
				candidateL = dictL
				goto matchDict
			}

			// Check our short candidate
			if uint32(cv) == uint32(valShort) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					goto matchFull
				}
				// Use our short candidate.
				candidateL = candidateS
				goto matchFull
			}
			if uint32(cv) == load32(dict.dict, dictS) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					goto matchFull
				}
				candidateL = dictS
				goto matchDict
			}
			cv = load64(src, nextS)
			s = nextS
		}
	matchDict:
		{
			if debug {
				if load32(dict.dict, candidateL) != load32(src, s) {
					panic("dict emit mismatch")
				}
			}
			// Extend backwards.
			// The top bytes will be rechecked to get the full match.
			for candidateL > 0 && s > nextEmit && dict.dict[candidateL-1] == src[s-1] {
				candidateL--
				s--
			}

			// Bail if we exceed the maximum size.
			if d+(s-nextEmit) > dstLimit {
				return 0
			}

			// A 4-byte match has been found. We'll later see if more than 4 bytes
			// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
			// them as literal bytes.

			d += emitLiteral(dst[d:], src[nextEmit:s])
			if debug && nextEmit != s {
				fmt.Println("emitted ", s-nextEmit, "literals")
			}
			{
				// Invariant: we have a 4-byte match at s, and no need to emit any
				// literal bytes prior to s.
				base := s
				offset := s + (len(dict.dict)) - candidateL

				// Extend the 4-byte match as long as possible.
				s += 4
				candidateL += 4
				for s <= len(src)-8 && len(dict.dict)-candidateL >= 8 {
					if diff := load64(src, s) ^ load64(dict.dict, candidateL); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidateL += 8
				}

				if repeat == offset {
					if debug {
						fmt.Println("emitted dict repeat, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL)
					}
					d += emitRepeat(dst[d:], offset, s-base)
				} else {
					if debug {
						fmt.Println("emitted dict copy, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL)
					}
					// Matches longer than 64 are split.
					if s <= sLimit || s-base < 8 {
						d += emitCopy(dst[d:], offset, s-base)
					} else {
						// Split to ensure we don't start a copy within next block.
						d += emitCopy(dst[d:], offset, 4)
						d += emitRepeat(dst[d:], offset, s-base-4)
					}
					repeat = offset
				}
				if false {
					// Validate match.
					if s <= candidateL {
						panic("s <= candidate")
					}
					a := src[base:s]
					b := dict.dict[base-repeat : base-repeat+(s-base)]
					if !bytes.Equal(a, b) {
						panic("mismatch")
					}
				}

				nextEmit = s
				if s >= sLimit {
					break searchDict
				}

				if d > dstLimit {
					// Do we have space for more, if not bail.
					return 0
				}

				// Index short & long
				index0 := base + 1
				index1 := s - 2

				cv0 := load64(src, index0)
				cv1 := load64(src, index1)
				lTable[hash7(cv0, lTableBits)] = uint32(index0)
				sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

				lTable[hash7(cv1, lTableBits)] = uint32(index1)
				sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
				index0 += 1
				index1 -= 1
				cv = load64(src, s)

				// index every second long in between.
				for index0 < index1 {
					lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
					lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
					index0 += 2
					index1 -= 2
				}
			}
			continue
		}
	matchFull:

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal to or worse than the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		if debug && nextEmit != base {
			fmt.Println("emitted ", base-nextEmit, "literals")
		}
		if repeat == offset {
			if debug {
				fmt.Println("emitted match repeat, length", s-base, "offset:", offset, "s:", s)
			}
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			if debug {
				fmt.Println("emitted match copy, length", s-base, "offset:", offset, "s:", s)
			}
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

	// Search without dict:
	if repeat > s {
		repeat = 0
	}

	// No more dict
	sLimit = len(src) - inputMargin
	if s >= sLimit {
		goto emitRemainder
	}
	cv = load64(src, s)
	if debug {
		fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
	}
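	// The dictionary is now out of range; from here on matching proceeds as
	// in encodeBlockBetterGo, with tables already primed by the loop above.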
	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = s + (s-nextEmit)>>7 + 1
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			valLong := load64(src, candidateL)
			valShort := load64(src, candidateS)

			// If long matches at least 8 bytes, use that.
			if cv == valLong {
				break
			}
			if cv == valShort {
				candidateL = candidateS
				break
			}

			// Check repeat at offset checkRep.
			const checkRep = 1
			// Minimum length of a repeat. Tested with various values.
			// While 4-5 offers improvements in some, 6 reduces
			// regressions significantly.
			const wantRepeatBytes = 6
			const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
			if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + wantRepeatBytes + checkRep
				s += wantRepeatBytes + checkRep
				for s < len(src) {
					if len(src)-s < 8 {
						if src[s] == src[candidate] {
							s++
							candidate++
							continue
						}
						break
					}
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], repeat, s-base)
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}
				// Index in-between
				index0 := base + 1
				index1 := s - 2

				for index0 < index1 {
					cv0 := load64(src, index0)
					cv1 := load64(src, index1)
					lTable[hash7(cv0, lTableBits)] = uint32(index0)
					sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

					lTable[hash7(cv1, lTableBits)] = uint32(index1)
					sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
					index0 += 2
					index1 -= 2
				}

				cv = load64(src, s)
				continue
			}

			// Long likely matches 7, so take that.
			if uint32(cv) == uint32(valLong) {
				break
			}

			// Check our short candidate
			if uint32(cv) == uint32(valShort) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal to or worse than the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		if repeat == offset {
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}