package xxh3

import (
	"math/bits"
)

// Hash128 returns the 128-bit hash of the byte slice.
func Hash128(b []byte) Uint128 {
	return hashAny128(*(*str)(ptr(&b)))
}

// HashString128 returns the 128-bit hash of the string.
func HashString128(s string) Uint128 {
	return hashAny128(*(*str)(ptr(&s)))
}

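// hashAny128 computes the 128-bit hash by dispatching on input length,
// mirroring the upstream XXH3 reference implementation: dedicated mixers
// for 0-16 bytes, paired 32-byte folds for 17-128 bytes, a strided
// mid-size path for 129-240 bytes, and the full accumulator loop beyond.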
func hashAny128(s str) (acc u128) {
	p, l := s.p, s.l

	switch {
	case l <= 16:
		switch {
		case l > 8: // 9-16
			const bitflipl = key64_032 ^ key64_040
			const bitfliph = key64_048 ^ key64_056

			input_lo := readU64(p, 0)
			input_hi := readU64(p, ui(l)-8)

			m128_hi, m128_lo := bits.Mul64(input_lo^input_hi^bitflipl, prime64_1)

			m128_lo += uint64(l-1) << 54
			input_hi ^= bitfliph

			m128_hi += input_hi + uint64(uint32(input_hi))*(prime32_2-1)

			m128_lo ^= bits.ReverseBytes64(m128_hi)

			acc.Hi, acc.Lo = bits.Mul64(m128_lo, prime64_2)
			acc.Hi += m128_hi * prime64_2

			acc.Lo = xxh3Avalanche(acc.Lo)
			acc.Hi = xxh3Avalanche(acc.Hi)

			return acc

		case l > 3: // 4-8
			const bitflip = key64_016 ^ key64_024

			input_lo := readU32(p, 0)
			input_hi := readU32(p, ui(l)-4)
			input_64 := u64(input_lo) + u64(input_hi)<<32
			keyed := input_64 ^ bitflip

			acc.Hi, acc.Lo = bits.Mul64(keyed, prime64_1+(uint64(l)<<2))

			acc.Hi += acc.Lo << 1
			acc.Lo ^= acc.Hi >> 3

			acc.Lo ^= acc.Lo >> 35
			acc.Lo *= 0x9fb21c651e98df25
			acc.Lo ^= acc.Lo >> 28
			acc.Hi = xxh3Avalanche(acc.Hi)

			return acc

		case l == 3: // 3
			c12 := u64(readU16(p, 0))
			c3 := u64(readU8(p, 2))
			acc.Lo = c12<<16 + c3 + 3<<8

		case l > 1: // 2
			c12 := u64(readU16(p, 0))
			acc.Lo = c12*(1<<24+1)>>8 + 2<<8

		case l == 1: // 1
			c1 := u64(readU8(p, 0))
			acc.Lo = c1*(1<<24+1<<16+1) + 1<<8

		default: // 0
			return u128{0x99aa06d3014798d8, 0x6001c324468d497f}
		}

		// 1-3 bytes: derive the high word from the low, mix in the key,
		// and run the small-input avalanche.
		acc.Hi = uint64(bits.RotateLeft32(bits.ReverseBytes32(uint32(acc.Lo)), 13))
		acc.Lo ^= uint64(key32_000 ^ key32_004)
		acc.Hi ^= uint64(key32_008 ^ key32_012)

		acc.Lo = xxh64AvalancheSmall(acc.Lo)
		acc.Hi = xxh64AvalancheSmall(acc.Hi)

		return acc

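	// 17-128 bytes: fold 32-byte blocks pairwise with mulFold64, reading
	// forward from the start and backward from the end so every input byte
	// participates in at least one fold.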
	case l <= 128:
		acc.Lo = u64(l) * prime64_1

		if l > 32 {
			if l > 64 {
				if l > 96 {
					in1, in2 := readU64(p, ui(l)-8*8), readU64(p, ui(l)-7*8)
					in3, in4 := readU64(p, 6*8), readU64(p, 7*8)

					acc.Hi += mulFold64(in1^key64_112, in2^key64_120)
					acc.Hi ^= in3 + in4
					acc.Lo += mulFold64(in3^key64_096, in4^key64_104)
					acc.Lo ^= in1 + in2

				} // 96

				in1, in2 := readU64(p, ui(l)-6*8), readU64(p, ui(l)-5*8)
				in3, in4 := readU64(p, 4*8), readU64(p, 5*8)

				acc.Hi += mulFold64(in1^key64_080, in2^key64_088)
				acc.Hi ^= in3 + in4
				acc.Lo += mulFold64(in3^key64_064, in4^key64_072)
				acc.Lo ^= in1 + in2

			} // 64

			in1, in2 := readU64(p, ui(l)-4*8), readU64(p, ui(l)-3*8)
			in3, in4 := readU64(p, 2*8), readU64(p, 3*8)

			acc.Hi += mulFold64(in1^key64_048, in2^key64_056)
			acc.Hi ^= in3 + in4
			acc.Lo += mulFold64(in3^key64_032, in4^key64_040)
			acc.Lo ^= in1 + in2

		} // 32

		in1, in2 := readU64(p, ui(l)-2*8), readU64(p, ui(l)-1*8)
		in3, in4 := readU64(p, 0*8), readU64(p, 1*8)

		acc.Hi += mulFold64(in1^key64_016, in2^key64_024)
		acc.Hi ^= in3 + in4
		acc.Lo += mulFold64(in3^key64_000, in4^key64_008)
		acc.Lo ^= in1 + in2

		acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+(u64(l)*prime64_2), acc.Hi+acc.Lo

		acc.Hi = -xxh3Avalanche(acc.Hi)
		acc.Lo = xxh3Avalanche(acc.Lo)

		return acc

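	// 129-240 bytes: mix the first 128 bytes as four fixed 32-byte blocks,
	// avalanche, then walk the remainder in 32-byte strides against key
	// material starting at byte 3 (the i-125 indices below), and finish
	// with the last 32 bytes against dedicated key words.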
	case l <= 240:
		acc.Lo = u64(l) * prime64_1

		{
			in1, in2, in3, in4 := readU64(p, 0*8), readU64(p, 1*8), readU64(p, 2*8), readU64(p, 3*8)

			acc.Hi += mulFold64(in3^key64_016, in4^key64_024)
			acc.Hi ^= in1 + in2
			acc.Lo += mulFold64(in1^key64_000, in2^key64_008)
			acc.Lo ^= in3 + in4
		}

		{
			in1, in2, in3, in4 := readU64(p, 4*8), readU64(p, 5*8), readU64(p, 6*8), readU64(p, 7*8)

			acc.Hi += mulFold64(in3^key64_048, in4^key64_056)
			acc.Hi ^= in1 + in2
			acc.Lo += mulFold64(in1^key64_032, in2^key64_040)
			acc.Lo ^= in3 + in4
		}

		{
			in1, in2, in3, in4 := readU64(p, 8*8), readU64(p, 9*8), readU64(p, 10*8), readU64(p, 11*8)

			acc.Hi += mulFold64(in3^key64_080, in4^key64_088)
			acc.Hi ^= in1 + in2
			acc.Lo += mulFold64(in1^key64_064, in2^key64_072)
			acc.Lo ^= in3 + in4
		}

		{
			in1, in2, in3, in4 := readU64(p, 12*8), readU64(p, 13*8), readU64(p, 14*8), readU64(p, 15*8)

			acc.Hi += mulFold64(in3^key64_112, in4^key64_120)
			acc.Hi ^= in1 + in2
			acc.Lo += mulFold64(in1^key64_096, in2^key64_104)
			acc.Lo ^= in3 + in4
		}

		// avalanche
		acc.Hi = xxh3Avalanche(acc.Hi)
		acc.Lo = xxh3Avalanche(acc.Lo)

		// trailing groups after 128
		top := ui(l) &^ 31
		for i := ui(4 * 32); i < top; i += 32 {
			in1, in2, in3, in4 := readU64(p, i+0), readU64(p, i+8), readU64(p, i+16), readU64(p, i+24)
			k1, k2, k3, k4 := readU64(key, i-125), readU64(key, i-117), readU64(key, i-109), readU64(key, i-101)

			acc.Hi += mulFold64(in3^k3, in4^k4)
			acc.Hi ^= in1 + in2
			acc.Lo += mulFold64(in1^k1, in2^k2)
			acc.Lo ^= in3 + in4
		}

		// last 32 bytes
		{
			in1, in2, in3, in4 := readU64(p, ui(l)-32), readU64(p, ui(l)-24), readU64(p, ui(l)-16), readU64(p, ui(l)-8)

			acc.Hi += mulFold64(in1^key64_119, in2^key64_127)
			acc.Hi ^= in3 + in4
			acc.Lo += mulFold64(in3^key64_103, in4^key64_111)
			acc.Lo ^= in1 + in2
		}

		acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+(u64(l)*prime64_2), acc.Hi+acc.Lo

		acc.Hi = -xxh3Avalanche(acc.Hi)
		acc.Lo = xxh3Avalanche(acc.Lo)

		return acc

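	// Over 240 bytes: the striped accumulator path. Eight 64-bit lanes are
	// seeded with XXH primes, updated over the whole input by the widest
	// available vector unit, then folded pairwise into the two output words.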
	default:
		acc.Lo = u64(l) * prime64_1
		acc.Hi = ^(u64(l) * prime64_2)

		accs := [8]u64{
			prime32_3, prime64_1, prime64_2, prime64_3,
			prime64_4, prime32_2, prime64_5, prime32_1,
		}

		if hasAVX512 && l >= avx512Switch {
			accumAVX512(&accs, p, key, u64(l))
		} else if hasAVX2 {
			accumAVX2(&accs, p, key, u64(l))
		} else if hasSSE2 {
			accumSSE(&accs, p, key, u64(l))
		} else {
			accumScalar(&accs, p, key, u64(l))
		}

		// merge accs
		acc.Lo += mulFold64(accs[0]^key64_011, accs[1]^key64_019)
		acc.Hi += mulFold64(accs[0]^key64_117, accs[1]^key64_125)

		acc.Lo += mulFold64(accs[2]^key64_027, accs[3]^key64_035)
		acc.Hi += mulFold64(accs[2]^key64_133, accs[3]^key64_141)

		acc.Lo += mulFold64(accs[4]^key64_043, accs[5]^key64_051)
		acc.Hi += mulFold64(accs[4]^key64_149, accs[5]^key64_157)

		acc.Lo += mulFold64(accs[6]^key64_059, accs[7]^key64_067)
		acc.Hi += mulFold64(accs[6]^key64_165, accs[7]^key64_173)

		acc.Lo = xxh3Avalanche(acc.Lo)
		acc.Hi = xxh3Avalanche(acc.Hi)

		return acc
	}
}
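
// mulFold64Sketch is an illustrative sketch, not part of the hashing path:
// it restates what the package's mulFold64 helper is assumed to do (the
// reference XXH3_mul128_fold64). Two 64-bit lanes are multiplied into a
// 128-bit product and the halves are xor-folded, so high and low input
// bits influence every output bit.
func mulFold64Sketch(x, y uint64) uint64 {
	hi, lo := bits.Mul64(x, y) // full 128-bit product
	return hi ^ lo             // fold the halves together
}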