package xxh3

// avx512Switch is the size at which the avx512 code is used.
// Bigger blocks benefit more.
// The threshold is 1 << 10 (1024). The unit is presumably bytes of input;
// the comparison site is not in this file — TODO confirm.
const avx512Switch = 1 << 10

// accumScalar updates an 8-slot u64 accumulator array using the package's
// constant key material (the key64_* constants). It first compares one of its
// arguments with the package-level `key`; on mismatch it delegates to
// accumScalarSeed and returns.
//
// NOTE(review): the parameter and local identifiers appear to have been
// stripped from this copy of the file (e.g. "if  != key"), so the block does
// not compile as shown. The comments below describe only the structure that
// is still visible; restore the names from the upstream source before
// touching the logic.
func accumScalar( *[8]u64, ,  ptr,  u64) {
	if  != key {
		accumScalarSeed(, , , )
		return
	}
	// Main loop: runs while the compared value (presumably the remaining
	// length — TODO confirm) is greater than _block.
	for  > _block {
		 := 

		// accs: 16 iterations, each made of eight read/XOR/accumulate steps at
		// byte offsets 8*0 .. 8*7. In every step one slot of a pair receives a
		// plain addition and the other receives the product of a value masked
		// to its low 32 bits and a value shifted right by 32.
		for  := 0;  < 16; ++ {
			 := readU64(, 8*0)
			 :=  ^ readU64(, 8*0)
			[1] += 
			[0] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*1)
			 :=  ^ readU64(, 8*1)
			[0] += 
			[1] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*2)
			 :=  ^ readU64(, 8*2)
			[3] += 
			[2] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*3)
			 :=  ^ readU64(, 8*3)
			[2] += 
			[3] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*4)
			 :=  ^ readU64(, 8*4)
			[5] += 
			[4] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*5)
			 :=  ^ readU64(, 8*5)
			[4] += 
			[5] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*6)
			 :=  ^ readU64(, 8*6)
			[7] += 
			[6] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*7)
			 :=  ^ readU64(, 8*7)
			[6] += 
			[7] += ( & 0xffffffff) * ( >> 32)

			// Consume one _stripe; the two pointers are advanced (by _stripe
			// and by 8 respectively) only while the counter stays positive.
			 -= _stripe
			if  > 0 {
				,  = ptr(ui()+_stripe), ptr(ui()+8)
			}
		}

		// scramble accs: for each of the eight slots, XOR with itself shifted
		// right by 47, XOR with a constant (key64_128 .. key64_184, step 8),
		// then multiply by prime32_1.
		[0] ^= [0] >> 47
		[0] ^= key64_128
		[0] *= prime32_1

		[1] ^= [1] >> 47
		[1] ^= key64_136
		[1] *= prime32_1

		[2] ^= [2] >> 47
		[2] ^= key64_144
		[2] *= prime32_1

		[3] ^= [3] >> 47
		[3] ^= key64_152
		[3] *= prime32_1

		[4] ^= [4] >> 47
		[4] ^= key64_160
		[4] *= prime32_1

		[5] ^= [5] >> 47
		[5] ^= key64_168
		[5] *= prime32_1

		[6] ^= [6] >> 47
		[6] ^= key64_176
		[6] *= prime32_1

		[7] ^= [7] >> 47
		[7] ^= key64_184
		[7] *= prime32_1
	}

	// Tail: whatever is left after the main loop. The iteration count is
	// (value-1)/_stripe, so at least one _stripe-sized step is always left
	// for the final branch below whenever the value is non-zero.
	if  > 0 {
		,  := (-1)/_stripe, 

		for  := u64(0);  < ; ++ {
			 := readU64(, 8*0)
			 :=  ^ readU64(, 8*0)
			[1] += 
			[0] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*1)
			 :=  ^ readU64(, 8*1)
			[0] += 
			[1] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*2)
			 :=  ^ readU64(, 8*2)
			[3] += 
			[2] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*3)
			 :=  ^ readU64(, 8*3)
			[2] += 
			[3] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*4)
			 :=  ^ readU64(, 8*4)
			[5] += 
			[4] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*5)
			 :=  ^ readU64(, 8*5)
			[4] += 
			[5] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*6)
			 :=  ^ readU64(, 8*6)
			[7] += 
			[6] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*7)
			 :=  ^ readU64(, 8*7)
			[6] += 
			[7] += ( & 0xffffffff) * ( >> 32)

			 -= _stripe
			if  > 0 {
				,  = ptr(ui()+_stripe), ptr(ui()+8)
			}
		}

		// Final step: the pointer is moved backwards by (_stripe - value)
		// before reading, presumably so the last read window ends exactly at
		// the end of the input and may overlap data already consumed — TODO
		// confirm. The XOR operands here are the constants key64_121 ..
		// key64_177 rather than values read through a pointer.
		if  > 0 {
			 = ptr(ui() - uintptr(_stripe-))

			 := readU64(, 8*0)
			 :=  ^ key64_121
			[1] += 
			[0] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*1)
			 :=  ^ key64_129
			[0] += 
			[1] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*2)
			 :=  ^ key64_137
			[3] += 
			[2] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*3)
			 :=  ^ key64_145
			[2] += 
			[3] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*4)
			 :=  ^ key64_153
			[5] += 
			[4] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*5)
			 :=  ^ key64_161
			[4] += 
			[5] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*6)
			 :=  ^ key64_169
			[7] += 
			[6] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*7)
			 :=  ^ key64_177
			[6] += 
			[7] += ( & 0xffffffff) * ( >> 32)
		}
	}
}

// accumBlockScalar performs exactly one accumulate-then-scramble round on an
// 8-slot u64 accumulator array: 16 iterations of eight steps each, followed
// by the scramble with the key64_128 .. key64_184 constants. It takes no
// length argument. If the compared argument is not the package-level `key`,
// it delegates to accumBlockScalarSeed and returns.
//
// NOTE(review): the parameter and local identifiers appear to have been
// stripped from this copy of the file, so the block does not compile as
// shown; the comments describe only the visible structure.
func accumBlockScalar( *[8]u64, ,  ptr) {
	if  != key {
		accumBlockScalarSeed(, , )
		return
	}
	// accs: each iteration does eight read/XOR/accumulate steps at byte
	// offsets 8*0 .. 8*7, then unconditionally advances the two pointers by
	// _stripe and by 8 respectively.
	for  := 0;  < 16; ++ {
		 := readU64(, 8*0)
		 :=  ^ readU64(, 8*0)
		[1] += 
		[0] += ( & 0xffffffff) * ( >> 32)

		 := readU64(, 8*1)
		 :=  ^ readU64(, 8*1)
		[0] += 
		[1] += ( & 0xffffffff) * ( >> 32)

		 := readU64(, 8*2)
		 :=  ^ readU64(, 8*2)
		[3] += 
		[2] += ( & 0xffffffff) * ( >> 32)

		 := readU64(, 8*3)
		 :=  ^ readU64(, 8*3)
		[2] += 
		[3] += ( & 0xffffffff) * ( >> 32)

		 := readU64(, 8*4)
		 :=  ^ readU64(, 8*4)
		[5] += 
		[4] += ( & 0xffffffff) * ( >> 32)

		 := readU64(, 8*5)
		 :=  ^ readU64(, 8*5)
		[4] += 
		[5] += ( & 0xffffffff) * ( >> 32)

		 := readU64(, 8*6)
		 :=  ^ readU64(, 8*6)
		[7] += 
		[6] += ( & 0xffffffff) * ( >> 32)

		 := readU64(, 8*7)
		 :=  ^ readU64(, 8*7)
		[6] += 
		[7] += ( & 0xffffffff) * ( >> 32)

		,  = ptr(ui()+_stripe), ptr(ui()+8)
	}

	// scramble accs: for each slot, XOR with itself shifted right by 47, XOR
	// with the matching key64_* constant, then multiply by prime32_1.
	[0] ^= [0] >> 47
	[0] ^= key64_128
	[0] *= prime32_1

	[1] ^= [1] >> 47
	[1] ^= key64_136
	[1] *= prime32_1

	[2] ^= [2] >> 47
	[2] ^= key64_144
	[2] *= prime32_1

	[3] ^= [3] >> 47
	[3] ^= key64_152
	[3] *= prime32_1

	[4] ^= [4] >> 47
	[4] ^= key64_160
	[4] *= prime32_1

	[5] ^= [5] >> 47
	[5] ^= key64_168
	[5] *= prime32_1

	[6] ^= [6] >> 47
	[6] ^= key64_176
	[6] *= prime32_1

	[7] ^= [7] >> 47
	[7] ^= key64_184
	[7] *= prime32_1
}

// accumScalarSeed should be used with custom key.
//
// It has the same shape as accumScalar (main loop while the compared value
// exceeds _block, then a tail), but the scramble and the final step obtain
// their XOR operands with readU64 at byte offsets 128 .. 184 and 121 .. 177
// instead of using the key64_* constants.
//
// NOTE(review): the parameter and local identifiers appear to have been
// stripped from this copy of the file, so the block does not compile as
// shown; the comments describe only the visible structure.
func accumScalarSeed( *[8]u64, ,  ptr,  u64) {
	// Main loop: one accumulate-then-scramble round per pass.
	for  > _block {
		 := 

		// accs: 16 iterations of eight read/XOR/accumulate steps at byte
		// offsets 8*0 .. 8*7.
		for  := 0;  < 16; ++ {
			 := readU64(, 8*0)
			 :=  ^ readU64(, 8*0)
			[1] += 
			[0] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*1)
			 :=  ^ readU64(, 8*1)
			[0] += 
			[1] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*2)
			 :=  ^ readU64(, 8*2)
			[3] += 
			[2] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*3)
			 :=  ^ readU64(, 8*3)
			[2] += 
			[3] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*4)
			 :=  ^ readU64(, 8*4)
			[5] += 
			[4] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*5)
			 :=  ^ readU64(, 8*5)
			[4] += 
			[5] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*6)
			 :=  ^ readU64(, 8*6)
			[7] += 
			[6] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*7)
			 :=  ^ readU64(, 8*7)
			[6] += 
			[7] += ( & 0xffffffff) * ( >> 32)

			// Consume one _stripe; advance the two pointers (by _stripe and
			// by 8) only while the counter stays positive.
			 -= _stripe
			if  > 0 {
				,  = ptr(ui()+_stripe), ptr(ui()+8)
			}
		}

		// scramble accs: for each slot, XOR with itself shifted right by 47,
		// XOR with a u64 read at byte offset 128 + 8*slot, then multiply by
		// prime32_1.
		[0] ^= [0] >> 47
		[0] ^= readU64(, 128)
		[0] *= prime32_1

		[1] ^= [1] >> 47
		[1] ^= readU64(, 136)
		[1] *= prime32_1

		[2] ^= [2] >> 47
		[2] ^= readU64(, 144)
		[2] *= prime32_1

		[3] ^= [3] >> 47
		[3] ^= readU64(, 152)
		[3] *= prime32_1

		[4] ^= [4] >> 47
		[4] ^= readU64(, 160)
		[4] *= prime32_1

		[5] ^= [5] >> 47
		[5] ^= readU64(, 168)
		[5] *= prime32_1

		[6] ^= [6] >> 47
		[6] ^= readU64(, 176)
		[6] *= prime32_1

		[7] ^= [7] >> 47
		[7] ^= readU64(, 184)
		[7] *= prime32_1
	}

	// Tail: (value-1)/_stripe iterations, then one final step.
	if  > 0 {
		,  := (-1)/_stripe, 

		for  := u64(0);  < ; ++ {
			 := readU64(, 8*0)
			 :=  ^ readU64(, 8*0)
			[1] += 
			[0] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*1)
			 :=  ^ readU64(, 8*1)
			[0] += 
			[1] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*2)
			 :=  ^ readU64(, 8*2)
			[3] += 
			[2] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*3)
			 :=  ^ readU64(, 8*3)
			[2] += 
			[3] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*4)
			 :=  ^ readU64(, 8*4)
			[5] += 
			[4] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*5)
			 :=  ^ readU64(, 8*5)
			[4] += 
			[5] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*6)
			 :=  ^ readU64(, 8*6)
			[7] += 
			[6] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*7)
			 :=  ^ readU64(, 8*7)
			[6] += 
			[7] += ( & 0xffffffff) * ( >> 32)

			 -= _stripe
			if  > 0 {
				,  = ptr(ui()+_stripe), ptr(ui()+8)
			}
		}

		// Final step: the pointer is moved backwards by (_stripe - value)
		// before reading, presumably so the last read window ends at the end
		// of the input — TODO confirm. The XOR operands are read at byte
		// offsets 121 .. 177 (step 8).
		if  > 0 {
			 = ptr(ui() - uintptr(_stripe-))

			 := readU64(, 8*0)
			 :=  ^ readU64(, 121)
			[1] += 
			[0] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*1)
			 :=  ^ readU64(, 129)
			[0] += 
			[1] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*2)
			 :=  ^ readU64(, 137)
			[3] += 
			[2] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*3)
			 :=  ^ readU64(, 145)
			[2] += 
			[3] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*4)
			 :=  ^ readU64(, 153)
			[5] += 
			[4] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*5)
			 :=  ^ readU64(, 161)
			[4] += 
			[5] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*6)
			 :=  ^ readU64(, 169)
			[7] += 
			[6] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*7)
			 :=  ^ readU64(, 177)
			[6] += 
			[7] += ( & 0xffffffff) * ( >> 32)
		}
	}
}

// accumBlockScalarSeed should be used with custom key.
//
// It performs exactly one accumulate-then-scramble round on an 8-slot u64
// accumulator array: 16 iterations of eight steps each, followed by a
// scramble whose XOR operands are read with readU64 at byte offsets
// 128 .. 184 rather than taken from the key64_* constants.
//
// NOTE(review): the parameter and local identifiers appear to have been
// stripped from this copy of the file, so the block does not compile as
// shown; the comments describe only the visible structure.
func accumBlockScalarSeed( *[8]u64, ,  ptr) {
	// accs: the braces scope a local that is declared for the loop only, so
	// it is not visible to the scramble below.
	{
		 := 
		for  := 0;  < 16; ++ {
			 := readU64(, 8*0)
			 :=  ^ readU64(, 8*0)
			[1] += 
			[0] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*1)
			 :=  ^ readU64(, 8*1)
			[0] += 
			[1] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*2)
			 :=  ^ readU64(, 8*2)
			[3] += 
			[2] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*3)
			 :=  ^ readU64(, 8*3)
			[2] += 
			[3] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*4)
			 :=  ^ readU64(, 8*4)
			[5] += 
			[4] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*5)
			 :=  ^ readU64(, 8*5)
			[4] += 
			[5] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*6)
			 :=  ^ readU64(, 8*6)
			[7] += 
			[6] += ( & 0xffffffff) * ( >> 32)

			 := readU64(, 8*7)
			 :=  ^ readU64(, 8*7)
			[6] += 
			[7] += ( & 0xffffffff) * ( >> 32)

			// Unconditionally advance the two pointers by _stripe and by 8.
			,  = ptr(ui()+_stripe), ptr(ui()+8)
		}
	}

	// scramble accs: for each slot, XOR with itself shifted right by 47, XOR
	// with a u64 read at byte offset 128 + 8*slot, then multiply by
	// prime32_1.
	[0] ^= [0] >> 47
	[0] ^= readU64(, 128)
	[0] *= prime32_1

	[1] ^= [1] >> 47
	[1] ^= readU64(, 136)
	[1] *= prime32_1

	[2] ^= [2] >> 47
	[2] ^= readU64(, 144)
	[2] *= prime32_1

	[3] ^= [3] >> 47
	[3] ^= readU64(, 152)
	[3] *= prime32_1

	[4] ^= [4] >> 47
	[4] ^= readU64(, 160)
	[4] *= prime32_1

	[5] ^= [5] >> 47
	[5] ^= readU64(, 168)
	[5] *= prime32_1

	[6] ^= [6] >> 47
	[6] ^= readU64(, 176)
	[6] *= prime32_1

	[7] ^= [7] >> 47
	[7] ^= readU64(, 184)
	[7] *= prime32_1
}