// Code generated by command: go run gen.go -sse -out ../accum_vector_sse_amd64.s -pkg xxh3. DO NOT EDIT.

#include "textflag.h"

DATA prime_sse<>+0(SB)/4, $0x9e3779b1
DATA prime_sse<>+4(SB)/4, $0x9e3779b1
DATA prime_sse<>+8(SB)/4, $0x9e3779b1
DATA prime_sse<>+12(SB)/4, $0x9e3779b1
GLOBL prime_sse<>(SB), RODATA|NOPTR, $16

// func accumSSE(acc *[8]uint64, data *byte, key *byte, len uint64)
// Requires: SSE2
TEXT ·accumSSE(SB), NOSPLIT, $0-32
	MOVQ  acc+0(FP), AX
	MOVQ  data+8(FP), CX
	MOVQ  key+16(FP), DX
	MOVQ  key+16(FP), BX
	MOVQ  len+24(FP), SI
	MOVOU (AX), X1
	MOVOU 16(AX), X2
	MOVOU 32(AX), X3
	MOVOU 48(AX), X4
	MOVOU prime_sse<>+0(SB), X0

accum_large:
	CMPQ    SI, $0x00000400
	JLE     accum
	MOVOU   (CX), X5
	MOVOU   (DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   16(CX), X5
	MOVOU   16(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   32(CX), X5
	MOVOU   32(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   48(CX), X5
	MOVOU   48(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   64(CX), X5
	MOVOU   8(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   80(CX), X5
	MOVOU   24(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   96(CX), X5
	MOVOU   40(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   112(CX), X5
	MOVOU   56(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   128(CX), X5
	MOVOU   16(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   144(CX), X5
	MOVOU   32(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   160(CX), X5
	MOVOU   48(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   176(CX), X5
	MOVOU   64(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   192(CX), X5
	MOVOU   24(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   208(CX), X5
	MOVOU   40(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   224(CX), X5
	MOVOU   56(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   240(CX), X5
	MOVOU   72(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   256(CX), X5
	MOVOU   32(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   272(CX), X5
	MOVOU   48(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   288(CX), X5
	MOVOU   64(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   304(CX), X5
	MOVOU   80(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   320(CX), X5
	MOVOU   40(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   336(CX), X5
	MOVOU   56(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   352(CX), X5
	MOVOU   72(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   368(CX), X5
	MOVOU   88(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   384(CX), X5
	MOVOU   48(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   400(CX), X5
	MOVOU   64(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   416(CX), X5
	MOVOU   80(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   432(CX), X5
	MOVOU   96(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   448(CX), X5
	MOVOU   56(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   464(CX), X5
	MOVOU   72(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   480(CX), X5
	MOVOU   88(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   496(CX), X5
	MOVOU   104(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   512(CX), X5
	MOVOU   64(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   528(CX), X5
	MOVOU   80(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   544(CX), X5
	MOVOU   96(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   560(CX), X5
	MOVOU   112(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   576(CX), X5
	MOVOU   72(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   592(CX), X5
	MOVOU   88(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   608(CX), X5
	MOVOU   104(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   624(CX), X5
	MOVOU   120(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   640(CX), X5
	MOVOU   80(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   656(CX), X5
	MOVOU   96(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   672(CX), X5
	MOVOU   112(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   688(CX), X5
	MOVOU   128(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   704(CX), X5
	MOVOU   88(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   720(CX), X5
	MOVOU   104(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   736(CX), X5
	MOVOU   120(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   752(CX), X5
	MOVOU   136(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   768(CX), X5
	MOVOU   96(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   784(CX), X5
	MOVOU   112(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   800(CX), X5
	MOVOU   128(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   816(CX), X5
	MOVOU   144(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   832(CX), X5
	MOVOU   104(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   848(CX), X5
	MOVOU   120(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   864(CX), X5
	MOVOU   136(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   880(CX), X5
	MOVOU   152(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   896(CX), X5
	MOVOU   112(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   912(CX), X5
	MOVOU   128(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   928(CX), X5
	MOVOU   144(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   944(CX), X5
	MOVOU   160(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   960(CX), X5
	MOVOU   120(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   976(CX), X5
	MOVOU   136(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   992(CX), X5
	MOVOU   152(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   1008(CX), X5
	MOVOU   168(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	ADDQ    $0x00000400, CX
	SUBQ    $0x00000400, SI
	MOVOU   X1, X5
	PSRLQ   $0x2f, X5
	PXOR    X5, X1
	MOVOU   128(DX), X5
	PXOR    X5, X1
	PSHUFD  $0xf5, X1, X5
	PMULULQ X0, X1
	PMULULQ X0, X5
	PSLLQ   $0x20, X5
	PADDQ   X5, X1
	MOVOU   X2, X5
	PSRLQ   $0x2f, X5
	PXOR    X5, X2
	MOVOU   144(DX), X5
	PXOR    X5, X2
	PSHUFD  $0xf5, X2, X5
	PMULULQ X0, X2
	PMULULQ X0, X5
	PSLLQ   $0x20, X5
	PADDQ   X5, X2
	MOVOU   X3, X5
	PSRLQ   $0x2f, X5
	PXOR    X5, X3
	MOVOU   160(DX), X5
	PXOR    X5, X3
	PSHUFD  $0xf5, X3, X5
	PMULULQ X0, X3
	PMULULQ X0, X5
	PSLLQ   $0x20, X5
	PADDQ   X5, X3
	MOVOU   X4, X5
	PSRLQ   $0x2f, X5
	PXOR    X5, X4
	MOVOU   176(DX), X5
	PXOR    X5, X4
	PSHUFD  $0xf5, X4, X5
	PMULULQ X0, X4
	PMULULQ X0, X5
	PSLLQ   $0x20, X5
	PADDQ   X5, X4
	JMP     accum_large

accum:
	CMPQ    SI, $0x40
	JLE     finalize
	MOVOU   (CX), X0
	MOVOU   (BX), X5
	PXOR    X0, X5
	PSHUFD  $0x31, X5, X6
	PMULULQ X5, X6
	PSHUFD  $0x4e, X0, X0
	PADDQ   X0, X1
	PADDQ   X6, X1
	MOVOU   16(CX), X0
	MOVOU   16(BX), X5
	PXOR    X0, X5
	PSHUFD  $0x31, X5, X6
	PMULULQ X5, X6
	PSHUFD  $0x4e, X0, X0
	PADDQ   X0, X2
	PADDQ   X6, X2
	MOVOU   32(CX), X0
	MOVOU   32(BX), X5
	PXOR    X0, X5
	PSHUFD  $0x31, X5, X6
	PMULULQ X5, X6
	PSHUFD  $0x4e, X0, X0
	PADDQ   X0, X3
	PADDQ   X6, X3
	MOVOU   48(CX), X0
	MOVOU   48(BX), X5
	PXOR    X0, X5
	PSHUFD  $0x31, X5, X6
	PMULULQ X5, X6
	PSHUFD  $0x4e, X0, X0
	PADDQ   X0, X4
	PADDQ   X6, X4
	ADDQ    $0x00000040, CX
	SUBQ    $0x00000040, SI
	ADDQ    $0x00000008, BX
	JMP     accum

finalize:
	CMPQ    SI, $0x00
	JE      return
	SUBQ    $0x40, CX
	ADDQ    SI, CX
	MOVOU   (CX), X0
	MOVOU   121(DX), X5
	PXOR    X0, X5
	PSHUFD  $0x31, X5, X6
	PMULULQ X5, X6
	PSHUFD  $0x4e, X0, X0
	PADDQ   X0, X1
	PADDQ   X6, X1
	MOVOU   16(CX), X0
	MOVOU   137(DX), X5
	PXOR    X0, X5
	PSHUFD  $0x31, X5, X6
	PMULULQ X5, X6
	PSHUFD  $0x4e, X0, X0
	PADDQ   X0, X2
	PADDQ   X6, X2
	MOVOU   32(CX), X0
	MOVOU   153(DX), X5
	PXOR    X0, X5
	PSHUFD  $0x31, X5, X6
	PMULULQ X5, X6
	PSHUFD  $0x4e, X0, X0
	PADDQ   X0, X3
	PADDQ   X6, X3
	MOVOU   48(CX), X0
	MOVOU   169(DX), X5
	PXOR    X0, X5
	PSHUFD  $0x31, X5, X6
	PMULULQ X5, X6
	PSHUFD  $0x4e, X0, X0
	PADDQ   X0, X4
	PADDQ   X6, X4

return:
	MOVOU X1, (AX)
	MOVOU X2, 16(AX)
	MOVOU X3, 32(AX)
	MOVOU X4, 48(AX)
	RET

// func accumBlockSSE(acc *[8]uint64, data *byte, key *byte)
// Requires: SSE2
TEXT ·accumBlockSSE(SB), NOSPLIT, $0-24
	MOVQ    acc+0(FP), AX
	MOVQ    data+8(FP), CX
	MOVQ    key+16(FP), DX
	MOVOU   (AX), X1
	MOVOU   16(AX), X2
	MOVOU   32(AX), X3
	MOVOU   48(AX), X4
	MOVOU   prime_sse<>+0(SB), X0
	MOVOU   (CX), X5
	MOVOU   (DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   16(CX), X5
	MOVOU   16(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   32(CX), X5
	MOVOU   32(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   48(CX), X5
	MOVOU   48(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   64(CX), X5
	MOVOU   8(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   80(CX), X5
	MOVOU   24(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   96(CX), X5
	MOVOU   40(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   112(CX), X5
	MOVOU   56(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   128(CX), X5
	MOVOU   16(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   144(CX), X5
	MOVOU   32(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   160(CX), X5
	MOVOU   48(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   176(CX), X5
	MOVOU   64(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   192(CX), X5
	MOVOU   24(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   208(CX), X5
	MOVOU   40(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   224(CX), X5
	MOVOU   56(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   240(CX), X5
	MOVOU   72(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   256(CX), X5
	MOVOU   32(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   272(CX), X5
	MOVOU   48(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   288(CX), X5
	MOVOU   64(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   304(CX), X5
	MOVOU   80(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   320(CX), X5
	MOVOU   40(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   336(CX), X5
	MOVOU   56(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   352(CX), X5
	MOVOU   72(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   368(CX), X5
	MOVOU   88(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   384(CX), X5
	MOVOU   48(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   400(CX), X5
	MOVOU   64(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   416(CX), X5
	MOVOU   80(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   432(CX), X5
	MOVOU   96(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   448(CX), X5
	MOVOU   56(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   464(CX), X5
	MOVOU   72(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   480(CX), X5
	MOVOU   88(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   496(CX), X5
	MOVOU   104(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   512(CX), X5
	MOVOU   64(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   528(CX), X5
	MOVOU   80(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   544(CX), X5
	MOVOU   96(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   560(CX), X5
	MOVOU   112(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   576(CX), X5
	MOVOU   72(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   592(CX), X5
	MOVOU   88(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   608(CX), X5
	MOVOU   104(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   624(CX), X5
	MOVOU   120(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   640(CX), X5
	MOVOU   80(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   656(CX), X5
	MOVOU   96(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   672(CX), X5
	MOVOU   112(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   688(CX), X5
	MOVOU   128(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   704(CX), X5
	MOVOU   88(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   720(CX), X5
	MOVOU   104(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   736(CX), X5
	MOVOU   120(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   752(CX), X5
	MOVOU   136(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   768(CX), X5
	MOVOU   96(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   784(CX), X5
	MOVOU   112(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   800(CX), X5
	MOVOU   128(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   816(CX), X5
	MOVOU   144(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   832(CX), X5
	MOVOU   104(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   848(CX), X5
	MOVOU   120(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   864(CX), X5
	MOVOU   136(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   880(CX), X5
	MOVOU   152(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   896(CX), X5
	MOVOU   112(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   912(CX), X5
	MOVOU   128(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   928(CX), X5
	MOVOU   144(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   944(CX), X5
	MOVOU   160(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   960(CX), X5
	MOVOU   120(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X1
	PADDQ   X7, X1
	MOVOU   976(CX), X5
	MOVOU   136(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X2
	PADDQ   X7, X2
	MOVOU   992(CX), X5
	MOVOU   152(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X3
	PADDQ   X7, X3
	MOVOU   1008(CX), X5
	MOVOU   168(DX), X6
	PXOR    X5, X6
	PSHUFD  $0x31, X6, X7
	PMULULQ X6, X7
	PSHUFD  $0x4e, X5, X5
	PADDQ   X5, X4
	PADDQ   X7, X4
	MOVOU   X1, X5
	PSRLQ   $0x2f, X5
	PXOR    X5, X1
	MOVOU   128(DX), X5
	PXOR    X5, X1
	PSHUFD  $0xf5, X1, X5
	PMULULQ X0, X1
	PMULULQ X0, X5
	PSLLQ   $0x20, X5
	PADDQ   X5, X1
	MOVOU   X2, X5
	PSRLQ   $0x2f, X5
	PXOR    X5, X2
	MOVOU   144(DX), X5
	PXOR    X5, X2
	PSHUFD  $0xf5, X2, X5
	PMULULQ X0, X2
	PMULULQ X0, X5
	PSLLQ   $0x20, X5
	PADDQ   X5, X2
	MOVOU   X3, X5
	PSRLQ   $0x2f, X5
	PXOR    X5, X3
	MOVOU   160(DX), X5
	PXOR    X5, X3
	PSHUFD  $0xf5, X3, X5
	PMULULQ X0, X3
	PMULULQ X0, X5
	PSLLQ   $0x20, X5
	PADDQ   X5, X3
	MOVOU   X4, X5
	PSRLQ   $0x2f, X5
	PXOR    X5, X4
	MOVOU   176(DX), X5
	PXOR    X5, X4
	PSHUFD  $0xf5, X4, X5
	PMULULQ X0, X4
	PMULULQ X0, X5
	PSLLQ   $0x20, X5
	PADDQ   X5, X4
	MOVOU   X1, (AX)
	MOVOU   X2, 16(AX)
	MOVOU   X3, 32(AX)
	MOVOU   X4, 48(AX)
	RET