//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// LCDATA1 is a 32-byte constant table. The <> suffix makes the symbol
// file-local, so it is only visible to code in this assembly file.
// Each of the four 8-byte DATA directives below stores 0x00ff00ff00ff00ff,
// i.e. every 16-bit lane of the table holds 0x00ff.
// The TEXT routine that follows loads this table's address into BP on entry.
// NOTE(review): presumably used as a low-byte mask by the 256-bit vector
// paths; the consumer is not visible in this part of the file -- confirm.
DATA LCDATA1<>+0x000(SB)/8, $0x00ff00ff00ff00ff
DATA LCDATA1<>+0x008(SB)/8, $0x00ff00ff00ff00ff
DATA LCDATA1<>+0x010(SB)/8, $0x00ff00ff00ff00ff
DATA LCDATA1<>+0x018(SB)/8, $0x00ff00ff00ff00ff
GLOBL LCDATA1<>(SB), 8, $32 // flag 8 corresponds to RODATA in Go's textflag.h; $32 is the symbol size in bytes

TEXT ·_arithmetic_binary_avx2(SB), $0-48

	MOVQ typ+0(FP), DI
	MOVQ op+8(FP), SI
	MOVQ inLeft+16(FP), DX
	MOVQ inRight+24(FP), CX
	MOVQ out+32(FP), R8
	MOVQ len+40(FP), R9
	LEAQ LCDATA1<>(SB), BP

	LONG $0x14fe8040         // cmp    sil, 20
	JG   LBB0_11
	WORD $0x8440; BYTE $0xf6 // test    sil, sil
	JE   LBB0_21
	LONG $0x01fe8040         // cmp    sil, 1
	JE   LBB0_287
	LONG $0x02fe8040         // cmp    sil, 2
	JNE  LBB0_825
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB0_559
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB0_6
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB0_602
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB0_614
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_626
	WORD $0xf631             // xor    esi, esi

LBB0_631:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_633

LBB0_632:
	WORD $0x3c8b; BYTE $0xb1 // mov    edi, dword [rcx + 4*rsi]
	LONG $0xb23caf0f         // imul    edi, dword [rdx + 4*rsi]
	LONG $0xb03c8941         // mov    dword [r8 + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_632

LBB0_633:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_634:
	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
	LONG $0xb204af0f             // imul    eax, dword [rdx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
	LONG $0xb244af0f; BYTE $0x04 // imul    eax, dword [rdx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
	LONG $0xb244af0f; BYTE $0x08 // imul    eax, dword [rdx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
	LONG $0xb244af0f; BYTE $0x0c // imul    eax, dword [rdx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_634
	JMP  LBB0_825

LBB0_11:
	LONG $0x15fe8040         // cmp    sil, 21
	JE   LBB0_154
	LONG $0x16fe8040         // cmp    sil, 22
	JE   LBB0_420
	LONG $0x17fe8040         // cmp    sil, 23
	JNE  LBB0_825
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB0_695
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB0_16
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB0_738
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB0_750
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_762
	WORD $0xf631             // xor    esi, esi

LBB0_767:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_769

LBB0_768:
	WORD $0x3c8b; BYTE $0xb1 // mov    edi, dword [rcx + 4*rsi]
	LONG $0xb23caf0f         // imul    edi, dword [rdx + 4*rsi]
	LONG $0xb03c8941         // mov    dword [r8 + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_768

LBB0_769:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_770:
	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
	LONG $0xb204af0f             // imul    eax, dword [rdx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
	LONG $0xb244af0f; BYTE $0x04 // imul    eax, dword [rdx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
	LONG $0xb244af0f; BYTE $0x08 // imul    eax, dword [rdx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
	LONG $0xb244af0f; BYTE $0x0c // imul    eax, dword [rdx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_770
	JMP  LBB0_825

LBB0_21:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB0_34
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB0_23
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB0_67
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB0_79
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_91
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_96

LBB0_287:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB0_300
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB0_289
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB0_333
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB0_345
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_357
	WORD $0xf631             // xor    esi, esi

LBB0_362:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_364

LBB0_363:
	WORD $0x3c8b; BYTE $0xb2 // mov    edi, dword [rdx + 4*rsi]
	WORD $0x3c2b; BYTE $0xb1 // sub    edi, dword [rcx + 4*rsi]
	LONG $0xb03c8941         // mov    dword [r8 + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_363

LBB0_364:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_365:
	WORD $0x048b; BYTE $0xb2     // mov    eax, dword [rdx + 4*rsi]
	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	LONG $0x04b2448b             // mov    eax, dword [rdx + 4*rsi + 4]
	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	LONG $0x08b2448b             // mov    eax, dword [rdx + 4*rsi + 8]
	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	LONG $0x0cb2448b             // mov    eax, dword [rdx + 4*rsi + 12]
	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_365
	JMP  LBB0_825

LBB0_154:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB0_167
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB0_156
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB0_200
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB0_212
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_224
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_229

LBB0_420:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB0_433
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB0_422
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB0_466
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB0_478
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_490
	WORD $0xf631             // xor    esi, esi

LBB0_495:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_497

LBB0_496:
	WORD $0x3c8b; BYTE $0xb2 // mov    edi, dword [rdx + 4*rsi]
	WORD $0x3c2b; BYTE $0xb1 // sub    edi, dword [rcx + 4*rsi]
	LONG $0xb03c8941         // mov    dword [r8 + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_496

LBB0_497:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_498:
	WORD $0x048b; BYTE $0xb2     // mov    eax, dword [rdx + 4*rsi]
	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	LONG $0x04b2448b             // mov    eax, dword [rdx + 4*rsi + 4]
	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	LONG $0x08b2448b             // mov    eax, dword [rdx + 4*rsi + 8]
	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	LONG $0x0cb2448b             // mov    eax, dword [rdx + 4*rsi + 12]
	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_498
	JMP  LBB0_825

LBB0_559:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB0_560
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB0_656
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB0_668
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_680
	WORD $0xf631             // xor    esi, esi

LBB0_685:
	WORD $0x8948; BYTE $0xf7 // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7 // not    rdi
	WORD $0x014c; BYTE $0xd7 // add    rdi, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_687

LBB0_686:
	LONG $0x0410fbc5; BYTE $0xf1   // vmovsd    xmm0, qword [rcx + 8*rsi]
	LONG $0x0459fbc5; BYTE $0xf2   // vmulsd    xmm0, xmm0, qword [rdx + 8*rsi]
	LONG $0x117bc1c4; WORD $0xf004 // vmovsd    qword [r8 + 8*rsi], xmm0
	LONG $0x01c68348               // add    rsi, 1
	LONG $0xffc08348               // add    rax, -1
	JNE  LBB0_686

LBB0_687:
	LONG $0x03ff8348 // cmp    rdi, 3
	JB   LBB0_825

LBB0_688:
	LONG $0x0410fbc5; BYTE $0xf1               // vmovsd    xmm0, qword [rcx + 8*rsi]
	LONG $0x0459fbc5; BYTE $0xf2               // vmulsd    xmm0, xmm0, qword [rdx + 8*rsi]
	LONG $0x117bc1c4; WORD $0xf004             // vmovsd    qword [r8 + 8*rsi], xmm0
	LONG $0x4410fbc5; WORD $0x08f1             // vmovsd    xmm0, qword [rcx + 8*rsi + 8]
	LONG $0x4459fbc5; WORD $0x08f2             // vmulsd    xmm0, xmm0, qword [rdx + 8*rsi + 8]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x08 // vmovsd    qword [r8 + 8*rsi + 8], xmm0
	LONG $0x4410fbc5; WORD $0x10f1             // vmovsd    xmm0, qword [rcx + 8*rsi + 16]
	LONG $0x4459fbc5; WORD $0x10f2             // vmulsd    xmm0, xmm0, qword [rdx + 8*rsi + 16]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x10 // vmovsd    qword [r8 + 8*rsi + 16], xmm0
	LONG $0x4410fbc5; WORD $0x18f1             // vmovsd    xmm0, qword [rcx + 8*rsi + 24]
	LONG $0x4459fbc5; WORD $0x18f2             // vmulsd    xmm0, xmm0, qword [rdx + 8*rsi + 24]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x18 // vmovsd    qword [r8 + 8*rsi + 24], xmm0
	LONG $0x04c68348                           // add    rsi, 4
	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
	JNE  LBB0_688
	JMP  LBB0_825

LBB0_695:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB0_696
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB0_792
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB0_804
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_816
	WORD $0xf631             // xor    esi, esi

LBB0_821:
	WORD $0x8948; BYTE $0xf7 // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7 // not    rdi
	WORD $0x014c; BYTE $0xd7 // add    rdi, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_823

LBB0_822:
	LONG $0x0410fbc5; BYTE $0xf1   // vmovsd    xmm0, qword [rcx + 8*rsi]
	LONG $0x0459fbc5; BYTE $0xf2   // vmulsd    xmm0, xmm0, qword [rdx + 8*rsi]
	LONG $0x117bc1c4; WORD $0xf004 // vmovsd    qword [r8 + 8*rsi], xmm0
	LONG $0x01c68348               // add    rsi, 1
	LONG $0xffc08348               // add    rax, -1
	JNE  LBB0_822

LBB0_823:
	LONG $0x03ff8348 // cmp    rdi, 3
	JB   LBB0_825

LBB0_824:
	LONG $0x0410fbc5; BYTE $0xf1               // vmovsd    xmm0, qword [rcx + 8*rsi]
	LONG $0x0459fbc5; BYTE $0xf2               // vmulsd    xmm0, xmm0, qword [rdx + 8*rsi]
	LONG $0x117bc1c4; WORD $0xf004             // vmovsd    qword [r8 + 8*rsi], xmm0
	LONG $0x4410fbc5; WORD $0x08f1             // vmovsd    xmm0, qword [rcx + 8*rsi + 8]
	LONG $0x4459fbc5; WORD $0x08f2             // vmulsd    xmm0, xmm0, qword [rdx + 8*rsi + 8]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x08 // vmovsd    qword [r8 + 8*rsi + 8], xmm0
	LONG $0x4410fbc5; WORD $0x10f1             // vmovsd    xmm0, qword [rcx + 8*rsi + 16]
	LONG $0x4459fbc5; WORD $0x10f2             // vmulsd    xmm0, xmm0, qword [rdx + 8*rsi + 16]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x10 // vmovsd    qword [r8 + 8*rsi + 16], xmm0
	LONG $0x4410fbc5; WORD $0x18f1             // vmovsd    xmm0, qword [rcx + 8*rsi + 24]
	LONG $0x4459fbc5; WORD $0x18f2             // vmulsd    xmm0, xmm0, qword [rdx + 8*rsi + 24]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x18 // vmovsd    qword [r8 + 8*rsi + 24], xmm0
	LONG $0x04c68348                           // add    rsi, 4
	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
	JNE  LBB0_824
	JMP  LBB0_825

LBB0_34:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB0_35
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB0_121
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB0_133
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_145
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_150

LBB0_300:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB0_301
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB0_387
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB0_399
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_411
	WORD $0xf631             // xor    esi, esi

LBB0_416:
	WORD $0x8948; BYTE $0xf7 // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7 // not    rdi
	WORD $0x014c; BYTE $0xd7 // add    rdi, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_418

LBB0_417:
	LONG $0x0410fbc5; BYTE $0xf2   // vmovsd    xmm0, qword [rdx + 8*rsi]
	LONG $0x045cfbc5; BYTE $0xf1   // vsubsd    xmm0, xmm0, qword [rcx + 8*rsi]
	LONG $0x117bc1c4; WORD $0xf004 // vmovsd    qword [r8 + 8*rsi], xmm0
	LONG $0x01c68348               // add    rsi, 1
	LONG $0xffc08348               // add    rax, -1
	JNE  LBB0_417

LBB0_418:
	LONG $0x03ff8348 // cmp    rdi, 3
	JB   LBB0_825

LBB0_419:
	LONG $0x0410fbc5; BYTE $0xf2               // vmovsd    xmm0, qword [rdx + 8*rsi]
	LONG $0x045cfbc5; BYTE $0xf1               // vsubsd    xmm0, xmm0, qword [rcx + 8*rsi]
	LONG $0x117bc1c4; WORD $0xf004             // vmovsd    qword [r8 + 8*rsi], xmm0
	LONG $0x4410fbc5; WORD $0x08f2             // vmovsd    xmm0, qword [rdx + 8*rsi + 8]
	LONG $0x445cfbc5; WORD $0x08f1             // vsubsd    xmm0, xmm0, qword [rcx + 8*rsi + 8]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x08 // vmovsd    qword [r8 + 8*rsi + 8], xmm0
	LONG $0x4410fbc5; WORD $0x10f2             // vmovsd    xmm0, qword [rdx + 8*rsi + 16]
	LONG $0x445cfbc5; WORD $0x10f1             // vsubsd    xmm0, xmm0, qword [rcx + 8*rsi + 16]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x10 // vmovsd    qword [r8 + 8*rsi + 16], xmm0
	LONG $0x4410fbc5; WORD $0x18f2             // vmovsd    xmm0, qword [rdx + 8*rsi + 24]
	LONG $0x445cfbc5; WORD $0x18f1             // vsubsd    xmm0, xmm0, qword [rcx + 8*rsi + 24]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x18 // vmovsd    qword [r8 + 8*rsi + 24], xmm0
	LONG $0x04c68348                           // add    rsi, 4
	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
	JNE  LBB0_419
	JMP  LBB0_825

LBB0_167:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB0_168
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB0_254
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB0_266
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_278
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_283

LBB0_433:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB0_434
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB0_520
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB0_532
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_544
	WORD $0xf631             // xor    esi, esi

LBB0_549:
	WORD $0x8948; BYTE $0xf7 // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7 // not    rdi
	WORD $0x014c; BYTE $0xd7 // add    rdi, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_551

LBB0_550:
	LONG $0x0410fbc5; BYTE $0xf2   // vmovsd    xmm0, qword [rdx + 8*rsi]
	LONG $0x045cfbc5; BYTE $0xf1   // vsubsd    xmm0, xmm0, qword [rcx + 8*rsi]
	LONG $0x117bc1c4; WORD $0xf004 // vmovsd    qword [r8 + 8*rsi], xmm0
	LONG $0x01c68348               // add    rsi, 1
	LONG $0xffc08348               // add    rax, -1
	JNE  LBB0_550

LBB0_551:
	LONG $0x03ff8348 // cmp    rdi, 3
	JB   LBB0_825

LBB0_552:
	LONG $0x0410fbc5; BYTE $0xf2               // vmovsd    xmm0, qword [rdx + 8*rsi]
	LONG $0x045cfbc5; BYTE $0xf1               // vsubsd    xmm0, xmm0, qword [rcx + 8*rsi]
	LONG $0x117bc1c4; WORD $0xf004             // vmovsd    qword [r8 + 8*rsi], xmm0
	LONG $0x4410fbc5; WORD $0x08f2             // vmovsd    xmm0, qword [rdx + 8*rsi + 8]
	LONG $0x445cfbc5; WORD $0x08f1             // vsubsd    xmm0, xmm0, qword [rcx + 8*rsi + 8]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x08 // vmovsd    qword [r8 + 8*rsi + 8], xmm0
	LONG $0x4410fbc5; WORD $0x10f2             // vmovsd    xmm0, qword [rdx + 8*rsi + 16]
	LONG $0x445cfbc5; WORD $0x10f1             // vsubsd    xmm0, xmm0, qword [rcx + 8*rsi + 16]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x10 // vmovsd    qword [r8 + 8*rsi + 16], xmm0
	LONG $0x4410fbc5; WORD $0x18f2             // vmovsd    xmm0, qword [rdx + 8*rsi + 24]
	LONG $0x445cfbc5; WORD $0x18f1             // vsubsd    xmm0, xmm0, qword [rcx + 8*rsi + 24]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x18 // vmovsd    qword [r8 + 8*rsi + 24], xmm0
	LONG $0x04c68348                           // add    rsi, 4
	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
	JNE  LBB0_552
	JMP  LBB0_825

LBB0_6:
	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
	JE   LBB0_571
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_588
	WORD $0xff31             // xor    edi, edi
	JMP  LBB0_598

LBB0_16:
	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
	JE   LBB0_707
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_724
	WORD $0xff31             // xor    edi, edi
	JMP  LBB0_734

LBB0_23:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB0_46
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JAE  LBB0_58
	WORD $0xf631                               // xor    esi, esi
	JMP  LBB0_63

LBB0_289:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB0_312
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JAE  LBB0_324
	WORD $0xf631                               // xor    esi, esi
	JMP  LBB0_329

LBB0_156:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB0_179
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JAE  LBB0_191
	WORD $0xf631                               // xor    esi, esi
	JMP  LBB0_196

LBB0_422:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB0_445
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JAE  LBB0_457
	WORD $0xf631                               // xor    esi, esi
	JMP  LBB0_462

LBB0_560:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB0_635
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_647
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_652

LBB0_696:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB0_771
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_783
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_788

LBB0_35:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB0_100
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_112
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_117

LBB0_301:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB0_366
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_378
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_383

LBB0_168:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB0_233
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_245
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_250

LBB0_434:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB0_499
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB0_825
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_511
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_516

LBB0_602:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JAE  LBB0_605
	WORD $0xf631             // xor    esi, esi

LBB0_610:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_612

LBB0_611:
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	LONG $0x3caf0f66; BYTE $0x72 // imul    di, word [rdx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_611

LBB0_612:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_613:
	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
	LONG $0x04af0f66; BYTE $0x72   // imul    ax, word [rdx + 2*rsi]
	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
	LONG $0x44af0f66; WORD $0x0272 // imul    ax, word [rdx + 2*rsi + 2]
	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
	LONG $0x44af0f66; WORD $0x0472 // imul    ax, word [rdx + 2*rsi + 4]
	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
	LONG $0x44af0f66; WORD $0x0672 // imul    ax, word [rdx + 2*rsi + 6]
	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_613
	JMP  LBB0_825

LBB0_614:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JAE  LBB0_617
	WORD $0xf631             // xor    esi, esi

LBB0_622:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_624

LBB0_623:
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	LONG $0x3caf0f66; BYTE $0x72 // imul    di, word [rdx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_623

LBB0_624:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_625:
	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
	LONG $0x04af0f66; BYTE $0x72   // imul    ax, word [rdx + 2*rsi]
	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
	LONG $0x44af0f66; WORD $0x0272 // imul    ax, word [rdx + 2*rsi + 2]
	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
	LONG $0x44af0f66; WORD $0x0472 // imul    ax, word [rdx + 2*rsi + 4]
	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
	LONG $0x44af0f66; WORD $0x0672 // imul    ax, word [rdx + 2*rsi + 6]
	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_625
	JMP  LBB0_825

LBB0_738:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JAE  LBB0_741
	WORD $0xf631             // xor    esi, esi

LBB0_746:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_748

LBB0_747:
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	LONG $0x3caf0f66; BYTE $0x72 // imul    di, word [rdx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_747

LBB0_748:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_749:
	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
	LONG $0x04af0f66; BYTE $0x72   // imul    ax, word [rdx + 2*rsi]
	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
	LONG $0x44af0f66; WORD $0x0272 // imul    ax, word [rdx + 2*rsi + 2]
	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
	LONG $0x44af0f66; WORD $0x0472 // imul    ax, word [rdx + 2*rsi + 4]
	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
	LONG $0x44af0f66; WORD $0x0672 // imul    ax, word [rdx + 2*rsi + 6]
	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_749
	JMP  LBB0_825

// LBB0_750: scalar element-wise multiply over 16-bit elements.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rcx = inRight, rdx = inLeft, r8 = out, r9d = len.
// Each step computes out[i] = inRight[i] * inLeft[i], keeping the low 16 bits.
// LBB0_825 and LBB0_753 are defined outside this excerpt (presumably the
// common exit and the wide/vector path for len >= 64 -- TODO confirm).
LBB0_750:
	// len <= 0 (signed 32-bit compare): nothing to do.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	// r10 = len zero-extended; it is the loop bound from here on.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JAE  LBB0_753
	// Short input: start the scalar code at element index 0.
	WORD $0xf631             // xor    esi, esi

// Tail set-up. rsi = index of the first element still to process.
// r9 = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_758:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_760

// Peel loop: one element per iteration, rax iterations in total.
LBB0_759:
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	LONG $0x3caf0f66; BYTE $0x72 // imul    di, word [rdx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_759

// If fewer than 4 elements were left when the tail started (r9 < 3, unsigned)
// the peel loop already handled all of them.
LBB0_760:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_761:
	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
	LONG $0x04af0f66; BYTE $0x72   // imul    ax, word [rdx + 2*rsi]
	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
	LONG $0x44af0f66; WORD $0x0272 // imul    ax, word [rdx + 2*rsi + 2]
	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
	LONG $0x44af0f66; WORD $0x0472 // imul    ax, word [rdx + 2*rsi + 4]
	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
	LONG $0x44af0f66; WORD $0x0672 // imul    ax, word [rdx + 2*rsi + 6]
	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_761
	JMP  LBB0_825

// LBB0_67: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 64 goes to LBB0_70, smaller inputs set index rsi = 0 and go to
// LBB0_75. All three targets are defined outside this excerpt (presumably the
// exit, the wide path and the scalar tail -- TODO confirm).
LBB0_67:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JAE  LBB0_70
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_75

// LBB0_79: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 64 goes to LBB0_82, smaller inputs set index rsi = 0 and go to
// LBB0_87. All three targets are defined outside this excerpt (presumably the
// exit, the wide path and the scalar tail -- TODO confirm).
LBB0_79:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JAE  LBB0_82
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_87

// LBB0_333: scalar element-wise subtract over 16-bit elements.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
// Each step computes out[i] = inLeft[i] - inRight[i] in 16 bits.
// LBB0_825 and LBB0_336 are defined outside this excerpt (presumably the
// common exit and the wide/vector path for len >= 64 -- TODO confirm).
LBB0_333:
	// len <= 0 (signed 32-bit compare): nothing to do.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	// r10 = len zero-extended; it is the loop bound from here on.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JAE  LBB0_336
	// Short input: start the scalar code at element index 0.
	WORD $0xf631             // xor    esi, esi

// Tail set-up. rsi = index of the first element still to process.
// r9 = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_341:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_343

// Peel loop: one element per iteration, rax iterations in total.
LBB0_342:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	LONG $0x713c2b66             // sub    di, word [rcx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_342

// If fewer than 4 elements were left when the tail started (r9 < 3, unsigned)
// the peel loop already handled all of them.
LBB0_343:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_344:
	LONG $0x7204b70f               // movzx    eax, word [rdx + 2*rsi]
	LONG $0x71042b66               // sub    ax, word [rcx + 2*rsi]
	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
	LONG $0x7244b70f; BYTE $0x02   // movzx    eax, word [rdx + 2*rsi + 2]
	LONG $0x71442b66; BYTE $0x02   // sub    ax, word [rcx + 2*rsi + 2]
	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
	LONG $0x7244b70f; BYTE $0x04   // movzx    eax, word [rdx + 2*rsi + 4]
	LONG $0x71442b66; BYTE $0x04   // sub    ax, word [rcx + 2*rsi + 4]
	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
	LONG $0x7244b70f; BYTE $0x06   // movzx    eax, word [rdx + 2*rsi + 6]
	LONG $0x71442b66; BYTE $0x06   // sub    ax, word [rcx + 2*rsi + 6]
	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_344
	JMP  LBB0_825

// LBB0_345: scalar element-wise subtract over 16-bit elements.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
// Each step computes out[i] = inLeft[i] - inRight[i] in 16 bits.
// LBB0_825 and LBB0_348 are defined outside this excerpt (presumably the
// common exit and the wide/vector path for len >= 64 -- TODO confirm).
LBB0_345:
	// len <= 0 (signed 32-bit compare): nothing to do.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	// r10 = len zero-extended; it is the loop bound from here on.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JAE  LBB0_348
	// Short input: start the scalar code at element index 0.
	WORD $0xf631             // xor    esi, esi

// Tail set-up. rsi = index of the first element still to process.
// r9 = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_353:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_355

// Peel loop: one element per iteration, rax iterations in total.
LBB0_354:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	LONG $0x713c2b66             // sub    di, word [rcx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_354

// If fewer than 4 elements were left when the tail started (r9 < 3, unsigned)
// the peel loop already handled all of them.
LBB0_355:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_356:
	LONG $0x7204b70f               // movzx    eax, word [rdx + 2*rsi]
	LONG $0x71042b66               // sub    ax, word [rcx + 2*rsi]
	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
	LONG $0x7244b70f; BYTE $0x02   // movzx    eax, word [rdx + 2*rsi + 2]
	LONG $0x71442b66; BYTE $0x02   // sub    ax, word [rcx + 2*rsi + 2]
	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
	LONG $0x7244b70f; BYTE $0x04   // movzx    eax, word [rdx + 2*rsi + 4]
	LONG $0x71442b66; BYTE $0x04   // sub    ax, word [rcx + 2*rsi + 4]
	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
	LONG $0x7244b70f; BYTE $0x06   // movzx    eax, word [rdx + 2*rsi + 6]
	LONG $0x71442b66; BYTE $0x06   // sub    ax, word [rcx + 2*rsi + 6]
	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_356
	JMP  LBB0_825

// LBB0_200: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 64 goes to LBB0_203, smaller inputs set index rsi = 0 and go to
// LBB0_208. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_200:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JAE  LBB0_203
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_208

// LBB0_212: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 64 goes to LBB0_215, smaller inputs set index rsi = 0 and go to
// LBB0_220. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_212:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JAE  LBB0_215
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_220

// LBB0_466: scalar element-wise subtract over 16-bit elements.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
// Each step computes out[i] = inLeft[i] - inRight[i] in 16 bits.
// LBB0_825 and LBB0_469 are defined outside this excerpt (presumably the
// common exit and the wide/vector path for len >= 64 -- TODO confirm).
LBB0_466:
	// len <= 0 (signed 32-bit compare): nothing to do.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	// r10 = len zero-extended; it is the loop bound from here on.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JAE  LBB0_469
	// Short input: start the scalar code at element index 0.
	WORD $0xf631             // xor    esi, esi

// Tail set-up. rsi = index of the first element still to process.
// r9 = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_474:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_476

// Peel loop: one element per iteration, rax iterations in total.
LBB0_475:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	LONG $0x713c2b66             // sub    di, word [rcx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_475

// If fewer than 4 elements were left when the tail started (r9 < 3, unsigned)
// the peel loop already handled all of them.
LBB0_476:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_477:
	LONG $0x7204b70f               // movzx    eax, word [rdx + 2*rsi]
	LONG $0x71042b66               // sub    ax, word [rcx + 2*rsi]
	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
	LONG $0x7244b70f; BYTE $0x02   // movzx    eax, word [rdx + 2*rsi + 2]
	LONG $0x71442b66; BYTE $0x02   // sub    ax, word [rcx + 2*rsi + 2]
	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
	LONG $0x7244b70f; BYTE $0x04   // movzx    eax, word [rdx + 2*rsi + 4]
	LONG $0x71442b66; BYTE $0x04   // sub    ax, word [rcx + 2*rsi + 4]
	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
	LONG $0x7244b70f; BYTE $0x06   // movzx    eax, word [rdx + 2*rsi + 6]
	LONG $0x71442b66; BYTE $0x06   // sub    ax, word [rcx + 2*rsi + 6]
	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_477
	JMP  LBB0_825

// LBB0_478: scalar element-wise subtract over 16-bit elements.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
// Each step computes out[i] = inLeft[i] - inRight[i] in 16 bits.
// LBB0_825 and LBB0_481 are defined outside this excerpt (presumably the
// common exit and the wide/vector path for len >= 64 -- TODO confirm).
LBB0_478:
	// len <= 0 (signed 32-bit compare): nothing to do.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	// r10 = len zero-extended; it is the loop bound from here on.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JAE  LBB0_481
	// Short input: start the scalar code at element index 0.
	WORD $0xf631             // xor    esi, esi

// Tail set-up. rsi = index of the first element still to process.
// r9 = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_486:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_488

// Peel loop: one element per iteration, rax iterations in total.
LBB0_487:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	LONG $0x713c2b66             // sub    di, word [rcx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_487

// If fewer than 4 elements were left when the tail started (r9 < 3, unsigned)
// the peel loop already handled all of them.
LBB0_488:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_489:
	LONG $0x7204b70f               // movzx    eax, word [rdx + 2*rsi]
	LONG $0x71042b66               // sub    ax, word [rcx + 2*rsi]
	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
	LONG $0x7244b70f; BYTE $0x02   // movzx    eax, word [rdx + 2*rsi + 2]
	LONG $0x71442b66; BYTE $0x02   // sub    ax, word [rcx + 2*rsi + 2]
	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
	LONG $0x7244b70f; BYTE $0x04   // movzx    eax, word [rdx + 2*rsi + 4]
	LONG $0x71442b66; BYTE $0x04   // sub    ax, word [rcx + 2*rsi + 4]
	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
	LONG $0x7244b70f; BYTE $0x06   // movzx    eax, word [rdx + 2*rsi + 6]
	LONG $0x71442b66; BYTE $0x06   // sub    ax, word [rcx + 2*rsi + 6]
	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_489
	JMP  LBB0_825

// LBB0_656: scalar element-wise multiply over 64-bit integer elements.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rcx = inRight, rdx = inLeft, r8 = out, r9d = len.
// Each step computes out[i] = inRight[i] * inLeft[i], keeping the low 64 bits.
// LBB0_825 and LBB0_659 are defined outside this excerpt (presumably the
// common exit and the wide path for len >= 16 -- TODO confirm).
LBB0_656:
	// len <= 0 (signed 32-bit compare): nothing to do.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	// r10 = len zero-extended; it is the loop bound from here on.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_659
	// Short input: start the scalar code at element index 0.
	WORD $0xf631             // xor    esi, esi

// Tail set-up. rsi = index of the first element still to process.
// r9 = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_664:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_666

// Peel loop: one element per iteration, rax iterations in total.
LBB0_665:
	LONG $0xf13c8b48             // mov    rdi, qword [rcx + 8*rsi]
	LONG $0x3caf0f48; BYTE $0xf2 // imul    rdi, qword [rdx + 8*rsi]
	LONG $0xf03c8949             // mov    qword [r8 + 8*rsi], rdi
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_665

// If fewer than 4 elements were left when the tail started (r9 < 3, unsigned)
// the peel loop already handled all of them.
LBB0_666:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_667:
	LONG $0xf1048b48               // mov    rax, qword [rcx + 8*rsi]
	LONG $0x04af0f48; BYTE $0xf2   // imul    rax, qword [rdx + 8*rsi]
	LONG $0xf0048949               // mov    qword [r8 + 8*rsi], rax
	LONG $0xf1448b48; BYTE $0x08   // mov    rax, qword [rcx + 8*rsi + 8]
	LONG $0x44af0f48; WORD $0x08f2 // imul    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08   // mov    qword [r8 + 8*rsi + 8], rax
	LONG $0xf1448b48; BYTE $0x10   // mov    rax, qword [rcx + 8*rsi + 16]
	LONG $0x44af0f48; WORD $0x10f2 // imul    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10   // mov    qword [r8 + 8*rsi + 16], rax
	LONG $0xf1448b48; BYTE $0x18   // mov    rax, qword [rcx + 8*rsi + 24]
	LONG $0x44af0f48; WORD $0x18f2 // imul    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18   // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_667
	JMP  LBB0_825

// LBB0_668: scalar element-wise multiply over single-precision floats.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rcx = inRight, rdx = inLeft, r8 = out, r9d = len; xmm0 is the
// only vector register used as a temporary.
// Each step computes out[i] = inRight[i] * inLeft[i] with vmulss.
// LBB0_825 and LBB0_671 are defined outside this excerpt (presumably the
// common exit and the wide/vector path for len >= 32 -- TODO confirm).
LBB0_668:
	// len <= 0 (signed 32-bit compare): nothing to do.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	// r10 = len zero-extended; it is the loop bound from here on.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_671
	// Short input: start the scalar code at element index 0.
	WORD $0xf631             // xor    esi, esi

// Tail set-up. rsi = index of the first element still to process.
// rdi = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_676:
	WORD $0x8948; BYTE $0xf7 // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7 // not    rdi
	WORD $0x014c; BYTE $0xd7 // add    rdi, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_678

// Peel loop: one element per iteration, rax iterations in total.
LBB0_677:
	LONG $0x0410fac5; BYTE $0xb1   // vmovss    xmm0, dword [rcx + 4*rsi]
	LONG $0x0459fac5; BYTE $0xb2   // vmulss    xmm0, xmm0, dword [rdx + 4*rsi]
	LONG $0x117ac1c4; WORD $0xb004 // vmovss    dword [r8 + 4*rsi], xmm0
	LONG $0x01c68348               // add    rsi, 1
	LONG $0xffc08348               // add    rax, -1
	JNE  LBB0_677

// If fewer than 4 elements were left when the tail started (rdi < 3, unsigned)
// the peel loop already handled all of them.
LBB0_678:
	LONG $0x03ff8348 // cmp    rdi, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_679:
	LONG $0x0410fac5; BYTE $0xb1               // vmovss    xmm0, dword [rcx + 4*rsi]
	LONG $0x0459fac5; BYTE $0xb2               // vmulss    xmm0, xmm0, dword [rdx + 4*rsi]
	LONG $0x117ac1c4; WORD $0xb004             // vmovss    dword [r8 + 4*rsi], xmm0
	LONG $0x4410fac5; WORD $0x04b1             // vmovss    xmm0, dword [rcx + 4*rsi + 4]
	LONG $0x4459fac5; WORD $0x04b2             // vmulss    xmm0, xmm0, dword [rdx + 4*rsi + 4]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x04 // vmovss    dword [r8 + 4*rsi + 4], xmm0
	LONG $0x4410fac5; WORD $0x08b1             // vmovss    xmm0, dword [rcx + 4*rsi + 8]
	LONG $0x4459fac5; WORD $0x08b2             // vmulss    xmm0, xmm0, dword [rdx + 4*rsi + 8]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x08 // vmovss    dword [r8 + 4*rsi + 8], xmm0
	LONG $0x4410fac5; WORD $0x0cb1             // vmovss    xmm0, dword [rcx + 4*rsi + 12]
	LONG $0x4459fac5; WORD $0x0cb2             // vmulss    xmm0, xmm0, dword [rdx + 4*rsi + 12]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x0c // vmovss    dword [r8 + 4*rsi + 12], xmm0
	LONG $0x04c68348                           // add    rsi, 4
	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
	JNE  LBB0_679
	JMP  LBB0_825

// LBB0_792: scalar element-wise multiply over 64-bit integer elements.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rcx = inRight, rdx = inLeft, r8 = out, r9d = len.
// Each step computes out[i] = inRight[i] * inLeft[i], keeping the low 64 bits.
// LBB0_825 and LBB0_795 are defined outside this excerpt (presumably the
// common exit and the wide path for len >= 16 -- TODO confirm).
LBB0_792:
	// len <= 0 (signed 32-bit compare): nothing to do.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	// r10 = len zero-extended; it is the loop bound from here on.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_795
	// Short input: start the scalar code at element index 0.
	WORD $0xf631             // xor    esi, esi

// Tail set-up. rsi = index of the first element still to process.
// r9 = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_800:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_802

// Peel loop: one element per iteration, rax iterations in total.
LBB0_801:
	LONG $0xf13c8b48             // mov    rdi, qword [rcx + 8*rsi]
	LONG $0x3caf0f48; BYTE $0xf2 // imul    rdi, qword [rdx + 8*rsi]
	LONG $0xf03c8949             // mov    qword [r8 + 8*rsi], rdi
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_801

// If fewer than 4 elements were left when the tail started (r9 < 3, unsigned)
// the peel loop already handled all of them.
LBB0_802:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_803:
	LONG $0xf1048b48               // mov    rax, qword [rcx + 8*rsi]
	LONG $0x04af0f48; BYTE $0xf2   // imul    rax, qword [rdx + 8*rsi]
	LONG $0xf0048949               // mov    qword [r8 + 8*rsi], rax
	LONG $0xf1448b48; BYTE $0x08   // mov    rax, qword [rcx + 8*rsi + 8]
	LONG $0x44af0f48; WORD $0x08f2 // imul    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08   // mov    qword [r8 + 8*rsi + 8], rax
	LONG $0xf1448b48; BYTE $0x10   // mov    rax, qword [rcx + 8*rsi + 16]
	LONG $0x44af0f48; WORD $0x10f2 // imul    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10   // mov    qword [r8 + 8*rsi + 16], rax
	LONG $0xf1448b48; BYTE $0x18   // mov    rax, qword [rcx + 8*rsi + 24]
	LONG $0x44af0f48; WORD $0x18f2 // imul    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18   // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_803
	JMP  LBB0_825

// LBB0_804: scalar element-wise multiply over single-precision floats.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rcx = inRight, rdx = inLeft, r8 = out, r9d = len; xmm0 is the
// only vector register used as a temporary.
// Each step computes out[i] = inRight[i] * inLeft[i] with vmulss.
// LBB0_825 and LBB0_807 are defined outside this excerpt (presumably the
// common exit and the wide/vector path for len >= 32 -- TODO confirm).
LBB0_804:
	// len <= 0 (signed 32-bit compare): nothing to do.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	// r10 = len zero-extended; it is the loop bound from here on.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_807
	// Short input: start the scalar code at element index 0.
	WORD $0xf631             // xor    esi, esi

// Tail set-up. rsi = index of the first element still to process.
// rdi = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_812:
	WORD $0x8948; BYTE $0xf7 // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7 // not    rdi
	WORD $0x014c; BYTE $0xd7 // add    rdi, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_814

// Peel loop: one element per iteration, rax iterations in total.
LBB0_813:
	LONG $0x0410fac5; BYTE $0xb1   // vmovss    xmm0, dword [rcx + 4*rsi]
	LONG $0x0459fac5; BYTE $0xb2   // vmulss    xmm0, xmm0, dword [rdx + 4*rsi]
	LONG $0x117ac1c4; WORD $0xb004 // vmovss    dword [r8 + 4*rsi], xmm0
	LONG $0x01c68348               // add    rsi, 1
	LONG $0xffc08348               // add    rax, -1
	JNE  LBB0_813

// If fewer than 4 elements were left when the tail started (rdi < 3, unsigned)
// the peel loop already handled all of them.
LBB0_814:
	LONG $0x03ff8348 // cmp    rdi, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_815:
	LONG $0x0410fac5; BYTE $0xb1               // vmovss    xmm0, dword [rcx + 4*rsi]
	LONG $0x0459fac5; BYTE $0xb2               // vmulss    xmm0, xmm0, dword [rdx + 4*rsi]
	LONG $0x117ac1c4; WORD $0xb004             // vmovss    dword [r8 + 4*rsi], xmm0
	LONG $0x4410fac5; WORD $0x04b1             // vmovss    xmm0, dword [rcx + 4*rsi + 4]
	LONG $0x4459fac5; WORD $0x04b2             // vmulss    xmm0, xmm0, dword [rdx + 4*rsi + 4]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x04 // vmovss    dword [r8 + 4*rsi + 4], xmm0
	LONG $0x4410fac5; WORD $0x08b1             // vmovss    xmm0, dword [rcx + 4*rsi + 8]
	LONG $0x4459fac5; WORD $0x08b2             // vmulss    xmm0, xmm0, dword [rdx + 4*rsi + 8]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x08 // vmovss    dword [r8 + 4*rsi + 8], xmm0
	LONG $0x4410fac5; WORD $0x0cb1             // vmovss    xmm0, dword [rcx + 4*rsi + 12]
	LONG $0x4459fac5; WORD $0x0cb2             // vmulss    xmm0, xmm0, dword [rdx + 4*rsi + 12]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x0c // vmovss    dword [r8 + 4*rsi + 12], xmm0
	LONG $0x04c68348                           // add    rsi, 4
	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
	JNE  LBB0_815
	JMP  LBB0_825

// LBB0_121: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 16 goes to LBB0_124, smaller inputs set index rsi = 0 and go to
// LBB0_129. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_121:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_124
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_129

// LBB0_133: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 32 goes to LBB0_136, smaller inputs set index rsi = 0 and go to
// LBB0_141. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_133:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_136
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_141

// LBB0_387: scalar element-wise subtract over 64-bit integer elements.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
// Each step computes out[i] = inLeft[i] - inRight[i].
// LBB0_825 and LBB0_390 are defined outside this excerpt (presumably the
// common exit and the wide/vector path for len >= 16 -- TODO confirm).
LBB0_387:
	// len <= 0 (signed 32-bit compare): nothing to do.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	// r10 = len zero-extended; it is the loop bound from here on.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_390
	// Short input: start the scalar code at element index 0.
	WORD $0xf631             // xor    esi, esi

// Tail set-up. rsi = index of the first element still to process.
// r9 = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_395:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_397

// Peel loop: one element per iteration, rax iterations in total.
LBB0_396:
	LONG $0xf23c8b48 // mov    rdi, qword [rdx + 8*rsi]
	LONG $0xf13c2b48 // sub    rdi, qword [rcx + 8*rsi]
	LONG $0xf03c8949 // mov    qword [r8 + 8*rsi], rdi
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc08348 // add    rax, -1
	JNE  LBB0_396

// If fewer than 4 elements were left when the tail started (r9 < 3, unsigned)
// the peel loop already handled all of them.
LBB0_397:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_398:
	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_398
	JMP  LBB0_825

// LBB0_399: scalar element-wise subtract over single-precision floats.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rdx = inLeft, rcx = inRight, r8 = out, r9d = len; xmm0 is the
// only vector register used as a temporary.
// Each step computes out[i] = inLeft[i] - inRight[i] with vsubss.
// LBB0_825 and LBB0_402 are defined outside this excerpt (presumably the
// common exit and the wide/vector path for len >= 32 -- TODO confirm).
LBB0_399:
	// len <= 0 (signed 32-bit compare): nothing to do.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	// r10 = len zero-extended; it is the loop bound from here on.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_402
	// Short input: start the scalar code at element index 0.
	WORD $0xf631             // xor    esi, esi

// Tail set-up. rsi = index of the first element still to process.
// rdi = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_407:
	WORD $0x8948; BYTE $0xf7 // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7 // not    rdi
	WORD $0x014c; BYTE $0xd7 // add    rdi, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_409

// Peel loop: one element per iteration, rax iterations in total.
LBB0_408:
	LONG $0x0410fac5; BYTE $0xb2   // vmovss    xmm0, dword [rdx + 4*rsi]
	LONG $0x045cfac5; BYTE $0xb1   // vsubss    xmm0, xmm0, dword [rcx + 4*rsi]
	LONG $0x117ac1c4; WORD $0xb004 // vmovss    dword [r8 + 4*rsi], xmm0
	LONG $0x01c68348               // add    rsi, 1
	LONG $0xffc08348               // add    rax, -1
	JNE  LBB0_408

// If fewer than 4 elements were left when the tail started (rdi < 3, unsigned)
// the peel loop already handled all of them.
LBB0_409:
	LONG $0x03ff8348 // cmp    rdi, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_410:
	LONG $0x0410fac5; BYTE $0xb2               // vmovss    xmm0, dword [rdx + 4*rsi]
	LONG $0x045cfac5; BYTE $0xb1               // vsubss    xmm0, xmm0, dword [rcx + 4*rsi]
	LONG $0x117ac1c4; WORD $0xb004             // vmovss    dword [r8 + 4*rsi], xmm0
	LONG $0x4410fac5; WORD $0x04b2             // vmovss    xmm0, dword [rdx + 4*rsi + 4]
	LONG $0x445cfac5; WORD $0x04b1             // vsubss    xmm0, xmm0, dword [rcx + 4*rsi + 4]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x04 // vmovss    dword [r8 + 4*rsi + 4], xmm0
	LONG $0x4410fac5; WORD $0x08b2             // vmovss    xmm0, dword [rdx + 4*rsi + 8]
	LONG $0x445cfac5; WORD $0x08b1             // vsubss    xmm0, xmm0, dword [rcx + 4*rsi + 8]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x08 // vmovss    dword [r8 + 4*rsi + 8], xmm0
	LONG $0x4410fac5; WORD $0x0cb2             // vmovss    xmm0, dword [rdx + 4*rsi + 12]
	LONG $0x445cfac5; WORD $0x0cb1             // vsubss    xmm0, xmm0, dword [rcx + 4*rsi + 12]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x0c // vmovss    dword [r8 + 4*rsi + 12], xmm0
	LONG $0x04c68348                           // add    rsi, 4
	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
	JNE  LBB0_410
	JMP  LBB0_825

// LBB0_254: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 16 goes to LBB0_257, smaller inputs set index rsi = 0 and go to
// LBB0_262. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_254:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_257
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_262

// LBB0_266: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 32 goes to LBB0_269, smaller inputs set index rsi = 0 and go to
// LBB0_274. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_266:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_269
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_274

// LBB0_520: scalar element-wise subtract over 64-bit integer elements.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
// Each step computes out[i] = inLeft[i] - inRight[i].
// LBB0_825 and LBB0_523 are defined outside this excerpt (presumably the
// common exit and the wide/vector path for len >= 16 -- TODO confirm).
LBB0_520:
	// len <= 0 (signed 32-bit compare): nothing to do.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	// r10 = len zero-extended; it is the loop bound from here on.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB0_523
	// Short input: start the scalar code at element index 0.
	WORD $0xf631             // xor    esi, esi

// Tail set-up. rsi = index of the first element still to process.
// r9 = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_528:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_530

// Peel loop: one element per iteration, rax iterations in total.
LBB0_529:
	LONG $0xf23c8b48 // mov    rdi, qword [rdx + 8*rsi]
	LONG $0xf13c2b48 // sub    rdi, qword [rcx + 8*rsi]
	LONG $0xf03c8949 // mov    qword [r8 + 8*rsi], rdi
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc08348 // add    rax, -1
	JNE  LBB0_529

// If fewer than 4 elements were left when the tail started (r9 < 3, unsigned)
// the peel loop already handled all of them.
LBB0_530:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_531:
	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_531
	JMP  LBB0_825

// LBB0_532: scalar element-wise subtract over single-precision floats.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rdx = inLeft, rcx = inRight, r8 = out, r9d = len; xmm0 is the
// only vector register used as a temporary.
// Each step computes out[i] = inLeft[i] - inRight[i] with vsubss.
// LBB0_825 and LBB0_535 are defined outside this excerpt (presumably the
// common exit and the wide/vector path for len >= 32 -- TODO confirm).
LBB0_532:
	// len <= 0 (signed 32-bit compare): nothing to do.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	// r10 = len zero-extended; it is the loop bound from here on.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_535
	// Short input: start the scalar code at element index 0.
	WORD $0xf631             // xor    esi, esi

// Tail set-up. rsi = index of the first element still to process.
// rdi = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_540:
	WORD $0x8948; BYTE $0xf7 // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7 // not    rdi
	WORD $0x014c; BYTE $0xd7 // add    rdi, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_542

// Peel loop: one element per iteration, rax iterations in total.
LBB0_541:
	LONG $0x0410fac5; BYTE $0xb2   // vmovss    xmm0, dword [rdx + 4*rsi]
	LONG $0x045cfac5; BYTE $0xb1   // vsubss    xmm0, xmm0, dword [rcx + 4*rsi]
	LONG $0x117ac1c4; WORD $0xb004 // vmovss    dword [r8 + 4*rsi], xmm0
	LONG $0x01c68348               // add    rsi, 1
	LONG $0xffc08348               // add    rax, -1
	JNE  LBB0_541

// If fewer than 4 elements were left when the tail started (rdi < 3, unsigned)
// the peel loop already handled all of them.
LBB0_542:
	LONG $0x03ff8348 // cmp    rdi, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_543:
	LONG $0x0410fac5; BYTE $0xb2               // vmovss    xmm0, dword [rdx + 4*rsi]
	LONG $0x045cfac5; BYTE $0xb1               // vsubss    xmm0, xmm0, dword [rcx + 4*rsi]
	LONG $0x117ac1c4; WORD $0xb004             // vmovss    dword [r8 + 4*rsi], xmm0
	LONG $0x4410fac5; WORD $0x04b2             // vmovss    xmm0, dword [rdx + 4*rsi + 4]
	LONG $0x445cfac5; WORD $0x04b1             // vsubss    xmm0, xmm0, dword [rcx + 4*rsi + 4]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x04 // vmovss    dword [r8 + 4*rsi + 4], xmm0
	LONG $0x4410fac5; WORD $0x08b2             // vmovss    xmm0, dword [rdx + 4*rsi + 8]
	LONG $0x445cfac5; WORD $0x08b1             // vsubss    xmm0, xmm0, dword [rcx + 4*rsi + 8]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x08 // vmovss    dword [r8 + 4*rsi + 8], xmm0
	LONG $0x4410fac5; WORD $0x0cb2             // vmovss    xmm0, dword [rdx + 4*rsi + 12]
	LONG $0x445cfac5; WORD $0x0cb1             // vsubss    xmm0, xmm0, dword [rcx + 4*rsi + 12]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x0c // vmovss    dword [r8 + 4*rsi + 12], xmm0
	LONG $0x04c68348                           // add    rsi, 4
	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
	JNE  LBB0_543
	JMP  LBB0_825

// LBB0_571: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 32 goes to LBB0_574, smaller inputs go to LBB0_584. Unlike most
// sibling stubs this one zeroes edi (not esi) before the jump, so the code at
// LBB0_584 presumably indexes with rdi -- TODO confirm. All three targets are
// defined outside this excerpt.
LBB0_571:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_574
	WORD $0xff31             // xor    edi, edi
	JMP  LBB0_584

// LBB0_707: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 32 goes to LBB0_710, smaller inputs go to LBB0_720. Unlike most
// sibling stubs this one zeroes edi (not esi) before the jump, so the code at
// LBB0_720 presumably indexes with rdi -- TODO confirm. All three targets are
// defined outside this excerpt.
LBB0_707:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_710
	WORD $0xff31             // xor    edi, edi
	JMP  LBB0_720

// LBB0_46: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 128 goes to LBB0_49, smaller inputs set index rsi = 0 and go to
// LBB0_54. All three targets are defined outside this excerpt (presumably the
// exit, the wide path and the scalar tail -- TODO confirm).
LBB0_46:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JAE  LBB0_49
	WORD $0xf631                               // xor    esi, esi
	JMP  LBB0_54

// LBB0_312: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 128 goes to LBB0_315, smaller inputs set index rsi = 0 and go to
// LBB0_320. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_312:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JAE  LBB0_315
	WORD $0xf631                               // xor    esi, esi
	JMP  LBB0_320

// LBB0_179: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 128 goes to LBB0_182, smaller inputs set index rsi = 0 and go to
// LBB0_187. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_179:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JAE  LBB0_182
	WORD $0xf631                               // xor    esi, esi
	JMP  LBB0_187

// LBB0_445: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 128 goes to LBB0_448, smaller inputs set index rsi = 0 and go to
// LBB0_453. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_445:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JAE  LBB0_448
	WORD $0xf631                               // xor    esi, esi
	JMP  LBB0_453

// LBB0_635: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 32 goes to LBB0_638, smaller inputs set index rsi = 0 and go to
// LBB0_643. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_635:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_638
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_643

// LBB0_771: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 32 goes to LBB0_774, smaller inputs set index rsi = 0 and go to
// LBB0_779. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_771:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_774
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_779

// LBB0_100: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 32 goes to LBB0_103, smaller inputs set index rsi = 0 and go to
// LBB0_108. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_100:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_103
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_108

// LBB0_366: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 32 goes to LBB0_369, smaller inputs set index rsi = 0 and go to
// LBB0_374. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_366:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_369
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_374

// LBB0_233: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 32 goes to LBB0_236, smaller inputs set index rsi = 0 and go to
// LBB0_241. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_233:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_236
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_241

// LBB0_499: entry stub for one (typ, op) case; the dispatch that reaches it is
// outside this excerpt.
// len (r9d) <= 0 jumps to LBB0_825. Otherwise r10 = zero-extended len;
// len >= 32 goes to LBB0_502, smaller inputs set index rsi = 0 and go to
// LBB0_507. All three targets are defined outside this excerpt (presumably
// the exit, the wide path and the scalar tail -- TODO confirm).
LBB0_499:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB0_825
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB0_502
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_507

// LBB0_91: element-wise add over 32-bit integer elements, wide path.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rcx = inRight, rdx = inLeft, r8 = out. r10 is used as the
// element count; it is set by code outside this excerpt (presumably a
// len >= 32 entry stub -- TODO confirm).
// The block first checks whether out overlaps either input. On overlap it
// runs the scalar code from index 0; otherwise it processes 32 elements per
// iteration with 256-bit loads/stores and finishes the remainder in scalar.
LBB0_91:
	// rsi = out + 4*r10 (one past the end of out).
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	// r9b = (end of inLeft > out), r11b = (end of out > inLeft).
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	// al = (end of inRight > out), dil = (end of out > inRight).
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0 so the scalar code starts at the first element on overlap.
	WORD $0xf631             // xor    esi, esi
	// Both flags set: out and inLeft overlap -> scalar only.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_96
	// Both flags set: out and inRight overlap -> scalar only.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_96
	// rsi = count rounded down to a multiple of 32; rdi = vector loop index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// Vector loop: four 32-byte loads from inRight, vpaddd with the matching
// 32 bytes of inLeft, four 32-byte stores to out (32 dword elements per pass).
LBB0_94:
	LONG $0x046ffec5; BYTE $0xb9               // vmovdqu    ymm0, yword [rcx + 4*rdi]
	LONG $0x4c6ffec5; WORD $0x20b9             // vmovdqu    ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x546ffec5; WORD $0x40b9             // vmovdqu    ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60b9             // vmovdqu    ymm3, yword [rcx + 4*rdi + 96]
	LONG $0x04fefdc5; BYTE $0xba               // vpaddd    ymm0, ymm0, yword [rdx + 4*rdi]
	LONG $0x4cfef5c5; WORD $0x20ba             // vpaddd    ymm1, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x54feedc5; WORD $0x40ba             // vpaddd    ymm2, ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5cfee5c5; WORD $0x60ba             // vpaddd    ymm3, ymm3, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb804             // vmovdqu    yword [r8 + 4*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xb84c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm3
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_94
	// Count was an exact multiple of 32: no scalar remainder.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// Scalar tail set-up. rsi = index of the first element still to process.
// r9 = r10 - rsi - 1 (elements left, minus one); rax = r10 & 3 is the number
// of single-element iterations peeled before the 4x unrolled loop.
LBB0_96:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_98

// Peel loop: one element per iteration, rax iterations in total.
LBB0_97:
	WORD $0x3c8b; BYTE $0xb1 // mov    edi, dword [rcx + 4*rsi]
	WORD $0x3c03; BYTE $0xb2 // add    edi, dword [rdx + 4*rsi]
	LONG $0xb03c8941         // mov    dword [r8 + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_97

// If fewer than 4 elements were left when the tail started (r9 < 3, unsigned)
// the peel loop already handled all of them.
LBB0_98:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Main scalar loop, unrolled 4x; runs until rsi reaches r10.
LBB0_99:
	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
	WORD $0x0403; BYTE $0xb2     // add    eax, dword [rdx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
	LONG $0x04b24403             // add    eax, dword [rdx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
	LONG $0x08b24403             // add    eax, dword [rdx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
	LONG $0x0cb24403             // add    eax, dword [rdx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_99
	JMP  LBB0_825

// LBB0_224: element-wise add over 32-bit integer elements, wide path.
// Register roles (from the argument loads at function entry and the decoded
// mnemonics): rcx = inRight, rdx = inLeft, r8 = out. r10 is used as the
// element count; it is set by code outside this excerpt (presumably a
// len >= 32 entry stub -- TODO confirm).
// The block first checks whether out overlaps either input. On overlap it
// jumps to the scalar code at LBB0_229 with index 0; otherwise it processes
// 32 elements per iteration with 256-bit loads/stores and then falls through
// to LBB0_229 for any remainder.
LBB0_224:
	// rsi = out + 4*r10 (one past the end of out).
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	// r9b = (end of inLeft > out), r11b = (end of out > inLeft).
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	// al = (end of inRight > out), dil = (end of out > inRight).
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0 so the scalar code starts at the first element on overlap.
	WORD $0xf631             // xor    esi, esi
	// Both flags set: out and inLeft overlap -> scalar only.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_229
	// Both flags set: out and inRight overlap -> scalar only.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_229
	// rsi = count rounded down to a multiple of 32; rdi = vector loop index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// Vector loop: four 32-byte loads from inRight, vpaddd with the matching
// 32 bytes of inLeft, four 32-byte stores to out (32 dword elements per pass).
LBB0_227:
	LONG $0x046ffec5; BYTE $0xb9               // vmovdqu    ymm0, yword [rcx + 4*rdi]
	LONG $0x4c6ffec5; WORD $0x20b9             // vmovdqu    ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x546ffec5; WORD $0x40b9             // vmovdqu    ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60b9             // vmovdqu    ymm3, yword [rcx + 4*rdi + 96]
	LONG $0x04fefdc5; BYTE $0xba               // vpaddd    ymm0, ymm0, yword [rdx + 4*rdi]
	LONG $0x4cfef5c5; WORD $0x20ba             // vpaddd    ymm1, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x54feedc5; WORD $0x40ba             // vpaddd    ymm2, ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5cfee5c5; WORD $0x60ba             // vpaddd    ymm3, ymm3, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb804             // vmovdqu    yword [r8 + 4*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xb84c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm3
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_227
	// Count was an exact multiple of 32: no scalar remainder.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

LBB0_229:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_231

LBB0_230:
	WORD $0x3c8b; BYTE $0xb1 // mov    edi, dword [rcx + 4*rsi]
	WORD $0x3c03; BYTE $0xb2 // add    edi, dword [rdx + 4*rsi]
	LONG $0xb03c8941         // mov    dword [r8 + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_230

LBB0_231:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_232:
	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
	WORD $0x0403; BYTE $0xb2     // add    eax, dword [rdx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
	LONG $0x04b24403             // add    eax, dword [rdx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
	LONG $0x08b24403             // add    eax, dword [rdx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
	LONG $0x0cb24403             // add    eax, dword [rdx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_232
	JMP  LBB0_825

// Double-precision (float64) addition, out[i] = inRight[i] + inLeft[i], with
// an AVX fast path. Registers (from the MOVQ loads at function entry):
// RCX = inRight, RDX = inLeft, R8 = out. R10 is the element count, set before
// this section (presumably the zero-extended len -- confirm).
//
// The prologue checks whether [out, out+8*n) overlaps either input range; on
// overlap the vector loop is skipped and the scalar code at LBB0_150 runs
// from index 0.
LBB0_145:
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b & r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_150
	// al & dil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_150
	// rsi = n rounded down to a multiple of 16 (vector loop bound); rdi = index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

// Vector loop: four 256-bit registers, i.e. 16 qword elements per iteration.
LBB0_148:
	LONG $0x0410fdc5; BYTE $0xf9               // vmovupd    ymm0, yword [rcx + 8*rdi]
	LONG $0x4c10fdc5; WORD $0x20f9             // vmovupd    ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x5410fdc5; WORD $0x40f9             // vmovupd    ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5c10fdc5; WORD $0x60f9             // vmovupd    ymm3, yword [rcx + 8*rdi + 96]
	LONG $0x0458fdc5; BYTE $0xfa               // vaddpd    ymm0, ymm0, yword [rdx + 8*rdi]
	LONG $0x4c58f5c5; WORD $0x20fa             // vaddpd    ymm1, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x5458edc5; WORD $0x40fa             // vaddpd    ymm2, ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c58e5c5; WORD $0x60fa             // vaddpd    ymm3, ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf804             // vmovupd    yword [r8 + 8*rdi], ymm0
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x117dc1c4; WORD $0xf854; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_148
	// Nothing left if the vectorized count equals n; otherwise fall into the scalar tail.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// Scalar tail starting at index rsi: rdi = n - rsi - 1, rax = n & 3 elements
// are peeled one at a time before the 4x unrolled loop.
LBB0_150:
	WORD $0x8948; BYTE $0xf7 // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7 // not    rdi
	WORD $0x014c; BYTE $0xd7 // add    rdi, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_152

LBB0_151:
	LONG $0x0410fbc5; BYTE $0xf1   // vmovsd    xmm0, qword [rcx + 8*rsi]
	LONG $0x0458fbc5; BYTE $0xf2   // vaddsd    xmm0, xmm0, qword [rdx + 8*rsi]
	LONG $0x117bc1c4; WORD $0xf004 // vmovsd    qword [r8 + 8*rsi], xmm0
	LONG $0x01c68348               // add    rsi, 1
	LONG $0xffc08348               // add    rax, -1
	JNE  LBB0_151

// rdi < 3 (unsigned): fewer than four elements were outstanding at LBB0_150,
// so the unrolled loop is skipped.
LBB0_152:
	LONG $0x03ff8348 // cmp    rdi, 3
	JB   LBB0_825

// Scalar loop unrolled four elements per iteration, until rsi == r10.
LBB0_153:
	LONG $0x0410fbc5; BYTE $0xf1               // vmovsd    xmm0, qword [rcx + 8*rsi]
	LONG $0x0458fbc5; BYTE $0xf2               // vaddsd    xmm0, xmm0, qword [rdx + 8*rsi]
	LONG $0x117bc1c4; WORD $0xf004             // vmovsd    qword [r8 + 8*rsi], xmm0
	LONG $0x4410fbc5; WORD $0x08f1             // vmovsd    xmm0, qword [rcx + 8*rsi + 8]
	LONG $0x4458fbc5; WORD $0x08f2             // vaddsd    xmm0, xmm0, qword [rdx + 8*rsi + 8]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x08 // vmovsd    qword [r8 + 8*rsi + 8], xmm0
	LONG $0x4410fbc5; WORD $0x10f1             // vmovsd    xmm0, qword [rcx + 8*rsi + 16]
	LONG $0x4458fbc5; WORD $0x10f2             // vaddsd    xmm0, xmm0, qword [rdx + 8*rsi + 16]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x10 // vmovsd    qword [r8 + 8*rsi + 16], xmm0
	LONG $0x4410fbc5; WORD $0x18f1             // vmovsd    xmm0, qword [rcx + 8*rsi + 24]
	LONG $0x4458fbc5; WORD $0x18f2             // vaddsd    xmm0, xmm0, qword [rdx + 8*rsi + 24]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x18 // vmovsd    qword [r8 + 8*rsi + 24], xmm0
	LONG $0x04c68348                           // add    rsi, 4
	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
	JNE  LBB0_153
	JMP  LBB0_825

// Double-precision (float64) addition, out[i] = inRight[i] + inLeft[i].
// Registers (from the MOVQ loads at function entry): RCX = inRight,
// RDX = inLeft, R8 = out. R10 is the element count, set before this section
// (presumably the zero-extended len -- confirm). Which (typ, op) dispatch
// case reaches this copy is decided outside this section.
//
// The prologue checks whether [out, out+8*n) overlaps either input range; on
// overlap the vector loop is skipped and the scalar code at LBB0_283 runs
// from index 0.
LBB0_278:
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b & r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_283
	// al & dil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_283
	// rsi = n rounded down to a multiple of 16 (vector loop bound); rdi = index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

// Vector loop: four 256-bit registers, i.e. 16 qword elements per iteration.
LBB0_281:
	LONG $0x0410fdc5; BYTE $0xf9               // vmovupd    ymm0, yword [rcx + 8*rdi]
	LONG $0x4c10fdc5; WORD $0x20f9             // vmovupd    ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x5410fdc5; WORD $0x40f9             // vmovupd    ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5c10fdc5; WORD $0x60f9             // vmovupd    ymm3, yword [rcx + 8*rdi + 96]
	LONG $0x0458fdc5; BYTE $0xfa               // vaddpd    ymm0, ymm0, yword [rdx + 8*rdi]
	LONG $0x4c58f5c5; WORD $0x20fa             // vaddpd    ymm1, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x5458edc5; WORD $0x40fa             // vaddpd    ymm2, ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c58e5c5; WORD $0x60fa             // vaddpd    ymm3, ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf804             // vmovupd    yword [r8 + 8*rdi], ymm0
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x117dc1c4; WORD $0xf854; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_281
	// Nothing left if the vectorized count equals n; otherwise fall into the scalar tail.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// Scalar tail starting at index rsi: rdi = n - rsi - 1, rax = n & 3 elements
// are peeled one at a time before the 4x unrolled loop.
LBB0_283:
	WORD $0x8948; BYTE $0xf7 // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7 // not    rdi
	WORD $0x014c; BYTE $0xd7 // add    rdi, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_285

LBB0_284:
	LONG $0x0410fbc5; BYTE $0xf1   // vmovsd    xmm0, qword [rcx + 8*rsi]
	LONG $0x0458fbc5; BYTE $0xf2   // vaddsd    xmm0, xmm0, qword [rdx + 8*rsi]
	LONG $0x117bc1c4; WORD $0xf004 // vmovsd    qword [r8 + 8*rsi], xmm0
	LONG $0x01c68348               // add    rsi, 1
	LONG $0xffc08348               // add    rax, -1
	JNE  LBB0_284

// rdi < 3 (unsigned): fewer than four elements were outstanding at LBB0_283,
// so the unrolled loop is skipped.
LBB0_285:
	LONG $0x03ff8348 // cmp    rdi, 3
	JB   LBB0_825

// Scalar loop unrolled four elements per iteration, until rsi == r10.
LBB0_286:
	LONG $0x0410fbc5; BYTE $0xf1               // vmovsd    xmm0, qword [rcx + 8*rsi]
	LONG $0x0458fbc5; BYTE $0xf2               // vaddsd    xmm0, xmm0, qword [rdx + 8*rsi]
	LONG $0x117bc1c4; WORD $0xf004             // vmovsd    qword [r8 + 8*rsi], xmm0
	LONG $0x4410fbc5; WORD $0x08f1             // vmovsd    xmm0, qword [rcx + 8*rsi + 8]
	LONG $0x4458fbc5; WORD $0x08f2             // vaddsd    xmm0, xmm0, qword [rdx + 8*rsi + 8]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x08 // vmovsd    qword [r8 + 8*rsi + 8], xmm0
	LONG $0x4410fbc5; WORD $0x10f1             // vmovsd    xmm0, qword [rcx + 8*rsi + 16]
	LONG $0x4458fbc5; WORD $0x10f2             // vaddsd    xmm0, xmm0, qword [rdx + 8*rsi + 16]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x10 // vmovsd    qword [r8 + 8*rsi + 16], xmm0
	LONG $0x4410fbc5; WORD $0x18f1             // vmovsd    xmm0, qword [rcx + 8*rsi + 24]
	LONG $0x4458fbc5; WORD $0x18f2             // vaddsd    xmm0, xmm0, qword [rdx + 8*rsi + 24]
	LONG $0x117bc1c4; WORD $0xf044; BYTE $0x18 // vmovsd    qword [r8 + 8*rsi + 24], xmm0
	LONG $0x04c68348                           // add    rsi, 4
	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
	JNE  LBB0_286
	JMP  LBB0_825

// Setup for a vector loop over 1-byte elements (addresses are base + r10 with
// no scaling). The loop body lives at LBB0_592 / LBB0_594, outside this
// section, so the arithmetic operation itself is not visible here.
// Registers (from the MOVQ loads at function entry): RCX = inRight,
// RDX = inLeft, R8 = out. R10 is the element count, set before this section
// (presumably the zero-extended len -- confirm).
//
// If [out, out+n) overlaps either input range, control goes to LBB0_598 with
// rdi = 0 (presumably the scalar fallback -- confirm at that label).
LBB0_588:
	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd6970f40         // seta    sil
	WORD $0xff31             // xor    edi, edi
	// r9b & r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_598
	// al & sil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf0 // and    al, sil
	JNE  LBB0_598
	// rdi = n rounded down to a multiple of 32.
	// rax = ((rdi - 32) >> 5) + 1 = number of 32-element chunks.
	// r9d = rax & 3 (chunk count modulo 4, consumed by code outside this section).
	// NOTE(review): rdi - 32 assumes n >= 32 on entry -- confirm at the caller.
	WORD $0x8944; BYTE $0xd7 // mov    edi, r10d
	WORD $0xe783; BYTE $0xe0 // and    edi, -32
	LONG $0xe0778d48         // lea    rsi, [rdi - 32]
	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
	LONG $0x05e8c148         // shr    rax, 5
	LONG $0x01c08348         // add    rax, 1
	WORD $0x8941; BYTE $0xc1 // mov    r9d, eax
	LONG $0x03e18341         // and    r9d, 3
	// rsi >= 96 means at least four chunks: go to LBB0_592. Otherwise start
	// at offset 0 in LBB0_594.
	LONG $0x60fe8348         // cmp    rsi, 96
	JAE  LBB0_592
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_594

// Setup for a vector loop over 1-byte elements (addresses are base + r10 with
// no scaling). The loop body lives at LBB0_728 / LBB0_730, outside this
// section, so the arithmetic operation itself is not visible here.
// Registers (from the MOVQ loads at function entry): RCX = inRight,
// RDX = inLeft, R8 = out. R10 is the element count, set before this section
// (presumably the zero-extended len -- confirm).
//
// If [out, out+n) overlaps either input range, control goes to LBB0_734 with
// rdi = 0 (presumably the scalar fallback -- confirm at that label).
LBB0_724:
	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd6970f40         // seta    sil
	WORD $0xff31             // xor    edi, edi
	// r9b & r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_734
	// al & sil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf0 // and    al, sil
	JNE  LBB0_734
	// rdi = n rounded down to a multiple of 32.
	// rax = ((rdi - 32) >> 5) + 1 = number of 32-element chunks.
	// r9d = rax & 3 (chunk count modulo 4, consumed by code outside this section).
	// NOTE(review): rdi - 32 assumes n >= 32 on entry -- confirm at the caller.
	WORD $0x8944; BYTE $0xd7 // mov    edi, r10d
	WORD $0xe783; BYTE $0xe0 // and    edi, -32
	LONG $0xe0778d48         // lea    rsi, [rdi - 32]
	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
	LONG $0x05e8c148         // shr    rax, 5
	LONG $0x01c08348         // add    rax, 1
	WORD $0x8941; BYTE $0xc1 // mov    r9d, eax
	LONG $0x03e18341         // and    r9d, 3
	// rsi >= 96 means at least four chunks: go to LBB0_728. Otherwise start
	// at offset 0 in LBB0_730.
	LONG $0x60fe8348         // cmp    rsi, 96
	JAE  LBB0_728
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_730

// 8-bit integer addition, out[i] = inRight[i] + inLeft[i] (wrapping), with an
// AVX2 fast path. Registers (from the MOVQ loads at function entry):
// RCX = inRight, RDX = inLeft, R8 = out. R10 is the element count, set before
// this section (presumably the zero-extended len -- confirm).
//
// The prologue checks whether [out, out+n) overlaps either input range; on
// overlap the vector loop is skipped and the scalar code at LBB0_63 runs
// from index 0.
LBB0_58:
	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b & r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_63
	// al & dil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_63
	// rsi = n rounded down to a multiple of 128 (vector loop bound); rdi = index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	WORD $0xff31             // xor    edi, edi

// Vector loop: four 256-bit registers, i.e. 128 byte elements per iteration.
LBB0_61:
	LONG $0x046ffec5; BYTE $0x39               // vmovdqu    ymm0, yword [rcx + rdi]
	LONG $0x4c6ffec5; WORD $0x2039             // vmovdqu    ymm1, yword [rcx + rdi + 32]
	LONG $0x546ffec5; WORD $0x4039             // vmovdqu    ymm2, yword [rcx + rdi + 64]
	LONG $0x5c6ffec5; WORD $0x6039             // vmovdqu    ymm3, yword [rcx + rdi + 96]
	LONG $0x04fcfdc5; BYTE $0x3a               // vpaddb    ymm0, ymm0, yword [rdx + rdi]
	LONG $0x4cfcf5c5; WORD $0x203a             // vpaddb    ymm1, ymm1, yword [rdx + rdi + 32]
	LONG $0x54fcedc5; WORD $0x403a             // vpaddb    ymm2, ymm2, yword [rdx + rdi + 64]
	LONG $0x5cfce5c5; WORD $0x603a             // vpaddb    ymm3, ymm3, yword [rdx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x3804             // vmovdqu    yword [r8 + rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x384c; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm3
	// rdi += 128, written as sub -128 because -128 fits the 8-bit immediate form.
	LONG $0x80ef8348                           // sub    rdi, -128
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_61
	// Nothing left if the vectorized count equals n; otherwise fall into the scalar tail.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// Scalar tail starting at index rsi: r9 = n - rsi - 1, rdi = n & 3 elements
// are peeled one at a time before the 4x unrolled loop.
LBB0_63:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB0_65

LBB0_64:
	LONG $0x3104b60f         // movzx    eax, byte [rcx + rsi]
	WORD $0x0402; BYTE $0x32 // add    al, byte [rdx + rsi]
	LONG $0x30048841         // mov    byte [r8 + rsi], al
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB0_64

// r9 < 3 (unsigned): fewer than four elements were outstanding at LBB0_63,
// so the unrolled loop is skipped.
LBB0_65:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Scalar loop unrolled four elements per iteration, until rsi == r10.
LBB0_66:
	LONG $0x3104b60f             // movzx    eax, byte [rcx + rsi]
	WORD $0x0402; BYTE $0x32     // add    al, byte [rdx + rsi]
	LONG $0x30048841             // mov    byte [r8 + rsi], al
	LONG $0x3144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rsi + 1]
	LONG $0x01324402             // add    al, byte [rdx + rsi + 1]
	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
	LONG $0x3144b60f; BYTE $0x02 // movzx    eax, byte [rcx + rsi + 2]
	LONG $0x02324402             // add    al, byte [rdx + rsi + 2]
	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
	LONG $0x3144b60f; BYTE $0x03 // movzx    eax, byte [rcx + rsi + 3]
	LONG $0x03324402             // add    al, byte [rdx + rsi + 3]
	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_66
	JMP  LBB0_825

// 8-bit integer subtraction, out[i] = inLeft[i] - inRight[i] (wrapping), with
// an AVX2 fast path. Registers (from the MOVQ loads at function entry):
// RCX = inRight, RDX = inLeft, R8 = out. R10 is the element count, set before
// this section (presumably the zero-extended len -- confirm).
//
// The prologue checks whether [out, out+n) overlaps either input range; on
// overlap the vector loop is skipped and the scalar code at LBB0_329 runs
// from index 0.
LBB0_324:
	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b & r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_329
	// al & dil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_329
	// rsi = n rounded down to a multiple of 128 (vector loop bound); rdi = index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	WORD $0xff31             // xor    edi, edi

// Vector loop: four 256-bit registers, i.e. 128 byte elements per iteration.
// The minuend is loaded from inLeft (rdx) and inRight (rcx) is subtracted.
LBB0_327:
	LONG $0x046ffec5; BYTE $0x3a               // vmovdqu    ymm0, yword [rdx + rdi]
	LONG $0x4c6ffec5; WORD $0x203a             // vmovdqu    ymm1, yword [rdx + rdi + 32]
	LONG $0x546ffec5; WORD $0x403a             // vmovdqu    ymm2, yword [rdx + rdi + 64]
	LONG $0x5c6ffec5; WORD $0x603a             // vmovdqu    ymm3, yword [rdx + rdi + 96]
	LONG $0x04f8fdc5; BYTE $0x39               // vpsubb    ymm0, ymm0, yword [rcx + rdi]
	LONG $0x4cf8f5c5; WORD $0x2039             // vpsubb    ymm1, ymm1, yword [rcx + rdi + 32]
	LONG $0x54f8edc5; WORD $0x4039             // vpsubb    ymm2, ymm2, yword [rcx + rdi + 64]
	LONG $0x5cf8e5c5; WORD $0x6039             // vpsubb    ymm3, ymm3, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x3804             // vmovdqu    yword [r8 + rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x384c; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm3
	// rdi += 128, written as sub -128 because -128 fits the 8-bit immediate form.
	LONG $0x80ef8348                           // sub    rdi, -128
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_327
	// Nothing left if the vectorized count equals n; otherwise fall into the scalar tail.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// Scalar tail starting at index rsi: r9 = n - rsi - 1, rdi = n & 3 elements
// are peeled one at a time before the 4x unrolled loop.
LBB0_329:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB0_331

LBB0_330:
	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
	WORD $0x042a; BYTE $0x31 // sub    al, byte [rcx + rsi]
	LONG $0x30048841         // mov    byte [r8 + rsi], al
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB0_330

// r9 < 3 (unsigned): fewer than four elements were outstanding at LBB0_329,
// so the unrolled loop is skipped.
LBB0_331:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Scalar loop unrolled four elements per iteration, until rsi == r10.
LBB0_332:
	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
	WORD $0x042a; BYTE $0x31     // sub    al, byte [rcx + rsi]
	LONG $0x30048841             // mov    byte [r8 + rsi], al
	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
	LONG $0x0131442a             // sub    al, byte [rcx + rsi + 1]
	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
	LONG $0x0231442a             // sub    al, byte [rcx + rsi + 2]
	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
	LONG $0x0331442a             // sub    al, byte [rcx + rsi + 3]
	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_332
	JMP  LBB0_825

// 8-bit integer addition, out[i] = inRight[i] + inLeft[i] (wrapping), with an
// AVX2 fast path. Registers (from the MOVQ loads at function entry):
// RCX = inRight, RDX = inLeft, R8 = out. R10 is the element count, set before
// this section (presumably the zero-extended len -- confirm). Which (typ, op)
// dispatch case reaches this copy is decided outside this section.
//
// The prologue checks whether [out, out+n) overlaps either input range; on
// overlap the vector loop is skipped and the scalar code at LBB0_196 runs
// from index 0.
LBB0_191:
	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b & r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_196
	// al & dil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_196
	// rsi = n rounded down to a multiple of 128 (vector loop bound); rdi = index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	WORD $0xff31             // xor    edi, edi

// Vector loop: four 256-bit registers, i.e. 128 byte elements per iteration.
LBB0_194:
	LONG $0x046ffec5; BYTE $0x39               // vmovdqu    ymm0, yword [rcx + rdi]
	LONG $0x4c6ffec5; WORD $0x2039             // vmovdqu    ymm1, yword [rcx + rdi + 32]
	LONG $0x546ffec5; WORD $0x4039             // vmovdqu    ymm2, yword [rcx + rdi + 64]
	LONG $0x5c6ffec5; WORD $0x6039             // vmovdqu    ymm3, yword [rcx + rdi + 96]
	LONG $0x04fcfdc5; BYTE $0x3a               // vpaddb    ymm0, ymm0, yword [rdx + rdi]
	LONG $0x4cfcf5c5; WORD $0x203a             // vpaddb    ymm1, ymm1, yword [rdx + rdi + 32]
	LONG $0x54fcedc5; WORD $0x403a             // vpaddb    ymm2, ymm2, yword [rdx + rdi + 64]
	LONG $0x5cfce5c5; WORD $0x603a             // vpaddb    ymm3, ymm3, yword [rdx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x3804             // vmovdqu    yword [r8 + rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x384c; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm3
	// rdi += 128, written as sub -128 because -128 fits the 8-bit immediate form.
	LONG $0x80ef8348                           // sub    rdi, -128
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_194
	// Nothing left if the vectorized count equals n; otherwise fall into the scalar tail.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// Scalar tail starting at index rsi: r9 = n - rsi - 1, rdi = n & 3 elements
// are peeled one at a time before the 4x unrolled loop.
LBB0_196:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB0_198

LBB0_197:
	LONG $0x3104b60f         // movzx    eax, byte [rcx + rsi]
	WORD $0x0402; BYTE $0x32 // add    al, byte [rdx + rsi]
	LONG $0x30048841         // mov    byte [r8 + rsi], al
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB0_197

// r9 < 3 (unsigned): fewer than four elements were outstanding at LBB0_196,
// so the unrolled loop is skipped.
LBB0_198:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Scalar loop unrolled four elements per iteration, until rsi == r10.
LBB0_199:
	LONG $0x3104b60f             // movzx    eax, byte [rcx + rsi]
	WORD $0x0402; BYTE $0x32     // add    al, byte [rdx + rsi]
	LONG $0x30048841             // mov    byte [r8 + rsi], al
	LONG $0x3144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rsi + 1]
	LONG $0x01324402             // add    al, byte [rdx + rsi + 1]
	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
	LONG $0x3144b60f; BYTE $0x02 // movzx    eax, byte [rcx + rsi + 2]
	LONG $0x02324402             // add    al, byte [rdx + rsi + 2]
	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
	LONG $0x3144b60f; BYTE $0x03 // movzx    eax, byte [rcx + rsi + 3]
	LONG $0x03324402             // add    al, byte [rdx + rsi + 3]
	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_199
	JMP  LBB0_825

// 8-bit integer subtraction, out[i] = inLeft[i] - inRight[i] (wrapping), with
// an AVX2 fast path. Registers (from the MOVQ loads at function entry):
// RCX = inRight, RDX = inLeft, R8 = out. R10 is the element count, set before
// this section (presumably the zero-extended len -- confirm). Which (typ, op)
// dispatch case reaches this copy is decided outside this section.
//
// The prologue checks whether [out, out+n) overlaps either input range; on
// overlap the vector loop is skipped and the scalar code at LBB0_462 runs
// from index 0.
LBB0_457:
	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b & r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_462
	// al & dil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_462
	// rsi = n rounded down to a multiple of 128 (vector loop bound); rdi = index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	WORD $0xff31             // xor    edi, edi

// Vector loop: four 256-bit registers, i.e. 128 byte elements per iteration.
// The minuend is loaded from inLeft (rdx) and inRight (rcx) is subtracted.
LBB0_460:
	LONG $0x046ffec5; BYTE $0x3a               // vmovdqu    ymm0, yword [rdx + rdi]
	LONG $0x4c6ffec5; WORD $0x203a             // vmovdqu    ymm1, yword [rdx + rdi + 32]
	LONG $0x546ffec5; WORD $0x403a             // vmovdqu    ymm2, yword [rdx + rdi + 64]
	LONG $0x5c6ffec5; WORD $0x603a             // vmovdqu    ymm3, yword [rdx + rdi + 96]
	LONG $0x04f8fdc5; BYTE $0x39               // vpsubb    ymm0, ymm0, yword [rcx + rdi]
	LONG $0x4cf8f5c5; WORD $0x2039             // vpsubb    ymm1, ymm1, yword [rcx + rdi + 32]
	LONG $0x54f8edc5; WORD $0x4039             // vpsubb    ymm2, ymm2, yword [rcx + rdi + 64]
	LONG $0x5cf8e5c5; WORD $0x6039             // vpsubb    ymm3, ymm3, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x3804             // vmovdqu    yword [r8 + rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x384c; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm3
	// rdi += 128, written as sub -128 because -128 fits the 8-bit immediate form.
	LONG $0x80ef8348                           // sub    rdi, -128
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_460
	// Nothing left if the vectorized count equals n; otherwise fall into the scalar tail.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// Scalar tail starting at index rsi: r9 = n - rsi - 1, rdi = n & 3 elements
// are peeled one at a time before the 4x unrolled loop.
LBB0_462:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB0_464

LBB0_463:
	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
	WORD $0x042a; BYTE $0x31 // sub    al, byte [rcx + rsi]
	LONG $0x30048841         // mov    byte [r8 + rsi], al
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB0_463

// r9 < 3 (unsigned): fewer than four elements were outstanding at LBB0_462,
// so the unrolled loop is skipped.
LBB0_464:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// Scalar loop unrolled four elements per iteration, until rsi == r10.
LBB0_465:
	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
	WORD $0x042a; BYTE $0x31     // sub    al, byte [rcx + rsi]
	LONG $0x30048841             // mov    byte [r8 + rsi], al
	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
	LONG $0x0131442a             // sub    al, byte [rcx + rsi + 1]
	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
	LONG $0x0231442a             // sub    al, byte [rcx + rsi + 2]
	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
	LONG $0x0331442a             // sub    al, byte [rcx + rsi + 3]
	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_465
	JMP  LBB0_825

LBB0_647:
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_652
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_652
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

LBB0_650:
	LONG $0x0c6ffec5; BYTE $0xfa               // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0x446ffec5; WORD $0x60fa             // vmovdqu    ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x246ffec5; BYTE $0xf9               // vmovdqu    ymm4, yword [rcx + 8*rdi]
	LONG $0x6c6ffec5; WORD $0x20f9             // vmovdqu    ymm5, yword [rcx + 8*rdi + 32]
	LONG $0x746ffec5; WORD $0x40f9             // vmovdqu    ymm6, yword [rcx + 8*rdi + 64]
	LONG $0x7c6ffec5; WORD $0x60f9             // vmovdqu    ymm7, yword [rcx + 8*rdi + 96]
	LONG $0xd473bdc5; BYTE $0x20               // vpsrlq    ymm8, ymm4, 32
	LONG $0xc1f43dc5                           // vpmuludq    ymm8, ymm8, ymm1
	LONG $0xd173b5c5; BYTE $0x20               // vpsrlq    ymm9, ymm1, 32
	LONG $0xccf435c5                           // vpmuludq    ymm9, ymm9, ymm4
	LONG $0xd43541c4; BYTE $0xc0               // vpaddq    ymm8, ymm9, ymm8
	LONG $0x733dc1c4; WORD $0x20f0             // vpsllq    ymm8, ymm8, 32
	LONG $0xc9f4ddc5                           // vpmuludq    ymm1, ymm4, ymm1
	LONG $0xc9d4bdc5                           // vpaddq    ymm1, ymm8, ymm1
	LONG $0xd573ddc5; BYTE $0x20               // vpsrlq    ymm4, ymm5, 32
	LONG $0xe2f4ddc5                           // vpmuludq    ymm4, ymm4, ymm2
	LONG $0xd273bdc5; BYTE $0x20               // vpsrlq    ymm8, ymm2, 32
	LONG $0xc5f43dc5                           // vpmuludq    ymm8, ymm8, ymm5
	LONG $0xe4d4bdc5                           // vpaddq    ymm4, ymm8, ymm4
	LONG $0xf473ddc5; BYTE $0x20               // vpsllq    ymm4, ymm4, 32
	LONG $0xd2f4d5c5                           // vpmuludq    ymm2, ymm5, ymm2
	LONG $0xd4d4edc5                           // vpaddq    ymm2, ymm2, ymm4
	LONG $0xd673ddc5; BYTE $0x20               // vpsrlq    ymm4, ymm6, 32
	LONG $0xe3f4ddc5                           // vpmuludq    ymm4, ymm4, ymm3
	LONG $0xd373d5c5; BYTE $0x20               // vpsrlq    ymm5, ymm3, 32
	LONG $0xedf4cdc5                           // vpmuludq    ymm5, ymm6, ymm5
	LONG $0xe4d4d5c5                           // vpaddq    ymm4, ymm5, ymm4
	LONG $0xf473ddc5; BYTE $0x20               // vpsllq    ymm4, ymm4, 32
	LONG $0xdbf4cdc5                           // vpmuludq    ymm3, ymm6, ymm3
	LONG $0xdcd4e5c5                           // vpaddq    ymm3, ymm3, ymm4
	LONG $0xd773ddc5; BYTE $0x20               // vpsrlq    ymm4, ymm7, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xd073d5c5; BYTE $0x20               // vpsrlq    ymm5, ymm0, 32
	LONG $0xedf4c5c5                           // vpmuludq    ymm5, ymm7, ymm5
	LONG $0xe4d4d5c5                           // vpaddq    ymm4, ymm5, ymm4
	LONG $0xf473ddc5; BYTE $0x20               // vpsllq    ymm4, ymm4, 32
	LONG $0xc0f4c5c5                           // vpmuludq    ymm0, ymm7, ymm0
	LONG $0xc4d4fdc5                           // vpaddq    ymm0, ymm0, ymm4
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_650
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

LBB0_652:
	// Scalar remainder for the element-wise 64-bit multiply
	// out[i] = inRight[i] * inLeft[i] (rcx = inRight, rdx = inLeft, r8 = out).
	// rsi is the index of the first element still to process and r10 is used
	// as the total element count; both are set before control reaches here
	// (by code outside this block).
	// r9  = count - rsi - 1
	// rax = count & 3 = number of single elements peeled off below so that
	//       the 4x-unrolled loop at LBB0_655 can finish exactly at rsi == count.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_654

LBB0_653:
	// Peel loop: one 64-bit element per iteration, rax iterations.
	LONG $0xf13c8b48             // mov    rdi, qword [rcx + 8*rsi]
	LONG $0x3caf0f48; BYTE $0xf2 // imul    rdi, qword [rdx + 8*rsi]
	LONG $0xf03c8949             // mov    qword [r8 + 8*rsi], rdi
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_653

LBB0_654:
	// r9 was computed before the peel loop; r9 < 3 (unsigned) means at most
	// three elements were outstanding at LBB0_652, so nothing is left to do.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_655:
	// Main scalar loop, unrolled 4x; runs until rsi == count (r10).
	LONG $0xf1048b48               // mov    rax, qword [rcx + 8*rsi]
	LONG $0x04af0f48; BYTE $0xf2   // imul    rax, qword [rdx + 8*rsi]
	LONG $0xf0048949               // mov    qword [r8 + 8*rsi], rax
	LONG $0xf1448b48; BYTE $0x08   // mov    rax, qword [rcx + 8*rsi + 8]
	LONG $0x44af0f48; WORD $0x08f2 // imul    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08   // mov    qword [r8 + 8*rsi + 8], rax
	LONG $0xf1448b48; BYTE $0x10   // mov    rax, qword [rcx + 8*rsi + 16]
	LONG $0x44af0f48; WORD $0x10f2 // imul    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10   // mov    qword [r8 + 8*rsi + 16], rax
	LONG $0xf1448b48; BYTE $0x18   // mov    rax, qword [rcx + 8*rsi + 24]
	LONG $0x44af0f48; WORD $0x18f2 // imul    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18   // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_655
	// Done; LBB0_825 is defined elsewhere in this function.
	JMP  LBB0_825

LBB0_783:
	// Element-wise 64-bit multiply: out[i] = inRight[i] * inLeft[i] (low 64 bits).
	// Register roles, as loaded in the function prologue: rdx = inLeft,
	// rcx = inRight, r8 = out. r10 is used as the element count; it is set by
	// code outside this block.
	//
	// Runtime alias check (8 bytes per element): compute the one-past-the-end
	// addresses of out, inLeft and inRight. If out's range overlaps either
	// input's range, skip the vector loop and run the scalar path from rsi = 0.
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b && r11b: inLeft end > out start and out end > inLeft start.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_788
	// al && dil: the same overlap test for inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_788
	// rsi = count rounded down to a multiple of 16 (vector loop bound),
	// rdi = vector loop index.
	// NOTE(review): the loop below only exits when rdi == rsi after the
	// increment, so it presumably relies on the caller guaranteeing
	// count >= 16 -- confirm against the dispatch code.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

LBB0_786:
	// AVX2 loop: 16 elements (4 ymm registers x 4 qwords) per iteration.
	// Each 64-bit product is assembled from 32x32->64 vpmuludq multiplies:
	//   a*b mod 2^64 = lo(a)*lo(b) + ((hi(a)*lo(b) + hi(b)*lo(a)) << 32)
	LONG $0x0c6ffec5; BYTE $0xfa               // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0x446ffec5; WORD $0x60fa             // vmovdqu    ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x246ffec5; BYTE $0xf9               // vmovdqu    ymm4, yword [rcx + 8*rdi]
	LONG $0x6c6ffec5; WORD $0x20f9             // vmovdqu    ymm5, yword [rcx + 8*rdi + 32]
	LONG $0x746ffec5; WORD $0x40f9             // vmovdqu    ymm6, yword [rcx + 8*rdi + 64]
	LONG $0x7c6ffec5; WORD $0x60f9             // vmovdqu    ymm7, yword [rcx + 8*rdi + 96]
	// Group 0: ymm1 = ymm4 * ymm1.
	LONG $0xd473bdc5; BYTE $0x20               // vpsrlq    ymm8, ymm4, 32
	LONG $0xc1f43dc5                           // vpmuludq    ymm8, ymm8, ymm1
	LONG $0xd173b5c5; BYTE $0x20               // vpsrlq    ymm9, ymm1, 32
	LONG $0xccf435c5                           // vpmuludq    ymm9, ymm9, ymm4
	LONG $0xd43541c4; BYTE $0xc0               // vpaddq    ymm8, ymm9, ymm8
	LONG $0x733dc1c4; WORD $0x20f0             // vpsllq    ymm8, ymm8, 32
	LONG $0xc9f4ddc5                           // vpmuludq    ymm1, ymm4, ymm1
	LONG $0xc9d4bdc5                           // vpaddq    ymm1, ymm8, ymm1
	// Group 1: ymm2 = ymm5 * ymm2.
	LONG $0xd573ddc5; BYTE $0x20               // vpsrlq    ymm4, ymm5, 32
	LONG $0xe2f4ddc5                           // vpmuludq    ymm4, ymm4, ymm2
	LONG $0xd273bdc5; BYTE $0x20               // vpsrlq    ymm8, ymm2, 32
	LONG $0xc5f43dc5                           // vpmuludq    ymm8, ymm8, ymm5
	LONG $0xe4d4bdc5                           // vpaddq    ymm4, ymm8, ymm4
	LONG $0xf473ddc5; BYTE $0x20               // vpsllq    ymm4, ymm4, 32
	LONG $0xd2f4d5c5                           // vpmuludq    ymm2, ymm5, ymm2
	LONG $0xd4d4edc5                           // vpaddq    ymm2, ymm2, ymm4
	// Group 2: ymm3 = ymm6 * ymm3.
	LONG $0xd673ddc5; BYTE $0x20               // vpsrlq    ymm4, ymm6, 32
	LONG $0xe3f4ddc5                           // vpmuludq    ymm4, ymm4, ymm3
	LONG $0xd373d5c5; BYTE $0x20               // vpsrlq    ymm5, ymm3, 32
	LONG $0xedf4cdc5                           // vpmuludq    ymm5, ymm6, ymm5
	LONG $0xe4d4d5c5                           // vpaddq    ymm4, ymm5, ymm4
	LONG $0xf473ddc5; BYTE $0x20               // vpsllq    ymm4, ymm4, 32
	LONG $0xdbf4cdc5                           // vpmuludq    ymm3, ymm6, ymm3
	LONG $0xdcd4e5c5                           // vpaddq    ymm3, ymm3, ymm4
	// Group 3: ymm0 = ymm7 * ymm0.
	LONG $0xd773ddc5; BYTE $0x20               // vpsrlq    ymm4, ymm7, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xd073d5c5; BYTE $0x20               // vpsrlq    ymm5, ymm0, 32
	LONG $0xedf4c5c5                           // vpmuludq    ymm5, ymm7, ymm5
	LONG $0xe4d4d5c5                           // vpaddq    ymm4, ymm5, ymm4
	LONG $0xf473ddc5; BYTE $0x20               // vpsllq    ymm4, ymm4, 32
	LONG $0xc0f4c5c5                           // vpmuludq    ymm0, ymm7, ymm0
	LONG $0xc4d4fdc5                           // vpaddq    ymm0, ymm0, ymm4
	// Store the four result vectors to out.
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_786
	// If the vectorised part covered every element, finish; otherwise fall
	// through to the scalar remainder with rsi = first unprocessed index.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

LBB0_788:
	// Scalar remainder starting at index rsi.
	// r9  = count - rsi - 1
	// rax = count & 3 = number of single elements peeled off before the
	//       4x-unrolled loop.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_790

LBB0_789:
	// Peel loop: one element per iteration, rax iterations.
	LONG $0xf13c8b48             // mov    rdi, qword [rcx + 8*rsi]
	LONG $0x3caf0f48; BYTE $0xf2 // imul    rdi, qword [rdx + 8*rsi]
	LONG $0xf03c8949             // mov    qword [r8 + 8*rsi], rdi
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_789

LBB0_790:
	// r9 < 3 (unsigned): at most three elements were outstanding at
	// LBB0_788, so the peel loop already handled them.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_791:
	// Main scalar loop, unrolled 4x; runs until rsi == count (r10).
	LONG $0xf1048b48               // mov    rax, qword [rcx + 8*rsi]
	LONG $0x04af0f48; BYTE $0xf2   // imul    rax, qword [rdx + 8*rsi]
	LONG $0xf0048949               // mov    qword [r8 + 8*rsi], rax
	LONG $0xf1448b48; BYTE $0x08   // mov    rax, qword [rcx + 8*rsi + 8]
	LONG $0x44af0f48; WORD $0x08f2 // imul    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08   // mov    qword [r8 + 8*rsi + 8], rax
	LONG $0xf1448b48; BYTE $0x10   // mov    rax, qword [rcx + 8*rsi + 16]
	LONG $0x44af0f48; WORD $0x10f2 // imul    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10   // mov    qword [r8 + 8*rsi + 16], rax
	LONG $0xf1448b48; BYTE $0x18   // mov    rax, qword [rcx + 8*rsi + 24]
	LONG $0x44af0f48; WORD $0x18f2 // imul    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18   // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_791
	// Done; LBB0_825 is defined elsewhere in this function.
	JMP  LBB0_825

LBB0_112:
	// Element-wise 64-bit add: out[i] = inRight[i] + inLeft[i].
	// Register roles, as loaded in the function prologue: rdx = inLeft,
	// rcx = inRight, r8 = out. r10 is used as the element count; it is set by
	// code outside this block.
	//
	// Runtime alias check (8 bytes per element): if out's address range
	// overlaps inLeft's or inRight's, skip the vector loop and run the scalar
	// path from rsi = 0.
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b && r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_117
	// al && dil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_117
	// rsi = count rounded down to a multiple of 16, rdi = vector loop index.
	// NOTE(review): the loop exit test is rdi == rsi after the increment, so
	// this presumably relies on count >= 16 being checked by the caller.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

LBB0_115:
	// AVX2 loop: 16 elements (4 ymm registers x 4 qwords) per iteration.
	LONG $0x046ffec5; BYTE $0xf9               // vmovdqu    ymm0, yword [rcx + 8*rdi]
	LONG $0x4c6ffec5; WORD $0x20f9             // vmovdqu    ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x546ffec5; WORD $0x40f9             // vmovdqu    ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60f9             // vmovdqu    ymm3, yword [rcx + 8*rdi + 96]
	LONG $0x04d4fdc5; BYTE $0xfa               // vpaddq    ymm0, ymm0, yword [rdx + 8*rdi]
	LONG $0x4cd4f5c5; WORD $0x20fa             // vpaddq    ymm1, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x54d4edc5; WORD $0x40fa             // vpaddq    ymm2, ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5cd4e5c5; WORD $0x60fa             // vpaddq    ymm3, ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf804             // vmovdqu    yword [r8 + 8*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xf84c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_115
	// Finish if the vectorised part covered every element; otherwise fall
	// through to the scalar remainder with rsi = first unprocessed index.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

LBB0_117:
	// Scalar remainder starting at index rsi.
	// r9  = count - rsi - 1
	// rax = count & 3 = number of single elements peeled off before the
	//       4x-unrolled loop.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_119

LBB0_118:
	// Peel loop: one element per iteration, rax iterations.
	LONG $0xf13c8b48 // mov    rdi, qword [rcx + 8*rsi]
	LONG $0xf23c0348 // add    rdi, qword [rdx + 8*rsi]
	LONG $0xf03c8949 // mov    qword [r8 + 8*rsi], rdi
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc08348 // add    rax, -1
	JNE  LBB0_118

LBB0_119:
	// r9 < 3 (unsigned): at most three elements were outstanding at
	// LBB0_117, so the peel loop already handled them.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_120:
	// Main scalar loop, unrolled 4x; runs until rsi == count (r10).
	LONG $0xf1048b48             // mov    rax, qword [rcx + 8*rsi]
	LONG $0xf2040348             // add    rax, qword [rdx + 8*rsi]
	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
	LONG $0xf1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rsi + 8]
	LONG $0xf2440348; BYTE $0x08 // add    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
	LONG $0xf1448b48; BYTE $0x10 // mov    rax, qword [rcx + 8*rsi + 16]
	LONG $0xf2440348; BYTE $0x10 // add    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
	LONG $0xf1448b48; BYTE $0x18 // mov    rax, qword [rcx + 8*rsi + 24]
	LONG $0xf2440348; BYTE $0x18 // add    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_120
	// Done; LBB0_825 is defined elsewhere in this function.
	JMP  LBB0_825

LBB0_378:
	// Element-wise 64-bit subtract: out[i] = inLeft[i] - inRight[i].
	// Register roles, as loaded in the function prologue: rdx = inLeft,
	// rcx = inRight, r8 = out. r10 is used as the element count; it is set by
	// code outside this block.
	//
	// Runtime alias check (8 bytes per element): if out's address range
	// overlaps inLeft's or inRight's, skip the vector loop and run the scalar
	// path from rsi = 0.
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b && r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_383
	// al && dil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_383
	// rsi = count rounded down to a multiple of 16, rdi = vector loop index.
	// NOTE(review): the loop exit test is rdi == rsi after the increment, so
	// this presumably relies on count >= 16 being checked by the caller.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

LBB0_381:
	// AVX2 loop: 16 elements (4 ymm registers x 4 qwords) per iteration.
	// The minuend is loaded from inLeft and inRight is subtracted from it.
	LONG $0x046ffec5; BYTE $0xfa               // vmovdqu    ymm0, yword [rdx + 8*rdi]
	LONG $0x4c6ffec5; WORD $0x20fa             // vmovdqu    ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x546ffec5; WORD $0x40fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x04fbfdc5; BYTE $0xf9               // vpsubq    ymm0, ymm0, yword [rcx + 8*rdi]
	LONG $0x4cfbf5c5; WORD $0x20f9             // vpsubq    ymm1, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x54fbedc5; WORD $0x40f9             // vpsubq    ymm2, ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5cfbe5c5; WORD $0x60f9             // vpsubq    ymm3, ymm3, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf804             // vmovdqu    yword [r8 + 8*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xf84c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_381
	// Finish if the vectorised part covered every element; otherwise fall
	// through to the scalar remainder with rsi = first unprocessed index.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

LBB0_383:
	// Scalar remainder starting at index rsi.
	// r9  = count - rsi - 1
	// rax = count & 3 = number of single elements peeled off before the
	//       4x-unrolled loop.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_385

LBB0_384:
	// Peel loop: one element per iteration, rax iterations.
	LONG $0xf23c8b48 // mov    rdi, qword [rdx + 8*rsi]
	LONG $0xf13c2b48 // sub    rdi, qword [rcx + 8*rsi]
	LONG $0xf03c8949 // mov    qword [r8 + 8*rsi], rdi
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc08348 // add    rax, -1
	JNE  LBB0_384

LBB0_385:
	// r9 < 3 (unsigned): at most three elements were outstanding at
	// LBB0_383, so the peel loop already handled them.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_386:
	// Main scalar loop, unrolled 4x; runs until rsi == count (r10).
	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_386
	// Done; LBB0_825 is defined elsewhere in this function.
	JMP  LBB0_825

LBB0_245:
	// Element-wise 64-bit add: out[i] = inRight[i] + inLeft[i].
	// The instruction sequence is the same as the other 64-bit add block in
	// this function (LBB0_112); which typ/op combination dispatches here is
	// decided by code outside this block.
	// Register roles, as loaded in the function prologue: rdx = inLeft,
	// rcx = inRight, r8 = out. r10 is used as the element count; it is set by
	// code outside this block.
	//
	// Runtime alias check (8 bytes per element): if out's address range
	// overlaps inLeft's or inRight's, skip the vector loop and run the scalar
	// path from rsi = 0.
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b && r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_250
	// al && dil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_250
	// rsi = count rounded down to a multiple of 16, rdi = vector loop index.
	// NOTE(review): the loop exit test is rdi == rsi after the increment, so
	// this presumably relies on count >= 16 being checked by the caller.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

LBB0_248:
	// AVX2 loop: 16 elements (4 ymm registers x 4 qwords) per iteration.
	LONG $0x046ffec5; BYTE $0xf9               // vmovdqu    ymm0, yword [rcx + 8*rdi]
	LONG $0x4c6ffec5; WORD $0x20f9             // vmovdqu    ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x546ffec5; WORD $0x40f9             // vmovdqu    ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60f9             // vmovdqu    ymm3, yword [rcx + 8*rdi + 96]
	LONG $0x04d4fdc5; BYTE $0xfa               // vpaddq    ymm0, ymm0, yword [rdx + 8*rdi]
	LONG $0x4cd4f5c5; WORD $0x20fa             // vpaddq    ymm1, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x54d4edc5; WORD $0x40fa             // vpaddq    ymm2, ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5cd4e5c5; WORD $0x60fa             // vpaddq    ymm3, ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf804             // vmovdqu    yword [r8 + 8*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xf84c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_248
	// Finish if the vectorised part covered every element; otherwise fall
	// through to the scalar remainder with rsi = first unprocessed index.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

LBB0_250:
	// Scalar remainder starting at index rsi.
	// r9  = count - rsi - 1
	// rax = count & 3 = number of single elements peeled off before the
	//       4x-unrolled loop.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_252

LBB0_251:
	// Peel loop: one element per iteration, rax iterations.
	LONG $0xf13c8b48 // mov    rdi, qword [rcx + 8*rsi]
	LONG $0xf23c0348 // add    rdi, qword [rdx + 8*rsi]
	LONG $0xf03c8949 // mov    qword [r8 + 8*rsi], rdi
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc08348 // add    rax, -1
	JNE  LBB0_251

LBB0_252:
	// r9 < 3 (unsigned): at most three elements were outstanding at
	// LBB0_250, so the peel loop already handled them.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_253:
	// Main scalar loop, unrolled 4x; runs until rsi == count (r10).
	LONG $0xf1048b48             // mov    rax, qword [rcx + 8*rsi]
	LONG $0xf2040348             // add    rax, qword [rdx + 8*rsi]
	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
	LONG $0xf1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rsi + 8]
	LONG $0xf2440348; BYTE $0x08 // add    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
	LONG $0xf1448b48; BYTE $0x10 // mov    rax, qword [rcx + 8*rsi + 16]
	LONG $0xf2440348; BYTE $0x10 // add    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
	LONG $0xf1448b48; BYTE $0x18 // mov    rax, qword [rcx + 8*rsi + 24]
	LONG $0xf2440348; BYTE $0x18 // add    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_253
	// Done; LBB0_825 is defined elsewhere in this function.
	JMP  LBB0_825

LBB0_511:
	// Element-wise 64-bit subtract: out[i] = inLeft[i] - inRight[i].
	// The instruction sequence is the same as the other 64-bit subtract block
	// in this function (LBB0_378); which typ/op combination dispatches here is
	// decided by code outside this block.
	// Register roles, as loaded in the function prologue: rdx = inLeft,
	// rcx = inRight, r8 = out. r10 is used as the element count; it is set by
	// code outside this block.
	//
	// Runtime alias check (8 bytes per element): if out's address range
	// overlaps inLeft's or inRight's, skip the vector loop and run the scalar
	// path from rsi = 0.
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b && r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_516
	// al && dil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_516
	// rsi = count rounded down to a multiple of 16, rdi = vector loop index.
	// NOTE(review): the loop exit test is rdi == rsi after the increment, so
	// this presumably relies on count >= 16 being checked by the caller.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

LBB0_514:
	// AVX2 loop: 16 elements (4 ymm registers x 4 qwords) per iteration.
	// The minuend is loaded from inLeft and inRight is subtracted from it.
	LONG $0x046ffec5; BYTE $0xfa               // vmovdqu    ymm0, yword [rdx + 8*rdi]
	LONG $0x4c6ffec5; WORD $0x20fa             // vmovdqu    ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x546ffec5; WORD $0x40fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x04fbfdc5; BYTE $0xf9               // vpsubq    ymm0, ymm0, yword [rcx + 8*rdi]
	LONG $0x4cfbf5c5; WORD $0x20f9             // vpsubq    ymm1, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x54fbedc5; WORD $0x40f9             // vpsubq    ymm2, ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5cfbe5c5; WORD $0x60f9             // vpsubq    ymm3, ymm3, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf804             // vmovdqu    yword [r8 + 8*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xf84c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_514
	// Finish if the vectorised part covered every element; otherwise fall
	// through to the scalar remainder with rsi = first unprocessed index.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

LBB0_516:
	// Scalar remainder starting at index rsi.
	// r9  = count - rsi - 1
	// rax = count & 3 = number of single elements peeled off before the
	//       4x-unrolled loop.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_518

LBB0_517:
	// Peel loop: one element per iteration, rax iterations.
	LONG $0xf23c8b48 // mov    rdi, qword [rdx + 8*rsi]
	LONG $0xf13c2b48 // sub    rdi, qword [rcx + 8*rsi]
	LONG $0xf03c8949 // mov    qword [r8 + 8*rsi], rdi
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc08348 // add    rax, -1
	JNE  LBB0_517

LBB0_518:
	// r9 < 3 (unsigned): at most three elements were outstanding at
	// LBB0_516, so the peel loop already handled them.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_519:
	// Main scalar loop, unrolled 4x; runs until rsi == count (r10).
	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_519
	// Done; LBB0_825 is defined elsewhere in this function.
	JMP  LBB0_825

LBB0_70:
	// Element-wise 16-bit add: out[i] = inRight[i] + inLeft[i] (wraps at 16 bits).
	// Register roles, as loaded in the function prologue: rdx = inLeft,
	// rcx = inRight, r8 = out. r10 is used as the element count; it is set by
	// code outside this block.
	//
	// Runtime alias check (2 bytes per element): if out's address range
	// overlaps inLeft's or inRight's, skip the vector loop and run the scalar
	// path from rsi = 0.
	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10]
	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b && r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_75
	// al && dil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_75
	// rsi = count rounded down to a multiple of 64, rdi = vector loop index.
	// NOTE(review): the loop exit test is rdi == rsi after the increment, so
	// this presumably relies on count >= 64 being checked by the caller.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xc0 // and    esi, -64
	WORD $0xff31             // xor    edi, edi

LBB0_73:
	// AVX2 loop: 64 elements (4 ymm registers x 16 words) per iteration.
	LONG $0x046ffec5; BYTE $0x79               // vmovdqu    ymm0, yword [rcx + 2*rdi]
	LONG $0x4c6ffec5; WORD $0x2079             // vmovdqu    ymm1, yword [rcx + 2*rdi + 32]
	LONG $0x546ffec5; WORD $0x4079             // vmovdqu    ymm2, yword [rcx + 2*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x6079             // vmovdqu    ymm3, yword [rcx + 2*rdi + 96]
	LONG $0x04fdfdc5; BYTE $0x7a               // vpaddw    ymm0, ymm0, yword [rdx + 2*rdi]
	LONG $0x4cfdf5c5; WORD $0x207a             // vpaddw    ymm1, ymm1, yword [rdx + 2*rdi + 32]
	LONG $0x54fdedc5; WORD $0x407a             // vpaddw    ymm2, ymm2, yword [rdx + 2*rdi + 64]
	LONG $0x5cfde5c5; WORD $0x607a             // vpaddw    ymm3, ymm3, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x7804             // vmovdqu    yword [r8 + 2*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	LONG $0x40c78348                           // add    rdi, 64
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_73
	// Finish if the vectorised part covered every element; otherwise fall
	// through to the scalar remainder with rsi = first unprocessed index.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

LBB0_75:
	// Scalar remainder starting at index rsi.
	// r9  = count - rsi - 1
	// rax = count & 3 = number of single elements peeled off before the
	//       4x-unrolled loop.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_77

LBB0_76:
	// Peel loop: one 16-bit element per iteration, rax iterations.
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	LONG $0x723c0366             // add    di, word [rdx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_76

LBB0_77:
	// r9 < 3 (unsigned): at most three elements were outstanding at
	// LBB0_75, so the peel loop already handled them.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_78:
	// Main scalar loop, unrolled 4x; runs until rsi == count (r10).
	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
	LONG $0x72040366               // add    ax, word [rdx + 2*rsi]
	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
	LONG $0x72440366; BYTE $0x02   // add    ax, word [rdx + 2*rsi + 2]
	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
	LONG $0x72440366; BYTE $0x04   // add    ax, word [rdx + 2*rsi + 4]
	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
	LONG $0x72440366; BYTE $0x06   // add    ax, word [rdx + 2*rsi + 6]
	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_78
	// Done; LBB0_825 is defined elsewhere in this function.
	JMP  LBB0_825

LBB0_82:
	// Element-wise 16-bit add: out[i] = inRight[i] + inLeft[i] (wraps at 16 bits).
	// The instruction sequence is the same as the other 16-bit add blocks in
	// this function; which typ/op combination dispatches here is decided by
	// code outside this block.
	// Register roles, as loaded in the function prologue: rdx = inLeft,
	// rcx = inRight, r8 = out. r10 is used as the element count; it is set by
	// code outside this block.
	//
	// Runtime alias check (2 bytes per element): if out's address range
	// overlaps inLeft's or inRight's, skip the vector loop and run the scalar
	// path from rsi = 0.
	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10]
	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b && r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_87
	// al && dil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_87
	// rsi = count rounded down to a multiple of 64, rdi = vector loop index.
	// NOTE(review): the loop exit test is rdi == rsi after the increment, so
	// this presumably relies on count >= 64 being checked by the caller.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xc0 // and    esi, -64
	WORD $0xff31             // xor    edi, edi

LBB0_85:
	// AVX2 loop: 64 elements (4 ymm registers x 16 words) per iteration.
	LONG $0x046ffec5; BYTE $0x79               // vmovdqu    ymm0, yword [rcx + 2*rdi]
	LONG $0x4c6ffec5; WORD $0x2079             // vmovdqu    ymm1, yword [rcx + 2*rdi + 32]
	LONG $0x546ffec5; WORD $0x4079             // vmovdqu    ymm2, yword [rcx + 2*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x6079             // vmovdqu    ymm3, yword [rcx + 2*rdi + 96]
	LONG $0x04fdfdc5; BYTE $0x7a               // vpaddw    ymm0, ymm0, yword [rdx + 2*rdi]
	LONG $0x4cfdf5c5; WORD $0x207a             // vpaddw    ymm1, ymm1, yword [rdx + 2*rdi + 32]
	LONG $0x54fdedc5; WORD $0x407a             // vpaddw    ymm2, ymm2, yword [rdx + 2*rdi + 64]
	LONG $0x5cfde5c5; WORD $0x607a             // vpaddw    ymm3, ymm3, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x7804             // vmovdqu    yword [r8 + 2*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	LONG $0x40c78348                           // add    rdi, 64
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_85
	// Finish if the vectorised part covered every element; otherwise fall
	// through to the scalar remainder with rsi = first unprocessed index.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

LBB0_87:
	// Scalar remainder starting at index rsi.
	// r9  = count - rsi - 1
	// rax = count & 3 = number of single elements peeled off before the
	//       4x-unrolled loop.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_89

LBB0_88:
	// Peel loop: one 16-bit element per iteration, rax iterations.
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	LONG $0x723c0366             // add    di, word [rdx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_88

LBB0_89:
	// r9 < 3 (unsigned): at most three elements were outstanding at
	// LBB0_87, so the peel loop already handled them.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_90:
	// Main scalar loop, unrolled 4x; runs until rsi == count (r10).
	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
	LONG $0x72040366               // add    ax, word [rdx + 2*rsi]
	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
	LONG $0x72440366; BYTE $0x02   // add    ax, word [rdx + 2*rsi + 2]
	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
	LONG $0x72440366; BYTE $0x04   // add    ax, word [rdx + 2*rsi + 4]
	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
	LONG $0x72440366; BYTE $0x06   // add    ax, word [rdx + 2*rsi + 6]
	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_90
	// Done; LBB0_825 is defined elsewhere in this function.
	JMP  LBB0_825

LBB0_203:
	// Element-wise 16-bit add: out[i] = inRight[i] + inLeft[i] (wraps at 16 bits).
	// The instruction sequence is the same as the other 16-bit add blocks in
	// this function; which typ/op combination dispatches here is decided by
	// code outside this block.
	// Register roles, as loaded in the function prologue: rdx = inLeft,
	// rcx = inRight, r8 = out. r10 is used as the element count; it is set by
	// code outside this block.
	//
	// Runtime alias check (2 bytes per element): if out's address range
	// overlaps inLeft's or inRight's, skip the vector loop and run the scalar
	// path from rsi = 0.
	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10]
	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// r9b && r11b: out overlaps inLeft.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_208
	// al && dil: out overlaps inRight.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_208
	// rsi = count rounded down to a multiple of 64, rdi = vector loop index.
	// NOTE(review): the loop exit test is rdi == rsi after the increment, so
	// this presumably relies on count >= 64 being checked by the caller.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xc0 // and    esi, -64
	WORD $0xff31             // xor    edi, edi

LBB0_206:
	// AVX2 loop: 64 elements (4 ymm registers x 16 words) per iteration.
	LONG $0x046ffec5; BYTE $0x79               // vmovdqu    ymm0, yword [rcx + 2*rdi]
	LONG $0x4c6ffec5; WORD $0x2079             // vmovdqu    ymm1, yword [rcx + 2*rdi + 32]
	LONG $0x546ffec5; WORD $0x4079             // vmovdqu    ymm2, yword [rcx + 2*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x6079             // vmovdqu    ymm3, yword [rcx + 2*rdi + 96]
	LONG $0x04fdfdc5; BYTE $0x7a               // vpaddw    ymm0, ymm0, yword [rdx + 2*rdi]
	LONG $0x4cfdf5c5; WORD $0x207a             // vpaddw    ymm1, ymm1, yword [rdx + 2*rdi + 32]
	LONG $0x54fdedc5; WORD $0x407a             // vpaddw    ymm2, ymm2, yword [rdx + 2*rdi + 64]
	LONG $0x5cfde5c5; WORD $0x607a             // vpaddw    ymm3, ymm3, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x7804             // vmovdqu    yword [r8 + 2*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	LONG $0x40c78348                           // add    rdi, 64
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_206
	// Finish if the vectorised part covered every element; otherwise fall
	// through to the scalar remainder with rsi = first unprocessed index.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

LBB0_208:
	// Scalar remainder starting at index rsi.
	// r9  = count - rsi - 1
	// rax = count & 3 = number of single elements peeled off before the
	//       4x-unrolled loop.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_210

LBB0_209:
	// Peel loop: one 16-bit element per iteration, rax iterations.
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	LONG $0x723c0366             // add    di, word [rdx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_209

LBB0_210:
	// r9 < 3 (unsigned): at most three elements were outstanding at
	// LBB0_208, so the peel loop already handled them.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_211:
	// Main scalar loop, unrolled 4x; runs until rsi == count (r10).
	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
	LONG $0x72040366               // add    ax, word [rdx + 2*rsi]
	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
	LONG $0x72440366; BYTE $0x02   // add    ax, word [rdx + 2*rsi + 2]
	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
	LONG $0x72440366; BYTE $0x04   // add    ax, word [rdx + 2*rsi + 4]
	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
	LONG $0x72440366; BYTE $0x06   // add    ax, word [rdx + 2*rsi + 6]
	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_211
	// Done; LBB0_825 is defined elsewhere in this function.
	JMP  LBB0_825

// LBB0_215: 16-bit element-wise add, [r8][i] = [rcx][i] + [rdx][i] for
// i in [0, r10).
// It starts with a runtime overlap test: if the output range
// [r8, r8 + 2*r10) intersects either input range, the vector loop is skipped
// and the scalar code processes everything from index 0.
// NOTE(review): presumably rdx/rcx/r8 are inLeft/inRight/out from the function
// prologue, r10 is the element count (> 0, and >= 64 so the vector loop makes
// at least one pass), and LBB0_825 is the common exit -- all set up outside
// this section; confirm there.
LBB0_215:
	// Flags: r9b = (rdx_end > r8), r11b = (r8_end > rdx),
	//        al  = (rcx_end > r8), dil  = (r8_end > rcx).
	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10]
	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index for the scalar path when an overlap is found.
	WORD $0xf631             // xor    esi, esi
	// Output overlaps the rdx buffer -> scalar only.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_220
	// Output overlaps the rcx buffer -> scalar only.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_220
	// No overlap: rsi = r10d & -64 (elements covered by the vector loop),
	// rdi = 0 is the loop index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xc0 // and    esi, -64
	WORD $0xff31             // xor    edi, edi

// LBB0_218: vector loop, 4 YMM registers = 64 x 16-bit lanes per pass.
LBB0_218:
	LONG $0x046ffec5; BYTE $0x79               // vmovdqu    ymm0, yword [rcx + 2*rdi]
	LONG $0x4c6ffec5; WORD $0x2079             // vmovdqu    ymm1, yword [rcx + 2*rdi + 32]
	LONG $0x546ffec5; WORD $0x4079             // vmovdqu    ymm2, yword [rcx + 2*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x6079             // vmovdqu    ymm3, yword [rcx + 2*rdi + 96]
	LONG $0x04fdfdc5; BYTE $0x7a               // vpaddw    ymm0, ymm0, yword [rdx + 2*rdi]
	LONG $0x4cfdf5c5; WORD $0x207a             // vpaddw    ymm1, ymm1, yword [rdx + 2*rdi + 32]
	LONG $0x54fdedc5; WORD $0x407a             // vpaddw    ymm2, ymm2, yword [rdx + 2*rdi + 64]
	LONG $0x5cfde5c5; WORD $0x607a             // vpaddw    ymm3, ymm3, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x7804             // vmovdqu    yword [r8 + 2*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	LONG $0x40c78348                           // add    rdi, 64
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_218
	// Done if the vector loop covered every element.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_220: scalar tail setup. r9 = r10 - rsi - 1 (as ~rsi + r10);
// rax = r10 & 3 = remaining count mod 4, since rsi is 0 or a multiple of 64.
LBB0_220:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_222

// LBB0_221: peel loop, one element per iteration, rax iterations.
LBB0_221:
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	LONG $0x723c0366             // add    di, word [rdx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB0_221

// LBB0_222: fewer than 4 elements remained in total -> the peel loop finished
// the job.
LBB0_222:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_223: 4x-unrolled scalar loop over the remaining multiple-of-4 elements.
LBB0_223:
	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
	LONG $0x72040366               // add    ax, word [rdx + 2*rsi]
	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
	LONG $0x72440366; BYTE $0x02   // add    ax, word [rdx + 2*rsi + 2]
	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
	LONG $0x72440366; BYTE $0x04   // add    ax, word [rdx + 2*rsi + 4]
	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
	LONG $0x72440366; BYTE $0x06   // add    ax, word [rdx + 2*rsi + 6]
	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB0_223
	JMP  LBB0_825

// LBB0_124: 64-bit integer element-wise add, [r8][i] = [rcx][i] + [rdx][i]
// for i in [0, r10).
// A runtime overlap test comes first: if the output range [r8, r8 + 8*r10)
// intersects either input range, only the scalar code (from index 0) is used.
// NOTE(review): presumably rdx/rcx/r8 are inLeft/inRight/out from the function
// prologue, r10 is the element count (> 0, and >= 16 so the vector loop makes
// at least one pass), and LBB0_825 is the common exit -- confirm in the
// dispatch code outside this section.
LBB0_124:
	// Flags: r9b = (rdx_end > r8), r11b = (r8_end > rdx),
	//        al  = (rcx_end > r8), dil  = (r8_end > rcx).
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index for the scalar path when an overlap is found.
	WORD $0xf631             // xor    esi, esi
	// Output overlaps the rdx buffer -> scalar only.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_129
	// Output overlaps the rcx buffer -> scalar only.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_129
	// No overlap: rsi = r10d & -16 (elements covered by the vector loop),
	// rdi = 0 is the loop index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

// LBB0_127: vector loop, 4 YMM registers = 16 x 64-bit lanes per pass.
LBB0_127:
	LONG $0x046ffec5; BYTE $0xf9               // vmovdqu    ymm0, yword [rcx + 8*rdi]
	LONG $0x4c6ffec5; WORD $0x20f9             // vmovdqu    ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x546ffec5; WORD $0x40f9             // vmovdqu    ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60f9             // vmovdqu    ymm3, yword [rcx + 8*rdi + 96]
	LONG $0x04d4fdc5; BYTE $0xfa               // vpaddq    ymm0, ymm0, yword [rdx + 8*rdi]
	LONG $0x4cd4f5c5; WORD $0x20fa             // vpaddq    ymm1, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x54d4edc5; WORD $0x40fa             // vpaddq    ymm2, ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5cd4e5c5; WORD $0x60fa             // vpaddq    ymm3, ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf804             // vmovdqu    yword [r8 + 8*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xf84c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_127
	// Done if the vector loop covered every element.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_129: scalar tail setup. r9 = r10 - rsi - 1 (as ~rsi + r10);
// rax = r10 & 3 = remaining count mod 4, since rsi is 0 or a multiple of 16.
LBB0_129:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_131

// LBB0_130: peel loop, one element per iteration, rax iterations.
LBB0_130:
	LONG $0xf13c8b48 // mov    rdi, qword [rcx + 8*rsi]
	LONG $0xf23c0348 // add    rdi, qword [rdx + 8*rsi]
	LONG $0xf03c8949 // mov    qword [r8 + 8*rsi], rdi
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc08348 // add    rax, -1
	JNE  LBB0_130

// LBB0_131: fewer than 4 elements remained in total -> the peel loop finished
// the job.
LBB0_131:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_132: 4x-unrolled scalar loop over the remaining multiple-of-4 elements.
LBB0_132:
	LONG $0xf1048b48             // mov    rax, qword [rcx + 8*rsi]
	LONG $0xf2040348             // add    rax, qword [rdx + 8*rsi]
	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
	LONG $0xf1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rsi + 8]
	LONG $0xf2440348; BYTE $0x08 // add    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
	LONG $0xf1448b48; BYTE $0x10 // mov    rax, qword [rcx + 8*rsi + 16]
	LONG $0xf2440348; BYTE $0x10 // add    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
	LONG $0xf1448b48; BYTE $0x18 // mov    rax, qword [rcx + 8*rsi + 24]
	LONG $0xf2440348; BYTE $0x18 // add    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_132
	JMP  LBB0_825

// LBB0_136: single-precision float element-wise add,
// [r8][i] = [rcx][i] + [rdx][i] for i in [0, r10).
// A runtime overlap test comes first: if the output range [r8, r8 + 4*r10)
// intersects either input range, only the scalar code (from index 0) is used.
// NOTE(review): presumably rdx/rcx/r8 are inLeft/inRight/out from the function
// prologue, r10 is the element count (> 0, and >= 32 so the vector loop makes
// at least one pass), and LBB0_825 is the common exit -- confirm in the
// dispatch code outside this section.
LBB0_136:
	// Flags: r9b = (rdx_end > r8), r11b = (r8_end > rdx),
	//        al  = (rcx_end > r8), dil  = (r8_end > rcx).
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index for the scalar path when an overlap is found.
	WORD $0xf631             // xor    esi, esi
	// Output overlaps the rdx buffer -> scalar only.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_141
	// Output overlaps the rcx buffer -> scalar only.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_141
	// No overlap: rsi = r10d & -32 (elements covered by the vector loop),
	// rdi = 0 is the loop index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_139: vector loop, 4 YMM registers = 32 x float32 lanes per pass.
LBB0_139:
	LONG $0x0410fcc5; BYTE $0xb9               // vmovups    ymm0, yword [rcx + 4*rdi]
	LONG $0x4c10fcc5; WORD $0x20b9             // vmovups    ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x5410fcc5; WORD $0x40b9             // vmovups    ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c10fcc5; WORD $0x60b9             // vmovups    ymm3, yword [rcx + 4*rdi + 96]
	LONG $0x0458fcc5; BYTE $0xba               // vaddps    ymm0, ymm0, yword [rdx + 4*rdi]
	LONG $0x4c58f4c5; WORD $0x20ba             // vaddps    ymm1, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x5458ecc5; WORD $0x40ba             // vaddps    ymm2, ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c58e4c5; WORD $0x60ba             // vaddps    ymm3, ymm3, yword [rdx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb804             // vmovups    yword [r8 + 4*rdi], ymm0
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x117cc1c4; WORD $0xb854; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm3
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_139
	// Done if the vector loop covered every element.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_141: scalar tail setup. rdi = r10 - rsi - 1 (as ~rsi + r10);
// rax = r10 & 3 = remaining count mod 4, since rsi is 0 or a multiple of 32.
// The data lives in xmm0 here, so rdi (not r9) carries the remaining count.
LBB0_141:
	WORD $0x8948; BYTE $0xf7 // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7 // not    rdi
	WORD $0x014c; BYTE $0xd7 // add    rdi, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_143

// LBB0_142: peel loop, one float per iteration, rax iterations.
LBB0_142:
	LONG $0x0410fac5; BYTE $0xb1   // vmovss    xmm0, dword [rcx + 4*rsi]
	LONG $0x0458fac5; BYTE $0xb2   // vaddss    xmm0, xmm0, dword [rdx + 4*rsi]
	LONG $0x117ac1c4; WORD $0xb004 // vmovss    dword [r8 + 4*rsi], xmm0
	LONG $0x01c68348               // add    rsi, 1
	LONG $0xffc08348               // add    rax, -1
	JNE  LBB0_142

// LBB0_143: fewer than 4 elements remained in total -> the peel loop finished
// the job.
LBB0_143:
	LONG $0x03ff8348 // cmp    rdi, 3
	JB   LBB0_825

// LBB0_144: 4x-unrolled scalar loop over the remaining multiple-of-4 elements.
LBB0_144:
	LONG $0x0410fac5; BYTE $0xb1               // vmovss    xmm0, dword [rcx + 4*rsi]
	LONG $0x0458fac5; BYTE $0xb2               // vaddss    xmm0, xmm0, dword [rdx + 4*rsi]
	LONG $0x117ac1c4; WORD $0xb004             // vmovss    dword [r8 + 4*rsi], xmm0
	LONG $0x4410fac5; WORD $0x04b1             // vmovss    xmm0, dword [rcx + 4*rsi + 4]
	LONG $0x4458fac5; WORD $0x04b2             // vaddss    xmm0, xmm0, dword [rdx + 4*rsi + 4]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x04 // vmovss    dword [r8 + 4*rsi + 4], xmm0
	LONG $0x4410fac5; WORD $0x08b1             // vmovss    xmm0, dword [rcx + 4*rsi + 8]
	LONG $0x4458fac5; WORD $0x08b2             // vaddss    xmm0, xmm0, dword [rdx + 4*rsi + 8]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x08 // vmovss    dword [r8 + 4*rsi + 8], xmm0
	LONG $0x4410fac5; WORD $0x0cb1             // vmovss    xmm0, dword [rcx + 4*rsi + 12]
	LONG $0x4458fac5; WORD $0x0cb2             // vaddss    xmm0, xmm0, dword [rdx + 4*rsi + 12]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x0c // vmovss    dword [r8 + 4*rsi + 12], xmm0
	LONG $0x04c68348                           // add    rsi, 4
	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
	JNE  LBB0_144
	JMP  LBB0_825

// LBB0_257: 64-bit integer element-wise add, [r8][i] = [rcx][i] + [rdx][i]
// for i in [0, r10). The instruction sequence matches LBB0_124; only the
// labels differ.
// A runtime overlap test comes first: if the output range [r8, r8 + 8*r10)
// intersects either input range, only the scalar code (from index 0) is used.
// NOTE(review): presumably rdx/rcx/r8 are inLeft/inRight/out from the function
// prologue, r10 is the element count (> 0, and >= 16 so the vector loop makes
// at least one pass), and LBB0_825 is the common exit -- confirm in the
// dispatch code outside this section.
LBB0_257:
	// Flags: r9b = (rdx_end > r8), r11b = (r8_end > rdx),
	//        al  = (rcx_end > r8), dil  = (r8_end > rcx).
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index for the scalar path when an overlap is found.
	WORD $0xf631             // xor    esi, esi
	// Output overlaps the rdx buffer -> scalar only.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_262
	// Output overlaps the rcx buffer -> scalar only.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_262
	// No overlap: rsi = r10d & -16 (elements covered by the vector loop),
	// rdi = 0 is the loop index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

// LBB0_260: vector loop, 4 YMM registers = 16 x 64-bit lanes per pass.
LBB0_260:
	LONG $0x046ffec5; BYTE $0xf9               // vmovdqu    ymm0, yword [rcx + 8*rdi]
	LONG $0x4c6ffec5; WORD $0x20f9             // vmovdqu    ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x546ffec5; WORD $0x40f9             // vmovdqu    ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60f9             // vmovdqu    ymm3, yword [rcx + 8*rdi + 96]
	LONG $0x04d4fdc5; BYTE $0xfa               // vpaddq    ymm0, ymm0, yword [rdx + 8*rdi]
	LONG $0x4cd4f5c5; WORD $0x20fa             // vpaddq    ymm1, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x54d4edc5; WORD $0x40fa             // vpaddq    ymm2, ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5cd4e5c5; WORD $0x60fa             // vpaddq    ymm3, ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf804             // vmovdqu    yword [r8 + 8*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xf84c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_260
	// Done if the vector loop covered every element.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_262: scalar tail setup. r9 = r10 - rsi - 1 (as ~rsi + r10);
// rax = r10 & 3 = remaining count mod 4, since rsi is 0 or a multiple of 16.
LBB0_262:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_264

// LBB0_263: peel loop, one element per iteration, rax iterations.
LBB0_263:
	LONG $0xf13c8b48 // mov    rdi, qword [rcx + 8*rsi]
	LONG $0xf23c0348 // add    rdi, qword [rdx + 8*rsi]
	LONG $0xf03c8949 // mov    qword [r8 + 8*rsi], rdi
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc08348 // add    rax, -1
	JNE  LBB0_263

// LBB0_264: fewer than 4 elements remained in total -> the peel loop finished
// the job.
LBB0_264:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_265: 4x-unrolled scalar loop over the remaining multiple-of-4 elements.
LBB0_265:
	LONG $0xf1048b48             // mov    rax, qword [rcx + 8*rsi]
	LONG $0xf2040348             // add    rax, qword [rdx + 8*rsi]
	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
	LONG $0xf1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rsi + 8]
	LONG $0xf2440348; BYTE $0x08 // add    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
	LONG $0xf1448b48; BYTE $0x10 // mov    rax, qword [rcx + 8*rsi + 16]
	LONG $0xf2440348; BYTE $0x10 // add    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
	LONG $0xf1448b48; BYTE $0x18 // mov    rax, qword [rcx + 8*rsi + 24]
	LONG $0xf2440348; BYTE $0x18 // add    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_265
	JMP  LBB0_825

// LBB0_269: single-precision float element-wise add,
// [r8][i] = [rcx][i] + [rdx][i] for i in [0, r10). The instruction sequence
// matches LBB0_136; only the labels differ.
// A runtime overlap test comes first: if the output range [r8, r8 + 4*r10)
// intersects either input range, only the scalar code (from index 0) is used.
// NOTE(review): presumably rdx/rcx/r8 are inLeft/inRight/out from the function
// prologue, r10 is the element count (> 0, and >= 32 so the vector loop makes
// at least one pass), and LBB0_825 is the common exit -- confirm in the
// dispatch code outside this section.
LBB0_269:
	// Flags: r9b = (rdx_end > r8), r11b = (r8_end > rdx),
	//        al  = (rcx_end > r8), dil  = (r8_end > rcx).
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index for the scalar path when an overlap is found.
	WORD $0xf631             // xor    esi, esi
	// Output overlaps the rdx buffer -> scalar only.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_274
	// Output overlaps the rcx buffer -> scalar only.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_274
	// No overlap: rsi = r10d & -32 (elements covered by the vector loop),
	// rdi = 0 is the loop index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_272: vector loop, 4 YMM registers = 32 x float32 lanes per pass.
LBB0_272:
	LONG $0x0410fcc5; BYTE $0xb9               // vmovups    ymm0, yword [rcx + 4*rdi]
	LONG $0x4c10fcc5; WORD $0x20b9             // vmovups    ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x5410fcc5; WORD $0x40b9             // vmovups    ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c10fcc5; WORD $0x60b9             // vmovups    ymm3, yword [rcx + 4*rdi + 96]
	LONG $0x0458fcc5; BYTE $0xba               // vaddps    ymm0, ymm0, yword [rdx + 4*rdi]
	LONG $0x4c58f4c5; WORD $0x20ba             // vaddps    ymm1, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x5458ecc5; WORD $0x40ba             // vaddps    ymm2, ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c58e4c5; WORD $0x60ba             // vaddps    ymm3, ymm3, yword [rdx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb804             // vmovups    yword [r8 + 4*rdi], ymm0
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x117cc1c4; WORD $0xb854; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm3
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_272
	// Done if the vector loop covered every element.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_274: scalar tail setup. rdi = r10 - rsi - 1 (as ~rsi + r10);
// rax = r10 & 3 = remaining count mod 4, since rsi is 0 or a multiple of 32.
// The data lives in xmm0 here, so rdi (not r9) carries the remaining count.
LBB0_274:
	WORD $0x8948; BYTE $0xf7 // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7 // not    rdi
	WORD $0x014c; BYTE $0xd7 // add    rdi, r10
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_276

// LBB0_275: peel loop, one float per iteration, rax iterations.
LBB0_275:
	LONG $0x0410fac5; BYTE $0xb1   // vmovss    xmm0, dword [rcx + 4*rsi]
	LONG $0x0458fac5; BYTE $0xb2   // vaddss    xmm0, xmm0, dword [rdx + 4*rsi]
	LONG $0x117ac1c4; WORD $0xb004 // vmovss    dword [r8 + 4*rsi], xmm0
	LONG $0x01c68348               // add    rsi, 1
	LONG $0xffc08348               // add    rax, -1
	JNE  LBB0_275

// LBB0_276: fewer than 4 elements remained in total -> the peel loop finished
// the job.
LBB0_276:
	LONG $0x03ff8348 // cmp    rdi, 3
	JB   LBB0_825

// LBB0_277: 4x-unrolled scalar loop over the remaining multiple-of-4 elements.
LBB0_277:
	LONG $0x0410fac5; BYTE $0xb1               // vmovss    xmm0, dword [rcx + 4*rsi]
	LONG $0x0458fac5; BYTE $0xb2               // vaddss    xmm0, xmm0, dword [rdx + 4*rsi]
	LONG $0x117ac1c4; WORD $0xb004             // vmovss    dword [r8 + 4*rsi], xmm0
	LONG $0x4410fac5; WORD $0x04b1             // vmovss    xmm0, dword [rcx + 4*rsi + 4]
	LONG $0x4458fac5; WORD $0x04b2             // vaddss    xmm0, xmm0, dword [rdx + 4*rsi + 4]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x04 // vmovss    dword [r8 + 4*rsi + 4], xmm0
	LONG $0x4410fac5; WORD $0x08b1             // vmovss    xmm0, dword [rcx + 4*rsi + 8]
	LONG $0x4458fac5; WORD $0x08b2             // vaddss    xmm0, xmm0, dword [rdx + 4*rsi + 8]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x08 // vmovss    dword [r8 + 4*rsi + 8], xmm0
	LONG $0x4410fac5; WORD $0x0cb1             // vmovss    xmm0, dword [rcx + 4*rsi + 12]
	LONG $0x4458fac5; WORD $0x0cb2             // vaddss    xmm0, xmm0, dword [rdx + 4*rsi + 12]
	LONG $0x117ac1c4; WORD $0xb044; BYTE $0x0c // vmovss    dword [r8 + 4*rsi + 12], xmm0
	LONG $0x04c68348                           // add    rsi, 4
	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
	JNE  LBB0_277
	JMP  LBB0_825

// LBB0_574: entry of a kernel over 1-byte elements (addresses use scale 1).
// It tests whether the output range [r8, r8 + r10) overlaps either input
// range; on overlap it jumps to LBB0_584 with edi = 0. Otherwise it derives
// the trip counts for a 32-element-per-group loop and enters at LBB0_578 or
// LBB0_580.
// NOTE(review): LBB0_578, LBB0_580 and LBB0_584 are outside this section, so
// the arithmetic operation and the exact role of each target are not visible
// here; presumably LBB0_578 is a 4-groups-per-pass loop, LBB0_580 handles the
// r9d leftover groups, and LBB0_584 is the scalar fallback -- confirm there.
LBB0_574:
	// Flags: r9b = (rdx_end > r8), r11b = (r8_end > rdx),
	//        al  = (rcx_end > r8), sil  = (r8_end > rcx).
	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd6970f40         // seta    sil
	// edi = 0 is what LBB0_584 receives on the overlap paths.
	WORD $0xff31             // xor    edi, edi
	// Output overlaps the rdx buffer.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_584
	// Output overlaps the rcx buffer.
	WORD $0x2040; BYTE $0xf0 // and    al, sil
	JNE  LBB0_584
	// No overlap: rdi = r10d & -32 (elements covered by whole 32-element groups).
	WORD $0x8944; BYTE $0xd7 // mov    edi, r10d
	WORD $0xe783; BYTE $0xe0 // and    edi, -32
	// rsi = rdi - 32; rax = (rsi >> 5) + 1 = number of 32-element groups.
	LONG $0xe0778d48         // lea    rsi, [rdi - 32]
	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
	LONG $0x05e8c148         // shr    rax, 5
	LONG $0x01c08348         // add    rax, 1
	// r9d = group count mod 4.
	WORD $0x8941; BYTE $0xc1 // mov    r9d, eax
	LONG $0x03e18341         // and    r9d, 3
	// rsi >= 96 means at least 4 groups -> LBB0_578; otherwise continue at
	// LBB0_580 with rsi = 0.
	LONG $0x60fe8348         // cmp    rsi, 96
	JAE  LBB0_578
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_580

// LBB0_710: entry of a kernel over 1-byte elements (addresses use scale 1).
// The instruction sequence matches LBB0_574 except for its jump targets.
// It tests whether the output range [r8, r8 + r10) overlaps either input
// range; on overlap it jumps to LBB0_720 with edi = 0. Otherwise it derives
// the trip counts for a 32-element-per-group loop and enters at LBB0_714 or
// LBB0_716.
// NOTE(review): LBB0_714, LBB0_716 and LBB0_720 are outside this section, so
// the arithmetic operation and the exact role of each target are not visible
// here; presumably LBB0_714 is a 4-groups-per-pass loop, LBB0_716 handles the
// r9d leftover groups, and LBB0_720 is the scalar fallback -- confirm there.
LBB0_710:
	// Flags: r9b = (rdx_end > r8), r11b = (r8_end > rdx),
	//        al  = (rcx_end > r8), sil  = (r8_end > rcx).
	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd6970f40         // seta    sil
	// edi = 0 is what LBB0_720 receives on the overlap paths.
	WORD $0xff31             // xor    edi, edi
	// Output overlaps the rdx buffer.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_720
	// Output overlaps the rcx buffer.
	WORD $0x2040; BYTE $0xf0 // and    al, sil
	JNE  LBB0_720
	// No overlap: rdi = r10d & -32 (elements covered by whole 32-element groups).
	WORD $0x8944; BYTE $0xd7 // mov    edi, r10d
	WORD $0xe783; BYTE $0xe0 // and    edi, -32
	// rsi = rdi - 32; rax = (rsi >> 5) + 1 = number of 32-element groups.
	LONG $0xe0778d48         // lea    rsi, [rdi - 32]
	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
	LONG $0x05e8c148         // shr    rax, 5
	LONG $0x01c08348         // add    rax, 1
	// r9d = group count mod 4.
	WORD $0x8941; BYTE $0xc1 // mov    r9d, eax
	LONG $0x03e18341         // and    r9d, 3
	// rsi >= 96 means at least 4 groups -> LBB0_714; otherwise continue at
	// LBB0_716 with rsi = 0.
	LONG $0x60fe8348         // cmp    rsi, 96
	JAE  LBB0_714
	WORD $0xf631             // xor    esi, esi
	JMP  LBB0_716

// LBB0_49: 8-bit element-wise add, [r8][i] = [rcx][i] + [rdx][i] for
// i in [0, r10).
// A runtime overlap test comes first: if the output range [r8, r8 + r10)
// intersects either input range, only the scalar code (from index 0) is used.
// NOTE(review): presumably rdx/rcx/r8 are inLeft/inRight/out from the function
// prologue, r10 is the element count (> 0, and >= 128 so the vector loop makes
// at least one pass), and LBB0_825 is the common exit -- confirm in the
// dispatch code outside this section.
LBB0_49:
	// Flags: r9b = (rdx_end > r8), r11b = (r8_end > rdx),
	//        al  = (rcx_end > r8), dil  = (r8_end > rcx).
	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index for the scalar path when an overlap is found.
	WORD $0xf631             // xor    esi, esi
	// Output overlaps the rdx buffer -> scalar only.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_54
	// Output overlaps the rcx buffer -> scalar only.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_54
	// No overlap: rsi = r10d & -128 (elements covered by the vector loop),
	// rdi = 0 is the loop index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	WORD $0xff31             // xor    edi, edi

// LBB0_52: vector loop, 4 YMM registers = 128 x 8-bit lanes per pass.
LBB0_52:
	LONG $0x046ffec5; BYTE $0x39               // vmovdqu    ymm0, yword [rcx + rdi]
	LONG $0x4c6ffec5; WORD $0x2039             // vmovdqu    ymm1, yword [rcx + rdi + 32]
	LONG $0x546ffec5; WORD $0x4039             // vmovdqu    ymm2, yword [rcx + rdi + 64]
	LONG $0x5c6ffec5; WORD $0x6039             // vmovdqu    ymm3, yword [rcx + rdi + 96]
	LONG $0x04fcfdc5; BYTE $0x3a               // vpaddb    ymm0, ymm0, yword [rdx + rdi]
	LONG $0x4cfcf5c5; WORD $0x203a             // vpaddb    ymm1, ymm1, yword [rdx + rdi + 32]
	LONG $0x54fcedc5; WORD $0x403a             // vpaddb    ymm2, ymm2, yword [rdx + rdi + 64]
	LONG $0x5cfce5c5; WORD $0x603a             // vpaddb    ymm3, ymm3, yword [rdx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x3804             // vmovdqu    yword [r8 + rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x384c; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm3
	// rdi += 128, written as a subtract of -128 because +128 does not fit a
	// sign-extended 8-bit immediate while -128 does.
	LONG $0x80ef8348                           // sub    rdi, -128
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_52
	// Done if the vector loop covered every element.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_54: scalar tail setup. r9 = r10 - rsi - 1 (as ~rsi + r10);
// rdi = r10 & 3 = remaining count mod 4, since rsi is 0 or a multiple of 128.
// The peel counter lives in rdi here because al carries the data byte.
LBB0_54:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB0_56

// LBB0_55: peel loop, one byte per iteration, rdi iterations.
LBB0_55:
	LONG $0x3104b60f         // movzx    eax, byte [rcx + rsi]
	WORD $0x0402; BYTE $0x32 // add    al, byte [rdx + rsi]
	LONG $0x30048841         // mov    byte [r8 + rsi], al
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB0_55

// LBB0_56: fewer than 4 elements remained in total -> the peel loop finished
// the job.
LBB0_56:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_57: 4x-unrolled scalar loop over the remaining multiple-of-4 elements.
LBB0_57:
	LONG $0x3104b60f             // movzx    eax, byte [rcx + rsi]
	WORD $0x0402; BYTE $0x32     // add    al, byte [rdx + rsi]
	LONG $0x30048841             // mov    byte [r8 + rsi], al
	LONG $0x3144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rsi + 1]
	LONG $0x01324402             // add    al, byte [rdx + rsi + 1]
	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
	LONG $0x3144b60f; BYTE $0x02 // movzx    eax, byte [rcx + rsi + 2]
	LONG $0x02324402             // add    al, byte [rdx + rsi + 2]
	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
	LONG $0x3144b60f; BYTE $0x03 // movzx    eax, byte [rcx + rsi + 3]
	LONG $0x03324402             // add    al, byte [rdx + rsi + 3]
	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_57
	JMP  LBB0_825

// LBB0_315: 8-bit element-wise subtract, [r8][i] = [rdx][i] - [rcx][i] for
// i in [0, r10). Operand order matters: the minuend is loaded from rdx and
// the subtrahend comes from rcx.
// A runtime overlap test comes first: if the output range [r8, r8 + r10)
// intersects either input range, only the scalar code (from index 0) is used.
// NOTE(review): presumably rdx/rcx/r8 are inLeft/inRight/out from the function
// prologue (making this inLeft - inRight), r10 is the element count (> 0, and
// >= 128 so the vector loop makes at least one pass), and LBB0_825 is the
// common exit -- confirm in the dispatch code outside this section.
LBB0_315:
	// Flags: r9b = (rdx_end > r8), r11b = (r8_end > rdx),
	//        al  = (rcx_end > r8), dil  = (r8_end > rcx).
	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index for the scalar path when an overlap is found.
	WORD $0xf631             // xor    esi, esi
	// Output overlaps the rdx buffer -> scalar only.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_320
	// Output overlaps the rcx buffer -> scalar only.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_320
	// No overlap: rsi = r10d & -128 (elements covered by the vector loop),
	// rdi = 0 is the loop index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	WORD $0xff31             // xor    edi, edi

// LBB0_318: vector loop, 4 YMM registers = 128 x 8-bit lanes per pass.
LBB0_318:
	LONG $0x046ffec5; BYTE $0x3a               // vmovdqu    ymm0, yword [rdx + rdi]
	LONG $0x4c6ffec5; WORD $0x203a             // vmovdqu    ymm1, yword [rdx + rdi + 32]
	LONG $0x546ffec5; WORD $0x403a             // vmovdqu    ymm2, yword [rdx + rdi + 64]
	LONG $0x5c6ffec5; WORD $0x603a             // vmovdqu    ymm3, yword [rdx + rdi + 96]
	LONG $0x04f8fdc5; BYTE $0x39               // vpsubb    ymm0, ymm0, yword [rcx + rdi]
	LONG $0x4cf8f5c5; WORD $0x2039             // vpsubb    ymm1, ymm1, yword [rcx + rdi + 32]
	LONG $0x54f8edc5; WORD $0x4039             // vpsubb    ymm2, ymm2, yword [rcx + rdi + 64]
	LONG $0x5cf8e5c5; WORD $0x6039             // vpsubb    ymm3, ymm3, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x3804             // vmovdqu    yword [r8 + rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x384c; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm3
	// rdi += 128, written as a subtract of -128 because +128 does not fit a
	// sign-extended 8-bit immediate while -128 does.
	LONG $0x80ef8348                           // sub    rdi, -128
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_318
	// Done if the vector loop covered every element.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_320: scalar tail setup. r9 = r10 - rsi - 1 (as ~rsi + r10);
// rdi = r10 & 3 = remaining count mod 4, since rsi is 0 or a multiple of 128.
// The peel counter lives in rdi here because al carries the data byte.
LBB0_320:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB0_322

// LBB0_321: peel loop, one byte per iteration, rdi iterations.
LBB0_321:
	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
	WORD $0x042a; BYTE $0x31 // sub    al, byte [rcx + rsi]
	LONG $0x30048841         // mov    byte [r8 + rsi], al
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB0_321

// LBB0_322: fewer than 4 elements remained in total -> the peel loop finished
// the job.
LBB0_322:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_323: 4x-unrolled scalar loop over the remaining multiple-of-4 elements.
LBB0_323:
	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
	WORD $0x042a; BYTE $0x31     // sub    al, byte [rcx + rsi]
	LONG $0x30048841             // mov    byte [r8 + rsi], al
	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
	LONG $0x0131442a             // sub    al, byte [rcx + rsi + 1]
	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
	LONG $0x0231442a             // sub    al, byte [rcx + rsi + 2]
	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
	LONG $0x0331442a             // sub    al, byte [rcx + rsi + 3]
	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_323
	JMP  LBB0_825

// LBB0_182: 8-bit element-wise add, [r8][i] = [rcx][i] + [rdx][i] for
// i in [0, r10). The instruction sequence matches LBB0_49; only the labels
// differ.
// A runtime overlap test comes first: if the output range [r8, r8 + r10)
// intersects either input range, only the scalar code (from index 0) is used.
// NOTE(review): presumably rdx/rcx/r8 are inLeft/inRight/out from the function
// prologue, r10 is the element count (> 0, and >= 128 so the vector loop makes
// at least one pass), and LBB0_825 is the common exit -- confirm in the
// dispatch code outside this section.
LBB0_182:
	// Flags: r9b = (rdx_end > r8), r11b = (r8_end > rdx),
	//        al  = (rcx_end > r8), dil  = (r8_end > rcx).
	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index for the scalar path when an overlap is found.
	WORD $0xf631             // xor    esi, esi
	// Output overlaps the rdx buffer -> scalar only.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_187
	// Output overlaps the rcx buffer -> scalar only.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_187
	// No overlap: rsi = r10d & -128 (elements covered by the vector loop),
	// rdi = 0 is the loop index.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	WORD $0xff31             // xor    edi, edi

// LBB0_185: vector loop, 4 YMM registers = 128 x 8-bit lanes per pass.
LBB0_185:
	LONG $0x046ffec5; BYTE $0x39               // vmovdqu    ymm0, yword [rcx + rdi]
	LONG $0x4c6ffec5; WORD $0x2039             // vmovdqu    ymm1, yword [rcx + rdi + 32]
	LONG $0x546ffec5; WORD $0x4039             // vmovdqu    ymm2, yword [rcx + rdi + 64]
	LONG $0x5c6ffec5; WORD $0x6039             // vmovdqu    ymm3, yword [rcx + rdi + 96]
	LONG $0x04fcfdc5; BYTE $0x3a               // vpaddb    ymm0, ymm0, yword [rdx + rdi]
	LONG $0x4cfcf5c5; WORD $0x203a             // vpaddb    ymm1, ymm1, yword [rdx + rdi + 32]
	LONG $0x54fcedc5; WORD $0x403a             // vpaddb    ymm2, ymm2, yword [rdx + rdi + 64]
	LONG $0x5cfce5c5; WORD $0x603a             // vpaddb    ymm3, ymm3, yword [rdx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x3804             // vmovdqu    yword [r8 + rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x384c; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm3
	// rdi += 128, written as a subtract of -128 because +128 does not fit a
	// sign-extended 8-bit immediate while -128 does.
	LONG $0x80ef8348                           // sub    rdi, -128
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_185
	// Done if the vector loop covered every element.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_187: scalar tail setup. r9 = r10 - rsi - 1 (as ~rsi + r10);
// rdi = r10 & 3 = remaining count mod 4, since rsi is 0 or a multiple of 128.
// The peel counter lives in rdi here because al carries the data byte.
LBB0_187:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB0_189

// LBB0_188: peel loop, one byte per iteration, rdi iterations.
LBB0_188:
	LONG $0x3104b60f         // movzx    eax, byte [rcx + rsi]
	WORD $0x0402; BYTE $0x32 // add    al, byte [rdx + rsi]
	LONG $0x30048841         // mov    byte [r8 + rsi], al
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB0_188

// LBB0_189: fewer than 4 elements remained in total -> the peel loop finished
// the job.
LBB0_189:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_190: 4x-unrolled scalar loop over the remaining multiple-of-4 elements.
LBB0_190:
	LONG $0x3104b60f             // movzx    eax, byte [rcx + rsi]
	WORD $0x0402; BYTE $0x32     // add    al, byte [rdx + rsi]
	LONG $0x30048841             // mov    byte [r8 + rsi], al
	LONG $0x3144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rsi + 1]
	LONG $0x01324402             // add    al, byte [rdx + rsi + 1]
	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
	LONG $0x3144b60f; BYTE $0x02 // movzx    eax, byte [rcx + rsi + 2]
	LONG $0x02324402             // add    al, byte [rdx + rsi + 2]
	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
	LONG $0x3144b60f; BYTE $0x03 // movzx    eax, byte [rcx + rsi + 3]
	LONG $0x03324402             // add    al, byte [rdx + rsi + 3]
	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_190
	JMP  LBB0_825

// LBB0_448: element-wise 8-bit subtract, [r8 + i] = [rdx + i] - [rcx + i], for
// r10 elements. Layout: overlap check -> 128-byte AVX2 loop (LBB0_451) ->
// scalar tail (LBB0_453 .. LBB0_456). Every finished path jumps to LBB0_825,
// which is defined outside this excerpt.
// NOTE(review): rdx/rcx/r8 are loaded from inLeft/inRight/out at function entry
// and r10 is expected to hold the element count; the dispatch that reaches this
// label is outside this excerpt, so confirm both there.
LBB0_448:
	// rsi = r8 + r10: one past the last output byte.
	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
	// Overlap test (unsigned compares via seta) between the output range and
	// each input range:
	//   r9b = (rdx + r10 > r8),  r11b = (r8 + r10 > rdx)
	//   al  = (rcx + r10 > r8),  dil  = (r8 + r10 > rcx)
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0 so the scalar fallback starts at the first element.
	WORD $0xf631             // xor    esi, esi
	// If either pair is both true, that input overlaps the output: skip the
	// vector loop and do everything in the scalar tail.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_453
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_453
	// No overlap: rsi = r10d & -128 (bytes handled by the vector loop),
	// rdi = 0 (running byte offset).
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	WORD $0xff31             // xor    edi, edi

// LBB0_451: vector loop, 128 bytes (4 x YMM) per iteration.
LBB0_451:
	// Load the minuend bytes from [rdx + rdi].
	LONG $0x046ffec5; BYTE $0x3a               // vmovdqu    ymm0, yword [rdx + rdi]
	LONG $0x4c6ffec5; WORD $0x203a             // vmovdqu    ymm1, yword [rdx + rdi + 32]
	LONG $0x546ffec5; WORD $0x403a             // vmovdqu    ymm2, yword [rdx + rdi + 64]
	LONG $0x5c6ffec5; WORD $0x603a             // vmovdqu    ymm3, yword [rdx + rdi + 96]
	// Lane-wise wrapping byte subtract of the bytes at [rcx + rdi].
	LONG $0x04f8fdc5; BYTE $0x39               // vpsubb    ymm0, ymm0, yword [rcx + rdi]
	LONG $0x4cf8f5c5; WORD $0x2039             // vpsubb    ymm1, ymm1, yword [rcx + rdi + 32]
	LONG $0x54f8edc5; WORD $0x4039             // vpsubb    ymm2, ymm2, yword [rcx + rdi + 64]
	LONG $0x5cf8e5c5; WORD $0x6039             // vpsubb    ymm3, ymm3, yword [rcx + rdi + 96]
	// Store the four result vectors to [r8 + rdi].
	LONG $0x7f7ec1c4; WORD $0x3804             // vmovdqu    yword [r8 + rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x384c; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm3
	// rdi += 128, written as "sub -128" because -128 fits in a sign-extended
	// imm8; repeat until rdi == rsi.
	LONG $0x80ef8348                           // sub    rdi, -128
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_451
	// Done if the vector loop covered all r10 elements, else run the tail.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_453: scalar tail starting at index rsi (0 on the overlap path).
LBB0_453:
	// r9 = r10 - rsi - 1 (computed as ~rsi + r10): elements still to do, minus one.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	// rdi = r10 & 3: iteration count for the peel loop. With rsi = 0 or a
	// multiple of 128 this equals (r10 - rsi) & 3.
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB0_455

// LBB0_454: peel loop, one byte per iteration; rdi counts down to zero.
LBB0_454:
	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
	WORD $0x042a; BYTE $0x31 // sub    al, byte [rcx + rsi]
	LONG $0x30048841         // mov    byte [r8 + rsi], al
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB0_454

// LBB0_455: r9 < 3 means at most 3 elements remained at LBB0_453, and the
// peel loop has already handled all of them.
LBB0_455:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_456: remaining elements, unrolled four per iteration, until rsi == r10.
LBB0_456:
	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
	WORD $0x042a; BYTE $0x31     // sub    al, byte [rcx + rsi]
	LONG $0x30048841             // mov    byte [r8 + rsi], al
	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
	LONG $0x0131442a             // sub    al, byte [rcx + rsi + 1]
	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
	LONG $0x0231442a             // sub    al, byte [rcx + rsi + 2]
	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
	LONG $0x0331442a             // sub    al, byte [rcx + rsi + 3]
	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_456
	JMP  LBB0_825

// LBB0_638: element-wise 32-bit multiply (low 32 bits of each product),
// [r8 + 4*i] = [rcx + 4*i] * [rdx + 4*i], for r10 elements.
// Layout: overlap check -> 32-element AVX2 loop (LBB0_641) -> scalar tail
// (LBB0_643 .. LBB0_646). Every finished path jumps to LBB0_825, which is
// defined outside this excerpt.
// NOTE(review): rdx/rcx/r8 are loaded from inLeft/inRight/out at function entry
// and r10 is expected to hold the element count; the dispatch that reaches this
// label is outside this excerpt, so confirm both there.
LBB0_638:
	// rsi = r8 + 4*r10: one past the last output dword.
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	// Overlap test (unsigned compares via seta) between the output range and
	// each input range:
	//   r9b = (rdx + 4*r10 > r8),  r11b = (r8 + 4*r10 > rdx)
	//   al  = (rcx + 4*r10 > r8),  dil  = (r8 + 4*r10 > rcx)
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0 so the scalar fallback starts at the first element.
	WORD $0xf631             // xor    esi, esi
	// If either pair is both true, that input overlaps the output: skip the
	// vector loop and do everything in the scalar tail.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_643
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_643
	// No overlap: rsi = r10d & -32 (elements handled by the vector loop),
	// rdi = 0 (running element index).
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_641: vector loop, 32 dwords (4 x YMM, 128 bytes) per iteration.
LBB0_641:
	// Load 32 dwords from [rcx + 4*rdi].
	LONG $0x046ffec5; BYTE $0xb9               // vmovdqu    ymm0, yword [rcx + 4*rdi]
	LONG $0x4c6ffec5; WORD $0x20b9             // vmovdqu    ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x546ffec5; WORD $0x40b9             // vmovdqu    ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60b9             // vmovdqu    ymm3, yword [rcx + 4*rdi + 96]
	// Lane-wise 32-bit multiply (low half kept) by the dwords at [rdx + 4*rdi].
	LONG $0x407de2c4; WORD $0xba04             // vpmulld    ymm0, ymm0, yword [rdx + 4*rdi]
	LONG $0x4075e2c4; WORD $0xba4c; BYTE $0x20 // vpmulld    ymm1, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x406de2c4; WORD $0xba54; BYTE $0x40 // vpmulld    ymm2, ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x4065e2c4; WORD $0xba5c; BYTE $0x60 // vpmulld    ymm3, ymm3, yword [rdx + 4*rdi + 96]
	// Store the four result vectors to [r8 + 4*rdi].
	LONG $0x7f7ec1c4; WORD $0xb804             // vmovdqu    yword [r8 + 4*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xb84c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm3
	// Advance by 32 elements; repeat until rdi == rsi.
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_641
	// Done if the vector loop covered all r10 elements, else run the tail.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_643: scalar tail starting at index rsi (0 on the overlap path).
LBB0_643:
	// r9 = r10 - rsi - 1 (computed as ~rsi + r10): elements still to do, minus one.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	// rax = r10 & 3: iteration count for the peel loop. With rsi = 0 or a
	// multiple of 32 this equals (r10 - rsi) & 3.
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_645

// LBB0_644: peel loop, one dword per iteration; rax counts down to zero.
LBB0_644:
	WORD $0x3c8b; BYTE $0xb1 // mov    edi, dword [rcx + 4*rsi]
	LONG $0xb23caf0f         // imul    edi, dword [rdx + 4*rsi]
	LONG $0xb03c8941         // mov    dword [r8 + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_644

// LBB0_645: r9 < 3 means at most 3 elements remained at LBB0_643, and the
// peel loop has already handled all of them.
LBB0_645:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_646: remaining elements, unrolled four per iteration, until rsi == r10.
LBB0_646:
	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
	LONG $0xb204af0f             // imul    eax, dword [rdx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
	LONG $0xb244af0f; BYTE $0x04 // imul    eax, dword [rdx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
	LONG $0xb244af0f; BYTE $0x08 // imul    eax, dword [rdx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
	LONG $0xb244af0f; BYTE $0x0c // imul    eax, dword [rdx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_646
	JMP  LBB0_825

// LBB0_774: element-wise 32-bit multiply (low 32 bits of each product),
// [r8 + 4*i] = [rcx + 4*i] * [rdx + 4*i], for r10 elements.
// Layout: overlap check -> 32-element AVX2 loop (LBB0_777) -> scalar tail
// (LBB0_779 .. LBB0_782). Every finished path jumps to LBB0_825, which is
// defined outside this excerpt. Which (typ, op) dispatch reaches this label is
// also decided outside this excerpt.
// NOTE(review): rdx/rcx/r8 are loaded from inLeft/inRight/out at function entry
// and r10 is expected to hold the element count; confirm on the dispatch path.
LBB0_774:
	// rsi = r8 + 4*r10: one past the last output dword.
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	// Overlap test (unsigned compares via seta) between the output range and
	// each input range:
	//   r9b = (rdx + 4*r10 > r8),  r11b = (r8 + 4*r10 > rdx)
	//   al  = (rcx + 4*r10 > r8),  dil  = (r8 + 4*r10 > rcx)
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0 so the scalar fallback starts at the first element.
	WORD $0xf631             // xor    esi, esi
	// If either pair is both true, that input overlaps the output: skip the
	// vector loop and do everything in the scalar tail.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_779
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_779
	// No overlap: rsi = r10d & -32 (elements handled by the vector loop),
	// rdi = 0 (running element index).
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_777: vector loop, 32 dwords (4 x YMM, 128 bytes) per iteration.
LBB0_777:
	// Load 32 dwords from [rcx + 4*rdi].
	LONG $0x046ffec5; BYTE $0xb9               // vmovdqu    ymm0, yword [rcx + 4*rdi]
	LONG $0x4c6ffec5; WORD $0x20b9             // vmovdqu    ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x546ffec5; WORD $0x40b9             // vmovdqu    ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60b9             // vmovdqu    ymm3, yword [rcx + 4*rdi + 96]
	// Lane-wise 32-bit multiply (low half kept) by the dwords at [rdx + 4*rdi].
	LONG $0x407de2c4; WORD $0xba04             // vpmulld    ymm0, ymm0, yword [rdx + 4*rdi]
	LONG $0x4075e2c4; WORD $0xba4c; BYTE $0x20 // vpmulld    ymm1, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x406de2c4; WORD $0xba54; BYTE $0x40 // vpmulld    ymm2, ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x4065e2c4; WORD $0xba5c; BYTE $0x60 // vpmulld    ymm3, ymm3, yword [rdx + 4*rdi + 96]
	// Store the four result vectors to [r8 + 4*rdi].
	LONG $0x7f7ec1c4; WORD $0xb804             // vmovdqu    yword [r8 + 4*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xb84c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm3
	// Advance by 32 elements; repeat until rdi == rsi.
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_777
	// Done if the vector loop covered all r10 elements, else run the tail.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_779: scalar tail starting at index rsi (0 on the overlap path).
LBB0_779:
	// r9 = r10 - rsi - 1 (computed as ~rsi + r10): elements still to do, minus one.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	// rax = r10 & 3: iteration count for the peel loop. With rsi = 0 or a
	// multiple of 32 this equals (r10 - rsi) & 3.
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_781

// LBB0_780: peel loop, one dword per iteration; rax counts down to zero.
LBB0_780:
	WORD $0x3c8b; BYTE $0xb1 // mov    edi, dword [rcx + 4*rsi]
	LONG $0xb23caf0f         // imul    edi, dword [rdx + 4*rsi]
	LONG $0xb03c8941         // mov    dword [r8 + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_780

// LBB0_781: r9 < 3 means at most 3 elements remained at LBB0_779, and the
// peel loop has already handled all of them.
LBB0_781:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_782: remaining elements, unrolled four per iteration, until rsi == r10.
LBB0_782:
	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
	LONG $0xb204af0f             // imul    eax, dword [rdx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
	LONG $0xb244af0f; BYTE $0x04 // imul    eax, dword [rdx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
	LONG $0xb244af0f; BYTE $0x08 // imul    eax, dword [rdx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
	LONG $0xb244af0f; BYTE $0x0c // imul    eax, dword [rdx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_782
	JMP  LBB0_825

// LBB0_103: element-wise 32-bit add, [r8 + 4*i] = [rcx + 4*i] + [rdx + 4*i],
// for r10 elements. Layout: overlap check -> 32-element AVX2 loop (LBB0_106)
// -> scalar tail (LBB0_108 .. LBB0_111). Every finished path jumps to
// LBB0_825, which is defined outside this excerpt.
// NOTE(review): rdx/rcx/r8 are loaded from inLeft/inRight/out at function entry
// and r10 is expected to hold the element count; the dispatch that reaches this
// label is outside this excerpt, so confirm both there.
LBB0_103:
	// rsi = r8 + 4*r10: one past the last output dword.
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	// Overlap test (unsigned compares via seta) between the output range and
	// each input range:
	//   r9b = (rdx + 4*r10 > r8),  r11b = (r8 + 4*r10 > rdx)
	//   al  = (rcx + 4*r10 > r8),  dil  = (r8 + 4*r10 > rcx)
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0 so the scalar fallback starts at the first element.
	WORD $0xf631             // xor    esi, esi
	// If either pair is both true, that input overlaps the output: skip the
	// vector loop and do everything in the scalar tail.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_108
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_108
	// No overlap: rsi = r10d & -32 (elements handled by the vector loop),
	// rdi = 0 (running element index).
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_106: vector loop, 32 dwords (4 x YMM, 128 bytes) per iteration.
LBB0_106:
	// Load 32 dwords from [rcx + 4*rdi].
	LONG $0x046ffec5; BYTE $0xb9               // vmovdqu    ymm0, yword [rcx + 4*rdi]
	LONG $0x4c6ffec5; WORD $0x20b9             // vmovdqu    ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x546ffec5; WORD $0x40b9             // vmovdqu    ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60b9             // vmovdqu    ymm3, yword [rcx + 4*rdi + 96]
	// Lane-wise wrapping 32-bit add with the dwords at [rdx + 4*rdi].
	LONG $0x04fefdc5; BYTE $0xba               // vpaddd    ymm0, ymm0, yword [rdx + 4*rdi]
	LONG $0x4cfef5c5; WORD $0x20ba             // vpaddd    ymm1, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x54feedc5; WORD $0x40ba             // vpaddd    ymm2, ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5cfee5c5; WORD $0x60ba             // vpaddd    ymm3, ymm3, yword [rdx + 4*rdi + 96]
	// Store the four result vectors to [r8 + 4*rdi].
	LONG $0x7f7ec1c4; WORD $0xb804             // vmovdqu    yword [r8 + 4*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xb84c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm3
	// Advance by 32 elements; repeat until rdi == rsi.
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_106
	// Done if the vector loop covered all r10 elements, else run the tail.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_108: scalar tail starting at index rsi (0 on the overlap path).
LBB0_108:
	// r9 = r10 - rsi - 1 (computed as ~rsi + r10): elements still to do, minus one.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	// rax = r10 & 3: iteration count for the peel loop. With rsi = 0 or a
	// multiple of 32 this equals (r10 - rsi) & 3.
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_110

// LBB0_109: peel loop, one dword per iteration; rax counts down to zero.
LBB0_109:
	WORD $0x3c8b; BYTE $0xb1 // mov    edi, dword [rcx + 4*rsi]
	WORD $0x3c03; BYTE $0xb2 // add    edi, dword [rdx + 4*rsi]
	LONG $0xb03c8941         // mov    dword [r8 + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_109

// LBB0_110: r9 < 3 means at most 3 elements remained at LBB0_108, and the
// peel loop has already handled all of them.
LBB0_110:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_111: remaining elements, unrolled four per iteration, until rsi == r10.
LBB0_111:
	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
	WORD $0x0403; BYTE $0xb2     // add    eax, dword [rdx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
	LONG $0x04b24403             // add    eax, dword [rdx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
	LONG $0x08b24403             // add    eax, dword [rdx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
	LONG $0x0cb24403             // add    eax, dword [rdx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_111
	JMP  LBB0_825

// LBB0_369: element-wise 32-bit subtract, [r8 + 4*i] = [rdx + 4*i] - [rcx + 4*i],
// for r10 elements. Layout: overlap check -> 32-element AVX2 loop (LBB0_372)
// -> scalar tail (LBB0_374 .. LBB0_377). Every finished path jumps to
// LBB0_825, which is defined outside this excerpt.
// NOTE(review): rdx/rcx/r8 are loaded from inLeft/inRight/out at function entry
// and r10 is expected to hold the element count; the dispatch that reaches this
// label is outside this excerpt, so confirm both there.
LBB0_369:
	// rsi = r8 + 4*r10: one past the last output dword.
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	// Overlap test (unsigned compares via seta) between the output range and
	// each input range:
	//   r9b = (rdx + 4*r10 > r8),  r11b = (r8 + 4*r10 > rdx)
	//   al  = (rcx + 4*r10 > r8),  dil  = (r8 + 4*r10 > rcx)
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0 so the scalar fallback starts at the first element.
	WORD $0xf631             // xor    esi, esi
	// If either pair is both true, that input overlaps the output: skip the
	// vector loop and do everything in the scalar tail.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_374
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_374
	// No overlap: rsi = r10d & -32 (elements handled by the vector loop),
	// rdi = 0 (running element index).
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_372: vector loop, 32 dwords (4 x YMM, 128 bytes) per iteration.
LBB0_372:
	// Load the minuend dwords from [rdx + 4*rdi].
	LONG $0x046ffec5; BYTE $0xba               // vmovdqu    ymm0, yword [rdx + 4*rdi]
	LONG $0x4c6ffec5; WORD $0x20ba             // vmovdqu    ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x546ffec5; WORD $0x40ba             // vmovdqu    ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60ba             // vmovdqu    ymm3, yword [rdx + 4*rdi + 96]
	// Lane-wise wrapping 32-bit subtract of the dwords at [rcx + 4*rdi].
	LONG $0x04fafdc5; BYTE $0xb9               // vpsubd    ymm0, ymm0, yword [rcx + 4*rdi]
	LONG $0x4cfaf5c5; WORD $0x20b9             // vpsubd    ymm1, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x54faedc5; WORD $0x40b9             // vpsubd    ymm2, ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5cfae5c5; WORD $0x60b9             // vpsubd    ymm3, ymm3, yword [rcx + 4*rdi + 96]
	// Store the four result vectors to [r8 + 4*rdi].
	LONG $0x7f7ec1c4; WORD $0xb804             // vmovdqu    yword [r8 + 4*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xb84c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm3
	// Advance by 32 elements; repeat until rdi == rsi.
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_372
	// Done if the vector loop covered all r10 elements, else run the tail.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_374: scalar tail starting at index rsi (0 on the overlap path).
LBB0_374:
	// r9 = r10 - rsi - 1 (computed as ~rsi + r10): elements still to do, minus one.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	// rax = r10 & 3: iteration count for the peel loop. With rsi = 0 or a
	// multiple of 32 this equals (r10 - rsi) & 3.
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_376

// LBB0_375: peel loop, one dword per iteration; rax counts down to zero.
LBB0_375:
	WORD $0x3c8b; BYTE $0xb2 // mov    edi, dword [rdx + 4*rsi]
	WORD $0x3c2b; BYTE $0xb1 // sub    edi, dword [rcx + 4*rsi]
	LONG $0xb03c8941         // mov    dword [r8 + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_375

// LBB0_376: r9 < 3 means at most 3 elements remained at LBB0_374, and the
// peel loop has already handled all of them.
LBB0_376:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_377: remaining elements, unrolled four per iteration, until rsi == r10.
LBB0_377:
	WORD $0x048b; BYTE $0xb2     // mov    eax, dword [rdx + 4*rsi]
	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	LONG $0x04b2448b             // mov    eax, dword [rdx + 4*rsi + 4]
	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	LONG $0x08b2448b             // mov    eax, dword [rdx + 4*rsi + 8]
	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	LONG $0x0cb2448b             // mov    eax, dword [rdx + 4*rsi + 12]
	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_377
	JMP  LBB0_825

// LBB0_236: element-wise 32-bit add, [r8 + 4*i] = [rcx + 4*i] + [rdx + 4*i],
// for r10 elements. Layout: overlap check -> 32-element AVX2 loop (LBB0_239)
// -> scalar tail (LBB0_241 .. LBB0_244). Every finished path jumps to
// LBB0_825, which is defined outside this excerpt. Which (typ, op) dispatch
// reaches this label is also decided outside this excerpt.
// NOTE(review): rdx/rcx/r8 are loaded from inLeft/inRight/out at function entry
// and r10 is expected to hold the element count; confirm on the dispatch path.
LBB0_236:
	// rsi = r8 + 4*r10: one past the last output dword.
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	// Overlap test (unsigned compares via seta) between the output range and
	// each input range:
	//   r9b = (rdx + 4*r10 > r8),  r11b = (r8 + 4*r10 > rdx)
	//   al  = (rcx + 4*r10 > r8),  dil  = (r8 + 4*r10 > rcx)
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0 so the scalar fallback starts at the first element.
	WORD $0xf631             // xor    esi, esi
	// If either pair is both true, that input overlaps the output: skip the
	// vector loop and do everything in the scalar tail.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_241
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_241
	// No overlap: rsi = r10d & -32 (elements handled by the vector loop),
	// rdi = 0 (running element index).
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_239: vector loop, 32 dwords (4 x YMM, 128 bytes) per iteration.
LBB0_239:
	// Load 32 dwords from [rcx + 4*rdi].
	LONG $0x046ffec5; BYTE $0xb9               // vmovdqu    ymm0, yword [rcx + 4*rdi]
	LONG $0x4c6ffec5; WORD $0x20b9             // vmovdqu    ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x546ffec5; WORD $0x40b9             // vmovdqu    ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60b9             // vmovdqu    ymm3, yword [rcx + 4*rdi + 96]
	// Lane-wise wrapping 32-bit add with the dwords at [rdx + 4*rdi].
	LONG $0x04fefdc5; BYTE $0xba               // vpaddd    ymm0, ymm0, yword [rdx + 4*rdi]
	LONG $0x4cfef5c5; WORD $0x20ba             // vpaddd    ymm1, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x54feedc5; WORD $0x40ba             // vpaddd    ymm2, ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5cfee5c5; WORD $0x60ba             // vpaddd    ymm3, ymm3, yword [rdx + 4*rdi + 96]
	// Store the four result vectors to [r8 + 4*rdi].
	LONG $0x7f7ec1c4; WORD $0xb804             // vmovdqu    yword [r8 + 4*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xb84c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm3
	// Advance by 32 elements; repeat until rdi == rsi.
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_239
	// Done if the vector loop covered all r10 elements, else run the tail.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_241: scalar tail starting at index rsi (0 on the overlap path).
LBB0_241:
	// r9 = r10 - rsi - 1 (computed as ~rsi + r10): elements still to do, minus one.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	// rax = r10 & 3: iteration count for the peel loop. With rsi = 0 or a
	// multiple of 32 this equals (r10 - rsi) & 3.
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_243

// LBB0_242: peel loop, one dword per iteration; rax counts down to zero.
LBB0_242:
	WORD $0x3c8b; BYTE $0xb1 // mov    edi, dword [rcx + 4*rsi]
	WORD $0x3c03; BYTE $0xb2 // add    edi, dword [rdx + 4*rsi]
	LONG $0xb03c8941         // mov    dword [r8 + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_242

// LBB0_243: r9 < 3 means at most 3 elements remained at LBB0_241, and the
// peel loop has already handled all of them.
LBB0_243:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_244: remaining elements, unrolled four per iteration, until rsi == r10.
LBB0_244:
	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
	WORD $0x0403; BYTE $0xb2     // add    eax, dword [rdx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
	LONG $0x04b24403             // add    eax, dword [rdx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
	LONG $0x08b24403             // add    eax, dword [rdx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
	LONG $0x0cb24403             // add    eax, dword [rdx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_244
	JMP  LBB0_825

// LBB0_502: element-wise 32-bit subtract, [r8 + 4*i] = [rdx + 4*i] - [rcx + 4*i],
// for r10 elements. Layout: overlap check -> 32-element AVX2 loop (LBB0_505)
// -> scalar tail (LBB0_507 .. LBB0_510). Every finished path jumps to
// LBB0_825, which is defined outside this excerpt. Which (typ, op) dispatch
// reaches this label is also decided outside this excerpt.
// NOTE(review): rdx/rcx/r8 are loaded from inLeft/inRight/out at function entry
// and r10 is expected to hold the element count; confirm on the dispatch path.
LBB0_502:
	// rsi = r8 + 4*r10: one past the last output dword.
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	// Overlap test (unsigned compares via seta) between the output range and
	// each input range:
	//   r9b = (rdx + 4*r10 > r8),  r11b = (r8 + 4*r10 > rdx)
	//   al  = (rcx + 4*r10 > r8),  dil  = (r8 + 4*r10 > rcx)
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0 so the scalar fallback starts at the first element.
	WORD $0xf631             // xor    esi, esi
	// If either pair is both true, that input overlaps the output: skip the
	// vector loop and do everything in the scalar tail.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_507
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_507
	// No overlap: rsi = r10d & -32 (elements handled by the vector loop),
	// rdi = 0 (running element index).
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_505: vector loop, 32 dwords (4 x YMM, 128 bytes) per iteration.
LBB0_505:
	// Load the minuend dwords from [rdx + 4*rdi].
	LONG $0x046ffec5; BYTE $0xba               // vmovdqu    ymm0, yword [rdx + 4*rdi]
	LONG $0x4c6ffec5; WORD $0x20ba             // vmovdqu    ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x546ffec5; WORD $0x40ba             // vmovdqu    ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60ba             // vmovdqu    ymm3, yword [rdx + 4*rdi + 96]
	// Lane-wise wrapping 32-bit subtract of the dwords at [rcx + 4*rdi].
	LONG $0x04fafdc5; BYTE $0xb9               // vpsubd    ymm0, ymm0, yword [rcx + 4*rdi]
	LONG $0x4cfaf5c5; WORD $0x20b9             // vpsubd    ymm1, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x54faedc5; WORD $0x40b9             // vpsubd    ymm2, ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5cfae5c5; WORD $0x60b9             // vpsubd    ymm3, ymm3, yword [rcx + 4*rdi + 96]
	// Store the four result vectors to [r8 + 4*rdi].
	LONG $0x7f7ec1c4; WORD $0xb804             // vmovdqu    yword [r8 + 4*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xb84c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm3
	// Advance by 32 elements; repeat until rdi == rsi.
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_505
	// Done if the vector loop covered all r10 elements, else run the tail.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB0_825

// LBB0_507: scalar tail starting at index rsi (0 on the overlap path).
LBB0_507:
	// r9 = r10 - rsi - 1 (computed as ~rsi + r10): elements still to do, minus one.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	// rax = r10 & 3: iteration count for the peel loop. With rsi = 0 or a
	// multiple of 32 this equals (r10 - rsi) & 3.
	WORD $0x894c; BYTE $0xd0 // mov    rax, r10
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_509

// LBB0_508: peel loop, one dword per iteration; rax counts down to zero.
LBB0_508:
	WORD $0x3c8b; BYTE $0xb2 // mov    edi, dword [rdx + 4*rsi]
	WORD $0x3c2b; BYTE $0xb1 // sub    edi, dword [rcx + 4*rsi]
	LONG $0xb03c8941         // mov    dword [r8 + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_508

// LBB0_509: r9 < 3 means at most 3 elements remained at LBB0_507, and the
// peel loop has already handled all of them.
LBB0_509:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_510: remaining elements, unrolled four per iteration, until rsi == r10.
LBB0_510:
	WORD $0x048b; BYTE $0xb2     // mov    eax, dword [rdx + 4*rsi]
	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	LONG $0x04b2448b             // mov    eax, dword [rdx + 4*rsi + 4]
	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	LONG $0x08b2448b             // mov    eax, dword [rdx + 4*rsi + 8]
	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	LONG $0x0cb2448b             // mov    eax, dword [rdx + 4*rsi + 12]
	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB0_510
	JMP  LBB0_825

// LBB0_626: vector part of the 32-bit multiply kernel reached from the
// op == 2 / typ == 6 dispatch near the top of the function, when r9d (len,
// copied into r10d) is >= 32. Computes [r8 + 4*i] = [rcx + 4*i] * [rdx + 4*i]
// (low 32 bits of each product) with rdx = inLeft, rcx = inRight, r8 = out.
// Elements the vector loop does not cover are handled by the scalar tail at
// LBB0_631, located right after that dispatch. LBB0_825 is defined outside
// this excerpt.
LBB0_626:
	// rsi = r8 + 4*r10: one past the last output dword.
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	// Overlap test (unsigned compares via seta) between the output range and
	// each input range:
	//   r9b = (rdx + 4*r10 > r8),  r11b = (r8 + 4*r10 > rdx)
	//   al  = (rcx + 4*r10 > r8),  dil  = (r8 + 4*r10 > rcx)
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0 so the scalar fallback starts at the first element.
	WORD $0xf631             // xor    esi, esi
	// If either pair is both true, that input overlaps the output: skip the
	// vector loop and do everything in the scalar tail.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_631
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_631
	// No overlap: rsi = r10d & -32 (elements handled by the vector loop),
	// rdi = 0 (running element index).
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_629: vector loop, 32 dwords (4 x YMM, 128 bytes) per iteration.
LBB0_629:
	// Load 32 dwords from [rcx + 4*rdi].
	LONG $0x046ffec5; BYTE $0xb9               // vmovdqu    ymm0, yword [rcx + 4*rdi]
	LONG $0x4c6ffec5; WORD $0x20b9             // vmovdqu    ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x546ffec5; WORD $0x40b9             // vmovdqu    ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60b9             // vmovdqu    ymm3, yword [rcx + 4*rdi + 96]
	// Lane-wise 32-bit multiply (low half kept) by the dwords at [rdx + 4*rdi].
	LONG $0x407de2c4; WORD $0xba04             // vpmulld    ymm0, ymm0, yword [rdx + 4*rdi]
	LONG $0x4075e2c4; WORD $0xba4c; BYTE $0x20 // vpmulld    ymm1, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x406de2c4; WORD $0xba54; BYTE $0x40 // vpmulld    ymm2, ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x4065e2c4; WORD $0xba5c; BYTE $0x60 // vpmulld    ymm3, ymm3, yword [rdx + 4*rdi + 96]
	// Store the four result vectors to [r8 + 4*rdi].
	LONG $0x7f7ec1c4; WORD $0xb804             // vmovdqu    yword [r8 + 4*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xb84c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm3
	// Advance by 32 elements; repeat until rdi == rsi.
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_629
	// Leftover elements (rsi != r10) go to the scalar tail at LBB0_631 with
	// rsi as the starting index; otherwise the kernel is done.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_631
	JMP  LBB0_825

// LBB0_762: vector part of a 32-bit multiply kernel,
// [r8 + 4*i] = [rcx + 4*i] * [rdx + 4*i] (low 32 bits of each product), for
// r10 elements. Overlap-check failures and leftover elements branch to
// LBB0_767; finished paths jump to LBB0_825.
// NOTE(review): LBB0_767 and LBB0_825 are not visible in this excerpt.
// LBB0_767 is presumably the matching scalar tail; verify at its definition.
// NOTE(review): rdx/rcx/r8 are loaded from inLeft/inRight/out at function entry
// and r10 is expected to hold the element count; confirm on the dispatch path.
LBB0_762:
	// rsi = r8 + 4*r10: one past the last output dword.
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	// Overlap test (unsigned compares via seta) between the output range and
	// each input range:
	//   r9b = (rdx + 4*r10 > r8),  r11b = (r8 + 4*r10 > rdx)
	//   al  = (rcx + 4*r10 > r8),  dil  = (r8 + 4*r10 > rcx)
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0 is the index handed to LBB0_767 when the vector loop is skipped.
	WORD $0xf631             // xor    esi, esi
	// If either pair is both true, that input overlaps the output: skip the
	// vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_767
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_767
	// No overlap: rsi = r10d & -32 (elements handled by the vector loop),
	// rdi = 0 (running element index).
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_765: vector loop, 32 dwords (4 x YMM, 128 bytes) per iteration.
LBB0_765:
	// Load 32 dwords from [rcx + 4*rdi].
	LONG $0x046ffec5; BYTE $0xb9               // vmovdqu    ymm0, yword [rcx + 4*rdi]
	LONG $0x4c6ffec5; WORD $0x20b9             // vmovdqu    ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x546ffec5; WORD $0x40b9             // vmovdqu    ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60b9             // vmovdqu    ymm3, yword [rcx + 4*rdi + 96]
	// Lane-wise 32-bit multiply (low half kept) by the dwords at [rdx + 4*rdi].
	LONG $0x407de2c4; WORD $0xba04             // vpmulld    ymm0, ymm0, yword [rdx + 4*rdi]
	LONG $0x4075e2c4; WORD $0xba4c; BYTE $0x20 // vpmulld    ymm1, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x406de2c4; WORD $0xba54; BYTE $0x40 // vpmulld    ymm2, ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x4065e2c4; WORD $0xba5c; BYTE $0x60 // vpmulld    ymm3, ymm3, yword [rdx + 4*rdi + 96]
	// Store the four result vectors to [r8 + 4*rdi].
	LONG $0x7f7ec1c4; WORD $0xb804             // vmovdqu    yword [r8 + 4*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xb84c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm3
	// Advance by 32 elements; repeat until rdi == rsi.
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_765
	// Leftover elements (rsi != r10) branch to LBB0_767 with rsi as the
	// starting index; otherwise the kernel is done.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_767
	JMP  LBB0_825

// LBB0_357: vector part of a 32-bit subtract kernel,
// [r8 + 4*i] = [rdx + 4*i] - [rcx + 4*i], for r10 elements. Overlap-check
// failures and leftover elements branch to LBB0_362; finished paths jump to
// LBB0_825.
// NOTE(review): LBB0_362 and LBB0_825 are not visible in this excerpt.
// LBB0_362 is presumably the matching scalar tail; verify at its definition.
// NOTE(review): rdx/rcx/r8 are loaded from inLeft/inRight/out at function entry
// and r10 is expected to hold the element count; confirm on the dispatch path.
LBB0_357:
	// rsi = r8 + 4*r10: one past the last output dword.
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	// Overlap test (unsigned compares via seta) between the output range and
	// each input range:
	//   r9b = (rdx + 4*r10 > r8),  r11b = (r8 + 4*r10 > rdx)
	//   al  = (rcx + 4*r10 > r8),  dil  = (r8 + 4*r10 > rcx)
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0 is the index handed to LBB0_362 when the vector loop is skipped.
	WORD $0xf631             // xor    esi, esi
	// If either pair is both true, that input overlaps the output: skip the
	// vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_362
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_362
	// No overlap: rsi = r10d & -32 (elements handled by the vector loop),
	// rdi = 0 (running element index).
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_360: vector loop, 32 dwords (4 x YMM, 128 bytes) per iteration.
LBB0_360:
	// Load the minuend dwords from [rdx + 4*rdi].
	LONG $0x046ffec5; BYTE $0xba               // vmovdqu    ymm0, yword [rdx + 4*rdi]
	LONG $0x4c6ffec5; WORD $0x20ba             // vmovdqu    ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x546ffec5; WORD $0x40ba             // vmovdqu    ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60ba             // vmovdqu    ymm3, yword [rdx + 4*rdi + 96]
	// Lane-wise wrapping 32-bit subtract of the dwords at [rcx + 4*rdi].
	LONG $0x04fafdc5; BYTE $0xb9               // vpsubd    ymm0, ymm0, yword [rcx + 4*rdi]
	LONG $0x4cfaf5c5; WORD $0x20b9             // vpsubd    ymm1, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x54faedc5; WORD $0x40b9             // vpsubd    ymm2, ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5cfae5c5; WORD $0x60b9             // vpsubd    ymm3, ymm3, yword [rcx + 4*rdi + 96]
	// Store the four result vectors to [r8 + 4*rdi].
	LONG $0x7f7ec1c4; WORD $0xb804             // vmovdqu    yword [r8 + 4*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xb84c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm3
	// Advance by 32 elements; repeat until rdi == rsi.
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_360
	// Leftover elements (rsi != r10) branch to LBB0_362 with rsi as the
	// starting index; otherwise the kernel is done.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_362
	JMP  LBB0_825

// LBB0_490: vectorized element-wise 32-bit subtraction, out[i] = left[i] - right[i].
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_490:
	// Overlap guard. With 4-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_495
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_495
	// No overlap: rsi = count rounded down to a multiple of 32, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 32; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 32 dwords = 128 bytes per iteration.
LBB0_493:
	// Load left, subtract right (memory operand), store to out.
	LONG $0x046ffec5; BYTE $0xba               // vmovdqu    ymm0, yword [rdx + 4*rdi]
	LONG $0x4c6ffec5; WORD $0x20ba             // vmovdqu    ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x546ffec5; WORD $0x40ba             // vmovdqu    ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60ba             // vmovdqu    ymm3, yword [rdx + 4*rdi + 96]
	LONG $0x04fafdc5; BYTE $0xb9               // vpsubd    ymm0, ymm0, yword [rcx + 4*rdi]
	LONG $0x4cfaf5c5; WORD $0x20b9             // vpsubd    ymm1, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x54faedc5; WORD $0x40b9             // vpsubd    ymm2, ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5cfae5c5; WORD $0x60b9             // vpsubd    ymm3, ymm3, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb804             // vmovdqu    yword [r8 + 4*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xb84c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm3
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_493
	// Leftover elements (count not a multiple of 32) continue at LBB0_495 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_495
	JMP  LBB0_825

// LBB0_680: vectorized element-wise float64 multiply, out[i] = right[i] * left[i].
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_680:
	// Overlap guard. With 8-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_685
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_685
	// No overlap: rsi = count rounded down to a multiple of 16, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 16; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 16 doubles = 128 bytes per iteration.
LBB0_683:
	// Load right, multiply by left (memory operand), store to out.
	LONG $0x0410fdc5; BYTE $0xf9               // vmovupd    ymm0, yword [rcx + 8*rdi]
	LONG $0x4c10fdc5; WORD $0x20f9             // vmovupd    ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x5410fdc5; WORD $0x40f9             // vmovupd    ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5c10fdc5; WORD $0x60f9             // vmovupd    ymm3, yword [rcx + 8*rdi + 96]
	LONG $0x0459fdc5; BYTE $0xfa               // vmulpd    ymm0, ymm0, yword [rdx + 8*rdi]
	LONG $0x4c59f5c5; WORD $0x20fa             // vmulpd    ymm1, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x5459edc5; WORD $0x40fa             // vmulpd    ymm2, ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c59e5c5; WORD $0x60fa             // vmulpd    ymm3, ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf804             // vmovupd    yword [r8 + 8*rdi], ymm0
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x117dc1c4; WORD $0xf854; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_683
	// Leftover elements (count not a multiple of 16) continue at LBB0_685 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_685
	JMP  LBB0_825

// LBB0_816: vectorized element-wise float64 multiply, out[i] = right[i] * left[i].
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_816:
	// Overlap guard. With 8-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_821
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_821
	// No overlap: rsi = count rounded down to a multiple of 16, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 16; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 16 doubles = 128 bytes per iteration.
LBB0_819:
	// Load right, multiply by left (memory operand), store to out.
	LONG $0x0410fdc5; BYTE $0xf9               // vmovupd    ymm0, yword [rcx + 8*rdi]
	LONG $0x4c10fdc5; WORD $0x20f9             // vmovupd    ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x5410fdc5; WORD $0x40f9             // vmovupd    ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5c10fdc5; WORD $0x60f9             // vmovupd    ymm3, yword [rcx + 8*rdi + 96]
	LONG $0x0459fdc5; BYTE $0xfa               // vmulpd    ymm0, ymm0, yword [rdx + 8*rdi]
	LONG $0x4c59f5c5; WORD $0x20fa             // vmulpd    ymm1, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x5459edc5; WORD $0x40fa             // vmulpd    ymm2, ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c59e5c5; WORD $0x60fa             // vmulpd    ymm3, ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf804             // vmovupd    yword [r8 + 8*rdi], ymm0
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x117dc1c4; WORD $0xf854; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_819
	// Leftover elements (count not a multiple of 16) continue at LBB0_821 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_821
	JMP  LBB0_825

// LBB0_411: vectorized element-wise float64 subtraction, out[i] = left[i] - right[i].
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_411:
	// Overlap guard. With 8-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_416
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_416
	// No overlap: rsi = count rounded down to a multiple of 16, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 16; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 16 doubles = 128 bytes per iteration.
LBB0_414:
	// Load left, subtract right (memory operand), store to out.
	LONG $0x0410fdc5; BYTE $0xfa               // vmovupd    ymm0, yword [rdx + 8*rdi]
	LONG $0x4c10fdc5; WORD $0x20fa             // vmovupd    ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x5410fdc5; WORD $0x40fa             // vmovupd    ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c10fdc5; WORD $0x60fa             // vmovupd    ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x045cfdc5; BYTE $0xf9               // vsubpd    ymm0, ymm0, yword [rcx + 8*rdi]
	LONG $0x4c5cf5c5; WORD $0x20f9             // vsubpd    ymm1, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x545cedc5; WORD $0x40f9             // vsubpd    ymm2, ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5c5ce5c5; WORD $0x60f9             // vsubpd    ymm3, ymm3, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf804             // vmovupd    yword [r8 + 8*rdi], ymm0
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x117dc1c4; WORD $0xf854; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_414
	// Leftover elements (count not a multiple of 16) continue at LBB0_416 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_416
	JMP  LBB0_825

// LBB0_544: vectorized element-wise float64 subtraction, out[i] = left[i] - right[i].
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_544:
	// Overlap guard. With 8-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_549
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_549
	// No overlap: rsi = count rounded down to a multiple of 16, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 16; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 16 doubles = 128 bytes per iteration.
LBB0_547:
	// Load left, subtract right (memory operand), store to out.
	LONG $0x0410fdc5; BYTE $0xfa               // vmovupd    ymm0, yword [rdx + 8*rdi]
	LONG $0x4c10fdc5; WORD $0x20fa             // vmovupd    ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x5410fdc5; WORD $0x40fa             // vmovupd    ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c10fdc5; WORD $0x60fa             // vmovupd    ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x045cfdc5; BYTE $0xf9               // vsubpd    ymm0, ymm0, yword [rcx + 8*rdi]
	LONG $0x4c5cf5c5; WORD $0x20f9             // vsubpd    ymm1, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x545cedc5; WORD $0x40f9             // vsubpd    ymm2, ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5c5ce5c5; WORD $0x60f9             // vsubpd    ymm3, ymm3, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf804             // vmovupd    yword [r8 + 8*rdi], ymm0
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x117dc1c4; WORD $0xf854; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_547
	// Leftover elements (count not a multiple of 16) continue at LBB0_549 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_549
	JMP  LBB0_825

// LBB0_605: vectorized element-wise 16-bit multiply, out[i] = right[i] * left[i]
// (vpmullw keeps the low 16 bits of each product).
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_605:
	// Overlap guard. With 2-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10]
	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_610
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_610
	// No overlap: rsi = count rounded down to a multiple of 64, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 64; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xc0 // and    esi, -64
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 64 words = 128 bytes per iteration.
LBB0_608:
	// Load right, multiply by left (memory operand), store to out.
	LONG $0x046ffec5; BYTE $0x79               // vmovdqu    ymm0, yword [rcx + 2*rdi]
	LONG $0x4c6ffec5; WORD $0x2079             // vmovdqu    ymm1, yword [rcx + 2*rdi + 32]
	LONG $0x546ffec5; WORD $0x4079             // vmovdqu    ymm2, yword [rcx + 2*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x6079             // vmovdqu    ymm3, yword [rcx + 2*rdi + 96]
	LONG $0x04d5fdc5; BYTE $0x7a               // vpmullw    ymm0, ymm0, yword [rdx + 2*rdi]
	LONG $0x4cd5f5c5; WORD $0x207a             // vpmullw    ymm1, ymm1, yword [rdx + 2*rdi + 32]
	LONG $0x54d5edc5; WORD $0x407a             // vpmullw    ymm2, ymm2, yword [rdx + 2*rdi + 64]
	LONG $0x5cd5e5c5; WORD $0x607a             // vpmullw    ymm3, ymm3, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x7804             // vmovdqu    yword [r8 + 2*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	LONG $0x40c78348                           // add    rdi, 64
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_608
	// Leftover elements (count not a multiple of 64) continue at LBB0_610 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_610
	JMP  LBB0_825

// LBB0_617: vectorized element-wise 16-bit multiply, out[i] = right[i] * left[i]
// (vpmullw keeps the low 16 bits of each product).
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_617:
	// Overlap guard. With 2-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10]
	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_622
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_622
	// No overlap: rsi = count rounded down to a multiple of 64, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 64; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xc0 // and    esi, -64
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 64 words = 128 bytes per iteration.
LBB0_620:
	// Load right, multiply by left (memory operand), store to out.
	LONG $0x046ffec5; BYTE $0x79               // vmovdqu    ymm0, yword [rcx + 2*rdi]
	LONG $0x4c6ffec5; WORD $0x2079             // vmovdqu    ymm1, yword [rcx + 2*rdi + 32]
	LONG $0x546ffec5; WORD $0x4079             // vmovdqu    ymm2, yword [rcx + 2*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x6079             // vmovdqu    ymm3, yword [rcx + 2*rdi + 96]
	LONG $0x04d5fdc5; BYTE $0x7a               // vpmullw    ymm0, ymm0, yword [rdx + 2*rdi]
	LONG $0x4cd5f5c5; WORD $0x207a             // vpmullw    ymm1, ymm1, yword [rdx + 2*rdi + 32]
	LONG $0x54d5edc5; WORD $0x407a             // vpmullw    ymm2, ymm2, yword [rdx + 2*rdi + 64]
	LONG $0x5cd5e5c5; WORD $0x607a             // vpmullw    ymm3, ymm3, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x7804             // vmovdqu    yword [r8 + 2*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	LONG $0x40c78348                           // add    rdi, 64
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_620
	// Leftover elements (count not a multiple of 64) continue at LBB0_622 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_622
	JMP  LBB0_825

// LBB0_741: vectorized element-wise 16-bit multiply, out[i] = right[i] * left[i]
// (vpmullw keeps the low 16 bits of each product).
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_741:
	// Overlap guard. With 2-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10]
	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_746
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_746
	// No overlap: rsi = count rounded down to a multiple of 64, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 64; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xc0 // and    esi, -64
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 64 words = 128 bytes per iteration.
LBB0_744:
	// Load right, multiply by left (memory operand), store to out.
	LONG $0x046ffec5; BYTE $0x79               // vmovdqu    ymm0, yword [rcx + 2*rdi]
	LONG $0x4c6ffec5; WORD $0x2079             // vmovdqu    ymm1, yword [rcx + 2*rdi + 32]
	LONG $0x546ffec5; WORD $0x4079             // vmovdqu    ymm2, yword [rcx + 2*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x6079             // vmovdqu    ymm3, yword [rcx + 2*rdi + 96]
	LONG $0x04d5fdc5; BYTE $0x7a               // vpmullw    ymm0, ymm0, yword [rdx + 2*rdi]
	LONG $0x4cd5f5c5; WORD $0x207a             // vpmullw    ymm1, ymm1, yword [rdx + 2*rdi + 32]
	LONG $0x54d5edc5; WORD $0x407a             // vpmullw    ymm2, ymm2, yword [rdx + 2*rdi + 64]
	LONG $0x5cd5e5c5; WORD $0x607a             // vpmullw    ymm3, ymm3, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x7804             // vmovdqu    yword [r8 + 2*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	LONG $0x40c78348                           // add    rdi, 64
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_744
	// Leftover elements (count not a multiple of 64) continue at LBB0_746 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_746
	JMP  LBB0_825

// LBB0_753: vectorized element-wise 16-bit multiply, out[i] = right[i] * left[i]
// (vpmullw keeps the low 16 bits of each product).
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_753:
	// Overlap guard. With 2-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10]
	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_758
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_758
	// No overlap: rsi = count rounded down to a multiple of 64, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 64; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xc0 // and    esi, -64
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 64 words = 128 bytes per iteration.
LBB0_756:
	// Load right, multiply by left (memory operand), store to out.
	LONG $0x046ffec5; BYTE $0x79               // vmovdqu    ymm0, yword [rcx + 2*rdi]
	LONG $0x4c6ffec5; WORD $0x2079             // vmovdqu    ymm1, yword [rcx + 2*rdi + 32]
	LONG $0x546ffec5; WORD $0x4079             // vmovdqu    ymm2, yword [rcx + 2*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x6079             // vmovdqu    ymm3, yword [rcx + 2*rdi + 96]
	LONG $0x04d5fdc5; BYTE $0x7a               // vpmullw    ymm0, ymm0, yword [rdx + 2*rdi]
	LONG $0x4cd5f5c5; WORD $0x207a             // vpmullw    ymm1, ymm1, yword [rdx + 2*rdi + 32]
	LONG $0x54d5edc5; WORD $0x407a             // vpmullw    ymm2, ymm2, yword [rdx + 2*rdi + 64]
	LONG $0x5cd5e5c5; WORD $0x607a             // vpmullw    ymm3, ymm3, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x7804             // vmovdqu    yword [r8 + 2*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	LONG $0x40c78348                           // add    rdi, 64
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_756
	// Leftover elements (count not a multiple of 64) continue at LBB0_758 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_758
	JMP  LBB0_825

// LBB0_336: vectorized element-wise 16-bit subtraction, out[i] = left[i] - right[i].
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_336:
	// Overlap guard. With 2-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10]
	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_341
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_341
	// No overlap: rsi = count rounded down to a multiple of 64, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 64; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xc0 // and    esi, -64
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 64 words = 128 bytes per iteration.
LBB0_339:
	// Load left, subtract right (memory operand), store to out.
	LONG $0x046ffec5; BYTE $0x7a               // vmovdqu    ymm0, yword [rdx + 2*rdi]
	LONG $0x4c6ffec5; WORD $0x207a             // vmovdqu    ymm1, yword [rdx + 2*rdi + 32]
	LONG $0x546ffec5; WORD $0x407a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x607a             // vmovdqu    ymm3, yword [rdx + 2*rdi + 96]
	LONG $0x04f9fdc5; BYTE $0x79               // vpsubw    ymm0, ymm0, yword [rcx + 2*rdi]
	LONG $0x4cf9f5c5; WORD $0x2079             // vpsubw    ymm1, ymm1, yword [rcx + 2*rdi + 32]
	LONG $0x54f9edc5; WORD $0x4079             // vpsubw    ymm2, ymm2, yword [rcx + 2*rdi + 64]
	LONG $0x5cf9e5c5; WORD $0x6079             // vpsubw    ymm3, ymm3, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x7804             // vmovdqu    yword [r8 + 2*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	LONG $0x40c78348                           // add    rdi, 64
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_339
	// Leftover elements (count not a multiple of 64) continue at LBB0_341 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_341
	JMP  LBB0_825

// LBB0_348: vectorized element-wise 16-bit subtraction, out[i] = left[i] - right[i].
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_348:
	// Overlap guard. With 2-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10]
	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_353
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_353
	// No overlap: rsi = count rounded down to a multiple of 64, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 64; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xc0 // and    esi, -64
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 64 words = 128 bytes per iteration.
LBB0_351:
	// Load left, subtract right (memory operand), store to out.
	LONG $0x046ffec5; BYTE $0x7a               // vmovdqu    ymm0, yword [rdx + 2*rdi]
	LONG $0x4c6ffec5; WORD $0x207a             // vmovdqu    ymm1, yword [rdx + 2*rdi + 32]
	LONG $0x546ffec5; WORD $0x407a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x607a             // vmovdqu    ymm3, yword [rdx + 2*rdi + 96]
	LONG $0x04f9fdc5; BYTE $0x79               // vpsubw    ymm0, ymm0, yword [rcx + 2*rdi]
	LONG $0x4cf9f5c5; WORD $0x2079             // vpsubw    ymm1, ymm1, yword [rcx + 2*rdi + 32]
	LONG $0x54f9edc5; WORD $0x4079             // vpsubw    ymm2, ymm2, yword [rcx + 2*rdi + 64]
	LONG $0x5cf9e5c5; WORD $0x6079             // vpsubw    ymm3, ymm3, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x7804             // vmovdqu    yword [r8 + 2*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	LONG $0x40c78348                           // add    rdi, 64
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_351
	// Leftover elements (count not a multiple of 64) continue at LBB0_353 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_353
	JMP  LBB0_825

// LBB0_469: vectorized element-wise 16-bit subtraction, out[i] = left[i] - right[i].
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_469:
	// Overlap guard. With 2-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10]
	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_474
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_474
	// No overlap: rsi = count rounded down to a multiple of 64, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 64; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xc0 // and    esi, -64
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 64 words = 128 bytes per iteration.
LBB0_472:
	// Load left, subtract right (memory operand), store to out.
	LONG $0x046ffec5; BYTE $0x7a               // vmovdqu    ymm0, yword [rdx + 2*rdi]
	LONG $0x4c6ffec5; WORD $0x207a             // vmovdqu    ymm1, yword [rdx + 2*rdi + 32]
	LONG $0x546ffec5; WORD $0x407a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x607a             // vmovdqu    ymm3, yword [rdx + 2*rdi + 96]
	LONG $0x04f9fdc5; BYTE $0x79               // vpsubw    ymm0, ymm0, yword [rcx + 2*rdi]
	LONG $0x4cf9f5c5; WORD $0x2079             // vpsubw    ymm1, ymm1, yword [rcx + 2*rdi + 32]
	LONG $0x54f9edc5; WORD $0x4079             // vpsubw    ymm2, ymm2, yword [rcx + 2*rdi + 64]
	LONG $0x5cf9e5c5; WORD $0x6079             // vpsubw    ymm3, ymm3, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x7804             // vmovdqu    yword [r8 + 2*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	LONG $0x40c78348                           // add    rdi, 64
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_472
	// Leftover elements (count not a multiple of 64) continue at LBB0_474 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_474
	JMP  LBB0_825

// LBB0_481: vectorized element-wise 16-bit subtraction, out[i] = left[i] - right[i].
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_481:
	// Overlap guard. With 2-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10]
	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_486
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_486
	// No overlap: rsi = count rounded down to a multiple of 64, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 64; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xc0 // and    esi, -64
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 64 words = 128 bytes per iteration.
LBB0_484:
	// Load left, subtract right (memory operand), store to out.
	LONG $0x046ffec5; BYTE $0x7a               // vmovdqu    ymm0, yword [rdx + 2*rdi]
	LONG $0x4c6ffec5; WORD $0x207a             // vmovdqu    ymm1, yword [rdx + 2*rdi + 32]
	LONG $0x546ffec5; WORD $0x407a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x607a             // vmovdqu    ymm3, yword [rdx + 2*rdi + 96]
	LONG $0x04f9fdc5; BYTE $0x79               // vpsubw    ymm0, ymm0, yword [rcx + 2*rdi]
	LONG $0x4cf9f5c5; WORD $0x2079             // vpsubw    ymm1, ymm1, yword [rcx + 2*rdi + 32]
	LONG $0x54f9edc5; WORD $0x4079             // vpsubw    ymm2, ymm2, yword [rcx + 2*rdi + 64]
	LONG $0x5cf9e5c5; WORD $0x6079             // vpsubw    ymm3, ymm3, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x7804             // vmovdqu    yword [r8 + 2*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	LONG $0x40c78348                           // add    rdi, 64
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_484
	// Leftover elements (count not a multiple of 64) continue at LBB0_486 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_486
	JMP  LBB0_825

// LBB0_659: vectorized element-wise 64-bit integer multiply,
// out[i] = low 64 bits of left[i] * right[i].
// The code uses no packed 64x64 multiply; each product is assembled from
// 32x32->64 partial products (vpmuludq multiplies the low dword of each qword):
//   cross  = hi(right)*lo(left) + hi(left)*lo(right)
//   result = (cross << 32) + lo(left)*lo(right)
// rdx/rcx/r8 are the left/right/out pointers (loaded from inLeft/inRight/out
// at function entry). r10 is used as the element count; it is set before the
// jump to this label (not visible here; presumably a zero-extended copy of len).
LBB0_659:
	// Overlap guard. With 8-byte elements, form the one-past-the-end pointers
	// and record: r9b = leftEnd > out, r11b = outEnd > left,
	// al = rightEnd > out, dil = outEnd > right (all unsigned compares).
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	// rsi = 0: start index handed to the fallback if an overlap is detected.
	WORD $0xf631             // xor    esi, esi
	// out overlaps left -> skip the vector loop.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_664
	// out overlaps right -> skip the vector loop.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_664
	// No overlap: rsi = count rounded down to a multiple of 16, rdi = index 0.
	// NOTE(review): the loop below tests its bound only after the body, so it
	// relies on count >= 16; that guard is not visible in this block.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

// Main loop: 4 YMM vectors = 16 qwords = 128 bytes per iteration.
LBB0_662:
	// Load four left vectors into ymm1, ymm2, ymm3, ymm0 (in offset order)...
	LONG $0x0c6ffec5; BYTE $0xfa               // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0x446ffec5; WORD $0x60fa             // vmovdqu    ymm0, yword [rdx + 8*rdi + 96]
	// ...and the matching right vectors into ymm4..ymm7.
	LONG $0x246ffec5; BYTE $0xf9               // vmovdqu    ymm4, yword [rcx + 8*rdi]
	LONG $0x6c6ffec5; WORD $0x20f9             // vmovdqu    ymm5, yword [rcx + 8*rdi + 32]
	LONG $0x746ffec5; WORD $0x40f9             // vmovdqu    ymm6, yword [rcx + 8*rdi + 64]
	LONG $0x7c6ffec5; WORD $0x60f9             // vmovdqu    ymm7, yword [rcx + 8*rdi + 96]
	// Vector 0: ymm1 = ymm1 * ymm4 (ymm8/ymm9 hold the cross terms).
	LONG $0xd473bdc5; BYTE $0x20               // vpsrlq    ymm8, ymm4, 32
	LONG $0xc1f43dc5                           // vpmuludq    ymm8, ymm8, ymm1
	LONG $0xd173b5c5; BYTE $0x20               // vpsrlq    ymm9, ymm1, 32
	LONG $0xccf435c5                           // vpmuludq    ymm9, ymm9, ymm4
	LONG $0xd43541c4; BYTE $0xc0               // vpaddq    ymm8, ymm9, ymm8
	LONG $0x733dc1c4; WORD $0x20f0             // vpsllq    ymm8, ymm8, 32
	LONG $0xc9f4ddc5                           // vpmuludq    ymm1, ymm4, ymm1
	LONG $0xc9d4bdc5                           // vpaddq    ymm1, ymm8, ymm1
	// Vector 1: ymm2 = ymm2 * ymm5 (ymm4/ymm8 reused as temporaries).
	LONG $0xd573ddc5; BYTE $0x20               // vpsrlq    ymm4, ymm5, 32
	LONG $0xe2f4ddc5                           // vpmuludq    ymm4, ymm4, ymm2
	LONG $0xd273bdc5; BYTE $0x20               // vpsrlq    ymm8, ymm2, 32
	LONG $0xc5f43dc5                           // vpmuludq    ymm8, ymm8, ymm5
	LONG $0xe4d4bdc5                           // vpaddq    ymm4, ymm8, ymm4
	LONG $0xf473ddc5; BYTE $0x20               // vpsllq    ymm4, ymm4, 32
	LONG $0xd2f4d5c5                           // vpmuludq    ymm2, ymm5, ymm2
	LONG $0xd4d4edc5                           // vpaddq    ymm2, ymm2, ymm4
	// Vector 2: ymm3 = ymm3 * ymm6 (ymm4/ymm5 reused as temporaries).
	LONG $0xd673ddc5; BYTE $0x20               // vpsrlq    ymm4, ymm6, 32
	LONG $0xe3f4ddc5                           // vpmuludq    ymm4, ymm4, ymm3
	LONG $0xd373d5c5; BYTE $0x20               // vpsrlq    ymm5, ymm3, 32
	LONG $0xedf4cdc5                           // vpmuludq    ymm5, ymm6, ymm5
	LONG $0xe4d4d5c5                           // vpaddq    ymm4, ymm5, ymm4
	LONG $0xf473ddc5; BYTE $0x20               // vpsllq    ymm4, ymm4, 32
	LONG $0xdbf4cdc5                           // vpmuludq    ymm3, ymm6, ymm3
	LONG $0xdcd4e5c5                           // vpaddq    ymm3, ymm3, ymm4
	// Vector 3: ymm0 = ymm0 * ymm7 (ymm4/ymm5 reused as temporaries).
	LONG $0xd773ddc5; BYTE $0x20               // vpsrlq    ymm4, ymm7, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xd073d5c5; BYTE $0x20               // vpsrlq    ymm5, ymm0, 32
	LONG $0xedf4c5c5                           // vpmuludq    ymm5, ymm7, ymm5
	LONG $0xe4d4d5c5                           // vpaddq    ymm4, ymm5, ymm4
	LONG $0xf473ddc5; BYTE $0x20               // vpsllq    ymm4, ymm4, 32
	LONG $0xc0f4c5c5                           // vpmuludq    ymm0, ymm7, ymm0
	LONG $0xc4d4fdc5                           // vpaddq    ymm0, ymm0, ymm4
	// Store the four products in offset order (ymm1, ymm2, ymm3, ymm0).
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_662
	// Leftover elements (count not a multiple of 16) continue at LBB0_664 from
	// index rsi (outside this block; presumably a scalar loop); otherwise exit.
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_664
	JMP  LBB0_825

// LBB0_671: buffer-overlap check ahead of the 4-byte-element vector loop at LBB0_674.
//   rdx = inLeft, rcx = inRight, r8 = out; r10 is used as the element count.
// Each buffer's end address is base + 4*r10. If out's range intersects inLeft's
// or inRight's range, control goes to LBB0_676 (outside this chunk; presumably
// the scalar path) with rsi = 0. Otherwise it falls through with
// rsi = r10d & -32 (count rounded down to a multiple of 32) and rdi = 0.
LBB0_671:
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	// r9b = (end of inLeft > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	// r11b = (end of out > inLeft)
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	// al = (end of inRight > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	// dil = (end of out > inRight)
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// out overlaps inLeft when both r9b and r11b are set.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_676
	// out overlaps inRight when both al and dil are set.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_676
	// No overlap: rsi = vector-loop bound, rdi = starting index 0.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_674: vector loop for element-wise single-precision float multiply,
// 32 elements (four 32-byte YMM vectors) per iteration.
//   rdx = inLeft, rcx = inRight, r8 = out; rdi = element index; rsi = loop bound.
LBB0_674:
	// Load 4 vectors from inRight, multiply by the matching inLeft vectors.
	LONG $0x0410fcc5; BYTE $0xb9               // vmovups    ymm0, yword [rcx + 4*rdi]
	LONG $0x4c10fcc5; WORD $0x20b9             // vmovups    ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x5410fcc5; WORD $0x40b9             // vmovups    ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c10fcc5; WORD $0x60b9             // vmovups    ymm3, yword [rcx + 4*rdi + 96]
	LONG $0x0459fcc5; BYTE $0xba               // vmulps    ymm0, ymm0, yword [rdx + 4*rdi]
	LONG $0x4c59f4c5; WORD $0x20ba             // vmulps    ymm1, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x5459ecc5; WORD $0x40ba             // vmulps    ymm2, ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c59e4c5; WORD $0x60ba             // vmulps    ymm3, ymm3, yword [rdx + 4*rdi + 96]
	// Store the products to out and advance by 32 elements.
	LONG $0x117cc1c4; WORD $0xb804             // vmovups    yword [r8 + 4*rdi], ymm0
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x117cc1c4; WORD $0xb854; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm3
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_674
	// Vector part finished. If rsi != r10 continue at LBB0_676 (outside this
	// chunk); otherwise go to LBB0_825 (outside this chunk; presumably the common exit).
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_676
	JMP  LBB0_825

// LBB0_795: buffer-overlap check ahead of the 8-byte-element vector loop at LBB0_798.
//   rdx = inLeft, rcx = inRight, r8 = out; r10 is used as the element count.
// Each buffer's end address is base + 8*r10. If out's range intersects inLeft's
// or inRight's range, control goes to LBB0_800 (outside this chunk; presumably
// the scalar path) with rsi = 0. Otherwise it falls through with
// rsi = r10d & -16 (count rounded down to a multiple of 16) and rdi = 0.
LBB0_795:
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	// r9b = (end of inLeft > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	// r11b = (end of out > inLeft)
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	// al = (end of inRight > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	// dil = (end of out > inRight)
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// out overlaps inLeft when both r9b and r11b are set.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_800
	// out overlaps inRight when both al and dil are set.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_800
	// No overlap: rsi = vector-loop bound, rdi = starting index 0.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

// LBB0_798: vector loop for element-wise multiply of 64-bit integers,
// 16 elements (four 32-byte YMM vectors) per iteration.
//   rdx = inLeft, rcx = inRight, r8 = out; rdi = element index; rsi = loop bound.
// The low 64 bits of each product are assembled from three 32x32->64 vpmuludq
// results:  lo(a)*lo(b) + ((hi(a)*lo(b) + lo(a)*hi(b)) << 32).
LBB0_798:
	// Load 4 vectors from inLeft (ymm1, ymm2, ymm3, ymm0) and 4 from inRight (ymm4..ymm7).
	LONG $0x0c6ffec5; BYTE $0xfa               // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0x446ffec5; WORD $0x60fa             // vmovdqu    ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x246ffec5; BYTE $0xf9               // vmovdqu    ymm4, yword [rcx + 8*rdi]
	LONG $0x6c6ffec5; WORD $0x20f9             // vmovdqu    ymm5, yword [rcx + 8*rdi + 32]
	LONG $0x746ffec5; WORD $0x40f9             // vmovdqu    ymm6, yword [rcx + 8*rdi + 64]
	LONG $0x7c6ffec5; WORD $0x60f9             // vmovdqu    ymm7, yword [rcx + 8*rdi + 96]
	// Vector 0: ymm1 = ymm1 * ymm4 (low 64 bits of each lane product).
	LONG $0xd473bdc5; BYTE $0x20               // vpsrlq    ymm8, ymm4, 32
	LONG $0xc1f43dc5                           // vpmuludq    ymm8, ymm8, ymm1
	LONG $0xd173b5c5; BYTE $0x20               // vpsrlq    ymm9, ymm1, 32
	LONG $0xccf435c5                           // vpmuludq    ymm9, ymm9, ymm4
	LONG $0xd43541c4; BYTE $0xc0               // vpaddq    ymm8, ymm9, ymm8
	LONG $0x733dc1c4; WORD $0x20f0             // vpsllq    ymm8, ymm8, 32
	LONG $0xc9f4ddc5                           // vpmuludq    ymm1, ymm4, ymm1
	LONG $0xc9d4bdc5                           // vpaddq    ymm1, ymm8, ymm1
	// Vector 1: ymm2 = ymm2 * ymm5.
	LONG $0xd573ddc5; BYTE $0x20               // vpsrlq    ymm4, ymm5, 32
	LONG $0xe2f4ddc5                           // vpmuludq    ymm4, ymm4, ymm2
	LONG $0xd273bdc5; BYTE $0x20               // vpsrlq    ymm8, ymm2, 32
	LONG $0xc5f43dc5                           // vpmuludq    ymm8, ymm8, ymm5
	LONG $0xe4d4bdc5                           // vpaddq    ymm4, ymm8, ymm4
	LONG $0xf473ddc5; BYTE $0x20               // vpsllq    ymm4, ymm4, 32
	LONG $0xd2f4d5c5                           // vpmuludq    ymm2, ymm5, ymm2
	LONG $0xd4d4edc5                           // vpaddq    ymm2, ymm2, ymm4
	// Vector 2: ymm3 = ymm3 * ymm6.
	LONG $0xd673ddc5; BYTE $0x20               // vpsrlq    ymm4, ymm6, 32
	LONG $0xe3f4ddc5                           // vpmuludq    ymm4, ymm4, ymm3
	LONG $0xd373d5c5; BYTE $0x20               // vpsrlq    ymm5, ymm3, 32
	LONG $0xedf4cdc5                           // vpmuludq    ymm5, ymm6, ymm5
	LONG $0xe4d4d5c5                           // vpaddq    ymm4, ymm5, ymm4
	LONG $0xf473ddc5; BYTE $0x20               // vpsllq    ymm4, ymm4, 32
	LONG $0xdbf4cdc5                           // vpmuludq    ymm3, ymm6, ymm3
	LONG $0xdcd4e5c5                           // vpaddq    ymm3, ymm3, ymm4
	// Vector 3: ymm0 = ymm0 * ymm7.
	LONG $0xd773ddc5; BYTE $0x20               // vpsrlq    ymm4, ymm7, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xd073d5c5; BYTE $0x20               // vpsrlq    ymm5, ymm0, 32
	LONG $0xedf4c5c5                           // vpmuludq    ymm5, ymm7, ymm5
	LONG $0xe4d4d5c5                           // vpaddq    ymm4, ymm5, ymm4
	LONG $0xf473ddc5; BYTE $0x20               // vpsllq    ymm4, ymm4, 32
	LONG $0xc0f4c5c5                           // vpmuludq    ymm0, ymm7, ymm0
	LONG $0xc4d4fdc5                           // vpaddq    ymm0, ymm0, ymm4
	// Store the 4 result vectors to out and advance by 16 elements.
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_798
	// Vector part finished. If rsi != r10 continue at LBB0_800 (outside this
	// chunk); otherwise go to LBB0_825 (outside this chunk; presumably the common exit).
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_800
	JMP  LBB0_825

// LBB0_807: buffer-overlap check ahead of the 4-byte-element vector loop at LBB0_810.
//   rdx = inLeft, rcx = inRight, r8 = out; r10 is used as the element count.
// Each buffer's end address is base + 4*r10. If out's range intersects inLeft's
// or inRight's range, control goes to LBB0_812 (outside this chunk; presumably
// the scalar path) with rsi = 0. Otherwise it falls through with
// rsi = r10d & -32 (count rounded down to a multiple of 32) and rdi = 0.
LBB0_807:
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	// r9b = (end of inLeft > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	// r11b = (end of out > inLeft)
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	// al = (end of inRight > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	// dil = (end of out > inRight)
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// out overlaps inLeft when both r9b and r11b are set.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_812
	// out overlaps inRight when both al and dil are set.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_812
	// No overlap: rsi = vector-loop bound, rdi = starting index 0.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_810: vector loop for element-wise single-precision float multiply,
// 32 elements (four 32-byte YMM vectors) per iteration.
//   rdx = inLeft, rcx = inRight, r8 = out; rdi = element index; rsi = loop bound.
LBB0_810:
	// Load 4 vectors from inRight, multiply by the matching inLeft vectors.
	LONG $0x0410fcc5; BYTE $0xb9               // vmovups    ymm0, yword [rcx + 4*rdi]
	LONG $0x4c10fcc5; WORD $0x20b9             // vmovups    ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x5410fcc5; WORD $0x40b9             // vmovups    ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c10fcc5; WORD $0x60b9             // vmovups    ymm3, yword [rcx + 4*rdi + 96]
	LONG $0x0459fcc5; BYTE $0xba               // vmulps    ymm0, ymm0, yword [rdx + 4*rdi]
	LONG $0x4c59f4c5; WORD $0x20ba             // vmulps    ymm1, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x5459ecc5; WORD $0x40ba             // vmulps    ymm2, ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c59e4c5; WORD $0x60ba             // vmulps    ymm3, ymm3, yword [rdx + 4*rdi + 96]
	// Store the products to out and advance by 32 elements.
	LONG $0x117cc1c4; WORD $0xb804             // vmovups    yword [r8 + 4*rdi], ymm0
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x117cc1c4; WORD $0xb854; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm3
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_810
	// Vector part finished. If rsi != r10 continue at LBB0_812 (outside this
	// chunk); otherwise go to LBB0_825 (outside this chunk; presumably the common exit).
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_812
	JMP  LBB0_825

// LBB0_390: buffer-overlap check ahead of the 8-byte-element vector loop at LBB0_393.
//   rdx = inLeft, rcx = inRight, r8 = out; r10 is used as the element count.
// Each buffer's end address is base + 8*r10. If out's range intersects inLeft's
// or inRight's range, control goes to LBB0_395 (outside this chunk; presumably
// the scalar path) with rsi = 0. Otherwise it falls through with
// rsi = r10d & -16 (count rounded down to a multiple of 16) and rdi = 0.
LBB0_390:
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	// r9b = (end of inLeft > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	// r11b = (end of out > inLeft)
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	// al = (end of inRight > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	// dil = (end of out > inRight)
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// out overlaps inLeft when both r9b and r11b are set.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_395
	// out overlaps inRight when both al and dil are set.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_395
	// No overlap: rsi = vector-loop bound, rdi = starting index 0.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

// LBB0_393: vector loop for element-wise subtract of 64-bit integers
// (out = inLeft - inRight), 16 elements (four 32-byte YMM vectors) per iteration.
//   rdx = inLeft, rcx = inRight, r8 = out; rdi = element index; rsi = loop bound.
LBB0_393:
	// Load 4 vectors from inLeft, subtract the matching inRight vectors.
	LONG $0x046ffec5; BYTE $0xfa               // vmovdqu    ymm0, yword [rdx + 8*rdi]
	LONG $0x4c6ffec5; WORD $0x20fa             // vmovdqu    ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x546ffec5; WORD $0x40fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x04fbfdc5; BYTE $0xf9               // vpsubq    ymm0, ymm0, yword [rcx + 8*rdi]
	LONG $0x4cfbf5c5; WORD $0x20f9             // vpsubq    ymm1, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x54fbedc5; WORD $0x40f9             // vpsubq    ymm2, ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5cfbe5c5; WORD $0x60f9             // vpsubq    ymm3, ymm3, yword [rcx + 8*rdi + 96]
	// Store the differences to out and advance by 16 elements.
	LONG $0x7f7ec1c4; WORD $0xf804             // vmovdqu    yword [r8 + 8*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xf84c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_393
	// Vector part finished. If rsi != r10 continue at LBB0_395 (outside this
	// chunk); otherwise go to LBB0_825 (outside this chunk; presumably the common exit).
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_395
	JMP  LBB0_825

// LBB0_402: buffer-overlap check ahead of the 4-byte-element vector loop at LBB0_405.
//   rdx = inLeft, rcx = inRight, r8 = out; r10 is used as the element count.
// Each buffer's end address is base + 4*r10. If out's range intersects inLeft's
// or inRight's range, control goes to LBB0_407 (outside this chunk; presumably
// the scalar path) with rsi = 0. Otherwise it falls through with
// rsi = r10d & -32 (count rounded down to a multiple of 32) and rdi = 0.
LBB0_402:
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	// r9b = (end of inLeft > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	// r11b = (end of out > inLeft)
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	// al = (end of inRight > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	// dil = (end of out > inRight)
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// out overlaps inLeft when both r9b and r11b are set.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_407
	// out overlaps inRight when both al and dil are set.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_407
	// No overlap: rsi = vector-loop bound, rdi = starting index 0.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_405: vector loop for element-wise single-precision float subtract
// (out = inLeft - inRight), 32 elements (four 32-byte YMM vectors) per iteration.
//   rdx = inLeft, rcx = inRight, r8 = out; rdi = element index; rsi = loop bound.
LBB0_405:
	// Load 4 vectors from inLeft, subtract the matching inRight vectors.
	LONG $0x0410fcc5; BYTE $0xba               // vmovups    ymm0, yword [rdx + 4*rdi]
	LONG $0x4c10fcc5; WORD $0x20ba             // vmovups    ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x5410fcc5; WORD $0x40ba             // vmovups    ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c10fcc5; WORD $0x60ba             // vmovups    ymm3, yword [rdx + 4*rdi + 96]
	LONG $0x045cfcc5; BYTE $0xb9               // vsubps    ymm0, ymm0, yword [rcx + 4*rdi]
	LONG $0x4c5cf4c5; WORD $0x20b9             // vsubps    ymm1, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x545cecc5; WORD $0x40b9             // vsubps    ymm2, ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c5ce4c5; WORD $0x60b9             // vsubps    ymm3, ymm3, yword [rcx + 4*rdi + 96]
	// Store the differences to out and advance by 32 elements.
	LONG $0x117cc1c4; WORD $0xb804             // vmovups    yword [r8 + 4*rdi], ymm0
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x117cc1c4; WORD $0xb854; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm3
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_405
	// Vector part finished. If rsi != r10 continue at LBB0_407 (outside this
	// chunk); otherwise go to LBB0_825 (outside this chunk; presumably the common exit).
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_407
	JMP  LBB0_825

// LBB0_523: buffer-overlap check ahead of the 8-byte-element vector loop at LBB0_526.
//   rdx = inLeft, rcx = inRight, r8 = out; r10 is used as the element count.
// Each buffer's end address is base + 8*r10. If out's range intersects inLeft's
// or inRight's range, control goes to LBB0_528 (outside this chunk; presumably
// the scalar path) with rsi = 0. Otherwise it falls through with
// rsi = r10d & -16 (count rounded down to a multiple of 16) and rdi = 0.
LBB0_523:
	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
	// r9b = (end of inLeft > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
	// r11b = (end of out > inLeft)
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	// al = (end of inRight > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	// dil = (end of out > inRight)
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// out overlaps inLeft when both r9b and r11b are set.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_528
	// out overlaps inRight when both al and dil are set.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_528
	// No overlap: rsi = vector-loop bound, rdi = starting index 0.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi

// LBB0_526: vector loop for element-wise subtract of 64-bit integers
// (out = inLeft - inRight), 16 elements (four 32-byte YMM vectors) per iteration.
//   rdx = inLeft, rcx = inRight, r8 = out; rdi = element index; rsi = loop bound.
LBB0_526:
	// Load 4 vectors from inLeft, subtract the matching inRight vectors.
	LONG $0x046ffec5; BYTE $0xfa               // vmovdqu    ymm0, yword [rdx + 8*rdi]
	LONG $0x4c6ffec5; WORD $0x20fa             // vmovdqu    ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x546ffec5; WORD $0x40fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c6ffec5; WORD $0x60fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x04fbfdc5; BYTE $0xf9               // vpsubq    ymm0, ymm0, yword [rcx + 8*rdi]
	LONG $0x4cfbf5c5; WORD $0x20f9             // vpsubq    ymm1, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x54fbedc5; WORD $0x40f9             // vpsubq    ymm2, ymm2, yword [rcx + 8*rdi + 64]
	LONG $0x5cfbe5c5; WORD $0x60f9             // vpsubq    ymm3, ymm3, yword [rcx + 8*rdi + 96]
	// Store the differences to out and advance by 16 elements.
	LONG $0x7f7ec1c4; WORD $0xf804             // vmovdqu    yword [r8 + 8*rdi], ymm0
	LONG $0x7f7ec1c4; WORD $0xf84c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm3
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_526
	// Vector part finished. If rsi != r10 continue at LBB0_528 (outside this
	// chunk); otherwise go to LBB0_825 (outside this chunk; presumably the common exit).
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_528
	JMP  LBB0_825

// LBB0_535: buffer-overlap check ahead of the 4-byte-element vector loop at LBB0_538.
//   rdx = inLeft, rcx = inRight, r8 = out; r10 is used as the element count.
// Each buffer's end address is base + 4*r10. If out's range intersects inLeft's
// or inRight's range, control goes to LBB0_540 (outside this chunk; presumably
// the scalar path) with rsi = 0. Otherwise it falls through with
// rsi = r10d & -32 (count rounded down to a multiple of 32) and rdi = 0.
LBB0_535:
	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
	// r9b = (end of inLeft > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	LONG $0xd1970f41         // seta    r9b
	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
	// r11b = (end of out > inLeft)
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	LONG $0xd3970f41         // seta    r11b
	// al = (end of inRight > out)
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	WORD $0x970f; BYTE $0xd0 // seta    al
	// dil = (end of out > inRight)
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	LONG $0xd7970f40         // seta    dil
	WORD $0xf631             // xor    esi, esi
	// out overlaps inLeft when both r9b and r11b are set.
	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
	JNE  LBB0_540
	// out overlaps inRight when both al and dil are set.
	WORD $0x2040; BYTE $0xf8 // and    al, dil
	JNE  LBB0_540
	// No overlap: rsi = vector-loop bound, rdi = starting index 0.
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

// LBB0_538: vector loop for element-wise single-precision float subtract
// (out = inLeft - inRight), 32 elements (four 32-byte YMM vectors) per iteration.
//   rdx = inLeft, rcx = inRight, r8 = out; rdi = element index; rsi = loop bound.
LBB0_538:
	// Load 4 vectors from inLeft, subtract the matching inRight vectors.
	LONG $0x0410fcc5; BYTE $0xba               // vmovups    ymm0, yword [rdx + 4*rdi]
	LONG $0x4c10fcc5; WORD $0x20ba             // vmovups    ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x5410fcc5; WORD $0x40ba             // vmovups    ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c10fcc5; WORD $0x60ba             // vmovups    ymm3, yword [rdx + 4*rdi + 96]
	LONG $0x045cfcc5; BYTE $0xb9               // vsubps    ymm0, ymm0, yword [rcx + 4*rdi]
	LONG $0x4c5cf4c5; WORD $0x20b9             // vsubps    ymm1, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x545cecc5; WORD $0x40b9             // vsubps    ymm2, ymm2, yword [rcx + 4*rdi + 64]
	LONG $0x5c5ce4c5; WORD $0x60b9             // vsubps    ymm3, ymm3, yword [rcx + 4*rdi + 96]
	// Store the differences to out and advance by 32 elements.
	LONG $0x117cc1c4; WORD $0xb804             // vmovups    yword [r8 + 4*rdi], ymm0
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm1
	LONG $0x117cc1c4; WORD $0xb854; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm3
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB0_538
	// Vector part finished. If rsi != r10 continue at LBB0_540 (outside this
	// chunk); otherwise go to LBB0_825 (outside this chunk; presumably the common exit).
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JNE  LBB0_540
	JMP  LBB0_825

// LBB0_592: set-up for the 4x-unrolled vectorized byte multiply at LBB0_593.
//   rax is set before this label (presumably a count of 32-byte vectors - confirm);
//   here it is rounded down to a multiple of 4 and negated so the loop can count
//   up towards zero in steps of 4.
//   rsi = byte offset into the buffers, starting at 0.
//   ymm0 = 32 bytes loaded through rbp, which the function prologue points at
//   LCDATA1 (0x00ff in every 16-bit word).
LBB0_592:
	LONG $0xfce08348             // and    rax, -4
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0xf631                 // xor    esi, esi
	LONG $0x456ffdc5; BYTE $0x00 // vmovdqa    ymm0, yword 0[rbp] /* [rip + .LCPI0_0] */

// LBB0_593: element-wise 8-bit multiply, 128 bytes (four 32-byte vectors) per
// iteration. rdx = inLeft, rcx = inRight, r8 = out.
// For each vector: the high and low byte halves are widened to 16-bit words
// (vpunpckhbw / vpunpcklbw of a register with itself), multiplied with vpmullw,
// masked with ymm0 to keep only the low byte of every product, and packed back
// to bytes with vpackuswb.
LBB0_593:
	// Bytes [rsi, rsi+32)
	LONG $0x0c6ffec5; BYTE $0x32               // vmovdqu    ymm1, yword [rdx + rsi]
	LONG $0x146ffec5; BYTE $0x31               // vmovdqu    ymm2, yword [rcx + rsi]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x300c             // vmovdqu    yword [r8 + rsi], ymm1
	// Bytes [rsi+32, rsi+64)
	LONG $0x4c6ffec5; WORD $0x2032             // vmovdqu    ymm1, yword [rdx + rsi + 32]
	LONG $0x546ffec5; WORD $0x2031             // vmovdqu    ymm2, yword [rcx + rsi + 32]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x304c; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm1
	// Bytes [rsi+64, rsi+96)
	LONG $0x4c6ffec5; WORD $0x4032             // vmovdqu    ymm1, yword [rdx + rsi + 64]
	LONG $0x546ffec5; WORD $0x4031             // vmovdqu    ymm2, yword [rcx + rsi + 64]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x304c; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm1
	// Bytes [rsi+96, rsi+128)
	LONG $0x4c6ffec5; WORD $0x6032             // vmovdqu    ymm1, yword [rdx + rsi + 96]
	LONG $0x546ffec5; WORD $0x6031             // vmovdqu    ymm2, yword [rcx + rsi + 96]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x304c; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm1
	// rsi += 128 (written as a subtract of -128); rax += 4, loop until it reaches zero.
	LONG $0x80ee8348                           // sub    rsi, -128
	LONG $0x04c08348                           // add    rax, 4
	JNE  LBB0_593

// LBB0_594: leftover-vector stage of the vectorized byte multiply.
//   r9 is set before this label (presumably the number of 32-byte vectors not
//   covered by the 4x-unrolled loop - confirm). If it is zero, skip to LBB0_597.
//   Otherwise r9 is negated so LBB0_596 can count up to zero, and ymm0 is
//   (re)loaded with the 0x00ff-per-word mask through rbp (LCDATA1).
//   rsi carries the current byte offset into the buffers.
LBB0_594:
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	JE   LBB0_597
	WORD $0xf749; BYTE $0xd9     // neg    r9
	LONG $0x456ffdc5; BYTE $0x00 // vmovdqa    ymm0, yword 0[rbp] /* [rip + .LCPI0_0] */

// LBB0_596: element-wise 8-bit multiply, one 32-byte vector per iteration.
// rdx = inLeft, rcx = inRight, r8 = out. Bytes are widened to words, multiplied
// with vpmullw, masked to their low byte and packed back to bytes.
LBB0_596:
	LONG $0x0c6ffec5; BYTE $0x32   // vmovdqu    ymm1, yword [rdx + rsi]
	LONG $0x146ffec5; BYTE $0x31   // vmovdqu    ymm2, yword [rcx + rsi]
	LONG $0xd968f5c5               // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5               // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5               // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5               // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5               // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5               // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5               // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5               // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5               // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x300c // vmovdqu    yword [r8 + rsi], ymm1
	// Advance 32 bytes; r9 counts up and the loop ends when it reaches zero.
	LONG $0x20c68348               // add    rsi, 32
	WORD $0xff49; BYTE $0xc1       // inc    r9
	JNE  LBB0_596

// LBB0_597: end of the vectorized byte multiply. rdi is set before this label
// (presumably the number of elements already processed - confirm); r10 is the
// total it is compared against. If they are equal jump to LBB0_825 (outside
// this chunk; presumably the common exit); otherwise fall into the scalar tail.
LBB0_597:
	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
	JE   LBB0_825

// LBB0_598: scalar tail set-up.
//   r9  = ~rdi + r10 = r10 - rdi - 1   (elements remaining, minus one)
//   rsi = r10 & 3                      (number of single-element steps to peel)
// If rsi is zero the peel loop is skipped.
LBB0_598:
	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
	LONG $0x03e68348         // and    rsi, 3
	JE   LBB0_600

// LBB0_599: peel loop, one byte per iteration: out[rdi] = low 8 bits of
// inRight[rdi] * inLeft[rdi]. The one-operand `mul byte` multiplies al by the
// memory operand and leaves the 16-bit product in ax; only al is stored.
LBB0_599:
	LONG $0x3904b60f         // movzx    eax, byte [rcx + rdi]
	WORD $0x24f6; BYTE $0x3a // mul    byte [rdx + rdi]
	LONG $0x38048841         // mov    byte [r8 + rdi], al
	LONG $0x01c78348         // add    rdi, 1
	LONG $0xffc68348         // add    rsi, -1
	JNE  LBB0_599

// LBB0_600: r9 was computed before peeling; r9 < 3 means fewer than 4 elements
// were left at LBB0_598, so skip the 4x-unrolled loop and jump to LBB0_825.
LBB0_600:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_601: scalar byte multiply unrolled 4x; runs until rdi == r10.
LBB0_601:
	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
	WORD $0x24f6; BYTE $0x3a     // mul    byte [rdx + rdi]
	LONG $0x38048841             // mov    byte [r8 + rdi], al
	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
	LONG $0x013a64f6             // mul    byte [rdx + rdi + 1]
	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
	LONG $0x023a64f6             // mul    byte [rdx + rdi + 2]
	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
	LONG $0x033a64f6             // mul    byte [rdx + rdi + 3]
	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
	LONG $0x04c78348             // add    rdi, 4
	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
	JNE  LBB0_601
	JMP  LBB0_825

// LBB0_728: set-up for the 4x-unrolled vectorized byte multiply at LBB0_729.
//   rax is set before this label (presumably a count of 32-byte vectors - confirm);
//   here it is rounded down to a multiple of 4 and negated so the loop can count
//   up towards zero in steps of 4.
//   rsi = byte offset into the buffers, starting at 0.
//   ymm0 = 32 bytes loaded through rbp, which the function prologue points at
//   LCDATA1 (0x00ff in every 16-bit word).
LBB0_728:
	LONG $0xfce08348             // and    rax, -4
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0xf631                 // xor    esi, esi
	LONG $0x456ffdc5; BYTE $0x00 // vmovdqa    ymm0, yword 0[rbp] /* [rip + .LCPI0_0] */

// LBB0_729: element-wise 8-bit multiply, 128 bytes (four 32-byte vectors) per
// iteration. rdx = inLeft, rcx = inRight, r8 = out.
// For each vector: the high and low byte halves are widened to 16-bit words
// (vpunpckhbw / vpunpcklbw of a register with itself), multiplied with vpmullw,
// masked with ymm0 to keep only the low byte of every product, and packed back
// to bytes with vpackuswb.
LBB0_729:
	// Bytes [rsi, rsi+32)
	LONG $0x0c6ffec5; BYTE $0x32               // vmovdqu    ymm1, yword [rdx + rsi]
	LONG $0x146ffec5; BYTE $0x31               // vmovdqu    ymm2, yword [rcx + rsi]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x300c             // vmovdqu    yword [r8 + rsi], ymm1
	// Bytes [rsi+32, rsi+64)
	LONG $0x4c6ffec5; WORD $0x2032             // vmovdqu    ymm1, yword [rdx + rsi + 32]
	LONG $0x546ffec5; WORD $0x2031             // vmovdqu    ymm2, yword [rcx + rsi + 32]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x304c; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm1
	// Bytes [rsi+64, rsi+96)
	LONG $0x4c6ffec5; WORD $0x4032             // vmovdqu    ymm1, yword [rdx + rsi + 64]
	LONG $0x546ffec5; WORD $0x4031             // vmovdqu    ymm2, yword [rcx + rsi + 64]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x304c; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm1
	// Bytes [rsi+96, rsi+128)
	LONG $0x4c6ffec5; WORD $0x6032             // vmovdqu    ymm1, yword [rdx + rsi + 96]
	LONG $0x546ffec5; WORD $0x6031             // vmovdqu    ymm2, yword [rcx + rsi + 96]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x304c; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm1
	// rsi += 128 (written as a subtract of -128); rax += 4, loop until it reaches zero.
	LONG $0x80ee8348                           // sub    rsi, -128
	LONG $0x04c08348                           // add    rax, 4
	JNE  LBB0_729

// LBB0_730: leftover-vector stage of the vectorized byte multiply.
//   r9 is set before this label (presumably the number of 32-byte vectors not
//   covered by the 4x-unrolled loop - confirm). If it is zero, skip to LBB0_733.
//   Otherwise r9 is negated so LBB0_732 can count up to zero, and ymm0 is
//   (re)loaded with the 0x00ff-per-word mask through rbp (LCDATA1).
//   rsi carries the current byte offset into the buffers.
LBB0_730:
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	JE   LBB0_733
	WORD $0xf749; BYTE $0xd9     // neg    r9
	LONG $0x456ffdc5; BYTE $0x00 // vmovdqa    ymm0, yword 0[rbp] /* [rip + .LCPI0_0] */

// LBB0_732: element-wise 8-bit multiply, one 32-byte vector per iteration.
// rdx = inLeft, rcx = inRight, r8 = out. Bytes are widened to words, multiplied
// with vpmullw, masked to their low byte and packed back to bytes.
LBB0_732:
	LONG $0x0c6ffec5; BYTE $0x32   // vmovdqu    ymm1, yword [rdx + rsi]
	LONG $0x146ffec5; BYTE $0x31   // vmovdqu    ymm2, yword [rcx + rsi]
	LONG $0xd968f5c5               // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5               // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5               // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5               // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5               // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5               // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5               // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5               // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5               // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x300c // vmovdqu    yword [r8 + rsi], ymm1
	// Advance 32 bytes; r9 counts up and the loop ends when it reaches zero.
	LONG $0x20c68348               // add    rsi, 32
	WORD $0xff49; BYTE $0xc1       // inc    r9
	JNE  LBB0_732

// LBB0_733: end of the vectorized byte multiply. rdi is set before this label
// (presumably the number of elements already processed - confirm); r10 is the
// total it is compared against. If they are equal jump to LBB0_825 (outside
// this chunk; presumably the common exit); otherwise fall into the scalar tail.
LBB0_733:
	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
	JE   LBB0_825

// LBB0_734: scalar tail set-up.
//   r9  = ~rdi + r10 = r10 - rdi - 1   (elements remaining, minus one)
//   rsi = r10 & 3                      (number of single-element steps to peel)
// If rsi is zero the peel loop is skipped.
LBB0_734:
	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
	LONG $0x03e68348         // and    rsi, 3
	JE   LBB0_736

// LBB0_735: peel loop, one byte per iteration: out[rdi] = low 8 bits of
// inRight[rdi] * inLeft[rdi]. The one-operand `mul byte` multiplies al by the
// memory operand and leaves the 16-bit product in ax; only al is stored.
LBB0_735:
	LONG $0x3904b60f         // movzx    eax, byte [rcx + rdi]
	WORD $0x24f6; BYTE $0x3a // mul    byte [rdx + rdi]
	LONG $0x38048841         // mov    byte [r8 + rdi], al
	LONG $0x01c78348         // add    rdi, 1
	LONG $0xffc68348         // add    rsi, -1
	JNE  LBB0_735

// LBB0_736: r9 was computed before peeling; r9 < 3 means fewer than 4 elements
// were left at LBB0_734, so skip the 4x-unrolled loop and jump to LBB0_825.
LBB0_736:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

// LBB0_737: scalar byte multiply unrolled 4x; runs until rdi == r10.
LBB0_737:
	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
	WORD $0x24f6; BYTE $0x3a     // mul    byte [rdx + rdi]
	LONG $0x38048841             // mov    byte [r8 + rdi], al
	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
	LONG $0x013a64f6             // mul    byte [rdx + rdi + 1]
	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
	LONG $0x023a64f6             // mul    byte [rdx + rdi + 2]
	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
	LONG $0x033a64f6             // mul    byte [rdx + rdi + 3]
	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
	LONG $0x04c78348             // add    rdi, 4
	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
	JNE  LBB0_737
	JMP  LBB0_825

LBB0_578:
	LONG $0xfce08348             // and    rax, -4
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0xf631                 // xor    esi, esi
	LONG $0x456ffdc5; BYTE $0x00 // vmovdqa    ymm0, yword 0[rbp] /* [rip + .LCPI0_0] */

LBB0_579:
	LONG $0x0c6ffec5; BYTE $0x32               // vmovdqu    ymm1, yword [rdx + rsi]
	LONG $0x146ffec5; BYTE $0x31               // vmovdqu    ymm2, yword [rcx + rsi]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x300c             // vmovdqu    yword [r8 + rsi], ymm1
	LONG $0x4c6ffec5; WORD $0x2032             // vmovdqu    ymm1, yword [rdx + rsi + 32]
	LONG $0x546ffec5; WORD $0x2031             // vmovdqu    ymm2, yword [rcx + rsi + 32]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x304c; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm1
	LONG $0x4c6ffec5; WORD $0x4032             // vmovdqu    ymm1, yword [rdx + rsi + 64]
	LONG $0x546ffec5; WORD $0x4031             // vmovdqu    ymm2, yword [rcx + rsi + 64]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x304c; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm1
	LONG $0x4c6ffec5; WORD $0x6032             // vmovdqu    ymm1, yword [rdx + rsi + 96]
	LONG $0x546ffec5; WORD $0x6031             // vmovdqu    ymm2, yword [rcx + rsi + 96]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x304c; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm1
	LONG $0x80ee8348                           // sub    rsi, -128
	LONG $0x04c08348                           // add    rax, 4
	JNE  LBB0_579

LBB0_580:
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	JE   LBB0_583
	WORD $0xf749; BYTE $0xd9     // neg    r9
	LONG $0x456ffdc5; BYTE $0x00 // vmovdqa    ymm0, yword 0[rbp] /* [rip + .LCPI0_0] */

LBB0_582:
	LONG $0x0c6ffec5; BYTE $0x32   // vmovdqu    ymm1, yword [rdx + rsi]
	LONG $0x146ffec5; BYTE $0x31   // vmovdqu    ymm2, yword [rcx + rsi]
	LONG $0xd968f5c5               // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5               // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5               // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5               // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5               // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5               // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5               // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5               // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5               // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x300c // vmovdqu    yword [r8 + rsi], ymm1
	LONG $0x20c68348               // add    rsi, 32
	WORD $0xff49; BYTE $0xc1       // inc    r9
	JNE  LBB0_582

LBB0_583:
	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
	JE   LBB0_825

LBB0_584:
	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
	LONG $0x03e68348         // and    rsi, 3
	JE   LBB0_586

LBB0_585:
	LONG $0x3904b60f         // movzx    eax, byte [rcx + rdi]
	WORD $0x24f6; BYTE $0x3a // mul    byte [rdx + rdi]
	LONG $0x38048841         // mov    byte [r8 + rdi], al
	LONG $0x01c78348         // add    rdi, 1
	LONG $0xffc68348         // add    rsi, -1
	JNE  LBB0_585

LBB0_586:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_587:
	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
	WORD $0x24f6; BYTE $0x3a     // mul    byte [rdx + rdi]
	LONG $0x38048841             // mov    byte [r8 + rdi], al
	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
	LONG $0x013a64f6             // mul    byte [rdx + rdi + 1]
	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
	LONG $0x023a64f6             // mul    byte [rdx + rdi + 2]
	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
	LONG $0x033a64f6             // mul    byte [rdx + rdi + 3]
	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
	LONG $0x04c78348             // add    rdi, 4
	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
	JNE  LBB0_587
	JMP  LBB0_825

LBB0_714:
	LONG $0xfce08348             // and    rax, -4
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0xf631                 // xor    esi, esi
	LONG $0x456ffdc5; BYTE $0x00 // vmovdqa    ymm0, yword 0[rbp] /* [rip + .LCPI0_0] */

LBB0_715:
	LONG $0x0c6ffec5; BYTE $0x32               // vmovdqu    ymm1, yword [rdx + rsi]
	LONG $0x146ffec5; BYTE $0x31               // vmovdqu    ymm2, yword [rcx + rsi]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x300c             // vmovdqu    yword [r8 + rsi], ymm1
	LONG $0x4c6ffec5; WORD $0x2032             // vmovdqu    ymm1, yword [rdx + rsi + 32]
	LONG $0x546ffec5; WORD $0x2031             // vmovdqu    ymm2, yword [rcx + rsi + 32]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x304c; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm1
	LONG $0x4c6ffec5; WORD $0x4032             // vmovdqu    ymm1, yword [rdx + rsi + 64]
	LONG $0x546ffec5; WORD $0x4031             // vmovdqu    ymm2, yword [rcx + rsi + 64]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x304c; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm1
	LONG $0x4c6ffec5; WORD $0x6032             // vmovdqu    ymm1, yword [rdx + rsi + 96]
	LONG $0x546ffec5; WORD $0x6031             // vmovdqu    ymm2, yword [rcx + rsi + 96]
	LONG $0xd968f5c5                           // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5                           // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5                           // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5                           // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5                           // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5                           // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5                           // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5                           // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5                           // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x304c; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm1
	LONG $0x80ee8348                           // sub    rsi, -128
	LONG $0x04c08348                           // add    rax, 4
	JNE  LBB0_715

LBB0_716:
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	JE   LBB0_719
	WORD $0xf749; BYTE $0xd9     // neg    r9
	LONG $0x456ffdc5; BYTE $0x00 // vmovdqa    ymm0, yword 0[rbp] /* [rip + .LCPI0_0] */

LBB0_718:
	LONG $0x0c6ffec5; BYTE $0x32   // vmovdqu    ymm1, yword [rdx + rsi]
	LONG $0x146ffec5; BYTE $0x31   // vmovdqu    ymm2, yword [rcx + rsi]
	LONG $0xd968f5c5               // vpunpckhbw    ymm3, ymm1, ymm1
	LONG $0xe268edc5               // vpunpckhbw    ymm4, ymm2, ymm2
	LONG $0xdbd5ddc5               // vpmullw    ymm3, ymm4, ymm3
	LONG $0xd8dbe5c5               // vpand    ymm3, ymm3, ymm0
	LONG $0xc960f5c5               // vpunpcklbw    ymm1, ymm1, ymm1
	LONG $0xd260edc5               // vpunpcklbw    ymm2, ymm2, ymm2
	LONG $0xc9d5edc5               // vpmullw    ymm1, ymm2, ymm1
	LONG $0xc8dbf5c5               // vpand    ymm1, ymm1, ymm0
	LONG $0xcb67f5c5               // vpackuswb    ymm1, ymm1, ymm3
	LONG $0x7f7ec1c4; WORD $0x300c // vmovdqu    yword [r8 + rsi], ymm1
	LONG $0x20c68348               // add    rsi, 32
	WORD $0xff49; BYTE $0xc1       // inc    r9
	JNE  LBB0_718

LBB0_719:
	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
	JE   LBB0_825

LBB0_720:
	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
	LONG $0x03e68348         // and    rsi, 3
	JE   LBB0_722

LBB0_721:
	LONG $0x3904b60f         // movzx    eax, byte [rcx + rdi]
	WORD $0x24f6; BYTE $0x3a // mul    byte [rdx + rdi]
	LONG $0x38048841         // mov    byte [r8 + rdi], al
	LONG $0x01c78348         // add    rdi, 1
	LONG $0xffc68348         // add    rsi, -1
	JNE  LBB0_721

LBB0_722:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB0_825

LBB0_723:
	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
	WORD $0x24f6; BYTE $0x3a     // mul    byte [rdx + rdi]
	LONG $0x38048841             // mov    byte [r8 + rdi], al
	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
	LONG $0x013a64f6             // mul    byte [rdx + rdi + 1]
	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
	LONG $0x023a64f6             // mul    byte [rdx + rdi + 2]
	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
	LONG $0x033a64f6             // mul    byte [rdx + rdi + 3]
	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
	LONG $0x04c78348             // add    rdi, 4
	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
	JNE  LBB0_723

LBB0_825:
	VZEROUPPER
	RET

// LCDATA2: 32-byte file-local constant (one YMM register's worth of data).
// Every 16-bit word is 0x00ff, i.e. the low byte of each word is set and the
// high byte is clear -- the same bit pattern as LCDATA1 at the top of the file.
// The routine below loads this symbol's address into BP on entry.
// NOTE(review): presumably used as the low-byte mask for VPAND after 16-bit
// multiplies, as the preceding routine does with LCDATA1; the uses lie outside
// this part of the file -- confirm.
DATA LCDATA2<>+0x000(SB)/8, $0x00ff00ff00ff00ff
DATA LCDATA2<>+0x008(SB)/8, $0x00ff00ff00ff00ff
DATA LCDATA2<>+0x010(SB)/8, $0x00ff00ff00ff00ff
DATA LCDATA2<>+0x018(SB)/8, $0x00ff00ff00ff00ff
// Flag 8 is RODATA in Go's textflag.h; $32 is the symbol size in bytes (4 x 8).
GLOBL LCDATA2<>(SB), 8, $32

TEXT ยท_arithmetic_arr_scalar_avx2(SB), $0-48

	MOVQ typ+0(FP), DI
	MOVQ op+8(FP), SI
	MOVQ inLeft+16(FP), DX
	MOVQ inRight+24(FP), CX
	MOVQ out+32(FP), R8
	MOVQ len+40(FP), R9
	LEAQ LCDATA2<>(SB), BP

	LONG $0x14fe8040         // cmp    sil, 20
	JG   LBB1_12
	WORD $0x8440; BYTE $0xf6 // test    sil, sil
	JE   LBB1_23
	LONG $0x01fe8040         // cmp    sil, 1
	JE   LBB1_31
	LONG $0x02fe8040         // cmp    sil, 2
	JNE  LBB1_1109
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB1_55
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB1_97
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB1_157
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB1_160
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB1_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018b             // mov    eax, dword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_11
	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_445
	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_445

LBB1_11:
	WORD $0xf631 // xor    esi, esi

LBB1_665:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_667

LBB1_666:
	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xaf0f; BYTE $0xc8 // imul    ecx, eax
	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_666

LBB1_667:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

LBB1_668:
	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_668
	JMP  LBB1_1109

LBB1_12:
	LONG $0x15fe8040         // cmp    sil, 21
	JE   LBB1_39
	LONG $0x16fe8040         // cmp    sil, 22
	JE   LBB1_47
	LONG $0x17fe8040         // cmp    sil, 23
	JNE  LBB1_1109
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB1_62
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB1_102
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB1_163
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB1_166
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB1_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018b             // mov    eax, dword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_22
	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_448
	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_448

LBB1_22:
	WORD $0xf631 // xor    esi, esi

LBB1_673:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_675

LBB1_674:
	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xaf0f; BYTE $0xc8 // imul    ecx, eax
	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_674

LBB1_675:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

LBB1_676:
	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_676
	JMP  LBB1_1109

LBB1_23:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB1_69
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB1_107
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB1_169
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB1_172
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB1_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018b             // mov    eax, dword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_30
	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_451
	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_451

LBB1_30:
	WORD $0xf631 // xor    esi, esi

LBB1_681:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_683

LBB1_682:
	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc101             // add    ecx, eax
	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_682

LBB1_683:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

LBB1_684:
	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_684
	JMP  LBB1_1109

LBB1_31:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB1_76
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB1_112
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB1_175
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB1_178
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB1_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018b             // mov    eax, dword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_38
	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_454
	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_454

LBB1_38:
	WORD $0xf631 // xor    esi, esi

LBB1_689:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_691

LBB1_690:
	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc129             // sub    ecx, eax
	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_690

LBB1_691:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

LBB1_692:
	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_692
	JMP  LBB1_1109

LBB1_39:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB1_83
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB1_117
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB1_181
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB1_184
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB1_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018b             // mov    eax, dword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_46
	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_457
	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_457

LBB1_46:
	WORD $0xf631 // xor    esi, esi

LBB1_697:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_699

LBB1_698:
	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc101             // add    ecx, eax
	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_698

LBB1_699:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

LBB1_700:
	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_700
	JMP  LBB1_1109

LBB1_47:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB1_90
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB1_122
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB1_187
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB1_190
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB1_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018b             // mov    eax, dword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_54
	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_460
	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_460

LBB1_54:
	WORD $0xf631 // xor    esi, esi

LBB1_705:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_707

LBB1_706:
	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc129             // sub    ecx, eax
	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_706

LBB1_707:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

LBB1_708:
	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_708
	JMP  LBB1_1109

LBB1_55:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB1_127
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB1_193
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB1_196
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB1_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	LONG $0x0110fbc5         // vmovsd    xmm0, qword [rcx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_61
	LONG $0xc20c8d48         // lea    rcx, [rdx + 8*rax]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_463
	LONG $0xc00c8d49         // lea    rcx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_463

LBB1_61:
	WORD $0xc931 // xor    ecx, ecx

LBB1_713:
	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_715

LBB1_714:
	LONG $0x0c59fbc5; BYTE $0xca   // vmulsd    xmm1, xmm0, qword [rdx + 8*rcx]
	LONG $0x117bc1c4; WORD $0xc80c // vmovsd    qword [r8 + 8*rcx], xmm1
	LONG $0x01c18348               // add    rcx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB1_714

LBB1_715:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB1_1109

LBB1_716:
	LONG $0x0c59fbc5; BYTE $0xca               // vmulsd    xmm1, xmm0, qword [rdx + 8*rcx]
	LONG $0x117bc1c4; WORD $0xc80c             // vmovsd    qword [r8 + 8*rcx], xmm1
	LONG $0x4c59fbc5; WORD $0x08ca             // vmulsd    xmm1, xmm0, qword [rdx + 8*rcx + 8]
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x08 // vmovsd    qword [r8 + 8*rcx + 8], xmm1
	LONG $0x4c59fbc5; WORD $0x10ca             // vmulsd    xmm1, xmm0, qword [rdx + 8*rcx + 16]
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x10 // vmovsd    qword [r8 + 8*rcx + 16], xmm1
	LONG $0x4c59fbc5; WORD $0x18ca             // vmulsd    xmm1, xmm0, qword [rdx + 8*rcx + 24]
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x18 // vmovsd    qword [r8 + 8*rcx + 24], xmm1
	LONG $0x04c18348                           // add    rcx, 4
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JNE  LBB1_716
	JMP  LBB1_1109

// Case LBB1_62: dispatch on edi, then multiply 64-bit floats by a scalar.
// edi <= 8 -> LBB1_132, edi == 9 -> LBB1_199, edi == 11 -> LBB1_202, any other
// value except 12 -> LBB1_1109. For edi == 12 with r9d > 0 the scalar path
// below stores xmm0 * [rdx + 8*i] to [r8 + 8*i] for each index i up to r9d,
// where xmm0 is the double loaded once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_62:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB1_132
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB1_199
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB1_202
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	// xmm0 = scalar operand; rax = element count (zero-extended r9d).
	LONG $0x0110fbc5         // vmovsd    xmm0, qword [rcx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_68
	// With 16+ elements: branch to LBB1_466 when the input range ends at or
	// before the output start, or the output range ends at or before the input
	// start (no overlap). LBB1_466 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0xc20c8d48         // lea    rcx, [rdx + 8*rax]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_466
	LONG $0xc00c8d49         // lea    rcx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_466

// Scalar path from the start: rcx = element index = 0.
LBB1_68:
	WORD $0xc931 // xor    ecx, ecx

// rsi = rax - rcx - 1 (pending elements minus one); rdi = rax & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_721:
	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_723

// One element per iteration, rdi times.
LBB1_722:
	LONG $0x0c59fbc5; BYTE $0xca   // vmulsd    xmm1, xmm0, qword [rdx + 8*rcx]
	LONG $0x117bc1c4; WORD $0xc80c // vmovsd    qword [r8 + 8*rcx], xmm1
	LONG $0x01c18348               // add    rcx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB1_722

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_721.
LBB1_723:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB1_1109

// Four elements per iteration until the index rcx reaches the count rax.
LBB1_724:
	LONG $0x0c59fbc5; BYTE $0xca               // vmulsd    xmm1, xmm0, qword [rdx + 8*rcx]
	LONG $0x117bc1c4; WORD $0xc80c             // vmovsd    qword [r8 + 8*rcx], xmm1
	LONG $0x4c59fbc5; WORD $0x08ca             // vmulsd    xmm1, xmm0, qword [rdx + 8*rcx + 8]
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x08 // vmovsd    qword [r8 + 8*rcx + 8], xmm1
	LONG $0x4c59fbc5; WORD $0x10ca             // vmulsd    xmm1, xmm0, qword [rdx + 8*rcx + 16]
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x10 // vmovsd    qword [r8 + 8*rcx + 16], xmm1
	LONG $0x4c59fbc5; WORD $0x18ca             // vmulsd    xmm1, xmm0, qword [rdx + 8*rcx + 24]
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x18 // vmovsd    qword [r8 + 8*rcx + 24], xmm1
	LONG $0x04c18348                           // add    rcx, 4
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JNE  LBB1_724
	JMP  LBB1_1109

// Case LBB1_69: dispatch on edi, then add a scalar to 64-bit floats.
// edi <= 8 -> LBB1_137, edi == 9 -> LBB1_205, edi == 11 -> LBB1_208, any other
// value except 12 -> LBB1_1109. For edi == 12 with r9d > 0 the scalar path
// below stores xmm0 + [rdx + 8*i] to [r8 + 8*i] for each index i up to r9d,
// where xmm0 is the double loaded once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_69:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB1_137
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB1_205
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB1_208
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	// xmm0 = scalar operand; rax = element count (zero-extended r9d).
	LONG $0x0110fbc5         // vmovsd    xmm0, qword [rcx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_75
	// With 16+ elements: branch to LBB1_469 when the input and output ranges
	// do not overlap. LBB1_469 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0xc20c8d48         // lea    rcx, [rdx + 8*rax]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_469
	LONG $0xc00c8d49         // lea    rcx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_469

// Scalar path from the start: rcx = element index = 0.
LBB1_75:
	WORD $0xc931 // xor    ecx, ecx

// rsi = rax - rcx - 1 (pending elements minus one); rdi = rax & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_729:
	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_731

// One element per iteration, rdi times.
LBB1_730:
	LONG $0x0c58fbc5; BYTE $0xca   // vaddsd    xmm1, xmm0, qword [rdx + 8*rcx]
	LONG $0x117bc1c4; WORD $0xc80c // vmovsd    qword [r8 + 8*rcx], xmm1
	LONG $0x01c18348               // add    rcx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB1_730

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_729.
LBB1_731:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB1_1109

// Four elements per iteration until the index rcx reaches the count rax.
LBB1_732:
	LONG $0x0c58fbc5; BYTE $0xca               // vaddsd    xmm1, xmm0, qword [rdx + 8*rcx]
	LONG $0x117bc1c4; WORD $0xc80c             // vmovsd    qword [r8 + 8*rcx], xmm1
	LONG $0x4c58fbc5; WORD $0x08ca             // vaddsd    xmm1, xmm0, qword [rdx + 8*rcx + 8]
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x08 // vmovsd    qword [r8 + 8*rcx + 8], xmm1
	LONG $0x4c58fbc5; WORD $0x10ca             // vaddsd    xmm1, xmm0, qword [rdx + 8*rcx + 16]
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x10 // vmovsd    qword [r8 + 8*rcx + 16], xmm1
	LONG $0x4c58fbc5; WORD $0x18ca             // vaddsd    xmm1, xmm0, qword [rdx + 8*rcx + 24]
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x18 // vmovsd    qword [r8 + 8*rcx + 24], xmm1
	LONG $0x04c18348                           // add    rcx, 4
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JNE  LBB1_732
	JMP  LBB1_1109

// Case LBB1_76: dispatch on edi, then subtract a scalar from 64-bit floats.
// edi <= 8 -> LBB1_142, edi == 9 -> LBB1_211, edi == 11 -> LBB1_214, any other
// value except 12 -> LBB1_1109. For edi == 12 with r9d > 0 the scalar path
// below stores [rdx + 8*i] - xmm0 to [r8 + 8*i] for each index i up to r9d,
// where xmm0 is the double loaded once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_76:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB1_142
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB1_211
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB1_214
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	// xmm0 = scalar operand (the subtrahend); rax = element count.
	LONG $0x0110fbc5         // vmovsd    xmm0, qword [rcx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_82
	// With 16+ elements: branch to LBB1_472 when the input and output ranges
	// do not overlap. LBB1_472 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0xc20c8d48         // lea    rcx, [rdx + 8*rax]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_472
	LONG $0xc00c8d49         // lea    rcx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_472

// Scalar path from the start: rcx = element index = 0.
LBB1_82:
	WORD $0xc931 // xor    ecx, ecx

// rsi = rax - rcx - 1 (pending elements minus one); rdi = rax & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_737:
	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_739

// One element per iteration, rdi times. The element is loaded first because
// it is the minuend (first source operand of vsubsd).
LBB1_738:
	LONG $0x0c10fbc5; BYTE $0xca   // vmovsd    xmm1, qword [rdx + 8*rcx]
	LONG $0xc85cf3c5               // vsubsd    xmm1, xmm1, xmm0
	LONG $0x117bc1c4; WORD $0xc80c // vmovsd    qword [r8 + 8*rcx], xmm1
	LONG $0x01c18348               // add    rcx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB1_738

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_737.
LBB1_739:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB1_1109

// Four elements per iteration until the index rcx reaches the count rax.
LBB1_740:
	LONG $0x0c10fbc5; BYTE $0xca               // vmovsd    xmm1, qword [rdx + 8*rcx]
	LONG $0xc85cf3c5                           // vsubsd    xmm1, xmm1, xmm0
	LONG $0x117bc1c4; WORD $0xc80c             // vmovsd    qword [r8 + 8*rcx], xmm1
	LONG $0x4c10fbc5; WORD $0x08ca             // vmovsd    xmm1, qword [rdx + 8*rcx + 8]
	LONG $0xc85cf3c5                           // vsubsd    xmm1, xmm1, xmm0
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x08 // vmovsd    qword [r8 + 8*rcx + 8], xmm1
	LONG $0x4c10fbc5; WORD $0x10ca             // vmovsd    xmm1, qword [rdx + 8*rcx + 16]
	LONG $0xc85cf3c5                           // vsubsd    xmm1, xmm1, xmm0
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x10 // vmovsd    qword [r8 + 8*rcx + 16], xmm1
	LONG $0x4c10fbc5; WORD $0x18ca             // vmovsd    xmm1, qword [rdx + 8*rcx + 24]
	LONG $0xc85cf3c5                           // vsubsd    xmm1, xmm1, xmm0
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x18 // vmovsd    qword [r8 + 8*rcx + 24], xmm1
	LONG $0x04c18348                           // add    rcx, 4
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JNE  LBB1_740
	JMP  LBB1_1109

// Case LBB1_83: dispatch on edi, then add a scalar to 64-bit floats.
// edi <= 8 -> LBB1_147, edi == 9 -> LBB1_217, edi == 11 -> LBB1_220, any other
// value except 12 -> LBB1_1109. For edi == 12 with r9d > 0 the scalar path
// below stores xmm0 + [rdx + 8*i] to [r8 + 8*i] for each index i up to r9d,
// where xmm0 is the double loaded once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_83:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB1_147
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB1_217
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB1_220
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	// xmm0 = scalar operand; rax = element count (zero-extended r9d).
	LONG $0x0110fbc5         // vmovsd    xmm0, qword [rcx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_89
	// With 16+ elements: branch to LBB1_475 when the input and output ranges
	// do not overlap. LBB1_475 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0xc20c8d48         // lea    rcx, [rdx + 8*rax]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_475
	LONG $0xc00c8d49         // lea    rcx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_475

// Scalar path from the start: rcx = element index = 0.
LBB1_89:
	WORD $0xc931 // xor    ecx, ecx

// rsi = rax - rcx - 1 (pending elements minus one); rdi = rax & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_745:
	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_747

// One element per iteration, rdi times.
LBB1_746:
	LONG $0x0c58fbc5; BYTE $0xca   // vaddsd    xmm1, xmm0, qword [rdx + 8*rcx]
	LONG $0x117bc1c4; WORD $0xc80c // vmovsd    qword [r8 + 8*rcx], xmm1
	LONG $0x01c18348               // add    rcx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB1_746

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_745.
LBB1_747:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB1_1109

// Four elements per iteration until the index rcx reaches the count rax.
LBB1_748:
	LONG $0x0c58fbc5; BYTE $0xca               // vaddsd    xmm1, xmm0, qword [rdx + 8*rcx]
	LONG $0x117bc1c4; WORD $0xc80c             // vmovsd    qword [r8 + 8*rcx], xmm1
	LONG $0x4c58fbc5; WORD $0x08ca             // vaddsd    xmm1, xmm0, qword [rdx + 8*rcx + 8]
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x08 // vmovsd    qword [r8 + 8*rcx + 8], xmm1
	LONG $0x4c58fbc5; WORD $0x10ca             // vaddsd    xmm1, xmm0, qword [rdx + 8*rcx + 16]
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x10 // vmovsd    qword [r8 + 8*rcx + 16], xmm1
	LONG $0x4c58fbc5; WORD $0x18ca             // vaddsd    xmm1, xmm0, qword [rdx + 8*rcx + 24]
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x18 // vmovsd    qword [r8 + 8*rcx + 24], xmm1
	LONG $0x04c18348                           // add    rcx, 4
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JNE  LBB1_748
	JMP  LBB1_1109

// Case LBB1_90: dispatch on edi, then subtract a scalar from 64-bit floats.
// edi <= 8 -> LBB1_152, edi == 9 -> LBB1_223, edi == 11 -> LBB1_226, any other
// value except 12 -> LBB1_1109. For edi == 12 with r9d > 0 the scalar path
// below stores [rdx + 8*i] - xmm0 to [r8 + 8*i] for each index i up to r9d,
// where xmm0 is the double loaded once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_90:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB1_152
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB1_223
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB1_226
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	// xmm0 = scalar operand (the subtrahend); rax = element count.
	LONG $0x0110fbc5         // vmovsd    xmm0, qword [rcx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_96
	// With 16+ elements: branch to LBB1_478 when the input and output ranges
	// do not overlap. LBB1_478 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0xc20c8d48         // lea    rcx, [rdx + 8*rax]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_478
	LONG $0xc00c8d49         // lea    rcx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_478

// Scalar path from the start: rcx = element index = 0.
LBB1_96:
	WORD $0xc931 // xor    ecx, ecx

// rsi = rax - rcx - 1 (pending elements minus one); rdi = rax & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_753:
	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_755

// One element per iteration, rdi times. The element is loaded first because
// it is the minuend (first source operand of vsubsd).
LBB1_754:
	LONG $0x0c10fbc5; BYTE $0xca   // vmovsd    xmm1, qword [rdx + 8*rcx]
	LONG $0xc85cf3c5               // vsubsd    xmm1, xmm1, xmm0
	LONG $0x117bc1c4; WORD $0xc80c // vmovsd    qword [r8 + 8*rcx], xmm1
	LONG $0x01c18348               // add    rcx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB1_754

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_753.
LBB1_755:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB1_1109

// Four elements per iteration until the index rcx reaches the count rax.
LBB1_756:
	LONG $0x0c10fbc5; BYTE $0xca               // vmovsd    xmm1, qword [rdx + 8*rcx]
	LONG $0xc85cf3c5                           // vsubsd    xmm1, xmm1, xmm0
	LONG $0x117bc1c4; WORD $0xc80c             // vmovsd    qword [r8 + 8*rcx], xmm1
	LONG $0x4c10fbc5; WORD $0x08ca             // vmovsd    xmm1, qword [rdx + 8*rcx + 8]
	LONG $0xc85cf3c5                           // vsubsd    xmm1, xmm1, xmm0
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x08 // vmovsd    qword [r8 + 8*rcx + 8], xmm1
	LONG $0x4c10fbc5; WORD $0x10ca             // vmovsd    xmm1, qword [rdx + 8*rcx + 16]
	LONG $0xc85cf3c5                           // vsubsd    xmm1, xmm1, xmm0
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x10 // vmovsd    qword [r8 + 8*rcx + 16], xmm1
	LONG $0x4c10fbc5; WORD $0x18ca             // vmovsd    xmm1, qword [rdx + 8*rcx + 24]
	LONG $0xc85cf3c5                           // vsubsd    xmm1, xmm1, xmm0
	LONG $0x117bc1c4; WORD $0xc84c; BYTE $0x18 // vmovsd    qword [r8 + 8*rcx + 24], xmm1
	LONG $0x04c18348                           // add    rcx, 4
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JNE  LBB1_756
	JMP  LBB1_1109

// Case LBB1_97: dispatch on edi, then multiply bytes by a scalar byte.
// edi == 2 -> LBB1_229, any other value except 3 -> LBB1_1109. For edi == 3
// with r9d > 0 the scalar path below stores the low byte of [rdx + i] * cl to
// [r8 + i] for each index i up to r9d, where cl is the byte loaded once from
// [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_97:
	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
	JE   LBB1_229
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	// cl = scalar operand; r10 = element count (zero-extended r9d).
	WORD $0x098a             // mov    cl, byte [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_101
	// With 32+ elements: branch to LBB1_481 when the input and output ranges
	// do not overlap. LBB1_481 is presumably the vectorized path -- confirm.
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	JBE  LBB1_481
	LONG $0x10048d4b         // lea    rax, [r8 + r10]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB1_481

// Scalar path from the start: rdi = element index = 0.
LBB1_101:
	WORD $0xff31 // xor    edi, edi

// r9 = r10 - rdi - 1 (pending elements minus one); rsi = r10 & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_627:
	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
	LONG $0x03e68348         // and    rsi, 3
	JE   LBB1_629

// One element per iteration, rsi times. `mul cl` forms ax = al * cl; only al
// (the low byte of the product) is stored.
LBB1_628:
	LONG $0x3a04b60f // movzx    eax, byte [rdx + rdi]
	WORD $0xe1f6     // mul    cl
	LONG $0x38048841 // mov    byte [r8 + rdi], al
	LONG $0x01c78348 // add    rdi, 1
	LONG $0xffc68348 // add    rsi, -1
	JNE  LBB1_628

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_627.
LBB1_629:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Four elements per iteration until the index rdi reaches the count r10.
LBB1_630:
	LONG $0x3a04b60f             // movzx    eax, byte [rdx + rdi]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38048841             // mov    byte [r8 + rdi], al
	LONG $0x3a44b60f; BYTE $0x01 // movzx    eax, byte [rdx + rdi + 1]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
	LONG $0x3a44b60f; BYTE $0x02 // movzx    eax, byte [rdx + rdi + 2]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
	LONG $0x3a44b60f; BYTE $0x03 // movzx    eax, byte [rdx + rdi + 3]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
	LONG $0x04c78348             // add    rdi, 4
	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
	JNE  LBB1_630
	JMP  LBB1_1109

// Case LBB1_102: dispatch on edi, then multiply bytes by a scalar byte.
// edi == 2 -> LBB1_232, any other value except 3 -> LBB1_1109. For edi == 3
// with r9d > 0 the scalar path below stores the low byte of [rdx + i] * cl to
// [r8 + i] for each index i up to r9d, where cl is the byte loaded once from
// [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_102:
	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
	JE   LBB1_232
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	// cl = scalar operand; r10 = element count (zero-extended r9d).
	WORD $0x098a             // mov    cl, byte [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_106
	// With 32+ elements: branch to LBB1_483 when the input and output ranges
	// do not overlap. LBB1_483 is presumably the vectorized path -- confirm.
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	JBE  LBB1_483
	LONG $0x10048d4b         // lea    rax, [r8 + r10]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB1_483

// Scalar path from the start: rdi = element index = 0.
LBB1_106:
	WORD $0xff31 // xor    edi, edi

// r9 = r10 - rdi - 1 (pending elements minus one); rsi = r10 & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_637:
	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
	LONG $0x03e68348         // and    rsi, 3
	JE   LBB1_639

// One element per iteration, rsi times. `mul cl` forms ax = al * cl; only al
// (the low byte of the product) is stored.
LBB1_638:
	LONG $0x3a04b60f // movzx    eax, byte [rdx + rdi]
	WORD $0xe1f6     // mul    cl
	LONG $0x38048841 // mov    byte [r8 + rdi], al
	LONG $0x01c78348 // add    rdi, 1
	LONG $0xffc68348 // add    rsi, -1
	JNE  LBB1_638

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_637.
LBB1_639:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Four elements per iteration until the index rdi reaches the count r10.
LBB1_640:
	LONG $0x3a04b60f             // movzx    eax, byte [rdx + rdi]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38048841             // mov    byte [r8 + rdi], al
	LONG $0x3a44b60f; BYTE $0x01 // movzx    eax, byte [rdx + rdi + 1]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
	LONG $0x3a44b60f; BYTE $0x02 // movzx    eax, byte [rdx + rdi + 2]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
	LONG $0x3a44b60f; BYTE $0x03 // movzx    eax, byte [rdx + rdi + 3]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
	LONG $0x04c78348             // add    rdi, 4
	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
	JNE  LBB1_640
	JMP  LBB1_1109

// Case LBB1_107: dispatch on edi, then add a scalar byte to bytes.
// edi == 2 -> LBB1_235, any other value except 3 -> LBB1_1109. For edi == 3
// with r9d > 0 the scalar path below stores [rdx + i] + al (8-bit wrapping
// add) to [r8 + i] for each index i up to r9d, where al is the byte loaded
// once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_107:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB1_235
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB1_1109
	// al = scalar operand; r10 = element count (zero-extended r9d).
	WORD $0x018a                               // mov    al, byte [rcx]
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB1_111
	// With 128+ elements: branch to LBB1_485 when the input and output ranges
	// do not overlap. LBB1_485 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0x120c8d4a                           // lea    rcx, [rdx + r10]
	WORD $0x394c; BYTE $0xc1                   // cmp    rcx, r8
	JBE  LBB1_485
	LONG $0x100c8d4b                           // lea    rcx, [r8 + r10]
	WORD $0x3948; BYTE $0xd1                   // cmp    rcx, rdx
	JBE  LBB1_485

// Scalar path from the start: rsi = element index = 0.
LBB1_111:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (pending elements minus one); rdi = r10 & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_761:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_763

// One element per iteration, rdi times.
LBB1_762:
	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
	WORD $0xc100     // add    cl, al
	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB1_762

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_761.
LBB1_763:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Four elements per iteration until the index rsi reaches the count r10.
LBB1_764:
	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
	WORD $0xc100                 // add    cl, al
	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
	WORD $0xc100                 // add    cl, al
	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
	WORD $0xc100                 // add    cl, al
	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
	WORD $0xc100                 // add    cl, al
	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_764
	JMP  LBB1_1109

// Case LBB1_112: dispatch on edi, then subtract a scalar byte from bytes.
// edi == 2 -> LBB1_238, any other value except 3 -> LBB1_1109. For edi == 3
// with r9d > 0 the scalar path below stores [rdx + i] - al (8-bit wrapping
// subtract) to [r8 + i] for each index i up to r9d, where al is the byte
// loaded once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_112:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB1_238
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB1_1109
	// al = scalar operand (the subtrahend); r10 = element count.
	WORD $0x018a                               // mov    al, byte [rcx]
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB1_116
	// With 128+ elements: branch to LBB1_488 when the input and output ranges
	// do not overlap. LBB1_488 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0x120c8d4a                           // lea    rcx, [rdx + r10]
	WORD $0x394c; BYTE $0xc1                   // cmp    rcx, r8
	JBE  LBB1_488
	LONG $0x100c8d4b                           // lea    rcx, [r8 + r10]
	WORD $0x3948; BYTE $0xd1                   // cmp    rcx, rdx
	JBE  LBB1_488

// Scalar path from the start: rsi = element index = 0.
LBB1_116:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (pending elements minus one); rdi = r10 & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_769:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_771

// One element per iteration, rdi times.
LBB1_770:
	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
	WORD $0xc128     // sub    cl, al
	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB1_770

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_769.
LBB1_771:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Four elements per iteration until the index rsi reaches the count r10.
LBB1_772:
	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
	WORD $0xc128                 // sub    cl, al
	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
	WORD $0xc128                 // sub    cl, al
	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
	WORD $0xc128                 // sub    cl, al
	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
	WORD $0xc128                 // sub    cl, al
	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_772
	JMP  LBB1_1109

// Case LBB1_117: dispatch on edi, then add a scalar byte to bytes.
// edi == 2 -> LBB1_241, any other value except 3 -> LBB1_1109. For edi == 3
// with r9d > 0 the scalar path below stores [rdx + i] + al (8-bit wrapping
// add) to [r8 + i] for each index i up to r9d, where al is the byte loaded
// once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_117:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB1_241
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB1_1109
	// al = scalar operand; r10 = element count (zero-extended r9d).
	WORD $0x018a                               // mov    al, byte [rcx]
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB1_121
	// With 128+ elements: branch to LBB1_491 when the input and output ranges
	// do not overlap. LBB1_491 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0x120c8d4a                           // lea    rcx, [rdx + r10]
	WORD $0x394c; BYTE $0xc1                   // cmp    rcx, r8
	JBE  LBB1_491
	LONG $0x100c8d4b                           // lea    rcx, [r8 + r10]
	WORD $0x3948; BYTE $0xd1                   // cmp    rcx, rdx
	JBE  LBB1_491

// Scalar path from the start: rsi = element index = 0.
LBB1_121:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (pending elements minus one); rdi = r10 & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_777:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_779

// One element per iteration, rdi times.
LBB1_778:
	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
	WORD $0xc100     // add    cl, al
	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB1_778

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_777.
LBB1_779:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Four elements per iteration until the index rsi reaches the count r10.
LBB1_780:
	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
	WORD $0xc100                 // add    cl, al
	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
	WORD $0xc100                 // add    cl, al
	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
	WORD $0xc100                 // add    cl, al
	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
	WORD $0xc100                 // add    cl, al
	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_780
	JMP  LBB1_1109

// Case LBB1_122: dispatch on edi, then subtract a scalar byte from bytes.
// edi == 2 -> LBB1_244, any other value except 3 -> LBB1_1109. For edi == 3
// with r9d > 0 the scalar path below stores [rdx + i] - al (8-bit wrapping
// subtract) to [r8 + i] for each index i up to r9d, where al is the byte
// loaded once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_122:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB1_244
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB1_1109
	// al = scalar operand (the subtrahend); r10 = element count.
	WORD $0x018a                               // mov    al, byte [rcx]
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB1_126
	// With 128+ elements: branch to LBB1_494 when the input and output ranges
	// do not overlap. LBB1_494 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0x120c8d4a                           // lea    rcx, [rdx + r10]
	WORD $0x394c; BYTE $0xc1                   // cmp    rcx, r8
	JBE  LBB1_494
	LONG $0x100c8d4b                           // lea    rcx, [r8 + r10]
	WORD $0x3948; BYTE $0xd1                   // cmp    rcx, rdx
	JBE  LBB1_494

// Scalar path from the start: rsi = element index = 0.
LBB1_126:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (pending elements minus one); rdi = r10 & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_785:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_787

// One element per iteration, rdi times.
LBB1_786:
	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
	WORD $0xc128     // sub    cl, al
	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB1_786

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_785.
LBB1_787:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Four elements per iteration until the index rsi reaches the count r10.
LBB1_788:
	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
	WORD $0xc128                 // sub    cl, al
	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
	WORD $0xc128                 // sub    cl, al
	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
	WORD $0xc128                 // sub    cl, al
	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
	WORD $0xc128                 // sub    cl, al
	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_788
	JMP  LBB1_1109

// Case LBB1_127: dispatch on edi, then multiply 64-bit integers by a scalar.
// edi == 7 -> LBB1_247, any other value except 8 -> LBB1_1109. For edi == 8
// with r9d > 0 the scalar path below stores the low 64 bits of
// [rdx + 8*i] * rax to [r8 + 8*i] for each index i up to r9d, where rax is
// the qword loaded once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_127:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB1_247
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	// rax = scalar operand; r10 = element count (zero-extended r9d).
	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_131
	// With 16+ elements: branch to LBB1_497 when the input and output ranges
	// do not overlap. LBB1_497 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_497
	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_497

// Scalar path from the start: rsi = element index = 0.
LBB1_131:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (pending elements minus one); rdi = r10 & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_793:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_795

// One element per iteration, rdi times.
LBB1_794:
	LONG $0xf20c8b48 // mov    rcx, qword [rdx + 8*rsi]
	LONG $0xc8af0f48 // imul    rcx, rax
	LONG $0xf00c8949 // mov    qword [r8 + 8*rsi], rcx
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB1_794

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_793.
LBB1_795:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Four elements per iteration until the index rsi reaches the count r10.
LBB1_796:
	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_796
	JMP  LBB1_1109

// Case LBB1_132: dispatch on edi, then multiply 64-bit integers by a scalar.
// edi == 7 -> LBB1_250, any other value except 8 -> LBB1_1109. For edi == 8
// with r9d > 0 the scalar path below stores the low 64 bits of
// [rdx + 8*i] * rax to [r8 + 8*i] for each index i up to r9d, where rax is
// the qword loaded once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_132:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB1_250
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	// rax = scalar operand; r10 = element count (zero-extended r9d).
	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_136
	// With 16+ elements: branch to LBB1_500 when the input and output ranges
	// do not overlap. LBB1_500 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_500
	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_500

// Scalar path from the start: rsi = element index = 0.
LBB1_136:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (pending elements minus one); rdi = r10 & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_801:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_803

// One element per iteration, rdi times.
LBB1_802:
	LONG $0xf20c8b48 // mov    rcx, qword [rdx + 8*rsi]
	LONG $0xc8af0f48 // imul    rcx, rax
	LONG $0xf00c8949 // mov    qword [r8 + 8*rsi], rcx
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB1_802

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_801.
LBB1_803:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Four elements per iteration until the index rsi reaches the count r10.
LBB1_804:
	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_804
	JMP  LBB1_1109

// Case LBB1_137: dispatch on edi, then add a scalar to 64-bit integers.
// edi == 7 -> LBB1_253, any other value except 8 -> LBB1_1109. For edi == 8
// with r9d > 0 the scalar path below stores [rdx + 8*i] + rax (64-bit
// wrapping add) to [r8 + 8*i] for each index i up to r9d, where rax is the
// qword loaded once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_137:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB1_253
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	// rax = scalar operand; r10 = element count (zero-extended r9d).
	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_141
	// With 16+ elements: branch to LBB1_503 when the input and output ranges
	// do not overlap. LBB1_503 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_503
	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_503

// Scalar path from the start: rsi = element index = 0.
LBB1_141:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (pending elements minus one); rdi = r10 & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_809:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_811

// One element per iteration, rdi times.
LBB1_810:
	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x0148; BYTE $0xc1 // add    rcx, rax
	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_810

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_809.
LBB1_811:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Four elements per iteration until the index rsi reaches the count r10.
LBB1_812:
	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_812
	JMP  LBB1_1109

// Case LBB1_142: dispatch on edi, then subtract a scalar from 64-bit integers.
// edi == 7 -> LBB1_256, any other value except 8 -> LBB1_1109. For edi == 8
// with r9d > 0 the scalar path below stores [rdx + 8*i] - rax (64-bit
// wrapping subtract) to [r8 + 8*i] for each index i up to r9d, where rax is
// the qword loaded once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_142:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB1_256
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	// rax = scalar operand (the subtrahend); r10 = element count.
	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_146
	// With 16+ elements: branch to LBB1_506 when the input and output ranges
	// do not overlap. LBB1_506 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_506
	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_506

// Scalar path from the start: rsi = element index = 0.
LBB1_146:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (pending elements minus one); rdi = r10 & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_817:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_819

// One element per iteration, rdi times.
LBB1_818:
	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x2948; BYTE $0xc1 // sub    rcx, rax
	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_818

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_817.
LBB1_819:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Four elements per iteration until the index rsi reaches the count r10.
LBB1_820:
	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_820
	JMP  LBB1_1109

// Case LBB1_147: dispatch on edi, then add a scalar to 64-bit integers.
// edi == 7 -> LBB1_259, any other value except 8 -> LBB1_1109. For edi == 8
// with r9d > 0 the scalar path below stores [rdx + 8*i] + rax (64-bit
// wrapping add) to [r8 + 8*i] for each index i up to r9d, where rax is the
// qword loaded once from [rcx].
// NOTE(review): edi is presumably the element-type code -- confirm against
// the routine's argument loads.
LBB1_147:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB1_259
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB1_1109
	// Nothing to do for a non-positive element count.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	// rax = scalar operand; r10 = element count (zero-extended r9d).
	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_151
	// With 16+ elements: branch to LBB1_509 when the input and output ranges
	// do not overlap. LBB1_509 is presumably the vectorized path -- confirm.
	// rcx is reused as a temporary from here on.
	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_509
	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_509

// Scalar path from the start: rsi = element index = 0.
LBB1_151:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (pending elements minus one); rdi = r10 & 3 is the
// number of single-element iterations run before the 4x unrolled loop.
LBB1_825:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_827

// One element per iteration, rdi times.
LBB1_826:
	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x0148; BYTE $0xc1 // add    rcx, rax
	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_826

// Skip the unrolled loop when fewer than 4 elements were pending at LBB1_825.
LBB1_827:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Four elements per iteration until the index rsi reaches the count r10.
LBB1_828:
	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_828
	JMP  LBB1_1109

// LBB1_152: 64-bit subtract-scalar case, selected on edi: 7 branches to
// LBB1_262, 8 is handled below, any other value goes straight to LBB1_1109.
// Computes qword [r8 + 8*i] = qword [rdx + 8*i] - rax for i in [0, r9d),
// where rax is the single qword loaded from [rcx] (wrapping 64-bit subtract).
// NOTE(review): the TEXT header of this routine is outside this chunk; that
// edi/rcx/rdx/r8/r9 are type id/scalar operand/input/output/length is presumed
// from how they are used here -- confirm against the function prologue.
LBB1_152:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB1_262
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB1_1109
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 16 elements: go directly to the scalar loops.
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_156
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_512 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_512
	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_512

// Scalar fallback entry: start at element index rsi = 0.
LBB1_156:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rdi = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_833:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_835

// Peel loop: one element per iteration, rdi times.
LBB1_834:
	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x2948; BYTE $0xc1 // sub    rcx, rax
	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_834

// r9 < 3 means no more than 3 elements were outstanding at LBB1_833, so the
// unrolled loop is skipped.
LBB1_835:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_836:
	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_836
	JMP  LBB1_1109

// LBB1_157: 16-bit multiply-by-scalar case.
// Computes word [r8 + 2*i] = low 16 bits of (word [rdx + 2*i] * ax) for
// i in [0, r9d), where ax is the single word loaded (zero-extended) from [rcx].
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_157:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go directly to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_159
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_515 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_515
	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_515

// Scalar fallback entry: start at element index rsi = 0.
LBB1_159:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rcx = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_841:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd1 // mov    rcx, r10
	LONG $0x03e18348         // and    rcx, 3
	JE   LBB1_843

// Peel loop: one element per iteration, rcx times.
LBB1_842:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	LONG $0xf8af0f66             // imul    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc18348             // add    rcx, -1
	JNE  LBB1_842

// r9 < 3 means no more than 3 elements were outstanding at LBB1_841, so the
// unrolled loop is skipped.
LBB1_843:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_844:
	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB1_844
	JMP  LBB1_1109

// LBB1_160: 16-bit multiply-by-scalar case.
// Computes word [r8 + 2*i] = low 16 bits of (word [rdx + 2*i] * ax) for
// i in [0, r9d), where ax is the single word loaded (zero-extended) from [rcx].
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_160:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go directly to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_162
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_518 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_518
	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_518

// Scalar fallback entry: start at element index rsi = 0.
LBB1_162:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rcx = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_849:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd1 // mov    rcx, r10
	LONG $0x03e18348         // and    rcx, 3
	JE   LBB1_851

// Peel loop: one element per iteration, rcx times.
LBB1_850:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	LONG $0xf8af0f66             // imul    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc18348             // add    rcx, -1
	JNE  LBB1_850

// r9 < 3 means no more than 3 elements were outstanding at LBB1_849, so the
// unrolled loop is skipped.
LBB1_851:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_852:
	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB1_852
	JMP  LBB1_1109

// LBB1_163: 16-bit multiply-by-scalar case.
// Computes word [r8 + 2*i] = low 16 bits of (word [rdx + 2*i] * ax) for
// i in [0, r9d), where ax is the single word loaded (zero-extended) from [rcx].
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_163:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go directly to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_165
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_521 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_521
	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_521

// Scalar fallback entry: start at element index rsi = 0.
LBB1_165:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rcx = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_857:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd1 // mov    rcx, r10
	LONG $0x03e18348         // and    rcx, 3
	JE   LBB1_859

// Peel loop: one element per iteration, rcx times.
LBB1_858:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	LONG $0xf8af0f66             // imul    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc18348             // add    rcx, -1
	JNE  LBB1_858

// r9 < 3 means no more than 3 elements were outstanding at LBB1_857, so the
// unrolled loop is skipped.
LBB1_859:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_860:
	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB1_860
	JMP  LBB1_1109

// LBB1_166: 16-bit multiply-by-scalar case.
// Computes word [r8 + 2*i] = low 16 bits of (word [rdx + 2*i] * ax) for
// i in [0, r9d), where ax is the single word loaded (zero-extended) from [rcx].
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_166:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go directly to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_168
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_524 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_524
	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_524

// Scalar fallback entry: start at element index rsi = 0.
LBB1_168:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rcx = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_865:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd1 // mov    rcx, r10
	LONG $0x03e18348         // and    rcx, 3
	JE   LBB1_867

// Peel loop: one element per iteration, rcx times.
LBB1_866:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	LONG $0xf8af0f66             // imul    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc18348             // add    rcx, -1
	JNE  LBB1_866

// r9 < 3 means no more than 3 elements were outstanding at LBB1_865, so the
// unrolled loop is skipped.
LBB1_867:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_868:
	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
	LONG $0xc8af0f66               // imul    cx, ax
	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB1_868
	JMP  LBB1_1109

// LBB1_169: 16-bit add-scalar case.
// Computes word [r8 + 2*i] = word [rdx + 2*i] + ax (wrapping 16-bit add) for
// i in [0, r9d), where ax is the single word loaded (zero-extended) from [rcx].
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_169:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go directly to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_171
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_527 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_527
	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_527

// Scalar fallback entry: start at element index rsi = 0.
LBB1_171:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rcx = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_873:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd1 // mov    rcx, r10
	LONG $0x03e18348         // and    rcx, 3
	JE   LBB1_875

// Peel loop: one element per iteration, rcx times.
LBB1_874:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	WORD $0x0166; BYTE $0xc7     // add    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc18348             // add    rcx, -1
	JNE  LBB1_874

// r9 < 3 means no more than 3 elements were outstanding at LBB1_873, so the
// unrolled loop is skipped.
LBB1_875:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_876:
	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB1_876
	JMP  LBB1_1109

// LBB1_172: 16-bit add-scalar case.
// Computes word [r8 + 2*i] = word [rdx + 2*i] + ax (wrapping 16-bit add) for
// i in [0, r9d), where ax is the single word loaded (zero-extended) from [rcx].
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_172:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go directly to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_174
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_530 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_530
	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_530

// Scalar fallback entry: start at element index rsi = 0.
LBB1_174:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rcx = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_881:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd1 // mov    rcx, r10
	LONG $0x03e18348         // and    rcx, 3
	JE   LBB1_883

// Peel loop: one element per iteration, rcx times.
LBB1_882:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	WORD $0x0166; BYTE $0xc7     // add    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc18348             // add    rcx, -1
	JNE  LBB1_882

// r9 < 3 means no more than 3 elements were outstanding at LBB1_881, so the
// unrolled loop is skipped.
LBB1_883:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_884:
	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB1_884
	JMP  LBB1_1109

// LBB1_175: 16-bit subtract-scalar case.
// Computes word [r8 + 2*i] = word [rdx + 2*i] - ax for i in [0, r9d), where
// ax is the single word loaded (zero-extended) from [rcx]. The subtract is
// done in 32 bits but only the low 16 bits are stored, so the result wraps
// like a 16-bit subtract.
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_175:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go directly to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_177
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_533 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_533
	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_533

// Scalar fallback entry: start at element index rsi = 0.
LBB1_177:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rcx = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_889:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd1 // mov    rcx, r10
	LONG $0x03e18348         // and    rcx, 3
	JE   LBB1_891

// Peel loop: one element per iteration, rcx times.
LBB1_890:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	WORD $0xc729                 // sub    edi, eax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc18348             // add    rcx, -1
	JNE  LBB1_890

// r9 < 3 means no more than 3 elements were outstanding at LBB1_889, so the
// unrolled loop is skipped.
LBB1_891:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_892:
	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB1_892
	JMP  LBB1_1109

// LBB1_178: 16-bit subtract-scalar case.
// Computes word [r8 + 2*i] = word [rdx + 2*i] - ax for i in [0, r9d), where
// ax is the single word loaded (zero-extended) from [rcx]. The subtract is
// done in 32 bits but only the low 16 bits are stored, so the result wraps
// like a 16-bit subtract.
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_178:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go directly to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_180
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_536 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_536
	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_536

// Scalar fallback entry: start at element index rsi = 0.
LBB1_180:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rcx = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_897:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd1 // mov    rcx, r10
	LONG $0x03e18348         // and    rcx, 3
	JE   LBB1_899

// Peel loop: one element per iteration, rcx times.
LBB1_898:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	WORD $0xc729                 // sub    edi, eax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc18348             // add    rcx, -1
	JNE  LBB1_898

// r9 < 3 means no more than 3 elements were outstanding at LBB1_897, so the
// unrolled loop is skipped.
LBB1_899:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_900:
	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB1_900
	JMP  LBB1_1109

// LBB1_181: 16-bit add-scalar case.
// Computes word [r8 + 2*i] = word [rdx + 2*i] + ax (wrapping 16-bit add) for
// i in [0, r9d), where ax is the single word loaded (zero-extended) from [rcx].
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_181:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go directly to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_183
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_539 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_539
	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_539

// Scalar fallback entry: start at element index rsi = 0.
LBB1_183:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rcx = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_905:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd1 // mov    rcx, r10
	LONG $0x03e18348         // and    rcx, 3
	JE   LBB1_907

// Peel loop: one element per iteration, rcx times.
LBB1_906:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	WORD $0x0166; BYTE $0xc7     // add    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc18348             // add    rcx, -1
	JNE  LBB1_906

// r9 < 3 means no more than 3 elements were outstanding at LBB1_905, so the
// unrolled loop is skipped.
LBB1_907:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_908:
	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB1_908
	JMP  LBB1_1109

// LBB1_184: 16-bit add-scalar case.
// Computes word [r8 + 2*i] = word [rdx + 2*i] + ax (wrapping 16-bit add) for
// i in [0, r9d), where ax is the single word loaded (zero-extended) from [rcx].
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_184:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go directly to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_186
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_542 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_542
	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_542

// Scalar fallback entry: start at element index rsi = 0.
LBB1_186:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rcx = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_913:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd1 // mov    rcx, r10
	LONG $0x03e18348         // and    rcx, 3
	JE   LBB1_915

// Peel loop: one element per iteration, rcx times.
LBB1_914:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	WORD $0x0166; BYTE $0xc7     // add    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc18348             // add    rcx, -1
	JNE  LBB1_914

// r9 < 3 means no more than 3 elements were outstanding at LBB1_913, so the
// unrolled loop is skipped.
LBB1_915:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_916:
	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
	WORD $0x0166; BYTE $0xc1       // add    cx, ax
	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB1_916
	JMP  LBB1_1109

// LBB1_187: 16-bit subtract-scalar case.
// Computes word [r8 + 2*i] = word [rdx + 2*i] - ax for i in [0, r9d), where
// ax is the single word loaded (zero-extended) from [rcx]. The subtract is
// done in 32 bits but only the low 16 bits are stored, so the result wraps
// like a 16-bit subtract.
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_187:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go directly to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_189
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_545 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_545
	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_545

// Scalar fallback entry: start at element index rsi = 0.
LBB1_189:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rcx = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_921:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd1 // mov    rcx, r10
	LONG $0x03e18348         // and    rcx, 3
	JE   LBB1_923

// Peel loop: one element per iteration, rcx times.
LBB1_922:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	WORD $0xc729                 // sub    edi, eax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc18348             // add    rcx, -1
	JNE  LBB1_922

// r9 < 3 means no more than 3 elements were outstanding at LBB1_921, so the
// unrolled loop is skipped.
LBB1_923:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_924:
	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB1_924
	JMP  LBB1_1109

// LBB1_190: 16-bit subtract-scalar case.
// Computes word [r8 + 2*i] = word [rdx + 2*i] - ax for i in [0, r9d), where
// ax is the single word loaded (zero-extended) from [rcx]. The subtract is
// done in 32 bits but only the low 16 bits are stored, so the result wraps
// like a 16-bit subtract.
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_190:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go directly to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_192
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_548 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_548
	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_548

// Scalar fallback entry: start at element index rsi = 0.
LBB1_192:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rcx = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_929:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd1 // mov    rcx, r10
	LONG $0x03e18348         // and    rcx, 3
	JE   LBB1_931

// Peel loop: one element per iteration, rcx times.
LBB1_930:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	WORD $0xc729                 // sub    edi, eax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc18348             // add    rcx, -1
	JNE  LBB1_930

// r9 < 3 means no more than 3 elements were outstanding at LBB1_929, so the
// unrolled loop is skipped.
LBB1_931:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_932:
	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
	WORD $0xc129                   // sub    ecx, eax
	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB1_932
	JMP  LBB1_1109

// LBB1_193: 64-bit multiply-by-scalar case.
// Computes qword [r8 + 8*i] = low 64 bits of (qword [rdx + 8*i] * rax) for
// i in [0, r9d), where rax is the single qword loaded from [rcx].
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_193:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 16 elements: go directly to the scalar loops.
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_195
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_551 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_551
	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_551

// Scalar fallback entry: start at element index rsi = 0.
LBB1_195:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rdi = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_937:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_939

// Peel loop: one element per iteration, rdi times.
LBB1_938:
	LONG $0xf20c8b48 // mov    rcx, qword [rdx + 8*rsi]
	LONG $0xc8af0f48 // imul    rcx, rax
	LONG $0xf00c8949 // mov    qword [r8 + 8*rsi], rcx
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB1_938

// r9 < 3 means no more than 3 elements were outstanding at LBB1_937, so the
// unrolled loop is skipped.
LBB1_939:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_940:
	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_940
	JMP  LBB1_1109

// LBB1_196: single-precision multiply-by-scalar case.
// Computes dword [r8 + 4*i] = xmm0 * dword [rdx + 4*i] (vmulss) for
// i in [0, r9d), where xmm0 holds the single float loaded from [rcx].
// Unlike the integer cases, the element count lives in rax and the element
// index in rcx.
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_196:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	LONG $0x0110fac5         // vmovss    xmm0, dword [rcx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	// Fewer than 32 elements: go directly to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_198
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_554 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0x820c8d48         // lea    rcx, [rdx + 4*rax]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_554
	LONG $0x800c8d49         // lea    rcx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_554

// Scalar fallback entry: start at element index rcx = 0.
LBB1_198:
	WORD $0xc931 // xor    ecx, ecx

// rsi = rax - rcx - 1; rdi = rax & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rcx != 0 -- confirm.
LBB1_945:
	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_947

// Peel loop: one element per iteration, rdi times.
LBB1_946:
	LONG $0x0c59fac5; BYTE $0x8a   // vmulss    xmm1, xmm0, dword [rdx + 4*rcx]
	LONG $0x117ac1c4; WORD $0x880c // vmovss    dword [r8 + 4*rcx], xmm1
	LONG $0x01c18348               // add    rcx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB1_946

// rsi < 3 means no more than 3 elements were outstanding at LBB1_945, so the
// unrolled loop is skipped.
LBB1_947:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rcx == rax.
LBB1_948:
	LONG $0x0c59fac5; BYTE $0x8a               // vmulss    xmm1, xmm0, dword [rdx + 4*rcx]
	LONG $0x117ac1c4; WORD $0x880c             // vmovss    dword [r8 + 4*rcx], xmm1
	LONG $0x4c59fac5; WORD $0x048a             // vmulss    xmm1, xmm0, dword [rdx + 4*rcx + 4]
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x04 // vmovss    dword [r8 + 4*rcx + 4], xmm1
	LONG $0x4c59fac5; WORD $0x088a             // vmulss    xmm1, xmm0, dword [rdx + 4*rcx + 8]
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x08 // vmovss    dword [r8 + 4*rcx + 8], xmm1
	LONG $0x4c59fac5; WORD $0x0c8a             // vmulss    xmm1, xmm0, dword [rdx + 4*rcx + 12]
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x0c // vmovss    dword [r8 + 4*rcx + 12], xmm1
	LONG $0x04c18348                           // add    rcx, 4
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JNE  LBB1_948
	JMP  LBB1_1109

// LBB1_199: 64-bit multiply-by-scalar case.
// Computes qword [r8 + 8*i] = low 64 bits of (qword [rdx + 8*i] * rax) for
// i in [0, r9d), where rax is the single qword loaded from [rcx].
// NOTE(review): which type/op combination reaches this label is decided by
// dispatch code outside this chunk; that rcx/rdx/r8/r9 are scalar operand/
// input/output/length is presumed from their use here -- confirm.
LBB1_199:
	// Nothing to do when the 32-bit count is <= 0 (signed test).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 16 elements: go directly to the scalar loops.
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB1_201
	// Overlap check: if the input range ends at or before r8, or the output
	// range ends at or before rdx, control goes to LBB1_557 (outside this
	// chunk; presumably the vectorized loop -- confirm).
	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_557
	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_557

// Scalar fallback entry: start at element index rsi = 0.
LBB1_201:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1; rdi = r10 & 3 is the trip count of the peel loop.
// NOTE(review): this label may also be entered from outside this chunk with
// rsi != 0 -- confirm.
LBB1_953:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_955

// Peel loop: one element per iteration, rdi times.
LBB1_954:
	LONG $0xf20c8b48 // mov    rcx, qword [rdx + 8*rsi]
	LONG $0xc8af0f48 // imul    rcx, rax
	LONG $0xf00c8949 // mov    qword [r8 + 8*rsi], rcx
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB1_954

// r9 < 3 means no more than 3 elements were outstanding at LBB1_953, so the
// unrolled loop is skipped.
LBB1_955:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main scalar loop, unrolled 4x; runs until rsi == r10.
LBB1_956:
	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
	LONG $0xc8af0f48             // imul    rcx, rax
	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_956
	JMP  LBB1_1109

// LBB1_202: scalar (non-SIMD) loop computing out[i] = s * in[i] on float32.
//   rcx -> s (loaded once into xmm0), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_202:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	LONG $0x0110fac5         // vmovss    xmm0, dword [rcx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	// Fewer than 32 elements: go straight to the scalar loop.
	JB   LBB1_204
	// Range check: if in+4*count <= out or out+4*count <= in the two ranges do
	// not overlap and control goes to LBB1_560 (outside this view; presumably
	// the vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0x820c8d48         // lea    rcx, [rdx + 4*rax]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_560
	LONG $0x800c8d49         // lea    rcx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_560

// Start the scalar loop at index 0 (rcx is reused as the index from here on).
LBB1_204:
	WORD $0xc931 // xor    ecx, ecx

// rcx = start index. rsi = count - rcx - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_961:
	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_963

// Peel loop: one element per iteration, rdi times.
LBB1_962:
	LONG $0x0c59fac5; BYTE $0x8a   // vmulss    xmm1, xmm0, dword [rdx + 4*rcx]
	LONG $0x117ac1c4; WORD $0x880c // vmovss    dword [r8 + 4*rcx], xmm1
	LONG $0x01c18348               // add    rcx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB1_962

// rsi < 3 means fewer than 4 elements were pending at LBB1_961, so nothing is
// left for the unrolled loop.
LBB1_963:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rcx reaches the count rax.
LBB1_964:
	LONG $0x0c59fac5; BYTE $0x8a               // vmulss    xmm1, xmm0, dword [rdx + 4*rcx]
	LONG $0x117ac1c4; WORD $0x880c             // vmovss    dword [r8 + 4*rcx], xmm1
	LONG $0x4c59fac5; WORD $0x048a             // vmulss    xmm1, xmm0, dword [rdx + 4*rcx + 4]
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x04 // vmovss    dword [r8 + 4*rcx + 4], xmm1
	LONG $0x4c59fac5; WORD $0x088a             // vmulss    xmm1, xmm0, dword [rdx + 4*rcx + 8]
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x08 // vmovss    dword [r8 + 4*rcx + 8], xmm1
	LONG $0x4c59fac5; WORD $0x0c8a             // vmulss    xmm1, xmm0, dword [rdx + 4*rcx + 12]
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x0c // vmovss    dword [r8 + 4*rcx + 12], xmm1
	LONG $0x04c18348                           // add    rcx, 4
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JNE  LBB1_964
	JMP  LBB1_1109

// LBB1_205: scalar (non-SIMD) loop computing out[i] = in[i] + s on 64-bit integers.
//   rcx -> s (loaded once into rax), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_205:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	// Fewer than 16 elements: go straight to the scalar loop.
	JB   LBB1_207
	// Range check: if in+8*count <= out or out+8*count <= in the two ranges do
	// not overlap and control goes to LBB1_563 (outside this view; presumably
	// the vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_563
	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_563

// Start the scalar loop at index 0 (rsi).
LBB1_207:
	WORD $0xf631 // xor    esi, esi

// rsi = start index. r9 = count - rsi - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_969:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_971

// Peel loop: one element per iteration, rdi times.
LBB1_970:
	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x0148; BYTE $0xc1 // add    rcx, rax
	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_970

// r9 < 3 means fewer than 4 elements were pending at LBB1_969, so nothing is
// left for the unrolled loop.
LBB1_971:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rsi reaches the count r10.
LBB1_972:
	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_972
	JMP  LBB1_1109

// LBB1_208: scalar (non-SIMD) loop computing out[i] = s + in[i] on float32.
//   rcx -> s (loaded once into xmm0), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_208:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	LONG $0x0110fac5         // vmovss    xmm0, dword [rcx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	// Fewer than 32 elements: go straight to the scalar loop.
	JB   LBB1_210
	// Range check: if in+4*count <= out or out+4*count <= in the two ranges do
	// not overlap and control goes to LBB1_566 (outside this view; presumably
	// the vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0x820c8d48         // lea    rcx, [rdx + 4*rax]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_566
	LONG $0x800c8d49         // lea    rcx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_566

// Start the scalar loop at index 0 (rcx is reused as the index from here on).
LBB1_210:
	WORD $0xc931 // xor    ecx, ecx

// rcx = start index. rsi = count - rcx - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_977:
	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_979

// Peel loop: one element per iteration, rdi times.
LBB1_978:
	LONG $0x0c58fac5; BYTE $0x8a   // vaddss    xmm1, xmm0, dword [rdx + 4*rcx]
	LONG $0x117ac1c4; WORD $0x880c // vmovss    dword [r8 + 4*rcx], xmm1
	LONG $0x01c18348               // add    rcx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB1_978

// rsi < 3 means fewer than 4 elements were pending at LBB1_977, so nothing is
// left for the unrolled loop.
LBB1_979:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rcx reaches the count rax.
LBB1_980:
	LONG $0x0c58fac5; BYTE $0x8a               // vaddss    xmm1, xmm0, dword [rdx + 4*rcx]
	LONG $0x117ac1c4; WORD $0x880c             // vmovss    dword [r8 + 4*rcx], xmm1
	LONG $0x4c58fac5; WORD $0x048a             // vaddss    xmm1, xmm0, dword [rdx + 4*rcx + 4]
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x04 // vmovss    dword [r8 + 4*rcx + 4], xmm1
	LONG $0x4c58fac5; WORD $0x088a             // vaddss    xmm1, xmm0, dword [rdx + 4*rcx + 8]
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x08 // vmovss    dword [r8 + 4*rcx + 8], xmm1
	LONG $0x4c58fac5; WORD $0x0c8a             // vaddss    xmm1, xmm0, dword [rdx + 4*rcx + 12]
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x0c // vmovss    dword [r8 + 4*rcx + 12], xmm1
	LONG $0x04c18348                           // add    rcx, 4
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JNE  LBB1_980
	JMP  LBB1_1109

// LBB1_211: scalar (non-SIMD) loop computing out[i] = in[i] - s on 64-bit integers.
//   rcx -> s (loaded once into rax), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_211:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	// Fewer than 16 elements: go straight to the scalar loop.
	JB   LBB1_213
	// Range check: if in+8*count <= out or out+8*count <= in the two ranges do
	// not overlap and control goes to LBB1_569 (outside this view; presumably
	// the vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_569
	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_569

// Start the scalar loop at index 0 (rsi).
LBB1_213:
	WORD $0xf631 // xor    esi, esi

// rsi = start index. r9 = count - rsi - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_985:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_987

// Peel loop: one element per iteration, rdi times.
LBB1_986:
	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x2948; BYTE $0xc1 // sub    rcx, rax
	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_986

// r9 < 3 means fewer than 4 elements were pending at LBB1_985, so nothing is
// left for the unrolled loop.
LBB1_987:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rsi reaches the count r10.
LBB1_988:
	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_988
	JMP  LBB1_1109

// LBB1_214: scalar (non-SIMD) loop computing out[i] = in[i] - s on float32.
//   rcx -> s (loaded once into xmm0), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_214:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	LONG $0x0110fac5         // vmovss    xmm0, dword [rcx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	// Fewer than 32 elements: go straight to the scalar loop.
	JB   LBB1_216
	// Range check: if in+4*count <= out or out+4*count <= in the two ranges do
	// not overlap and control goes to LBB1_572 (outside this view; presumably
	// the vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0x820c8d48         // lea    rcx, [rdx + 4*rax]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_572
	LONG $0x800c8d49         // lea    rcx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_572

// Start the scalar loop at index 0 (rcx is reused as the index from here on).
LBB1_216:
	WORD $0xc931 // xor    ecx, ecx

// rcx = start index. rsi = count - rcx - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_993:
	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_995

// Peel loop: one element per iteration, rdi times. The element is loaded
// first so that the scalar is the subtrahend (in[i] - s).
LBB1_994:
	LONG $0x0c10fac5; BYTE $0x8a   // vmovss    xmm1, dword [rdx + 4*rcx]
	LONG $0xc85cf2c5               // vsubss    xmm1, xmm1, xmm0
	LONG $0x117ac1c4; WORD $0x880c // vmovss    dword [r8 + 4*rcx], xmm1
	LONG $0x01c18348               // add    rcx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB1_994

// rsi < 3 means fewer than 4 elements were pending at LBB1_993, so nothing is
// left for the unrolled loop.
LBB1_995:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rcx reaches the count rax.
LBB1_996:
	LONG $0x0c10fac5; BYTE $0x8a               // vmovss    xmm1, dword [rdx + 4*rcx]
	LONG $0xc85cf2c5                           // vsubss    xmm1, xmm1, xmm0
	LONG $0x117ac1c4; WORD $0x880c             // vmovss    dword [r8 + 4*rcx], xmm1
	LONG $0x4c10fac5; WORD $0x048a             // vmovss    xmm1, dword [rdx + 4*rcx + 4]
	LONG $0xc85cf2c5                           // vsubss    xmm1, xmm1, xmm0
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x04 // vmovss    dword [r8 + 4*rcx + 4], xmm1
	LONG $0x4c10fac5; WORD $0x088a             // vmovss    xmm1, dword [rdx + 4*rcx + 8]
	LONG $0xc85cf2c5                           // vsubss    xmm1, xmm1, xmm0
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x08 // vmovss    dword [r8 + 4*rcx + 8], xmm1
	LONG $0x4c10fac5; WORD $0x0c8a             // vmovss    xmm1, dword [rdx + 4*rcx + 12]
	LONG $0xc85cf2c5                           // vsubss    xmm1, xmm1, xmm0
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x0c // vmovss    dword [r8 + 4*rcx + 12], xmm1
	LONG $0x04c18348                           // add    rcx, 4
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JNE  LBB1_996
	JMP  LBB1_1109

// LBB1_217: scalar (non-SIMD) loop computing out[i] = in[i] + s on 64-bit integers.
//   rcx -> s (loaded once into rax), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// The instruction sequence matches LBB1_205; the dispatch that selects between
// the two is outside this view (presumably a different type/op case).
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_217:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	// Fewer than 16 elements: go straight to the scalar loop.
	JB   LBB1_219
	// Range check: if in+8*count <= out or out+8*count <= in the two ranges do
	// not overlap and control goes to LBB1_575 (outside this view; presumably
	// the vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_575
	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_575

// Start the scalar loop at index 0 (rsi).
LBB1_219:
	WORD $0xf631 // xor    esi, esi

// rsi = start index. r9 = count - rsi - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_1001:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1003

// Peel loop: one element per iteration, rdi times.
LBB1_1002:
	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x0148; BYTE $0xc1 // add    rcx, rax
	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_1002

// r9 < 3 means fewer than 4 elements were pending at LBB1_1001, so nothing is
// left for the unrolled loop.
LBB1_1003:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rsi reaches the count r10.
LBB1_1004:
	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_1004
	JMP  LBB1_1109

// LBB1_220: scalar (non-SIMD) loop computing out[i] = s + in[i] on float32.
//   rcx -> s (loaded once into xmm0), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// The instruction sequence matches LBB1_208; the dispatch that selects between
// the two is outside this view (presumably a different type/op case).
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_220:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	LONG $0x0110fac5         // vmovss    xmm0, dword [rcx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	// Fewer than 32 elements: go straight to the scalar loop.
	JB   LBB1_222
	// Range check: if in+4*count <= out or out+4*count <= in the two ranges do
	// not overlap and control goes to LBB1_578 (outside this view; presumably
	// the vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0x820c8d48         // lea    rcx, [rdx + 4*rax]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_578
	LONG $0x800c8d49         // lea    rcx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_578

// Start the scalar loop at index 0 (rcx is reused as the index from here on).
LBB1_222:
	WORD $0xc931 // xor    ecx, ecx

// rcx = start index. rsi = count - rcx - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_1009:
	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1011

// Peel loop: one element per iteration, rdi times.
LBB1_1010:
	LONG $0x0c58fac5; BYTE $0x8a   // vaddss    xmm1, xmm0, dword [rdx + 4*rcx]
	LONG $0x117ac1c4; WORD $0x880c // vmovss    dword [r8 + 4*rcx], xmm1
	LONG $0x01c18348               // add    rcx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB1_1010

// rsi < 3 means fewer than 4 elements were pending at LBB1_1009, so nothing is
// left for the unrolled loop.
LBB1_1011:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rcx reaches the count rax.
LBB1_1012:
	LONG $0x0c58fac5; BYTE $0x8a               // vaddss    xmm1, xmm0, dword [rdx + 4*rcx]
	LONG $0x117ac1c4; WORD $0x880c             // vmovss    dword [r8 + 4*rcx], xmm1
	LONG $0x4c58fac5; WORD $0x048a             // vaddss    xmm1, xmm0, dword [rdx + 4*rcx + 4]
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x04 // vmovss    dword [r8 + 4*rcx + 4], xmm1
	LONG $0x4c58fac5; WORD $0x088a             // vaddss    xmm1, xmm0, dword [rdx + 4*rcx + 8]
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x08 // vmovss    dword [r8 + 4*rcx + 8], xmm1
	LONG $0x4c58fac5; WORD $0x0c8a             // vaddss    xmm1, xmm0, dword [rdx + 4*rcx + 12]
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x0c // vmovss    dword [r8 + 4*rcx + 12], xmm1
	LONG $0x04c18348                           // add    rcx, 4
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JNE  LBB1_1012
	JMP  LBB1_1109

// LBB1_223: scalar (non-SIMD) loop computing out[i] = in[i] - s on 64-bit integers.
//   rcx -> s (loaded once into rax), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// The instruction sequence matches LBB1_211; the dispatch that selects between
// the two is outside this view (presumably a different type/op case).
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_223:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	// Fewer than 16 elements: go straight to the scalar loop.
	JB   LBB1_225
	// Range check: if in+8*count <= out or out+8*count <= in the two ranges do
	// not overlap and control goes to LBB1_581 (outside this view; presumably
	// the vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_581
	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_581

// Start the scalar loop at index 0 (rsi).
LBB1_225:
	WORD $0xf631 // xor    esi, esi

// rsi = start index. r9 = count - rsi - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_1017:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1019

// Peel loop: one element per iteration, rdi times.
LBB1_1018:
	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x2948; BYTE $0xc1 // sub    rcx, rax
	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_1018

// r9 < 3 means fewer than 4 elements were pending at LBB1_1017, so nothing is
// left for the unrolled loop.
LBB1_1019:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rsi reaches the count r10.
LBB1_1020:
	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_1020
	JMP  LBB1_1109

// LBB1_226: scalar (non-SIMD) loop computing out[i] = in[i] - s on float32.
//   rcx -> s (loaded once into xmm0), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// The instruction sequence matches LBB1_214; the dispatch that selects between
// the two is outside this view (presumably a different type/op case).
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_226:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	LONG $0x0110fac5         // vmovss    xmm0, dword [rcx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	// Fewer than 32 elements: go straight to the scalar loop.
	JB   LBB1_228
	// Range check: if in+4*count <= out or out+4*count <= in the two ranges do
	// not overlap and control goes to LBB1_584 (outside this view; presumably
	// the vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0x820c8d48         // lea    rcx, [rdx + 4*rax]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_584
	LONG $0x800c8d49         // lea    rcx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_584

// Start the scalar loop at index 0 (rcx is reused as the index from here on).
LBB1_228:
	WORD $0xc931 // xor    ecx, ecx

// rcx = start index. rsi = count - rcx - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_1025:
	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1027

// Peel loop: one element per iteration, rdi times. The element is loaded
// first so that the scalar is the subtrahend (in[i] - s).
LBB1_1026:
	LONG $0x0c10fac5; BYTE $0x8a   // vmovss    xmm1, dword [rdx + 4*rcx]
	LONG $0xc85cf2c5               // vsubss    xmm1, xmm1, xmm0
	LONG $0x117ac1c4; WORD $0x880c // vmovss    dword [r8 + 4*rcx], xmm1
	LONG $0x01c18348               // add    rcx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB1_1026

// rsi < 3 means fewer than 4 elements were pending at LBB1_1025, so nothing is
// left for the unrolled loop.
LBB1_1027:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rcx reaches the count rax.
LBB1_1028:
	LONG $0x0c10fac5; BYTE $0x8a               // vmovss    xmm1, dword [rdx + 4*rcx]
	LONG $0xc85cf2c5                           // vsubss    xmm1, xmm1, xmm0
	LONG $0x117ac1c4; WORD $0x880c             // vmovss    dword [r8 + 4*rcx], xmm1
	LONG $0x4c10fac5; WORD $0x048a             // vmovss    xmm1, dword [rdx + 4*rcx + 4]
	LONG $0xc85cf2c5                           // vsubss    xmm1, xmm1, xmm0
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x04 // vmovss    dword [r8 + 4*rcx + 4], xmm1
	LONG $0x4c10fac5; WORD $0x088a             // vmovss    xmm1, dword [rdx + 4*rcx + 8]
	LONG $0xc85cf2c5                           // vsubss    xmm1, xmm1, xmm0
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x08 // vmovss    dword [r8 + 4*rcx + 8], xmm1
	LONG $0x4c10fac5; WORD $0x0c8a             // vmovss    xmm1, dword [rdx + 4*rcx + 12]
	LONG $0xc85cf2c5                           // vsubss    xmm1, xmm1, xmm0
	LONG $0x117ac1c4; WORD $0x884c; BYTE $0x0c // vmovss    dword [r8 + 4*rcx + 12], xmm1
	LONG $0x04c18348                           // add    rcx, 4
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JNE  LBB1_1028
	JMP  LBB1_1109

// LBB1_229: scalar (non-SIMD) loop computing out[i] = low byte of (in[i] * s)
// on 8-bit elements.
//   rcx -> s (loaded once into cl), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_229:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x098a             // mov    cl, byte [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	// Fewer than 32 elements: go straight to the scalar loop.
	JB   LBB1_231
	// Range check: if in+count <= out or out+count <= in the two ranges do not
	// overlap and control goes to LBB1_587 (outside this view; presumably the
	// vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	JBE  LBB1_587
	LONG $0x10048d4b         // lea    rax, [r8 + r10]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB1_587

// Start the scalar loop at index 0 (rdi).
LBB1_231:
	WORD $0xff31 // xor    edi, edi

// rdi = start index. r9 = count - rdi - 1; rsi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_647:
	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
	LONG $0x03e68348         // and    rsi, 3
	JE   LBB1_649

// Peel loop: one element per iteration, rsi times. `mul cl` leaves the 16-bit
// product in ax; only al is stored.
LBB1_648:
	LONG $0x3a04b60f // movzx    eax, byte [rdx + rdi]
	WORD $0xe1f6     // mul    cl
	LONG $0x38048841 // mov    byte [r8 + rdi], al
	LONG $0x01c78348 // add    rdi, 1
	LONG $0xffc68348 // add    rsi, -1
	JNE  LBB1_648

// r9 < 3 means fewer than 4 elements were pending at LBB1_647, so nothing is
// left for the unrolled loop.
LBB1_649:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rdi reaches the count r10.
LBB1_650:
	LONG $0x3a04b60f             // movzx    eax, byte [rdx + rdi]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38048841             // mov    byte [r8 + rdi], al
	LONG $0x3a44b60f; BYTE $0x01 // movzx    eax, byte [rdx + rdi + 1]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
	LONG $0x3a44b60f; BYTE $0x02 // movzx    eax, byte [rdx + rdi + 2]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
	LONG $0x3a44b60f; BYTE $0x03 // movzx    eax, byte [rdx + rdi + 3]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
	LONG $0x04c78348             // add    rdi, 4
	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
	JNE  LBB1_650
	JMP  LBB1_1109

// LBB1_232: scalar (non-SIMD) loop computing out[i] = low byte of (in[i] * s)
// on 8-bit elements.
//   rcx -> s (loaded once into cl), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// The instruction sequence matches LBB1_229; the dispatch that selects between
// the two is outside this view (presumably a different type/op case).
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_232:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x098a             // mov    cl, byte [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	// Fewer than 32 elements: go straight to the scalar loop.
	JB   LBB1_234
	// Range check: if in+count <= out or out+count <= in the two ranges do not
	// overlap and control goes to LBB1_589 (outside this view; presumably the
	// vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0x12048d4a         // lea    rax, [rdx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	JBE  LBB1_589
	LONG $0x10048d4b         // lea    rax, [r8 + r10]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB1_589

// Start the scalar loop at index 0 (rdi).
LBB1_234:
	WORD $0xff31 // xor    edi, edi

// rdi = start index. r9 = count - rdi - 1; rsi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_657:
	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
	LONG $0x03e68348         // and    rsi, 3
	JE   LBB1_659

// Peel loop: one element per iteration, rsi times. `mul cl` leaves the 16-bit
// product in ax; only al is stored.
LBB1_658:
	LONG $0x3a04b60f // movzx    eax, byte [rdx + rdi]
	WORD $0xe1f6     // mul    cl
	LONG $0x38048841 // mov    byte [r8 + rdi], al
	LONG $0x01c78348 // add    rdi, 1
	LONG $0xffc68348 // add    rsi, -1
	JNE  LBB1_658

// r9 < 3 means fewer than 4 elements were pending at LBB1_657, so nothing is
// left for the unrolled loop.
LBB1_659:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rdi reaches the count r10.
LBB1_660:
	LONG $0x3a04b60f             // movzx    eax, byte [rdx + rdi]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38048841             // mov    byte [r8 + rdi], al
	LONG $0x3a44b60f; BYTE $0x01 // movzx    eax, byte [rdx + rdi + 1]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
	LONG $0x3a44b60f; BYTE $0x02 // movzx    eax, byte [rdx + rdi + 2]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
	LONG $0x3a44b60f; BYTE $0x03 // movzx    eax, byte [rdx + rdi + 3]
	WORD $0xe1f6                 // mul    cl
	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
	LONG $0x04c78348             // add    rdi, 4
	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
	JNE  LBB1_660
	JMP  LBB1_1109

// LBB1_235: scalar (non-SIMD) loop computing out[i] = in[i] + s on 8-bit
// elements (the sum wraps at 8 bits).
//   rcx -> s (loaded once into al), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_235:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018a                               // mov    al, byte [rcx]
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	// Fewer than 128 elements: go straight to the scalar loop.
	JB   LBB1_237
	// Range check: if in+count <= out or out+count <= in the two ranges do not
	// overlap and control goes to LBB1_591 (outside this view; presumably the
	// vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0x120c8d4a                           // lea    rcx, [rdx + r10]
	WORD $0x394c; BYTE $0xc1                   // cmp    rcx, r8
	JBE  LBB1_591
	LONG $0x100c8d4b                           // lea    rcx, [r8 + r10]
	WORD $0x3948; BYTE $0xd1                   // cmp    rcx, rdx
	JBE  LBB1_591

// Start the scalar loop at index 0 (rsi).
LBB1_237:
	WORD $0xf631 // xor    esi, esi

// rsi = start index. r9 = count - rsi - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_1033:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1035

// Peel loop: one element per iteration, rdi times.
LBB1_1034:
	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
	WORD $0xc100     // add    cl, al
	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB1_1034

// r9 < 3 means fewer than 4 elements were pending at LBB1_1033, so nothing is
// left for the unrolled loop.
LBB1_1035:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rsi reaches the count r10.
LBB1_1036:
	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
	WORD $0xc100                 // add    cl, al
	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
	WORD $0xc100                 // add    cl, al
	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
	WORD $0xc100                 // add    cl, al
	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
	WORD $0xc100                 // add    cl, al
	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_1036
	JMP  LBB1_1109

// LBB1_238: scalar (non-SIMD) loop computing out[i] = in[i] - s on 8-bit
// elements (the difference wraps at 8 bits).
//   rcx -> s (loaded once into al), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_238:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018a                               // mov    al, byte [rcx]
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	// Fewer than 128 elements: go straight to the scalar loop.
	JB   LBB1_240
	// Range check: if in+count <= out or out+count <= in the two ranges do not
	// overlap and control goes to LBB1_594 (outside this view; presumably the
	// vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0x120c8d4a                           // lea    rcx, [rdx + r10]
	WORD $0x394c; BYTE $0xc1                   // cmp    rcx, r8
	JBE  LBB1_594
	LONG $0x100c8d4b                           // lea    rcx, [r8 + r10]
	WORD $0x3948; BYTE $0xd1                   // cmp    rcx, rdx
	JBE  LBB1_594

// Start the scalar loop at index 0 (rsi).
LBB1_240:
	WORD $0xf631 // xor    esi, esi

// rsi = start index. r9 = count - rsi - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_1041:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1043

// Peel loop: one element per iteration, rdi times.
LBB1_1042:
	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
	WORD $0xc128     // sub    cl, al
	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB1_1042

// r9 < 3 means fewer than 4 elements were pending at LBB1_1041, so nothing is
// left for the unrolled loop.
LBB1_1043:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rsi reaches the count r10.
LBB1_1044:
	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
	WORD $0xc128                 // sub    cl, al
	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
	WORD $0xc128                 // sub    cl, al
	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
	WORD $0xc128                 // sub    cl, al
	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
	WORD $0xc128                 // sub    cl, al
	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_1044
	JMP  LBB1_1109

// LBB1_241: scalar (non-SIMD) loop computing out[i] = in[i] + s on 8-bit
// elements (the sum wraps at 8 bits).
//   rcx -> s (loaded once into al), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// The instruction sequence matches LBB1_235; the dispatch that selects between
// the two is outside this view (presumably a different type/op case).
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_241:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018a                               // mov    al, byte [rcx]
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	// Fewer than 128 elements: go straight to the scalar loop.
	JB   LBB1_243
	// Range check: if in+count <= out or out+count <= in the two ranges do not
	// overlap and control goes to LBB1_597 (outside this view; presumably the
	// vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0x120c8d4a                           // lea    rcx, [rdx + r10]
	WORD $0x394c; BYTE $0xc1                   // cmp    rcx, r8
	JBE  LBB1_597
	LONG $0x100c8d4b                           // lea    rcx, [r8 + r10]
	WORD $0x3948; BYTE $0xd1                   // cmp    rcx, rdx
	JBE  LBB1_597

// Start the scalar loop at index 0 (rsi).
LBB1_243:
	WORD $0xf631 // xor    esi, esi

// rsi = start index. r9 = count - rsi - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_1049:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1051

// Peel loop: one element per iteration, rdi times.
LBB1_1050:
	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
	WORD $0xc100     // add    cl, al
	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB1_1050

// r9 < 3 means fewer than 4 elements were pending at LBB1_1049, so nothing is
// left for the unrolled loop.
LBB1_1051:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rsi reaches the count r10.
LBB1_1052:
	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
	WORD $0xc100                 // add    cl, al
	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
	WORD $0xc100                 // add    cl, al
	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
	WORD $0xc100                 // add    cl, al
	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
	WORD $0xc100                 // add    cl, al
	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_1052
	JMP  LBB1_1109

// LBB1_244: scalar (non-SIMD) loop computing out[i] = in[i] - s on 8-bit
// elements (the difference wraps at 8 bits).
//   rcx -> s (loaded once into al), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// The instruction sequence matches LBB1_238; the dispatch that selects between
// the two is outside this view (presumably a different type/op case).
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_244:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018a                               // mov    al, byte [rcx]
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	// Fewer than 128 elements: go straight to the scalar loop.
	JB   LBB1_246
	// Range check: if in+count <= out or out+count <= in the two ranges do not
	// overlap and control goes to LBB1_600 (outside this view; presumably the
	// vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0x120c8d4a                           // lea    rcx, [rdx + r10]
	WORD $0x394c; BYTE $0xc1                   // cmp    rcx, r8
	JBE  LBB1_600
	LONG $0x100c8d4b                           // lea    rcx, [r8 + r10]
	WORD $0x3948; BYTE $0xd1                   // cmp    rcx, rdx
	JBE  LBB1_600

// Start the scalar loop at index 0 (rsi).
LBB1_246:
	WORD $0xf631 // xor    esi, esi

// rsi = start index. r9 = count - rsi - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_1057:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1059

// Peel loop: one element per iteration, rdi times.
LBB1_1058:
	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
	WORD $0xc128     // sub    cl, al
	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB1_1058

// r9 < 3 means fewer than 4 elements were pending at LBB1_1057, so nothing is
// left for the unrolled loop.
LBB1_1059:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rsi reaches the count r10.
LBB1_1060:
	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
	WORD $0xc128                 // sub    cl, al
	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
	WORD $0xc128                 // sub    cl, al
	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
	WORD $0xc128                 // sub    cl, al
	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
	WORD $0xc128                 // sub    cl, al
	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_1060
	JMP  LBB1_1109

// LBB1_247: scalar (non-SIMD) loop computing out[i] = in[i] * s on 32-bit integers.
//   rcx -> s (loaded once into eax), rdx -> in, r8 -> out, r9d = element count.
//   NOTE(review): the mapping of these registers to the Go arguments is set up
//   outside this view -- confirm against the function prologue.
// Leaves through LBB1_1109 (outside this view) when the count is <= 0 or when done.
LBB1_247:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018b             // mov    eax, dword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	// Fewer than 32 elements: go straight to the scalar loop.
	JB   LBB1_249
	// Range check: if in+4*count <= out or out+4*count <= in the two ranges do
	// not overlap and control goes to LBB1_603 (outside this view; presumably
	// the vectorized variant). Overlapping ranges fall through to the scalar loop.
	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_603
	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_603

// Start the scalar loop at index 0 (rsi).
LBB1_249:
	WORD $0xf631 // xor    esi, esi

// rsi = start index. r9 = count - rsi - 1; rdi = count & 3 = number of
// single-element iterations to run before the 4x-unrolled loop.
LBB1_1065:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1067

// Peel loop: one element per iteration, rdi times.
LBB1_1066:
	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xaf0f; BYTE $0xc8 // imul    ecx, eax
	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_1066

// r9 < 3 means fewer than 4 elements were pending at LBB1_1065, so nothing is
// left for the unrolled loop.
LBB1_1067:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// Main loop, unrolled 4x; repeats until the index rsi reaches the count r10.
LBB1_1068:
	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_1068
	JMP  LBB1_1109

// 32-bit multiply by a scalar: out[i] = in[i] * s for i in [0, count).
// As used below: rdx = input array, rcx = pointer to the scalar s,
// r8 = output array, r9d = element count (signed; <= 0 exits immediately).
// LBB1_1109 and LBB1_606 are defined outside this excerpt (presumably the
// common exit and the vectorized path for this case -- confirm).
LBB1_250:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018b             // mov    eax, dword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_252
	// With >= 32 elements, leave for LBB1_606 only when the input and output
	// ranges do not overlap: in + 4*count <= out, or out + 4*count <= in.
	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_606
	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_606

// Scalar fallback starts at element index 0.
LBB1_252:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (mov/not/add); rdi = r10 & 3 is the number of elements
// peeled one at a time before the 4x unrolled loop.
LBB1_1073:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1075

LBB1_1074:
	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xaf0f; BYTE $0xc8 // imul    ecx, eax
	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_1074

LBB1_1075:
	// r9 < 3 means fewer than 4 elements remained at LBB1_1073, so the
	// peel loop already handled all of them.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// 4x unrolled body; runs until the index rsi reaches the count r10.
LBB1_1076:
	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_1076
	JMP  LBB1_1109

// 32-bit add of a scalar: out[i] = in[i] + s for i in [0, count).
// As used below: rdx = input array, rcx = pointer to the scalar s,
// r8 = output array, r9d = element count (signed; <= 0 exits immediately).
// LBB1_1109 and LBB1_609 are defined outside this excerpt (presumably the
// common exit and the vectorized path for this case -- confirm).
LBB1_253:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018b             // mov    eax, dword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_255
	// With >= 32 elements, leave for LBB1_609 only when the input and output
	// ranges do not overlap: in + 4*count <= out, or out + 4*count <= in.
	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_609
	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_609

// Scalar fallback starts at element index 0.
LBB1_255:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (mov/not/add); rdi = r10 & 3 is the number of elements
// peeled one at a time before the 4x unrolled loop.
LBB1_1081:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1083

LBB1_1082:
	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc101             // add    ecx, eax
	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_1082

LBB1_1083:
	// r9 < 3 means fewer than 4 elements remained at LBB1_1081, so the
	// peel loop already handled all of them.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// 4x unrolled body; runs until the index rsi reaches the count r10.
LBB1_1084:
	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_1084
	JMP  LBB1_1109

// 32-bit subtract of a scalar: out[i] = in[i] - s for i in [0, count).
// As used below: rdx = input array, rcx = pointer to the scalar s,
// r8 = output array, r9d = element count (signed; <= 0 exits immediately).
// LBB1_1109 and LBB1_612 are defined outside this excerpt (presumably the
// common exit and the vectorized path for this case -- confirm).
LBB1_256:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018b             // mov    eax, dword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_258
	// With >= 32 elements, leave for LBB1_612 only when the input and output
	// ranges do not overlap: in + 4*count <= out, or out + 4*count <= in.
	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_612
	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_612

// Scalar fallback starts at element index 0.
LBB1_258:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (mov/not/add); rdi = r10 & 3 is the number of elements
// peeled one at a time before the 4x unrolled loop.
LBB1_1089:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1091

LBB1_1090:
	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc129             // sub    ecx, eax
	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_1090

LBB1_1091:
	// r9 < 3 means fewer than 4 elements remained at LBB1_1089, so the
	// peel loop already handled all of them.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// 4x unrolled body; runs until the index rsi reaches the count r10.
LBB1_1092:
	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_1092
	JMP  LBB1_1109

// 32-bit add of a scalar: out[i] = in[i] + s for i in [0, count).
// As used below: rdx = input array, rcx = pointer to the scalar s,
// r8 = output array, r9d = element count (signed; <= 0 exits immediately).
// LBB1_1109 and LBB1_615 are defined outside this excerpt (presumably the
// common exit and the vectorized path for this case -- confirm).
LBB1_259:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018b             // mov    eax, dword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_261
	// With >= 32 elements, leave for LBB1_615 only when the input and output
	// ranges do not overlap: in + 4*count <= out, or out + 4*count <= in.
	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_615
	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_615

// Scalar fallback starts at element index 0.
LBB1_261:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (mov/not/add); rdi = r10 & 3 is the number of elements
// peeled one at a time before the 4x unrolled loop.
LBB1_1097:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1099

LBB1_1098:
	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc101             // add    ecx, eax
	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_1098

LBB1_1099:
	// r9 < 3 means fewer than 4 elements remained at LBB1_1097, so the
	// peel loop already handled all of them.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// 4x unrolled body; runs until the index rsi reaches the count r10.
LBB1_1100:
	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
	WORD $0xc101                 // add    ecx, eax
	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_1100
	JMP  LBB1_1109

// 32-bit subtract of a scalar: out[i] = in[i] - s for i in [0, count).
// As used below: rdx = input array, rcx = pointer to the scalar s,
// r8 = output array, r9d = element count (signed; <= 0 exits immediately).
// LBB1_1109 and LBB1_618 are defined outside this excerpt (presumably the
// common exit and the vectorized path for this case -- confirm).
LBB1_262:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB1_1109
	WORD $0x018b             // mov    eax, dword [rcx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB1_264
	// With >= 32 elements, leave for LBB1_618 only when the input and output
	// ranges do not overlap: in + 4*count <= out, or out + 4*count <= in.
	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
	JBE  LBB1_618
	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
	JBE  LBB1_618

// Scalar fallback starts at element index 0.
LBB1_264:
	WORD $0xf631 // xor    esi, esi

// r9 = r10 - rsi - 1 (mov/not/add); rdi = r10 & 3 is the number of elements
// peeled one at a time before the 4x unrolled loop.
LBB1_1105:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB1_1107

LBB1_1106:
	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc129             // sub    ecx, eax
	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB1_1106

LBB1_1107:
	// r9 < 3 means fewer than 4 elements remained at LBB1_1105, so the
	// peel loop already handled all of them.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB1_1109

// 4x unrolled body; runs until the index rsi reaches the count r10.
LBB1_1108:
	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
	WORD $0xc129                 // sub    ecx, eax
	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB1_1108
	JMP  LBB1_1109

// AVX2 main loop, 32-bit multiply by a scalar: out[i] = in[i] * eax.
// r10d (element count) and eax (scalar) are set before this block is entered;
// rdx = input array, r8 = output array.
// rsi = count rounded down to a multiple of 32; ymm0 = scalar in all 8 lanes;
// r9 = number of 32-element chunks ((rsi - 32) >> 5, plus 1).
// LBB1_661 / LBB1_662 are outside this excerpt (presumably they handle the
// odd leftover chunk and the scalar tail -- confirm).
LBB1_445:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rsi == 32 means a single chunk: skip the loop that works on chunk pairs.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_661
	// rcx = -(r9 & -2): negated even chunk count, stepped by 2 up to zero.
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 32-element chunks (8 ymm vectors, 64 dwords).
LBB1_447:
	LONG $0x407de2c4; WORD $0xba0c             // vpmulld    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x407de2c4; WORD $0xba54; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xba5c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xba64; BYTE $0x60 // vpmulld    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x0080ba8c407de2c4; WORD $0x0000     // vpmulld    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x00a0ba94407de2c4; WORD $0x0000     // vpmulld    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x00c0ba9c407de2c4; WORD $0x0000     // vpmulld    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x00e0baa4407de2c4; WORD $0x0000     // vpmulld    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_447
	JMP  LBB1_662

// AVX2 main loop, 32-bit multiply by a scalar: out[i] = in[i] * eax.
// r10d (element count) and eax (scalar) are set before this block is entered;
// rdx = input array, r8 = output array.
// rsi = count rounded down to a multiple of 32; ymm0 = scalar in all 8 lanes;
// r9 = number of 32-element chunks ((rsi - 32) >> 5, plus 1).
// LBB1_669 / LBB1_670 are outside this excerpt (presumably they handle the
// odd leftover chunk and the scalar tail -- confirm).
LBB1_448:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rsi == 32 means a single chunk: skip the loop that works on chunk pairs.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_669
	// rcx = -(r9 & -2): negated even chunk count, stepped by 2 up to zero.
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 32-element chunks (8 ymm vectors, 64 dwords).
LBB1_450:
	LONG $0x407de2c4; WORD $0xba0c             // vpmulld    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x407de2c4; WORD $0xba54; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xba5c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xba64; BYTE $0x60 // vpmulld    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x0080ba8c407de2c4; WORD $0x0000     // vpmulld    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x00a0ba94407de2c4; WORD $0x0000     // vpmulld    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x00c0ba9c407de2c4; WORD $0x0000     // vpmulld    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x00e0baa4407de2c4; WORD $0x0000     // vpmulld    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_450
	JMP  LBB1_670

// AVX2 main loop, 32-bit add of a scalar: out[i] = in[i] + eax.
// r10d (element count) and eax (scalar) are set before this block is entered;
// rdx = input array, r8 = output array.
// rsi = count rounded down to a multiple of 32; ymm0 = scalar in all 8 lanes;
// r9 = number of 32-element chunks ((rsi - 32) >> 5, plus 1).
// LBB1_677 / LBB1_678 are outside this excerpt (presumably they handle the
// odd leftover chunk and the scalar tail -- confirm).
LBB1_451:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rsi == 32 means a single chunk: skip the loop that works on chunk pairs.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_677
	// rcx = -(r9 & -2): negated even chunk count, stepped by 2 up to zero.
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 32-element chunks (8 ymm vectors, 64 dwords).
LBB1_453:
	LONG $0x0cfefdc5; BYTE $0xba               // vpaddd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20ba             // vpaddd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40ba             // vpaddd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x64fefdc5; WORD $0x60ba             // vpaddd    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x000080ba8cfefdc5; BYTE $0x00       // vpaddd    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba94fefdc5; BYTE $0x00       // vpaddd    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9cfefdc5; BYTE $0x00       // vpaddd    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa4fefdc5; BYTE $0x00       // vpaddd    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_453
	JMP  LBB1_678

// AVX2 main loop, 32-bit subtract of a scalar: out[i] = in[i] - eax.
// r10d (element count) and eax (scalar) are set before this block is entered;
// rdx = input array, r8 = output array.
// rsi = count rounded down to a multiple of 32; ymm0 = scalar in all 8 lanes;
// r9 = number of 32-element chunks ((rsi - 32) >> 5, plus 1).
// LBB1_685 / LBB1_686 are outside this excerpt (presumably they handle the
// odd leftover chunk and the scalar tail -- confirm).
LBB1_454:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rsi == 32 means a single chunk: skip the loop that works on chunk pairs.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_685
	// rcx = -(r9 & -2): negated even chunk count, stepped by 2 up to zero.
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 32-element chunks (8 ymm vectors, 64 dwords).
// The input is loaded first because the array is the minuend (in - scalar).
LBB1_456:
	LONG $0x0c6ffec5; BYTE $0xba               // vmovdqu    ymm1, yword [rdx + 4*rdi]
	LONG $0x546ffec5; WORD $0x20ba             // vmovdqu    ymm2, yword [rdx + 4*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40ba             // vmovdqu    ymm3, yword [rdx + 4*rdi + 64]
	LONG $0x646ffec5; WORD $0x60ba             // vmovdqu    ymm4, yword [rdx + 4*rdi + 96]
	LONG $0xc8faf5c5                           // vpsubd    ymm1, ymm1, ymm0
	LONG $0xd0faedc5                           // vpsubd    ymm2, ymm2, ymm0
	LONG $0xd8fae5c5                           // vpsubd    ymm3, ymm3, ymm0
	LONG $0xe0faddc5                           // vpsubd    ymm4, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x000080ba8c6ffec5; BYTE $0x00       // vmovdqu    ymm1, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + 4*rdi + 224]
	LONG $0xc8faf5c5                           // vpsubd    ymm1, ymm1, ymm0
	LONG $0xd0faedc5                           // vpsubd    ymm2, ymm2, ymm0
	LONG $0xd8fae5c5                           // vpsubd    ymm3, ymm3, ymm0
	LONG $0xe0faddc5                           // vpsubd    ymm4, ymm4, ymm0
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_456
	JMP  LBB1_686

// AVX2 main loop, 32-bit add of a scalar: out[i] = in[i] + eax.
// r10d (element count) and eax (scalar) are set before this block is entered;
// rdx = input array, r8 = output array.
// rsi = count rounded down to a multiple of 32; ymm0 = scalar in all 8 lanes;
// r9 = number of 32-element chunks ((rsi - 32) >> 5, plus 1).
// LBB1_693 / LBB1_694 are outside this excerpt (presumably they handle the
// odd leftover chunk and the scalar tail -- confirm).
LBB1_457:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rsi == 32 means a single chunk: skip the loop that works on chunk pairs.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_693
	// rcx = -(r9 & -2): negated even chunk count, stepped by 2 up to zero.
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 32-element chunks (8 ymm vectors, 64 dwords).
LBB1_459:
	LONG $0x0cfefdc5; BYTE $0xba               // vpaddd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20ba             // vpaddd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40ba             // vpaddd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x64fefdc5; WORD $0x60ba             // vpaddd    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x000080ba8cfefdc5; BYTE $0x00       // vpaddd    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba94fefdc5; BYTE $0x00       // vpaddd    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9cfefdc5; BYTE $0x00       // vpaddd    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa4fefdc5; BYTE $0x00       // vpaddd    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_459
	JMP  LBB1_694

// AVX2 main loop, 32-bit subtract of a scalar: out[i] = in[i] - eax.
// r10d (element count) and eax (scalar) are set before this block is entered;
// rdx = input array, r8 = output array.
// rsi = count rounded down to a multiple of 32; ymm0 = scalar in all 8 lanes;
// r9 = number of 32-element chunks ((rsi - 32) >> 5, plus 1).
// LBB1_701 / LBB1_702 are outside this excerpt (presumably they handle the
// odd leftover chunk and the scalar tail -- confirm).
LBB1_460:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rsi == 32 means a single chunk: skip the loop that works on chunk pairs.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_701
	// rcx = -(r9 & -2): negated even chunk count, stepped by 2 up to zero.
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 32-element chunks (8 ymm vectors, 64 dwords).
// The input is loaded first because the array is the minuend (in - scalar).
LBB1_462:
	LONG $0x0c6ffec5; BYTE $0xba               // vmovdqu    ymm1, yword [rdx + 4*rdi]
	LONG $0x546ffec5; WORD $0x20ba             // vmovdqu    ymm2, yword [rdx + 4*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40ba             // vmovdqu    ymm3, yword [rdx + 4*rdi + 64]
	LONG $0x646ffec5; WORD $0x60ba             // vmovdqu    ymm4, yword [rdx + 4*rdi + 96]
	LONG $0xc8faf5c5                           // vpsubd    ymm1, ymm1, ymm0
	LONG $0xd0faedc5                           // vpsubd    ymm2, ymm2, ymm0
	LONG $0xd8fae5c5                           // vpsubd    ymm3, ymm3, ymm0
	LONG $0xe0faddc5                           // vpsubd    ymm4, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x000080ba8c6ffec5; BYTE $0x00       // vmovdqu    ymm1, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + 4*rdi + 224]
	LONG $0xc8faf5c5                           // vpsubd    ymm1, ymm1, ymm0
	LONG $0xd0faedc5                           // vpsubd    ymm2, ymm2, ymm0
	LONG $0xd8fae5c5                           // vpsubd    ymm3, ymm3, ymm0
	LONG $0xe0faddc5                           // vpsubd    ymm4, ymm4, ymm0
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_462
	JMP  LBB1_702

// AVX main loop, float64 multiply by a scalar: out[i] = in[i] * xmm0.
// eax and xmm0 are set before this block is entered; the code treats eax as
// the element count and the low double of xmm0 as the scalar (presumably
// loaded from the scalar operand -- confirm). rdx = input array, r8 = output.
// rcx = count rounded down to a multiple of 16; ymm1 = scalar in all 4 lanes;
// r9 = number of 16-element chunks ((rcx - 16) >> 4, plus 1).
// LBB1_709 / LBB1_710 are outside this excerpt (presumably they handle the
// odd leftover chunk and the scalar tail -- confirm).
LBB1_463:
	WORD $0xc189                 // mov    ecx, eax
	WORD $0xe183; BYTE $0xf0     // and    ecx, -16
	LONG $0x197de2c4; BYTE $0xc8 // vbroadcastsd    ymm1, xmm0
	LONG $0xf0718d48             // lea    rsi, [rcx - 16]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	// rcx == 16 means a single chunk: skip the loop that works on chunk pairs.
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB1_709
	// rsi = -(r9 & -2): negated even chunk count, stepped by 2 up to zero.
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 16-element chunks (8 ymm vectors, 32 doubles).
LBB1_465:
	LONG $0x1459f5c5; BYTE $0xfa               // vmulpd    ymm2, ymm1, yword [rdx + 8*rdi]
	LONG $0x5c59f5c5; WORD $0x20fa             // vmulpd    ymm3, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x6459f5c5; WORD $0x40fa             // vmulpd    ymm4, ymm1, yword [rdx + 8*rdi + 64]
	LONG $0x6c59f5c5; WORD $0x60fa             // vmulpd    ymm5, ymm1, yword [rdx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf86c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080fa9459f5c5; BYTE $0x00       // vmulpd    ymm2, ymm1, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa9c59f5c5; BYTE $0x00       // vmulpd    ymm3, ymm1, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0faa459f5c5; BYTE $0x00       // vmulpd    ymm4, ymm1, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faac59f5c5; BYTE $0x00       // vmulpd    ymm5, ymm1, yword [rdx + 8*rdi + 224]
	QUAD $0x0080f894117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a4117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB1_465
	JMP  LBB1_710

// AVX main loop, float64 multiply by a scalar: out[i] = in[i] * xmm0.
// eax and xmm0 are set before this block is entered; the code treats eax as
// the element count and the low double of xmm0 as the scalar (presumably
// loaded from the scalar operand -- confirm). rdx = input array, r8 = output.
// rcx = count rounded down to a multiple of 16; ymm1 = scalar in all 4 lanes;
// r9 = number of 16-element chunks ((rcx - 16) >> 4, plus 1).
// LBB1_717 / LBB1_718 are outside this excerpt (presumably they handle the
// odd leftover chunk and the scalar tail -- confirm).
LBB1_466:
	WORD $0xc189                 // mov    ecx, eax
	WORD $0xe183; BYTE $0xf0     // and    ecx, -16
	LONG $0x197de2c4; BYTE $0xc8 // vbroadcastsd    ymm1, xmm0
	LONG $0xf0718d48             // lea    rsi, [rcx - 16]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	// rcx == 16 means a single chunk: skip the loop that works on chunk pairs.
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB1_717
	// rsi = -(r9 & -2): negated even chunk count, stepped by 2 up to zero.
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 16-element chunks (8 ymm vectors, 32 doubles).
LBB1_468:
	LONG $0x1459f5c5; BYTE $0xfa               // vmulpd    ymm2, ymm1, yword [rdx + 8*rdi]
	LONG $0x5c59f5c5; WORD $0x20fa             // vmulpd    ymm3, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x6459f5c5; WORD $0x40fa             // vmulpd    ymm4, ymm1, yword [rdx + 8*rdi + 64]
	LONG $0x6c59f5c5; WORD $0x60fa             // vmulpd    ymm5, ymm1, yword [rdx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf86c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080fa9459f5c5; BYTE $0x00       // vmulpd    ymm2, ymm1, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa9c59f5c5; BYTE $0x00       // vmulpd    ymm3, ymm1, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0faa459f5c5; BYTE $0x00       // vmulpd    ymm4, ymm1, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faac59f5c5; BYTE $0x00       // vmulpd    ymm5, ymm1, yword [rdx + 8*rdi + 224]
	QUAD $0x0080f894117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a4117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB1_468
	JMP  LBB1_718

// AVX main loop, float64 add of a scalar: out[i] = in[i] + xmm0.
// eax and xmm0 are set before this block is entered; the code treats eax as
// the element count and the low double of xmm0 as the scalar (presumably
// loaded from the scalar operand -- confirm). rdx = input array, r8 = output.
// rcx = count rounded down to a multiple of 16; ymm1 = scalar in all 4 lanes;
// r9 = number of 16-element chunks ((rcx - 16) >> 4, plus 1).
// LBB1_725 / LBB1_726 are outside this excerpt (presumably they handle the
// odd leftover chunk and the scalar tail -- confirm).
LBB1_469:
	WORD $0xc189                 // mov    ecx, eax
	WORD $0xe183; BYTE $0xf0     // and    ecx, -16
	LONG $0x197de2c4; BYTE $0xc8 // vbroadcastsd    ymm1, xmm0
	LONG $0xf0718d48             // lea    rsi, [rcx - 16]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	// rcx == 16 means a single chunk: skip the loop that works on chunk pairs.
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB1_725
	// rsi = -(r9 & -2): negated even chunk count, stepped by 2 up to zero.
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 16-element chunks (8 ymm vectors, 32 doubles).
LBB1_471:
	LONG $0x1458f5c5; BYTE $0xfa               // vaddpd    ymm2, ymm1, yword [rdx + 8*rdi]
	LONG $0x5c58f5c5; WORD $0x20fa             // vaddpd    ymm3, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x6458f5c5; WORD $0x40fa             // vaddpd    ymm4, ymm1, yword [rdx + 8*rdi + 64]
	LONG $0x6c58f5c5; WORD $0x60fa             // vaddpd    ymm5, ymm1, yword [rdx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf86c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080fa9458f5c5; BYTE $0x00       // vaddpd    ymm2, ymm1, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa9c58f5c5; BYTE $0x00       // vaddpd    ymm3, ymm1, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0faa458f5c5; BYTE $0x00       // vaddpd    ymm4, ymm1, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faac58f5c5; BYTE $0x00       // vaddpd    ymm5, ymm1, yword [rdx + 8*rdi + 224]
	QUAD $0x0080f894117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a4117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB1_471
	JMP  LBB1_726

// LBB1_472: set-up for a vectorised float64 "array - scalar" loop.
//   ecx  = eax rounded down to a multiple of 16 (eax is loaded before this block;
//          presumably the element count -- confirm at the jump site)
//   ymm1 = low double of xmm0 replicated into all four lanes
//   r9   = number of 16-element groups = ((rcx - 16) >> 4) + 1
// If rcx - 16 is zero (exactly one group) the 2x-unrolled loop is skipped via LBB1_733.
LBB1_472:
	WORD $0xc189                 // mov    ecx, eax
	WORD $0xe183; BYTE $0xf0     // and    ecx, -16
	LONG $0x197de2c4; BYTE $0xc8 // vbroadcastsd    ymm1, xmm0
	LONG $0xf0718d48             // lea    rsi, [rcx - 16]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB1_733
	// Note the register roles are swapped relative to the add loops:
	// rdi = -(r9 & ~1) is the pair counter, rsi = element index 0.
	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
	LONG $0xfee78348             // and    rdi, -2
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	WORD $0xf631                 // xor    esi, esi

// LBB1_474: one pass handles two 16-element groups (32 doubles):
//   [r8 + 8*rsi + k] = [rdx + 8*rsi + k] - ymm1
// rsi advances by 32 elements, rdi by 2; the loop ends when rdi reaches 0 and
// control continues at LBB1_734 (outside this excerpt).
LBB1_474:
	LONG $0x1410fdc5; BYTE $0xf2               // vmovupd    ymm2, yword [rdx + 8*rsi]
	LONG $0x5c10fdc5; WORD $0x20f2             // vmovupd    ymm3, yword [rdx + 8*rsi + 32]
	LONG $0x6410fdc5; WORD $0x40f2             // vmovupd    ymm4, yword [rdx + 8*rsi + 64]
	LONG $0x6c10fdc5; WORD $0x60f2             // vmovupd    ymm5, yword [rdx + 8*rsi + 96]
	LONG $0xd15cedc5                           // vsubpd    ymm2, ymm2, ymm1
	LONG $0xd95ce5c5                           // vsubpd    ymm3, ymm3, ymm1
	LONG $0xe15cddc5                           // vsubpd    ymm4, ymm4, ymm1
	LONG $0xe95cd5c5                           // vsubpd    ymm5, ymm5, ymm1
	LONG $0x117dc1c4; WORD $0xf014             // vmovupd    yword [r8 + 8*rsi], ymm2
	LONG $0x117dc1c4; WORD $0xf05c; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm5
	QUAD $0x000080f29410fdc5; BYTE $0x00       // vmovupd    ymm2, yword [rdx + 8*rsi + 128]
	QUAD $0x0000a0f29c10fdc5; BYTE $0x00       // vmovupd    ymm3, yword [rdx + 8*rsi + 160]
	QUAD $0x0000c0f2a410fdc5; BYTE $0x00       // vmovupd    ymm4, yword [rdx + 8*rsi + 192]
	QUAD $0x0000e0f2ac10fdc5; BYTE $0x00       // vmovupd    ymm5, yword [rdx + 8*rsi + 224]
	LONG $0xd15cedc5                           // vsubpd    ymm2, ymm2, ymm1
	LONG $0xd95ce5c5                           // vsubpd    ymm3, ymm3, ymm1
	LONG $0xe15cddc5                           // vsubpd    ymm4, ymm4, ymm1
	LONG $0xe95cd5c5                           // vsubpd    ymm5, ymm5, ymm1
	QUAD $0x0080f094117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rsi + 128], ymm2
	QUAD $0x00a0f09c117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rsi + 160], ymm3
	QUAD $0x00c0f0a4117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rsi + 192], ymm4
	QUAD $0x00e0f0ac117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rsi + 224], ymm5
	LONG $0x20c68348                           // add    rsi, 32
	LONG $0x02c78348                           // add    rdi, 2
	JNE  LBB1_474
	JMP  LBB1_734

// LBB1_475: set-up for a vectorised float64 "array + scalar" loop. The instruction
// sequence matches LBB1_469; only the branch targets differ.
// NOTE(review): presumably reached from a different dispatch case -- confirm at the jump site.
//   ecx  = eax rounded down to a multiple of 16
//   ymm1 = low double of xmm0 replicated into all four lanes
//   r9   = number of 16-element groups = ((rcx - 16) >> 4) + 1
// If rcx - 16 is zero (exactly one group) the 2x-unrolled loop is skipped via LBB1_741.
LBB1_475:
	WORD $0xc189                 // mov    ecx, eax
	WORD $0xe183; BYTE $0xf0     // and    ecx, -16
	LONG $0x197de2c4; BYTE $0xc8 // vbroadcastsd    ymm1, xmm0
	LONG $0xf0718d48             // lea    rsi, [rcx - 16]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB1_741
	// rsi = -(r9 & ~1): negative count of groups taken two at a time; rdi = element index 0.
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// LBB1_477: one pass handles two 16-element groups (32 doubles):
//   [r8 + 8*rdi + k] = ymm1 + [rdx + 8*rdi + k]
// rdi advances by 32 elements, rsi by 2; the loop ends when rsi reaches 0 and
// control continues at LBB1_742 (outside this excerpt).
LBB1_477:
	LONG $0x1458f5c5; BYTE $0xfa               // vaddpd    ymm2, ymm1, yword [rdx + 8*rdi]
	LONG $0x5c58f5c5; WORD $0x20fa             // vaddpd    ymm3, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x6458f5c5; WORD $0x40fa             // vaddpd    ymm4, ymm1, yword [rdx + 8*rdi + 64]
	LONG $0x6c58f5c5; WORD $0x60fa             // vaddpd    ymm5, ymm1, yword [rdx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf86c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080fa9458f5c5; BYTE $0x00       // vaddpd    ymm2, ymm1, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa9c58f5c5; BYTE $0x00       // vaddpd    ymm3, ymm1, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0faa458f5c5; BYTE $0x00       // vaddpd    ymm4, ymm1, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faac58f5c5; BYTE $0x00       // vaddpd    ymm5, ymm1, yword [rdx + 8*rdi + 224]
	QUAD $0x0080f894117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a4117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB1_477
	JMP  LBB1_742

// LBB1_478: set-up for a vectorised float64 "array - scalar" loop. The instruction
// sequence matches LBB1_472; only the branch targets differ.
// NOTE(review): presumably reached from a different dispatch case -- confirm at the jump site.
//   ecx  = eax rounded down to a multiple of 16
//   ymm1 = low double of xmm0 replicated into all four lanes
//   r9   = number of 16-element groups = ((rcx - 16) >> 4) + 1
// If rcx - 16 is zero (exactly one group) the 2x-unrolled loop is skipped via LBB1_749.
LBB1_478:
	WORD $0xc189                 // mov    ecx, eax
	WORD $0xe183; BYTE $0xf0     // and    ecx, -16
	LONG $0x197de2c4; BYTE $0xc8 // vbroadcastsd    ymm1, xmm0
	LONG $0xf0718d48             // lea    rsi, [rcx - 16]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB1_749
	// rdi = -(r9 & ~1) is the pair counter, rsi = element index 0.
	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
	LONG $0xfee78348             // and    rdi, -2
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	WORD $0xf631                 // xor    esi, esi

// LBB1_480: one pass handles two 16-element groups (32 doubles):
//   [r8 + 8*rsi + k] = [rdx + 8*rsi + k] - ymm1
// rsi advances by 32 elements, rdi by 2; the loop ends when rdi reaches 0 and
// control continues at LBB1_750 (outside this excerpt).
LBB1_480:
	LONG $0x1410fdc5; BYTE $0xf2               // vmovupd    ymm2, yword [rdx + 8*rsi]
	LONG $0x5c10fdc5; WORD $0x20f2             // vmovupd    ymm3, yword [rdx + 8*rsi + 32]
	LONG $0x6410fdc5; WORD $0x40f2             // vmovupd    ymm4, yword [rdx + 8*rsi + 64]
	LONG $0x6c10fdc5; WORD $0x60f2             // vmovupd    ymm5, yword [rdx + 8*rsi + 96]
	LONG $0xd15cedc5                           // vsubpd    ymm2, ymm2, ymm1
	LONG $0xd95ce5c5                           // vsubpd    ymm3, ymm3, ymm1
	LONG $0xe15cddc5                           // vsubpd    ymm4, ymm4, ymm1
	LONG $0xe95cd5c5                           // vsubpd    ymm5, ymm5, ymm1
	LONG $0x117dc1c4; WORD $0xf014             // vmovupd    yword [r8 + 8*rsi], ymm2
	LONG $0x117dc1c4; WORD $0xf05c; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm5
	QUAD $0x000080f29410fdc5; BYTE $0x00       // vmovupd    ymm2, yword [rdx + 8*rsi + 128]
	QUAD $0x0000a0f29c10fdc5; BYTE $0x00       // vmovupd    ymm3, yword [rdx + 8*rsi + 160]
	QUAD $0x0000c0f2a410fdc5; BYTE $0x00       // vmovupd    ymm4, yword [rdx + 8*rsi + 192]
	QUAD $0x0000e0f2ac10fdc5; BYTE $0x00       // vmovupd    ymm5, yword [rdx + 8*rsi + 224]
	LONG $0xd15cedc5                           // vsubpd    ymm2, ymm2, ymm1
	LONG $0xd95ce5c5                           // vsubpd    ymm3, ymm3, ymm1
	LONG $0xe15cddc5                           // vsubpd    ymm4, ymm4, ymm1
	LONG $0xe95cd5c5                           // vsubpd    ymm5, ymm5, ymm1
	QUAD $0x0080f094117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rsi + 128], ymm2
	QUAD $0x00a0f09c117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rsi + 160], ymm3
	QUAD $0x00c0f0a4117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rsi + 192], ymm4
	QUAD $0x00e0f0ac117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rsi + 224], ymm5
	LONG $0x20c68348                           // add    rsi, 32
	LONG $0x02c78348                           // add    rdi, 2
	JNE  LBB1_480
	JMP  LBB1_750

// LBB1_481: set-up for a byte-element vector loop whose body lives at LBB1_621 / LBB1_623
// (both outside this excerpt).
//   edi  = r10d rounded down to a multiple of 32
//   ymm0 = low byte of ecx replicated into all 32 byte lanes
//   rax  = number of 32-byte chunks = ((rdi - 32) >> 5) + 1
//   r9d  = rax & 3 (chunk count modulo 4)
// If rdi - 32 >= 96 (unsigned; i.e. at least four chunks) control goes to LBB1_621;
// otherwise rsi is zeroed and control goes to LBB1_623.
// NOTE(review): presumably LBB1_621 is a 4x-unrolled loop and LBB1_623 handles the
// r9d leftover chunks -- confirm against those labels.
LBB1_481:
	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
	WORD $0xe783; BYTE $0xe0     // and    edi, -32
	LONG $0xc16ef9c5             // vmovd    xmm0, ecx
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0xe0778d48             // lea    rsi, [rdi - 32]
	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
	LONG $0x05e8c148             // shr    rax, 5
	LONG $0x01c08348             // add    rax, 1
	WORD $0x8941; BYTE $0xc1     // mov    r9d, eax
	LONG $0x03e18341             // and    r9d, 3
	LONG $0x60fe8348             // cmp    rsi, 96
	JAE  LBB1_621
	WORD $0xf631                 // xor    esi, esi
	JMP  LBB1_623

// LBB1_483: set-up for a byte-element vector loop whose body lives at LBB1_631 / LBB1_633
// (both outside this excerpt). Same instruction sequence as LBB1_481 with different targets.
//   edi  = r10d rounded down to a multiple of 32
//   ymm0 = low byte of ecx replicated into all 32 byte lanes
//   rax  = number of 32-byte chunks = ((rdi - 32) >> 5) + 1
//   r9d  = rax & 3 (chunk count modulo 4)
// If rdi - 32 >= 96 (unsigned; i.e. at least four chunks) control goes to LBB1_631;
// otherwise rsi is zeroed and control goes to LBB1_633.
// NOTE(review): presumably LBB1_631 is a 4x-unrolled loop and LBB1_633 handles the
// r9d leftover chunks -- confirm against those labels.
LBB1_483:
	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
	WORD $0xe783; BYTE $0xe0     // and    edi, -32
	LONG $0xc16ef9c5             // vmovd    xmm0, ecx
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0xe0778d48             // lea    rsi, [rdi - 32]
	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
	LONG $0x05e8c148             // shr    rax, 5
	LONG $0x01c08348             // add    rax, 1
	WORD $0x8941; BYTE $0xc1     // mov    r9d, eax
	LONG $0x03e18341             // and    r9d, 3
	LONG $0x60fe8348             // cmp    rsi, 96
	JAE  LBB1_631
	WORD $0xf631                 // xor    esi, esi
	JMP  LBB1_633

// LBB1_485: set-up for a vectorised 8-bit "array + scalar" loop.
//   esi  = r10d rounded down to a multiple of 128
//   ymm0 = low byte of eax replicated into all 32 byte lanes
//   r9   = number of 128-byte groups = ((rsi - 128) >> 7) + 1
// If rsi - 128 is zero (exactly one group) the 2x-unrolled loop is skipped via LBB1_757.
LBB1_485:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0x804e8d48             // lea    rcx, [rsi - 128]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_757
	// rcx = -(r9 & ~1): negative count of groups taken two at a time; rdi = byte offset 0.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// LBB1_487: one pass handles two 128-byte groups (256 bytes):
//   [r8 + rdi + k] = ymm0 + [rdx + rdi + k]   (per-byte vpaddb)
// rdi advances by 256 bytes, rcx by 2; the loop ends when rcx reaches 0 and
// control continues at LBB1_758 (outside this excerpt).
LBB1_487:
	LONG $0x0cfcfdc5; BYTE $0x3a               // vpaddb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54fcfdc5; WORD $0x203a             // vpaddb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x403a             // vpaddb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x64fcfdc5; WORD $0x603a             // vpaddb    ymm4, ymm0, yword [rdx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	QUAD $0x0000803a8cfcfdc5; BYTE $0x00       // vpaddb    ymm1, ymm0, yword [rdx + rdi + 128]
	QUAD $0x0000a03a94fcfdc5; BYTE $0x00       // vpaddb    ymm2, ymm0, yword [rdx + rdi + 160]
	QUAD $0x0000c03a9cfcfdc5; BYTE $0x00       // vpaddb    ymm3, ymm0, yword [rdx + rdi + 192]
	QUAD $0x0000e03aa4fcfdc5; BYTE $0x00       // vpaddb    ymm4, ymm0, yword [rdx + rdi + 224]
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_487
	JMP  LBB1_758

// LBB1_488: set-up for a vectorised 8-bit "array - scalar" loop.
//   esi  = r10d rounded down to a multiple of 128
//   ymm0 = low byte of eax replicated into all 32 byte lanes
//   r9   = number of 128-byte groups = ((rsi - 128) >> 7) + 1
// If rsi - 128 is zero (exactly one group) the 2x-unrolled loop is skipped via LBB1_765.
LBB1_488:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0x804e8d48             // lea    rcx, [rsi - 128]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_765
	// rcx = -(r9 & ~1): negative count of groups taken two at a time; rdi = byte offset 0.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// LBB1_490: one pass handles two 128-byte groups (256 bytes):
//   [r8 + rdi + k] = [rdx + rdi + k] - ymm0   (per-byte vpsubb)
// rdi advances by 256 bytes, rcx by 2; the loop ends when rcx reaches 0 and
// control continues at LBB1_766 (outside this excerpt).
LBB1_490:
	LONG $0x0c6ffec5; BYTE $0x3a               // vmovdqu    ymm1, yword [rdx + rdi]
	LONG $0x546ffec5; WORD $0x203a             // vmovdqu    ymm2, yword [rdx + rdi + 32]
	LONG $0x5c6ffec5; WORD $0x403a             // vmovdqu    ymm3, yword [rdx + rdi + 64]
	LONG $0x646ffec5; WORD $0x603a             // vmovdqu    ymm4, yword [rdx + rdi + 96]
	LONG $0xc8f8f5c5                           // vpsubb    ymm1, ymm1, ymm0
	LONG $0xd0f8edc5                           // vpsubb    ymm2, ymm2, ymm0
	LONG $0xd8f8e5c5                           // vpsubb    ymm3, ymm3, ymm0
	LONG $0xe0f8ddc5                           // vpsubb    ymm4, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	QUAD $0x0000803a8c6ffec5; BYTE $0x00       // vmovdqu    ymm1, yword [rdx + rdi + 128]
	QUAD $0x0000a03a946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + rdi + 160]
	QUAD $0x0000c03a9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + rdi + 192]
	QUAD $0x0000e03aa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + rdi + 224]
	LONG $0xc8f8f5c5                           // vpsubb    ymm1, ymm1, ymm0
	LONG $0xd0f8edc5                           // vpsubb    ymm2, ymm2, ymm0
	LONG $0xd8f8e5c5                           // vpsubb    ymm3, ymm3, ymm0
	LONG $0xe0f8ddc5                           // vpsubb    ymm4, ymm4, ymm0
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_490
	JMP  LBB1_766

// LBB1_491: set-up for a vectorised 8-bit "array + scalar" loop. The instruction
// sequence matches LBB1_485; only the branch targets differ.
// NOTE(review): presumably reached from a different dispatch case -- confirm at the jump site.
//   esi  = r10d rounded down to a multiple of 128
//   ymm0 = low byte of eax replicated into all 32 byte lanes
//   r9   = number of 128-byte groups = ((rsi - 128) >> 7) + 1
// If rsi - 128 is zero (exactly one group) the 2x-unrolled loop is skipped via LBB1_773.
LBB1_491:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0x804e8d48             // lea    rcx, [rsi - 128]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_773
	// rcx = -(r9 & ~1): negative count of groups taken two at a time; rdi = byte offset 0.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// LBB1_493: one pass handles two 128-byte groups (256 bytes):
//   [r8 + rdi + k] = ymm0 + [rdx + rdi + k]   (per-byte vpaddb)
// rdi advances by 256 bytes, rcx by 2; the loop ends when rcx reaches 0 and
// control continues at LBB1_774 (outside this excerpt).
LBB1_493:
	LONG $0x0cfcfdc5; BYTE $0x3a               // vpaddb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54fcfdc5; WORD $0x203a             // vpaddb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x403a             // vpaddb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x64fcfdc5; WORD $0x603a             // vpaddb    ymm4, ymm0, yword [rdx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	QUAD $0x0000803a8cfcfdc5; BYTE $0x00       // vpaddb    ymm1, ymm0, yword [rdx + rdi + 128]
	QUAD $0x0000a03a94fcfdc5; BYTE $0x00       // vpaddb    ymm2, ymm0, yword [rdx + rdi + 160]
	QUAD $0x0000c03a9cfcfdc5; BYTE $0x00       // vpaddb    ymm3, ymm0, yword [rdx + rdi + 192]
	QUAD $0x0000e03aa4fcfdc5; BYTE $0x00       // vpaddb    ymm4, ymm0, yword [rdx + rdi + 224]
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_493
	JMP  LBB1_774

// LBB1_494: set-up for a vectorised 8-bit "array - scalar" loop. The instruction
// sequence matches LBB1_488; only the branch targets differ.
// NOTE(review): presumably reached from a different dispatch case -- confirm at the jump site.
//   esi  = r10d rounded down to a multiple of 128
//   ymm0 = low byte of eax replicated into all 32 byte lanes
//   r9   = number of 128-byte groups = ((rsi - 128) >> 7) + 1
// If rsi - 128 is zero (exactly one group) the 2x-unrolled loop is skipped via LBB1_781.
LBB1_494:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0x804e8d48             // lea    rcx, [rsi - 128]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_781
	// rcx = -(r9 & ~1): negative count of groups taken two at a time; rdi = byte offset 0.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// LBB1_496: one pass handles two 128-byte groups (256 bytes):
//   [r8 + rdi + k] = [rdx + rdi + k] - ymm0   (per-byte vpsubb)
// rdi advances by 256 bytes, rcx by 2; the loop ends when rcx reaches 0 and
// control continues at LBB1_782 (outside this excerpt).
LBB1_496:
	LONG $0x0c6ffec5; BYTE $0x3a               // vmovdqu    ymm1, yword [rdx + rdi]
	LONG $0x546ffec5; WORD $0x203a             // vmovdqu    ymm2, yword [rdx + rdi + 32]
	LONG $0x5c6ffec5; WORD $0x403a             // vmovdqu    ymm3, yword [rdx + rdi + 64]
	LONG $0x646ffec5; WORD $0x603a             // vmovdqu    ymm4, yword [rdx + rdi + 96]
	LONG $0xc8f8f5c5                           // vpsubb    ymm1, ymm1, ymm0
	LONG $0xd0f8edc5                           // vpsubb    ymm2, ymm2, ymm0
	LONG $0xd8f8e5c5                           // vpsubb    ymm3, ymm3, ymm0
	LONG $0xe0f8ddc5                           // vpsubb    ymm4, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	QUAD $0x0000803a8c6ffec5; BYTE $0x00       // vmovdqu    ymm1, yword [rdx + rdi + 128]
	QUAD $0x0000a03a946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + rdi + 160]
	QUAD $0x0000c03a9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + rdi + 192]
	QUAD $0x0000e03aa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + rdi + 224]
	LONG $0xc8f8f5c5                           // vpsubb    ymm1, ymm1, ymm0
	LONG $0xd0f8edc5                           // vpsubb    ymm2, ymm2, ymm0
	LONG $0xd8f8e5c5                           // vpsubb    ymm3, ymm3, ymm0
	LONG $0xe0f8ddc5                           // vpsubb    ymm4, ymm4, ymm0
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_496
	JMP  LBB1_782

// LBB1_497: set-up for a vectorised 64-bit integer "array * scalar" loop.
//   esi  = r10d rounded down to a multiple of 16
//   ymm0 = rax replicated into all four qword lanes (the scalar s)
//   ymm1 = ymm0 >> 32 per lane, i.e. hi32(s) moved into the low dword of each lane
//   r9   = number of 16-element groups = ((rsi - 16) >> 4) + 1
// If rsi - 16 is zero (exactly one group) the 2x-unrolled loop is skipped via LBB1_789.
LBB1_497:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	LONG $0xd073f5c5; BYTE $0x20 // vpsrlq    ymm1, ymm0, 32
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_789
	// rcx = -(r9 & ~1): negative count of groups taken two at a time; rdi = element index 0.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// LBB1_499: one pass handles two 16-element groups (32 qwords). For every lane a the
// low 64 bits of a*s are built from 32x32->64 vpmuludq partial products:
//   ymm6 = lo32(a) * hi32(s)            (vpmuludq a, ymm1)
//   ymm7 = hi32(a) * lo32(s)            (vpsrlq a, 32 ; vpmuludq ymm7, ymm0)
//   ymm6 = (ymm6 + ymm7) << 32          (cross terms moved into the upper half)
//   a    = lo32(a) * lo32(s) + ymm6
// The same seven-instruction pattern is repeated for ymm2..ymm5, then stored to r8.
// rdi advances by 32 elements, rcx by 2; the loop ends when rcx reaches 0 and
// control continues at LBB1_790 (outside this excerpt).
LBB1_499:
	LONG $0x146ffec5; BYTE $0xfa               // vmovdqu    ymm2, yword [rdx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60fa             // vmovdqu    ymm5, yword [rdx + 8*rdi + 96]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf86c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080fa946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0faa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faac6ffec5; BYTE $0x00       // vmovdqu    ymm5, yword [rdx + 8*rdi + 224]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	QUAD $0x0080f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_499
	JMP  LBB1_790

// LBB1_500: set-up for a vectorised 64-bit integer "array * scalar" loop. The instruction
// sequence matches LBB1_497; only the branch targets differ.
// NOTE(review): presumably reached from a different dispatch case -- confirm at the jump site.
//   esi  = r10d rounded down to a multiple of 16
//   ymm0 = rax replicated into all four qword lanes (the scalar s)
//   ymm1 = ymm0 >> 32 per lane, i.e. hi32(s) moved into the low dword of each lane
//   r9   = number of 16-element groups = ((rsi - 16) >> 4) + 1
// If rsi - 16 is zero (exactly one group) the 2x-unrolled loop is skipped via LBB1_797.
LBB1_500:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	LONG $0xd073f5c5; BYTE $0x20 // vpsrlq    ymm1, ymm0, 32
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_797
	// rcx = -(r9 & ~1): negative count of groups taken two at a time; rdi = element index 0.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// LBB1_502: one pass handles two 16-element groups (32 qwords). For every lane a the
// low 64 bits of a*s are built from 32x32->64 vpmuludq partial products:
//   ymm6 = lo32(a) * hi32(s)            (vpmuludq a, ymm1)
//   ymm7 = hi32(a) * lo32(s)            (vpsrlq a, 32 ; vpmuludq ymm7, ymm0)
//   ymm6 = (ymm6 + ymm7) << 32          (cross terms moved into the upper half)
//   a    = lo32(a) * lo32(s) + ymm6
// The same seven-instruction pattern is repeated for ymm2..ymm5, then stored to r8.
// rdi advances by 32 elements, rcx by 2; the loop ends when rcx reaches 0 and
// control continues at LBB1_798 (outside this excerpt).
LBB1_502:
	LONG $0x146ffec5; BYTE $0xfa               // vmovdqu    ymm2, yword [rdx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60fa             // vmovdqu    ymm5, yword [rdx + 8*rdi + 96]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf86c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080fa946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0faa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faac6ffec5; BYTE $0x00       // vmovdqu    ymm5, yword [rdx + 8*rdi + 224]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	QUAD $0x0080f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_502
	JMP  LBB1_798

// LBB1_503 / LBB1_505: AVX2 loop that adds a broadcast 64-bit value to each
// qword of the array at rdx and stores the sums to the array at r8.
//
// Setup:
//   rsi  = r10d & -16              elements covered by whole 16-element chunks
//   ymm0 = rax replicated into all four qword lanes
//   r9   = ((rsi - 16) >> 4) + 1   number of 16-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 16 (a single chunk) the 2x-unrolled loop is skipped via LBB1_805.
// NOTE(review): r10d looks like the element count and rax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_503:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_805
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 16-element chunks (32 qwords, 256 bytes) per iteration.
LBB1_505:
	LONG $0x0cd4fdc5; BYTE $0xfa               // vpaddq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20fa             // vpaddq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40fa             // vpaddq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x64d4fdc5; WORD $0x60fa             // vpaddq    ymm4, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080fa8cd4fdc5; BYTE $0x00       // vpaddq    ymm1, ymm0, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa94d4fdc5; BYTE $0x00       // vpaddq    ymm2, ymm0, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9cd4fdc5; BYTE $0x00       // vpaddq    ymm3, ymm0, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa4d4fdc5; BYTE $0x00       // vpaddq    ymm4, ymm0, yword [rdx + 8*rdi + 224]
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	// Advance by 32 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_505
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_806

// LBB1_506 / LBB1_508: AVX2 loop that subtracts a broadcast 64-bit value from
// each qword of the array at rdx (element - value) and stores the differences
// to the array at r8.
//
// Setup:
//   rsi  = r10d & -16              elements covered by whole 16-element chunks
//   ymm0 = rax replicated into all four qword lanes
//   r9   = ((rsi - 16) >> 4) + 1   number of 16-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 16 (a single chunk) the 2x-unrolled loop is skipped via LBB1_813.
// NOTE(review): r10d looks like the element count and rax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_506:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_813
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 16-element chunks (32 qwords, 256 bytes) per iteration,
// each as load / subtract ymm0 / store.
LBB1_508:
	LONG $0x0c6ffec5; BYTE $0xfa               // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0x646ffec5; WORD $0x60fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 96]
	LONG $0xc8fbf5c5                           // vpsubq    ymm1, ymm1, ymm0
	LONG $0xd0fbedc5                           // vpsubq    ymm2, ymm2, ymm0
	LONG $0xd8fbe5c5                           // vpsubq    ymm3, ymm3, ymm0
	LONG $0xe0fbddc5                           // vpsubq    ymm4, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080fa8c6ffec5; BYTE $0x00       // vmovdqu    ymm1, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + 8*rdi + 224]
	LONG $0xc8fbf5c5                           // vpsubq    ymm1, ymm1, ymm0
	LONG $0xd0fbedc5                           // vpsubq    ymm2, ymm2, ymm0
	LONG $0xd8fbe5c5                           // vpsubq    ymm3, ymm3, ymm0
	LONG $0xe0fbddc5                           // vpsubq    ymm4, ymm4, ymm0
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	// Advance by 32 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_508
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_814

// LBB1_509 / LBB1_511: AVX2 loop that adds a broadcast 64-bit value to each
// qword of the array at rdx and stores the sums to the array at r8.
//
// Setup:
//   rsi  = r10d & -16              elements covered by whole 16-element chunks
//   ymm0 = rax replicated into all four qword lanes
//   r9   = ((rsi - 16) >> 4) + 1   number of 16-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 16 (a single chunk) the 2x-unrolled loop is skipped via LBB1_821.
// NOTE(review): r10d looks like the element count and rax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_509:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_821
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 16-element chunks (32 qwords, 256 bytes) per iteration.
LBB1_511:
	LONG $0x0cd4fdc5; BYTE $0xfa               // vpaddq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20fa             // vpaddq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40fa             // vpaddq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x64d4fdc5; WORD $0x60fa             // vpaddq    ymm4, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080fa8cd4fdc5; BYTE $0x00       // vpaddq    ymm1, ymm0, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa94d4fdc5; BYTE $0x00       // vpaddq    ymm2, ymm0, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9cd4fdc5; BYTE $0x00       // vpaddq    ymm3, ymm0, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa4d4fdc5; BYTE $0x00       // vpaddq    ymm4, ymm0, yword [rdx + 8*rdi + 224]
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	// Advance by 32 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_511
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_822

// LBB1_512 / LBB1_514: AVX2 loop that subtracts a broadcast 64-bit value from
// each qword of the array at rdx (element - value) and stores the differences
// to the array at r8.
//
// Setup:
//   rsi  = r10d & -16              elements covered by whole 16-element chunks
//   ymm0 = rax replicated into all four qword lanes
//   r9   = ((rsi - 16) >> 4) + 1   number of 16-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 16 (a single chunk) the 2x-unrolled loop is skipped via LBB1_829.
// NOTE(review): r10d looks like the element count and rax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_512:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_829
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 16-element chunks (32 qwords, 256 bytes) per iteration,
// each as load / subtract ymm0 / store.
LBB1_514:
	LONG $0x0c6ffec5; BYTE $0xfa               // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0x646ffec5; WORD $0x60fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 96]
	LONG $0xc8fbf5c5                           // vpsubq    ymm1, ymm1, ymm0
	LONG $0xd0fbedc5                           // vpsubq    ymm2, ymm2, ymm0
	LONG $0xd8fbe5c5                           // vpsubq    ymm3, ymm3, ymm0
	LONG $0xe0fbddc5                           // vpsubq    ymm4, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080fa8c6ffec5; BYTE $0x00       // vmovdqu    ymm1, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + 8*rdi + 224]
	LONG $0xc8fbf5c5                           // vpsubq    ymm1, ymm1, ymm0
	LONG $0xd0fbedc5                           // vpsubq    ymm2, ymm2, ymm0
	LONG $0xd8fbe5c5                           // vpsubq    ymm3, ymm3, ymm0
	LONG $0xe0fbddc5                           // vpsubq    ymm4, ymm4, ymm0
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	// Advance by 32 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_514
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_830

// LBB1_515 / LBB1_517: AVX2 loop that multiplies each 16-bit element of the
// array at rdx by a broadcast 16-bit value (vpmullw keeps the low 16 bits of
// each product) and stores the results to the array at r8.
//
// Setup:
//   rsi  = r10d & -32              elements covered by whole 32-element chunks
//   ymm0 = low word of eax replicated into all sixteen word lanes
//   r9   = ((rsi - 32) >> 5) + 1   number of 32-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 32 (a single chunk) the 2x-unrolled loop is skipped via LBB1_837.
// NOTE(review): r10d looks like the element count and eax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_515:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_837
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 32-element chunks (64 words, 128 bytes) per iteration.
LBB1_517:
	LONG $0x0cd5fdc5; BYTE $0x7a               // vpmullw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x54d5fdc5; WORD $0x207a             // vpmullw    ymm2, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cd5fdc5; WORD $0x407a             // vpmullw    ymm1, ymm0, yword [rdx + 2*rdi + 64]
	LONG $0x54d5fdc5; WORD $0x607a             // vpmullw    ymm2, ymm0, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	// Advance by 64 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_517
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_838

// LBB1_518 / LBB1_520: AVX2 loop that multiplies each 16-bit element of the
// array at rdx by a broadcast 16-bit value (vpmullw keeps the low 16 bits of
// each product) and stores the results to the array at r8.
//
// Setup:
//   rsi  = r10d & -32              elements covered by whole 32-element chunks
//   ymm0 = low word of eax replicated into all sixteen word lanes
//   r9   = ((rsi - 32) >> 5) + 1   number of 32-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 32 (a single chunk) the 2x-unrolled loop is skipped via LBB1_845.
// NOTE(review): r10d looks like the element count and eax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_518:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_845
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 32-element chunks (64 words, 128 bytes) per iteration.
LBB1_520:
	LONG $0x0cd5fdc5; BYTE $0x7a               // vpmullw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x54d5fdc5; WORD $0x207a             // vpmullw    ymm2, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cd5fdc5; WORD $0x407a             // vpmullw    ymm1, ymm0, yword [rdx + 2*rdi + 64]
	LONG $0x54d5fdc5; WORD $0x607a             // vpmullw    ymm2, ymm0, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	// Advance by 64 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_520
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_846

// LBB1_521 / LBB1_523: AVX2 loop that multiplies each 16-bit element of the
// array at rdx by a broadcast 16-bit value (vpmullw keeps the low 16 bits of
// each product) and stores the results to the array at r8.
//
// Setup:
//   rsi  = r10d & -32              elements covered by whole 32-element chunks
//   ymm0 = low word of eax replicated into all sixteen word lanes
//   r9   = ((rsi - 32) >> 5) + 1   number of 32-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 32 (a single chunk) the 2x-unrolled loop is skipped via LBB1_853.
// NOTE(review): r10d looks like the element count and eax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_521:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_853
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 32-element chunks (64 words, 128 bytes) per iteration.
LBB1_523:
	LONG $0x0cd5fdc5; BYTE $0x7a               // vpmullw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x54d5fdc5; WORD $0x207a             // vpmullw    ymm2, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cd5fdc5; WORD $0x407a             // vpmullw    ymm1, ymm0, yword [rdx + 2*rdi + 64]
	LONG $0x54d5fdc5; WORD $0x607a             // vpmullw    ymm2, ymm0, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	// Advance by 64 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_523
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_854

// LBB1_524 / LBB1_526: AVX2 loop that multiplies each 16-bit element of the
// array at rdx by a broadcast 16-bit value (vpmullw keeps the low 16 bits of
// each product) and stores the results to the array at r8.
//
// Setup:
//   rsi  = r10d & -32              elements covered by whole 32-element chunks
//   ymm0 = low word of eax replicated into all sixteen word lanes
//   r9   = ((rsi - 32) >> 5) + 1   number of 32-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 32 (a single chunk) the 2x-unrolled loop is skipped via LBB1_861.
// NOTE(review): r10d looks like the element count and eax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_524:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_861
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 32-element chunks (64 words, 128 bytes) per iteration.
LBB1_526:
	LONG $0x0cd5fdc5; BYTE $0x7a               // vpmullw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x54d5fdc5; WORD $0x207a             // vpmullw    ymm2, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cd5fdc5; WORD $0x407a             // vpmullw    ymm1, ymm0, yword [rdx + 2*rdi + 64]
	LONG $0x54d5fdc5; WORD $0x607a             // vpmullw    ymm2, ymm0, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	// Advance by 64 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_526
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_862

// LBB1_527 / LBB1_529: AVX2 loop that adds a broadcast 16-bit value to each
// 16-bit element of the array at rdx (wrapping vpaddw) and stores the sums to
// the array at r8.
//
// Setup:
//   rsi  = r10d & -32              elements covered by whole 32-element chunks
//   ymm0 = low word of eax replicated into all sixteen word lanes
//   r9   = ((rsi - 32) >> 5) + 1   number of 32-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 32 (a single chunk) the 2x-unrolled loop is skipped via LBB1_869.
// NOTE(review): r10d looks like the element count and eax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_527:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_869
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 32-element chunks (64 words, 128 bytes) per iteration.
LBB1_529:
	LONG $0x0cfdfdc5; BYTE $0x7a               // vpaddw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x54fdfdc5; WORD $0x207a             // vpaddw    ymm2, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cfdfdc5; WORD $0x407a             // vpaddw    ymm1, ymm0, yword [rdx + 2*rdi + 64]
	LONG $0x54fdfdc5; WORD $0x607a             // vpaddw    ymm2, ymm0, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	// Advance by 64 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_529
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_870

// LBB1_530 / LBB1_532: AVX2 loop that adds a broadcast 16-bit value to each
// 16-bit element of the array at rdx (wrapping vpaddw) and stores the sums to
// the array at r8.
//
// Setup:
//   rsi  = r10d & -32              elements covered by whole 32-element chunks
//   ymm0 = low word of eax replicated into all sixteen word lanes
//   r9   = ((rsi - 32) >> 5) + 1   number of 32-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 32 (a single chunk) the 2x-unrolled loop is skipped via LBB1_877.
// NOTE(review): r10d looks like the element count and eax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_530:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_877
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 32-element chunks (64 words, 128 bytes) per iteration.
LBB1_532:
	LONG $0x0cfdfdc5; BYTE $0x7a               // vpaddw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x54fdfdc5; WORD $0x207a             // vpaddw    ymm2, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cfdfdc5; WORD $0x407a             // vpaddw    ymm1, ymm0, yword [rdx + 2*rdi + 64]
	LONG $0x54fdfdc5; WORD $0x607a             // vpaddw    ymm2, ymm0, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	// Advance by 64 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_532
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_878

// LBB1_533 / LBB1_535: AVX2 loop that subtracts a broadcast 16-bit value from
// each 16-bit element of the array at rdx (element - value, wrapping vpsubw)
// and stores the differences to the array at r8.
//
// Setup:
//   rsi  = r10d & -32              elements covered by whole 32-element chunks
//   ymm0 = low word of eax replicated into all sixteen word lanes
//   r9   = ((rsi - 32) >> 5) + 1   number of 32-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 32 (a single chunk) the 2x-unrolled loop is skipped via LBB1_885.
// NOTE(review): r10d looks like the element count and eax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_533:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_885
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 32-element chunks (64 words, 128 bytes) per iteration,
// each as load / subtract ymm0 / store.
LBB1_535:
	LONG $0x0c6ffec5; BYTE $0x7a               // vmovdqu    ymm1, yword [rdx + 2*rdi]
	LONG $0x546ffec5; WORD $0x207a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 32]
	LONG $0xc8f9f5c5                           // vpsubw    ymm1, ymm1, ymm0
	LONG $0xd0f9edc5                           // vpsubw    ymm2, ymm2, ymm0
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4c6ffec5; WORD $0x407a             // vmovdqu    ymm1, yword [rdx + 2*rdi + 64]
	LONG $0x546ffec5; WORD $0x607a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 96]
	LONG $0xc8f9f5c5                           // vpsubw    ymm1, ymm1, ymm0
	LONG $0xd0f9edc5                           // vpsubw    ymm2, ymm2, ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	// Advance by 64 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_535
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_886

// LBB1_536 / LBB1_538: AVX2 loop that subtracts a broadcast 16-bit value from
// each 16-bit element of the array at rdx (element - value, wrapping vpsubw)
// and stores the differences to the array at r8.
//
// Setup:
//   rsi  = r10d & -32              elements covered by whole 32-element chunks
//   ymm0 = low word of eax replicated into all sixteen word lanes
//   r9   = ((rsi - 32) >> 5) + 1   number of 32-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 32 (a single chunk) the 2x-unrolled loop is skipped via LBB1_893.
// NOTE(review): r10d looks like the element count and eax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_536:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_893
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 32-element chunks (64 words, 128 bytes) per iteration,
// each as load / subtract ymm0 / store.
LBB1_538:
	LONG $0x0c6ffec5; BYTE $0x7a               // vmovdqu    ymm1, yword [rdx + 2*rdi]
	LONG $0x546ffec5; WORD $0x207a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 32]
	LONG $0xc8f9f5c5                           // vpsubw    ymm1, ymm1, ymm0
	LONG $0xd0f9edc5                           // vpsubw    ymm2, ymm2, ymm0
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4c6ffec5; WORD $0x407a             // vmovdqu    ymm1, yword [rdx + 2*rdi + 64]
	LONG $0x546ffec5; WORD $0x607a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 96]
	LONG $0xc8f9f5c5                           // vpsubw    ymm1, ymm1, ymm0
	LONG $0xd0f9edc5                           // vpsubw    ymm2, ymm2, ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	// Advance by 64 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_538
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_894

// LBB1_539 / LBB1_541: AVX2 loop that adds a broadcast 16-bit value to each
// 16-bit element of the array at rdx (wrapping vpaddw) and stores the sums to
// the array at r8.
//
// Setup:
//   rsi  = r10d & -32              elements covered by whole 32-element chunks
//   ymm0 = low word of eax replicated into all sixteen word lanes
//   r9   = ((rsi - 32) >> 5) + 1   number of 32-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 32 (a single chunk) the 2x-unrolled loop is skipped via LBB1_901.
// NOTE(review): r10d looks like the element count and eax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_539:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_901
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 32-element chunks (64 words, 128 bytes) per iteration.
LBB1_541:
	LONG $0x0cfdfdc5; BYTE $0x7a               // vpaddw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x54fdfdc5; WORD $0x207a             // vpaddw    ymm2, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cfdfdc5; WORD $0x407a             // vpaddw    ymm1, ymm0, yword [rdx + 2*rdi + 64]
	LONG $0x54fdfdc5; WORD $0x607a             // vpaddw    ymm2, ymm0, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	// Advance by 64 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_541
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_902

// LBB1_542 / LBB1_544: AVX2 loop that adds a broadcast 16-bit value to each
// 16-bit element of the array at rdx (wrapping vpaddw) and stores the sums to
// the array at r8.
//
// Setup:
//   rsi  = r10d & -32              elements covered by whole 32-element chunks
//   ymm0 = low word of eax replicated into all sixteen word lanes
//   r9   = ((rsi - 32) >> 5) + 1   number of 32-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 32 (a single chunk) the 2x-unrolled loop is skipped via LBB1_909.
// NOTE(review): r10d looks like the element count and eax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_542:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_909
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 32-element chunks (64 words, 128 bytes) per iteration.
LBB1_544:
	LONG $0x0cfdfdc5; BYTE $0x7a               // vpaddw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x54fdfdc5; WORD $0x207a             // vpaddw    ymm2, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cfdfdc5; WORD $0x407a             // vpaddw    ymm1, ymm0, yword [rdx + 2*rdi + 64]
	LONG $0x54fdfdc5; WORD $0x607a             // vpaddw    ymm2, ymm0, yword [rdx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	// Advance by 64 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_544
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_910

// LBB1_545 / LBB1_547: AVX2 loop that subtracts a broadcast 16-bit value from
// each 16-bit element of the array at rdx (element - value, wrapping vpsubw)
// and stores the differences to the array at r8.
//
// Setup:
//   rsi  = r10d & -32              elements covered by whole 32-element chunks
//   ymm0 = low word of eax replicated into all sixteen word lanes
//   r9   = ((rsi - 32) >> 5) + 1   number of 32-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 32 (a single chunk) the 2x-unrolled loop is skipped via LBB1_917.
// NOTE(review): r10d looks like the element count and eax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_545:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_917
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 32-element chunks (64 words, 128 bytes) per iteration,
// each as load / subtract ymm0 / store.
LBB1_547:
	LONG $0x0c6ffec5; BYTE $0x7a               // vmovdqu    ymm1, yword [rdx + 2*rdi]
	LONG $0x546ffec5; WORD $0x207a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 32]
	LONG $0xc8f9f5c5                           // vpsubw    ymm1, ymm1, ymm0
	LONG $0xd0f9edc5                           // vpsubw    ymm2, ymm2, ymm0
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4c6ffec5; WORD $0x407a             // vmovdqu    ymm1, yword [rdx + 2*rdi + 64]
	LONG $0x546ffec5; WORD $0x607a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 96]
	LONG $0xc8f9f5c5                           // vpsubw    ymm1, ymm1, ymm0
	LONG $0xd0f9edc5                           // vpsubw    ymm2, ymm2, ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	// Advance by 64 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_547
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_918

// LBB1_548 / LBB1_550: AVX2 loop that subtracts a broadcast 16-bit value from
// each 16-bit element of the array at rdx (element - value, wrapping vpsubw)
// and stores the differences to the array at r8.
//
// Setup:
//   rsi  = r10d & -32              elements covered by whole 32-element chunks
//   ymm0 = low word of eax replicated into all sixteen word lanes
//   r9   = ((rsi - 32) >> 5) + 1   number of 32-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count; counted up to 0
//   rdi  = 0                       element index
// When rsi == 32 (a single chunk) the 2x-unrolled loop is skipped via LBB1_925.
// NOTE(review): r10d looks like the element count and eax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_548:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_925
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Loop body: two 32-element chunks (64 words, 128 bytes) per iteration,
// each as load / subtract ymm0 / store.
LBB1_550:
	LONG $0x0c6ffec5; BYTE $0x7a               // vmovdqu    ymm1, yword [rdx + 2*rdi]
	LONG $0x546ffec5; WORD $0x207a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 32]
	LONG $0xc8f9f5c5                           // vpsubw    ymm1, ymm1, ymm0
	LONG $0xd0f9edc5                           // vpsubw    ymm2, ymm2, ymm0
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4c6ffec5; WORD $0x407a             // vmovdqu    ymm1, yword [rdx + 2*rdi + 64]
	LONG $0x546ffec5; WORD $0x607a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 96]
	LONG $0xc8f9f5c5                           // vpsubw    ymm1, ymm1, ymm0
	LONG $0xd0f9edc5                           // vpsubw    ymm2, ymm2, ymm0
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	// Advance by 64 elements and 2 chunks; JNE tests ZF produced by `add rcx, 2`.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_550
	// Continues outside this excerpt; presumably handles an odd trailing chunk
	// and the scalar remainder -- confirm at the target label.
	JMP  LBB1_926

// LBB1_551: setup for the 64-bit multiply loop that follows at LBB1_553.
//
//   rsi  = r10d & -16              elements covered by whole 16-element chunks
//   ymm0 = rax replicated into all four qword lanes
//   ymm1 = ymm0 >> 32              high dword of the broadcast value in each lane;
//                                  the loop feeds it to vpmuludq to form cross
//                                  products for the 64x64 -> low-64 multiply
//   r9   = ((rsi - 16) >> 4) + 1   number of 16-element chunks
//   rcx  = -(r9 & -2)              negated even chunk count
//   rdi  = 0                       element index
// When rsi == 16 (a single chunk) the 2x-unrolled loop is skipped via LBB1_933.
// The vpsrlq sits between `lea rcx` and `test rcx, rcx`; it does not modify
// rcx, and the JE tests the flags set by `test`.
// NOTE(review): r10d looks like the element count and rax the scalar operand;
// both are set outside this excerpt -- confirm.
LBB1_551:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	LONG $0xd073f5c5; BYTE $0x20 // vpsrlq    ymm1, ymm0, 32
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_933
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

LBB1_553:
	LONG $0x146ffec5; BYTE $0xfa               // vmovdqu    ymm2, yword [rdx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60fa             // vmovdqu    ymm5, yword [rdx + 8*rdi + 96]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf86c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080fa946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0faa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faac6ffec5; BYTE $0x00       // vmovdqu    ymm5, yword [rdx + 8*rdi + 224]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	QUAD $0x0080f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_553
	JMP  LBB1_934

// LBB1_554: set-up for the float32 "multiply by scalar" vector loop (LBB1_556).
//   ecx  = eax rounded down to a multiple of 32
//          (NOTE(review): eax is presumably the element count; it is set before this chunk)
//   ymm1 = low float of xmm0 broadcast to all eight lanes
//   r9   = number of 32-element groups = ((rcx - 32) >> 5) + 1
//   rsi  = -(r9 & -2): negative pass counter, stepped by 2 until it reaches zero
//   edi  = element index, starts at 0
LBB1_554:
	WORD $0xc189                 // mov    ecx, eax
	WORD $0xe183; BYTE $0xe0     // and    ecx, -32
	LONG $0x187de2c4; BYTE $0xc8 // vbroadcastss    ymm1, xmm0
	LONG $0xe0718d48             // lea    rsi, [rcx - 32]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rcx == 32 means exactly one 32-element group: bypass the 2x-unrolled loop
	// (the branch target lies outside this chunk).
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB1_941
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// LBB1_556: main loop, 64 floats per pass (two groups of four ymm vectors).
// Each vector loaded from [rdx + 4*rdi + disp] is multiplied by the broadcast scalar in ymm1
// and stored to the same offset from r8.
LBB1_556:
	LONG $0x1459f4c5; BYTE $0xba               // vmulps    ymm2, ymm1, yword [rdx + 4*rdi]
	LONG $0x5c59f4c5; WORD $0x20ba             // vmulps    ymm3, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x6459f4c5; WORD $0x40ba             // vmulps    ymm4, ymm1, yword [rdx + 4*rdi + 64]
	LONG $0x6c59f4c5; WORD $0x60ba             // vmulps    ymm5, ymm1, yword [rdx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb86c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm5
	QUAD $0x000080ba9459f4c5; BYTE $0x00       // vmulps    ymm2, ymm1, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba9c59f4c5; BYTE $0x00       // vmulps    ymm3, ymm1, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0baa459f4c5; BYTE $0x00       // vmulps    ymm4, ymm1, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baac59f4c5; BYTE $0x00       // vmulps    ymm5, ymm1, yword [rdx + 4*rdi + 224]
	QUAD $0x0080b894117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 128], ymm2
	QUAD $0x00a0b89c117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 160], ymm3
	QUAD $0x00c0b8a4117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 192], ymm4
	QUAD $0x00e0b8ac117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 224], ymm5
	// advance the index by 64 elements and repeat until the negative pass counter hits zero
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB1_556
	JMP  LBB1_942

// LBB1_557: set-up for the 64-bit integer "multiply by scalar" vector loop (LBB1_559).
//   esi  = r10d rounded down to a multiple of 16
//          (NOTE(review): r10d is presumably the element count; it is set before this chunk)
//   ymm0 = the 64-bit value in rax broadcast to all four qword lanes
//   ymm1 = ymm0 >> 32, i.e. the upper half of the scalar in the low dword of each lane
//   r9   = number of 16-element groups = ((rsi - 16) >> 4) + 1
//   rcx  = -(r9 & -2): negative pass counter, stepped by 2 until it reaches zero
//   edi  = element index, starts at 0
LBB1_557:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	LONG $0xd073f5c5; BYTE $0x20 // vpsrlq    ymm1, ymm0, 32
	// rsi == 16 means there is exactly one 16-element group, so the 2x-unrolled loop is bypassed
	// (the branch target lies outside this chunk).
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_949
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// LBB1_559: main loop, 32 qwords per pass (two groups of four ymm vectors).
// The low 64 bits of each 64x64 product are assembled from 32x32->64 vpmuludq pieces:
//   cross  = lo(x)*hi(s) + hi(x)*lo(s)
//   result = lo(x)*lo(s) + (cross << 32)
// with x = loaded vector, s = broadcast scalar (ymm0) and hi(s) pre-shifted into ymm1.
// Loads come from [rdx + 8*rdi + disp]; results are stored to [r8 + 8*rdi + disp].
LBB1_559:
	// first group: byte offsets 0..96
	LONG $0x146ffec5; BYTE $0xfa               // vmovdqu    ymm2, yword [rdx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60fa             // vmovdqu    ymm5, yword [rdx + 8*rdi + 96]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf86c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm5
	// second group: identical computation at byte offsets 128..224
	QUAD $0x000080fa946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0faa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faac6ffec5; BYTE $0x00       // vmovdqu    ymm5, yword [rdx + 8*rdi + 224]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	QUAD $0x0080f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm5
	// advance the index by 32 elements and repeat until the negative pass counter hits zero
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_559
	JMP  LBB1_950

// LBB1_560: set-up for the float32 "multiply by scalar" vector loop (LBB1_562).
//   ecx  = eax rounded down to a multiple of 32
//          (NOTE(review): eax is presumably the element count; it is set before this chunk)
//   ymm1 = low float of xmm0 broadcast to all eight lanes
//   r9   = number of 32-element groups = ((rcx - 32) >> 5) + 1
//   rsi  = -(r9 & -2): negative pass counter, stepped by 2 until it reaches zero
//   edi  = element index, starts at 0
LBB1_560:
	WORD $0xc189                 // mov    ecx, eax
	WORD $0xe183; BYTE $0xe0     // and    ecx, -32
	LONG $0x187de2c4; BYTE $0xc8 // vbroadcastss    ymm1, xmm0
	LONG $0xe0718d48             // lea    rsi, [rcx - 32]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rcx == 32 means exactly one 32-element group: bypass the 2x-unrolled loop
	// (the branch target lies outside this chunk).
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB1_957
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// LBB1_562: main loop, 64 floats per pass (two groups of four ymm vectors).
// Each vector loaded from [rdx + 4*rdi + disp] is multiplied by the broadcast scalar in ymm1
// and stored to the same offset from r8.
LBB1_562:
	LONG $0x1459f4c5; BYTE $0xba               // vmulps    ymm2, ymm1, yword [rdx + 4*rdi]
	LONG $0x5c59f4c5; WORD $0x20ba             // vmulps    ymm3, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x6459f4c5; WORD $0x40ba             // vmulps    ymm4, ymm1, yword [rdx + 4*rdi + 64]
	LONG $0x6c59f4c5; WORD $0x60ba             // vmulps    ymm5, ymm1, yword [rdx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb86c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm5
	QUAD $0x000080ba9459f4c5; BYTE $0x00       // vmulps    ymm2, ymm1, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba9c59f4c5; BYTE $0x00       // vmulps    ymm3, ymm1, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0baa459f4c5; BYTE $0x00       // vmulps    ymm4, ymm1, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baac59f4c5; BYTE $0x00       // vmulps    ymm5, ymm1, yword [rdx + 4*rdi + 224]
	QUAD $0x0080b894117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 128], ymm2
	QUAD $0x00a0b89c117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 160], ymm3
	QUAD $0x00c0b8a4117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 192], ymm4
	QUAD $0x00e0b8ac117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 224], ymm5
	// advance the index by 64 elements and repeat until the negative pass counter hits zero
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB1_562
	JMP  LBB1_958

// LBB1_563: set-up for the 64-bit integer "add scalar" vector loop (LBB1_565).
//   esi  = r10d rounded down to a multiple of 16
//          (NOTE(review): r10d is presumably the element count; it is set before this chunk)
//   ymm0 = the 64-bit value in rax broadcast to all four qword lanes
//   r9   = number of 16-element groups = ((rsi - 16) >> 4) + 1
//   rcx  = -(r9 & -2): negative pass counter, stepped by 2 until it reaches zero
//   edi  = element index, starts at 0
LBB1_563:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	// rsi == 16 means exactly one 16-element group: bypass the 2x-unrolled loop
	// (the branch target lies outside this chunk).
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_965
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// LBB1_565: main loop, 32 qwords per pass (two groups of four ymm vectors).
// The broadcast scalar in ymm0 is added to each vector read from [rdx + 8*rdi + disp]
// and the sum is stored to the same offset from r8.
LBB1_565:
	LONG $0x0cd4fdc5; BYTE $0xfa               // vpaddq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20fa             // vpaddq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40fa             // vpaddq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x64d4fdc5; WORD $0x60fa             // vpaddq    ymm4, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080fa8cd4fdc5; BYTE $0x00       // vpaddq    ymm1, ymm0, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa94d4fdc5; BYTE $0x00       // vpaddq    ymm2, ymm0, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9cd4fdc5; BYTE $0x00       // vpaddq    ymm3, ymm0, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa4d4fdc5; BYTE $0x00       // vpaddq    ymm4, ymm0, yword [rdx + 8*rdi + 224]
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	// advance the index by 32 elements and repeat until the negative pass counter hits zero
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_565
	JMP  LBB1_966

// LBB1_566: set-up for the float32 "add scalar" vector loop (LBB1_568).
//   ecx  = eax rounded down to a multiple of 32
//          (NOTE(review): eax is presumably the element count; it is set before this chunk)
//   ymm1 = low float of xmm0 broadcast to all eight lanes
//   r9   = number of 32-element groups = ((rcx - 32) >> 5) + 1
//   rsi  = -(r9 & -2): negative pass counter, stepped by 2 until it reaches zero
//   edi  = element index, starts at 0
LBB1_566:
	WORD $0xc189                 // mov    ecx, eax
	WORD $0xe183; BYTE $0xe0     // and    ecx, -32
	LONG $0x187de2c4; BYTE $0xc8 // vbroadcastss    ymm1, xmm0
	LONG $0xe0718d48             // lea    rsi, [rcx - 32]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rcx == 32 means exactly one 32-element group: bypass the 2x-unrolled loop
	// (the branch target lies outside this chunk).
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB1_973
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// LBB1_568: main loop, 64 floats per pass (two groups of four ymm vectors).
// The broadcast scalar in ymm1 is added to each vector read from [rdx + 4*rdi + disp]
// and the sum is stored to the same offset from r8.
LBB1_568:
	LONG $0x1458f4c5; BYTE $0xba               // vaddps    ymm2, ymm1, yword [rdx + 4*rdi]
	LONG $0x5c58f4c5; WORD $0x20ba             // vaddps    ymm3, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x6458f4c5; WORD $0x40ba             // vaddps    ymm4, ymm1, yword [rdx + 4*rdi + 64]
	LONG $0x6c58f4c5; WORD $0x60ba             // vaddps    ymm5, ymm1, yword [rdx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb86c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm5
	QUAD $0x000080ba9458f4c5; BYTE $0x00       // vaddps    ymm2, ymm1, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba9c58f4c5; BYTE $0x00       // vaddps    ymm3, ymm1, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0baa458f4c5; BYTE $0x00       // vaddps    ymm4, ymm1, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baac58f4c5; BYTE $0x00       // vaddps    ymm5, ymm1, yword [rdx + 4*rdi + 224]
	QUAD $0x0080b894117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 128], ymm2
	QUAD $0x00a0b89c117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 160], ymm3
	QUAD $0x00c0b8a4117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 192], ymm4
	QUAD $0x00e0b8ac117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 224], ymm5
	// advance the index by 64 elements and repeat until the negative pass counter hits zero
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB1_568
	JMP  LBB1_974

// LBB1_569: set-up for the 64-bit integer "subtract scalar" vector loop (LBB1_571).
//   esi  = r10d rounded down to a multiple of 16
//          (NOTE(review): r10d is presumably the element count; it is set before this chunk)
//   ymm0 = the 64-bit value in rax broadcast to all four qword lanes
//   r9   = number of 16-element groups = ((rsi - 16) >> 4) + 1
//   rcx  = -(r9 & -2): negative pass counter, stepped by 2 until it reaches zero
//   edi  = element index, starts at 0
LBB1_569:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	// rsi == 16 means exactly one 16-element group: bypass the 2x-unrolled loop
	// (the branch target lies outside this chunk).
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_981
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// LBB1_571: main loop, 32 qwords per pass (two groups of four ymm vectors).
// Each vector read from [rdx + 8*rdi + disp] has the broadcast scalar (ymm0) subtracted
// from it (memory value minus scalar) and is stored to the same offset from r8.
LBB1_571:
	LONG $0x0c6ffec5; BYTE $0xfa               // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0x646ffec5; WORD $0x60fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 96]
	LONG $0xc8fbf5c5                           // vpsubq    ymm1, ymm1, ymm0
	LONG $0xd0fbedc5                           // vpsubq    ymm2, ymm2, ymm0
	LONG $0xd8fbe5c5                           // vpsubq    ymm3, ymm3, ymm0
	LONG $0xe0fbddc5                           // vpsubq    ymm4, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080fa8c6ffec5; BYTE $0x00       // vmovdqu    ymm1, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + 8*rdi + 224]
	LONG $0xc8fbf5c5                           // vpsubq    ymm1, ymm1, ymm0
	LONG $0xd0fbedc5                           // vpsubq    ymm2, ymm2, ymm0
	LONG $0xd8fbe5c5                           // vpsubq    ymm3, ymm3, ymm0
	LONG $0xe0fbddc5                           // vpsubq    ymm4, ymm4, ymm0
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	// advance the index by 32 elements and repeat until the negative pass counter hits zero
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_571
	JMP  LBB1_982

// LBB1_572: set-up for the float32 "subtract scalar" vector loop (LBB1_574).
// Unlike the neighbouring float loops, the register roles are swapped here:
// rsi is the element index and rdi is the pass counter.
//   ecx  = eax rounded down to a multiple of 32
//          (NOTE(review): eax is presumably the element count; it is set before this chunk)
//   ymm1 = low float of xmm0 broadcast to all eight lanes
//   r9   = number of 32-element groups = ((rcx - 32) >> 5) + 1
//   rdi  = -(r9 & -2): negative pass counter, stepped by 2 until it reaches zero
//   esi  = element index, starts at 0
LBB1_572:
	WORD $0xc189                 // mov    ecx, eax
	WORD $0xe183; BYTE $0xe0     // and    ecx, -32
	LONG $0x187de2c4; BYTE $0xc8 // vbroadcastss    ymm1, xmm0
	LONG $0xe0718d48             // lea    rsi, [rcx - 32]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rcx == 32 means exactly one 32-element group: bypass the 2x-unrolled loop
	// (the branch target lies outside this chunk).
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB1_989
	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
	LONG $0xfee78348             // and    rdi, -2
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	WORD $0xf631                 // xor    esi, esi

// LBB1_574: main loop, 64 floats per pass (two groups of four ymm vectors).
// Each vector read from [rdx + 4*rsi + disp] has the broadcast scalar (ymm1) subtracted
// from it (memory value minus scalar) and is stored to the same offset from r8.
LBB1_574:
	LONG $0x1410fcc5; BYTE $0xb2               // vmovups    ymm2, yword [rdx + 4*rsi]
	LONG $0x5c10fcc5; WORD $0x20b2             // vmovups    ymm3, yword [rdx + 4*rsi + 32]
	LONG $0x6410fcc5; WORD $0x40b2             // vmovups    ymm4, yword [rdx + 4*rsi + 64]
	LONG $0x6c10fcc5; WORD $0x60b2             // vmovups    ymm5, yword [rdx + 4*rsi + 96]
	LONG $0xd15cecc5                           // vsubps    ymm2, ymm2, ymm1
	LONG $0xd95ce4c5                           // vsubps    ymm3, ymm3, ymm1
	LONG $0xe15cdcc5                           // vsubps    ymm4, ymm4, ymm1
	LONG $0xe95cd4c5                           // vsubps    ymm5, ymm5, ymm1
	LONG $0x117cc1c4; WORD $0xb014             // vmovups    yword [r8 + 4*rsi], ymm2
	LONG $0x117cc1c4; WORD $0xb05c; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm5
	QUAD $0x000080b29410fcc5; BYTE $0x00       // vmovups    ymm2, yword [rdx + 4*rsi + 128]
	QUAD $0x0000a0b29c10fcc5; BYTE $0x00       // vmovups    ymm3, yword [rdx + 4*rsi + 160]
	QUAD $0x0000c0b2a410fcc5; BYTE $0x00       // vmovups    ymm4, yword [rdx + 4*rsi + 192]
	QUAD $0x0000e0b2ac10fcc5; BYTE $0x00       // vmovups    ymm5, yword [rdx + 4*rsi + 224]
	LONG $0xd15cecc5                           // vsubps    ymm2, ymm2, ymm1
	LONG $0xd95ce4c5                           // vsubps    ymm3, ymm3, ymm1
	LONG $0xe15cdcc5                           // vsubps    ymm4, ymm4, ymm1
	LONG $0xe95cd4c5                           // vsubps    ymm5, ymm5, ymm1
	QUAD $0x0080b094117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rsi + 128], ymm2
	QUAD $0x00a0b09c117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rsi + 160], ymm3
	QUAD $0x00c0b0a4117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rsi + 192], ymm4
	QUAD $0x00e0b0ac117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rsi + 224], ymm5
	// advance the index by 64 elements and repeat until the negative pass counter hits zero
	LONG $0x40c68348                           // add    rsi, 64
	LONG $0x02c78348                           // add    rdi, 2
	JNE  LBB1_574
	JMP  LBB1_990

// LBB1_575: set-up for the 64-bit integer "add scalar" vector loop (LBB1_577).
//   esi  = r10d rounded down to a multiple of 16
//          (NOTE(review): r10d is presumably the element count; it is set before this chunk)
//   ymm0 = the 64-bit value in rax broadcast to all four qword lanes
//   r9   = number of 16-element groups = ((rsi - 16) >> 4) + 1
//   rcx  = -(r9 & -2): negative pass counter, stepped by 2 until it reaches zero
//   edi  = element index, starts at 0
LBB1_575:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	// rsi == 16 means exactly one 16-element group: bypass the 2x-unrolled loop
	// (the branch target lies outside this chunk).
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_997
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// LBB1_577: main loop, 32 qwords per pass (two groups of four ymm vectors).
// The broadcast scalar in ymm0 is added to each vector read from [rdx + 8*rdi + disp]
// and the sum is stored to the same offset from r8.
LBB1_577:
	LONG $0x0cd4fdc5; BYTE $0xfa               // vpaddq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20fa             // vpaddq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40fa             // vpaddq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x64d4fdc5; WORD $0x60fa             // vpaddq    ymm4, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080fa8cd4fdc5; BYTE $0x00       // vpaddq    ymm1, ymm0, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa94d4fdc5; BYTE $0x00       // vpaddq    ymm2, ymm0, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9cd4fdc5; BYTE $0x00       // vpaddq    ymm3, ymm0, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa4d4fdc5; BYTE $0x00       // vpaddq    ymm4, ymm0, yword [rdx + 8*rdi + 224]
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	// advance the index by 32 elements and repeat until the negative pass counter hits zero
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_577
	JMP  LBB1_998

// LBB1_578: set-up for the float32 "add scalar" vector loop (LBB1_580).
//   ecx  = eax rounded down to a multiple of 32
//          (NOTE(review): eax is presumably the element count; it is set before this chunk)
//   ymm1 = low float of xmm0 broadcast to all eight lanes
//   r9   = number of 32-element groups = ((rcx - 32) >> 5) + 1
//   rsi  = -(r9 & -2): negative pass counter, stepped by 2 until it reaches zero
//   edi  = element index, starts at 0
LBB1_578:
	WORD $0xc189                 // mov    ecx, eax
	WORD $0xe183; BYTE $0xe0     // and    ecx, -32
	LONG $0x187de2c4; BYTE $0xc8 // vbroadcastss    ymm1, xmm0
	LONG $0xe0718d48             // lea    rsi, [rcx - 32]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rcx == 32 means exactly one 32-element group: bypass the 2x-unrolled loop
	// (the branch target lies outside this chunk).
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB1_1005
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// LBB1_580: main loop, 64 floats per pass (two groups of four ymm vectors).
// The broadcast scalar in ymm1 is added to each vector read from [rdx + 4*rdi + disp]
// and the sum is stored to the same offset from r8.
LBB1_580:
	LONG $0x1458f4c5; BYTE $0xba               // vaddps    ymm2, ymm1, yword [rdx + 4*rdi]
	LONG $0x5c58f4c5; WORD $0x20ba             // vaddps    ymm3, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x6458f4c5; WORD $0x40ba             // vaddps    ymm4, ymm1, yword [rdx + 4*rdi + 64]
	LONG $0x6c58f4c5; WORD $0x60ba             // vaddps    ymm5, ymm1, yword [rdx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb86c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm5
	QUAD $0x000080ba9458f4c5; BYTE $0x00       // vaddps    ymm2, ymm1, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba9c58f4c5; BYTE $0x00       // vaddps    ymm3, ymm1, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0baa458f4c5; BYTE $0x00       // vaddps    ymm4, ymm1, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baac58f4c5; BYTE $0x00       // vaddps    ymm5, ymm1, yword [rdx + 4*rdi + 224]
	QUAD $0x0080b894117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 128], ymm2
	QUAD $0x00a0b89c117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 160], ymm3
	QUAD $0x00c0b8a4117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 192], ymm4
	QUAD $0x00e0b8ac117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 224], ymm5
	// advance the index by 64 elements and repeat until the negative pass counter hits zero
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB1_580
	JMP  LBB1_1006

// LBB1_581: set-up for the 64-bit integer "subtract scalar" vector loop (LBB1_583).
//   esi  = r10d rounded down to a multiple of 16
//          (NOTE(review): r10d is presumably the element count; it is set before this chunk)
//   ymm0 = the 64-bit value in rax broadcast to all four qword lanes
//   r9   = number of 16-element groups = ((rsi - 16) >> 4) + 1
//   rcx  = -(r9 & -2): negative pass counter, stepped by 2 until it reaches zero
//   edi  = element index, starts at 0
LBB1_581:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	// rsi == 16 means exactly one 16-element group: bypass the 2x-unrolled loop
	// (the branch target lies outside this chunk).
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_1013
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// LBB1_583: main loop, 32 qwords per pass (two groups of four ymm vectors).
// Each vector read from [rdx + 8*rdi + disp] has the broadcast scalar (ymm0) subtracted
// from it (memory value minus scalar) and is stored to the same offset from r8.
LBB1_583:
	LONG $0x0c6ffec5; BYTE $0xfa               // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0x646ffec5; WORD $0x60fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 96]
	LONG $0xc8fbf5c5                           // vpsubq    ymm1, ymm1, ymm0
	LONG $0xd0fbedc5                           // vpsubq    ymm2, ymm2, ymm0
	LONG $0xd8fbe5c5                           // vpsubq    ymm3, ymm3, ymm0
	LONG $0xe0fbddc5                           // vpsubq    ymm4, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080fa8c6ffec5; BYTE $0x00       // vmovdqu    ymm1, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + 8*rdi + 224]
	LONG $0xc8fbf5c5                           // vpsubq    ymm1, ymm1, ymm0
	LONG $0xd0fbedc5                           // vpsubq    ymm2, ymm2, ymm0
	LONG $0xd8fbe5c5                           // vpsubq    ymm3, ymm3, ymm0
	LONG $0xe0fbddc5                           // vpsubq    ymm4, ymm4, ymm0
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	// advance the index by 32 elements and repeat until the negative pass counter hits zero
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_583
	JMP  LBB1_1014

// LBB1_584 / LBB1_586: AVX kernel computing out[i] = in[i] - scalar on float32
// lanes. Input is addressed as [rdx + 4*rsi], output as [r8 + 4*rsi].
// NOTE(review): eax is presumably the element count and xmm0 the scalar operand,
// both prepared before this point — confirm against the dispatcher.
LBB1_584:
	// ecx = eax rounded down to a multiple of 32 (one group = 4 ymm x 8 floats).
	WORD $0xc189                 // mov    ecx, eax
	WORD $0xe183; BYTE $0xe0     // and    ecx, -32
	// ymm1 = low float of xmm0 replicated into all eight lanes.
	LONG $0x187de2c4; BYTE $0xc8 // vbroadcastss    ymm1, xmm0
	// r9 = ((ecx - 32) >> 5) + 1 = number of full 32-element groups.
	LONG $0xe0718d48             // lea    rsi, [rcx - 32]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// Exactly one group (ecx == 32): skip the two-group unrolled loop.
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB1_1021
	// rdi = -(r9 & ~1): negative group counter stepped by 2 up to zero.
	// rsi = 0: element index.
	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
	LONG $0xfee78348             // and    rdi, -2
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	WORD $0xf631                 // xor    esi, esi

// Unrolled loop: each pass handles two groups (64 floats): load, subtract ymm1, store.
LBB1_586:
	LONG $0x1410fcc5; BYTE $0xb2               // vmovups    ymm2, yword [rdx + 4*rsi]
	LONG $0x5c10fcc5; WORD $0x20b2             // vmovups    ymm3, yword [rdx + 4*rsi + 32]
	LONG $0x6410fcc5; WORD $0x40b2             // vmovups    ymm4, yword [rdx + 4*rsi + 64]
	LONG $0x6c10fcc5; WORD $0x60b2             // vmovups    ymm5, yword [rdx + 4*rsi + 96]
	LONG $0xd15cecc5                           // vsubps    ymm2, ymm2, ymm1
	LONG $0xd95ce4c5                           // vsubps    ymm3, ymm3, ymm1
	LONG $0xe15cdcc5                           // vsubps    ymm4, ymm4, ymm1
	LONG $0xe95cd4c5                           // vsubps    ymm5, ymm5, ymm1
	LONG $0x117cc1c4; WORD $0xb014             // vmovups    yword [r8 + 4*rsi], ymm2
	LONG $0x117cc1c4; WORD $0xb05c; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm5
	// Second group of the pass (byte offsets +128 .. +224).
	QUAD $0x000080b29410fcc5; BYTE $0x00       // vmovups    ymm2, yword [rdx + 4*rsi + 128]
	QUAD $0x0000a0b29c10fcc5; BYTE $0x00       // vmovups    ymm3, yword [rdx + 4*rsi + 160]
	QUAD $0x0000c0b2a410fcc5; BYTE $0x00       // vmovups    ymm4, yword [rdx + 4*rsi + 192]
	QUAD $0x0000e0b2ac10fcc5; BYTE $0x00       // vmovups    ymm5, yword [rdx + 4*rsi + 224]
	LONG $0xd15cecc5                           // vsubps    ymm2, ymm2, ymm1
	LONG $0xd95ce4c5                           // vsubps    ymm3, ymm3, ymm1
	LONG $0xe15cdcc5                           // vsubps    ymm4, ymm4, ymm1
	LONG $0xe95cd4c5                           // vsubps    ymm5, ymm5, ymm1
	QUAD $0x0080b094117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rsi + 128], ymm2
	QUAD $0x00a0b09c117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rsi + 160], ymm3
	QUAD $0x00c0b0a4117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rsi + 192], ymm4
	QUAD $0x00e0b0ac117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rsi + 224], ymm5
	// Advance the index by 64 elements; the add to rdi sets ZF when the counter hits zero.
	LONG $0x40c68348                           // add    rsi, 64
	LONG $0x02c78348                           // add    rdi, 2
	JNE  LBB1_586
	// NOTE(review): LBB1_1022 is outside this chunk; presumably it handles an odd
	// leftover group (r9 & 1) and the scalar tail — confirm.
	JMP  LBB1_1022

// LBB1_587: preheader for a byte-lane kernel that works in 32-byte groups.
// It broadcasts the scalar and computes the group counters, then branches to
// code outside this chunk (LBB1_641 / LBB1_643).
// NOTE(review): r10d is presumably the element count and the low byte of ecx
// the scalar operand — confirm against the dispatcher.
LBB1_587:
	// edi = r10d rounded down to a multiple of 32 (one ymm of bytes per group).
	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
	WORD $0xe783; BYTE $0xe0     // and    edi, -32
	// ymm0 = low byte of ecx replicated into all 32 byte lanes.
	LONG $0xc16ef9c5             // vmovd    xmm0, ecx
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	// rax = ((edi - 32) >> 5) + 1 = number of full 32-byte groups.
	LONG $0xe0778d48             // lea    rsi, [rdi - 32]
	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
	LONG $0x05e8c148             // shr    rax, 5
	LONG $0x01c08348             // add    rax, 1
	// r9d = group count mod 4.
	WORD $0x8941; BYTE $0xc1     // mov    r9d, eax
	LONG $0x03e18341             // and    r9d, 3
	// At least four groups (edi - 32 >= 96, unsigned): branch to LBB1_641.
	LONG $0x60fe8348             // cmp    rsi, 96
	JAE  LBB1_641
	// Fewer than four groups: start at offset 0 and go straight to LBB1_643.
	WORD $0xf631                 // xor    esi, esi
	JMP  LBB1_643

// LBB1_589: preheader for a byte-lane kernel that works in 32-byte groups.
// The instruction sequence matches LBB1_587; only the branch targets differ
// (LBB1_651 / LBB1_653, both outside this chunk).
// NOTE(review): r10d is presumably the element count and the low byte of ecx
// the scalar operand — confirm against the dispatcher.
LBB1_589:
	// edi = r10d rounded down to a multiple of 32 (one ymm of bytes per group).
	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
	WORD $0xe783; BYTE $0xe0     // and    edi, -32
	// ymm0 = low byte of ecx replicated into all 32 byte lanes.
	LONG $0xc16ef9c5             // vmovd    xmm0, ecx
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	// rax = ((edi - 32) >> 5) + 1 = number of full 32-byte groups.
	LONG $0xe0778d48             // lea    rsi, [rdi - 32]
	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
	LONG $0x05e8c148             // shr    rax, 5
	LONG $0x01c08348             // add    rax, 1
	// r9d = group count mod 4.
	WORD $0x8941; BYTE $0xc1     // mov    r9d, eax
	LONG $0x03e18341             // and    r9d, 3
	// At least four groups (edi - 32 >= 96, unsigned): branch to LBB1_651.
	LONG $0x60fe8348             // cmp    rsi, 96
	JAE  LBB1_651
	// Fewer than four groups: start at offset 0 and go straight to LBB1_653.
	WORD $0xf631                 // xor    esi, esi
	JMP  LBB1_653

// LBB1_591 / LBB1_593: AVX2 kernel computing out[i] = scalar + in[i] on 8-bit
// lanes (vpaddb, wrapping). Input is addressed as [rdx + rdi], output as [r8 + rdi].
// NOTE(review): r10d is presumably the element count and the low byte of eax
// the scalar operand — confirm against the dispatcher.
LBB1_591:
	// esi = r10d rounded down to a multiple of 128 (one group = 4 ymm x 32 bytes).
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	// ymm0 = low byte of eax replicated into all 32 byte lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	// r9 = ((esi - 128) >> 7) + 1 = number of full 128-byte groups.
	LONG $0x804e8d48             // lea    rcx, [rsi - 128]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	// Exactly one group (esi == 128): skip the two-group unrolled loop.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_1029
	// rcx = -(r9 & ~1): negative group counter stepped by 2 up to zero.
	// rdi = 0: byte offset.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Unrolled loop: each pass handles two groups (256 bytes); the add reads its
// second operand straight from memory.
LBB1_593:
	LONG $0x0cfcfdc5; BYTE $0x3a               // vpaddb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54fcfdc5; WORD $0x203a             // vpaddb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x403a             // vpaddb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x64fcfdc5; WORD $0x603a             // vpaddb    ymm4, ymm0, yword [rdx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	// Second group of the pass (byte offsets +128 .. +224).
	QUAD $0x0000803a8cfcfdc5; BYTE $0x00       // vpaddb    ymm1, ymm0, yword [rdx + rdi + 128]
	QUAD $0x0000a03a94fcfdc5; BYTE $0x00       // vpaddb    ymm2, ymm0, yword [rdx + rdi + 160]
	QUAD $0x0000c03a9cfcfdc5; BYTE $0x00       // vpaddb    ymm3, ymm0, yword [rdx + rdi + 192]
	QUAD $0x0000e03aa4fcfdc5; BYTE $0x00       // vpaddb    ymm4, ymm0, yword [rdx + rdi + 224]
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	// Advance the offset by 256 bytes; the add to rcx sets ZF when the counter hits zero.
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_593
	// NOTE(review): LBB1_1030 is outside this chunk; presumably it handles an odd
	// leftover group (r9 & 1) and the scalar tail — confirm.
	JMP  LBB1_1030

// LBB1_594 / LBB1_596: AVX2 kernel computing out[i] = in[i] - scalar on 8-bit
// lanes (vpsubb, wrapping). Input is addressed as [rdx + rdi], output as [r8 + rdi].
// NOTE(review): r10d is presumably the element count and the low byte of eax
// the scalar operand — confirm against the dispatcher.
LBB1_594:
	// esi = r10d rounded down to a multiple of 128 (one group = 4 ymm x 32 bytes).
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	// ymm0 = low byte of eax replicated into all 32 byte lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	// r9 = ((esi - 128) >> 7) + 1 = number of full 128-byte groups.
	LONG $0x804e8d48             // lea    rcx, [rsi - 128]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	// Exactly one group (esi == 128): skip the two-group unrolled loop.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_1037
	// rcx = -(r9 & ~1): negative group counter stepped by 2 up to zero.
	// rdi = 0: byte offset.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Unrolled loop: each pass handles two groups (256 bytes): load, subtract ymm0, store.
LBB1_596:
	LONG $0x0c6ffec5; BYTE $0x3a               // vmovdqu    ymm1, yword [rdx + rdi]
	LONG $0x546ffec5; WORD $0x203a             // vmovdqu    ymm2, yword [rdx + rdi + 32]
	LONG $0x5c6ffec5; WORD $0x403a             // vmovdqu    ymm3, yword [rdx + rdi + 64]
	LONG $0x646ffec5; WORD $0x603a             // vmovdqu    ymm4, yword [rdx + rdi + 96]
	LONG $0xc8f8f5c5                           // vpsubb    ymm1, ymm1, ymm0
	LONG $0xd0f8edc5                           // vpsubb    ymm2, ymm2, ymm0
	LONG $0xd8f8e5c5                           // vpsubb    ymm3, ymm3, ymm0
	LONG $0xe0f8ddc5                           // vpsubb    ymm4, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	// Second group of the pass (byte offsets +128 .. +224).
	QUAD $0x0000803a8c6ffec5; BYTE $0x00       // vmovdqu    ymm1, yword [rdx + rdi + 128]
	QUAD $0x0000a03a946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + rdi + 160]
	QUAD $0x0000c03a9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + rdi + 192]
	QUAD $0x0000e03aa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + rdi + 224]
	LONG $0xc8f8f5c5                           // vpsubb    ymm1, ymm1, ymm0
	LONG $0xd0f8edc5                           // vpsubb    ymm2, ymm2, ymm0
	LONG $0xd8f8e5c5                           // vpsubb    ymm3, ymm3, ymm0
	LONG $0xe0f8ddc5                           // vpsubb    ymm4, ymm4, ymm0
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	// Advance the offset by 256 bytes; the add to rcx sets ZF when the counter hits zero.
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_596
	// NOTE(review): LBB1_1038 is outside this chunk; presumably it handles an odd
	// leftover group (r9 & 1) and the scalar tail — confirm.
	JMP  LBB1_1038

// LBB1_597 / LBB1_599: AVX2 kernel computing out[i] = scalar + in[i] on 8-bit
// lanes (vpaddb, wrapping). Input is addressed as [rdx + rdi], output as [r8 + rdi].
// The instruction sequence matches LBB1_591/LBB1_593; only the labels and exit
// targets differ.
// NOTE(review): r10d is presumably the element count and the low byte of eax
// the scalar operand — confirm against the dispatcher.
LBB1_597:
	// esi = r10d rounded down to a multiple of 128 (one group = 4 ymm x 32 bytes).
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	// ymm0 = low byte of eax replicated into all 32 byte lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	// r9 = ((esi - 128) >> 7) + 1 = number of full 128-byte groups.
	LONG $0x804e8d48             // lea    rcx, [rsi - 128]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	// Exactly one group (esi == 128): skip the two-group unrolled loop.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_1045
	// rcx = -(r9 & ~1): negative group counter stepped by 2 up to zero.
	// rdi = 0: byte offset.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Unrolled loop: each pass handles two groups (256 bytes); the add reads its
// second operand straight from memory.
LBB1_599:
	LONG $0x0cfcfdc5; BYTE $0x3a               // vpaddb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54fcfdc5; WORD $0x203a             // vpaddb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x403a             // vpaddb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x64fcfdc5; WORD $0x603a             // vpaddb    ymm4, ymm0, yword [rdx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	// Second group of the pass (byte offsets +128 .. +224).
	QUAD $0x0000803a8cfcfdc5; BYTE $0x00       // vpaddb    ymm1, ymm0, yword [rdx + rdi + 128]
	QUAD $0x0000a03a94fcfdc5; BYTE $0x00       // vpaddb    ymm2, ymm0, yword [rdx + rdi + 160]
	QUAD $0x0000c03a9cfcfdc5; BYTE $0x00       // vpaddb    ymm3, ymm0, yword [rdx + rdi + 192]
	QUAD $0x0000e03aa4fcfdc5; BYTE $0x00       // vpaddb    ymm4, ymm0, yword [rdx + rdi + 224]
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	// Advance the offset by 256 bytes; the add to rcx sets ZF when the counter hits zero.
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_599
	// NOTE(review): LBB1_1046 is outside this chunk; presumably it handles an odd
	// leftover group (r9 & 1) and the scalar tail — confirm.
	JMP  LBB1_1046

// LBB1_600 / LBB1_602: AVX2 kernel computing out[i] = in[i] - scalar on 8-bit
// lanes (vpsubb, wrapping). Input is addressed as [rdx + rdi], output as [r8 + rdi].
// The instruction sequence matches LBB1_594/LBB1_596; only the labels and exit
// targets differ.
// NOTE(review): r10d is presumably the element count and the low byte of eax
// the scalar operand — confirm against the dispatcher.
LBB1_600:
	// esi = r10d rounded down to a multiple of 128 (one group = 4 ymm x 32 bytes).
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	// ymm0 = low byte of eax replicated into all 32 byte lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	// r9 = ((esi - 128) >> 7) + 1 = number of full 128-byte groups.
	LONG $0x804e8d48             // lea    rcx, [rsi - 128]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	// Exactly one group (esi == 128): skip the two-group unrolled loop.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_1053
	// rcx = -(r9 & ~1): negative group counter stepped by 2 up to zero.
	// rdi = 0: byte offset.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Unrolled loop: each pass handles two groups (256 bytes): load, subtract ymm0, store.
LBB1_602:
	LONG $0x0c6ffec5; BYTE $0x3a               // vmovdqu    ymm1, yword [rdx + rdi]
	LONG $0x546ffec5; WORD $0x203a             // vmovdqu    ymm2, yword [rdx + rdi + 32]
	LONG $0x5c6ffec5; WORD $0x403a             // vmovdqu    ymm3, yword [rdx + rdi + 64]
	LONG $0x646ffec5; WORD $0x603a             // vmovdqu    ymm4, yword [rdx + rdi + 96]
	LONG $0xc8f8f5c5                           // vpsubb    ymm1, ymm1, ymm0
	LONG $0xd0f8edc5                           // vpsubb    ymm2, ymm2, ymm0
	LONG $0xd8f8e5c5                           // vpsubb    ymm3, ymm3, ymm0
	LONG $0xe0f8ddc5                           // vpsubb    ymm4, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	// Second group of the pass (byte offsets +128 .. +224).
	QUAD $0x0000803a8c6ffec5; BYTE $0x00       // vmovdqu    ymm1, yword [rdx + rdi + 128]
	QUAD $0x0000a03a946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + rdi + 160]
	QUAD $0x0000c03a9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + rdi + 192]
	QUAD $0x0000e03aa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + rdi + 224]
	LONG $0xc8f8f5c5                           // vpsubb    ymm1, ymm1, ymm0
	LONG $0xd0f8edc5                           // vpsubb    ymm2, ymm2, ymm0
	LONG $0xd8f8e5c5                           // vpsubb    ymm3, ymm3, ymm0
	LONG $0xe0f8ddc5                           // vpsubb    ymm4, ymm4, ymm0
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	// Advance the offset by 256 bytes; the add to rcx sets ZF when the counter hits zero.
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_602
	// NOTE(review): LBB1_1054 is outside this chunk; presumably it handles an odd
	// leftover group (r9 & 1) and the scalar tail — confirm.
	JMP  LBB1_1054

// LBB1_603 / LBB1_605: AVX2 kernel computing out[i] = scalar * in[i] on 32-bit
// lanes (vpmulld keeps the low 32 bits of each product). Input is addressed as
// [rdx + 4*rdi], output as [r8 + 4*rdi].
// NOTE(review): r10d is presumably the element count and eax the scalar
// operand — confirm against the dispatcher.
LBB1_603:
	// esi = r10d rounded down to a multiple of 32 (one group = 4 ymm x 8 dwords).
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	// ymm0 = eax replicated into all eight 32-bit lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	// r9 = ((esi - 32) >> 5) + 1 = number of full 32-element groups.
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// Exactly one group (esi == 32): skip the two-group unrolled loop.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_1061
	// rcx = -(r9 & ~1): negative group counter stepped by 2 up to zero.
	// rdi = 0: element index.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Unrolled loop: each pass handles two groups (64 dwords); the multiply reads
// its second operand straight from memory.
LBB1_605:
	LONG $0x407de2c4; WORD $0xba0c             // vpmulld    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x407de2c4; WORD $0xba54; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xba5c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xba64; BYTE $0x60 // vpmulld    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	// Second group of the pass (byte offsets +128 .. +224).
	QUAD $0x0080ba8c407de2c4; WORD $0x0000     // vpmulld    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x00a0ba94407de2c4; WORD $0x0000     // vpmulld    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x00c0ba9c407de2c4; WORD $0x0000     // vpmulld    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x00e0baa4407de2c4; WORD $0x0000     // vpmulld    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	// Advance the index by 64 elements; the add to rcx sets ZF when the counter hits zero.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_605
	// NOTE(review): LBB1_1062 is outside this chunk; presumably it handles an odd
	// leftover group (r9 & 1) and the scalar tail — confirm.
	JMP  LBB1_1062

// LBB1_606 / LBB1_608: AVX2 kernel computing out[i] = scalar * in[i] on 32-bit
// lanes (vpmulld keeps the low 32 bits of each product). Input is addressed as
// [rdx + 4*rdi], output as [r8 + 4*rdi]. The instruction sequence matches
// LBB1_603/LBB1_605; only the labels and exit targets differ.
// NOTE(review): r10d is presumably the element count and eax the scalar
// operand — confirm against the dispatcher.
LBB1_606:
	// esi = r10d rounded down to a multiple of 32 (one group = 4 ymm x 8 dwords).
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	// ymm0 = eax replicated into all eight 32-bit lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	// r9 = ((esi - 32) >> 5) + 1 = number of full 32-element groups.
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// Exactly one group (esi == 32): skip the two-group unrolled loop.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_1069
	// rcx = -(r9 & ~1): negative group counter stepped by 2 up to zero.
	// rdi = 0: element index.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Unrolled loop: each pass handles two groups (64 dwords); the multiply reads
// its second operand straight from memory.
LBB1_608:
	LONG $0x407de2c4; WORD $0xba0c             // vpmulld    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x407de2c4; WORD $0xba54; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xba5c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xba64; BYTE $0x60 // vpmulld    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	// Second group of the pass (byte offsets +128 .. +224).
	QUAD $0x0080ba8c407de2c4; WORD $0x0000     // vpmulld    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x00a0ba94407de2c4; WORD $0x0000     // vpmulld    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x00c0ba9c407de2c4; WORD $0x0000     // vpmulld    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x00e0baa4407de2c4; WORD $0x0000     // vpmulld    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	// Advance the index by 64 elements; the add to rcx sets ZF when the counter hits zero.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_608
	// NOTE(review): LBB1_1070 is outside this chunk; presumably it handles an odd
	// leftover group (r9 & 1) and the scalar tail — confirm.
	JMP  LBB1_1070

// LBB1_609 / LBB1_611: AVX2 kernel computing out[i] = scalar + in[i] on 32-bit
// lanes (vpaddd, wrapping). Input is addressed as [rdx + 4*rdi], output as
// [r8 + 4*rdi].
// NOTE(review): r10d is presumably the element count and eax the scalar
// operand — confirm against the dispatcher.
LBB1_609:
	// esi = r10d rounded down to a multiple of 32 (one group = 4 ymm x 8 dwords).
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	// ymm0 = eax replicated into all eight 32-bit lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	// r9 = ((esi - 32) >> 5) + 1 = number of full 32-element groups.
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// Exactly one group (esi == 32): skip the two-group unrolled loop.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_1077
	// rcx = -(r9 & ~1): negative group counter stepped by 2 up to zero.
	// rdi = 0: element index.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Unrolled loop: each pass handles two groups (64 dwords); the add reads its
// second operand straight from memory.
LBB1_611:
	LONG $0x0cfefdc5; BYTE $0xba               // vpaddd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20ba             // vpaddd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40ba             // vpaddd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x64fefdc5; WORD $0x60ba             // vpaddd    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	// Second group of the pass (byte offsets +128 .. +224).
	QUAD $0x000080ba8cfefdc5; BYTE $0x00       // vpaddd    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba94fefdc5; BYTE $0x00       // vpaddd    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9cfefdc5; BYTE $0x00       // vpaddd    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa4fefdc5; BYTE $0x00       // vpaddd    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	// Advance the index by 64 elements; the add to rcx sets ZF when the counter hits zero.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_611
	// NOTE(review): LBB1_1078 is outside this chunk; presumably it handles an odd
	// leftover group (r9 & 1) and the scalar tail — confirm.
	JMP  LBB1_1078

// LBB1_612 / LBB1_614: AVX2 kernel computing out[i] = in[i] - scalar on 32-bit
// lanes (vpsubd, wrapping). Input is addressed as [rdx + 4*rdi], output as
// [r8 + 4*rdi].
// NOTE(review): r10d is presumably the element count and eax the scalar
// operand — confirm against the dispatcher.
LBB1_612:
	// esi = r10d rounded down to a multiple of 32 (one group = 4 ymm x 8 dwords).
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	// ymm0 = eax replicated into all eight 32-bit lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	// r9 = ((esi - 32) >> 5) + 1 = number of full 32-element groups.
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// Exactly one group (esi == 32): skip the two-group unrolled loop.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_1085
	// rcx = -(r9 & ~1): negative group counter stepped by 2 up to zero.
	// rdi = 0: element index.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Unrolled loop: each pass handles two groups (64 dwords): load, subtract ymm0, store.
LBB1_614:
	LONG $0x0c6ffec5; BYTE $0xba               // vmovdqu    ymm1, yword [rdx + 4*rdi]
	LONG $0x546ffec5; WORD $0x20ba             // vmovdqu    ymm2, yword [rdx + 4*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40ba             // vmovdqu    ymm3, yword [rdx + 4*rdi + 64]
	LONG $0x646ffec5; WORD $0x60ba             // vmovdqu    ymm4, yword [rdx + 4*rdi + 96]
	LONG $0xc8faf5c5                           // vpsubd    ymm1, ymm1, ymm0
	LONG $0xd0faedc5                           // vpsubd    ymm2, ymm2, ymm0
	LONG $0xd8fae5c5                           // vpsubd    ymm3, ymm3, ymm0
	LONG $0xe0faddc5                           // vpsubd    ymm4, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	// Second group of the pass (byte offsets +128 .. +224).
	QUAD $0x000080ba8c6ffec5; BYTE $0x00       // vmovdqu    ymm1, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + 4*rdi + 224]
	LONG $0xc8faf5c5                           // vpsubd    ymm1, ymm1, ymm0
	LONG $0xd0faedc5                           // vpsubd    ymm2, ymm2, ymm0
	LONG $0xd8fae5c5                           // vpsubd    ymm3, ymm3, ymm0
	LONG $0xe0faddc5                           // vpsubd    ymm4, ymm4, ymm0
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	// Advance the index by 64 elements; the add to rcx sets ZF when the counter hits zero.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_614
	// NOTE(review): LBB1_1086 is outside this chunk; presumably it handles an odd
	// leftover group (r9 & 1) and the scalar tail — confirm.
	JMP  LBB1_1086

// LBB1_615 / LBB1_617: AVX2 kernel computing out[i] = scalar + in[i] on 32-bit
// lanes (vpaddd, wrapping). Input is addressed as [rdx + 4*rdi], output as
// [r8 + 4*rdi]. The instruction sequence matches LBB1_609/LBB1_611; only the
// labels and exit targets differ.
// NOTE(review): r10d is presumably the element count and eax the scalar
// operand — confirm against the dispatcher.
LBB1_615:
	// esi = r10d rounded down to a multiple of 32 (one group = 4 ymm x 8 dwords).
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	// ymm0 = eax replicated into all eight 32-bit lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	// r9 = ((esi - 32) >> 5) + 1 = number of full 32-element groups.
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// Exactly one group (esi == 32): skip the two-group unrolled loop.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_1093
	// rcx = -(r9 & ~1): negative group counter stepped by 2 up to zero.
	// rdi = 0: element index.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Unrolled loop: each pass handles two groups (64 dwords); the add reads its
// second operand straight from memory.
LBB1_617:
	LONG $0x0cfefdc5; BYTE $0xba               // vpaddd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20ba             // vpaddd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40ba             // vpaddd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x64fefdc5; WORD $0x60ba             // vpaddd    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	// Second group of the pass (byte offsets +128 .. +224).
	QUAD $0x000080ba8cfefdc5; BYTE $0x00       // vpaddd    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba94fefdc5; BYTE $0x00       // vpaddd    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9cfefdc5; BYTE $0x00       // vpaddd    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa4fefdc5; BYTE $0x00       // vpaddd    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	// Advance the index by 64 elements; the add to rcx sets ZF when the counter hits zero.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_617
	// NOTE(review): LBB1_1094 is outside this chunk; presumably it handles an odd
	// leftover group (r9 & 1) and the scalar tail — confirm.
	JMP  LBB1_1094

// LBB1_618 / LBB1_620: AVX2 kernel computing out[i] = in[i] - scalar on 32-bit
// lanes (vpsubd, wrapping). Input is addressed as [rdx + 4*rdi], output as
// [r8 + 4*rdi]. The instruction sequence matches LBB1_612/LBB1_614; only the
// labels and exit targets differ.
// NOTE(review): r10d is presumably the element count and eax the scalar
// operand — confirm against the dispatcher.
LBB1_618:
	// esi = r10d rounded down to a multiple of 32 (one group = 4 ymm x 8 dwords).
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	// ymm0 = eax replicated into all eight 32-bit lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	// r9 = ((esi - 32) >> 5) + 1 = number of full 32-element groups.
	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// Exactly one group (esi == 32): skip the two-group unrolled loop.
	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
	JE   LBB1_1101
	// rcx = -(r9 & ~1): negative group counter stepped by 2 up to zero.
	// rdi = 0: element index.
	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
	LONG $0xfee18348             // and    rcx, -2
	WORD $0xf748; BYTE $0xd9     // neg    rcx
	WORD $0xff31                 // xor    edi, edi

// Unrolled loop: each pass handles two groups (64 dwords): load, subtract ymm0, store.
LBB1_620:
	LONG $0x0c6ffec5; BYTE $0xba               // vmovdqu    ymm1, yword [rdx + 4*rdi]
	LONG $0x546ffec5; WORD $0x20ba             // vmovdqu    ymm2, yword [rdx + 4*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40ba             // vmovdqu    ymm3, yword [rdx + 4*rdi + 64]
	LONG $0x646ffec5; WORD $0x60ba             // vmovdqu    ymm4, yword [rdx + 4*rdi + 96]
	LONG $0xc8faf5c5                           // vpsubd    ymm1, ymm1, ymm0
	LONG $0xd0faedc5                           // vpsubd    ymm2, ymm2, ymm0
	LONG $0xd8fae5c5                           // vpsubd    ymm3, ymm3, ymm0
	LONG $0xe0faddc5                           // vpsubd    ymm4, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	// Second group of the pass (byte offsets +128 .. +224).
	QUAD $0x000080ba8c6ffec5; BYTE $0x00       // vmovdqu    ymm1, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rdx + 4*rdi + 224]
	LONG $0xc8faf5c5                           // vpsubd    ymm1, ymm1, ymm0
	LONG $0xd0faedc5                           // vpsubd    ymm2, ymm2, ymm0
	LONG $0xd8fae5c5                           // vpsubd    ymm3, ymm3, ymm0
	LONG $0xe0faddc5                           // vpsubd    ymm4, ymm4, ymm0
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	// Advance the index by 64 elements; the add to rcx sets ZF when the counter hits zero.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c18348                           // add    rcx, 2
	JNE  LBB1_620
	// NOTE(review): LBB1_1102 is outside this chunk; presumably it handles an odd
	// leftover group (r9 & 1) and the scalar tail — confirm.
	JMP  LBB1_1102

// LBB1_621: preheader for the byte-multiply loop that follows (LBB1_622). It
// sets up the loop counter, the byte offset and three loop-invariant vectors,
// then falls through into the loop.
// NOTE(review): on entry rax presumably holds a group count and ymm0 the
// byte-broadcast scalar, both computed outside this chunk — confirm.
LBB1_621:
	// rax = -(rax & ~3): negative counter in multiples of four.
	LONG $0xfce08348             // and    rax, -4
	WORD $0xf748; BYTE $0xd8     // neg    rax
	// rsi = 0: byte offset into the buffers at rdx / r8.
	WORD $0xf631                 // xor    esi, esi
	// ymm1 = upper eight bytes of each 128-bit lane of ymm0, each byte duplicated
	// into a 16-bit word; used as the vpmullw multiplier for the high halves.
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	// ymm2 = 32-byte constant at [rbp], applied with vpand after each multiply.
	// NOTE(review): presumably a 0x00ff-per-word mask that keeps the low byte of
	// each 16-bit product before vpackuswb — confirm against the DATA block.
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI1_0] */
	// ymm3 = same widening for the lower eight bytes of each 128-bit lane.
	LONG $0xd860fdc5             // vpunpcklbw    ymm3, ymm0, ymm0

LBB1_622:
	LONG $0x246ffec5; BYTE $0x32               // vmovdqu    ymm4, yword [rdx + rsi]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3024             // vmovdqu    yword [r8 + rsi], ymm4
	LONG $0x646ffec5; WORD $0x2032             // vmovdqu    ymm4, yword [rdx + rsi + 32]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm4
	LONG $0x646ffec5; WORD $0x4032             // vmovdqu    ymm4, yword [rdx + rsi + 64]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm4
	LONG $0x646ffec5; WORD $0x6032             // vmovdqu    ymm4, yword [rdx + rsi + 96]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm4
	LONG $0x80ee8348                           // sub    rsi, -128
	LONG $0x04c08348                           // add    rax, 4
	JNE  LBB1_622

LBB1_623:
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	JE   LBB1_626
	WORD $0xf749; BYTE $0xd9     // neg    r9
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI1_0] */
	LONG $0xc060fdc5             // vpunpcklbw    ymm0, ymm0, ymm0

LBB1_625:
	LONG $0x1c6ffec5; BYTE $0x32   // vmovdqu    ymm3, yword [rdx + rsi]
	LONG $0xe368e5c5               // vpunpckhbw    ymm4, ymm3, ymm3
	LONG $0xe1d5ddc5               // vpmullw    ymm4, ymm4, ymm1
	LONG $0xe2dbddc5               // vpand    ymm4, ymm4, ymm2
	LONG $0xdb60e5c5               // vpunpcklbw    ymm3, ymm3, ymm3
	LONG $0xd8d5e5c5               // vpmullw    ymm3, ymm3, ymm0
	LONG $0xdadbe5c5               // vpand    ymm3, ymm3, ymm2
	LONG $0xdc67e5c5               // vpackuswb    ymm3, ymm3, ymm4
	LONG $0x7f7ec1c4; WORD $0x301c // vmovdqu    yword [r8 + rsi], ymm3
	LONG $0x20c68348               // add    rsi, 32
	WORD $0xff49; BYTE $0xc1       // inc    r9
	JNE  LBB1_625

LBB1_626:
	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
	JE   LBB1_1109
	JMP  LBB1_627

// LBB1_631..LBB1_636: byte-wise multiply of the data at [rdx + rsi] by ymm0,
// written to [r8 + rsi] (rsi is a byte offset starting at 0).
// ymm0 is set up before this block; presumably it holds a broadcast scalar
// operand -- confirm against the code that branches here.
// Each 32-byte vector is widened with vpunpck{h,l}bw, multiplied in 16-bit
// lanes with vpmullw, masked with the constant at 0[rbp], and repacked with
// vpackuswb.
// NOTE(review): the constant at 0[rbp] is defined outside this chunk
// (presumably 0x00ff per word -- confirm); vmovdqa requires it to be
// 32-byte aligned.
// Setup: rax is rounded down to a multiple of 4 and negated so the main loop
// can count up to zero.
LBB1_631:
	LONG $0xfce08348             // and    rax, -4
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI1_0] */
	LONG $0xd860fdc5             // vpunpcklbw    ymm3, ymm0, ymm0

// Main loop, unrolled 4x: 128 bytes per iteration (offsets 0, 32, 64, 96).
LBB1_632:
	LONG $0x246ffec5; BYTE $0x32               // vmovdqu    ymm4, yword [rdx + rsi]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3024             // vmovdqu    yword [r8 + rsi], ymm4
	LONG $0x646ffec5; WORD $0x2032             // vmovdqu    ymm4, yword [rdx + rsi + 32]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm4
	LONG $0x646ffec5; WORD $0x4032             // vmovdqu    ymm4, yword [rdx + rsi + 64]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm4
	LONG $0x646ffec5; WORD $0x6032             // vmovdqu    ymm4, yword [rdx + rsi + 96]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm4
	LONG $0x80ee8348                           // sub    rsi, -128
	LONG $0x04c08348                           // add    rax, 4
	JNE  LBB1_632

// Remainder: r9 single-vector (32-byte) iterations, skipped when r9 == 0.
// r9 is negated and incremented up to zero. ymm0 is overwritten here with
// its own low-half unpack, so the original ymm0 value is gone afterwards.
LBB1_633:
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	JE   LBB1_636
	WORD $0xf749; BYTE $0xd9     // neg    r9
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI1_0] */
	LONG $0xc060fdc5             // vpunpcklbw    ymm0, ymm0, ymm0

LBB1_635:
	LONG $0x1c6ffec5; BYTE $0x32   // vmovdqu    ymm3, yword [rdx + rsi]
	LONG $0xe368e5c5               // vpunpckhbw    ymm4, ymm3, ymm3
	LONG $0xe1d5ddc5               // vpmullw    ymm4, ymm4, ymm1
	LONG $0xe2dbddc5               // vpand    ymm4, ymm4, ymm2
	LONG $0xdb60e5c5               // vpunpcklbw    ymm3, ymm3, ymm3
	LONG $0xd8d5e5c5               // vpmullw    ymm3, ymm3, ymm0
	LONG $0xdadbe5c5               // vpand    ymm3, ymm3, ymm2
	LONG $0xdc67e5c5               // vpackuswb    ymm3, ymm3, ymm4
	LONG $0x7f7ec1c4; WORD $0x301c // vmovdqu    yword [r8 + rsi], ymm3
	LONG $0x20c68348               // add    rsi, 32
	WORD $0xff49; BYTE $0xc1       // inc    r9
	JNE  LBB1_635

// Dispatch: rdi == r10 jumps to LBB1_1109, otherwise to LBB1_637. Both targets
// are outside this chunk (presumably "done" and a scalar tail -- confirm).
LBB1_636:
	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
	JE   LBB1_1109
	JMP  LBB1_637

// LBB1_641..LBB1_646: byte-wise multiply of the data at [rdx + rsi] by ymm0,
// written to [r8 + rsi] (rsi is a byte offset starting at 0).
// ymm0 is set up before this block; presumably it holds a broadcast scalar
// operand -- confirm against the code that branches here.
// Each 32-byte vector is widened with vpunpck{h,l}bw, multiplied in 16-bit
// lanes with vpmullw, masked with the constant at 0[rbp], and repacked with
// vpackuswb.
// NOTE(review): the constant at 0[rbp] is defined outside this chunk
// (presumably 0x00ff per word -- confirm); vmovdqa requires it to be
// 32-byte aligned.
// Setup: rax is rounded down to a multiple of 4 and negated so the main loop
// can count up to zero.
LBB1_641:
	LONG $0xfce08348             // and    rax, -4
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI1_0] */
	LONG $0xd860fdc5             // vpunpcklbw    ymm3, ymm0, ymm0

// Main loop, unrolled 4x: 128 bytes per iteration (offsets 0, 32, 64, 96).
LBB1_642:
	LONG $0x246ffec5; BYTE $0x32               // vmovdqu    ymm4, yword [rdx + rsi]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3024             // vmovdqu    yword [r8 + rsi], ymm4
	LONG $0x646ffec5; WORD $0x2032             // vmovdqu    ymm4, yword [rdx + rsi + 32]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm4
	LONG $0x646ffec5; WORD $0x4032             // vmovdqu    ymm4, yword [rdx + rsi + 64]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm4
	LONG $0x646ffec5; WORD $0x6032             // vmovdqu    ymm4, yword [rdx + rsi + 96]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm4
	LONG $0x80ee8348                           // sub    rsi, -128
	LONG $0x04c08348                           // add    rax, 4
	JNE  LBB1_642

// Remainder: r9 single-vector (32-byte) iterations, skipped when r9 == 0.
// r9 is negated and incremented up to zero. ymm0 is overwritten here with
// its own low-half unpack, so the original ymm0 value is gone afterwards.
LBB1_643:
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	JE   LBB1_646
	WORD $0xf749; BYTE $0xd9     // neg    r9
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI1_0] */
	LONG $0xc060fdc5             // vpunpcklbw    ymm0, ymm0, ymm0

LBB1_645:
	LONG $0x1c6ffec5; BYTE $0x32   // vmovdqu    ymm3, yword [rdx + rsi]
	LONG $0xe368e5c5               // vpunpckhbw    ymm4, ymm3, ymm3
	LONG $0xe1d5ddc5               // vpmullw    ymm4, ymm4, ymm1
	LONG $0xe2dbddc5               // vpand    ymm4, ymm4, ymm2
	LONG $0xdb60e5c5               // vpunpcklbw    ymm3, ymm3, ymm3
	LONG $0xd8d5e5c5               // vpmullw    ymm3, ymm3, ymm0
	LONG $0xdadbe5c5               // vpand    ymm3, ymm3, ymm2
	LONG $0xdc67e5c5               // vpackuswb    ymm3, ymm3, ymm4
	LONG $0x7f7ec1c4; WORD $0x301c // vmovdqu    yword [r8 + rsi], ymm3
	LONG $0x20c68348               // add    rsi, 32
	WORD $0xff49; BYTE $0xc1       // inc    r9
	JNE  LBB1_645

// Dispatch: rdi == r10 jumps to LBB1_1109, otherwise to LBB1_647. Both targets
// are outside this chunk (presumably "done" and a scalar tail -- confirm).
LBB1_646:
	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
	JE   LBB1_1109
	JMP  LBB1_647

// LBB1_651..LBB1_656: byte-wise multiply of the data at [rdx + rsi] by ymm0,
// written to [r8 + rsi] (rsi is a byte offset starting at 0).
// ymm0 is set up before this block; presumably it holds a broadcast scalar
// operand -- confirm against the code that branches here.
// Each 32-byte vector is widened with vpunpck{h,l}bw, multiplied in 16-bit
// lanes with vpmullw, masked with the constant at 0[rbp], and repacked with
// vpackuswb.
// NOTE(review): the constant at 0[rbp] is defined outside this chunk
// (presumably 0x00ff per word -- confirm); vmovdqa requires it to be
// 32-byte aligned.
// Setup: rax is rounded down to a multiple of 4 and negated so the main loop
// can count up to zero.
LBB1_651:
	LONG $0xfce08348             // and    rax, -4
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI1_0] */
	LONG $0xd860fdc5             // vpunpcklbw    ymm3, ymm0, ymm0

// Main loop, unrolled 4x: 128 bytes per iteration (offsets 0, 32, 64, 96).
LBB1_652:
	LONG $0x246ffec5; BYTE $0x32               // vmovdqu    ymm4, yword [rdx + rsi]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3024             // vmovdqu    yword [r8 + rsi], ymm4
	LONG $0x646ffec5; WORD $0x2032             // vmovdqu    ymm4, yword [rdx + rsi + 32]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm4
	LONG $0x646ffec5; WORD $0x4032             // vmovdqu    ymm4, yword [rdx + rsi + 64]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm4
	LONG $0x646ffec5; WORD $0x6032             // vmovdqu    ymm4, yword [rdx + rsi + 96]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm4
	LONG $0x80ee8348                           // sub    rsi, -128
	LONG $0x04c08348                           // add    rax, 4
	JNE  LBB1_652

// Remainder: r9 single-vector (32-byte) iterations, skipped when r9 == 0.
// r9 is negated and incremented up to zero. ymm0 is overwritten here with
// its own low-half unpack, so the original ymm0 value is gone afterwards.
LBB1_653:
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	JE   LBB1_656
	WORD $0xf749; BYTE $0xd9     // neg    r9
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI1_0] */
	LONG $0xc060fdc5             // vpunpcklbw    ymm0, ymm0, ymm0

LBB1_655:
	LONG $0x1c6ffec5; BYTE $0x32   // vmovdqu    ymm3, yword [rdx + rsi]
	LONG $0xe368e5c5               // vpunpckhbw    ymm4, ymm3, ymm3
	LONG $0xe1d5ddc5               // vpmullw    ymm4, ymm4, ymm1
	LONG $0xe2dbddc5               // vpand    ymm4, ymm4, ymm2
	LONG $0xdb60e5c5               // vpunpcklbw    ymm3, ymm3, ymm3
	LONG $0xd8d5e5c5               // vpmullw    ymm3, ymm3, ymm0
	LONG $0xdadbe5c5               // vpand    ymm3, ymm3, ymm2
	LONG $0xdc67e5c5               // vpackuswb    ymm3, ymm3, ymm4
	LONG $0x7f7ec1c4; WORD $0x301c // vmovdqu    yword [r8 + rsi], ymm3
	LONG $0x20c68348               // add    rsi, 32
	WORD $0xff49; BYTE $0xc1       // inc    r9
	JNE  LBB1_655

// Dispatch: rdi == r10 jumps to LBB1_1109, otherwise to LBB1_657. Both targets
// are outside this chunk (presumably "done" and a scalar tail -- confirm).
LBB1_656:
	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
	JE   LBB1_1109
	JMP  LBB1_657

// LBB1_661..LBB1_664: optional single 4-vector step of a 32-bit lane-wise
// multiply (vpmulld), covering 32 dwords at element index rdi.
// Entering at LBB1_661 zeroes rdi; LBB1_662 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_661:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vpmulld overwrites
// ymm0, so the multiplier is not preserved past this point.
LBB1_662:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_664
	LONG $0x407de2c4; WORD $0xba0c             // vpmulld    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x407de2c4; WORD $0xba54; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xba5c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xba44; BYTE $0x60 // vpmulld    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

// Dispatch: rsi == r10 jumps to LBB1_1109, otherwise to LBB1_665 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_664:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_665

// LBB1_669..LBB1_672: optional single 4-vector step of a 32-bit lane-wise
// multiply (vpmulld), covering 32 dwords at element index rdi.
// Entering at LBB1_669 zeroes rdi; LBB1_670 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_669:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vpmulld overwrites
// ymm0, so the multiplier is not preserved past this point.
LBB1_670:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_672
	LONG $0x407de2c4; WORD $0xba0c             // vpmulld    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x407de2c4; WORD $0xba54; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xba5c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xba44; BYTE $0x60 // vpmulld    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

// Dispatch: rsi == r10 jumps to LBB1_1109, otherwise to LBB1_673 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_672:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_673

// LBB1_677..LBB1_680: optional single 4-vector step of a 32-bit lane-wise
// add (vpaddd), covering 32 dwords at element index rdi.
// Entering at LBB1_677 zeroes rdi; LBB1_678 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_677:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vpaddd overwrites
// ymm0, so the addend is not preserved past this point.
LBB1_678:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_680
	LONG $0x0cfefdc5; BYTE $0xba               // vpaddd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20ba             // vpaddd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40ba             // vpaddd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x44fefdc5; WORD $0x60ba             // vpaddd    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

// Dispatch: rsi == r10 jumps to LBB1_1109, otherwise to LBB1_681 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_680:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_681

// LBB1_685..LBB1_688: optional single 4-vector step of a 32-bit lane-wise
// subtract (vpsubd), computing [rdx + 4*rdi ...] - ymm0 for 32 dwords and
// storing to [r8 + 4*rdi ...].
// Entering at LBB1_685 zeroes rdi; LBB1_686 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_685:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vpsubd writes its
// result into ymm0, so the subtrahend is not preserved past this point.
LBB1_686:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_688
	LONG $0x0c6ffec5; BYTE $0xba               // vmovdqu    ymm1, yword [rdx + 4*rdi]
	LONG $0x546ffec5; WORD $0x20ba             // vmovdqu    ymm2, yword [rdx + 4*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40ba             // vmovdqu    ymm3, yword [rdx + 4*rdi + 64]
	LONG $0x646ffec5; WORD $0x60ba             // vmovdqu    ymm4, yword [rdx + 4*rdi + 96]
	LONG $0xc8faf5c5                           // vpsubd    ymm1, ymm1, ymm0
	LONG $0xd0faedc5                           // vpsubd    ymm2, ymm2, ymm0
	LONG $0xd8fae5c5                           // vpsubd    ymm3, ymm3, ymm0
	LONG $0xc0faddc5                           // vpsubd    ymm0, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

// Dispatch: rsi == r10 jumps to LBB1_1109, otherwise to LBB1_689 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_688:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_689

// LBB1_693..LBB1_696: optional single 4-vector step of a 32-bit lane-wise
// add (vpaddd), covering 32 dwords at element index rdi.
// Entering at LBB1_693 zeroes rdi; LBB1_694 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_693:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vpaddd overwrites
// ymm0, so the addend is not preserved past this point.
LBB1_694:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_696
	LONG $0x0cfefdc5; BYTE $0xba               // vpaddd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20ba             // vpaddd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40ba             // vpaddd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x44fefdc5; WORD $0x60ba             // vpaddd    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

// Dispatch: rsi == r10 jumps to LBB1_1109, otherwise to LBB1_697 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_696:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_697

// LBB1_701..LBB1_704: optional single 4-vector step of a 32-bit lane-wise
// subtract (vpsubd), computing [rdx + 4*rdi ...] - ymm0 for 32 dwords and
// storing to [r8 + 4*rdi ...].
// Entering at LBB1_701 zeroes rdi; LBB1_702 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_701:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vpsubd writes its
// result into ymm0, so the subtrahend is not preserved past this point.
LBB1_702:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_704
	LONG $0x0c6ffec5; BYTE $0xba               // vmovdqu    ymm1, yword [rdx + 4*rdi]
	LONG $0x546ffec5; WORD $0x20ba             // vmovdqu    ymm2, yword [rdx + 4*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40ba             // vmovdqu    ymm3, yword [rdx + 4*rdi + 64]
	LONG $0x646ffec5; WORD $0x60ba             // vmovdqu    ymm4, yword [rdx + 4*rdi + 96]
	LONG $0xc8faf5c5                           // vpsubd    ymm1, ymm1, ymm0
	LONG $0xd0faedc5                           // vpsubd    ymm2, ymm2, ymm0
	LONG $0xd8fae5c5                           // vpsubd    ymm3, ymm3, ymm0
	LONG $0xc0faddc5                           // vpsubd    ymm0, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

// Dispatch: rsi == r10 jumps to LBB1_1109, otherwise to LBB1_705 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_704:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_705

// LBB1_709..LBB1_712: optional single 4-vector step of a packed
// double-precision multiply (vmulpd) by ymm1, covering 16 doubles at element
// index rdi.
// Entering at LBB1_709 zeroes rdi; LBB1_710 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_709:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vmulpd overwrites
// ymm1, so the multiplier is not preserved past this point.
LBB1_710:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_712
	LONG $0x1459f5c5; BYTE $0xfa               // vmulpd    ymm2, ymm1, yword [rdx + 8*rdi]
	LONG $0x5c59f5c5; WORD $0x20fa             // vmulpd    ymm3, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x6459f5c5; WORD $0x40fa             // vmulpd    ymm4, ymm1, yword [rdx + 8*rdi + 64]
	LONG $0x4c59f5c5; WORD $0x60fa             // vmulpd    ymm1, ymm1, yword [rdx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm1

// Dispatch: rcx == rax jumps to LBB1_1109, otherwise to LBB1_713 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_712:
	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
	JE   LBB1_1109
	JMP  LBB1_713

// LBB1_717..LBB1_720: optional single 4-vector step of a packed
// double-precision multiply (vmulpd) by ymm1, covering 16 doubles at element
// index rdi.
// Entering at LBB1_717 zeroes rdi; LBB1_718 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_717:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vmulpd overwrites
// ymm1, so the multiplier is not preserved past this point.
LBB1_718:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_720
	LONG $0x1459f5c5; BYTE $0xfa               // vmulpd    ymm2, ymm1, yword [rdx + 8*rdi]
	LONG $0x5c59f5c5; WORD $0x20fa             // vmulpd    ymm3, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x6459f5c5; WORD $0x40fa             // vmulpd    ymm4, ymm1, yword [rdx + 8*rdi + 64]
	LONG $0x4c59f5c5; WORD $0x60fa             // vmulpd    ymm1, ymm1, yword [rdx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm1

// Dispatch: rcx == rax jumps to LBB1_1109, otherwise to LBB1_721 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_720:
	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
	JE   LBB1_1109
	JMP  LBB1_721

// LBB1_725..LBB1_728: optional single 4-vector step of a packed
// double-precision add (vaddpd) with ymm1, covering 16 doubles at element
// index rdi.
// Entering at LBB1_725 zeroes rdi; LBB1_726 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_725:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vaddpd overwrites
// ymm1, so the addend is not preserved past this point.
LBB1_726:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_728
	LONG $0x1458f5c5; BYTE $0xfa               // vaddpd    ymm2, ymm1, yword [rdx + 8*rdi]
	LONG $0x5c58f5c5; WORD $0x20fa             // vaddpd    ymm3, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x6458f5c5; WORD $0x40fa             // vaddpd    ymm4, ymm1, yword [rdx + 8*rdi + 64]
	LONG $0x4c58f5c5; WORD $0x60fa             // vaddpd    ymm1, ymm1, yword [rdx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm1

// Dispatch: rcx == rax jumps to LBB1_1109, otherwise to LBB1_729 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_728:
	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
	JE   LBB1_1109
	JMP  LBB1_729

// LBB1_733..LBB1_736: optional single 4-vector step of a packed
// double-precision subtract (vsubpd), computing [rdx + 8*rsi ...] - ymm1 for
// 16 doubles and storing to [r8 + 8*rsi ...]. This block indexes with rsi.
// Entering at LBB1_733 zeroes rsi; LBB1_734 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rsi already advanced
// (confirm).
LBB1_733:
	WORD $0xf631 // xor    esi, esi

// The step runs only when bit 0 of r9 is set. The last vsubpd writes its
// result into ymm1, so the subtrahend is not preserved past this point.
LBB1_734:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_736
	LONG $0x1410fdc5; BYTE $0xf2               // vmovupd    ymm2, yword [rdx + 8*rsi]
	LONG $0x5c10fdc5; WORD $0x20f2             // vmovupd    ymm3, yword [rdx + 8*rsi + 32]
	LONG $0x6410fdc5; WORD $0x40f2             // vmovupd    ymm4, yword [rdx + 8*rsi + 64]
	LONG $0x6c10fdc5; WORD $0x60f2             // vmovupd    ymm5, yword [rdx + 8*rsi + 96]
	LONG $0xd15cedc5                           // vsubpd    ymm2, ymm2, ymm1
	LONG $0xd95ce5c5                           // vsubpd    ymm3, ymm3, ymm1
	LONG $0xe15cddc5                           // vsubpd    ymm4, ymm4, ymm1
	LONG $0xc95cd5c5                           // vsubpd    ymm1, ymm5, ymm1
	LONG $0x117dc1c4; WORD $0xf014             // vmovupd    yword [r8 + 8*rsi], ymm2
	LONG $0x117dc1c4; WORD $0xf05c; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf04c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm1

// Dispatch: rcx == rax jumps to LBB1_1109, otherwise to LBB1_737 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_736:
	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
	JE   LBB1_1109
	JMP  LBB1_737

// LBB1_741..LBB1_744: optional single 4-vector step of a packed
// double-precision add (vaddpd) with ymm1, covering 16 doubles at element
// index rdi.
// Entering at LBB1_741 zeroes rdi; LBB1_742 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_741:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vaddpd overwrites
// ymm1, so the addend is not preserved past this point.
LBB1_742:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_744
	LONG $0x1458f5c5; BYTE $0xfa               // vaddpd    ymm2, ymm1, yword [rdx + 8*rdi]
	LONG $0x5c58f5c5; WORD $0x20fa             // vaddpd    ymm3, ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x6458f5c5; WORD $0x40fa             // vaddpd    ymm4, ymm1, yword [rdx + 8*rdi + 64]
	LONG $0x4c58f5c5; WORD $0x60fa             // vaddpd    ymm1, ymm1, yword [rdx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm1

// Dispatch: rcx == rax jumps to LBB1_1109, otherwise to LBB1_745 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_744:
	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
	JE   LBB1_1109
	JMP  LBB1_745

// LBB1_749..LBB1_752: optional single 4-vector step of a packed
// double-precision subtract (vsubpd), computing [rdx + 8*rsi ...] - ymm1 for
// 16 doubles and storing to [r8 + 8*rsi ...]. This block indexes with rsi.
// Entering at LBB1_749 zeroes rsi; LBB1_750 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rsi already advanced
// (confirm).
LBB1_749:
	WORD $0xf631 // xor    esi, esi

// The step runs only when bit 0 of r9 is set. The last vsubpd writes its
// result into ymm1, so the subtrahend is not preserved past this point.
LBB1_750:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_752
	LONG $0x1410fdc5; BYTE $0xf2               // vmovupd    ymm2, yword [rdx + 8*rsi]
	LONG $0x5c10fdc5; WORD $0x20f2             // vmovupd    ymm3, yword [rdx + 8*rsi + 32]
	LONG $0x6410fdc5; WORD $0x40f2             // vmovupd    ymm4, yword [rdx + 8*rsi + 64]
	LONG $0x6c10fdc5; WORD $0x60f2             // vmovupd    ymm5, yword [rdx + 8*rsi + 96]
	LONG $0xd15cedc5                           // vsubpd    ymm2, ymm2, ymm1
	LONG $0xd95ce5c5                           // vsubpd    ymm3, ymm3, ymm1
	LONG $0xe15cddc5                           // vsubpd    ymm4, ymm4, ymm1
	LONG $0xc95cd5c5                           // vsubpd    ymm1, ymm5, ymm1
	LONG $0x117dc1c4; WORD $0xf014             // vmovupd    yword [r8 + 8*rsi], ymm2
	LONG $0x117dc1c4; WORD $0xf05c; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf04c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm1

// Dispatch: rcx == rax jumps to LBB1_1109, otherwise to LBB1_753 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_752:
	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
	JE   LBB1_1109
	JMP  LBB1_753

// LBB1_757..LBB1_760: optional single 4-vector step of a byte lane-wise add
// (vpaddb) with ymm0, covering 128 bytes at byte offset rdi.
// Entering at LBB1_757 zeroes rdi; LBB1_758 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_757:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vpaddb overwrites
// ymm0, so the addend is not preserved past this point.
LBB1_758:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_760
	LONG $0x0cfcfdc5; BYTE $0x3a               // vpaddb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54fcfdc5; WORD $0x203a             // vpaddb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x403a             // vpaddb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x44fcfdc5; WORD $0x603a             // vpaddb    ymm0, ymm0, yword [rdx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

// Dispatch: rsi == r10 jumps to LBB1_1109, otherwise to LBB1_761 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_760:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_761

// LBB1_765..LBB1_768: optional single 4-vector step of a byte lane-wise
// subtract (vpsubb), computing [rdx + rdi ...] - ymm0 for 128 bytes and
// storing to [r8 + rdi ...].
// Entering at LBB1_765 zeroes rdi; LBB1_766 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_765:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vpsubb writes its
// result into ymm0, so the subtrahend is not preserved past this point.
LBB1_766:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_768
	LONG $0x0c6ffec5; BYTE $0x3a               // vmovdqu    ymm1, yword [rdx + rdi]
	LONG $0x546ffec5; WORD $0x203a             // vmovdqu    ymm2, yword [rdx + rdi + 32]
	LONG $0x5c6ffec5; WORD $0x403a             // vmovdqu    ymm3, yword [rdx + rdi + 64]
	LONG $0x646ffec5; WORD $0x603a             // vmovdqu    ymm4, yword [rdx + rdi + 96]
	LONG $0xc8f8f5c5                           // vpsubb    ymm1, ymm1, ymm0
	LONG $0xd0f8edc5                           // vpsubb    ymm2, ymm2, ymm0
	LONG $0xd8f8e5c5                           // vpsubb    ymm3, ymm3, ymm0
	LONG $0xc0f8ddc5                           // vpsubb    ymm0, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

// Dispatch: rsi == r10 jumps to LBB1_1109, otherwise to LBB1_769 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_768:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_769

// LBB1_773..LBB1_776: optional single 4-vector step of a byte lane-wise add
// (vpaddb) with ymm0, covering 128 bytes at byte offset rdi.
// Entering at LBB1_773 zeroes rdi; LBB1_774 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_773:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vpaddb overwrites
// ymm0, so the addend is not preserved past this point.
LBB1_774:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_776
	LONG $0x0cfcfdc5; BYTE $0x3a               // vpaddb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54fcfdc5; WORD $0x203a             // vpaddb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x403a             // vpaddb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x44fcfdc5; WORD $0x603a             // vpaddb    ymm0, ymm0, yword [rdx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

// Dispatch: rsi == r10 jumps to LBB1_1109, otherwise to LBB1_777 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_776:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_777

// LBB1_781..LBB1_784: optional single 4-vector step of a byte lane-wise
// subtract (vpsubb), computing [rdx + rdi ...] - ymm0 for 128 bytes and
// storing to [r8 + rdi ...].
// Entering at LBB1_781 zeroes rdi; LBB1_782 is a separate label, presumably
// reached from an unrolled loop outside this chunk with rdi already advanced
// (confirm).
LBB1_781:
	WORD $0xff31 // xor    edi, edi

// The step runs only when bit 0 of r9 is set. The last vpsubb writes its
// result into ymm0, so the subtrahend is not preserved past this point.
LBB1_782:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_784
	LONG $0x0c6ffec5; BYTE $0x3a               // vmovdqu    ymm1, yword [rdx + rdi]
	LONG $0x546ffec5; WORD $0x203a             // vmovdqu    ymm2, yword [rdx + rdi + 32]
	LONG $0x5c6ffec5; WORD $0x403a             // vmovdqu    ymm3, yword [rdx + rdi + 64]
	LONG $0x646ffec5; WORD $0x603a             // vmovdqu    ymm4, yword [rdx + rdi + 96]
	LONG $0xc8f8f5c5                           // vpsubb    ymm1, ymm1, ymm0
	LONG $0xd0f8edc5                           // vpsubb    ymm2, ymm2, ymm0
	LONG $0xd8f8e5c5                           // vpsubb    ymm3, ymm3, ymm0
	LONG $0xc0f8ddc5                           // vpsubb    ymm0, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

// Dispatch: rsi == r10 jumps to LBB1_1109, otherwise to LBB1_785 (both outside
// this chunk; presumably "done" and a scalar tail -- confirm).
LBB1_784:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_785

// LBB1_789: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_790.
LBB1_789:
	WORD $0xff31 // xor    edi, edi

// LBB1_790: single 128-byte step (4 vectors of 64-bit lanes, rdi scaled by 8),
// executed only when bit 0 of r9 is set. For each loaded lane x it stores
//   lo32(x)*lo32(ymm0) + ((lo32(x)*lo32(ymm1) + hi32(x)*lo32(ymm0)) << 32)
// to the matching lane at [r8 + 8*rdi ..].
// NOTE(review): this is the low 64 bits of x*ymm0 only if ymm1 holds
// ymm0 >> 32; ymm0/ymm1 and r9 are set up elsewhere in this function -- confirm.
LBB1_790:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_792
	LONG $0x146ffec5; BYTE $0xfa               // vmovdqu    ymm2, yword [rdx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60fa             // vmovdqu    ymm5, yword [rdx + 8*rdi + 96]
	// Vector 0 (ymm2): cross terms in ymm6/ymm7, then add the low product.
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	// Vector 1 (ymm3): same sequence.
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	// Vector 2 (ymm4): same sequence.
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	// Vector 3 (ymm5): same arithmetic, but it reuses ymm1 and ymm0 as
	// scratch/result, so both multiplier registers are overwritten here.
	LONG $0xc9f4d5c5                           // vpmuludq    ymm1, ymm5, ymm1
	LONG $0xd573cdc5; BYTE $0x20               // vpsrlq    ymm6, ymm5, 32
	LONG $0xf0f4cdc5                           // vpmuludq    ymm6, ymm6, ymm0
	LONG $0xced4f5c5                           // vpaddq    ymm1, ymm1, ymm6
	LONG $0xf173f5c5; BYTE $0x20               // vpsllq    ymm1, ymm1, 32
	LONG $0xc0f4d5c5                           // vpmuludq    ymm0, ymm5, ymm0
	LONG $0xc1d4fdc5                           // vpaddq    ymm0, ymm0, ymm1
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB1_792: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_793.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_792:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_793

// LBB1_797: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_798.
LBB1_797:
	WORD $0xff31 // xor    edi, edi

// LBB1_798: single 128-byte step (4 vectors of 64-bit lanes, rdi scaled by 8),
// executed only when bit 0 of r9 is set. For each loaded lane x it stores
//   lo32(x)*lo32(ymm0) + ((lo32(x)*lo32(ymm1) + hi32(x)*lo32(ymm0)) << 32)
// to the matching lane at [r8 + 8*rdi ..].
// NOTE(review): this is the low 64 bits of x*ymm0 only if ymm1 holds
// ymm0 >> 32; ymm0/ymm1 and r9 are set up elsewhere in this function -- confirm.
LBB1_798:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_800
	LONG $0x146ffec5; BYTE $0xfa               // vmovdqu    ymm2, yword [rdx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60fa             // vmovdqu    ymm5, yword [rdx + 8*rdi + 96]
	// Vector 0 (ymm2): cross terms in ymm6/ymm7, then add the low product.
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	// Vector 1 (ymm3): same sequence.
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	// Vector 2 (ymm4): same sequence.
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	// Vector 3 (ymm5): same arithmetic, but it reuses ymm1 and ymm0 as
	// scratch/result, so both multiplier registers are overwritten here.
	LONG $0xc9f4d5c5                           // vpmuludq    ymm1, ymm5, ymm1
	LONG $0xd573cdc5; BYTE $0x20               // vpsrlq    ymm6, ymm5, 32
	LONG $0xf0f4cdc5                           // vpmuludq    ymm6, ymm6, ymm0
	LONG $0xced4f5c5                           // vpaddq    ymm1, ymm1, ymm6
	LONG $0xf173f5c5; BYTE $0x20               // vpsllq    ymm1, ymm1, 32
	LONG $0xc0f4d5c5                           // vpmuludq    ymm0, ymm5, ymm0
	LONG $0xc1d4fdc5                           // vpaddq    ymm0, ymm0, ymm1
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB1_800: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_801.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_800:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_801

// LBB1_805: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_806.
LBB1_805:
	WORD $0xff31 // xor    edi, edi

// LBB1_806: single 128-byte step (4 x 32-byte vectors, rdi scaled by 8),
// executed only when bit 0 of r9 is set:
// [r8 + 8*rdi ..] = ymm0 + [rdx + 8*rdi ..] on 64-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_806:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_808
	LONG $0x0cd4fdc5; BYTE $0xfa               // vpaddq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20fa             // vpaddq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40fa             // vpaddq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	// The fourth add writes its result into ymm0, replacing the common operand.
	LONG $0x44d4fdc5; WORD $0x60fa             // vpaddq    ymm0, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB1_808: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_809.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_808:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_809

// LBB1_813: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_814.
LBB1_813:
	WORD $0xff31 // xor    edi, edi

// LBB1_814: single 128-byte step (4 x 32-byte vectors, rdi scaled by 8),
// executed only when bit 0 of r9 is set:
// [r8 + 8*rdi ..] = [rdx + 8*rdi ..] - ymm0 on 64-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_814:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_816
	LONG $0x0c6ffec5; BYTE $0xfa               // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0x646ffec5; WORD $0x60fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 96]
	LONG $0xc8fbf5c5                           // vpsubq    ymm1, ymm1, ymm0
	LONG $0xd0fbedc5                           // vpsubq    ymm2, ymm2, ymm0
	LONG $0xd8fbe5c5                           // vpsubq    ymm3, ymm3, ymm0
	// The fourth subtract writes its result into ymm0, replacing the subtrahend.
	LONG $0xc0fbddc5                           // vpsubq    ymm0, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB1_816: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_817.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_816:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_817

// LBB1_821: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_822.
LBB1_821:
	WORD $0xff31 // xor    edi, edi

// LBB1_822: single 128-byte step (4 x 32-byte vectors, rdi scaled by 8),
// executed only when bit 0 of r9 is set:
// [r8 + 8*rdi ..] = ymm0 + [rdx + 8*rdi ..] on 64-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_822:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_824
	LONG $0x0cd4fdc5; BYTE $0xfa               // vpaddq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20fa             // vpaddq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40fa             // vpaddq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	// The fourth add writes its result into ymm0, replacing the common operand.
	LONG $0x44d4fdc5; WORD $0x60fa             // vpaddq    ymm0, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB1_824: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_825.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_824:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_825

// LBB1_829: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_830.
LBB1_829:
	WORD $0xff31 // xor    edi, edi

// LBB1_830: single 128-byte step (4 x 32-byte vectors, rdi scaled by 8),
// executed only when bit 0 of r9 is set:
// [r8 + 8*rdi ..] = [rdx + 8*rdi ..] - ymm0 on 64-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_830:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_832
	LONG $0x0c6ffec5; BYTE $0xfa               // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0x646ffec5; WORD $0x60fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 96]
	LONG $0xc8fbf5c5                           // vpsubq    ymm1, ymm1, ymm0
	LONG $0xd0fbedc5                           // vpsubq    ymm2, ymm2, ymm0
	LONG $0xd8fbe5c5                           // vpsubq    ymm3, ymm3, ymm0
	// The fourth subtract writes its result into ymm0, replacing the subtrahend.
	LONG $0xc0fbddc5                           // vpsubq    ymm0, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB1_832: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_833.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_832:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_833

// LBB1_837: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_838.
LBB1_837:
	WORD $0xff31 // xor    edi, edi

// LBB1_838: single 64-byte step (2 x 32-byte vectors, rdi scaled by 2),
// executed only when bit 0 of r9 is set:
// [r8 + 2*rdi ..] = low 16 bits of ymm0 * [rdx + 2*rdi ..] per 16-bit lane.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_838:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_840
	LONG $0x0cd5fdc5; BYTE $0x7a               // vpmullw    ymm1, ymm0, yword [rdx + 2*rdi]
	// The second multiply writes its result into ymm0, replacing the multiplier.
	LONG $0x44d5fdc5; WORD $0x207a             // vpmullw    ymm0, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB1_840: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_841.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_840:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_841

// LBB1_845: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_846.
LBB1_845:
	WORD $0xff31 // xor    edi, edi

// LBB1_846: single 64-byte step (2 x 32-byte vectors, rdi scaled by 2),
// executed only when bit 0 of r9 is set:
// [r8 + 2*rdi ..] = low 16 bits of ymm0 * [rdx + 2*rdi ..] per 16-bit lane.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_846:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_848
	LONG $0x0cd5fdc5; BYTE $0x7a               // vpmullw    ymm1, ymm0, yword [rdx + 2*rdi]
	// The second multiply writes its result into ymm0, replacing the multiplier.
	LONG $0x44d5fdc5; WORD $0x207a             // vpmullw    ymm0, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB1_848: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_849.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_848:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_849

// LBB1_853: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_854.
LBB1_853:
	WORD $0xff31 // xor    edi, edi

// LBB1_854: single 64-byte step (2 x 32-byte vectors, rdi scaled by 2),
// executed only when bit 0 of r9 is set:
// [r8 + 2*rdi ..] = low 16 bits of ymm0 * [rdx + 2*rdi ..] per 16-bit lane.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_854:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_856
	LONG $0x0cd5fdc5; BYTE $0x7a               // vpmullw    ymm1, ymm0, yword [rdx + 2*rdi]
	// The second multiply writes its result into ymm0, replacing the multiplier.
	LONG $0x44d5fdc5; WORD $0x207a             // vpmullw    ymm0, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB1_856: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_857.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_856:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_857

// LBB1_861: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_862.
LBB1_861:
	WORD $0xff31 // xor    edi, edi

// LBB1_862: single 64-byte step (2 x 32-byte vectors, rdi scaled by 2),
// executed only when bit 0 of r9 is set:
// [r8 + 2*rdi ..] = low 16 bits of ymm0 * [rdx + 2*rdi ..] per 16-bit lane.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_862:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_864
	LONG $0x0cd5fdc5; BYTE $0x7a               // vpmullw    ymm1, ymm0, yword [rdx + 2*rdi]
	// The second multiply writes its result into ymm0, replacing the multiplier.
	LONG $0x44d5fdc5; WORD $0x207a             // vpmullw    ymm0, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB1_864: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_865.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_864:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_865

// LBB1_869: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_870.
LBB1_869:
	WORD $0xff31 // xor    edi, edi

// LBB1_870: single 64-byte step (2 x 32-byte vectors, rdi scaled by 2),
// executed only when bit 0 of r9 is set:
// [r8 + 2*rdi ..] = ymm0 + [rdx + 2*rdi ..] on 16-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_870:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_872
	LONG $0x0cfdfdc5; BYTE $0x7a               // vpaddw    ymm1, ymm0, yword [rdx + 2*rdi]
	// The second add writes its result into ymm0, replacing the common operand.
	LONG $0x44fdfdc5; WORD $0x207a             // vpaddw    ymm0, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB1_872: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_873.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_872:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_873

// LBB1_877: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_878.
LBB1_877:
	WORD $0xff31 // xor    edi, edi

// LBB1_878: single 64-byte step (2 x 32-byte vectors, rdi scaled by 2),
// executed only when bit 0 of r9 is set:
// [r8 + 2*rdi ..] = ymm0 + [rdx + 2*rdi ..] on 16-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_878:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_880
	LONG $0x0cfdfdc5; BYTE $0x7a               // vpaddw    ymm1, ymm0, yword [rdx + 2*rdi]
	// The second add writes its result into ymm0, replacing the common operand.
	LONG $0x44fdfdc5; WORD $0x207a             // vpaddw    ymm0, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB1_880: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_881.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_880:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_881

// LBB1_885: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_886.
LBB1_885:
	WORD $0xff31 // xor    edi, edi

// LBB1_886: single 64-byte step (2 x 32-byte vectors, rdi scaled by 2),
// executed only when bit 0 of r9 is set:
// [r8 + 2*rdi ..] = [rdx + 2*rdi ..] - ymm0 on 16-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_886:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_888
	LONG $0x0c6ffec5; BYTE $0x7a               // vmovdqu    ymm1, yword [rdx + 2*rdi]
	LONG $0x546ffec5; WORD $0x207a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 32]
	LONG $0xc8f9f5c5                           // vpsubw    ymm1, ymm1, ymm0
	// The second subtract writes its result into ymm0, replacing the subtrahend.
	LONG $0xc0f9edc5                           // vpsubw    ymm0, ymm2, ymm0
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB1_888: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_889.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_888:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_889

// LBB1_893: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_894.
LBB1_893:
	WORD $0xff31 // xor    edi, edi

// LBB1_894: single 64-byte step (2 x 32-byte vectors, rdi scaled by 2),
// executed only when bit 0 of r9 is set:
// [r8 + 2*rdi ..] = [rdx + 2*rdi ..] - ymm0 on 16-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_894:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_896
	LONG $0x0c6ffec5; BYTE $0x7a               // vmovdqu    ymm1, yword [rdx + 2*rdi]
	LONG $0x546ffec5; WORD $0x207a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 32]
	LONG $0xc8f9f5c5                           // vpsubw    ymm1, ymm1, ymm0
	// The second subtract writes its result into ymm0, replacing the subtrahend.
	LONG $0xc0f9edc5                           // vpsubw    ymm0, ymm2, ymm0
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB1_896: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_897.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_896:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_897

// LBB1_901: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_902.
LBB1_901:
	WORD $0xff31 // xor    edi, edi

// LBB1_902: single 64-byte step (2 x 32-byte vectors, rdi scaled by 2),
// executed only when bit 0 of r9 is set:
// [r8 + 2*rdi ..] = ymm0 + [rdx + 2*rdi ..] on 16-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_902:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_904
	LONG $0x0cfdfdc5; BYTE $0x7a               // vpaddw    ymm1, ymm0, yword [rdx + 2*rdi]
	// The second add writes its result into ymm0, replacing the common operand.
	LONG $0x44fdfdc5; WORD $0x207a             // vpaddw    ymm0, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB1_904: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_905.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_904:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_905

// LBB1_909: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_910.
LBB1_909:
	WORD $0xff31 // xor    edi, edi

// LBB1_910: single 64-byte step (2 x 32-byte vectors, rdi scaled by 2),
// executed only when bit 0 of r9 is set:
// [r8 + 2*rdi ..] = ymm0 + [rdx + 2*rdi ..] on 16-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_910:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_912
	LONG $0x0cfdfdc5; BYTE $0x7a               // vpaddw    ymm1, ymm0, yword [rdx + 2*rdi]
	// The second add writes its result into ymm0, replacing the common operand.
	LONG $0x44fdfdc5; WORD $0x207a             // vpaddw    ymm0, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB1_912: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_913.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_912:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_913

// LBB1_917: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_918.
LBB1_917:
	WORD $0xff31 // xor    edi, edi

// LBB1_918: single 64-byte step (2 x 32-byte vectors, rdi scaled by 2),
// executed only when bit 0 of r9 is set:
// [r8 + 2*rdi ..] = [rdx + 2*rdi ..] - ymm0 on 16-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_918:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_920
	LONG $0x0c6ffec5; BYTE $0x7a               // vmovdqu    ymm1, yword [rdx + 2*rdi]
	LONG $0x546ffec5; WORD $0x207a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 32]
	LONG $0xc8f9f5c5                           // vpsubw    ymm1, ymm1, ymm0
	// The second subtract writes its result into ymm0, replacing the subtrahend.
	LONG $0xc0f9edc5                           // vpsubw    ymm0, ymm2, ymm0
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB1_920: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_921.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_920:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_921

// LBB1_925: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_926.
LBB1_925:
	WORD $0xff31 // xor    edi, edi

// LBB1_926: single 64-byte step (2 x 32-byte vectors, rdi scaled by 2),
// executed only when bit 0 of r9 is set:
// [r8 + 2*rdi ..] = [rdx + 2*rdi ..] - ymm0 on 16-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_926:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_928
	LONG $0x0c6ffec5; BYTE $0x7a               // vmovdqu    ymm1, yword [rdx + 2*rdi]
	LONG $0x546ffec5; WORD $0x207a             // vmovdqu    ymm2, yword [rdx + 2*rdi + 32]
	LONG $0xc8f9f5c5                           // vpsubw    ymm1, ymm1, ymm0
	// The second subtract writes its result into ymm0, replacing the subtrahend.
	LONG $0xc0f9edc5                           // vpsubw    ymm0, ymm2, ymm0
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB1_928: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_929.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_928:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_929

// LBB1_933: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_934.
LBB1_933:
	WORD $0xff31 // xor    edi, edi

// LBB1_934: single 128-byte step (4 vectors of 64-bit lanes, rdi scaled by 8),
// executed only when bit 0 of r9 is set. For each loaded lane x it stores
//   lo32(x)*lo32(ymm0) + ((lo32(x)*lo32(ymm1) + hi32(x)*lo32(ymm0)) << 32)
// to the matching lane at [r8 + 8*rdi ..].
// NOTE(review): this is the low 64 bits of x*ymm0 only if ymm1 holds
// ymm0 >> 32; ymm0/ymm1 and r9 are set up elsewhere in this function -- confirm.
LBB1_934:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_936
	LONG $0x146ffec5; BYTE $0xfa               // vmovdqu    ymm2, yword [rdx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60fa             // vmovdqu    ymm5, yword [rdx + 8*rdi + 96]
	// Vector 0 (ymm2): cross terms in ymm6/ymm7, then add the low product.
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	// Vector 1 (ymm3): same sequence.
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	// Vector 2 (ymm4): same sequence.
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	// Vector 3 (ymm5): same arithmetic, but it reuses ymm1 and ymm0 as
	// scratch/result, so both multiplier registers are overwritten here.
	LONG $0xc9f4d5c5                           // vpmuludq    ymm1, ymm5, ymm1
	LONG $0xd573cdc5; BYTE $0x20               // vpsrlq    ymm6, ymm5, 32
	LONG $0xf0f4cdc5                           // vpmuludq    ymm6, ymm6, ymm0
	LONG $0xced4f5c5                           // vpaddq    ymm1, ymm1, ymm6
	LONG $0xf173f5c5; BYTE $0x20               // vpsllq    ymm1, ymm1, 32
	LONG $0xc0f4d5c5                           // vpmuludq    ymm0, ymm5, ymm0
	LONG $0xc1d4fdc5                           // vpaddq    ymm0, ymm0, ymm1
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB1_936: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_937.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_936:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_937

// LBB1_941: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_942.
LBB1_941:
	WORD $0xff31 // xor    edi, edi

// LBB1_942: single 128-byte step (4 x 32-byte vectors, rdi scaled by 4),
// executed only when bit 0 of r9 is set:
// [r8 + 4*rdi ..] = ymm1 * [rdx + 4*rdi ..] on packed single-precision lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_942:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_944
	LONG $0x1459f4c5; BYTE $0xba               // vmulps    ymm2, ymm1, yword [rdx + 4*rdi]
	LONG $0x5c59f4c5; WORD $0x20ba             // vmulps    ymm3, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x6459f4c5; WORD $0x40ba             // vmulps    ymm4, ymm1, yword [rdx + 4*rdi + 64]
	// The fourth multiply writes its result into ymm1, replacing the multiplier.
	LONG $0x4c59f4c5; WORD $0x60ba             // vmulps    ymm1, ymm1, yword [rdx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm1

// LBB1_944: branch to LBB1_1109 when rcx equals rax, otherwise to LBB1_945.
// NOTE(review): presumably rcx = elements covered by the vector code and
// rax = total count; both registers and both targets are defined elsewhere
// in this function -- confirm.
LBB1_944:
	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
	JE   LBB1_1109
	JMP  LBB1_945

// LBB1_949: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_950.
LBB1_949:
	WORD $0xff31 // xor    edi, edi

// LBB1_950: single 128-byte step (4 vectors of 64-bit lanes, rdi scaled by 8),
// executed only when bit 0 of r9 is set. For each loaded lane x it stores
//   lo32(x)*lo32(ymm0) + ((lo32(x)*lo32(ymm1) + hi32(x)*lo32(ymm0)) << 32)
// to the matching lane at [r8 + 8*rdi ..].
// NOTE(review): this is the low 64 bits of x*ymm0 only if ymm1 holds
// ymm0 >> 32; ymm0/ymm1 and r9 are set up elsewhere in this function -- confirm.
LBB1_950:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_952
	LONG $0x146ffec5; BYTE $0xfa               // vmovdqu    ymm2, yword [rdx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60fa             // vmovdqu    ymm5, yword [rdx + 8*rdi + 96]
	// Vector 0 (ymm2): cross terms in ymm6/ymm7, then add the low product.
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	// Vector 1 (ymm3): same sequence.
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	// Vector 2 (ymm4): same sequence.
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	// Vector 3 (ymm5): same arithmetic, but it reuses ymm1 and ymm0 as
	// scratch/result, so both multiplier registers are overwritten here.
	LONG $0xc9f4d5c5                           // vpmuludq    ymm1, ymm5, ymm1
	LONG $0xd573cdc5; BYTE $0x20               // vpsrlq    ymm6, ymm5, 32
	LONG $0xf0f4cdc5                           // vpmuludq    ymm6, ymm6, ymm0
	LONG $0xced4f5c5                           // vpaddq    ymm1, ymm1, ymm6
	LONG $0xf173f5c5; BYTE $0x20               // vpsllq    ymm1, ymm1, 32
	LONG $0xc0f4d5c5                           // vpmuludq    ymm0, ymm5, ymm0
	LONG $0xc1d4fdc5                           // vpaddq    ymm0, ymm0, ymm1
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB1_952: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_953.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_952:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_953

// LBB1_957: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_958.
LBB1_957:
	WORD $0xff31 // xor    edi, edi

// LBB1_958: single 128-byte step (4 x 32-byte vectors, rdi scaled by 4),
// executed only when bit 0 of r9 is set:
// [r8 + 4*rdi ..] = ymm1 * [rdx + 4*rdi ..] on packed single-precision lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_958:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_960
	LONG $0x1459f4c5; BYTE $0xba               // vmulps    ymm2, ymm1, yword [rdx + 4*rdi]
	LONG $0x5c59f4c5; WORD $0x20ba             // vmulps    ymm3, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x6459f4c5; WORD $0x40ba             // vmulps    ymm4, ymm1, yword [rdx + 4*rdi + 64]
	// The fourth multiply writes its result into ymm1, replacing the multiplier.
	LONG $0x4c59f4c5; WORD $0x60ba             // vmulps    ymm1, ymm1, yword [rdx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm1

// LBB1_960: branch to LBB1_1109 when rcx equals rax, otherwise to LBB1_961.
// NOTE(review): presumably rcx = elements covered by the vector code and
// rax = total count; both registers and both targets are defined elsewhere
// in this function -- confirm.
LBB1_960:
	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
	JE   LBB1_1109
	JMP  LBB1_961

// LBB1_965: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_966.
LBB1_965:
	WORD $0xff31 // xor    edi, edi

// LBB1_966: single 128-byte step (4 x 32-byte vectors, rdi scaled by 8),
// executed only when bit 0 of r9 is set:
// [r8 + 8*rdi ..] = ymm0 + [rdx + 8*rdi ..] on 64-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_966:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_968
	LONG $0x0cd4fdc5; BYTE $0xfa               // vpaddq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20fa             // vpaddq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40fa             // vpaddq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	// The fourth add writes its result into ymm0, replacing the common operand.
	LONG $0x44d4fdc5; WORD $0x60fa             // vpaddq    ymm0, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB1_968: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_969.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_968:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_969

// LBB1_973: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_974.
LBB1_973:
	WORD $0xff31 // xor    edi, edi

// LBB1_974: single 128-byte step (4 x 32-byte vectors, rdi scaled by 4),
// executed only when bit 0 of r9 is set:
// [r8 + 4*rdi ..] = ymm1 + [rdx + 4*rdi ..] on packed single-precision lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_974:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_976
	LONG $0x1458f4c5; BYTE $0xba               // vaddps    ymm2, ymm1, yword [rdx + 4*rdi]
	LONG $0x5c58f4c5; WORD $0x20ba             // vaddps    ymm3, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x6458f4c5; WORD $0x40ba             // vaddps    ymm4, ymm1, yword [rdx + 4*rdi + 64]
	// The fourth add writes its result into ymm1, replacing the common operand.
	LONG $0x4c58f4c5; WORD $0x60ba             // vaddps    ymm1, ymm1, yword [rdx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm1

// LBB1_976: branch to LBB1_1109 when rcx equals rax, otherwise to LBB1_977.
// NOTE(review): presumably rcx = elements covered by the vector code and
// rax = total count; both registers and both targets are defined elsewhere
// in this function -- confirm.
LBB1_976:
	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
	JE   LBB1_1109
	JMP  LBB1_977

// LBB1_981: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_982.
LBB1_981:
	WORD $0xff31 // xor    edi, edi

// LBB1_982: single 128-byte step (4 x 32-byte vectors, rdi scaled by 8),
// executed only when bit 0 of r9 is set:
// [r8 + 8*rdi ..] = [rdx + 8*rdi ..] - ymm0 on 64-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_982:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_984
	LONG $0x0c6ffec5; BYTE $0xfa               // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0x646ffec5; WORD $0x60fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 96]
	LONG $0xc8fbf5c5                           // vpsubq    ymm1, ymm1, ymm0
	LONG $0xd0fbedc5                           // vpsubq    ymm2, ymm2, ymm0
	LONG $0xd8fbe5c5                           // vpsubq    ymm3, ymm3, ymm0
	// The fourth subtract writes its result into ymm0, replacing the subtrahend.
	LONG $0xc0fbddc5                           // vpsubq    ymm0, ymm4, ymm0
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB1_984: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_985.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_984:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_985

// LBB1_989: zero the element index, which in this variant lives in rsi
// (32-bit xor clears the full register), and fall through into LBB1_990.
LBB1_989:
	WORD $0xf631 // xor    esi, esi

// LBB1_990: single 128-byte step (4 x 32-byte vectors, rsi scaled by 4),
// executed only when bit 0 of r9 is set:
// [r8 + 4*rsi ..] = [rdx + 4*rsi ..] - ymm1 on packed single-precision lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_990:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_992
	LONG $0x1410fcc5; BYTE $0xb2               // vmovups    ymm2, yword [rdx + 4*rsi]
	LONG $0x5c10fcc5; WORD $0x20b2             // vmovups    ymm3, yword [rdx + 4*rsi + 32]
	LONG $0x6410fcc5; WORD $0x40b2             // vmovups    ymm4, yword [rdx + 4*rsi + 64]
	LONG $0x6c10fcc5; WORD $0x60b2             // vmovups    ymm5, yword [rdx + 4*rsi + 96]
	LONG $0xd15cecc5                           // vsubps    ymm2, ymm2, ymm1
	LONG $0xd95ce4c5                           // vsubps    ymm3, ymm3, ymm1
	LONG $0xe15cdcc5                           // vsubps    ymm4, ymm4, ymm1
	// The fourth subtract writes its result into ymm1, replacing the subtrahend.
	LONG $0xc95cd4c5                           // vsubps    ymm1, ymm5, ymm1
	LONG $0x117cc1c4; WORD $0xb014             // vmovups    yword [r8 + 4*rsi], ymm2
	LONG $0x117cc1c4; WORD $0xb05c; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb04c; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm1

// LBB1_992: branch to LBB1_1109 when rcx equals rax, otherwise to LBB1_993.
// NOTE(review): presumably rcx = elements covered by the vector code and
// rax = total count; both registers and both targets are defined elsewhere
// in this function -- confirm.
LBB1_992:
	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
	JE   LBB1_1109
	JMP  LBB1_993

// LBB1_997: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_998.
LBB1_997:
	WORD $0xff31 // xor    edi, edi

// LBB1_998: single 128-byte step (4 x 32-byte vectors, rdi scaled by 8),
// executed only when bit 0 of r9 is set:
// [r8 + 8*rdi ..] = ymm0 + [rdx + 8*rdi ..] on 64-bit lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_998:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1000
	LONG $0x0cd4fdc5; BYTE $0xfa               // vpaddq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20fa             // vpaddq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40fa             // vpaddq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	// The fourth add writes its result into ymm0, replacing the common operand.
	LONG $0x44d4fdc5; WORD $0x60fa             // vpaddq    ymm0, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB1_1000: branch to LBB1_1109 when rsi equals r10, otherwise to LBB1_1001.
// NOTE(review): presumably "all elements done" vs. "handle the remainder";
// both targets are defined elsewhere in this function -- confirm.
LBB1_1000:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_1001

// LBB1_1005: zero the element index in rdi (32-bit xor clears the full
// register) and fall through into LBB1_1006.
LBB1_1005:
	WORD $0xff31 // xor    edi, edi

// LBB1_1006: single 128-byte step (4 x 32-byte vectors, rdi scaled by 4),
// executed only when bit 0 of r9 is set:
// [r8 + 4*rdi ..] = ymm1 + [rdx + 4*rdi ..] on packed single-precision lanes.
// NOTE(review): r9 is presumably the trip count of an unrolled loop located
// elsewhere in this function, making this the odd leftover step -- confirm.
LBB1_1006:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1008
	LONG $0x1458f4c5; BYTE $0xba               // vaddps    ymm2, ymm1, yword [rdx + 4*rdi]
	LONG $0x5c58f4c5; WORD $0x20ba             // vaddps    ymm3, ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x6458f4c5; WORD $0x40ba             // vaddps    ymm4, ymm1, yword [rdx + 4*rdi + 64]
	// The fourth add writes its result into ymm1, replacing the common operand.
	LONG $0x4c58f4c5; WORD $0x60ba             // vaddps    ymm1, ymm1, yword [rdx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm1

// LBB1_1008: branch to LBB1_1109 when rcx equals rax, otherwise to LBB1_1009.
// NOTE(review): presumably rcx = elements covered by the vector code and
// rax = total count; both registers and both targets are defined elsewhere
// in this function -- confirm.
LBB1_1008:
	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
	JE   LBB1_1109
	JMP  LBB1_1009

// Processes one group of 16 qword elements (four 32-byte vectors) with a
// packed 64-bit subtract (memory operand minus ymm0), but only when bit 0 of
// r9 is set.
// Entry at LBB1_1013 starts from element index 0 (rdi = 0); entry at
// LBB1_1014 keeps the index already in rdi.
// NOTE(review): the setup of r9, rsi, r10 and ymm0 lies outside this excerpt;
// ymm0 presumably holds the broadcast scalar operand — confirm.
LBB1_1013:
	WORD $0xff31 // xor    edi, edi

LBB1_1014:
	// Skip the group when bit 0 of r9 is clear.
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1016
	// Load 4 vectors from [rdx + 8*rdi ...].
	LONG $0x0c6ffec5; BYTE $0xfa               // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa             // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa             // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0x646ffec5; WORD $0x60fa             // vmovdqu    ymm4, yword [rdx + 8*rdi + 96]
	// Subtract ymm0 from each; the last result is written into ymm0 itself.
	LONG $0xc8fbf5c5                           // vpsubq    ymm1, ymm1, ymm0
	LONG $0xd0fbedc5                           // vpsubq    ymm2, ymm2, ymm0
	LONG $0xd8fbe5c5                           // vpsubq    ymm3, ymm3, ymm0
	LONG $0xc0fbddc5                           // vpsubq    ymm0, ymm4, ymm0
	// Store the 4 results to [r8 + 8*rdi ...].
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

LBB1_1016:
	// Return via LBB1_1109 when rsi == r10; otherwise continue at LBB1_1017
	// (defined outside this excerpt).
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_1017

// Processes one group of 32 single-precision floats (four 32-byte vectors)
// with a packed float subtract (memory operand minus ymm1), but only when
// bit 0 of r9 is set. Unlike the neighbouring blocks, the element index here
// lives in rsi.
// Entry at LBB1_1021 starts from element index 0 (rsi = 0); entry at
// LBB1_1022 keeps the index already in rsi.
// NOTE(review): the setup of r9, rcx, rax and ymm1 lies outside this excerpt;
// ymm1 presumably holds the broadcast scalar operand — confirm.
LBB1_1021:
	WORD $0xf631 // xor    esi, esi

LBB1_1022:
	// Skip the group when bit 0 of r9 is clear.
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1024
	// Load 4 vectors from [rdx + 4*rsi ...].
	LONG $0x1410fcc5; BYTE $0xb2               // vmovups    ymm2, yword [rdx + 4*rsi]
	LONG $0x5c10fcc5; WORD $0x20b2             // vmovups    ymm3, yword [rdx + 4*rsi + 32]
	LONG $0x6410fcc5; WORD $0x40b2             // vmovups    ymm4, yword [rdx + 4*rsi + 64]
	LONG $0x6c10fcc5; WORD $0x60b2             // vmovups    ymm5, yword [rdx + 4*rsi + 96]
	// Subtract ymm1 from each; the last result is written into ymm1 itself.
	LONG $0xd15cecc5                           // vsubps    ymm2, ymm2, ymm1
	LONG $0xd95ce4c5                           // vsubps    ymm3, ymm3, ymm1
	LONG $0xe15cdcc5                           // vsubps    ymm4, ymm4, ymm1
	LONG $0xc95cd4c5                           // vsubps    ymm1, ymm5, ymm1
	// Store the 4 results to [r8 + 4*rsi ...].
	LONG $0x117cc1c4; WORD $0xb014             // vmovups    yword [r8 + 4*rsi], ymm2
	LONG $0x117cc1c4; WORD $0xb05c; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb04c; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm1

LBB1_1024:
	// Return via LBB1_1109 when rcx == rax; otherwise continue at LBB1_1025
	// (defined outside this excerpt).
	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
	JE   LBB1_1109
	JMP  LBB1_1025

// Processes one group of 128 byte elements (four 32-byte vectors) with a
// packed 8-bit add, but only when bit 0 of r9 is set.
// Entry at LBB1_1029 starts from byte index 0 (rdi = 0); entry at LBB1_1030
// keeps the index already in rdi.
// NOTE(review): the setup of r9, rsi, r10 and ymm0 lies outside this excerpt;
// ymm0 presumably holds the broadcast scalar operand — confirm.
LBB1_1029:
	WORD $0xff31 // xor    edi, edi

LBB1_1030:
	// Skip the group when bit 0 of r9 is clear.
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1032
	// [r8 + rdi ...] = ymm0 + [rdx + rdi ...] for 4 consecutive vectors.
	// The last add overwrites ymm0, which is not read again in this block.
	LONG $0x0cfcfdc5; BYTE $0x3a               // vpaddb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54fcfdc5; WORD $0x203a             // vpaddb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x403a             // vpaddb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x44fcfdc5; WORD $0x603a             // vpaddb    ymm0, ymm0, yword [rdx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

LBB1_1032:
	// Return via LBB1_1109 when rsi == r10; otherwise continue at LBB1_1033
	// (defined outside this excerpt).
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_1033

// Processes one group of 128 byte elements (four 32-byte vectors) with a
// packed 8-bit subtract (memory operand minus ymm0), but only when bit 0 of
// r9 is set.
// Entry at LBB1_1037 starts from byte index 0 (rdi = 0); entry at LBB1_1038
// keeps the index already in rdi.
// NOTE(review): the setup of r9, rsi, r10 and ymm0 lies outside this excerpt;
// ymm0 presumably holds the broadcast scalar operand — confirm.
LBB1_1037:
	WORD $0xff31 // xor    edi, edi

LBB1_1038:
	// Skip the group when bit 0 of r9 is clear.
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1040
	// Load 4 vectors from [rdx + rdi ...].
	LONG $0x0c6ffec5; BYTE $0x3a               // vmovdqu    ymm1, yword [rdx + rdi]
	LONG $0x546ffec5; WORD $0x203a             // vmovdqu    ymm2, yword [rdx + rdi + 32]
	LONG $0x5c6ffec5; WORD $0x403a             // vmovdqu    ymm3, yword [rdx + rdi + 64]
	LONG $0x646ffec5; WORD $0x603a             // vmovdqu    ymm4, yword [rdx + rdi + 96]
	// Subtract ymm0 from each; the last result is written into ymm0 itself.
	LONG $0xc8f8f5c5                           // vpsubb    ymm1, ymm1, ymm0
	LONG $0xd0f8edc5                           // vpsubb    ymm2, ymm2, ymm0
	LONG $0xd8f8e5c5                           // vpsubb    ymm3, ymm3, ymm0
	LONG $0xc0f8ddc5                           // vpsubb    ymm0, ymm4, ymm0
	// Store the 4 results to [r8 + rdi ...].
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

LBB1_1040:
	// Return via LBB1_1109 when rsi == r10; otherwise continue at LBB1_1041
	// (defined outside this excerpt).
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_1041

// Processes one group of 128 byte elements (four 32-byte vectors) with a
// packed 8-bit add, but only when bit 0 of r9 is set.
// Entry at LBB1_1045 starts from byte index 0 (rdi = 0); entry at LBB1_1046
// keeps the index already in rdi.
// NOTE(review): the setup of r9, rsi, r10 and ymm0 lies outside this excerpt;
// ymm0 presumably holds the broadcast scalar operand — confirm.
LBB1_1045:
	WORD $0xff31 // xor    edi, edi

LBB1_1046:
	// Skip the group when bit 0 of r9 is clear.
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1048
	// [r8 + rdi ...] = ymm0 + [rdx + rdi ...] for 4 consecutive vectors.
	// The last add overwrites ymm0, which is not read again in this block.
	LONG $0x0cfcfdc5; BYTE $0x3a               // vpaddb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54fcfdc5; WORD $0x203a             // vpaddb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x403a             // vpaddb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x44fcfdc5; WORD $0x603a             // vpaddb    ymm0, ymm0, yword [rdx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

LBB1_1048:
	// Return via LBB1_1109 when rsi == r10; otherwise continue at LBB1_1049
	// (defined outside this excerpt).
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_1049

// Processes one group of 128 byte elements (four 32-byte vectors) with a
// packed 8-bit subtract (memory operand minus ymm0), but only when bit 0 of
// r9 is set.
// Entry at LBB1_1053 starts from byte index 0 (rdi = 0); entry at LBB1_1054
// keeps the index already in rdi.
// NOTE(review): the setup of r9, rsi, r10 and ymm0 lies outside this excerpt;
// ymm0 presumably holds the broadcast scalar operand — confirm.
LBB1_1053:
	WORD $0xff31 // xor    edi, edi

LBB1_1054:
	// Skip the group when bit 0 of r9 is clear.
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1056
	// Load 4 vectors from [rdx + rdi ...].
	LONG $0x0c6ffec5; BYTE $0x3a               // vmovdqu    ymm1, yword [rdx + rdi]
	LONG $0x546ffec5; WORD $0x203a             // vmovdqu    ymm2, yword [rdx + rdi + 32]
	LONG $0x5c6ffec5; WORD $0x403a             // vmovdqu    ymm3, yword [rdx + rdi + 64]
	LONG $0x646ffec5; WORD $0x603a             // vmovdqu    ymm4, yword [rdx + rdi + 96]
	// Subtract ymm0 from each; the last result is written into ymm0 itself.
	LONG $0xc8f8f5c5                           // vpsubb    ymm1, ymm1, ymm0
	LONG $0xd0f8edc5                           // vpsubb    ymm2, ymm2, ymm0
	LONG $0xd8f8e5c5                           // vpsubb    ymm3, ymm3, ymm0
	LONG $0xc0f8ddc5                           // vpsubb    ymm0, ymm4, ymm0
	// Store the 4 results to [r8 + rdi ...].
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

LBB1_1056:
	// Return via LBB1_1109 when rsi == r10; otherwise continue at LBB1_1057
	// (defined outside this excerpt).
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_1057

// Processes one group of 32 dword elements (four 32-byte vectors) with a
// packed 32-bit multiply that keeps the low 32 bits of each product, but
// only when bit 0 of r9 is set.
// Entry at LBB1_1061 starts from element index 0 (rdi = 0); entry at
// LBB1_1062 keeps the index already in rdi.
// NOTE(review): the setup of r9, rsi, r10 and ymm0 lies outside this excerpt;
// ymm0 presumably holds the broadcast scalar operand — confirm.
LBB1_1061:
	WORD $0xff31 // xor    edi, edi

LBB1_1062:
	// Skip the group when bit 0 of r9 is clear.
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1064
	// [r8 + 4*rdi ...] = ymm0 * [rdx + 4*rdi ...] for 4 consecutive vectors.
	// The last multiply overwrites ymm0, which is not read again in this block.
	LONG $0x407de2c4; WORD $0xba0c             // vpmulld    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x407de2c4; WORD $0xba54; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xba5c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xba44; BYTE $0x60 // vpmulld    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

LBB1_1064:
	// Return via LBB1_1109 when rsi == r10; otherwise continue at LBB1_1065
	// (defined outside this excerpt).
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_1065

// Processes one group of 32 dword elements (four 32-byte vectors) with a
// packed 32-bit multiply that keeps the low 32 bits of each product, but
// only when bit 0 of r9 is set.
// Entry at LBB1_1069 starts from element index 0 (rdi = 0); entry at
// LBB1_1070 keeps the index already in rdi.
// NOTE(review): the setup of r9, rsi, r10 and ymm0 lies outside this excerpt;
// ymm0 presumably holds the broadcast scalar operand — confirm.
LBB1_1069:
	WORD $0xff31 // xor    edi, edi

LBB1_1070:
	// Skip the group when bit 0 of r9 is clear.
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1072
	// [r8 + 4*rdi ...] = ymm0 * [rdx + 4*rdi ...] for 4 consecutive vectors.
	// The last multiply overwrites ymm0, which is not read again in this block.
	LONG $0x407de2c4; WORD $0xba0c             // vpmulld    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x407de2c4; WORD $0xba54; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xba5c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xba44; BYTE $0x60 // vpmulld    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

LBB1_1072:
	// Return via LBB1_1109 when rsi == r10; otherwise continue at LBB1_1073
	// (defined outside this excerpt).
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_1073

// Processes one group of 32 dword elements (four 32-byte vectors) with a
// packed 32-bit add, but only when bit 0 of r9 is set.
// Entry at LBB1_1077 starts from element index 0 (rdi = 0); entry at
// LBB1_1078 keeps the index already in rdi.
// NOTE(review): the setup of r9, rsi, r10 and ymm0 lies outside this excerpt;
// ymm0 presumably holds the broadcast scalar operand — confirm.
LBB1_1077:
	WORD $0xff31 // xor    edi, edi

LBB1_1078:
	// Skip the group when bit 0 of r9 is clear.
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1080
	// [r8 + 4*rdi ...] = ymm0 + [rdx + 4*rdi ...] for 4 consecutive vectors.
	// The last add overwrites ymm0, which is not read again in this block.
	LONG $0x0cfefdc5; BYTE $0xba               // vpaddd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20ba             // vpaddd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40ba             // vpaddd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x44fefdc5; WORD $0x60ba             // vpaddd    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

LBB1_1080:
	// Return via LBB1_1109 when rsi == r10; otherwise continue at LBB1_1081
	// (defined outside this excerpt).
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_1081

// Processes one group of 32 dword elements (four 32-byte vectors) with a
// packed 32-bit subtract (memory operand minus ymm0), but only when bit 0 of
// r9 is set.
// Entry at LBB1_1085 starts from element index 0 (rdi = 0); entry at
// LBB1_1086 keeps the index already in rdi.
// NOTE(review): the setup of r9, rsi, r10 and ymm0 lies outside this excerpt;
// ymm0 presumably holds the broadcast scalar operand — confirm.
LBB1_1085:
	WORD $0xff31 // xor    edi, edi

LBB1_1086:
	// Skip the group when bit 0 of r9 is clear.
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1088
	// Load 4 vectors from [rdx + 4*rdi ...].
	LONG $0x0c6ffec5; BYTE $0xba               // vmovdqu    ymm1, yword [rdx + 4*rdi]
	LONG $0x546ffec5; WORD $0x20ba             // vmovdqu    ymm2, yword [rdx + 4*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40ba             // vmovdqu    ymm3, yword [rdx + 4*rdi + 64]
	LONG $0x646ffec5; WORD $0x60ba             // vmovdqu    ymm4, yword [rdx + 4*rdi + 96]
	// Subtract ymm0 from each; the last result is written into ymm0 itself.
	LONG $0xc8faf5c5                           // vpsubd    ymm1, ymm1, ymm0
	LONG $0xd0faedc5                           // vpsubd    ymm2, ymm2, ymm0
	LONG $0xd8fae5c5                           // vpsubd    ymm3, ymm3, ymm0
	LONG $0xc0faddc5                           // vpsubd    ymm0, ymm4, ymm0
	// Store the 4 results to [r8 + 4*rdi ...].
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

LBB1_1088:
	// Return via LBB1_1109 when rsi == r10; otherwise continue at LBB1_1089
	// (defined outside this excerpt).
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_1089

// Processes one group of 32 dword elements (four 32-byte vectors) with a
// packed 32-bit add, but only when bit 0 of r9 is set.
// Entry at LBB1_1093 starts from element index 0 (rdi = 0); entry at
// LBB1_1094 keeps the index already in rdi.
// NOTE(review): the setup of r9, rsi, r10 and ymm0 lies outside this excerpt;
// ymm0 presumably holds the broadcast scalar operand — confirm.
LBB1_1093:
	WORD $0xff31 // xor    edi, edi

LBB1_1094:
	// Skip the group when bit 0 of r9 is clear.
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1096
	// [r8 + 4*rdi ...] = ymm0 + [rdx + 4*rdi ...] for 4 consecutive vectors.
	// The last add overwrites ymm0, which is not read again in this block.
	LONG $0x0cfefdc5; BYTE $0xba               // vpaddd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20ba             // vpaddd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40ba             // vpaddd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x44fefdc5; WORD $0x60ba             // vpaddd    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

LBB1_1096:
	// Return via LBB1_1109 when rsi == r10; otherwise continue at LBB1_1097
	// (defined outside this excerpt).
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB1_1109
	JMP  LBB1_1097

// Processes one group of 32 dword elements (four 32-byte vectors) with a
// packed 32-bit subtract (memory operand minus ymm0), but only when bit 0 of
// r9 is set.
// Entry at LBB1_1101 starts from element index 0 (rdi = 0); entry at
// LBB1_1102 keeps the index already in rdi.
// NOTE(review): the setup of r9, rsi, r10 and ymm0 lies outside this excerpt;
// ymm0 presumably holds the broadcast scalar operand — confirm.
LBB1_1101:
	WORD $0xff31 // xor    edi, edi

LBB1_1102:
	// Skip the group when bit 0 of r9 is clear.
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB1_1104
	// Load 4 vectors from [rdx + 4*rdi ...].
	LONG $0x0c6ffec5; BYTE $0xba               // vmovdqu    ymm1, yword [rdx + 4*rdi]
	LONG $0x546ffec5; WORD $0x20ba             // vmovdqu    ymm2, yword [rdx + 4*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40ba             // vmovdqu    ymm3, yword [rdx + 4*rdi + 64]
	LONG $0x646ffec5; WORD $0x60ba             // vmovdqu    ymm4, yword [rdx + 4*rdi + 96]
	// Subtract ymm0 from each; the last result is written into ymm0 itself.
	LONG $0xc8faf5c5                           // vpsubd    ymm1, ymm1, ymm0
	LONG $0xd0faedc5                           // vpsubd    ymm2, ymm2, ymm0
	LONG $0xd8fae5c5                           // vpsubd    ymm3, ymm3, ymm0
	LONG $0xc0faddc5                           // vpsubd    ymm0, ymm4, ymm0
	// Store the 4 results to [r8 + 4*rdi ...].
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

LBB1_1104:
	// Continue at LBB1_1105 (defined outside this excerpt) when rsi != r10;
	// otherwise fall through into the exit label that follows.
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JNE  LBB1_1105

// Common exit for the blocks above: VZEROUPPER zeroes the upper halves of the
// ymm registers before returning, so that non-AVX code run afterwards does
// not pay an AVX/SSE state-transition penalty.
LBB1_1109:
	VZEROUPPER
	RET

// LCDATA3 is a 32-byte file-local constant made of four 8-byte words, each
// 0x00ff00ff00ff00ff, i.e. the value 0x00ff in every 16-bit lane of a 256-bit
// vector. Its address is loaded into BP at the entry of the function below.
// GLOBL flag 8 corresponds to RODATA in Go's textflag.h.
// NOTE(review): presumably a low-byte mask for byte-wise arithmetic done in
// 16-bit lanes; its uses are outside this excerpt — confirm.
DATA LCDATA3<>+0x000(SB)/8, $0x00ff00ff00ff00ff
DATA LCDATA3<>+0x008(SB)/8, $0x00ff00ff00ff00ff
DATA LCDATA3<>+0x010(SB)/8, $0x00ff00ff00ff00ff
DATA LCDATA3<>+0x018(SB)/8, $0x00ff00ff00ff00ff
GLOBL LCDATA3<>(SB), 8, $32

// _arithmetic_scalar_arr_avx2(typ, op, inLeft, inRight, out, len)
//
// Entry point and first dispatch level. Six 8-byte stack arguments (48 bytes
// of arguments, frame size 0) are loaded into the registers used by the
// hand-encoded x86-64 instructions below:
//   DI = typ, SI = op, DX = inLeft, CX = inRight, R8 = out, R9 = len
// In the paths shown here a single scalar operand is read from [inLeft],
// elements are read from inRight and results are written to out. Only the low
// byte of op and the low 32 bits of typ and len are examined, and len <= 0
// (as a signed 32-bit value) results in no work.
//
// NOTE(review): the two characters in front of "_arithmetic_scalar_arr_avx2"
// look like a mis-decoded middle dot (U+00B7), which Go assembly uses as the
// package separator in symbol names — confirm the file's encoding.
// NOTE(review): BP is overwritten below while the declared frame size is 0;
// Go's assembler only saves/restores BP automatically for non-zero frames —
// confirm this is acceptable for frame-pointer based unwinding.
TEXT ยท_arithmetic_scalar_arr_avx2(SB), $0-48

	MOVQ typ+0(FP), DI
	MOVQ op+8(FP), SI
	MOVQ inLeft+16(FP), DX
	MOVQ inRight+24(FP), CX
	MOVQ out+32(FP), R8
	MOVQ len+40(FP), R9
	LEAQ LCDATA3<>(SB), BP

	// Dispatch on op (signed byte compare): op > 20 -> LBB2_12, 0 -> LBB2_23,
	// 1 -> LBB2_31, 2 -> handled below, anything else -> LBB2_1109
	// (defined outside this excerpt; presumably the common exit).
	LONG $0x14fe8040         // cmp    sil, 20
	JG   LBB2_12
	WORD $0x8440; BYTE $0xf6 // test    sil, sil
	JE   LBB2_23
	LONG $0x01fe8040         // cmp    sil, 1
	JE   LBB2_31
	LONG $0x02fe8040         // cmp    sil, 2
	JNE  LBB2_1109
	// op == 2: dispatch on typ. typ > 6 -> LBB2_55, typ <= 3 -> LBB2_97,
	// 4 -> LBB2_157, 5 -> LBB2_160. Only typ == 6 can reach the final compare,
	// so its JNE is never taken.
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB2_55
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB2_97
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB2_157
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB2_160
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB2_1109
	// op == 2, typ == 6: 32-bit multiply, out[i] = inRight[i] * scalar
	// (low 32 bits of the product).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// eax = scalar operand; r10 = element count (len zero-extended from 32 bits).
	WORD $0x028b             // mov    eax, dword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go straight to the scalar loop.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_11
	// Overlap check: branch to LBB2_445 (outside this excerpt; presumably the
	// vectorized path) when the input range ends at or before out, or the
	// output range ends at or before inRight. rdx is reused as scratch here.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_445
	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_445

LBB2_11:
	// Scalar path from the beginning: element index rsi = 0.
	WORD $0xf631 // xor    esi, esi

LBB2_665:
	// r9 = r10 - rsi - 1 (outstanding elements minus one);
	// rdi = r10 & 3 = number of single-element iterations to peel off.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_667

LBB2_666:
	// Peel loop: one element per iteration, rdi times.
	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
	WORD $0xaf0f; BYTE $0xd0 // imul    edx, eax
	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_666

LBB2_667:
	// r9 < 3 means at most 3 elements were outstanding before the peel loop,
	// so skip the unrolled loop.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

LBB2_668:
	// Main scalar loop, unrolled 4x; runs until rsi == r10.
	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_668
	JMP  LBB2_1109

// Second dispatch level for op > 20: op == 21 -> LBB2_39, 22 -> LBB2_47,
// 23 -> handled below, anything else -> LBB2_1109 (defined outside this
// excerpt; presumably the common exit).
// Register roles on entry: edi = typ, sil = op, rdx = inLeft, rcx = inRight,
// r8 = out, r9 = len.
LBB2_12:
	LONG $0x15fe8040         // cmp    sil, 21
	JE   LBB2_39
	LONG $0x16fe8040         // cmp    sil, 22
	JE   LBB2_47
	LONG $0x17fe8040         // cmp    sil, 23
	JNE  LBB2_1109
	// op == 23: dispatch on typ. typ > 6 -> LBB2_62, typ <= 3 -> LBB2_102,
	// 4 -> LBB2_163, 5 -> LBB2_166. Only typ == 6 can reach the final compare,
	// so its JNE is never taken.
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB2_62
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB2_102
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB2_163
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB2_166
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB2_1109
	// op == 23, typ == 6: 32-bit multiply, out[i] = inRight[i] * scalar
	// (low 32 bits of the product). Nothing to do when len <= 0 (signed 32-bit).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// eax = scalar operand; r10 = element count (len zero-extended from 32 bits).
	WORD $0x028b             // mov    eax, dword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go straight to the scalar loop.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_22
	// Overlap check: branch to LBB2_448 (outside this excerpt; presumably the
	// vectorized path) when the input range ends at or before out, or the
	// output range ends at or before inRight. rdx is reused as scratch here.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_448
	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_448

LBB2_22:
	// Scalar path from the beginning: element index rsi = 0.
	WORD $0xf631 // xor    esi, esi

LBB2_673:
	// r9 = r10 - rsi - 1 (outstanding elements minus one);
	// rdi = r10 & 3 = number of single-element iterations to peel off.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_675

LBB2_674:
	// Peel loop: one element per iteration, rdi times.
	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
	WORD $0xaf0f; BYTE $0xd0 // imul    edx, eax
	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_674

LBB2_675:
	// r9 < 3 means at most 3 elements were outstanding before the peel loop,
	// so skip the unrolled loop.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

LBB2_676:
	// Main scalar loop, unrolled 4x; runs until rsi == r10.
	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_676
	JMP  LBB2_1109

// Handler for op == 0: dispatch on typ. typ > 6 -> LBB2_69, typ <= 3 ->
// LBB2_107, 4 -> LBB2_169, 5 -> LBB2_172. Only typ == 6 can reach the final
// compare, so its JNE is never taken.
// Register roles on entry: edi = typ, rdx = inLeft, rcx = inRight, r8 = out,
// r9 = len.
LBB2_23:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB2_69
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB2_107
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB2_169
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB2_172
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB2_1109
	// op == 0, typ == 6: 32-bit wrapping add, out[i] = inRight[i] + scalar.
	// Nothing to do when len <= 0 (signed 32-bit).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// eax = scalar operand; r10 = element count (len zero-extended from 32 bits).
	WORD $0x028b             // mov    eax, dword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go straight to the scalar loop.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_30
	// Overlap check: branch to LBB2_451 (outside this excerpt; presumably the
	// vectorized path) when the input range ends at or before out, or the
	// output range ends at or before inRight. rdx is reused as scratch here.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_451
	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_451

LBB2_30:
	// Scalar path from the beginning: element index rsi = 0.
	WORD $0xf631 // xor    esi, esi

LBB2_681:
	// r9 = r10 - rsi - 1 (outstanding elements minus one);
	// rdi = r10 & 3 = number of single-element iterations to peel off.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_683

LBB2_682:
	// Peel loop: one element per iteration, rdi times.
	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
	WORD $0xc201             // add    edx, eax
	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_682

LBB2_683:
	// r9 < 3 means at most 3 elements were outstanding before the peel loop,
	// so skip the unrolled loop.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

LBB2_684:
	// Main scalar loop, unrolled 4x; runs until rsi == r10.
	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_684
	JMP  LBB2_1109

// Handler for op == 1: dispatch on typ. typ > 6 -> LBB2_76, typ <= 3 ->
// LBB2_112, 4 -> LBB2_175, 5 -> LBB2_178. Only typ == 6 can reach the final
// compare, so its JNE is never taken.
// Register roles on entry: edi = typ, rdx = inLeft, rcx = inRight, r8 = out,
// r9 = len.
LBB2_31:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB2_76
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB2_112
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB2_175
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB2_178
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB2_1109
	// op == 1, typ == 6: 32-bit wrapping subtract with the scalar on the left,
	// out[i] = scalar - inRight[i]. Nothing to do when len <= 0 (signed 32-bit).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// r11d = scalar operand (kept in r11 because eax is the per-element
	// temporary); r10 = element count (len zero-extended from 32 bits).
	WORD $0x8b44; BYTE $0x1a // mov    r11d, dword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go straight to the scalar loop.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_38
	// Overlap check: branch to LBB2_454 (outside this excerpt; presumably the
	// vectorized path) when the input range ends at or before out, or the
	// output range ends at or before inRight. rdx is reused as scratch here.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_454
	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_454

LBB2_38:
	// Scalar path from the beginning: element index rsi = 0.
	WORD $0xf631 // xor    esi, esi

LBB2_689:
	// rdx = r10 - rsi - 1 (outstanding elements minus one);
	// rdi = r10 & 3 = number of single-element iterations to peel off.
	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
	WORD $0xf748; BYTE $0xd2 // not    rdx
	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_691

LBB2_690:
	// Peel loop: one element per iteration, rdi times.
	WORD $0x8944; BYTE $0xd8 // mov    eax, r11d
	WORD $0x042b; BYTE $0xb1 // sub    eax, dword [rcx + 4*rsi]
	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_690

LBB2_691:
	// rdx < 3 means at most 3 elements were outstanding before the peel loop,
	// so skip the unrolled loop.
	LONG $0x03fa8348 // cmp    rdx, 3
	JB   LBB2_1109

LBB2_692:
	// Main scalar loop, unrolled 4x; runs until rsi == r10.
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_692
	JMP  LBB2_1109

// Handler for op == 21: dispatch on typ. typ > 6 -> LBB2_83, typ <= 3 ->
// LBB2_117, 4 -> LBB2_181, 5 -> LBB2_184. Only typ == 6 can reach the final
// compare, so its JNE is never taken.
// Register roles on entry: edi = typ, rdx = inLeft, rcx = inRight, r8 = out,
// r9 = len.
LBB2_39:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB2_83
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB2_117
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB2_181
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB2_184
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB2_1109
	// op == 21, typ == 6: 32-bit wrapping add, out[i] = inRight[i] + scalar.
	// Nothing to do when len <= 0 (signed 32-bit).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// eax = scalar operand; r10 = element count (len zero-extended from 32 bits).
	WORD $0x028b             // mov    eax, dword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go straight to the scalar loop.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_46
	// Overlap check: branch to LBB2_457 (outside this excerpt; presumably the
	// vectorized path) when the input range ends at or before out, or the
	// output range ends at or before inRight. rdx is reused as scratch here.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_457
	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_457

LBB2_46:
	// Scalar path from the beginning: element index rsi = 0.
	WORD $0xf631 // xor    esi, esi

LBB2_697:
	// r9 = r10 - rsi - 1 (outstanding elements minus one);
	// rdi = r10 & 3 = number of single-element iterations to peel off.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_699

LBB2_698:
	// Peel loop: one element per iteration, rdi times.
	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
	WORD $0xc201             // add    edx, eax
	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_698

LBB2_699:
	// r9 < 3 means at most 3 elements were outstanding before the peel loop,
	// so skip the unrolled loop.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

LBB2_700:
	// Main scalar loop, unrolled 4x; runs until rsi == r10.
	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_700
	JMP  LBB2_1109

// Handler for op == 22: dispatch on typ. typ > 6 -> LBB2_90, typ <= 3 ->
// LBB2_122, 4 -> LBB2_187, 5 -> LBB2_190. Only typ == 6 can reach the final
// compare, so its JNE is never taken.
// Register roles on entry: edi = typ, rdx = inLeft, rcx = inRight, r8 = out,
// r9 = len.
LBB2_47:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB2_90
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB2_122
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB2_187
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB2_190
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB2_1109
	// op == 22, typ == 6: 32-bit wrapping subtract with the scalar on the left,
	// out[i] = scalar - inRight[i]. Nothing to do when len <= 0 (signed 32-bit).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// r11d = scalar operand (kept in r11 because eax is the per-element
	// temporary); r10 = element count (len zero-extended from 32 bits).
	WORD $0x8b44; BYTE $0x1a // mov    r11d, dword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go straight to the scalar loop.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_54
	// Overlap check: branch to LBB2_460 (outside this excerpt; presumably the
	// vectorized path) when the input range ends at or before out, or the
	// output range ends at or before inRight. rdx is reused as scratch here.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_460
	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_460

LBB2_54:
	// Scalar path from the beginning: element index rsi = 0.
	WORD $0xf631 // xor    esi, esi

LBB2_705:
	// rdx = r10 - rsi - 1 (outstanding elements minus one);
	// rdi = r10 & 3 = number of single-element iterations to peel off.
	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
	WORD $0xf748; BYTE $0xd2 // not    rdx
	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_707

LBB2_706:
	// Peel loop: one element per iteration, rdi times.
	WORD $0x8944; BYTE $0xd8 // mov    eax, r11d
	WORD $0x042b; BYTE $0xb1 // sub    eax, dword [rcx + 4*rsi]
	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_706

LBB2_707:
	// rdx < 3 means at most 3 elements were outstanding before the peel loop,
	// so skip the unrolled loop.
	LONG $0x03fa8348 // cmp    rdx, 3
	JB   LBB2_1109

LBB2_708:
	// Main scalar loop, unrolled 4x; runs until rsi == r10.
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_708
	JMP  LBB2_1109

// Reached from the op == 2 dispatch when typ > 6. Dispatch on typ:
// typ <= 8 -> LBB2_127, 9 -> LBB2_193, 11 -> LBB2_196, 12 -> handled below,
// anything else -> LBB2_1109 (defined outside this excerpt; presumably the
// common exit).
// Register roles on entry: edi = typ, rdx = inLeft, rcx = inRight, r8 = out,
// r9 = len.
LBB2_55:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB2_127
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB2_193
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB2_196
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB2_1109
	// typ == 12: double-precision multiply, out[i] = scalar * inRight[i].
	// Nothing to do when len <= 0 (signed 32-bit).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// xmm0 = scalar operand; rax = element count (len zero-extended from 32 bits).
	LONG $0x0210fbc5         // vmovsd    xmm0, qword [rdx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	// Fewer than 16 elements: go straight to the scalar loop.
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_61
	// Overlap check: branch to LBB2_463 (outside this excerpt; presumably the
	// vectorized path) when the input range ends at or before out, or the
	// output range ends at or before inRight. rdx is reused as scratch here.
	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_463
	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_463

LBB2_61:
	// Scalar path from the beginning: element index rdx = 0.
	WORD $0xd231 // xor    edx, edx

LBB2_713:
	// rsi = rax - rdx - 1 (outstanding elements minus one);
	// rdi = rax & 3 = number of single-element iterations to peel off.
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_715

LBB2_714:
	// Peel loop: one element per iteration, rdi times.
	LONG $0x0c59fbc5; BYTE $0xd1   // vmulsd    xmm1, xmm0, qword [rcx + 8*rdx]
	LONG $0x117bc1c4; WORD $0xd00c // vmovsd    qword [r8 + 8*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB2_714

LBB2_715:
	// rsi < 3 means at most 3 elements were outstanding before the peel loop,
	// so skip the unrolled loop.
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB2_1109

LBB2_716:
	// Main scalar loop, unrolled 4x; runs until rdx == rax.
	LONG $0x0c59fbc5; BYTE $0xd1               // vmulsd    xmm1, xmm0, qword [rcx + 8*rdx]
	LONG $0x117bc1c4; WORD $0xd00c             // vmovsd    qword [r8 + 8*rdx], xmm1
	LONG $0x4c59fbc5; WORD $0x08d1             // vmulsd    xmm1, xmm0, qword [rcx + 8*rdx + 8]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x08 // vmovsd    qword [r8 + 8*rdx + 8], xmm1
	LONG $0x4c59fbc5; WORD $0x10d1             // vmulsd    xmm1, xmm0, qword [rcx + 8*rdx + 16]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x10 // vmovsd    qword [r8 + 8*rdx + 16], xmm1
	LONG $0x4c59fbc5; WORD $0x18d1             // vmulsd    xmm1, xmm0, qword [rcx + 8*rdx + 24]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x18 // vmovsd    qword [r8 + 8*rdx + 24], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB2_716
	JMP  LBB2_1109

// Reached from the op == 23 dispatch when typ > 6. Dispatch on typ:
// typ <= 8 -> LBB2_132, 9 -> LBB2_199, 11 -> LBB2_202, 12 -> handled below,
// anything else -> LBB2_1109 (defined outside this excerpt; presumably the
// common exit).
// Register roles on entry: edi = typ, rdx = inLeft, rcx = inRight, r8 = out,
// r9 = len.
LBB2_62:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB2_132
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB2_199
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB2_202
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB2_1109
	// typ == 12: double-precision multiply, out[i] = scalar * inRight[i].
	// Nothing to do when len <= 0 (signed 32-bit).
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// xmm0 = scalar operand; rax = element count (len zero-extended from 32 bits).
	LONG $0x0210fbc5         // vmovsd    xmm0, qword [rdx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	// Fewer than 16 elements: go straight to the scalar loop.
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_68
	// Overlap check: branch to LBB2_466 (outside this excerpt; presumably the
	// vectorized path) when the input range ends at or before out, or the
	// output range ends at or before inRight. rdx is reused as scratch here.
	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_466
	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_466

LBB2_68:
	// Scalar path from the beginning: element index rdx = 0.
	WORD $0xd231 // xor    edx, edx

LBB2_721:
	// rsi = rax - rdx - 1 (outstanding elements minus one);
	// rdi = rax & 3 = number of single-element iterations to peel off.
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_723

LBB2_722:
	// Peel loop: one element per iteration, rdi times.
	LONG $0x0c59fbc5; BYTE $0xd1   // vmulsd    xmm1, xmm0, qword [rcx + 8*rdx]
	LONG $0x117bc1c4; WORD $0xd00c // vmovsd    qword [r8 + 8*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB2_722

LBB2_723:
	// rsi < 3 means at most 3 elements were outstanding before the peel loop,
	// so skip the unrolled loop.
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB2_1109

LBB2_724:
	// Main scalar loop, unrolled 4x; runs until rdx == rax.
	LONG $0x0c59fbc5; BYTE $0xd1               // vmulsd    xmm1, xmm0, qword [rcx + 8*rdx]
	LONG $0x117bc1c4; WORD $0xd00c             // vmovsd    qword [r8 + 8*rdx], xmm1
	LONG $0x4c59fbc5; WORD $0x08d1             // vmulsd    xmm1, xmm0, qword [rcx + 8*rdx + 8]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x08 // vmovsd    qword [r8 + 8*rdx + 8], xmm1
	LONG $0x4c59fbc5; WORD $0x10d1             // vmulsd    xmm1, xmm0, qword [rcx + 8*rdx + 16]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x10 // vmovsd    qword [r8 + 8*rdx + 16], xmm1
	LONG $0x4c59fbc5; WORD $0x18d1             // vmulsd    xmm1, xmm0, qword [rcx + 8*rdx + 24]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x18 // vmovsd    qword [r8 + 8*rdx + 24], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB2_724
	JMP  LBB2_1109

// LBB2_69 -- type dispatch on edi for a scalar-plus-array add. The
// edi == 12 case is handled inline on 8-byte floating-point elements:
//     out[i] = scalar + in[i]
// Registers as used below: rdx -> scalar operand, rcx -> input elements,
// r8 -> output elements, r9d = signed element count.
// NOTE(review): LBB2_1109 (taken for unhandled edi values and count <= 0)
// looks like the routine's common exit, and LBB2_469 like a wider path for
// >= 16 non-overlapping elements; both are outside this chunk -- confirm.
LBB2_69:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB2_137
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB2_205
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB2_208
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// Load the scalar once into xmm0; rdx is free to be reused afterwards.
	LONG $0x0210fbc5         // vmovsd    xmm0, qword [rdx]
	// rax = element count n, zero-extended to 64 bits.
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_75
	// n >= 16: branch away when [rcx, rcx+8n) and [r8, r8+8n) are disjoint.
	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_469
	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_469

// Short or overlapping buffers: start the element-wise loop at index 0.
LBB2_75:
	WORD $0xd231 // xor    edx, edx

// rdx = current index. rsi = n - rdx - 1, rdi = n & 3 (elements to process
// one at a time before the 4x unrolled loop).
// NOTE(review): presumably also entered from outside this chunk with rdx
// already advanced -- confirm.
LBB2_729:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_731

// Peel loop: one element per iteration, rdi iterations.
LBB2_730:
	LONG $0x0c58fbc5; BYTE $0xd1   // vaddsd    xmm1, xmm0, qword [rcx + 8*rdx]
	LONG $0x117bc1c4; WORD $0xd00c // vmovsd    qword [r8 + 8*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB2_730

// rsi < 3 means at most 3 elements were outstanding at LBB2_729: done.
LBB2_731:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rdx equals n (rax).
LBB2_732:
	LONG $0x0c58fbc5; BYTE $0xd1               // vaddsd    xmm1, xmm0, qword [rcx + 8*rdx]
	LONG $0x117bc1c4; WORD $0xd00c             // vmovsd    qword [r8 + 8*rdx], xmm1
	LONG $0x4c58fbc5; WORD $0x08d1             // vaddsd    xmm1, xmm0, qword [rcx + 8*rdx + 8]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x08 // vmovsd    qword [r8 + 8*rdx + 8], xmm1
	LONG $0x4c58fbc5; WORD $0x10d1             // vaddsd    xmm1, xmm0, qword [rcx + 8*rdx + 16]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x10 // vmovsd    qword [r8 + 8*rdx + 16], xmm1
	LONG $0x4c58fbc5; WORD $0x18d1             // vaddsd    xmm1, xmm0, qword [rcx + 8*rdx + 24]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x18 // vmovsd    qword [r8 + 8*rdx + 24], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB2_732
	JMP  LBB2_1109

// LBB2_76 -- type dispatch on edi for a scalar-minus-array subtract. The
// edi == 12 case is handled inline on 8-byte floating-point elements:
//     out[i] = scalar - in[i]      (the scalar is the minuend)
// Registers as used below: rdx -> scalar operand, rcx -> input elements,
// r8 -> output elements, r9d = signed element count.
// NOTE(review): LBB2_1109 (taken for unhandled edi values and count <= 0)
// looks like the routine's common exit, and LBB2_472 like a wider path for
// >= 16 non-overlapping elements; both are outside this chunk -- confirm.
LBB2_76:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB2_142
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB2_211
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB2_214
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// Load the scalar once into xmm0; rdx is free to be reused afterwards.
	LONG $0x0210fbc5         // vmovsd    xmm0, qword [rdx]
	// rax = element count n, zero-extended to 64 bits.
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_82
	// n >= 16: branch away when [rcx, rcx+8n) and [r8, r8+8n) are disjoint.
	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_472
	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_472

// Short or overlapping buffers: start the element-wise loop at index 0.
LBB2_82:
	WORD $0xd231 // xor    edx, edx

// rdx = current index. rsi = n - rdx - 1, rdi = n & 3 (elements to process
// one at a time before the 4x unrolled loop).
// NOTE(review): presumably also entered from outside this chunk with rdx
// already advanced -- confirm.
LBB2_737:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_739

// Peel loop: one element per iteration, rdi iterations.
LBB2_738:
	LONG $0x0c5cfbc5; BYTE $0xd1   // vsubsd    xmm1, xmm0, qword [rcx + 8*rdx]
	LONG $0x117bc1c4; WORD $0xd00c // vmovsd    qword [r8 + 8*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB2_738

// rsi < 3 means at most 3 elements were outstanding at LBB2_737: done.
LBB2_739:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rdx equals n (rax).
LBB2_740:
	LONG $0x0c5cfbc5; BYTE $0xd1               // vsubsd    xmm1, xmm0, qword [rcx + 8*rdx]
	LONG $0x117bc1c4; WORD $0xd00c             // vmovsd    qword [r8 + 8*rdx], xmm1
	LONG $0x4c5cfbc5; WORD $0x08d1             // vsubsd    xmm1, xmm0, qword [rcx + 8*rdx + 8]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x08 // vmovsd    qword [r8 + 8*rdx + 8], xmm1
	LONG $0x4c5cfbc5; WORD $0x10d1             // vsubsd    xmm1, xmm0, qword [rcx + 8*rdx + 16]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x10 // vmovsd    qword [r8 + 8*rdx + 16], xmm1
	LONG $0x4c5cfbc5; WORD $0x18d1             // vsubsd    xmm1, xmm0, qword [rcx + 8*rdx + 24]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x18 // vmovsd    qword [r8 + 8*rdx + 24], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB2_740
	JMP  LBB2_1109

// LBB2_83 -- type dispatch on edi for a scalar-plus-array add. The
// edi == 12 case is handled inline on 8-byte floating-point elements:
//     out[i] = scalar + in[i]
// Registers as used below: rdx -> scalar operand, rcx -> input elements,
// r8 -> output elements, r9d = signed element count.
// NOTE(review): the inline arithmetic matches the LBB2_69 section, so this
// is presumably a second operation code that lowers to the same add --
// confirm against the operation dispatch outside this chunk. LBB2_1109
// looks like the common exit and LBB2_475 like a wider path for >= 16
// non-overlapping elements; both are outside this chunk.
LBB2_83:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB2_147
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB2_217
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB2_220
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// Load the scalar once into xmm0; rdx is free to be reused afterwards.
	LONG $0x0210fbc5         // vmovsd    xmm0, qword [rdx]
	// rax = element count n, zero-extended to 64 bits.
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_89
	// n >= 16: branch away when [rcx, rcx+8n) and [r8, r8+8n) are disjoint.
	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_475
	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_475

// Short or overlapping buffers: start the element-wise loop at index 0.
LBB2_89:
	WORD $0xd231 // xor    edx, edx

// rdx = current index. rsi = n - rdx - 1, rdi = n & 3 (elements to process
// one at a time before the 4x unrolled loop).
// NOTE(review): presumably also entered from outside this chunk with rdx
// already advanced -- confirm.
LBB2_745:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_747

// Peel loop: one element per iteration, rdi iterations.
LBB2_746:
	LONG $0x0c58fbc5; BYTE $0xd1   // vaddsd    xmm1, xmm0, qword [rcx + 8*rdx]
	LONG $0x117bc1c4; WORD $0xd00c // vmovsd    qword [r8 + 8*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB2_746

// rsi < 3 means at most 3 elements were outstanding at LBB2_745: done.
LBB2_747:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rdx equals n (rax).
LBB2_748:
	LONG $0x0c58fbc5; BYTE $0xd1               // vaddsd    xmm1, xmm0, qword [rcx + 8*rdx]
	LONG $0x117bc1c4; WORD $0xd00c             // vmovsd    qword [r8 + 8*rdx], xmm1
	LONG $0x4c58fbc5; WORD $0x08d1             // vaddsd    xmm1, xmm0, qword [rcx + 8*rdx + 8]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x08 // vmovsd    qword [r8 + 8*rdx + 8], xmm1
	LONG $0x4c58fbc5; WORD $0x10d1             // vaddsd    xmm1, xmm0, qword [rcx + 8*rdx + 16]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x10 // vmovsd    qword [r8 + 8*rdx + 16], xmm1
	LONG $0x4c58fbc5; WORD $0x18d1             // vaddsd    xmm1, xmm0, qword [rcx + 8*rdx + 24]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x18 // vmovsd    qword [r8 + 8*rdx + 24], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB2_748
	JMP  LBB2_1109

// LBB2_90 -- type dispatch on edi for a scalar-minus-array subtract. The
// edi == 12 case is handled inline on 8-byte floating-point elements:
//     out[i] = scalar - in[i]      (the scalar is the minuend)
// Registers as used below: rdx -> scalar operand, rcx -> input elements,
// r8 -> output elements, r9d = signed element count.
// NOTE(review): the inline arithmetic matches the LBB2_76 section, so this
// is presumably a second operation code that lowers to the same subtract --
// confirm against the operation dispatch outside this chunk. LBB2_1109
// looks like the common exit and LBB2_478 like a wider path for >= 16
// non-overlapping elements; both are outside this chunk.
LBB2_90:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB2_152
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB2_223
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB2_226
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// Load the scalar once into xmm0; rdx is free to be reused afterwards.
	LONG $0x0210fbc5         // vmovsd    xmm0, qword [rdx]
	// rax = element count n, zero-extended to 64 bits.
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_96
	// n >= 16: branch away when [rcx, rcx+8n) and [r8, r8+8n) are disjoint.
	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_478
	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_478

// Short or overlapping buffers: start the element-wise loop at index 0.
LBB2_96:
	WORD $0xd231 // xor    edx, edx

// rdx = current index. rsi = n - rdx - 1, rdi = n & 3 (elements to process
// one at a time before the 4x unrolled loop).
// NOTE(review): presumably also entered from outside this chunk with rdx
// already advanced -- confirm.
LBB2_753:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_755

// Peel loop: one element per iteration, rdi iterations.
LBB2_754:
	LONG $0x0c5cfbc5; BYTE $0xd1   // vsubsd    xmm1, xmm0, qword [rcx + 8*rdx]
	LONG $0x117bc1c4; WORD $0xd00c // vmovsd    qword [r8 + 8*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB2_754

// rsi < 3 means at most 3 elements were outstanding at LBB2_753: done.
LBB2_755:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rdx equals n (rax).
LBB2_756:
	LONG $0x0c5cfbc5; BYTE $0xd1               // vsubsd    xmm1, xmm0, qword [rcx + 8*rdx]
	LONG $0x117bc1c4; WORD $0xd00c             // vmovsd    qword [r8 + 8*rdx], xmm1
	LONG $0x4c5cfbc5; WORD $0x08d1             // vsubsd    xmm1, xmm0, qword [rcx + 8*rdx + 8]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x08 // vmovsd    qword [r8 + 8*rdx + 8], xmm1
	LONG $0x4c5cfbc5; WORD $0x10d1             // vsubsd    xmm1, xmm0, qword [rcx + 8*rdx + 16]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x10 // vmovsd    qword [r8 + 8*rdx + 16], xmm1
	LONG $0x4c5cfbc5; WORD $0x18d1             // vsubsd    xmm1, xmm0, qword [rcx + 8*rdx + 24]
	LONG $0x117bc1c4; WORD $0xd04c; BYTE $0x18 // vmovsd    qword [r8 + 8*rdx + 24], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB2_756
	JMP  LBB2_1109

// LBB2_97 -- type dispatch on edi (values 2 and 3) for a scalar-by-array
// multiply. The edi == 3 case is handled inline on 1-byte elements:
//     out[i] = low 8 bits of (in[i] * scalar)
// Registers as used below: rdx -> scalar operand (its byte is then kept in
// dl), rcx -> input bytes, r8 -> output bytes, r9d = signed element count.
// NOTE(review): LBB2_1109 (taken for unhandled edi values and count <= 0)
// looks like the routine's common exit, and LBB2_481 like a wider path for
// >= 32 non-overlapping elements; both are outside this chunk -- confirm.
LBB2_97:
	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
	JE   LBB2_229
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// dl = scalar byte for the rest of this section.
	WORD $0x128a             // mov    dl, byte [rdx]
	// r10 = element count n, zero-extended to 64 bits.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_101
	// n >= 32: branch away when [rcx, rcx+n) and [r8, r8+n) are disjoint.
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	JBE  LBB2_481
	LONG $0x10048d4b         // lea    rax, [r8 + r10]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB2_481

// Short or overlapping buffers: start at index 0 (rdi no longer holds the
// type selector from here on).
LBB2_101:
	WORD $0xff31 // xor    edi, edi

// rdi = current index. r9 = n - rdi - 1, rsi = n & 3 (elements to process
// one at a time before the 4x unrolled loop).
// NOTE(review): presumably also entered from outside this chunk with rdi
// already advanced -- confirm.
LBB2_627:
	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
	LONG $0x03e68348         // and    rsi, 3
	JE   LBB2_629

// Peel loop: `mul dl` computes ax = al * dl; only al is stored.
LBB2_628:
	LONG $0x3904b60f // movzx    eax, byte [rcx + rdi]
	WORD $0xe2f6     // mul    dl
	LONG $0x38048841 // mov    byte [r8 + rdi], al
	LONG $0x01c78348 // add    rdi, 1
	LONG $0xffc68348 // add    rsi, -1
	JNE  LBB2_628

// r9 < 3 means at most 3 elements were outstanding at LBB2_627: done.
LBB2_629:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rdi equals n (r10).
LBB2_630:
	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38048841             // mov    byte [r8 + rdi], al
	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
	LONG $0x04c78348             // add    rdi, 4
	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
	JNE  LBB2_630
	JMP  LBB2_1109

// LBB2_102 -- type dispatch on edi (values 2 and 3) for a scalar-by-array
// multiply. The edi == 3 case is handled inline on 1-byte elements:
//     out[i] = low 8 bits of (in[i] * scalar)
// Registers as used below: rdx -> scalar operand (its byte is then kept in
// dl), rcx -> input bytes, r8 -> output bytes, r9d = signed element count.
// NOTE(review): the inline arithmetic matches the LBB2_97 section, so this
// is presumably a second operation code that lowers to the same multiply --
// confirm against the operation dispatch outside this chunk. LBB2_1109
// looks like the common exit and LBB2_483 like a wider path for >= 32
// non-overlapping elements; both are outside this chunk.
LBB2_102:
	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
	JE   LBB2_232
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// dl = scalar byte for the rest of this section.
	WORD $0x128a             // mov    dl, byte [rdx]
	// r10 = element count n, zero-extended to 64 bits.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_106
	// n >= 32: branch away when [rcx, rcx+n) and [r8, r8+n) are disjoint.
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	JBE  LBB2_483
	LONG $0x10048d4b         // lea    rax, [r8 + r10]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB2_483

// Short or overlapping buffers: start at index 0 (rdi no longer holds the
// type selector from here on).
LBB2_106:
	WORD $0xff31 // xor    edi, edi

// rdi = current index. r9 = n - rdi - 1, rsi = n & 3 (elements to process
// one at a time before the 4x unrolled loop).
// NOTE(review): presumably also entered from outside this chunk with rdi
// already advanced -- confirm.
LBB2_637:
	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
	LONG $0x03e68348         // and    rsi, 3
	JE   LBB2_639

// Peel loop: `mul dl` computes ax = al * dl; only al is stored.
LBB2_638:
	LONG $0x3904b60f // movzx    eax, byte [rcx + rdi]
	WORD $0xe2f6     // mul    dl
	LONG $0x38048841 // mov    byte [r8 + rdi], al
	LONG $0x01c78348 // add    rdi, 1
	LONG $0xffc68348 // add    rsi, -1
	JNE  LBB2_638

// r9 < 3 means at most 3 elements were outstanding at LBB2_637: done.
LBB2_639:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rdi equals n (r10).
LBB2_640:
	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38048841             // mov    byte [r8 + rdi], al
	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
	LONG $0x04c78348             // add    rdi, 4
	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
	JNE  LBB2_640
	JMP  LBB2_1109

// LBB2_107 -- type dispatch on edi (values 2 and 3) for a scalar-plus-array
// add. The edi == 3 case is handled inline on 1-byte elements:
//     out[i] = in[i] + scalar      (8-bit add, result wraps modulo 256)
// Registers as used below: rdx -> scalar operand (its byte is then kept in
// al), rcx -> input bytes, r8 -> output bytes, r9d = signed element count.
// NOTE(review): LBB2_1109 (taken for unhandled edi values and count <= 0)
// looks like the routine's common exit, and LBB2_485 like a wider path for
// >= 128 non-overlapping elements; both are outside this chunk -- confirm.
LBB2_107:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB2_235
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB2_1109
	// al = scalar byte for the rest of this section.
	WORD $0x028a                               // mov    al, byte [rdx]
	// r10 = element count n, zero-extended to 64 bits.
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB2_111
	// n >= 128: branch away when [rcx, rcx+n) and [r8, r8+n) are disjoint.
	LONG $0x11148d4a                           // lea    rdx, [rcx + r10]
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JBE  LBB2_485
	LONG $0x10148d4b                           // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca                   // cmp    rdx, rcx
	JBE  LBB2_485

// Short or overlapping buffers: start the element-wise loop at index 0.
LBB2_111:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r9 = n - rsi - 1, rdi = n & 3 (elements to process
// one at a time before the 4x unrolled loop; rdi no longer holds the type
// selector).
// NOTE(review): presumably also entered from outside this chunk with rsi
// already advanced -- confirm.
LBB2_761:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_763

// Peel loop: one byte per iteration, rdi iterations.
LBB2_762:
	LONG $0x3114b60f // movzx    edx, byte [rcx + rsi]
	WORD $0xc200     // add    dl, al
	LONG $0x30148841 // mov    byte [r8 + rsi], dl
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB2_762

// r9 < 3 means at most 3 elements were outstanding at LBB2_761: done.
LBB2_763:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rsi equals n (r10).
LBB2_764:
	LONG $0x3114b60f             // movzx    edx, byte [rcx + rsi]
	WORD $0xc200                 // add    dl, al
	LONG $0x30148841             // mov    byte [r8 + rsi], dl
	LONG $0x3154b60f; BYTE $0x01 // movzx    edx, byte [rcx + rsi + 1]
	WORD $0xc200                 // add    dl, al
	LONG $0x30548841; BYTE $0x01 // mov    byte [r8 + rsi + 1], dl
	LONG $0x3154b60f; BYTE $0x02 // movzx    edx, byte [rcx + rsi + 2]
	WORD $0xc200                 // add    dl, al
	LONG $0x30548841; BYTE $0x02 // mov    byte [r8 + rsi + 2], dl
	LONG $0x3154b60f; BYTE $0x03 // movzx    edx, byte [rcx + rsi + 3]
	WORD $0xc200                 // add    dl, al
	LONG $0x30548841; BYTE $0x03 // mov    byte [r8 + rsi + 3], dl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_764
	JMP  LBB2_1109

// LBB2_112 -- type dispatch on edi (values 2 and 3) for a scalar-minus-array
// subtract. The edi == 3 case is handled inline on 1-byte elements:
//     out[i] = scalar - in[i]      (8-bit subtract, result wraps modulo 256)
// Registers as used below: rdx -> scalar operand (its byte is then kept in
// al), rcx -> input bytes, r8 -> output bytes, r9d = signed element count.
// NOTE(review): LBB2_1109 (taken for unhandled edi values and count <= 0)
// looks like the routine's common exit, and LBB2_488 like a wider path for
// >= 128 non-overlapping elements; both are outside this chunk -- confirm.
LBB2_112:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB2_238
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB2_1109
	// al = scalar byte for the rest of this section.
	WORD $0x028a                               // mov    al, byte [rdx]
	// r10 = element count n, zero-extended to 64 bits.
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB2_116
	// n >= 128: branch away when [rcx, rcx+n) and [r8, r8+n) are disjoint.
	LONG $0x11148d4a                           // lea    rdx, [rcx + r10]
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JBE  LBB2_488
	LONG $0x10148d4b                           // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca                   // cmp    rdx, rcx
	JBE  LBB2_488

// Short or overlapping buffers: start the element-wise loop at index 0.
LBB2_116:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r9 = n - rsi - 1, rdi = n & 3 (elements to process
// one at a time before the 4x unrolled loop; rdi no longer holds the type
// selector).
// NOTE(review): presumably also entered from outside this chunk with rsi
// already advanced -- confirm.
LBB2_769:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_771

// Peel loop: the scalar is copied to edx each time because it is the
// minuend and `sub` overwrites its destination.
LBB2_770:
	WORD $0xc289             // mov    edx, eax
	WORD $0x142a; BYTE $0x31 // sub    dl, byte [rcx + rsi]
	LONG $0x30148841         // mov    byte [r8 + rsi], dl
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_770

// r9 < 3 means at most 3 elements were outstanding at LBB2_769: done.
LBB2_771:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rsi equals n (r10).
LBB2_772:
	WORD $0xc289                 // mov    edx, eax
	WORD $0x142a; BYTE $0x31     // sub    dl, byte [rcx + rsi]
	LONG $0x30148841             // mov    byte [r8 + rsi], dl
	WORD $0xc289                 // mov    edx, eax
	LONG $0x0131542a             // sub    dl, byte [rcx + rsi + 1]
	LONG $0x30548841; BYTE $0x01 // mov    byte [r8 + rsi + 1], dl
	WORD $0xc289                 // mov    edx, eax
	LONG $0x0231542a             // sub    dl, byte [rcx + rsi + 2]
	LONG $0x30548841; BYTE $0x02 // mov    byte [r8 + rsi + 2], dl
	WORD $0xc289                 // mov    edx, eax
	LONG $0x0331542a             // sub    dl, byte [rcx + rsi + 3]
	LONG $0x30548841; BYTE $0x03 // mov    byte [r8 + rsi + 3], dl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_772
	JMP  LBB2_1109

// LBB2_117 -- type dispatch on edi (values 2 and 3) for a scalar-plus-array
// add. The edi == 3 case is handled inline on 1-byte elements:
//     out[i] = in[i] + scalar      (8-bit add, result wraps modulo 256)
// Registers as used below: rdx -> scalar operand (its byte is then kept in
// al), rcx -> input bytes, r8 -> output bytes, r9d = signed element count.
// NOTE(review): the inline arithmetic matches the LBB2_107 section, so this
// is presumably a second operation code that lowers to the same add --
// confirm against the operation dispatch outside this chunk. LBB2_1109
// looks like the common exit and LBB2_491 like a wider path for >= 128
// non-overlapping elements; both are outside this chunk.
LBB2_117:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB2_241
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB2_1109
	// al = scalar byte for the rest of this section.
	WORD $0x028a                               // mov    al, byte [rdx]
	// r10 = element count n, zero-extended to 64 bits.
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB2_121
	// n >= 128: branch away when [rcx, rcx+n) and [r8, r8+n) are disjoint.
	LONG $0x11148d4a                           // lea    rdx, [rcx + r10]
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JBE  LBB2_491
	LONG $0x10148d4b                           // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca                   // cmp    rdx, rcx
	JBE  LBB2_491

// Short or overlapping buffers: start the element-wise loop at index 0.
LBB2_121:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r9 = n - rsi - 1, rdi = n & 3 (elements to process
// one at a time before the 4x unrolled loop; rdi no longer holds the type
// selector).
// NOTE(review): presumably also entered from outside this chunk with rsi
// already advanced -- confirm.
LBB2_777:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_779

// Peel loop: one byte per iteration, rdi iterations.
LBB2_778:
	LONG $0x3114b60f // movzx    edx, byte [rcx + rsi]
	WORD $0xc200     // add    dl, al
	LONG $0x30148841 // mov    byte [r8 + rsi], dl
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB2_778

// r9 < 3 means at most 3 elements were outstanding at LBB2_777: done.
LBB2_779:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rsi equals n (r10).
LBB2_780:
	LONG $0x3114b60f             // movzx    edx, byte [rcx + rsi]
	WORD $0xc200                 // add    dl, al
	LONG $0x30148841             // mov    byte [r8 + rsi], dl
	LONG $0x3154b60f; BYTE $0x01 // movzx    edx, byte [rcx + rsi + 1]
	WORD $0xc200                 // add    dl, al
	LONG $0x30548841; BYTE $0x01 // mov    byte [r8 + rsi + 1], dl
	LONG $0x3154b60f; BYTE $0x02 // movzx    edx, byte [rcx + rsi + 2]
	WORD $0xc200                 // add    dl, al
	LONG $0x30548841; BYTE $0x02 // mov    byte [r8 + rsi + 2], dl
	LONG $0x3154b60f; BYTE $0x03 // movzx    edx, byte [rcx + rsi + 3]
	WORD $0xc200                 // add    dl, al
	LONG $0x30548841; BYTE $0x03 // mov    byte [r8 + rsi + 3], dl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_780
	JMP  LBB2_1109

// LBB2_122 -- type dispatch on edi (values 2 and 3) for a scalar-minus-array
// subtract. The edi == 3 case is handled inline on 1-byte elements:
//     out[i] = scalar - in[i]      (8-bit subtract, result wraps modulo 256)
// Registers as used below: rdx -> scalar operand (its byte is then kept in
// al), rcx -> input bytes, r8 -> output bytes, r9d = signed element count.
// NOTE(review): the inline arithmetic matches the LBB2_112 section, so this
// is presumably a second operation code that lowers to the same subtract --
// confirm against the operation dispatch outside this chunk. LBB2_1109
// looks like the common exit and LBB2_494 like a wider path for >= 128
// non-overlapping elements; both are outside this chunk.
LBB2_122:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB2_244
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB2_1109
	// al = scalar byte for the rest of this section.
	WORD $0x028a                               // mov    al, byte [rdx]
	// r10 = element count n, zero-extended to 64 bits.
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB2_126
	// n >= 128: branch away when [rcx, rcx+n) and [r8, r8+n) are disjoint.
	LONG $0x11148d4a                           // lea    rdx, [rcx + r10]
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JBE  LBB2_494
	LONG $0x10148d4b                           // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca                   // cmp    rdx, rcx
	JBE  LBB2_494

// Short or overlapping buffers: start the element-wise loop at index 0.
LBB2_126:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r9 = n - rsi - 1, rdi = n & 3 (elements to process
// one at a time before the 4x unrolled loop; rdi no longer holds the type
// selector).
// NOTE(review): presumably also entered from outside this chunk with rsi
// already advanced -- confirm.
LBB2_785:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_787

// Peel loop: the scalar is copied to edx each time because it is the
// minuend and `sub` overwrites its destination.
LBB2_786:
	WORD $0xc289             // mov    edx, eax
	WORD $0x142a; BYTE $0x31 // sub    dl, byte [rcx + rsi]
	LONG $0x30148841         // mov    byte [r8 + rsi], dl
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_786

// r9 < 3 means at most 3 elements were outstanding at LBB2_785: done.
LBB2_787:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rsi equals n (r10).
LBB2_788:
	WORD $0xc289                 // mov    edx, eax
	WORD $0x142a; BYTE $0x31     // sub    dl, byte [rcx + rsi]
	LONG $0x30148841             // mov    byte [r8 + rsi], dl
	WORD $0xc289                 // mov    edx, eax
	LONG $0x0131542a             // sub    dl, byte [rcx + rsi + 1]
	LONG $0x30548841; BYTE $0x01 // mov    byte [r8 + rsi + 1], dl
	WORD $0xc289                 // mov    edx, eax
	LONG $0x0231542a             // sub    dl, byte [rcx + rsi + 2]
	LONG $0x30548841; BYTE $0x02 // mov    byte [r8 + rsi + 2], dl
	WORD $0xc289                 // mov    edx, eax
	LONG $0x0331542a             // sub    dl, byte [rcx + rsi + 3]
	LONG $0x30548841; BYTE $0x03 // mov    byte [r8 + rsi + 3], dl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_788
	JMP  LBB2_1109

// LBB2_127 -- type dispatch on edi (values 7 and 8) for a scalar-by-array
// multiply. The edi == 8 case is handled inline on 8-byte integer elements:
//     out[i] = low 64 bits of (in[i] * scalar)
// Registers as used below: rdx -> scalar operand (its value is then kept in
// rax), rcx -> input elements, r8 -> output elements, r9d = signed count.
// NOTE(review): LBB2_1109 (taken for unhandled edi values and count <= 0)
// looks like the routine's common exit, and LBB2_497 like a wider path for
// >= 16 non-overlapping elements; both are outside this chunk -- confirm.
LBB2_127:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB2_247
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// rax = scalar value for the rest of this section.
	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
	// r10 = element count n, zero-extended to 64 bits.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_131
	// n >= 16: branch away when [rcx, rcx+8n) and [r8, r8+8n) are disjoint.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_497
	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_497

// Short or overlapping buffers: start the element-wise loop at index 0.
LBB2_131:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r9 = n - rsi - 1, rdi = n & 3 (elements to process
// one at a time before the 4x unrolled loop; rdi no longer holds the type
// selector).
// NOTE(review): presumably also entered from outside this chunk with rsi
// already advanced -- confirm.
LBB2_793:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_795

// Peel loop: one element per iteration, rdi iterations.
LBB2_794:
	LONG $0xf1148b48 // mov    rdx, qword [rcx + 8*rsi]
	LONG $0xd0af0f48 // imul    rdx, rax
	LONG $0xf0148949 // mov    qword [r8 + 8*rsi], rdx
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB2_794

// r9 < 3 means at most 3 elements were outstanding at LBB2_793: done.
LBB2_795:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rsi equals n (r10).
LBB2_796:
	LONG $0xf1148b48             // mov    rdx, qword [rcx + 8*rsi]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0148949             // mov    qword [r8 + 8*rsi], rdx
	LONG $0xf1548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rsi + 8]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0548949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rdx
	LONG $0xf1548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rsi + 16]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0548949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rdx
	LONG $0xf1548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rsi + 24]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0548949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rdx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_796
	JMP  LBB2_1109

// LBB2_132 -- reached from LBB2_62 when edi <= 8; dispatches on edi values
// 7 and 8 for a scalar-by-array multiply. The edi == 8 case is handled
// inline on 8-byte integer elements:
//     out[i] = low 64 bits of (in[i] * scalar)
// Registers as used below: rdx -> scalar operand (its value is then kept in
// rax), rcx -> input elements, r8 -> output elements, r9d = signed count.
// NOTE(review): LBB2_1109 (taken for unhandled edi values and count <= 0)
// looks like the routine's common exit, and LBB2_500 like a wider path for
// >= 16 non-overlapping elements; both are outside this chunk -- confirm.
LBB2_132:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB2_250
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// rax = scalar value for the rest of this section.
	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
	// r10 = element count n, zero-extended to 64 bits.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_136
	// n >= 16: branch away when [rcx, rcx+8n) and [r8, r8+8n) are disjoint.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_500
	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_500

// Short or overlapping buffers: start the element-wise loop at index 0.
LBB2_136:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r9 = n - rsi - 1, rdi = n & 3 (elements to process
// one at a time before the 4x unrolled loop; rdi no longer holds the type
// selector).
// NOTE(review): presumably also entered from outside this chunk with rsi
// already advanced -- confirm.
LBB2_801:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_803

// Peel loop: one element per iteration, rdi iterations.
LBB2_802:
	LONG $0xf1148b48 // mov    rdx, qword [rcx + 8*rsi]
	LONG $0xd0af0f48 // imul    rdx, rax
	LONG $0xf0148949 // mov    qword [r8 + 8*rsi], rdx
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB2_802

// r9 < 3 means at most 3 elements were outstanding at LBB2_801: done.
LBB2_803:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rsi equals n (r10).
LBB2_804:
	LONG $0xf1148b48             // mov    rdx, qword [rcx + 8*rsi]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0148949             // mov    qword [r8 + 8*rsi], rdx
	LONG $0xf1548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rsi + 8]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0548949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rdx
	LONG $0xf1548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rsi + 16]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0548949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rdx
	LONG $0xf1548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rsi + 24]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0548949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rdx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_804
	JMP  LBB2_1109

// LBB2_137 -- reached from LBB2_69 when edi <= 8; dispatches on edi values
// 7 and 8 for a scalar-plus-array add. The edi == 8 case is handled inline
// on 8-byte integer elements:
//     out[i] = in[i] + scalar      (64-bit add, wraps on overflow)
// Registers as used below: rdx -> scalar operand (its value is then kept in
// rax), rcx -> input elements, r8 -> output elements, r9d = signed count.
// NOTE(review): LBB2_1109 (taken for unhandled edi values and count <= 0)
// looks like the routine's common exit, and LBB2_503 like a wider path for
// >= 16 non-overlapping elements; both are outside this chunk -- confirm.
LBB2_137:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB2_253
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// rax = scalar value for the rest of this section.
	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
	// r10 = element count n, zero-extended to 64 bits.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_141
	// n >= 16: branch away when [rcx, rcx+8n) and [r8, r8+8n) are disjoint.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_503
	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_503

// Short or overlapping buffers: start the element-wise loop at index 0.
LBB2_141:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r9 = n - rsi - 1, rdi = n & 3 (elements to process
// one at a time before the 4x unrolled loop; rdi no longer holds the type
// selector).
// NOTE(review): presumably also entered from outside this chunk with rsi
// already advanced -- confirm.
LBB2_809:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_811

// Peel loop: one element per iteration, rdi iterations.
LBB2_810:
	LONG $0xf1148b48         // mov    rdx, qword [rcx + 8*rsi]
	WORD $0x0148; BYTE $0xc2 // add    rdx, rax
	LONG $0xf0148949         // mov    qword [r8 + 8*rsi], rdx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_810

// r9 < 3 means at most 3 elements were outstanding at LBB2_809: done.
LBB2_811:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rsi equals n (r10).
LBB2_812:
	LONG $0xf1148b48             // mov    rdx, qword [rcx + 8*rsi]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0148949             // mov    qword [r8 + 8*rsi], rdx
	LONG $0xf1548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rsi + 8]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0548949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rdx
	LONG $0xf1548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rsi + 16]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0548949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rdx
	LONG $0xf1548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rsi + 24]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0548949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rdx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_812
	JMP  LBB2_1109

// LBB2_142 -- reached from LBB2_76 when edi <= 8; dispatches on edi values
// 7 and 8 for a scalar-minus-array subtract. The edi == 8 case is handled
// inline on 8-byte integer elements:
//     out[i] = scalar - in[i]      (64-bit subtract, wraps on overflow)
// Registers as used below: rdx -> scalar operand (its value is then kept in
// r11), rcx -> input elements, r8 -> output elements, r9d = signed count.
// NOTE(review): LBB2_1109 (taken for unhandled edi values and count <= 0)
// looks like the routine's common exit, and LBB2_506 like a wider path for
// >= 16 non-overlapping elements; both are outside this chunk -- confirm.
LBB2_142:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB2_256
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// r11 = scalar value; rax is needed as the per-element temporary because
	// the scalar is the minuend and `sub` overwrites its destination.
	WORD $0x8b4c; BYTE $0x1a // mov    r11, qword [rdx]
	// r10 = element count n, zero-extended to 64 bits.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_146
	// n >= 16: branch away when [rcx, rcx+8n) and [r8, r8+8n) are disjoint.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_506
	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_506

// Short or overlapping buffers: start the element-wise loop at index 0.
LBB2_146:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. rdx = n - rsi - 1, rdi = n & 3 (elements to process
// one at a time before the 4x unrolled loop; rdi no longer holds the type
// selector).
// NOTE(review): presumably also entered from outside this chunk with rsi
// already advanced -- confirm.
LBB2_817:
	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
	WORD $0xf748; BYTE $0xd2 // not    rdx
	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_819

// Peel loop: one element per iteration, rdi iterations.
LBB2_818:
	WORD $0x894c; BYTE $0xd8 // mov    rax, r11
	LONG $0xf1042b48         // sub    rax, qword [rcx + 8*rsi]
	LONG $0xf0048949         // mov    qword [r8 + 8*rsi], rax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_818

// rdx < 3 means at most 3 elements were outstanding at LBB2_817: done.
LBB2_819:
	LONG $0x03fa8348 // cmp    rdx, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rsi equals n (r10).
LBB2_820:
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_820
	JMP  LBB2_1109

// LBB2_147 -- reached from LBB2_83 when edi <= 8; dispatches on edi values
// 7 and 8 for a scalar-plus-array add. The edi == 8 case is handled inline
// on 8-byte integer elements:
//     out[i] = in[i] + scalar      (64-bit add, wraps on overflow)
// Registers as used below: rdx -> scalar operand (its value is then kept in
// rax), rcx -> input elements, r8 -> output elements, r9d = signed count.
// NOTE(review): the inline arithmetic matches the LBB2_137 section, so this
// is presumably a second operation code that lowers to the same add --
// confirm against the operation dispatch outside this chunk. LBB2_1109
// looks like the common exit and LBB2_509 like a wider path for >= 16
// non-overlapping elements; both are outside this chunk.
LBB2_147:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB2_259
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// rax = scalar value for the rest of this section.
	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
	// r10 = element count n, zero-extended to 64 bits.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_151
	// n >= 16: branch away when [rcx, rcx+8n) and [r8, r8+8n) are disjoint.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_509
	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_509

// Short or overlapping buffers: start the element-wise loop at index 0.
LBB2_151:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r9 = n - rsi - 1, rdi = n & 3 (elements to process
// one at a time before the 4x unrolled loop; rdi no longer holds the type
// selector).
// NOTE(review): presumably also entered from outside this chunk with rsi
// already advanced -- confirm.
LBB2_825:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_827

// Peel loop: one element per iteration, rdi iterations.
LBB2_826:
	LONG $0xf1148b48         // mov    rdx, qword [rcx + 8*rsi]
	WORD $0x0148; BYTE $0xc2 // add    rdx, rax
	LONG $0xf0148949         // mov    qword [r8 + 8*rsi], rdx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_826

// r9 < 3 means at most 3 elements were outstanding at LBB2_825: done.
LBB2_827:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled 4x; terminates when the index rsi equals n (r10).
LBB2_828:
	LONG $0xf1148b48             // mov    rdx, qword [rcx + 8*rsi]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0148949             // mov    qword [r8 + 8*rsi], rdx
	LONG $0xf1548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rsi + 8]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0548949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rdx
	LONG $0xf1548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rsi + 16]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0548949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rdx
	LONG $0xf1548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rsi + 24]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0548949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rdx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_828
	JMP  LBB2_1109

// LBB2_152: type dispatch on edi, then 64-bit "scalar - element" over an array.
//   edi == 7 branches to LBB2_262; any value other than 7 or 8 leaves via LBB2_1109.
//   For edi == 8: r11 = qword [rdx] (read once), then
//   [r8 + 8*i] = r11 - [rcx + 8*i] for each index i below the count in r9d.
//   Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_512
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_152:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB2_262
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB2_1109
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x8b4c; BYTE $0x1a // mov    r11, qword [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 16 elements: use the scalar loop directly.
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_156
	// Overlap check: LBB2_512 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_512
	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_512

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_156:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// rdx = r10 - rsi - 1; rdi = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_833:
	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
	WORD $0xf748; BYTE $0xd2 // not    rdx
	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_835

	// One element per iteration, rdi times. rax is reloaded from r11 each time
	// because the subtraction overwrites it.
LBB2_834:
	WORD $0x894c; BYTE $0xd8 // mov    rax, r11
	LONG $0xf1042b48         // sub    rax, qword [rcx + 8*rsi]
	LONG $0xf0048949         // mov    qword [r8 + 8*rsi], rax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_834

	// rdx < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_835:
	LONG $0x03fa8348 // cmp    rdx, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_836:
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_836
	JMP  LBB2_1109

// LBB2_157: 16-bit "element * scalar" over an array.
//   eax = zero-extended word [rdx] (read once), then
//   word [r8 + 2*i] = low 16 bits of (word [rcx + 2*i] * ax) for each index i
//   below the count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_515
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_157:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: use the scalar loop directly.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_159
	// Overlap check: LBB2_515 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_515
	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_515

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_159:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdx = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_841:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd2 // mov    rdx, r10
	LONG $0x03e28348         // and    rdx, 3
	JE   LBB2_843

	// One element per iteration, rdx times.
LBB2_842:
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	LONG $0xf8af0f66             // imul    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc28348             // add    rdx, -1
	JNE  LBB2_842

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_843:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_844:
	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB2_844
	JMP  LBB2_1109

// LBB2_160: 16-bit "element * scalar" over an array.
//   eax = zero-extended word [rdx] (read once), then
//   word [r8 + 2*i] = low 16 bits of (word [rcx + 2*i] * ax) for each index i
//   below the count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_518
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_160:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: use the scalar loop directly.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_162
	// Overlap check: LBB2_518 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_518
	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_518

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_162:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdx = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_849:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd2 // mov    rdx, r10
	LONG $0x03e28348         // and    rdx, 3
	JE   LBB2_851

	// One element per iteration, rdx times.
LBB2_850:
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	LONG $0xf8af0f66             // imul    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc28348             // add    rdx, -1
	JNE  LBB2_850

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_851:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_852:
	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB2_852
	JMP  LBB2_1109

// LBB2_163: 16-bit "element * scalar" over an array.
//   eax = zero-extended word [rdx] (read once), then
//   word [r8 + 2*i] = low 16 bits of (word [rcx + 2*i] * ax) for each index i
//   below the count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_521
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_163:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: use the scalar loop directly.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_165
	// Overlap check: LBB2_521 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_521
	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_521

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_165:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdx = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_857:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd2 // mov    rdx, r10
	LONG $0x03e28348         // and    rdx, 3
	JE   LBB2_859

	// One element per iteration, rdx times.
LBB2_858:
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	LONG $0xf8af0f66             // imul    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc28348             // add    rdx, -1
	JNE  LBB2_858

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_859:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_860:
	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB2_860
	JMP  LBB2_1109

// LBB2_166: 16-bit "element * scalar" over an array.
//   eax = zero-extended word [rdx] (read once), then
//   word [r8 + 2*i] = low 16 bits of (word [rcx + 2*i] * ax) for each index i
//   below the count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_524
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_166:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: use the scalar loop directly.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_168
	// Overlap check: LBB2_524 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_524
	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_524

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_168:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdx = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_865:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd2 // mov    rdx, r10
	LONG $0x03e28348         // and    rdx, 3
	JE   LBB2_867

	// One element per iteration, rdx times.
LBB2_866:
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	LONG $0xf8af0f66             // imul    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc28348             // add    rdx, -1
	JNE  LBB2_866

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_867:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_868:
	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
	LONG $0xd0af0f66               // imul    dx, ax
	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB2_868
	JMP  LBB2_1109

// LBB2_169: 16-bit "element + scalar" over an array.
//   eax = zero-extended word [rdx] (read once), then
//   word [r8 + 2*i] = word [rcx + 2*i] + ax (16-bit wraparound) for each index i
//   below the count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_527
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_169:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: use the scalar loop directly.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_171
	// Overlap check: LBB2_527 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_527
	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_527

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_171:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdx = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_873:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd2 // mov    rdx, r10
	LONG $0x03e28348         // and    rdx, 3
	JE   LBB2_875

	// One element per iteration, rdx times.
LBB2_874:
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	WORD $0x0166; BYTE $0xc7     // add    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc28348             // add    rdx, -1
	JNE  LBB2_874

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_875:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_876:
	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB2_876
	JMP  LBB2_1109

// LBB2_172: 16-bit "element + scalar" over an array.
//   eax = zero-extended word [rdx] (read once), then
//   word [r8 + 2*i] = word [rcx + 2*i] + ax (16-bit wraparound) for each index i
//   below the count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_530
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_172:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: use the scalar loop directly.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_174
	// Overlap check: LBB2_530 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_530
	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_530

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_174:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdx = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_881:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd2 // mov    rdx, r10
	LONG $0x03e28348         // and    rdx, 3
	JE   LBB2_883

	// One element per iteration, rdx times.
LBB2_882:
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	WORD $0x0166; BYTE $0xc7     // add    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc28348             // add    rdx, -1
	JNE  LBB2_882

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_883:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_884:
	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB2_884
	JMP  LBB2_1109

// LBB2_175: 16-bit "scalar - element" over an array.
//   eax = zero-extended word [rdx] (read once), then
//   word [r8 + 2*i] = ax - word [rcx + 2*i] (16-bit wraparound) for each index i
//   below the count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_533
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_175:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: use the scalar loop directly.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_177
	// Overlap check: LBB2_533 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_533
	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_533

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_177:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdx = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_889:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd2 // mov    rdx, r10
	LONG $0x03e28348         // and    rdx, 3
	JE   LBB2_891

	// One element per iteration, rdx times. The scalar is copied out of eax each
	// time because the subtraction overwrites its destination.
LBB2_890:
	WORD $0xc789                 // mov    edi, eax
	LONG $0x713c2b66             // sub    di, word [rcx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc28348             // add    rdx, -1
	JNE  LBB2_890

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_891:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_892:
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71142b66               // sub    dx, word [rcx + 2*rsi]
	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71542b66; BYTE $0x02   // sub    dx, word [rcx + 2*rsi + 2]
	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71542b66; BYTE $0x04   // sub    dx, word [rcx + 2*rsi + 4]
	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71542b66; BYTE $0x06   // sub    dx, word [rcx + 2*rsi + 6]
	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB2_892
	JMP  LBB2_1109

// LBB2_178: 16-bit "scalar - element" over an array.
//   eax = zero-extended word [rdx] (read once), then
//   word [r8 + 2*i] = ax - word [rcx + 2*i] (16-bit wraparound) for each index i
//   below the count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_536
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_178:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: use the scalar loop directly.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_180
	// Overlap check: LBB2_536 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_536
	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_536

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_180:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdx = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_897:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd2 // mov    rdx, r10
	LONG $0x03e28348         // and    rdx, 3
	JE   LBB2_899

	// One element per iteration, rdx times. The scalar is copied out of eax each
	// time because the subtraction overwrites its destination.
LBB2_898:
	WORD $0xc789                 // mov    edi, eax
	LONG $0x713c2b66             // sub    di, word [rcx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc28348             // add    rdx, -1
	JNE  LBB2_898

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_899:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_900:
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71142b66               // sub    dx, word [rcx + 2*rsi]
	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71542b66; BYTE $0x02   // sub    dx, word [rcx + 2*rsi + 2]
	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71542b66; BYTE $0x04   // sub    dx, word [rcx + 2*rsi + 4]
	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71542b66; BYTE $0x06   // sub    dx, word [rcx + 2*rsi + 6]
	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB2_900
	JMP  LBB2_1109

// LBB2_181: 16-bit "element + scalar" over an array.
//   eax = zero-extended word [rdx] (read once), then
//   word [r8 + 2*i] = word [rcx + 2*i] + ax (16-bit wraparound) for each index i
//   below the count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_539
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_181:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: use the scalar loop directly.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_183
	// Overlap check: LBB2_539 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_539
	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_539

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_183:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdx = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_905:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd2 // mov    rdx, r10
	LONG $0x03e28348         // and    rdx, 3
	JE   LBB2_907

	// One element per iteration, rdx times.
LBB2_906:
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	WORD $0x0166; BYTE $0xc7     // add    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc28348             // add    rdx, -1
	JNE  LBB2_906

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_907:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_908:
	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB2_908
	JMP  LBB2_1109

// LBB2_184: 16-bit "element + scalar" over an array.
//   eax = zero-extended word [rdx] (read once), then
//   word [r8 + 2*i] = word [rcx + 2*i] + ax (16-bit wraparound) for each index i
//   below the count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_542
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_184:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: use the scalar loop directly.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_186
	// Overlap check: LBB2_542 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_542
	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_542

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_186:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdx = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_913:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd2 // mov    rdx, r10
	LONG $0x03e28348         // and    rdx, 3
	JE   LBB2_915

	// One element per iteration, rdx times.
LBB2_914:
	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
	WORD $0x0166; BYTE $0xc7     // add    di, ax
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc28348             // add    rdx, -1
	JNE  LBB2_914

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_915:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_916:
	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
	WORD $0x0166; BYTE $0xc2       // add    dx, ax
	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB2_916
	JMP  LBB2_1109

// LBB2_187: 16-bit "scalar - element" over an array.
//   eax = zero-extended word [rdx] (read once), then
//   word [r8 + 2*i] = ax - word [rcx + 2*i] (16-bit wraparound) for each index i
//   below the count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_545
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_187:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: use the scalar loop directly.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_189
	// Overlap check: LBB2_545 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_545
	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_545

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_189:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdx = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_921:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd2 // mov    rdx, r10
	LONG $0x03e28348         // and    rdx, 3
	JE   LBB2_923

	// One element per iteration, rdx times. The scalar is copied out of eax each
	// time because the subtraction overwrites its destination.
LBB2_922:
	WORD $0xc789                 // mov    edi, eax
	LONG $0x713c2b66             // sub    di, word [rcx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc28348             // add    rdx, -1
	JNE  LBB2_922

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_923:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_924:
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71142b66               // sub    dx, word [rcx + 2*rsi]
	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71542b66; BYTE $0x02   // sub    dx, word [rcx + 2*rsi + 2]
	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71542b66; BYTE $0x04   // sub    dx, word [rcx + 2*rsi + 4]
	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71542b66; BYTE $0x06   // sub    dx, word [rcx + 2*rsi + 6]
	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB2_924
	JMP  LBB2_1109

// LBB2_190: 16-bit "scalar - element" over an array.
//   eax = zero-extended word [rdx] (read once), then
//   word [r8 + 2*i] = ax - word [rcx + 2*i] (16-bit wraparound) for each index i
//   below the count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_548
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_190:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: use the scalar loop directly.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_192
	// Overlap check: LBB2_548 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_548
	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_548

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_192:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdx = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_929:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd2 // mov    rdx, r10
	LONG $0x03e28348         // and    rdx, 3
	JE   LBB2_931

	// One element per iteration, rdx times. The scalar is copied out of eax each
	// time because the subtraction overwrites its destination.
LBB2_930:
	WORD $0xc789                 // mov    edi, eax
	LONG $0x713c2b66             // sub    di, word [rcx + 2*rsi]
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc28348             // add    rdx, -1
	JNE  LBB2_930

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_931:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_932:
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71142b66               // sub    dx, word [rcx + 2*rsi]
	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71542b66; BYTE $0x02   // sub    dx, word [rcx + 2*rsi + 2]
	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71542b66; BYTE $0x04   // sub    dx, word [rcx + 2*rsi + 4]
	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
	WORD $0xc289                   // mov    edx, eax
	LONG $0x71542b66; BYTE $0x06   // sub    dx, word [rcx + 2*rsi + 6]
	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
	JNE  LBB2_932
	JMP  LBB2_1109

// LBB2_193: 64-bit "element * scalar" over an array.
//   rax = qword [rdx] (read once), then
//   [r8 + 8*i] = low 64 bits of ([rcx + 8*i] * rax) for each index i below the
//   count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_551
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_193:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 16 elements: use the scalar loop directly.
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_195
	// Overlap check: LBB2_551 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_551
	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_551

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_195:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdi = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_937:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_939

	// One element per iteration, rdi times.
LBB2_938:
	LONG $0xf1148b48 // mov    rdx, qword [rcx + 8*rsi]
	LONG $0xd0af0f48 // imul    rdx, rax
	LONG $0xf0148949 // mov    qword [r8 + 8*rsi], rdx
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB2_938

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_939:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_940:
	LONG $0xf1148b48             // mov    rdx, qword [rcx + 8*rsi]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0148949             // mov    qword [r8 + 8*rsi], rdx
	LONG $0xf1548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rsi + 8]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0548949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rdx
	LONG $0xf1548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rsi + 16]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0548949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rdx
	LONG $0xf1548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rsi + 24]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0548949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rdx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_940
	JMP  LBB2_1109

// LBB2_196: single-precision "scalar * element" over an array.
//   xmm0 = float32 at [rdx] (read once), then
//   dword [r8 + 4*i] = xmm0 * dword [rcx + 4*i] (vmulss) for each index i below
//   the count in r9d. Nothing is written when r9d <= 0 (signed test).
//   Unlike the integer loops nearby, rax holds the count and rdx is reused as
//   the loop index once the scalar has been loaded.
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_554
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_196:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	LONG $0x0210fac5         // vmovss    xmm0, dword [rdx]
	// rax = element count, zero-extended from r9d.
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	// Fewer than 32 elements: use the scalar loop directly.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_198
	// Overlap check: LBB2_554 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_554
	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_554

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_198:
	WORD $0xd231 // xor    edx, edx

	// Scalar loop setup (rdx = current index).
	// rsi = rax - rdx - 1; rdi = rax & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_945:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_947

	// One element per iteration, rdi times.
LBB2_946:
	LONG $0x0c59fac5; BYTE $0x91   // vmulss    xmm1, xmm0, dword [rcx + 4*rdx]
	LONG $0x117ac1c4; WORD $0x900c // vmovss    dword [r8 + 4*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB2_946

	// rsi < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_947:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB2_1109

	// Four elements per iteration until rdx == rax.
LBB2_948:
	LONG $0x0c59fac5; BYTE $0x91               // vmulss    xmm1, xmm0, dword [rcx + 4*rdx]
	LONG $0x117ac1c4; WORD $0x900c             // vmovss    dword [r8 + 4*rdx], xmm1
	LONG $0x4c59fac5; WORD $0x0491             // vmulss    xmm1, xmm0, dword [rcx + 4*rdx + 4]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x04 // vmovss    dword [r8 + 4*rdx + 4], xmm1
	LONG $0x4c59fac5; WORD $0x0891             // vmulss    xmm1, xmm0, dword [rcx + 4*rdx + 8]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x08 // vmovss    dword [r8 + 4*rdx + 8], xmm1
	LONG $0x4c59fac5; WORD $0x0c91             // vmulss    xmm1, xmm0, dword [rcx + 4*rdx + 12]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x0c // vmovss    dword [r8 + 4*rdx + 12], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB2_948
	JMP  LBB2_1109

// LBB2_199: 64-bit "element * scalar" over an array.
//   rax = qword [rdx] (read once), then
//   [r8 + 8*i] = low 64 bits of ([rcx + 8*i] * rax) for each index i below the
//   count in r9d. Nothing is written when r9d <= 0 (signed test).
// NOTE(review): LBB2_1109 is presumably the shared exit of the routine and LBB2_557
// the vectorized form of this loop -- neither is visible in this part of the file.
LBB2_199:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
	// r10 = element count, zero-extended from r9d.
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 16 elements: use the scalar loop directly.
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_201
	// Overlap check: LBB2_557 is taken only when the source range ends at or before
	// the destination start, or the destination range ends at or before the source start.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_557
	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_557

	// Short or overlapping input: scalar loop starts at index 0.
LBB2_201:
	WORD $0xf631 // xor    esi, esi

	// Scalar loop setup (rsi = current index).
	// r9 = r10 - rsi - 1; rdi = r10 & 3 = elements handled one at a time
	// before the 4x-unrolled loop.
LBB2_953:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_955

	// One element per iteration, rdi times.
LBB2_954:
	LONG $0xf1148b48 // mov    rdx, qword [rcx + 8*rsi]
	LONG $0xd0af0f48 // imul    rdx, rax
	LONG $0xf0148949 // mov    qword [r8 + 8*rsi], rdx
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB2_954

	// r9 < 3 (unsigned): fewer than 4 elements were left at loop setup, so skip
	// the unrolled loop.
LBB2_955:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

	// Four elements per iteration until rsi == r10.
LBB2_956:
	LONG $0xf1148b48             // mov    rdx, qword [rcx + 8*rsi]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0148949             // mov    qword [r8 + 8*rsi], rdx
	LONG $0xf1548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rsi + 8]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0548949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rdx
	LONG $0xf1548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rsi + 16]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0548949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rdx
	LONG $0xf1548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rsi + 24]
	LONG $0xd0af0f48             // imul    rdx, rax
	LONG $0xf0548949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rdx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_956
	JMP  LBB2_1109

// LBB2_202: out[i] = scalar * in[i] on single-precision floats (scalar fallback).
//   rdx -> scalar operand (read once into xmm0), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 32 or more elements and non-overlapping input/output ranges control goes
// to LBB2_560 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_202:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	LONG $0x0210fac5         // vmovss    xmm0, dword [rdx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_204
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_560
	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_560

// Short or overlapping input: run the scalar loop from index 0 (rdx).
LBB2_204:
	WORD $0xd231 // xor    edx, edx

// rsi = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rdx != 0; the peel count would then rely on rdx being a multiple of 4 -- confirm.
LBB2_961:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_963

// Peel loop: one element per iteration until rdi reaches zero.
LBB2_962:
	LONG $0x0c59fac5; BYTE $0x91   // vmulss    xmm1, xmm0, dword [rcx + 4*rdx]
	LONG $0x117ac1c4; WORD $0x900c // vmovss    dword [r8 + 4*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB2_962

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_963:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rdx == rax.
LBB2_964:
	LONG $0x0c59fac5; BYTE $0x91               // vmulss    xmm1, xmm0, dword [rcx + 4*rdx]
	LONG $0x117ac1c4; WORD $0x900c             // vmovss    dword [r8 + 4*rdx], xmm1
	LONG $0x4c59fac5; WORD $0x0491             // vmulss    xmm1, xmm0, dword [rcx + 4*rdx + 4]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x04 // vmovss    dword [r8 + 4*rdx + 4], xmm1
	LONG $0x4c59fac5; WORD $0x0891             // vmulss    xmm1, xmm0, dword [rcx + 4*rdx + 8]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x08 // vmovss    dword [r8 + 4*rdx + 8], xmm1
	LONG $0x4c59fac5; WORD $0x0c91             // vmulss    xmm1, xmm0, dword [rcx + 4*rdx + 12]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x0c // vmovss    dword [r8 + 4*rdx + 12], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB2_964
	JMP  LBB2_1109

// LBB2_205: out[i] = in[i] + scalar on 64-bit integer elements (scalar fallback).
//   rdx -> scalar operand (read once into rax), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 16 or more elements and non-overlapping input/output ranges control goes
// to LBB2_563 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_205:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_207
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_563
	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_563

// Short or overlapping input: run the scalar loop from index 0 (rsi).
LBB2_207:
	WORD $0xf631 // xor    esi, esi

// r9 = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rsi != 0; the peel count would then rely on rsi being a multiple of 4 -- confirm.
LBB2_969:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_971

// Peel loop: one element per iteration until rdi reaches zero.
LBB2_970:
	LONG $0xf1148b48         // mov    rdx, qword [rcx + 8*rsi]
	WORD $0x0148; BYTE $0xc2 // add    rdx, rax
	LONG $0xf0148949         // mov    qword [r8 + 8*rsi], rdx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_970

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_971:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rsi == r10.
LBB2_972:
	LONG $0xf1148b48             // mov    rdx, qword [rcx + 8*rsi]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0148949             // mov    qword [r8 + 8*rsi], rdx
	LONG $0xf1548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rsi + 8]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0548949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rdx
	LONG $0xf1548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rsi + 16]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0548949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rdx
	LONG $0xf1548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rsi + 24]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0548949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rdx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_972
	JMP  LBB2_1109

// LBB2_208: out[i] = scalar + in[i] on single-precision floats (scalar fallback).
//   rdx -> scalar operand (read once into xmm0), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 32 or more elements and non-overlapping input/output ranges control goes
// to LBB2_566 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_208:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	LONG $0x0210fac5         // vmovss    xmm0, dword [rdx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_210
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_566
	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_566

// Short or overlapping input: run the scalar loop from index 0 (rdx).
LBB2_210:
	WORD $0xd231 // xor    edx, edx

// rsi = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rdx != 0; the peel count would then rely on rdx being a multiple of 4 -- confirm.
LBB2_977:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_979

// Peel loop: one element per iteration until rdi reaches zero.
LBB2_978:
	LONG $0x0c58fac5; BYTE $0x91   // vaddss    xmm1, xmm0, dword [rcx + 4*rdx]
	LONG $0x117ac1c4; WORD $0x900c // vmovss    dword [r8 + 4*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB2_978

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_979:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rdx == rax.
LBB2_980:
	LONG $0x0c58fac5; BYTE $0x91               // vaddss    xmm1, xmm0, dword [rcx + 4*rdx]
	LONG $0x117ac1c4; WORD $0x900c             // vmovss    dword [r8 + 4*rdx], xmm1
	LONG $0x4c58fac5; WORD $0x0491             // vaddss    xmm1, xmm0, dword [rcx + 4*rdx + 4]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x04 // vmovss    dword [r8 + 4*rdx + 4], xmm1
	LONG $0x4c58fac5; WORD $0x0891             // vaddss    xmm1, xmm0, dword [rcx + 4*rdx + 8]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x08 // vmovss    dword [r8 + 4*rdx + 8], xmm1
	LONG $0x4c58fac5; WORD $0x0c91             // vaddss    xmm1, xmm0, dword [rcx + 4*rdx + 12]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x0c // vmovss    dword [r8 + 4*rdx + 12], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB2_980
	JMP  LBB2_1109

// LBB2_211: out[i] = scalar - in[i] on 64-bit integer elements (scalar fallback).
//   rdx -> scalar operand (read once into r11), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 16 or more elements and non-overlapping input/output ranges control goes
// to LBB2_569 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_211:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x8b4c; BYTE $0x1a // mov    r11, qword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_213
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_569
	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_569

// Short or overlapping input: run the scalar loop from index 0 (rsi).
LBB2_213:
	WORD $0xf631 // xor    esi, esi

// rdx = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rsi != 0; the peel count would then rely on rsi being a multiple of 4 -- confirm.
LBB2_985:
	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
	WORD $0xf748; BYTE $0xd2 // not    rdx
	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_987

// Peel loop: one element per iteration until rdi reaches zero.
// The scalar is copied into rax each time because sub overwrites its destination.
LBB2_986:
	WORD $0x894c; BYTE $0xd8 // mov    rax, r11
	LONG $0xf1042b48         // sub    rax, qword [rcx + 8*rsi]
	LONG $0xf0048949         // mov    qword [r8 + 8*rsi], rax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_986

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_987:
	LONG $0x03fa8348 // cmp    rdx, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rsi == r10.
LBB2_988:
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_988
	JMP  LBB2_1109

// LBB2_214: out[i] = scalar - in[i] on single-precision floats (scalar fallback).
//   rdx -> scalar operand (read once into xmm0), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 32 or more elements and non-overlapping input/output ranges control goes
// to LBB2_572 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_214:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	LONG $0x0210fac5         // vmovss    xmm0, dword [rdx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_216
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_572
	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_572

// Short or overlapping input: run the scalar loop from index 0 (rdx).
LBB2_216:
	WORD $0xd231 // xor    edx, edx

// rsi = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rdx != 0; the peel count would then rely on rdx being a multiple of 4 -- confirm.
LBB2_993:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_995

// Peel loop: one element per iteration until rdi reaches zero.
LBB2_994:
	LONG $0x0c5cfac5; BYTE $0x91   // vsubss    xmm1, xmm0, dword [rcx + 4*rdx]
	LONG $0x117ac1c4; WORD $0x900c // vmovss    dword [r8 + 4*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB2_994

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_995:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rdx == rax.
LBB2_996:
	LONG $0x0c5cfac5; BYTE $0x91               // vsubss    xmm1, xmm0, dword [rcx + 4*rdx]
	LONG $0x117ac1c4; WORD $0x900c             // vmovss    dword [r8 + 4*rdx], xmm1
	LONG $0x4c5cfac5; WORD $0x0491             // vsubss    xmm1, xmm0, dword [rcx + 4*rdx + 4]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x04 // vmovss    dword [r8 + 4*rdx + 4], xmm1
	LONG $0x4c5cfac5; WORD $0x0891             // vsubss    xmm1, xmm0, dword [rcx + 4*rdx + 8]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x08 // vmovss    dword [r8 + 4*rdx + 8], xmm1
	LONG $0x4c5cfac5; WORD $0x0c91             // vsubss    xmm1, xmm0, dword [rcx + 4*rdx + 12]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x0c // vmovss    dword [r8 + 4*rdx + 12], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB2_996
	JMP  LBB2_1109

// LBB2_217: out[i] = in[i] + scalar on 64-bit integer elements (scalar fallback).
//   rdx -> scalar operand (read once into rax), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 16 or more elements and non-overlapping input/output ranges control goes
// to LBB2_575 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_217:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_219
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_575
	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_575

// Short or overlapping input: run the scalar loop from index 0 (rsi).
LBB2_219:
	WORD $0xf631 // xor    esi, esi

// r9 = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rsi != 0; the peel count would then rely on rsi being a multiple of 4 -- confirm.
LBB2_1001:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1003

// Peel loop: one element per iteration until rdi reaches zero.
LBB2_1002:
	LONG $0xf1148b48         // mov    rdx, qword [rcx + 8*rsi]
	WORD $0x0148; BYTE $0xc2 // add    rdx, rax
	LONG $0xf0148949         // mov    qword [r8 + 8*rsi], rdx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_1002

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_1003:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rsi == r10.
LBB2_1004:
	LONG $0xf1148b48             // mov    rdx, qword [rcx + 8*rsi]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0148949             // mov    qword [r8 + 8*rsi], rdx
	LONG $0xf1548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rsi + 8]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0548949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rdx
	LONG $0xf1548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rsi + 16]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0548949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rdx
	LONG $0xf1548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rsi + 24]
	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
	LONG $0xf0548949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rdx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_1004
	JMP  LBB2_1109

// LBB2_220: out[i] = scalar + in[i] on single-precision floats (scalar fallback).
//   rdx -> scalar operand (read once into xmm0), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 32 or more elements and non-overlapping input/output ranges control goes
// to LBB2_578 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_220:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	LONG $0x0210fac5         // vmovss    xmm0, dword [rdx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_222
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_578
	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_578

// Short or overlapping input: run the scalar loop from index 0 (rdx).
LBB2_222:
	WORD $0xd231 // xor    edx, edx

// rsi = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rdx != 0; the peel count would then rely on rdx being a multiple of 4 -- confirm.
LBB2_1009:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1011

// Peel loop: one element per iteration until rdi reaches zero.
LBB2_1010:
	LONG $0x0c58fac5; BYTE $0x91   // vaddss    xmm1, xmm0, dword [rcx + 4*rdx]
	LONG $0x117ac1c4; WORD $0x900c // vmovss    dword [r8 + 4*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB2_1010

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_1011:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rdx == rax.
LBB2_1012:
	LONG $0x0c58fac5; BYTE $0x91               // vaddss    xmm1, xmm0, dword [rcx + 4*rdx]
	LONG $0x117ac1c4; WORD $0x900c             // vmovss    dword [r8 + 4*rdx], xmm1
	LONG $0x4c58fac5; WORD $0x0491             // vaddss    xmm1, xmm0, dword [rcx + 4*rdx + 4]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x04 // vmovss    dword [r8 + 4*rdx + 4], xmm1
	LONG $0x4c58fac5; WORD $0x0891             // vaddss    xmm1, xmm0, dword [rcx + 4*rdx + 8]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x08 // vmovss    dword [r8 + 4*rdx + 8], xmm1
	LONG $0x4c58fac5; WORD $0x0c91             // vaddss    xmm1, xmm0, dword [rcx + 4*rdx + 12]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x0c // vmovss    dword [r8 + 4*rdx + 12], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB2_1012
	JMP  LBB2_1109

// LBB2_223: out[i] = scalar - in[i] on 64-bit integer elements (scalar fallback).
//   rdx -> scalar operand (read once into r11), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 16 or more elements and non-overlapping input/output ranges control goes
// to LBB2_581 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_223:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x8b4c; BYTE $0x1a // mov    r11, qword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB2_225
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_581
	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_581

// Short or overlapping input: run the scalar loop from index 0 (rsi).
LBB2_225:
	WORD $0xf631 // xor    esi, esi

// rdx = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rsi != 0; the peel count would then rely on rsi being a multiple of 4 -- confirm.
LBB2_1017:
	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
	WORD $0xf748; BYTE $0xd2 // not    rdx
	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1019

// Peel loop: one element per iteration until rdi reaches zero.
// The scalar is copied into rax each time because sub overwrites its destination.
LBB2_1018:
	WORD $0x894c; BYTE $0xd8 // mov    rax, r11
	LONG $0xf1042b48         // sub    rax, qword [rcx + 8*rsi]
	LONG $0xf0048949         // mov    qword [r8 + 8*rsi], rax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_1018

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_1019:
	LONG $0x03fa8348 // cmp    rdx, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rsi == r10.
LBB2_1020:
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_1020
	JMP  LBB2_1109

// LBB2_226: out[i] = scalar - in[i] on single-precision floats (scalar fallback).
//   rdx -> scalar operand (read once into xmm0), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 32 or more elements and non-overlapping input/output ranges control goes
// to LBB2_584 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_226:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	LONG $0x0210fac5         // vmovss    xmm0, dword [rdx]
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_228
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_584
	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_584

// Short or overlapping input: run the scalar loop from index 0 (rdx).
LBB2_228:
	WORD $0xd231 // xor    edx, edx

// rsi = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rdx != 0; the peel count would then rely on rdx being a multiple of 4 -- confirm.
LBB2_1025:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1027

// Peel loop: one element per iteration until rdi reaches zero.
LBB2_1026:
	LONG $0x0c5cfac5; BYTE $0x91   // vsubss    xmm1, xmm0, dword [rcx + 4*rdx]
	LONG $0x117ac1c4; WORD $0x900c // vmovss    dword [r8 + 4*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JNE  LBB2_1026

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_1027:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rdx == rax.
LBB2_1028:
	LONG $0x0c5cfac5; BYTE $0x91               // vsubss    xmm1, xmm0, dword [rcx + 4*rdx]
	LONG $0x117ac1c4; WORD $0x900c             // vmovss    dword [r8 + 4*rdx], xmm1
	LONG $0x4c5cfac5; WORD $0x0491             // vsubss    xmm1, xmm0, dword [rcx + 4*rdx + 4]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x04 // vmovss    dword [r8 + 4*rdx + 4], xmm1
	LONG $0x4c5cfac5; WORD $0x0891             // vsubss    xmm1, xmm0, dword [rcx + 4*rdx + 8]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x08 // vmovss    dword [r8 + 4*rdx + 8], xmm1
	LONG $0x4c5cfac5; WORD $0x0c91             // vsubss    xmm1, xmm0, dword [rcx + 4*rdx + 12]
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x0c // vmovss    dword [r8 + 4*rdx + 12], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB2_1028
	JMP  LBB2_1109

// LBB2_229: out[i] = in[i] * scalar on 8-bit elements (scalar fallback).
//   rdx -> scalar operand (read once into dl), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// Only the low byte (al) of each 8-bit product is stored.
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 32 or more elements and non-overlapping input/output ranges control goes
// to LBB2_587 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_229:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x128a             // mov    dl, byte [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_231
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	JBE  LBB2_587
	LONG $0x10048d4b         // lea    rax, [r8 + r10]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB2_587

// Short or overlapping input: run the scalar loop from index 0 (rdi).
LBB2_231:
	WORD $0xff31 // xor    edi, edi

// r9 = count - index - 1; rsi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rdi != 0; the peel count would then rely on rdi being a multiple of 4 -- confirm.
LBB2_647:
	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
	LONG $0x03e68348         // and    rsi, 3
	JE   LBB2_649

// Peel loop: one element per iteration until rsi reaches zero.
LBB2_648:
	LONG $0x3904b60f // movzx    eax, byte [rcx + rdi]
	WORD $0xe2f6     // mul    dl
	LONG $0x38048841 // mov    byte [r8 + rdi], al
	LONG $0x01c78348 // add    rdi, 1
	LONG $0xffc68348 // add    rsi, -1
	JNE  LBB2_648

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_649:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rdi == r10.
LBB2_650:
	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38048841             // mov    byte [r8 + rdi], al
	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
	LONG $0x04c78348             // add    rdi, 4
	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
	JNE  LBB2_650
	JMP  LBB2_1109

// LBB2_232: out[i] = in[i] * scalar on 8-bit elements (scalar fallback).
//   rdx -> scalar operand (read once into dl), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// Only the low byte (al) of each 8-bit product is stored.
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 32 or more elements and non-overlapping input/output ranges control goes
// to LBB2_589 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_232:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x128a             // mov    dl, byte [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_234
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0x11048d4a         // lea    rax, [rcx + r10]
	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
	JBE  LBB2_589
	LONG $0x10048d4b         // lea    rax, [r8 + r10]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB2_589

// Short or overlapping input: run the scalar loop from index 0 (rdi).
LBB2_234:
	WORD $0xff31 // xor    edi, edi

// r9 = count - index - 1; rsi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rdi != 0; the peel count would then rely on rdi being a multiple of 4 -- confirm.
LBB2_657:
	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
	LONG $0x03e68348         // and    rsi, 3
	JE   LBB2_659

// Peel loop: one element per iteration until rsi reaches zero.
LBB2_658:
	LONG $0x3904b60f // movzx    eax, byte [rcx + rdi]
	WORD $0xe2f6     // mul    dl
	LONG $0x38048841 // mov    byte [r8 + rdi], al
	LONG $0x01c78348 // add    rdi, 1
	LONG $0xffc68348 // add    rsi, -1
	JNE  LBB2_658

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_659:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rdi == r10.
LBB2_660:
	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38048841             // mov    byte [r8 + rdi], al
	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
	WORD $0xe2f6                 // mul    dl
	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
	LONG $0x04c78348             // add    rdi, 4
	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
	JNE  LBB2_660
	JMP  LBB2_1109

// LBB2_235: out[i] = in[i] + scalar on 8-bit elements (scalar fallback).
//   rdx -> scalar operand (read once into al), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 128 or more elements and non-overlapping input/output ranges control goes
// to LBB2_591 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_235:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x028a                               // mov    al, byte [rdx]
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB2_237
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0x11148d4a                           // lea    rdx, [rcx + r10]
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JBE  LBB2_591
	LONG $0x10148d4b                           // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca                   // cmp    rdx, rcx
	JBE  LBB2_591

// Short or overlapping input: run the scalar loop from index 0 (rsi).
LBB2_237:
	WORD $0xf631 // xor    esi, esi

// r9 = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rsi != 0; the peel count would then rely on rsi being a multiple of 4 -- confirm.
LBB2_1033:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1035

// Peel loop: one element per iteration until rdi reaches zero.
LBB2_1034:
	LONG $0x3114b60f // movzx    edx, byte [rcx + rsi]
	WORD $0xc200     // add    dl, al
	LONG $0x30148841 // mov    byte [r8 + rsi], dl
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB2_1034

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_1035:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rsi == r10.
LBB2_1036:
	LONG $0x3114b60f             // movzx    edx, byte [rcx + rsi]
	WORD $0xc200                 // add    dl, al
	LONG $0x30148841             // mov    byte [r8 + rsi], dl
	LONG $0x3154b60f; BYTE $0x01 // movzx    edx, byte [rcx + rsi + 1]
	WORD $0xc200                 // add    dl, al
	LONG $0x30548841; BYTE $0x01 // mov    byte [r8 + rsi + 1], dl
	LONG $0x3154b60f; BYTE $0x02 // movzx    edx, byte [rcx + rsi + 2]
	WORD $0xc200                 // add    dl, al
	LONG $0x30548841; BYTE $0x02 // mov    byte [r8 + rsi + 2], dl
	LONG $0x3154b60f; BYTE $0x03 // movzx    edx, byte [rcx + rsi + 3]
	WORD $0xc200                 // add    dl, al
	LONG $0x30548841; BYTE $0x03 // mov    byte [r8 + rsi + 3], dl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_1036
	JMP  LBB2_1109

// LBB2_238: out[i] = scalar - in[i] on 8-bit elements (scalar fallback).
//   rdx -> scalar operand (read once into al), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 128 or more elements and non-overlapping input/output ranges control goes
// to LBB2_594 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_238:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x028a                               // mov    al, byte [rdx]
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB2_240
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0x11148d4a                           // lea    rdx, [rcx + r10]
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JBE  LBB2_594
	LONG $0x10148d4b                           // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca                   // cmp    rdx, rcx
	JBE  LBB2_594

// Short or overlapping input: run the scalar loop from index 0 (rsi).
LBB2_240:
	WORD $0xf631 // xor    esi, esi

// r9 = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rsi != 0; the peel count would then rely on rsi being a multiple of 4 -- confirm.
LBB2_1041:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1043

// Peel loop: one element per iteration until rdi reaches zero.
// The scalar is copied into edx each time because sub overwrites its destination.
LBB2_1042:
	WORD $0xc289             // mov    edx, eax
	WORD $0x142a; BYTE $0x31 // sub    dl, byte [rcx + rsi]
	LONG $0x30148841         // mov    byte [r8 + rsi], dl
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_1042

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_1043:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rsi == r10.
LBB2_1044:
	WORD $0xc289                 // mov    edx, eax
	WORD $0x142a; BYTE $0x31     // sub    dl, byte [rcx + rsi]
	LONG $0x30148841             // mov    byte [r8 + rsi], dl
	WORD $0xc289                 // mov    edx, eax
	LONG $0x0131542a             // sub    dl, byte [rcx + rsi + 1]
	LONG $0x30548841; BYTE $0x01 // mov    byte [r8 + rsi + 1], dl
	WORD $0xc289                 // mov    edx, eax
	LONG $0x0231542a             // sub    dl, byte [rcx + rsi + 2]
	LONG $0x30548841; BYTE $0x02 // mov    byte [r8 + rsi + 2], dl
	WORD $0xc289                 // mov    edx, eax
	LONG $0x0331542a             // sub    dl, byte [rcx + rsi + 3]
	LONG $0x30548841; BYTE $0x03 // mov    byte [r8 + rsi + 3], dl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_1044
	JMP  LBB2_1109

// LBB2_241: out[i] = in[i] + scalar on 8-bit elements (scalar fallback).
//   rdx -> scalar operand (read once into al), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 128 or more elements and non-overlapping input/output ranges control goes
// to LBB2_597 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_241:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x028a                               // mov    al, byte [rdx]
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB2_243
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0x11148d4a                           // lea    rdx, [rcx + r10]
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JBE  LBB2_597
	LONG $0x10148d4b                           // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca                   // cmp    rdx, rcx
	JBE  LBB2_597

// Short or overlapping input: run the scalar loop from index 0 (rsi).
LBB2_243:
	WORD $0xf631 // xor    esi, esi

// r9 = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rsi != 0; the peel count would then rely on rsi being a multiple of 4 -- confirm.
LBB2_1049:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1051

// Peel loop: one element per iteration until rdi reaches zero.
LBB2_1050:
	LONG $0x3114b60f // movzx    edx, byte [rcx + rsi]
	WORD $0xc200     // add    dl, al
	LONG $0x30148841 // mov    byte [r8 + rsi], dl
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB2_1050

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_1051:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rsi == r10.
LBB2_1052:
	LONG $0x3114b60f             // movzx    edx, byte [rcx + rsi]
	WORD $0xc200                 // add    dl, al
	LONG $0x30148841             // mov    byte [r8 + rsi], dl
	LONG $0x3154b60f; BYTE $0x01 // movzx    edx, byte [rcx + rsi + 1]
	WORD $0xc200                 // add    dl, al
	LONG $0x30548841; BYTE $0x01 // mov    byte [r8 + rsi + 1], dl
	LONG $0x3154b60f; BYTE $0x02 // movzx    edx, byte [rcx + rsi + 2]
	WORD $0xc200                 // add    dl, al
	LONG $0x30548841; BYTE $0x02 // mov    byte [r8 + rsi + 2], dl
	LONG $0x3154b60f; BYTE $0x03 // movzx    edx, byte [rcx + rsi + 3]
	WORD $0xc200                 // add    dl, al
	LONG $0x30548841; BYTE $0x03 // mov    byte [r8 + rsi + 3], dl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_1052
	JMP  LBB2_1109

// LBB2_244: out[i] = scalar - in[i] on 8-bit elements (scalar fallback).
//   rdx -> scalar operand (read once into al), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 128 or more elements and non-overlapping input/output ranges control goes
// to LBB2_600 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_244:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x028a                               // mov    al, byte [rdx]
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB2_246
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0x11148d4a                           // lea    rdx, [rcx + r10]
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JBE  LBB2_600
	LONG $0x10148d4b                           // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca                   // cmp    rdx, rcx
	JBE  LBB2_600

// Short or overlapping input: run the scalar loop from index 0 (rsi).
LBB2_246:
	WORD $0xf631 // xor    esi, esi

// r9 = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rsi != 0; the peel count would then rely on rsi being a multiple of 4 -- confirm.
LBB2_1057:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1059

// Peel loop: one element per iteration until rdi reaches zero.
// The scalar is copied into edx each time because sub overwrites its destination.
LBB2_1058:
	WORD $0xc289             // mov    edx, eax
	WORD $0x142a; BYTE $0x31 // sub    dl, byte [rcx + rsi]
	LONG $0x30148841         // mov    byte [r8 + rsi], dl
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_1058

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_1059:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rsi == r10.
LBB2_1060:
	WORD $0xc289                 // mov    edx, eax
	WORD $0x142a; BYTE $0x31     // sub    dl, byte [rcx + rsi]
	LONG $0x30148841             // mov    byte [r8 + rsi], dl
	WORD $0xc289                 // mov    edx, eax
	LONG $0x0131542a             // sub    dl, byte [rcx + rsi + 1]
	LONG $0x30548841; BYTE $0x01 // mov    byte [r8 + rsi + 1], dl
	WORD $0xc289                 // mov    edx, eax
	LONG $0x0231542a             // sub    dl, byte [rcx + rsi + 2]
	LONG $0x30548841; BYTE $0x02 // mov    byte [r8 + rsi + 2], dl
	WORD $0xc289                 // mov    edx, eax
	LONG $0x0331542a             // sub    dl, byte [rcx + rsi + 3]
	LONG $0x30548841; BYTE $0x03 // mov    byte [r8 + rsi + 3], dl
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_1060
	JMP  LBB2_1109

// LBB2_247: out[i] = in[i] * scalar on 32-bit integer elements (scalar fallback).
//   rdx -> scalar operand (read once into eax), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 32 or more elements and non-overlapping input/output ranges control goes
// to LBB2_603 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_247:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x028b             // mov    eax, dword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_249
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_603
	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_603

// Short or overlapping input: run the scalar loop from index 0 (rsi).
LBB2_249:
	WORD $0xf631 // xor    esi, esi

// r9 = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rsi != 0; the peel count would then rely on rsi being a multiple of 4 -- confirm.
LBB2_1065:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1067

// Peel loop: one element per iteration until rdi reaches zero.
LBB2_1066:
	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
	WORD $0xaf0f; BYTE $0xd0 // imul    edx, eax
	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_1066

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_1067:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rsi == r10.
LBB2_1068:
	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_1068
	JMP  LBB2_1109

// LBB2_250: out[i] = in[i] * scalar on 32-bit integer elements (scalar fallback).
//   rdx -> scalar operand (read once into eax), rcx = input array,
//   r8  = output array, r9d = element count (tested as a signed 32-bit value).
// A count <= 0 leaves through LBB2_1109, which lies outside this chunk.
// With 32 or more elements and non-overlapping input/output ranges control goes
// to LBB2_606 (outside this chunk; presumably the vectorized loop -- confirm).
LBB2_250:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	WORD $0x028b             // mov    eax, dword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_252
	// Range check: jump away only if the input ends at or before the output
	// starts, or the output ends at or before the input starts.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_606
	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_606

// Short or overlapping input: run the scalar loop from index 0 (rsi).
LBB2_252:
	WORD $0xf631 // xor    esi, esi

// r9 = count - index - 1; rdi = count & 3 = number of elements to peel.
// NOTE(review): this label may also be reached from code outside this chunk
// with rsi != 0; the peel count would then rely on rsi being a multiple of 4 -- confirm.
LBB2_1073:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1075

// Peel loop: one element per iteration until rdi reaches zero.
LBB2_1074:
	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
	WORD $0xaf0f; BYTE $0xd0 // imul    edx, eax
	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_1074

// If count - index - 1 (taken before peeling) is below 3, nothing is left.
LBB2_1075:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop, unrolled four elements per iteration; ends when rsi == r10.
LBB2_1076:
	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_1076
	JMP  LBB2_1109

// LBB2_253: out[i] = in[i] + scalar on 32-bit elements.
// rdx = pointer to the scalar (read once), rcx = source base, r8 = destination
// base, r9d = element count. LBB2_1109 and LBB2_609 are outside this chunk.
LBB2_253:
	// Nothing to do when the signed 32-bit count is <= 0.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// eax = scalar; r10 = count zero-extended to 64 bits.
	WORD $0x028b             // mov    eax, dword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go straight to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_255
	// Overlap check: branch to LBB2_609 when source and destination ranges
	// do not overlap. NOTE(review): presumably the vectorized path — confirm.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_609
	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_609

// Scalar path starting at index 0.
LBB2_255:
	WORD $0xf631 // xor    esi, esi

// Entered with rsi = index of the first element still to be processed.
// r9 = r10 - rsi - 1; rdi = r10 & 3 = iterations of the peel loop below.
LBB2_1081:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1083

// Peel loop: one element per iteration, rdi counts down to zero.
LBB2_1082:
	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
	WORD $0xc201             // add    edx, eax
	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_1082

// Fewer than four elements were left on entry (r9 < 3): the peel loop did it all.
LBB2_1083:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop: four elements per iteration until rsi reaches r10.
LBB2_1084:
	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_1084
	JMP  LBB2_1109

// LBB2_256: out[i] = scalar - in[i] on 32-bit elements (scalar is the minuend).
// rdx = pointer to the scalar (read once), rcx = source base, r8 = destination
// base, r9d = element count. LBB2_1109 and LBB2_612 are outside this chunk.
LBB2_256:
	// Nothing to do when the signed 32-bit count is <= 0.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// r11d = scalar (kept in r11 because eax is the per-element temporary here);
	// r10 = count zero-extended to 64 bits.
	WORD $0x8b44; BYTE $0x1a // mov    r11d, dword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go straight to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_258
	// Overlap check: branch to LBB2_612 when source and destination ranges
	// do not overlap. NOTE(review): presumably the vectorized path — confirm.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_612
	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_612

// Scalar path starting at index 0.
LBB2_258:
	WORD $0xf631 // xor    esi, esi

// Entered with rsi = index of the first element still to be processed.
// rdx = r10 - rsi - 1; rdi = r10 & 3 = iterations of the peel loop below.
LBB2_1089:
	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
	WORD $0xf748; BYTE $0xd2 // not    rdx
	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1091

// Peel loop: one element per iteration, rdi counts down to zero.
LBB2_1090:
	WORD $0x8944; BYTE $0xd8 // mov    eax, r11d
	WORD $0x042b; BYTE $0xb1 // sub    eax, dword [rcx + 4*rsi]
	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_1090

// Fewer than four elements were left on entry (rdx < 3): the peel loop did it all.
LBB2_1091:
	LONG $0x03fa8348 // cmp    rdx, 3
	JB   LBB2_1109

// Main loop: four elements per iteration until rsi reaches r10.
LBB2_1092:
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_1092
	JMP  LBB2_1109

// LBB2_259: out[i] = in[i] + scalar on 32-bit elements.
// rdx = pointer to the scalar (read once), rcx = source base, r8 = destination
// base, r9d = element count. LBB2_1109 and LBB2_615 are outside this chunk.
LBB2_259:
	// Nothing to do when the signed 32-bit count is <= 0.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// eax = scalar; r10 = count zero-extended to 64 bits.
	WORD $0x028b             // mov    eax, dword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go straight to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_261
	// Overlap check: branch to LBB2_615 when source and destination ranges
	// do not overlap. NOTE(review): presumably the vectorized path — confirm.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_615
	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_615

// Scalar path starting at index 0.
LBB2_261:
	WORD $0xf631 // xor    esi, esi

// Entered with rsi = index of the first element still to be processed.
// r9 = r10 - rsi - 1; rdi = r10 & 3 = iterations of the peel loop below.
LBB2_1097:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1099

// Peel loop: one element per iteration, rdi counts down to zero.
LBB2_1098:
	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
	WORD $0xc201             // add    edx, eax
	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_1098

// Fewer than four elements were left on entry (r9 < 3): the peel loop did it all.
LBB2_1099:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB2_1109

// Main loop: four elements per iteration until rsi reaches r10.
LBB2_1100:
	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
	WORD $0xc201                 // add    edx, eax
	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_1100
	JMP  LBB2_1109

// LBB2_262: out[i] = scalar - in[i] on 32-bit elements (scalar is the minuend).
// rdx = pointer to the scalar (read once), rcx = source base, r8 = destination
// base, r9d = element count. LBB2_1109 and LBB2_618 are outside this chunk.
LBB2_262:
	// Nothing to do when the signed 32-bit count is <= 0.
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB2_1109
	// r11d = scalar (kept in r11 because eax is the per-element temporary here);
	// r10 = count zero-extended to 64 bits.
	WORD $0x8b44; BYTE $0x1a // mov    r11d, dword [rdx]
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	// Fewer than 32 elements: go straight to the scalar loops.
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB2_264
	// Overlap check: branch to LBB2_618 when source and destination ranges
	// do not overlap. NOTE(review): presumably the vectorized path — confirm.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB2_618
	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB2_618

// Scalar path starting at index 0.
LBB2_264:
	WORD $0xf631 // xor    esi, esi

// Entered with rsi = index of the first element still to be processed.
// rdx = r10 - rsi - 1; rdi = r10 & 3 = iterations of the peel loop below.
LBB2_1105:
	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
	WORD $0xf748; BYTE $0xd2 // not    rdx
	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB2_1107

// Peel loop: one element per iteration, rdi counts down to zero.
LBB2_1106:
	WORD $0x8944; BYTE $0xd8 // mov    eax, r11d
	WORD $0x042b; BYTE $0xb1 // sub    eax, dword [rcx + 4*rsi]
	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB2_1106

// Fewer than four elements were left on entry (rdx < 3): the peel loop did it all.
LBB2_1107:
	LONG $0x03fa8348 // cmp    rdx, 3
	JB   LBB2_1109

// Main loop: four elements per iteration until rsi reaches r10.
LBB2_1108:
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB2_1108
	JMP  LBB2_1109

// LBB2_445: AVX2 path computing out[i] = in[i] * eax on 32-bit lanes.
// rcx = source base, r8 = destination base, r10d = element count, eax = multiplier.
// NOTE(review): the jump into this label is outside this chunk; presumably it is
// only reached with at least 32 elements and non-overlapping buffers — confirm.
LBB2_445:
	// rsi = count rounded down to a multiple of 32.
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	// ymm0 = eax broadcast to all eight dword lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	// r9 = (rsi - 32) / 32 + 1 = number of 32-element chunks.
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rsi == 32 (a single chunk): bypass the two-chunks-per-iteration loop.
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_661
	// rdx = -(r9 rounded down to even), counted up to zero by the loop;
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles 64 elements: two groups of four 8-lane multiplies and stores.
LBB2_447:
	LONG $0x407de2c4; WORD $0xb90c             // vpmulld    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x407de2c4; WORD $0xb954; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xb95c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xb964; BYTE $0x60 // vpmulld    ymm4, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x0080b98c407de2c4; WORD $0x0000     // vpmulld    ymm1, ymm0, yword [rcx + 4*rdi + 128]
	QUAD $0x00a0b994407de2c4; WORD $0x0000     // vpmulld    ymm2, ymm0, yword [rcx + 4*rdi + 160]
	QUAD $0x00c0b99c407de2c4; WORD $0x0000     // vpmulld    ymm3, ymm0, yword [rcx + 4*rdi + 192]
	QUAD $0x00e0b9a4407de2c4; WORD $0x0000     // vpmulld    ymm4, ymm0, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_447
	// NOTE(review): LBB2_662 is outside this chunk; presumably it handles an odd
	// trailing chunk (r9) before the scalar tail — confirm.
	JMP  LBB2_662

// LBB2_448: AVX2 path computing out[i] = in[i] * eax on 32-bit lanes.
// rcx = source base, r8 = destination base, r10d = element count, eax = multiplier.
// NOTE(review): the jump into this label is outside this chunk; presumably it is
// only reached with at least 32 elements and non-overlapping buffers — confirm.
LBB2_448:
	// rsi = count rounded down to a multiple of 32.
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	// ymm0 = eax broadcast to all eight dword lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	// r9 = (rsi - 32) / 32 + 1 = number of 32-element chunks.
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rsi == 32 (a single chunk): bypass the two-chunks-per-iteration loop.
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_669
	// rdx = -(r9 rounded down to even), counted up to zero by the loop;
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles 64 elements: two groups of four 8-lane multiplies and stores.
LBB2_450:
	LONG $0x407de2c4; WORD $0xb90c             // vpmulld    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x407de2c4; WORD $0xb954; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xb95c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xb964; BYTE $0x60 // vpmulld    ymm4, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x0080b98c407de2c4; WORD $0x0000     // vpmulld    ymm1, ymm0, yword [rcx + 4*rdi + 128]
	QUAD $0x00a0b994407de2c4; WORD $0x0000     // vpmulld    ymm2, ymm0, yword [rcx + 4*rdi + 160]
	QUAD $0x00c0b99c407de2c4; WORD $0x0000     // vpmulld    ymm3, ymm0, yword [rcx + 4*rdi + 192]
	QUAD $0x00e0b9a4407de2c4; WORD $0x0000     // vpmulld    ymm4, ymm0, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_450
	// NOTE(review): LBB2_670 is outside this chunk; presumably it handles an odd
	// trailing chunk (r9) before the scalar tail — confirm.
	JMP  LBB2_670

// LBB2_451: AVX2 path computing out[i] = in[i] + eax on 32-bit lanes.
// rcx = source base, r8 = destination base, r10d = element count, eax = addend.
// NOTE(review): the jump into this label is outside this chunk; presumably it is
// only reached with at least 32 elements and non-overlapping buffers — confirm.
LBB2_451:
	// rsi = count rounded down to a multiple of 32.
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	// ymm0 = eax broadcast to all eight dword lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	// r9 = (rsi - 32) / 32 + 1 = number of 32-element chunks.
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rsi == 32 (a single chunk): bypass the two-chunks-per-iteration loop.
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_677
	// rdx = -(r9 rounded down to even), counted up to zero by the loop;
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles 64 elements: two groups of four 8-lane adds and stores.
LBB2_453:
	LONG $0x0cfefdc5; BYTE $0xb9               // vpaddd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20b9             // vpaddd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40b9             // vpaddd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x64fefdc5; WORD $0x60b9             // vpaddd    ymm4, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x000080b98cfefdc5; BYTE $0x00       // vpaddd    ymm1, ymm0, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b994fefdc5; BYTE $0x00       // vpaddd    ymm2, ymm0, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b99cfefdc5; BYTE $0x00       // vpaddd    ymm3, ymm0, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9a4fefdc5; BYTE $0x00       // vpaddd    ymm4, ymm0, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_453
	// NOTE(review): LBB2_678 is outside this chunk; presumably it handles an odd
	// trailing chunk (r9) before the scalar tail — confirm.
	JMP  LBB2_678

// LBB2_454: AVX2 path computing out[i] = r11d - in[i] on 32-bit lanes.
// rcx = source base, r8 = destination base, r10d = element count, r11d = minuend.
// NOTE(review): the jump into this label is outside this chunk; presumably it is
// only reached with at least 32 elements and non-overlapping buffers — confirm.
LBB2_454:
	// rsi = count rounded down to a multiple of 32.
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	// ymm0 = r11d broadcast to all eight dword lanes.
	LONG $0x6e79c1c4; BYTE $0xc3 // vmovd    xmm0, r11d
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	// r9 = (rsi - 32) / 32 + 1 = number of 32-element chunks.
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rsi == 32 (a single chunk): bypass the two-chunks-per-iteration loop.
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_685
	// rdx = -(r9 rounded down to even), counted up to zero by the loop;
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles 64 elements: two groups of four 8-lane subtracts and stores.
LBB2_456:
	LONG $0x0cfafdc5; BYTE $0xb9               // vpsubd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20b9             // vpsubd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40b9             // vpsubd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x64fafdc5; WORD $0x60b9             // vpsubd    ymm4, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x000080b98cfafdc5; BYTE $0x00       // vpsubd    ymm1, ymm0, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b994fafdc5; BYTE $0x00       // vpsubd    ymm2, ymm0, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b99cfafdc5; BYTE $0x00       // vpsubd    ymm3, ymm0, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9a4fafdc5; BYTE $0x00       // vpsubd    ymm4, ymm0, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_456
	// NOTE(review): LBB2_686 is outside this chunk; presumably it handles an odd
	// trailing chunk (r9) before the scalar tail — confirm.
	JMP  LBB2_686

// LBB2_457: AVX2 path computing out[i] = in[i] + eax on 32-bit lanes.
// rcx = source base, r8 = destination base, r10d = element count, eax = addend.
// NOTE(review): the jump into this label is outside this chunk; presumably it is
// only reached with at least 32 elements and non-overlapping buffers — confirm.
LBB2_457:
	// rsi = count rounded down to a multiple of 32.
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	// ymm0 = eax broadcast to all eight dword lanes.
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	// r9 = (rsi - 32) / 32 + 1 = number of 32-element chunks.
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rsi == 32 (a single chunk): bypass the two-chunks-per-iteration loop.
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_693
	// rdx = -(r9 rounded down to even), counted up to zero by the loop;
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles 64 elements: two groups of four 8-lane adds and stores.
LBB2_459:
	LONG $0x0cfefdc5; BYTE $0xb9               // vpaddd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20b9             // vpaddd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40b9             // vpaddd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x64fefdc5; WORD $0x60b9             // vpaddd    ymm4, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x000080b98cfefdc5; BYTE $0x00       // vpaddd    ymm1, ymm0, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b994fefdc5; BYTE $0x00       // vpaddd    ymm2, ymm0, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b99cfefdc5; BYTE $0x00       // vpaddd    ymm3, ymm0, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9a4fefdc5; BYTE $0x00       // vpaddd    ymm4, ymm0, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_459
	// NOTE(review): LBB2_694 is outside this chunk; presumably it handles an odd
	// trailing chunk (r9) before the scalar tail — confirm.
	JMP  LBB2_694

// LBB2_460: AVX2 path computing out[i] = r11d - in[i] on 32-bit lanes.
// rcx = source base, r8 = destination base, r10d = element count, r11d = minuend.
// NOTE(review): the jump into this label is outside this chunk; presumably it is
// only reached with at least 32 elements and non-overlapping buffers — confirm.
LBB2_460:
	// rsi = count rounded down to a multiple of 32.
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	// ymm0 = r11d broadcast to all eight dword lanes.
	LONG $0x6e79c1c4; BYTE $0xc3 // vmovd    xmm0, r11d
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	// r9 = (rsi - 32) / 32 + 1 = number of 32-element chunks.
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	// rsi == 32 (a single chunk): bypass the two-chunks-per-iteration loop.
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_701
	// rdx = -(r9 rounded down to even), counted up to zero by the loop;
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles 64 elements: two groups of four 8-lane subtracts and stores.
LBB2_462:
	LONG $0x0cfafdc5; BYTE $0xb9               // vpsubd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20b9             // vpsubd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40b9             // vpsubd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x64fafdc5; WORD $0x60b9             // vpsubd    ymm4, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x000080b98cfafdc5; BYTE $0x00       // vpsubd    ymm1, ymm0, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b994fafdc5; BYTE $0x00       // vpsubd    ymm2, ymm0, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b99cfafdc5; BYTE $0x00       // vpsubd    ymm3, ymm0, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9a4fafdc5; BYTE $0x00       // vpsubd    ymm4, ymm0, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_462
	// NOTE(review): LBB2_702 is outside this chunk; presumably it handles an odd
	// trailing chunk (r9) before the scalar tail — confirm.
	JMP  LBB2_702

// LBB2_463: AVX path computing out[i] = in[i] * xmm0 on 64-bit double lanes.
// rcx = source base, r8 = destination base; eax is used as the element count and
// xmm0's low double as the scalar (both set up outside this chunk).
// NOTE(review): presumably only reached with at least 16 elements and
// non-overlapping buffers — confirm against the code that jumps here.
LBB2_463:
	// rdx = count rounded down to a multiple of 16.
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xf0     // and    edx, -16
	// ymm1 = low double of xmm0 broadcast to all four lanes.
	LONG $0x197de2c4; BYTE $0xc8 // vbroadcastsd    ymm1, xmm0
	// r9 = (rdx - 16) / 16 + 1 = number of 16-element chunks.
	LONG $0xf0728d48             // lea    rsi, [rdx - 16]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	// rdx == 16 (a single chunk): bypass the two-chunks-per-iteration loop.
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB2_709
	// rsi = -(r9 rounded down to even), counted up to zero by the loop;
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles 32 doubles: two groups of four 4-lane multiplies and stores.
LBB2_465:
	LONG $0x1459f5c5; BYTE $0xf9               // vmulpd    ymm2, ymm1, yword [rcx + 8*rdi]
	LONG $0x5c59f5c5; WORD $0x20f9             // vmulpd    ymm3, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x6459f5c5; WORD $0x40f9             // vmulpd    ymm4, ymm1, yword [rcx + 8*rdi + 64]
	LONG $0x6c59f5c5; WORD $0x60f9             // vmulpd    ymm5, ymm1, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf86c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080f99459f5c5; BYTE $0x00       // vmulpd    ymm2, ymm1, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f99c59f5c5; BYTE $0x00       // vmulpd    ymm3, ymm1, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f9a459f5c5; BYTE $0x00       // vmulpd    ymm4, ymm1, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9ac59f5c5; BYTE $0x00       // vmulpd    ymm5, ymm1, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f894117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a4117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB2_465
	// NOTE(review): LBB2_710 is outside this chunk; presumably it handles an odd
	// trailing chunk (r9) before the scalar tail — confirm.
	JMP  LBB2_710

// LBB2_466: AVX path computing out[i] = in[i] * xmm0 on 64-bit double lanes.
// rcx = source base, r8 = destination base; eax is used as the element count and
// xmm0's low double as the scalar (both set up outside this chunk).
// NOTE(review): presumably only reached with at least 16 elements and
// non-overlapping buffers — confirm against the code that jumps here.
LBB2_466:
	// rdx = count rounded down to a multiple of 16.
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xf0     // and    edx, -16
	// ymm1 = low double of xmm0 broadcast to all four lanes.
	LONG $0x197de2c4; BYTE $0xc8 // vbroadcastsd    ymm1, xmm0
	// r9 = (rdx - 16) / 16 + 1 = number of 16-element chunks.
	LONG $0xf0728d48             // lea    rsi, [rdx - 16]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	// rdx == 16 (a single chunk): bypass the two-chunks-per-iteration loop.
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB2_717
	// rsi = -(r9 rounded down to even), counted up to zero by the loop;
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles 32 doubles: two groups of four 4-lane multiplies and stores.
LBB2_468:
	LONG $0x1459f5c5; BYTE $0xf9               // vmulpd    ymm2, ymm1, yword [rcx + 8*rdi]
	LONG $0x5c59f5c5; WORD $0x20f9             // vmulpd    ymm3, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x6459f5c5; WORD $0x40f9             // vmulpd    ymm4, ymm1, yword [rcx + 8*rdi + 64]
	LONG $0x6c59f5c5; WORD $0x60f9             // vmulpd    ymm5, ymm1, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf86c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080f99459f5c5; BYTE $0x00       // vmulpd    ymm2, ymm1, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f99c59f5c5; BYTE $0x00       // vmulpd    ymm3, ymm1, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f9a459f5c5; BYTE $0x00       // vmulpd    ymm4, ymm1, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9ac59f5c5; BYTE $0x00       // vmulpd    ymm5, ymm1, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f894117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a4117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB2_468
	// NOTE(review): LBB2_718 is outside this chunk; presumably it handles an odd
	// trailing chunk (r9) before the scalar tail — confirm.
	JMP  LBB2_718

// LBB2_469: AVX path computing out[i] = in[i] + xmm0 on 64-bit double lanes.
// rcx = source base, r8 = destination base; eax is used as the element count and
// xmm0's low double as the scalar (both set up outside this chunk).
// NOTE(review): presumably only reached with at least 16 elements and
// non-overlapping buffers — confirm against the code that jumps here.
LBB2_469:
	// rdx = count rounded down to a multiple of 16.
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xf0     // and    edx, -16
	// ymm1 = low double of xmm0 broadcast to all four lanes.
	LONG $0x197de2c4; BYTE $0xc8 // vbroadcastsd    ymm1, xmm0
	// r9 = (rdx - 16) / 16 + 1 = number of 16-element chunks.
	LONG $0xf0728d48             // lea    rsi, [rdx - 16]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	// rdx == 16 (a single chunk): bypass the two-chunks-per-iteration loop.
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB2_725
	// rsi = -(r9 rounded down to even), counted up to zero by the loop;
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles 32 doubles: two groups of four 4-lane adds and stores.
LBB2_471:
	LONG $0x1458f5c5; BYTE $0xf9               // vaddpd    ymm2, ymm1, yword [rcx + 8*rdi]
	LONG $0x5c58f5c5; WORD $0x20f9             // vaddpd    ymm3, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x6458f5c5; WORD $0x40f9             // vaddpd    ymm4, ymm1, yword [rcx + 8*rdi + 64]
	LONG $0x6c58f5c5; WORD $0x60f9             // vaddpd    ymm5, ymm1, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf86c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080f99458f5c5; BYTE $0x00       // vaddpd    ymm2, ymm1, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f99c58f5c5; BYTE $0x00       // vaddpd    ymm3, ymm1, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f9a458f5c5; BYTE $0x00       // vaddpd    ymm4, ymm1, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9ac58f5c5; BYTE $0x00       // vaddpd    ymm5, ymm1, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f894117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a4117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB2_471
	// NOTE(review): LBB2_726 is outside this chunk; presumably it handles an odd
	// trailing chunk (r9) before the scalar tail — confirm.
	JMP  LBB2_726

// LBB2_472: AVX path computing out[i] = xmm0 - in[i] on 64-bit double lanes
// (the broadcast scalar is the minuend).
// rcx = source base, r8 = destination base; eax is used as the element count and
// xmm0's low double as the scalar (both set up outside this chunk).
// NOTE(review): presumably only reached with at least 16 elements and
// non-overlapping buffers — confirm against the code that jumps here.
LBB2_472:
	// rdx = count rounded down to a multiple of 16.
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xf0     // and    edx, -16
	// ymm1 = low double of xmm0 broadcast to all four lanes.
	LONG $0x197de2c4; BYTE $0xc8 // vbroadcastsd    ymm1, xmm0
	// r9 = (rdx - 16) / 16 + 1 = number of 16-element chunks.
	LONG $0xf0728d48             // lea    rsi, [rdx - 16]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	// rdx == 16 (a single chunk): bypass the two-chunks-per-iteration loop.
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB2_733
	// rsi = -(r9 rounded down to even), counted up to zero by the loop;
	// rdi = element index, starting at 0.
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles 32 doubles: two groups of four 4-lane subtracts and stores.
LBB2_474:
	LONG $0x145cf5c5; BYTE $0xf9               // vsubpd    ymm2, ymm1, yword [rcx + 8*rdi]
	LONG $0x5c5cf5c5; WORD $0x20f9             // vsubpd    ymm3, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x645cf5c5; WORD $0x40f9             // vsubpd    ymm4, ymm1, yword [rcx + 8*rdi + 64]
	LONG $0x6c5cf5c5; WORD $0x60f9             // vsubpd    ymm5, ymm1, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf86c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080f9945cf5c5; BYTE $0x00       // vsubpd    ymm2, ymm1, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f99c5cf5c5; BYTE $0x00       // vsubpd    ymm3, ymm1, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f9a45cf5c5; BYTE $0x00       // vsubpd    ymm4, ymm1, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9ac5cf5c5; BYTE $0x00       // vsubpd    ymm5, ymm1, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f894117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a4117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB2_474
	// NOTE(review): LBB2_734 is outside this chunk; presumably it handles an odd
	// trailing chunk (r9) before the scalar tail — confirm.
	JMP  LBB2_734

// LBB2_475: set-up for the vectorised float64 add (broadcast value + src[i]).
//   rdx  = eax rounded down to a multiple of 16
//          (eax is presumably the element count -- confirm against the code before this label)
//   ymm1 = low double of xmm0 replicated to all four lanes
//   r9   = number of 16-element groups in rdx, i.e. (rdx-16)/16 + 1
// When there is exactly one group (rdx == 16) control goes to LBB2_741, which is
// outside this view. Otherwise rsi = -(r9 & ~1) counts the paired groups the
// loop will consume and rdi = 0 is the starting element index.
LBB2_475:
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xf0     // and    edx, -16
	LONG $0x197de2c4; BYTE $0xc8 // vbroadcastsd    ymm1, xmm0
	LONG $0xf0728d48             // lea    rsi, [rdx - 16]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB2_741
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// LBB2_477: two groups (32 doubles) per pass:
//   dst[r8 + 8*rdi ..] = ymm1 + src[rcx + 8*rdi ..]
// rdi advances by 32 elements and rsi by 2 groups; the loop ends when rsi
// reaches zero and control continues at LBB2_742 (outside this view).
LBB2_477:
	LONG $0x1458f5c5; BYTE $0xf9               // vaddpd    ymm2, ymm1, yword [rcx + 8*rdi]
	LONG $0x5c58f5c5; WORD $0x20f9             // vaddpd    ymm3, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x6458f5c5; WORD $0x40f9             // vaddpd    ymm4, ymm1, yword [rcx + 8*rdi + 64]
	LONG $0x6c58f5c5; WORD $0x60f9             // vaddpd    ymm5, ymm1, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf86c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080f99458f5c5; BYTE $0x00       // vaddpd    ymm2, ymm1, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f99c58f5c5; BYTE $0x00       // vaddpd    ymm3, ymm1, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f9a458f5c5; BYTE $0x00       // vaddpd    ymm4, ymm1, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9ac58f5c5; BYTE $0x00       // vaddpd    ymm5, ymm1, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f894117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a4117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB2_477
	JMP  LBB2_742

// LBB2_478: set-up for the vectorised float64 subtract (broadcast value - src[i]).
//   rdx  = eax rounded down to a multiple of 16
//          (eax is presumably the element count -- confirm against the code before this label)
//   ymm1 = low double of xmm0 replicated to all four lanes
//   r9   = number of 16-element groups in rdx, i.e. (rdx-16)/16 + 1
// When there is exactly one group (rdx == 16) control goes to LBB2_749, which is
// outside this view. Otherwise rsi = -(r9 & ~1) counts the paired groups the
// loop will consume and rdi = 0 is the starting element index.
LBB2_478:
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xf0     // and    edx, -16
	LONG $0x197de2c4; BYTE $0xc8 // vbroadcastsd    ymm1, xmm0
	LONG $0xf0728d48             // lea    rsi, [rdx - 16]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB2_749
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// LBB2_480: two groups (32 doubles) per pass, broadcast operand on the left:
//   dst[r8 + 8*rdi ..] = ymm1 - src[rcx + 8*rdi ..]
// rdi advances by 32 elements and rsi by 2 groups; the loop ends when rsi
// reaches zero and control continues at LBB2_750 (outside this view).
LBB2_480:
	LONG $0x145cf5c5; BYTE $0xf9               // vsubpd    ymm2, ymm1, yword [rcx + 8*rdi]
	LONG $0x5c5cf5c5; WORD $0x20f9             // vsubpd    ymm3, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x645cf5c5; WORD $0x40f9             // vsubpd    ymm4, ymm1, yword [rcx + 8*rdi + 64]
	LONG $0x6c5cf5c5; WORD $0x60f9             // vsubpd    ymm5, ymm1, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf86c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080f9945cf5c5; BYTE $0x00       // vsubpd    ymm2, ymm1, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f99c5cf5c5; BYTE $0x00       // vsubpd    ymm3, ymm1, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f9a45cf5c5; BYTE $0x00       // vsubpd    ymm4, ymm1, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9ac5cf5c5; BYTE $0x00       // vsubpd    ymm5, ymm1, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f894117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a4117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac117dc1c4; WORD $0x0000     // vmovupd    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB2_480
	JMP  LBB2_750

// LBB2_481: set-up that splits the count in r10d into 32-wide groups and
// broadcasts a byte value.
//   rdi  = r10d rounded down to a multiple of 32
//          (r10d is presumably the element count -- confirm against the code before this label)
//   ymm0 = low byte of edx replicated to all 32 lanes
//   rax  = number of 32-wide groups, i.e. (rdi-32)/32 + 1
//   r9d  = that group count modulo 4
// With four or more groups (rdi-32 >= 96, unsigned) control goes to LBB2_621;
// otherwise rsi is zeroed and control goes to LBB2_623. Both targets are
// outside this view.
LBB2_481:
	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
	WORD $0xe783; BYTE $0xe0     // and    edi, -32
	LONG $0xc26ef9c5             // vmovd    xmm0, edx
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0xe0778d48             // lea    rsi, [rdi - 32]
	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
	LONG $0x05e8c148             // shr    rax, 5
	LONG $0x01c08348             // add    rax, 1
	WORD $0x8941; BYTE $0xc1     // mov    r9d, eax
	LONG $0x03e18341             // and    r9d, 3
	LONG $0x60fe8348             // cmp    rsi, 96
	JAE  LBB2_621
	WORD $0xf631                 // xor    esi, esi
	JMP  LBB2_623

// LBB2_483: set-up that splits the count in r10d into 32-wide groups and
// broadcasts a byte value.
//   rdi  = r10d rounded down to a multiple of 32
//          (r10d is presumably the element count -- confirm against the code before this label)
//   ymm0 = low byte of edx replicated to all 32 lanes
//   rax  = number of 32-wide groups, i.e. (rdi-32)/32 + 1
//   r9d  = that group count modulo 4
// With four or more groups (rdi-32 >= 96, unsigned) control goes to LBB2_631;
// otherwise rsi is zeroed and control goes to LBB2_633. Both targets are
// outside this view.
LBB2_483:
	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
	WORD $0xe783; BYTE $0xe0     // and    edi, -32
	LONG $0xc26ef9c5             // vmovd    xmm0, edx
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0xe0778d48             // lea    rsi, [rdi - 32]
	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
	LONG $0x05e8c148             // shr    rax, 5
	LONG $0x01c08348             // add    rax, 1
	WORD $0x8941; BYTE $0xc1     // mov    r9d, eax
	LONG $0x03e18341             // and    r9d, 3
	LONG $0x60fe8348             // cmp    rsi, 96
	JAE  LBB2_631
	WORD $0xf631                 // xor    esi, esi
	JMP  LBB2_633

// LBB2_485: set-up for the vectorised byte add (broadcast value + src[i]).
//   rsi  = r10d rounded down to a multiple of 128
//          (r10d is presumably the element count -- confirm against the code before this label)
//   ymm0 = low byte of eax replicated to all 32 lanes
//   r9   = number of 128-byte groups in rsi, i.e. (rsi-128)/128 + 1
// When there is exactly one group (rsi == 128) control goes to LBB2_757, which
// is outside this view. Otherwise rdx = -(r9 & ~1) counts the paired groups the
// loop will consume and rdi = 0 is the starting byte index.
LBB2_485:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0x80568d48             // lea    rdx, [rsi - 128]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_757
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_487: two groups (256 bytes) per pass, wrapping byte-wise addition:
//   dst[r8 + rdi ..] = ymm0 + src[rcx + rdi ..]
// rdi advances by 256 bytes and rdx by 2 groups; the loop ends when rdx
// reaches zero and control continues at LBB2_758 (outside this view).
LBB2_487:
	LONG $0x0cfcfdc5; BYTE $0x39               // vpaddb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54fcfdc5; WORD $0x2039             // vpaddb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x4039             // vpaddb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x64fcfdc5; WORD $0x6039             // vpaddb    ymm4, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	QUAD $0x000080398cfcfdc5; BYTE $0x00       // vpaddb    ymm1, ymm0, yword [rcx + rdi + 128]
	QUAD $0x0000a03994fcfdc5; BYTE $0x00       // vpaddb    ymm2, ymm0, yword [rcx + rdi + 160]
	QUAD $0x0000c0399cfcfdc5; BYTE $0x00       // vpaddb    ymm3, ymm0, yword [rcx + rdi + 192]
	QUAD $0x0000e039a4fcfdc5; BYTE $0x00       // vpaddb    ymm4, ymm0, yword [rcx + rdi + 224]
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_487
	JMP  LBB2_758

// LBB2_488: set-up for the vectorised byte subtract (broadcast value - src[i]).
//   rsi  = r10d rounded down to a multiple of 128
//          (r10d is presumably the element count -- confirm against the code before this label)
//   ymm0 = low byte of eax replicated to all 32 lanes
//   r9   = number of 128-byte groups in rsi, i.e. (rsi-128)/128 + 1
// When there is exactly one group (rsi == 128) control goes to LBB2_765, which
// is outside this view. Otherwise rdx = -(r9 & ~1) counts the paired groups the
// loop will consume and rdi = 0 is the starting byte index.
LBB2_488:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0x80568d48             // lea    rdx, [rsi - 128]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_765
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_490: two groups (256 bytes) per pass, wrapping byte-wise subtraction
// with the broadcast operand on the left:
//   dst[r8 + rdi ..] = ymm0 - src[rcx + rdi ..]
// rdi advances by 256 bytes and rdx by 2 groups; the loop ends when rdx
// reaches zero and control continues at LBB2_766 (outside this view).
LBB2_490:
	LONG $0x0cf8fdc5; BYTE $0x39               // vpsubb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54f8fdc5; WORD $0x2039             // vpsubb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x4039             // vpsubb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x64f8fdc5; WORD $0x6039             // vpsubb    ymm4, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	QUAD $0x000080398cf8fdc5; BYTE $0x00       // vpsubb    ymm1, ymm0, yword [rcx + rdi + 128]
	QUAD $0x0000a03994f8fdc5; BYTE $0x00       // vpsubb    ymm2, ymm0, yword [rcx + rdi + 160]
	QUAD $0x0000c0399cf8fdc5; BYTE $0x00       // vpsubb    ymm3, ymm0, yword [rcx + rdi + 192]
	QUAD $0x0000e039a4f8fdc5; BYTE $0x00       // vpsubb    ymm4, ymm0, yword [rcx + rdi + 224]
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_490
	JMP  LBB2_766

// LBB2_491: set-up for the vectorised byte add (broadcast value + src[i]).
//   rsi  = r10d rounded down to a multiple of 128
//          (r10d is presumably the element count -- confirm against the code before this label)
//   ymm0 = low byte of eax replicated to all 32 lanes
//   r9   = number of 128-byte groups in rsi, i.e. (rsi-128)/128 + 1
// When there is exactly one group (rsi == 128) control goes to LBB2_773, which
// is outside this view. Otherwise rdx = -(r9 & ~1) counts the paired groups the
// loop will consume and rdi = 0 is the starting byte index.
LBB2_491:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0x80568d48             // lea    rdx, [rsi - 128]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_773
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_493: two groups (256 bytes) per pass, wrapping byte-wise addition:
//   dst[r8 + rdi ..] = ymm0 + src[rcx + rdi ..]
// rdi advances by 256 bytes and rdx by 2 groups; the loop ends when rdx
// reaches zero and control continues at LBB2_774 (outside this view).
LBB2_493:
	LONG $0x0cfcfdc5; BYTE $0x39               // vpaddb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54fcfdc5; WORD $0x2039             // vpaddb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x4039             // vpaddb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x64fcfdc5; WORD $0x6039             // vpaddb    ymm4, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	QUAD $0x000080398cfcfdc5; BYTE $0x00       // vpaddb    ymm1, ymm0, yword [rcx + rdi + 128]
	QUAD $0x0000a03994fcfdc5; BYTE $0x00       // vpaddb    ymm2, ymm0, yword [rcx + rdi + 160]
	QUAD $0x0000c0399cfcfdc5; BYTE $0x00       // vpaddb    ymm3, ymm0, yword [rcx + rdi + 192]
	QUAD $0x0000e039a4fcfdc5; BYTE $0x00       // vpaddb    ymm4, ymm0, yword [rcx + rdi + 224]
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_493
	JMP  LBB2_774

// LBB2_494: set-up for the vectorised byte subtract (broadcast value - src[i]).
//   rsi  = r10d rounded down to a multiple of 128
//          (r10d is presumably the element count -- confirm against the code before this label)
//   ymm0 = low byte of eax replicated to all 32 lanes
//   r9   = number of 128-byte groups in rsi, i.e. (rsi-128)/128 + 1
// When there is exactly one group (rsi == 128) control goes to LBB2_781, which
// is outside this view. Otherwise rdx = -(r9 & ~1) counts the paired groups the
// loop will consume and rdi = 0 is the starting byte index.
LBB2_494:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0x80568d48             // lea    rdx, [rsi - 128]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_781
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_496: two groups (256 bytes) per pass, wrapping byte-wise subtraction
// with the broadcast operand on the left:
//   dst[r8 + rdi ..] = ymm0 - src[rcx + rdi ..]
// rdi advances by 256 bytes and rdx by 2 groups; the loop ends when rdx
// reaches zero and control continues at LBB2_782 (outside this view).
LBB2_496:
	LONG $0x0cf8fdc5; BYTE $0x39               // vpsubb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54f8fdc5; WORD $0x2039             // vpsubb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x4039             // vpsubb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x64f8fdc5; WORD $0x6039             // vpsubb    ymm4, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	QUAD $0x000080398cf8fdc5; BYTE $0x00       // vpsubb    ymm1, ymm0, yword [rcx + rdi + 128]
	QUAD $0x0000a03994f8fdc5; BYTE $0x00       // vpsubb    ymm2, ymm0, yword [rcx + rdi + 160]
	QUAD $0x0000c0399cf8fdc5; BYTE $0x00       // vpsubb    ymm3, ymm0, yword [rcx + rdi + 192]
	QUAD $0x0000e039a4f8fdc5; BYTE $0x00       // vpsubb    ymm4, ymm0, yword [rcx + rdi + 224]
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_496
	JMP  LBB2_782

// LBB2_497: set-up for the vectorised 64-bit integer multiply by a broadcast value.
//   rsi  = r10d rounded down to a multiple of 16
//          (r10d is presumably the element count -- confirm against the code before this label)
//   ymm0 = rax replicated to all four qword lanes (s)
//   ymm1 = ymm0 >> 32 per lane, i.e. the high dword of s moved into the low dword
//   r9   = number of 16-element groups in rsi, i.e. (rsi-16)/16 + 1
// When there is exactly one group (rsi == 16) control goes to LBB2_789, which is
// outside this view. Otherwise rdx = -(r9 & ~1) counts the paired groups the
// loop will consume and rdi = 0 is the starting element index.
LBB2_497:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	LONG $0xd073f5c5; BYTE $0x20 // vpsrlq    ymm1, ymm0, 32
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_789
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_499: two groups (32 qwords) per pass, dst[r8 + 8*rdi ..] = src[rcx + 8*rdi ..] * s.
// AVX2 has no packed 64x64-bit multiply, so each lane's product is assembled
// from 32x32->64 vpmuludq pieces. With a = source lane and s = broadcast value:
//   cross        = a_lo*s_hi + a_hi*s_lo        (ymm6, using ymm7 as scratch)
//   low64(a * s) = a_lo*s_lo + (cross << 32)
// The same seven-instruction sequence is repeated for ymm2..ymm5.
// rdi advances by 32 elements and rdx by 2 groups; the loop ends when rdx
// reaches zero and control continues at LBB2_790 (outside this view).
LBB2_499:
	// First group: load 16 qwords from src.
	LONG $0x146ffec5; BYTE $0xf9               // vmovdqu    ymm2, yword [rcx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20f9             // vmovdqu    ymm3, yword [rcx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40f9             // vmovdqu    ymm4, yword [rcx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60f9             // vmovdqu    ymm5, yword [rcx + 8*rdi + 96]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	// Store the first group's products.
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf86c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm5
	// Second group: the next 16 qwords, same computation.
	QUAD $0x000080f9946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f99c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f9a46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9ac6ffec5; BYTE $0x00       // vmovdqu    ymm5, yword [rcx + 8*rdi + 224]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	// Store the second group's products.
	QUAD $0x0080f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_499
	JMP  LBB2_790

// LBB2_500: set-up for the vectorised 64-bit integer multiply by a broadcast value.
//   rsi  = r10d rounded down to a multiple of 16
//          (r10d is presumably the element count -- confirm against the code before this label)
//   ymm0 = rax replicated to all four qword lanes (s)
//   ymm1 = ymm0 >> 32 per lane, i.e. the high dword of s moved into the low dword
//   r9   = number of 16-element groups in rsi, i.e. (rsi-16)/16 + 1
// When there is exactly one group (rsi == 16) control goes to LBB2_797, which is
// outside this view. Otherwise rdx = -(r9 & ~1) counts the paired groups the
// loop will consume and rdi = 0 is the starting element index.
LBB2_500:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	LONG $0xd073f5c5; BYTE $0x20 // vpsrlq    ymm1, ymm0, 32
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_797
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_502: two groups (32 qwords) per pass, dst[r8 + 8*rdi ..] = src[rcx + 8*rdi ..] * s.
// AVX2 has no packed 64x64-bit multiply, so each lane's product is assembled
// from 32x32->64 vpmuludq pieces. With a = source lane and s = broadcast value:
//   cross        = a_lo*s_hi + a_hi*s_lo        (ymm6, using ymm7 as scratch)
//   low64(a * s) = a_lo*s_lo + (cross << 32)
// The same seven-instruction sequence is repeated for ymm2..ymm5.
// rdi advances by 32 elements and rdx by 2 groups; the loop ends when rdx
// reaches zero and control continues at LBB2_798 (outside this view).
LBB2_502:
	// First group: load 16 qwords from src.
	LONG $0x146ffec5; BYTE $0xf9               // vmovdqu    ymm2, yword [rcx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20f9             // vmovdqu    ymm3, yword [rcx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40f9             // vmovdqu    ymm4, yword [rcx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60f9             // vmovdqu    ymm5, yword [rcx + 8*rdi + 96]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	// Store the first group's products.
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf86c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm5
	// Second group: the next 16 qwords, same computation.
	QUAD $0x000080f9946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f99c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f9a46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9ac6ffec5; BYTE $0x00       // vmovdqu    ymm5, yword [rcx + 8*rdi + 224]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	// Store the second group's products.
	QUAD $0x0080f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_502
	JMP  LBB2_798

// LBB2_503: set-up for the vectorised 64-bit integer add (broadcast value + src[i]).
//   rsi  = r10d rounded down to a multiple of 16
//          (r10d is presumably the element count -- confirm against the code before this label)
//   ymm0 = rax replicated to all four qword lanes
//   r9   = number of 16-element groups in rsi, i.e. (rsi-16)/16 + 1
// When there is exactly one group (rsi == 16) control goes to LBB2_805, which is
// outside this view. Otherwise rdx = -(r9 & ~1) counts the paired groups the
// loop will consume and rdi = 0 is the starting element index.
LBB2_503:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_805
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_505: two groups (32 qwords) per pass, wrapping 64-bit addition:
//   dst[r8 + 8*rdi ..] = ymm0 + src[rcx + 8*rdi ..]
// rdi advances by 32 elements and rdx by 2 groups; the loop ends when rdx
// reaches zero and control continues at LBB2_806 (outside this view).
LBB2_505:
	LONG $0x0cd4fdc5; BYTE $0xf9               // vpaddq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20f9             // vpaddq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40f9             // vpaddq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x64d4fdc5; WORD $0x60f9             // vpaddq    ymm4, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080f98cd4fdc5; BYTE $0x00       // vpaddq    ymm1, ymm0, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f994d4fdc5; BYTE $0x00       // vpaddq    ymm2, ymm0, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f99cd4fdc5; BYTE $0x00       // vpaddq    ymm3, ymm0, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9a4d4fdc5; BYTE $0x00       // vpaddq    ymm4, ymm0, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_505
	JMP  LBB2_806

// LBB2_506: set-up for the vectorised 64-bit integer subtract (broadcast value - src[i]).
//   rsi  = r10d rounded down to a multiple of 16
//          (r10d is presumably the element count -- confirm against the code before this label)
//   ymm0 = r11 replicated to all four qword lanes
//          (this path takes the value from r11, not rax)
//   r9   = number of 16-element groups in rsi, i.e. (rsi-16)/16 + 1
// When there is exactly one group (rsi == 16) control goes to LBB2_813, which is
// outside this view. Otherwise rdx = -(r9 & ~1) counts the paired groups the
// loop will consume and rdi = 0 is the starting element index.
LBB2_506:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9c1c4; BYTE $0xc3 // vmovq    xmm0, r11
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_813
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_508: two groups (32 qwords) per pass, wrapping 64-bit subtraction with
// the broadcast operand on the left:
//   dst[r8 + 8*rdi ..] = ymm0 - src[rcx + 8*rdi ..]
// rdi advances by 32 elements and rdx by 2 groups; the loop ends when rdx
// reaches zero and control continues at LBB2_814 (outside this view).
LBB2_508:
	LONG $0x0cfbfdc5; BYTE $0xf9               // vpsubq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20f9             // vpsubq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40f9             // vpsubq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x64fbfdc5; WORD $0x60f9             // vpsubq    ymm4, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080f98cfbfdc5; BYTE $0x00       // vpsubq    ymm1, ymm0, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f994fbfdc5; BYTE $0x00       // vpsubq    ymm2, ymm0, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f99cfbfdc5; BYTE $0x00       // vpsubq    ymm3, ymm0, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9a4fbfdc5; BYTE $0x00       // vpsubq    ymm4, ymm0, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_508
	JMP  LBB2_814

// LBB2_509 / LBB2_511: AVX2 bulk loop over 64-bit lanes,
// out[i] = scalar + in[i] (vpaddq).
//
// Registers as used below: r10d = element count, rax = scalar operand,
// rcx = source pointer, r8 = destination pointer.
// NOTE(review): the code assumes count >= 16 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_509.
//
// Setup: esi = count rounded down to a multiple of 16, ymm0 = scalar
// broadcast to four qword lanes, r9 = number of 16-element chunks.
// If there is exactly one chunk (rsi - 16 == 0) the unrolled loop is skipped
// via LBB2_821; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_509:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_821
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (32 qwords, 256 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_822 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_511:
	LONG $0x0cd4fdc5; BYTE $0xf9               // vpaddq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20f9             // vpaddq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40f9             // vpaddq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x64d4fdc5; WORD $0x60f9             // vpaddq    ymm4, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080f98cd4fdc5; BYTE $0x00       // vpaddq    ymm1, ymm0, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f994d4fdc5; BYTE $0x00       // vpaddq    ymm2, ymm0, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f99cd4fdc5; BYTE $0x00       // vpaddq    ymm3, ymm0, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9a4d4fdc5; BYTE $0x00       // vpaddq    ymm4, ymm0, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_511
	JMP  LBB2_822

// LBB2_512 / LBB2_514: AVX2 bulk loop over 64-bit lanes,
// out[i] = scalar - in[i] (vpsubq with the broadcast scalar as minuend).
//
// Registers as used below: r10d = element count, r11 = scalar operand,
// rcx = source pointer, r8 = destination pointer.
// NOTE(review): the code assumes count >= 16 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_512.
//
// Setup: esi = count rounded down to a multiple of 16, ymm0 = scalar
// broadcast to four qword lanes, r9 = number of 16-element chunks.
// If there is exactly one chunk (rsi - 16 == 0) the unrolled loop is skipped
// via LBB2_829; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_512:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9c1c4; BYTE $0xc3 // vmovq    xmm0, r11
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_829
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (32 qwords, 256 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_830 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_514:
	LONG $0x0cfbfdc5; BYTE $0xf9               // vpsubq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20f9             // vpsubq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40f9             // vpsubq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x64fbfdc5; WORD $0x60f9             // vpsubq    ymm4, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080f98cfbfdc5; BYTE $0x00       // vpsubq    ymm1, ymm0, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f994fbfdc5; BYTE $0x00       // vpsubq    ymm2, ymm0, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f99cfbfdc5; BYTE $0x00       // vpsubq    ymm3, ymm0, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9a4fbfdc5; BYTE $0x00       // vpsubq    ymm4, ymm0, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_514
	JMP  LBB2_830

// LBB2_515 / LBB2_517: AVX2 bulk loop over 16-bit lanes,
// out[i] = low 16 bits of (scalar * in[i]) (vpmullw).
//
// Registers as used below: r10d = element count, eax = scalar operand (only
// its low 16 bits are broadcast), rcx = source pointer, r8 = destination
// pointer.
// NOTE(review): the code assumes count >= 32 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_515.
//
// Setup: esi = count rounded down to a multiple of 32, ymm0 = scalar
// broadcast to sixteen word lanes, r9 = number of 32-element chunks.
// If there is exactly one chunk (rsi - 32 == 0) the unrolled loop is skipped
// via LBB2_837; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_515:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_837
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (64 words, 128 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_838 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_517:
	LONG $0x0cd5fdc5; BYTE $0x79               // vpmullw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x54d5fdc5; WORD $0x2079             // vpmullw    ymm2, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cd5fdc5; WORD $0x4079             // vpmullw    ymm1, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0x54d5fdc5; WORD $0x6079             // vpmullw    ymm2, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_517
	JMP  LBB2_838

// LBB2_518 / LBB2_520: AVX2 bulk loop over 16-bit lanes,
// out[i] = low 16 bits of (scalar * in[i]) (vpmullw).
//
// Registers as used below: r10d = element count, eax = scalar operand (only
// its low 16 bits are broadcast), rcx = source pointer, r8 = destination
// pointer.
// NOTE(review): the code assumes count >= 32 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_518.
//
// Setup: esi = count rounded down to a multiple of 32, ymm0 = scalar
// broadcast to sixteen word lanes, r9 = number of 32-element chunks.
// If there is exactly one chunk (rsi - 32 == 0) the unrolled loop is skipped
// via LBB2_845; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_518:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_845
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (64 words, 128 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_846 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_520:
	LONG $0x0cd5fdc5; BYTE $0x79               // vpmullw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x54d5fdc5; WORD $0x2079             // vpmullw    ymm2, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cd5fdc5; WORD $0x4079             // vpmullw    ymm1, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0x54d5fdc5; WORD $0x6079             // vpmullw    ymm2, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_520
	JMP  LBB2_846

// LBB2_521 / LBB2_523: AVX2 bulk loop over 16-bit lanes,
// out[i] = low 16 bits of (scalar * in[i]) (vpmullw).
//
// Registers as used below: r10d = element count, eax = scalar operand (only
// its low 16 bits are broadcast), rcx = source pointer, r8 = destination
// pointer.
// NOTE(review): the code assumes count >= 32 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_521.
//
// Setup: esi = count rounded down to a multiple of 32, ymm0 = scalar
// broadcast to sixteen word lanes, r9 = number of 32-element chunks.
// If there is exactly one chunk (rsi - 32 == 0) the unrolled loop is skipped
// via LBB2_853; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_521:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_853
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (64 words, 128 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_854 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_523:
	LONG $0x0cd5fdc5; BYTE $0x79               // vpmullw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x54d5fdc5; WORD $0x2079             // vpmullw    ymm2, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cd5fdc5; WORD $0x4079             // vpmullw    ymm1, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0x54d5fdc5; WORD $0x6079             // vpmullw    ymm2, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_523
	JMP  LBB2_854

// LBB2_524 / LBB2_526: AVX2 bulk loop over 16-bit lanes,
// out[i] = low 16 bits of (scalar * in[i]) (vpmullw).
//
// Registers as used below: r10d = element count, eax = scalar operand (only
// its low 16 bits are broadcast), rcx = source pointer, r8 = destination
// pointer.
// NOTE(review): the code assumes count >= 32 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_524.
//
// Setup: esi = count rounded down to a multiple of 32, ymm0 = scalar
// broadcast to sixteen word lanes, r9 = number of 32-element chunks.
// If there is exactly one chunk (rsi - 32 == 0) the unrolled loop is skipped
// via LBB2_861; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_524:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_861
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (64 words, 128 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_862 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_526:
	LONG $0x0cd5fdc5; BYTE $0x79               // vpmullw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x54d5fdc5; WORD $0x2079             // vpmullw    ymm2, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cd5fdc5; WORD $0x4079             // vpmullw    ymm1, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0x54d5fdc5; WORD $0x6079             // vpmullw    ymm2, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_526
	JMP  LBB2_862

// LBB2_527 / LBB2_529: AVX2 bulk loop over 16-bit lanes,
// out[i] = scalar + in[i] with 16-bit wraparound (vpaddw).
//
// Registers as used below: r10d = element count, eax = scalar operand (only
// its low 16 bits are broadcast), rcx = source pointer, r8 = destination
// pointer.
// NOTE(review): the code assumes count >= 32 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_527.
//
// Setup: esi = count rounded down to a multiple of 32, ymm0 = scalar
// broadcast to sixteen word lanes, r9 = number of 32-element chunks.
// If there is exactly one chunk (rsi - 32 == 0) the unrolled loop is skipped
// via LBB2_869; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_527:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_869
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (64 words, 128 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_870 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_529:
	LONG $0x0cfdfdc5; BYTE $0x79               // vpaddw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x54fdfdc5; WORD $0x2079             // vpaddw    ymm2, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cfdfdc5; WORD $0x4079             // vpaddw    ymm1, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0x54fdfdc5; WORD $0x6079             // vpaddw    ymm2, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_529
	JMP  LBB2_870

// LBB2_530 / LBB2_532: AVX2 bulk loop over 16-bit lanes,
// out[i] = scalar + in[i] with 16-bit wraparound (vpaddw).
//
// Registers as used below: r10d = element count, eax = scalar operand (only
// its low 16 bits are broadcast), rcx = source pointer, r8 = destination
// pointer.
// NOTE(review): the code assumes count >= 32 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_530.
//
// Setup: esi = count rounded down to a multiple of 32, ymm0 = scalar
// broadcast to sixteen word lanes, r9 = number of 32-element chunks.
// If there is exactly one chunk (rsi - 32 == 0) the unrolled loop is skipped
// via LBB2_877; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_530:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_877
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (64 words, 128 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_878 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_532:
	LONG $0x0cfdfdc5; BYTE $0x79               // vpaddw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x54fdfdc5; WORD $0x2079             // vpaddw    ymm2, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cfdfdc5; WORD $0x4079             // vpaddw    ymm1, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0x54fdfdc5; WORD $0x6079             // vpaddw    ymm2, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_532
	JMP  LBB2_878

// LBB2_533 / LBB2_535: AVX2 bulk loop over 16-bit lanes,
// out[i] = scalar - in[i] with 16-bit wraparound (vpsubw, scalar as minuend).
//
// Registers as used below: r10d = element count, eax = scalar operand (only
// its low 16 bits are broadcast), rcx = source pointer, r8 = destination
// pointer.
// NOTE(review): the code assumes count >= 32 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_533.
//
// Setup: esi = count rounded down to a multiple of 32, ymm0 = scalar
// broadcast to sixteen word lanes, r9 = number of 32-element chunks.
// If there is exactly one chunk (rsi - 32 == 0) the unrolled loop is skipped
// via LBB2_885; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_533:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_885
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (64 words, 128 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_886 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_535:
	LONG $0x0cf9fdc5; BYTE $0x79               // vpsubw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x54f9fdc5; WORD $0x2079             // vpsubw    ymm2, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cf9fdc5; WORD $0x4079             // vpsubw    ymm1, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0x54f9fdc5; WORD $0x6079             // vpsubw    ymm2, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_535
	JMP  LBB2_886

// LBB2_536 / LBB2_538: AVX2 bulk loop over 16-bit lanes,
// out[i] = scalar - in[i] with 16-bit wraparound (vpsubw, scalar as minuend).
//
// Registers as used below: r10d = element count, eax = scalar operand (only
// its low 16 bits are broadcast), rcx = source pointer, r8 = destination
// pointer.
// NOTE(review): the code assumes count >= 32 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_536.
//
// Setup: esi = count rounded down to a multiple of 32, ymm0 = scalar
// broadcast to sixteen word lanes, r9 = number of 32-element chunks.
// If there is exactly one chunk (rsi - 32 == 0) the unrolled loop is skipped
// via LBB2_893; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_536:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_893
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (64 words, 128 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_894 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_538:
	LONG $0x0cf9fdc5; BYTE $0x79               // vpsubw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x54f9fdc5; WORD $0x2079             // vpsubw    ymm2, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cf9fdc5; WORD $0x4079             // vpsubw    ymm1, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0x54f9fdc5; WORD $0x6079             // vpsubw    ymm2, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_538
	JMP  LBB2_894

// LBB2_539 / LBB2_541: AVX2 bulk loop over 16-bit lanes,
// out[i] = scalar + in[i] with 16-bit wraparound (vpaddw).
//
// Registers as used below: r10d = element count, eax = scalar operand (only
// its low 16 bits are broadcast), rcx = source pointer, r8 = destination
// pointer.
// NOTE(review): the code assumes count >= 32 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_539.
//
// Setup: esi = count rounded down to a multiple of 32, ymm0 = scalar
// broadcast to sixteen word lanes, r9 = number of 32-element chunks.
// If there is exactly one chunk (rsi - 32 == 0) the unrolled loop is skipped
// via LBB2_901; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_539:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_901
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (64 words, 128 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_902 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_541:
	LONG $0x0cfdfdc5; BYTE $0x79               // vpaddw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x54fdfdc5; WORD $0x2079             // vpaddw    ymm2, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cfdfdc5; WORD $0x4079             // vpaddw    ymm1, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0x54fdfdc5; WORD $0x6079             // vpaddw    ymm2, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_541
	JMP  LBB2_902

// LBB2_542 / LBB2_544: AVX2 bulk loop over 16-bit lanes,
// out[i] = scalar + in[i] with 16-bit wraparound (vpaddw).
//
// Registers as used below: r10d = element count, eax = scalar operand (only
// its low 16 bits are broadcast), rcx = source pointer, r8 = destination
// pointer.
// NOTE(review): the code assumes count >= 32 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_542.
//
// Setup: esi = count rounded down to a multiple of 32, ymm0 = scalar
// broadcast to sixteen word lanes, r9 = number of 32-element chunks.
// If there is exactly one chunk (rsi - 32 == 0) the unrolled loop is skipped
// via LBB2_909; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_542:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_909
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (64 words, 128 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_910 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_544:
	LONG $0x0cfdfdc5; BYTE $0x79               // vpaddw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x54fdfdc5; WORD $0x2079             // vpaddw    ymm2, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cfdfdc5; WORD $0x4079             // vpaddw    ymm1, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0x54fdfdc5; WORD $0x6079             // vpaddw    ymm2, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_544
	JMP  LBB2_910

// LBB2_545 / LBB2_547: AVX2 bulk loop over 16-bit lanes,
// out[i] = scalar - in[i] with 16-bit wraparound (vpsubw, scalar as minuend).
//
// Registers as used below: r10d = element count, eax = scalar operand (only
// its low 16 bits are broadcast), rcx = source pointer, r8 = destination
// pointer.
// NOTE(review): the code assumes count >= 32 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_545.
//
// Setup: esi = count rounded down to a multiple of 32, ymm0 = scalar
// broadcast to sixteen word lanes, r9 = number of 32-element chunks.
// If there is exactly one chunk (rsi - 32 == 0) the unrolled loop is skipped
// via LBB2_917; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_545:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_917
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (64 words, 128 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_918 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_547:
	LONG $0x0cf9fdc5; BYTE $0x79               // vpsubw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x54f9fdc5; WORD $0x2079             // vpsubw    ymm2, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cf9fdc5; WORD $0x4079             // vpsubw    ymm1, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0x54f9fdc5; WORD $0x6079             // vpsubw    ymm2, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_547
	JMP  LBB2_918

// LBB2_548 / LBB2_550: AVX2 bulk loop over 16-bit lanes,
// out[i] = scalar - in[i] with 16-bit wraparound (vpsubw, scalar as minuend).
//
// Registers as used below: r10d = element count, eax = scalar operand (only
// its low 16 bits are broadcast), rcx = source pointer, r8 = destination
// pointer.
// NOTE(review): the code assumes count >= 32 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_548.
//
// Setup: esi = count rounded down to a multiple of 32, ymm0 = scalar
// broadcast to sixteen word lanes, r9 = number of 32-element chunks.
// If there is exactly one chunk (rsi - 32 == 0) the unrolled loop is skipped
// via LBB2_925; otherwise rdx = -(r9 & ~1) and rdi = 0.
LBB2_548:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x797de2c4; BYTE $0xc0 // vpbroadcastw    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_925
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (64 words, 128 bytes). rdi is the element
// index; rdx counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_926 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_550:
	LONG $0x0cf9fdc5; BYTE $0x79               // vpsubw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x54f9fdc5; WORD $0x2079             // vpsubw    ymm2, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm2
	LONG $0x4cf9fdc5; WORD $0x4079             // vpsubw    ymm1, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0x54f9fdc5; WORD $0x6079             // vpsubw    ymm2, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x784c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm2
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_550
	JMP  LBB2_926

// LBB2_551 / LBB2_553: AVX2 bulk loop over 64-bit lanes,
// out[i] = low 64 bits of (scalar * in[i]).
//
// The product is assembled from 32x32->64 vpmuludq partial products.
// With s = scalar and v = element:
//   result = lo(v)*lo(s) + ((lo(v)*hi(s) + hi(v)*lo(s)) << 32)   (mod 2^64)
//
// Registers as used below: r10d = element count, rax = scalar operand,
// rcx = source pointer, r8 = destination pointer.
// NOTE(review): the code assumes count >= 16 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_551.
//
// Setup: esi = count rounded down to a multiple of 16, ymm0 = s broadcast to
// four qword lanes, ymm1 = ymm0 >> 32 (hi(s) in the low dword of each lane),
// r9 = number of 16-element chunks. If there is exactly one chunk
// (rsi - 16 == 0) the unrolled loop is skipped via LBB2_933; otherwise
// rdx = -(r9 & ~1) and rdi = 0.
LBB2_551:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	LONG $0xd073f5c5; BYTE $0x20 // vpsrlq    ymm1, ymm0, 32
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_933
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (32 qwords, 256 bytes): four vectors are
// loaded into ymm2..ymm5, multiplied in place using ymm6/ymm7 as scratch,
// and stored; then the same is repeated for the next four vectors.
// rdi is the element index; rdx counts up by 2 until it reaches zero.
// An odd trailing chunk is not processed here; control continues at
// LBB2_934 (defined elsewhere), which presumably finishes the remainder.
LBB2_553:
	LONG $0x146ffec5; BYTE $0xf9               // vmovdqu    ymm2, yword [rcx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20f9             // vmovdqu    ymm3, yword [rcx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40f9             // vmovdqu    ymm4, yword [rcx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60f9             // vmovdqu    ymm5, yword [rcx + 8*rdi + 96]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf86c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080f9946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f99c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f9a46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9ac6ffec5; BYTE $0x00       // vmovdqu    ymm5, yword [rcx + 8*rdi + 224]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	QUAD $0x0080f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_553
	JMP  LBB2_934

// LBB2_554 / LBB2_556: AVX2 bulk loop over single-precision lanes,
// out[i] = scalar * in[i] (vmulps).
//
// Registers as used below (note they differ from the integer blocks):
// eax = element count, xmm0 = scalar operand in its low float,
// rcx = source pointer, r8 = destination pointer.
// NOTE(review): the code assumes count >= 32 on entry; that guard is not
// in this block - confirm at the branch that targets LBB2_554.
//
// Setup: edx = count rounded down to a multiple of 32, ymm1 = scalar
// broadcast to eight float lanes, r9 = number of 32-element chunks.
// If there is exactly one chunk (rdx - 32 == 0) the unrolled loop is skipped
// via LBB2_941; otherwise rsi = -(r9 & ~1) and rdi = 0.
LBB2_554:
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xe0     // and    edx, -32
	LONG $0x187de2c4; BYTE $0xc8 // vbroadcastss    ymm1, xmm0
	LONG $0xe0728d48             // lea    rsi, [rdx - 32]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB2_941
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// Each pass handles two chunks (64 floats, 256 bytes). rdi is the element
// index; rsi counts up by 2 until it reaches zero. An odd trailing chunk is
// not processed here; control continues at LBB2_942 (defined elsewhere),
// which presumably finishes the remainder.
LBB2_556:
	LONG $0x1459f4c5; BYTE $0xb9               // vmulps    ymm2, ymm1, yword [rcx + 4*rdi]
	LONG $0x5c59f4c5; WORD $0x20b9             // vmulps    ymm3, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x6459f4c5; WORD $0x40b9             // vmulps    ymm4, ymm1, yword [rcx + 4*rdi + 64]
	LONG $0x6c59f4c5; WORD $0x60b9             // vmulps    ymm5, ymm1, yword [rcx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb86c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm5
	QUAD $0x000080b99459f4c5; BYTE $0x00       // vmulps    ymm2, ymm1, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b99c59f4c5; BYTE $0x00       // vmulps    ymm3, ymm1, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b9a459f4c5; BYTE $0x00       // vmulps    ymm4, ymm1, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9ac59f4c5; BYTE $0x00       // vmulps    ymm5, ymm1, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b894117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 128], ymm2
	QUAD $0x00a0b89c117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 160], ymm3
	QUAD $0x00c0b8a4117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 192], ymm4
	QUAD $0x00e0b8ac117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 224], ymm5
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB2_556
	JMP  LBB2_942

// LBB2_557 / LBB2_559: AVX2 main loop computing out[i] = scalar * in[i] on
// packed 64-bit integers (low 64 bits of the product).
// AVX2 has no packed 64x64 multiply, so each product is assembled from
// 32x32->64 vpmuludq partial products:
//   a*b = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)   (mod 2^64)
//   rax  = scalar b, broadcast to ymm0; ymm1 = b >> 32 (high halves)
//   r10d = count; rounded down to a multiple of 16 into esi
//   rcx  = pointer loaded from, r8 = pointer stored to
//          (NOTE(review): presumably input/output arrays; set up outside this view)
//   r9   = number of 16-element chunks = ((esi - 16) >> 4) + 1
// When there is exactly one chunk the unrolled loop is skipped via LBB2_949.
LBB2_557:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	LONG $0xd073f5c5; BYTE $0x20 // vpsrlq    ymm1, ymm0, 32
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_949
	// rdx = -(r9 & ~1): negative chunk counter, stepped by +2 per iteration.
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 16-element chunks (32 qwords); rdi is the
// element index. ymm6/ymm7 are scratch for the cross partial products.
LBB2_559:
	LONG $0x146ffec5; BYTE $0xf9               // vmovdqu    ymm2, yword [rcx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20f9             // vmovdqu    ymm3, yword [rcx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40f9             // vmovdqu    ymm4, yword [rcx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60f9             // vmovdqu    ymm5, yword [rcx + 8*rdi + 96]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf86c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm5
	QUAD $0x000080f9946ffec5; BYTE $0x00       // vmovdqu    ymm2, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f99c6ffec5; BYTE $0x00       // vmovdqu    ymm3, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f9a46ffec5; BYTE $0x00       // vmovdqu    ymm4, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9ac6ffec5; BYTE $0x00       // vmovdqu    ymm5, yword [rcx + 8*rdi + 224]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                           // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                           // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	QUAD $0x0080f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm2
	QUAD $0x00a0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm3
	QUAD $0x00c0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm4
	QUAD $0x00e0f8ac7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm5
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_559
	JMP  LBB2_950

// LBB2_560 / LBB2_562: AVX2 main loop computing out[i] = scalar * in[i] on
// packed float32 values. The instruction bytes match LBB2_554/LBB2_556; only
// the exit labels differ.
//   xmm0 (low lane) = scalar, broadcast to all 8 lanes of ymm1
//   eax             = count; rounded down to a multiple of 32 into edx
//   rcx             = pointer loaded from, r8 = pointer stored to
//                     (NOTE(review): presumably input/output arrays; set up
//                     outside this view)
//   r9              = number of 32-element chunks = ((edx - 32) >> 5) + 1
// When there is exactly one chunk the unrolled loop is skipped via LBB2_957.
LBB2_560:
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xe0     // and    edx, -32
	LONG $0x187de2c4; BYTE $0xc8 // vbroadcastss    ymm1, xmm0
	LONG $0xe0728d48             // lea    rsi, [rdx - 32]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB2_957
	// rsi = -(r9 & ~1): negative chunk counter, stepped by +2 per iteration.
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 32-element chunks (64 floats); rdi is the
// element index and the loop ends when rsi reaches zero.
LBB2_562:
	LONG $0x1459f4c5; BYTE $0xb9               // vmulps    ymm2, ymm1, yword [rcx + 4*rdi]
	LONG $0x5c59f4c5; WORD $0x20b9             // vmulps    ymm3, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x6459f4c5; WORD $0x40b9             // vmulps    ymm4, ymm1, yword [rcx + 4*rdi + 64]
	LONG $0x6c59f4c5; WORD $0x60b9             // vmulps    ymm5, ymm1, yword [rcx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb86c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm5
	QUAD $0x000080b99459f4c5; BYTE $0x00       // vmulps    ymm2, ymm1, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b99c59f4c5; BYTE $0x00       // vmulps    ymm3, ymm1, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b9a459f4c5; BYTE $0x00       // vmulps    ymm4, ymm1, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9ac59f4c5; BYTE $0x00       // vmulps    ymm5, ymm1, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b894117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 128], ymm2
	QUAD $0x00a0b89c117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 160], ymm3
	QUAD $0x00c0b8a4117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 192], ymm4
	QUAD $0x00e0b8ac117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 224], ymm5
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB2_562
	JMP  LBB2_958

// LBB2_563 / LBB2_565: AVX2 main loop computing out[i] = scalar + in[i] on
// packed 64-bit integers.
//   rax  = scalar, broadcast to all 4 qword lanes of ymm0
//   r10d = count; rounded down to a multiple of 16 into esi
//   rcx  = pointer loaded from, r8 = pointer stored to
//          (NOTE(review): presumably input/output arrays; set up outside this view)
//   r9   = number of 16-element chunks = ((esi - 16) >> 4) + 1
// When there is exactly one chunk the unrolled loop is skipped via LBB2_965.
LBB2_563:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_965
	// rdx = -(r9 & ~1): negative chunk counter, stepped by +2 per iteration.
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 16-element chunks (32 qwords); rdi is the
// element index and the loop ends when rdx reaches zero.
LBB2_565:
	LONG $0x0cd4fdc5; BYTE $0xf9               // vpaddq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20f9             // vpaddq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40f9             // vpaddq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x64d4fdc5; WORD $0x60f9             // vpaddq    ymm4, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080f98cd4fdc5; BYTE $0x00       // vpaddq    ymm1, ymm0, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f994d4fdc5; BYTE $0x00       // vpaddq    ymm2, ymm0, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f99cd4fdc5; BYTE $0x00       // vpaddq    ymm3, ymm0, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9a4d4fdc5; BYTE $0x00       // vpaddq    ymm4, ymm0, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_565
	JMP  LBB2_966

// LBB2_566 / LBB2_568: AVX2 main loop computing out[i] = scalar + in[i] on
// packed float32 values.
//   xmm0 (low lane) = scalar, broadcast to all 8 lanes of ymm1
//   eax             = count; rounded down to a multiple of 32 into edx
//   rcx             = pointer loaded from, r8 = pointer stored to
//                     (NOTE(review): presumably input/output arrays; set up
//                     outside this view)
//   r9              = number of 32-element chunks = ((edx - 32) >> 5) + 1
// When there is exactly one chunk the unrolled loop is skipped via LBB2_973.
LBB2_566:
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xe0     // and    edx, -32
	LONG $0x187de2c4; BYTE $0xc8 // vbroadcastss    ymm1, xmm0
	LONG $0xe0728d48             // lea    rsi, [rdx - 32]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB2_973
	// rsi = -(r9 & ~1): negative chunk counter, stepped by +2 per iteration.
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 32-element chunks (64 floats); rdi is the
// element index and the loop ends when rsi reaches zero.
LBB2_568:
	LONG $0x1458f4c5; BYTE $0xb9               // vaddps    ymm2, ymm1, yword [rcx + 4*rdi]
	LONG $0x5c58f4c5; WORD $0x20b9             // vaddps    ymm3, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x6458f4c5; WORD $0x40b9             // vaddps    ymm4, ymm1, yword [rcx + 4*rdi + 64]
	LONG $0x6c58f4c5; WORD $0x60b9             // vaddps    ymm5, ymm1, yword [rcx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb86c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm5
	QUAD $0x000080b99458f4c5; BYTE $0x00       // vaddps    ymm2, ymm1, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b99c58f4c5; BYTE $0x00       // vaddps    ymm3, ymm1, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b9a458f4c5; BYTE $0x00       // vaddps    ymm4, ymm1, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9ac58f4c5; BYTE $0x00       // vaddps    ymm5, ymm1, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b894117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 128], ymm2
	QUAD $0x00a0b89c117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 160], ymm3
	QUAD $0x00c0b8a4117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 192], ymm4
	QUAD $0x00e0b8ac117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 224], ymm5
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB2_568
	JMP  LBB2_974

// LBB2_569 / LBB2_571: AVX2 main loop computing out[i] = scalar - in[i] on
// packed 64-bit integers (the broadcast scalar is the minuend).
//   r11  = scalar, broadcast to all 4 qword lanes of ymm0
//   r10d = count; rounded down to a multiple of 16 into esi
//   rcx  = pointer loaded from, r8 = pointer stored to
//          (NOTE(review): presumably input/output arrays; set up outside this view)
//   r9   = number of 16-element chunks = ((esi - 16) >> 4) + 1
// When there is exactly one chunk the unrolled loop is skipped via LBB2_981.
LBB2_569:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9c1c4; BYTE $0xc3 // vmovq    xmm0, r11
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_981
	// rdx = -(r9 & ~1): negative chunk counter, stepped by +2 per iteration.
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 16-element chunks (32 qwords); rdi is the
// element index and the loop ends when rdx reaches zero.
LBB2_571:
	LONG $0x0cfbfdc5; BYTE $0xf9               // vpsubq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20f9             // vpsubq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40f9             // vpsubq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x64fbfdc5; WORD $0x60f9             // vpsubq    ymm4, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080f98cfbfdc5; BYTE $0x00       // vpsubq    ymm1, ymm0, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f994fbfdc5; BYTE $0x00       // vpsubq    ymm2, ymm0, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f99cfbfdc5; BYTE $0x00       // vpsubq    ymm3, ymm0, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9a4fbfdc5; BYTE $0x00       // vpsubq    ymm4, ymm0, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_571
	JMP  LBB2_982

// LBB2_572 / LBB2_574: AVX2 main loop computing out[i] = scalar - in[i] on
// packed float32 values (the broadcast scalar is the minuend).
//   xmm0 (low lane) = scalar, broadcast to all 8 lanes of ymm1
//   eax             = count; rounded down to a multiple of 32 into edx
//   rcx             = pointer loaded from, r8 = pointer stored to
//                     (NOTE(review): presumably input/output arrays; set up
//                     outside this view)
//   r9              = number of 32-element chunks = ((edx - 32) >> 5) + 1
// When there is exactly one chunk the unrolled loop is skipped via LBB2_989.
LBB2_572:
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xe0     // and    edx, -32
	LONG $0x187de2c4; BYTE $0xc8 // vbroadcastss    ymm1, xmm0
	LONG $0xe0728d48             // lea    rsi, [rdx - 32]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB2_989
	// rsi = -(r9 & ~1): negative chunk counter, stepped by +2 per iteration.
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 32-element chunks (64 floats); rdi is the
// element index and the loop ends when rsi reaches zero.
LBB2_574:
	LONG $0x145cf4c5; BYTE $0xb9               // vsubps    ymm2, ymm1, yword [rcx + 4*rdi]
	LONG $0x5c5cf4c5; WORD $0x20b9             // vsubps    ymm3, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x645cf4c5; WORD $0x40b9             // vsubps    ymm4, ymm1, yword [rcx + 4*rdi + 64]
	LONG $0x6c5cf4c5; WORD $0x60b9             // vsubps    ymm5, ymm1, yword [rcx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb86c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm5
	QUAD $0x000080b9945cf4c5; BYTE $0x00       // vsubps    ymm2, ymm1, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b99c5cf4c5; BYTE $0x00       // vsubps    ymm3, ymm1, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b9a45cf4c5; BYTE $0x00       // vsubps    ymm4, ymm1, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9ac5cf4c5; BYTE $0x00       // vsubps    ymm5, ymm1, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b894117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 128], ymm2
	QUAD $0x00a0b89c117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 160], ymm3
	QUAD $0x00c0b8a4117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 192], ymm4
	QUAD $0x00e0b8ac117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 224], ymm5
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB2_574
	JMP  LBB2_990

// LBB2_575 / LBB2_577: AVX2 main loop computing out[i] = scalar + in[i] on
// packed 64-bit integers. The instruction bytes match LBB2_563/LBB2_565;
// only the exit labels differ.
//   rax  = scalar, broadcast to all 4 qword lanes of ymm0
//   r10d = count; rounded down to a multiple of 16 into esi
//   rcx  = pointer loaded from, r8 = pointer stored to
//          (NOTE(review): presumably input/output arrays; set up outside this view)
//   r9   = number of 16-element chunks = ((esi - 16) >> 4) + 1
// When there is exactly one chunk the unrolled loop is skipped via LBB2_997.
LBB2_575:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9e1c4; BYTE $0xc0 // vmovq    xmm0, rax
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_997
	// rdx = -(r9 & ~1): negative chunk counter, stepped by +2 per iteration.
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 16-element chunks (32 qwords); rdi is the
// element index and the loop ends when rdx reaches zero.
LBB2_577:
	LONG $0x0cd4fdc5; BYTE $0xf9               // vpaddq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20f9             // vpaddq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40f9             // vpaddq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x64d4fdc5; WORD $0x60f9             // vpaddq    ymm4, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080f98cd4fdc5; BYTE $0x00       // vpaddq    ymm1, ymm0, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f994d4fdc5; BYTE $0x00       // vpaddq    ymm2, ymm0, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f99cd4fdc5; BYTE $0x00       // vpaddq    ymm3, ymm0, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9a4d4fdc5; BYTE $0x00       // vpaddq    ymm4, ymm0, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_577
	JMP  LBB2_998

// LBB2_578 / LBB2_580: AVX2 main loop computing out[i] = scalar + in[i] on
// packed float32 values. The instruction bytes match LBB2_566/LBB2_568; only
// the exit labels differ.
//   xmm0 (low lane) = scalar, broadcast to all 8 lanes of ymm1
//   eax             = count; rounded down to a multiple of 32 into edx
//   rcx             = pointer loaded from, r8 = pointer stored to
//                     (NOTE(review): presumably input/output arrays; set up
//                     outside this view)
//   r9              = number of 32-element chunks = ((edx - 32) >> 5) + 1
// When there is exactly one chunk the unrolled loop is skipped via LBB2_1005.
LBB2_578:
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xe0     // and    edx, -32
	LONG $0x187de2c4; BYTE $0xc8 // vbroadcastss    ymm1, xmm0
	LONG $0xe0728d48             // lea    rsi, [rdx - 32]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB2_1005
	// rsi = -(r9 & ~1): negative chunk counter, stepped by +2 per iteration.
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 32-element chunks (64 floats); rdi is the
// element index and the loop ends when rsi reaches zero.
LBB2_580:
	LONG $0x1458f4c5; BYTE $0xb9               // vaddps    ymm2, ymm1, yword [rcx + 4*rdi]
	LONG $0x5c58f4c5; WORD $0x20b9             // vaddps    ymm3, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x6458f4c5; WORD $0x40b9             // vaddps    ymm4, ymm1, yword [rcx + 4*rdi + 64]
	LONG $0x6c58f4c5; WORD $0x60b9             // vaddps    ymm5, ymm1, yword [rcx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb86c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm5
	QUAD $0x000080b99458f4c5; BYTE $0x00       // vaddps    ymm2, ymm1, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b99c58f4c5; BYTE $0x00       // vaddps    ymm3, ymm1, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b9a458f4c5; BYTE $0x00       // vaddps    ymm4, ymm1, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9ac58f4c5; BYTE $0x00       // vaddps    ymm5, ymm1, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b894117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 128], ymm2
	QUAD $0x00a0b89c117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 160], ymm3
	QUAD $0x00c0b8a4117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 192], ymm4
	QUAD $0x00e0b8ac117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 224], ymm5
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB2_580
	JMP  LBB2_1006

// LBB2_581 / LBB2_583: AVX2 main loop computing out[i] = scalar - in[i] on
// packed 64-bit integers (the broadcast scalar is the minuend). The
// instruction bytes match LBB2_569/LBB2_571; only the exit labels differ.
//   r11  = scalar, broadcast to all 4 qword lanes of ymm0
//   r10d = count; rounded down to a multiple of 16 into esi
//   rcx  = pointer loaded from, r8 = pointer stored to
//          (NOTE(review): presumably input/output arrays; set up outside this view)
//   r9   = number of 16-element chunks = ((esi - 16) >> 4) + 1
// When there is exactly one chunk the unrolled loop is skipped via LBB2_1013.
LBB2_581:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0     // and    esi, -16
	LONG $0x6ef9c1c4; BYTE $0xc3 // vmovq    xmm0, r11
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_1013
	// rdx = -(r9 & ~1): negative chunk counter, stepped by +2 per iteration.
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 16-element chunks (32 qwords); rdi is the
// element index and the loop ends when rdx reaches zero.
LBB2_583:
	LONG $0x0cfbfdc5; BYTE $0xf9               // vpsubq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20f9             // vpsubq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40f9             // vpsubq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x64fbfdc5; WORD $0x60f9             // vpsubq    ymm4, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm4
	QUAD $0x000080f98cfbfdc5; BYTE $0x00       // vpsubq    ymm1, ymm0, yword [rcx + 8*rdi + 128]
	QUAD $0x0000a0f994fbfdc5; BYTE $0x00       // vpsubq    ymm2, ymm0, yword [rcx + 8*rdi + 160]
	QUAD $0x0000c0f99cfbfdc5; BYTE $0x00       // vpsubq    ymm3, ymm0, yword [rcx + 8*rdi + 192]
	QUAD $0x0000e0f9a4fbfdc5; BYTE $0x00       // vpsubq    ymm4, ymm0, yword [rcx + 8*rdi + 224]
	QUAD $0x0080f88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 128], ymm1
	QUAD $0x00a0f8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 160], ymm2
	QUAD $0x00c0f89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 192], ymm3
	QUAD $0x00e0f8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 8*rdi + 224], ymm4
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_583
	JMP  LBB2_1014

// LBB2_584 / LBB2_586: AVX2 main loop computing out[i] = scalar - in[i] on
// packed float32 values (the broadcast scalar is the minuend). The
// instruction bytes match LBB2_572/LBB2_574; only the exit labels differ.
//   xmm0 (low lane) = scalar, broadcast to all 8 lanes of ymm1
//   eax             = count; rounded down to a multiple of 32 into edx
//   rcx             = pointer loaded from, r8 = pointer stored to
//                     (NOTE(review): presumably input/output arrays; set up
//                     outside this view)
//   r9              = number of 32-element chunks = ((edx - 32) >> 5) + 1
// When there is exactly one chunk the unrolled loop is skipped via LBB2_1021.
LBB2_584:
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xe0     // and    edx, -32
	LONG $0x187de2c4; BYTE $0xc8 // vbroadcastss    ymm1, xmm0
	LONG $0xe0728d48             // lea    rsi, [rdx - 32]
	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
	JE   LBB2_1021
	// rsi = -(r9 & ~1): negative chunk counter, stepped by +2 per iteration.
	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
	LONG $0xfee68348             // and    rsi, -2
	WORD $0xf748; BYTE $0xde     // neg    rsi
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 32-element chunks (64 floats); rdi is the
// element index and the loop ends when rsi reaches zero.
LBB2_586:
	LONG $0x145cf4c5; BYTE $0xb9               // vsubps    ymm2, ymm1, yword [rcx + 4*rdi]
	LONG $0x5c5cf4c5; WORD $0x20b9             // vsubps    ymm3, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x645cf4c5; WORD $0x40b9             // vsubps    ymm4, ymm1, yword [rcx + 4*rdi + 64]
	LONG $0x6c5cf4c5; WORD $0x60b9             // vsubps    ymm5, ymm1, yword [rcx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb86c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm5
	QUAD $0x000080b9945cf4c5; BYTE $0x00       // vsubps    ymm2, ymm1, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b99c5cf4c5; BYTE $0x00       // vsubps    ymm3, ymm1, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b9a45cf4c5; BYTE $0x00       // vsubps    ymm4, ymm1, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9ac5cf4c5; BYTE $0x00       // vsubps    ymm5, ymm1, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b894117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 128], ymm2
	QUAD $0x00a0b89c117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 160], ymm3
	QUAD $0x00c0b8a4117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 192], ymm4
	QUAD $0x00e0b8ac117cc1c4; WORD $0x0000     // vmovups    yword [r8 + 4*rdi + 224], ymm5
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB2_586
	JMP  LBB2_1022

// LBB2_587: setup for a byte-wide vector loop whose body is outside this view.
//   edi  = r10d rounded down to a multiple of 32
//   ymm0 = low byte of edx broadcast to all 32 byte lanes
//   rax  = number of 32-byte blocks = ((edi - 32) >> 5) + 1
//   r9d  = rax mod 4
// With at least four blocks (edi - 32 >= 96) control goes to LBB2_641;
// otherwise esi is zeroed and control goes to LBB2_643.
// NOTE(review): presumably LBB2_641 is a 4x-unrolled body and LBB2_643 the
// remainder handler that consumes r9d — confirm against those labels.
LBB2_587:
	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
	WORD $0xe783; BYTE $0xe0     // and    edi, -32
	LONG $0xc26ef9c5             // vmovd    xmm0, edx
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0xe0778d48             // lea    rsi, [rdi - 32]
	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
	LONG $0x05e8c148             // shr    rax, 5
	LONG $0x01c08348             // add    rax, 1
	WORD $0x8941; BYTE $0xc1     // mov    r9d, eax
	LONG $0x03e18341             // and    r9d, 3
	LONG $0x60fe8348             // cmp    rsi, 96
	JAE  LBB2_641
	WORD $0xf631                 // xor    esi, esi
	JMP  LBB2_643

// LBB2_589: setup for a byte-wide vector loop whose body is outside this view.
// The instruction bytes match LBB2_587; only the branch targets differ.
//   edi  = r10d rounded down to a multiple of 32
//   ymm0 = low byte of edx broadcast to all 32 byte lanes
//   rax  = number of 32-byte blocks = ((edi - 32) >> 5) + 1
//   r9d  = rax mod 4
// With at least four blocks (edi - 32 >= 96) control goes to LBB2_651;
// otherwise esi is zeroed and control goes to LBB2_653.
// NOTE(review): presumably LBB2_651 is a 4x-unrolled body and LBB2_653 the
// remainder handler that consumes r9d — confirm against those labels.
LBB2_589:
	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
	WORD $0xe783; BYTE $0xe0     // and    edi, -32
	LONG $0xc26ef9c5             // vmovd    xmm0, edx
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0xe0778d48             // lea    rsi, [rdi - 32]
	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
	LONG $0x05e8c148             // shr    rax, 5
	LONG $0x01c08348             // add    rax, 1
	WORD $0x8941; BYTE $0xc1     // mov    r9d, eax
	LONG $0x03e18341             // and    r9d, 3
	LONG $0x60fe8348             // cmp    rsi, 96
	JAE  LBB2_651
	WORD $0xf631                 // xor    esi, esi
	JMP  LBB2_653

// LBB2_591 / LBB2_593: AVX2 main loop computing out[i] = scalar + in[i] on
// packed 8-bit integers (wrapping byte add).
//   eax (low byte) = scalar, broadcast to all 32 byte lanes of ymm0
//   r10d = count; rounded down to a multiple of 128 into esi
//   rcx  = pointer loaded from, r8 = pointer stored to
//          (NOTE(review): presumably input/output arrays; set up outside this view)
//   r9   = number of 128-byte chunks = ((esi - 128) >> 7) + 1
// When there is exactly one chunk the unrolled loop is skipped via LBB2_1029.
LBB2_591:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0x80568d48             // lea    rdx, [rsi - 128]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_1029
	// rdx = -(r9 & ~1): negative chunk counter, stepped by +2 per iteration.
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 128-byte chunks (256 bytes); rdi is the byte
// offset and the loop ends when rdx reaches zero.
LBB2_593:
	LONG $0x0cfcfdc5; BYTE $0x39               // vpaddb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54fcfdc5; WORD $0x2039             // vpaddb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x4039             // vpaddb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x64fcfdc5; WORD $0x6039             // vpaddb    ymm4, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	QUAD $0x000080398cfcfdc5; BYTE $0x00       // vpaddb    ymm1, ymm0, yword [rcx + rdi + 128]
	QUAD $0x0000a03994fcfdc5; BYTE $0x00       // vpaddb    ymm2, ymm0, yword [rcx + rdi + 160]
	QUAD $0x0000c0399cfcfdc5; BYTE $0x00       // vpaddb    ymm3, ymm0, yword [rcx + rdi + 192]
	QUAD $0x0000e039a4fcfdc5; BYTE $0x00       // vpaddb    ymm4, ymm0, yword [rcx + rdi + 224]
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_593
	JMP  LBB2_1030

// LBB2_594 / LBB2_596: AVX2 main loop computing out[i] = scalar - in[i] on
// packed 8-bit integers (wrapping byte subtract; the broadcast scalar is the
// minuend).
//   eax (low byte) = scalar, broadcast to all 32 byte lanes of ymm0
//   r10d = count; rounded down to a multiple of 128 into esi
//   rcx  = pointer loaded from, r8 = pointer stored to
//          (NOTE(review): presumably input/output arrays; set up outside this view)
//   r9   = number of 128-byte chunks = ((esi - 128) >> 7) + 1
// When there is exactly one chunk the unrolled loop is skipped via LBB2_1037.
LBB2_594:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0x80568d48             // lea    rdx, [rsi - 128]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_1037
	// rdx = -(r9 & ~1): negative chunk counter, stepped by +2 per iteration.
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// Each iteration handles two 128-byte chunks (256 bytes); rdi is the byte
// offset and the loop ends when rdx reaches zero.
LBB2_596:
	LONG $0x0cf8fdc5; BYTE $0x39               // vpsubb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54f8fdc5; WORD $0x2039             // vpsubb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x4039             // vpsubb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x64f8fdc5; WORD $0x6039             // vpsubb    ymm4, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	QUAD $0x000080398cf8fdc5; BYTE $0x00       // vpsubb    ymm1, ymm0, yword [rcx + rdi + 128]
	QUAD $0x0000a03994f8fdc5; BYTE $0x00       // vpsubb    ymm2, ymm0, yword [rcx + rdi + 160]
	QUAD $0x0000c0399cf8fdc5; BYTE $0x00       // vpsubb    ymm3, ymm0, yword [rcx + rdi + 192]
	QUAD $0x0000e039a4f8fdc5; BYTE $0x00       // vpsubb    ymm4, ymm0, yword [rcx + rdi + 224]
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_596
	JMP  LBB2_1038

// LBB2_597: set-up for the unrolled byte-add loop at LBB2_599.
//   esi  = r10d rounded down to a multiple of 128 (bytes covered by whole 128-byte chunks)
//   ymm0 = low byte of eax replicated into all 32 byte lanes
//   r9   = number of 128-byte chunks, ((rsi - 128) >> 7) + 1
//   rdx  = -(r9 & ~1): negated count of chunks that can be taken two at a time
//   rdi  = 0, running byte offset
// If rsi - 128 == 0 (exactly one chunk) the paired loop is bypassed via LBB2_1045,
// which lies outside this excerpt (presumably the odd-chunk tail -- confirm).
LBB2_597:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0x80568d48             // lea    rdx, [rsi - 128]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_1045
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_599: byte-add loop, two 128-byte chunks (256 bytes) per iteration.
// Every vpaddb adds ymm0 to the 32 bytes at [rcx + rdi + k]; the four results of each
// half are then stored unaligned to [r8 + rdi + k].
// rdi advances by 256 and rdx (a negative counter) by 2; the loop ends when rdx reaches zero,
// after which control continues at LBB2_1046 (outside this excerpt).
LBB2_599:
	LONG $0x0cfcfdc5; BYTE $0x39               // vpaddb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54fcfdc5; WORD $0x2039             // vpaddb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x4039             // vpaddb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x64fcfdc5; WORD $0x6039             // vpaddb    ymm4, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	QUAD $0x000080398cfcfdc5; BYTE $0x00       // vpaddb    ymm1, ymm0, yword [rcx + rdi + 128]
	QUAD $0x0000a03994fcfdc5; BYTE $0x00       // vpaddb    ymm2, ymm0, yword [rcx + rdi + 160]
	QUAD $0x0000c0399cfcfdc5; BYTE $0x00       // vpaddb    ymm3, ymm0, yword [rcx + rdi + 192]
	QUAD $0x0000e039a4fcfdc5; BYTE $0x00       // vpaddb    ymm4, ymm0, yword [rcx + rdi + 224]
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_599
	JMP  LBB2_1046

// LBB2_600: set-up for the unrolled byte-subtract loop at LBB2_602.
//   esi  = r10d rounded down to a multiple of 128 (bytes covered by whole 128-byte chunks)
//   ymm0 = low byte of eax replicated into all 32 byte lanes
//   r9   = number of 128-byte chunks, ((rsi - 128) >> 7) + 1
//   rdx  = -(r9 & ~1): negated count of chunks that can be taken two at a time
//   rdi  = 0, running byte offset
// If rsi - 128 == 0 (exactly one chunk) the paired loop is bypassed via LBB2_1053,
// which lies outside this excerpt (presumably the odd-chunk tail -- confirm).
LBB2_600:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0x80     // and    esi, -128
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x787de2c4; BYTE $0xc0 // vpbroadcastb    ymm0, xmm0
	LONG $0x80568d48             // lea    rdx, [rsi - 128]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x07e9c149             // shr    r9, 7
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_1053
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_602: byte-subtract loop, two 128-byte chunks (256 bytes) per iteration.
// Every vpsubb computes ymm0 - [rcx + rdi + k] per byte lane (register operand minus
// memory operand); the four results of each half are then stored unaligned to [r8 + rdi + k].
// rdi advances by 256 and rdx (a negative counter) by 2; the loop ends when rdx reaches zero,
// after which control continues at LBB2_1054 (outside this excerpt).
LBB2_602:
	LONG $0x0cf8fdc5; BYTE $0x39               // vpsubb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54f8fdc5; WORD $0x2039             // vpsubb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x4039             // vpsubb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x64f8fdc5; WORD $0x6039             // vpsubb    ymm4, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3864; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm4
	QUAD $0x000080398cf8fdc5; BYTE $0x00       // vpsubb    ymm1, ymm0, yword [rcx + rdi + 128]
	QUAD $0x0000a03994f8fdc5; BYTE $0x00       // vpsubb    ymm2, ymm0, yword [rcx + rdi + 160]
	QUAD $0x0000c0399cf8fdc5; BYTE $0x00       // vpsubb    ymm3, ymm0, yword [rcx + rdi + 192]
	QUAD $0x0000e039a4f8fdc5; BYTE $0x00       // vpsubb    ymm4, ymm0, yword [rcx + rdi + 224]
	QUAD $0x0080388c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 128], ymm1
	QUAD $0x00a038947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 160], ymm2
	QUAD $0x00c0389c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 192], ymm3
	QUAD $0x00e038a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_602
	JMP  LBB2_1054

// LBB2_603: set-up for the unrolled 32-bit multiply loop at LBB2_605.
//   esi  = r10d rounded down to a multiple of 32 (elements covered by whole 32-dword chunks)
//   ymm0 = eax replicated into all 8 dword lanes
//   r9   = number of 32-element chunks, ((rsi - 32) >> 5) + 1
//   rdx  = -(r9 & ~1): negated count of chunks that can be taken two at a time
//   rdi  = 0, running element index (scaled by 4 in the loop's addressing)
// If rsi - 32 == 0 (exactly one chunk) the paired loop is bypassed via LBB2_1061,
// which lies outside this excerpt (presumably the odd-chunk tail -- confirm).
LBB2_603:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_1061
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_605: 32-bit multiply loop, 64 dwords (256 bytes) per iteration.
// Every vpmulld multiplies ymm0 with the 8 dwords at [rcx + 4*rdi + k], keeping the low
// 32 bits of each product; results are stored unaligned to [r8 + 4*rdi + k].
// rdi (an element index) advances by 64 and rdx (a negative counter) by 2; the loop ends
// when rdx reaches zero, after which control continues at LBB2_1062 (outside this excerpt).
LBB2_605:
	LONG $0x407de2c4; WORD $0xb90c             // vpmulld    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x407de2c4; WORD $0xb954; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xb95c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xb964; BYTE $0x60 // vpmulld    ymm4, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x0080b98c407de2c4; WORD $0x0000     // vpmulld    ymm1, ymm0, yword [rcx + 4*rdi + 128]
	QUAD $0x00a0b994407de2c4; WORD $0x0000     // vpmulld    ymm2, ymm0, yword [rcx + 4*rdi + 160]
	QUAD $0x00c0b99c407de2c4; WORD $0x0000     // vpmulld    ymm3, ymm0, yword [rcx + 4*rdi + 192]
	QUAD $0x00e0b9a4407de2c4; WORD $0x0000     // vpmulld    ymm4, ymm0, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_605
	JMP  LBB2_1062

// LBB2_606: set-up for the unrolled 32-bit multiply loop at LBB2_608.
//   esi  = r10d rounded down to a multiple of 32 (elements covered by whole 32-dword chunks)
//   ymm0 = eax replicated into all 8 dword lanes
//   r9   = number of 32-element chunks, ((rsi - 32) >> 5) + 1
//   rdx  = -(r9 & ~1): negated count of chunks that can be taken two at a time
//   rdi  = 0, running element index (scaled by 4 in the loop's addressing)
// If rsi - 32 == 0 (exactly one chunk) the paired loop is bypassed via LBB2_1069,
// which lies outside this excerpt (presumably the odd-chunk tail -- confirm).
LBB2_606:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_1069
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_608: 32-bit multiply loop, 64 dwords (256 bytes) per iteration.
// Every vpmulld multiplies ymm0 with the 8 dwords at [rcx + 4*rdi + k], keeping the low
// 32 bits of each product; results are stored unaligned to [r8 + 4*rdi + k].
// rdi (an element index) advances by 64 and rdx (a negative counter) by 2; the loop ends
// when rdx reaches zero, after which control continues at LBB2_1070 (outside this excerpt).
LBB2_608:
	LONG $0x407de2c4; WORD $0xb90c             // vpmulld    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x407de2c4; WORD $0xb954; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xb95c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xb964; BYTE $0x60 // vpmulld    ymm4, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x0080b98c407de2c4; WORD $0x0000     // vpmulld    ymm1, ymm0, yword [rcx + 4*rdi + 128]
	QUAD $0x00a0b994407de2c4; WORD $0x0000     // vpmulld    ymm2, ymm0, yword [rcx + 4*rdi + 160]
	QUAD $0x00c0b99c407de2c4; WORD $0x0000     // vpmulld    ymm3, ymm0, yword [rcx + 4*rdi + 192]
	QUAD $0x00e0b9a4407de2c4; WORD $0x0000     // vpmulld    ymm4, ymm0, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_608
	JMP  LBB2_1070

// LBB2_609: set-up for the unrolled 32-bit add loop at LBB2_611.
//   esi  = r10d rounded down to a multiple of 32 (elements covered by whole 32-dword chunks)
//   ymm0 = eax replicated into all 8 dword lanes
//   r9   = number of 32-element chunks, ((rsi - 32) >> 5) + 1
//   rdx  = -(r9 & ~1): negated count of chunks that can be taken two at a time
//   rdi  = 0, running element index (scaled by 4 in the loop's addressing)
// If rsi - 32 == 0 (exactly one chunk) the paired loop is bypassed via LBB2_1077,
// which lies outside this excerpt (presumably the odd-chunk tail -- confirm).
LBB2_609:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_1077
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_611: 32-bit add loop, 64 dwords (256 bytes) per iteration.
// Every vpaddd adds ymm0 to the 8 dwords at [rcx + 4*rdi + k]; results are stored
// unaligned to [r8 + 4*rdi + k].
// rdi (an element index) advances by 64 and rdx (a negative counter) by 2; the loop ends
// when rdx reaches zero, after which control continues at LBB2_1078 (outside this excerpt).
LBB2_611:
	LONG $0x0cfefdc5; BYTE $0xb9               // vpaddd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20b9             // vpaddd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40b9             // vpaddd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x64fefdc5; WORD $0x60b9             // vpaddd    ymm4, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x000080b98cfefdc5; BYTE $0x00       // vpaddd    ymm1, ymm0, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b994fefdc5; BYTE $0x00       // vpaddd    ymm2, ymm0, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b99cfefdc5; BYTE $0x00       // vpaddd    ymm3, ymm0, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9a4fefdc5; BYTE $0x00       // vpaddd    ymm4, ymm0, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_611
	JMP  LBB2_1078

// LBB2_612: set-up for the unrolled 32-bit subtract loop at LBB2_614.
//   esi  = r10d rounded down to a multiple of 32 (elements covered by whole 32-dword chunks)
//   ymm0 = r11d replicated into all 8 dword lanes (the value comes from r11d here, not eax)
//   r9   = number of 32-element chunks, ((rsi - 32) >> 5) + 1
//   rdx  = -(r9 & ~1): negated count of chunks that can be taken two at a time
//   rdi  = 0, running element index (scaled by 4 in the loop's addressing)
// If rsi - 32 == 0 (exactly one chunk) the paired loop is bypassed via LBB2_1085,
// which lies outside this excerpt (presumably the odd-chunk tail -- confirm).
LBB2_612:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0x6e79c1c4; BYTE $0xc3 // vmovd    xmm0, r11d
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_1085
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_614: 32-bit subtract loop, 64 dwords (256 bytes) per iteration.
// Every vpsubd computes ymm0 - [rcx + 4*rdi + k] per dword lane (register operand minus
// memory operand); results are stored unaligned to [r8 + 4*rdi + k].
// rdi (an element index) advances by 64 and rdx (a negative counter) by 2; the loop ends
// when rdx reaches zero, after which control continues at LBB2_1086 (outside this excerpt).
LBB2_614:
	LONG $0x0cfafdc5; BYTE $0xb9               // vpsubd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20b9             // vpsubd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40b9             // vpsubd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x64fafdc5; WORD $0x60b9             // vpsubd    ymm4, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x000080b98cfafdc5; BYTE $0x00       // vpsubd    ymm1, ymm0, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b994fafdc5; BYTE $0x00       // vpsubd    ymm2, ymm0, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b99cfafdc5; BYTE $0x00       // vpsubd    ymm3, ymm0, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9a4fafdc5; BYTE $0x00       // vpsubd    ymm4, ymm0, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_614
	JMP  LBB2_1086

// LBB2_615: set-up for the unrolled 32-bit add loop at LBB2_617.
//   esi  = r10d rounded down to a multiple of 32 (elements covered by whole 32-dword chunks)
//   ymm0 = eax replicated into all 8 dword lanes
//   r9   = number of 32-element chunks, ((rsi - 32) >> 5) + 1
//   rdx  = -(r9 & ~1): negated count of chunks that can be taken two at a time
//   rdi  = 0, running element index (scaled by 4 in the loop's addressing)
// If rsi - 32 == 0 (exactly one chunk) the paired loop is bypassed via LBB2_1093,
// which lies outside this excerpt (presumably the odd-chunk tail -- confirm).
LBB2_615:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xc06ef9c5             // vmovd    xmm0, eax
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_1093
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_617: 32-bit add loop, 64 dwords (256 bytes) per iteration.
// Every vpaddd adds ymm0 to the 8 dwords at [rcx + 4*rdi + k]; results are stored
// unaligned to [r8 + 4*rdi + k].
// rdi (an element index) advances by 64 and rdx (a negative counter) by 2; the loop ends
// when rdx reaches zero, after which control continues at LBB2_1094 (outside this excerpt).
LBB2_617:
	LONG $0x0cfefdc5; BYTE $0xb9               // vpaddd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20b9             // vpaddd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40b9             // vpaddd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x64fefdc5; WORD $0x60b9             // vpaddd    ymm4, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x000080b98cfefdc5; BYTE $0x00       // vpaddd    ymm1, ymm0, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b994fefdc5; BYTE $0x00       // vpaddd    ymm2, ymm0, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b99cfefdc5; BYTE $0x00       // vpaddd    ymm3, ymm0, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9a4fefdc5; BYTE $0x00       // vpaddd    ymm4, ymm0, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_617
	JMP  LBB2_1094

// LBB2_618: set-up for the unrolled 32-bit subtract loop at LBB2_620.
//   esi  = r10d rounded down to a multiple of 32 (elements covered by whole 32-dword chunks)
//   ymm0 = r11d replicated into all 8 dword lanes (the value comes from r11d here, not eax)
//   r9   = number of 32-element chunks, ((rsi - 32) >> 5) + 1
//   rdx  = -(r9 & ~1): negated count of chunks that can be taken two at a time
//   rdi  = 0, running element index (scaled by 4 in the loop's addressing)
// If rsi - 32 == 0 (exactly one chunk) the paired loop is bypassed via LBB2_1101,
// which lies outside this excerpt (presumably the odd-chunk tail -- confirm).
LBB2_618:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0x6e79c1c4; BYTE $0xc3 // vmovd    xmm0, r11d
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
	LONG $0x05e9c149             // shr    r9, 5
	LONG $0x01c18349             // add    r9, 1
	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
	JE   LBB2_1101
	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
	LONG $0xfee28348             // and    rdx, -2
	WORD $0xf748; BYTE $0xda     // neg    rdx
	WORD $0xff31                 // xor    edi, edi

// LBB2_620: 32-bit subtract loop, 64 dwords (256 bytes) per iteration.
// Every vpsubd computes ymm0 - [rcx + 4*rdi + k] per dword lane (register operand minus
// memory operand); results are stored unaligned to [r8 + 4*rdi + k].
// rdi (an element index) advances by 64 and rdx (a negative counter) by 2; the loop ends
// when rdx reaches zero, after which control continues at LBB2_1102 (outside this excerpt).
LBB2_620:
	LONG $0x0cfafdc5; BYTE $0xb9               // vpsubd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20b9             // vpsubd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40b9             // vpsubd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x64fafdc5; WORD $0x60b9             // vpsubd    ymm4, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb864; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm4
	QUAD $0x000080b98cfafdc5; BYTE $0x00       // vpsubd    ymm1, ymm0, yword [rcx + 4*rdi + 128]
	QUAD $0x0000a0b994fafdc5; BYTE $0x00       // vpsubd    ymm2, ymm0, yword [rcx + 4*rdi + 160]
	QUAD $0x0000c0b99cfafdc5; BYTE $0x00       // vpsubd    ymm3, ymm0, yword [rcx + 4*rdi + 192]
	QUAD $0x0000e0b9a4fafdc5; BYTE $0x00       // vpsubd    ymm4, ymm0, yword [rcx + 4*rdi + 224]
	QUAD $0x0080b88c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 128], ymm1
	QUAD $0x00a0b8947f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 160], ymm2
	QUAD $0x00c0b89c7f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 192], ymm3
	QUAD $0x00e0b8a47f7ec1c4; WORD $0x0000     // vmovdqu    yword [r8 + 4*rdi + 224], ymm4
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB2_620
	JMP  LBB2_1102

// LBB2_621: set-up for the 4x-unrolled byte-multiply loop at LBB2_622.
//   rax  = -(rax & ~3): incoming count rounded down to a multiple of 4, negated so the
//          loop can step it by 4 up to zero (rax is produced by code outside this excerpt)
//   rsi  = 0, running byte offset
//   ymm1 = bytes of the upper half of each 128-bit lane of ymm0, each duplicated into a word
//   ymm3 = the same for the lower half of each 128-bit lane
//   ymm2 = 32-byte constant loaded (aligned) from [rbp]
//          (presumably a 0x00ff-per-word mask -- confirm against the table rbp points at)
LBB2_621:
	LONG $0xfce08348             // and    rax, -4
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0xd860fdc5             // vpunpcklbw    ymm3, ymm0, ymm0

// LBB2_622: byte-multiply loop, 4 x 32 bytes (128 bytes) per iteration.
// AVX2 has no packed byte multiply, so every 32-byte vector goes through the same sequence:
//   1. load from [rcx + rsi + k]
//   2. widen: duplicate each byte into a 16-bit lane (vpunpckhbw / vpunpcklbw)
//   3. vpmullw with the matching widened multiplier (ymm1 for the high part, ymm3 for the low part)
//   4. vpand with ymm2 (presumably keeps only the low byte of each product so that the
//      unsigned-saturating pack below cannot clamp -- confirm the mask value)
//   5. vpackuswb narrows both parts back to bytes, then store to [r8 + rsi + k]
// rsi advances by 128 and rax (a negative counter) by 4; the loop ends when rax reaches zero
// and execution falls through to LBB2_623.
LBB2_622:
	LONG $0x246ffec5; BYTE $0x31               // vmovdqu    ymm4, yword [rcx + rsi]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3024             // vmovdqu    yword [r8 + rsi], ymm4
	LONG $0x646ffec5; WORD $0x2031             // vmovdqu    ymm4, yword [rcx + rsi + 32]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm4
	LONG $0x646ffec5; WORD $0x4031             // vmovdqu    ymm4, yword [rcx + rsi + 64]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm4
	LONG $0x646ffec5; WORD $0x6031             // vmovdqu    ymm4, yword [rcx + rsi + 96]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm4
	// rsi += 128, written as a subtraction because -128 fits a sign-extended 8-bit immediate and +128 does not.
	LONG $0x80ee8348                           // sub    rsi, -128
	LONG $0x04c08348                           // add    rax, 4
	JNE  LBB2_622

// LBB2_623: set-up for the single-vector remainder loop at LBB2_625.
// r9 == 0 means no remainder iterations are needed and control skips to LBB2_626.
// Otherwise r9 is negated so the loop can count it up to zero, and the widened multiplier
// halves and the [rbp] constant are rebuilt. Note that ymm0 itself is overwritten with its
// low-half widening here, so the original ymm0 contents are gone afterwards.
LBB2_623:
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	JE   LBB2_626
	WORD $0xf749; BYTE $0xd9     // neg    r9
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0xc060fdc5             // vpunpcklbw    ymm0, ymm0, ymm0

// LBB2_625: remainder byte-multiply loop, one 32-byte vector per iteration.
// Loads [rcx + rsi], widens bytes to words, multiplies the high part by ymm1 and the low
// part by ymm0 with vpmullw, ands both with ymm2, packs back to bytes with vpackuswb and
// stores to [r8 + rsi]. rsi advances by 32; r9 (negative on entry) is incremented until zero.
LBB2_625:
	LONG $0x1c6ffec5; BYTE $0x31   // vmovdqu    ymm3, yword [rcx + rsi]
	LONG $0xe368e5c5               // vpunpckhbw    ymm4, ymm3, ymm3
	LONG $0xe1d5ddc5               // vpmullw    ymm4, ymm4, ymm1
	LONG $0xe2dbddc5               // vpand    ymm4, ymm4, ymm2
	LONG $0xdb60e5c5               // vpunpcklbw    ymm3, ymm3, ymm3
	LONG $0xd8d5e5c5               // vpmullw    ymm3, ymm3, ymm0
	LONG $0xdadbe5c5               // vpand    ymm3, ymm3, ymm2
	LONG $0xdc67e5c5               // vpackuswb    ymm3, ymm3, ymm4
	LONG $0x7f7ec1c4; WORD $0x301c // vmovdqu    yword [r8 + rsi], ymm3
	LONG $0x20c68348               // add    rsi, 32
	WORD $0xff49; BYTE $0xc1       // inc    r9
	JNE  LBB2_625

// LBB2_626: exit dispatch after the vector loops.
// Compares rdi with r10: equal -> LBB2_1109, otherwise -> LBB2_627 (both outside this excerpt).
// Presumably rdi is the element count already handled by vector code and r10 the total,
// so the not-equal path is a scalar tail -- confirm at the jump targets.
LBB2_626:
	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
	JE   LBB2_1109
	JMP  LBB2_627

// LBB2_631: set-up for the 4x-unrolled byte-multiply loop at LBB2_632.
//   rax  = -(rax & ~3): incoming count rounded down to a multiple of 4, negated so the
//          loop can step it by 4 up to zero (rax is produced by code outside this excerpt)
//   rsi  = 0, running byte offset
//   ymm1 = bytes of the upper half of each 128-bit lane of ymm0, each duplicated into a word
//   ymm3 = the same for the lower half of each 128-bit lane
//   ymm2 = 32-byte constant loaded (aligned) from [rbp]
//          (presumably a 0x00ff-per-word mask -- confirm against the table rbp points at)
LBB2_631:
	LONG $0xfce08348             // and    rax, -4
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0xd860fdc5             // vpunpcklbw    ymm3, ymm0, ymm0

// LBB2_632: byte-multiply loop, 4 x 32 bytes (128 bytes) per iteration.
// AVX2 has no packed byte multiply, so every 32-byte vector goes through the same sequence:
//   1. load from [rcx + rsi + k]
//   2. widen: duplicate each byte into a 16-bit lane (vpunpckhbw / vpunpcklbw)
//   3. vpmullw with the matching widened multiplier (ymm1 for the high part, ymm3 for the low part)
//   4. vpand with ymm2 (presumably keeps only the low byte of each product so that the
//      unsigned-saturating pack below cannot clamp -- confirm the mask value)
//   5. vpackuswb narrows both parts back to bytes, then store to [r8 + rsi + k]
// rsi advances by 128 and rax (a negative counter) by 4; the loop ends when rax reaches zero
// and execution falls through to LBB2_633.
LBB2_632:
	LONG $0x246ffec5; BYTE $0x31               // vmovdqu    ymm4, yword [rcx + rsi]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3024             // vmovdqu    yword [r8 + rsi], ymm4
	LONG $0x646ffec5; WORD $0x2031             // vmovdqu    ymm4, yword [rcx + rsi + 32]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm4
	LONG $0x646ffec5; WORD $0x4031             // vmovdqu    ymm4, yword [rcx + rsi + 64]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm4
	LONG $0x646ffec5; WORD $0x6031             // vmovdqu    ymm4, yword [rcx + rsi + 96]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm4
	// rsi += 128, written as a subtraction because -128 fits a sign-extended 8-bit immediate and +128 does not.
	LONG $0x80ee8348                           // sub    rsi, -128
	LONG $0x04c08348                           // add    rax, 4
	JNE  LBB2_632

// LBB2_633: set-up for the single-vector remainder loop at LBB2_635.
// r9 == 0 means no remainder iterations are needed and control skips to LBB2_636.
// Otherwise r9 is negated so the loop can count it up to zero, and the widened multiplier
// halves and the [rbp] constant are rebuilt. Note that ymm0 itself is overwritten with its
// low-half widening here, so the original ymm0 contents are gone afterwards.
LBB2_633:
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	JE   LBB2_636
	WORD $0xf749; BYTE $0xd9     // neg    r9
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0xc060fdc5             // vpunpcklbw    ymm0, ymm0, ymm0

// LBB2_635: remainder byte-multiply loop, one 32-byte vector per iteration.
// Loads [rcx + rsi], widens bytes to words, multiplies the high part by ymm1 and the low
// part by ymm0 with vpmullw, ands both with ymm2, packs back to bytes with vpackuswb and
// stores to [r8 + rsi]. rsi advances by 32; r9 (negative on entry) is incremented until zero.
LBB2_635:
	LONG $0x1c6ffec5; BYTE $0x31   // vmovdqu    ymm3, yword [rcx + rsi]
	LONG $0xe368e5c5               // vpunpckhbw    ymm4, ymm3, ymm3
	LONG $0xe1d5ddc5               // vpmullw    ymm4, ymm4, ymm1
	LONG $0xe2dbddc5               // vpand    ymm4, ymm4, ymm2
	LONG $0xdb60e5c5               // vpunpcklbw    ymm3, ymm3, ymm3
	LONG $0xd8d5e5c5               // vpmullw    ymm3, ymm3, ymm0
	LONG $0xdadbe5c5               // vpand    ymm3, ymm3, ymm2
	LONG $0xdc67e5c5               // vpackuswb    ymm3, ymm3, ymm4
	LONG $0x7f7ec1c4; WORD $0x301c // vmovdqu    yword [r8 + rsi], ymm3
	LONG $0x20c68348               // add    rsi, 32
	WORD $0xff49; BYTE $0xc1       // inc    r9
	JNE  LBB2_635

// LBB2_636: exit dispatch after the vector loops.
// Compares rdi with r10: equal -> LBB2_1109, otherwise -> LBB2_637 (both outside this excerpt).
// Presumably rdi is the element count already handled by vector code and r10 the total,
// so the not-equal path is a scalar tail -- confirm at the jump targets.
LBB2_636:
	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
	JE   LBB2_1109
	JMP  LBB2_637

// LBB2_641: set-up for the 4x-unrolled byte-multiply loop at LBB2_642.
//   rax  = -(rax & ~3): incoming count rounded down to a multiple of 4, negated so the
//          loop can step it by 4 up to zero (rax is produced by code outside this excerpt)
//   rsi  = 0, running byte offset
//   ymm1 = bytes of the upper half of each 128-bit lane of ymm0, each duplicated into a word
//   ymm3 = the same for the lower half of each 128-bit lane
//   ymm2 = 32-byte constant loaded (aligned) from [rbp]
//          (presumably a 0x00ff-per-word mask -- confirm against the table rbp points at)
LBB2_641:
	LONG $0xfce08348             // and    rax, -4
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0xd860fdc5             // vpunpcklbw    ymm3, ymm0, ymm0

// LBB2_642: byte-multiply loop, 4 x 32 bytes (128 bytes) per iteration.
// AVX2 has no packed byte multiply, so every 32-byte vector goes through the same sequence:
//   1. load from [rcx + rsi + k]
//   2. widen: duplicate each byte into a 16-bit lane (vpunpckhbw / vpunpcklbw)
//   3. vpmullw with the matching widened multiplier (ymm1 for the high part, ymm3 for the low part)
//   4. vpand with ymm2 (presumably keeps only the low byte of each product so that the
//      unsigned-saturating pack below cannot clamp -- confirm the mask value)
//   5. vpackuswb narrows both parts back to bytes, then store to [r8 + rsi + k]
// rsi advances by 128 and rax (a negative counter) by 4; the loop ends when rax reaches zero
// and execution falls through to LBB2_643.
LBB2_642:
	LONG $0x246ffec5; BYTE $0x31               // vmovdqu    ymm4, yword [rcx + rsi]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3024             // vmovdqu    yword [r8 + rsi], ymm4
	LONG $0x646ffec5; WORD $0x2031             // vmovdqu    ymm4, yword [rcx + rsi + 32]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm4
	LONG $0x646ffec5; WORD $0x4031             // vmovdqu    ymm4, yword [rcx + rsi + 64]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm4
	LONG $0x646ffec5; WORD $0x6031             // vmovdqu    ymm4, yword [rcx + rsi + 96]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm4
	// rsi += 128, written as a subtraction because -128 fits a sign-extended 8-bit immediate and +128 does not.
	LONG $0x80ee8348                           // sub    rsi, -128
	LONG $0x04c08348                           // add    rax, 4
	JNE  LBB2_642

// LBB2_643: set-up for the single-vector remainder loop at LBB2_645.
// r9 == 0 means no remainder iterations are needed and control skips to LBB2_646.
// Otherwise r9 is negated so the loop can count it up to zero, and the widened multiplier
// halves and the [rbp] constant are rebuilt. Note that ymm0 itself is overwritten with its
// low-half widening here, so the original ymm0 contents are gone afterwards.
LBB2_643:
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	JE   LBB2_646
	WORD $0xf749; BYTE $0xd9     // neg    r9
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0xc060fdc5             // vpunpcklbw    ymm0, ymm0, ymm0

// LBB2_645: remainder byte-multiply loop, one 32-byte vector per iteration.
// Loads [rcx + rsi], widens bytes to words, multiplies the high part by ymm1 and the low
// part by ymm0 with vpmullw, ands both with ymm2, packs back to bytes with vpackuswb and
// stores to [r8 + rsi]. rsi advances by 32; r9 (negative on entry) is incremented until zero.
LBB2_645:
	LONG $0x1c6ffec5; BYTE $0x31   // vmovdqu    ymm3, yword [rcx + rsi]
	LONG $0xe368e5c5               // vpunpckhbw    ymm4, ymm3, ymm3
	LONG $0xe1d5ddc5               // vpmullw    ymm4, ymm4, ymm1
	LONG $0xe2dbddc5               // vpand    ymm4, ymm4, ymm2
	LONG $0xdb60e5c5               // vpunpcklbw    ymm3, ymm3, ymm3
	LONG $0xd8d5e5c5               // vpmullw    ymm3, ymm3, ymm0
	LONG $0xdadbe5c5               // vpand    ymm3, ymm3, ymm2
	LONG $0xdc67e5c5               // vpackuswb    ymm3, ymm3, ymm4
	LONG $0x7f7ec1c4; WORD $0x301c // vmovdqu    yword [r8 + rsi], ymm3
	LONG $0x20c68348               // add    rsi, 32
	WORD $0xff49; BYTE $0xc1       // inc    r9
	JNE  LBB2_645

LBB2_646:
	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
	JE   LBB2_1109
	JMP  LBB2_647

// LBB2_651: set-up for the 4x-unrolled byte-wise multiply loop. rax is rounded
// down to a multiple of 4 and negated so the loop can count up to zero, the
// byte offset rsi starts at 0, and the operand in ymm0 is widened by
// duplicating each byte into a 16-bit word (ymm1 = upper half of each 128-bit
// lane, ymm3 = lower half). ymm0 itself is left untouched here. ymm2 is loaded
// from the constant at [rbp]; vmovdqa requires that address to be 32-byte
// aligned.
// NOTE(review): ymm0 presumably holds the broadcast scalar, rax a count of
// 32-byte vectors and ymm2 a 0x00FF-per-word mask; all are set outside this
// excerpt -- confirm.
LBB2_651:
	LONG $0xfce08348             // and    rax, -4
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0xd860fdc5             // vpunpcklbw    ymm3, ymm0, ymm0

// LBB2_652: each iteration handles four 32-byte vectors (128 bytes) at
// [rcx + rsi + {0,32,64,96}] and writes the results to the same offsets from
// r8. Per vector: widen the bytes to words, multiply by the widened operand
// (vpmullw), AND with ymm2, and pack back to bytes (vpackuswb).
// "sub rsi, -128" advances rsi by 128; rax steps by 4 until it reaches zero.
LBB2_652:
	LONG $0x246ffec5; BYTE $0x31               // vmovdqu    ymm4, yword [rcx + rsi]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3024             // vmovdqu    yword [r8 + rsi], ymm4
	LONG $0x646ffec5; WORD $0x2031             // vmovdqu    ymm4, yword [rcx + rsi + 32]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm4
	LONG $0x646ffec5; WORD $0x4031             // vmovdqu    ymm4, yword [rcx + rsi + 64]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm4
	LONG $0x646ffec5; WORD $0x6031             // vmovdqu    ymm4, yword [rcx + rsi + 96]
	LONG $0xec68ddc5                           // vpunpckhbw    ymm5, ymm4, ymm4
	LONG $0xe9d5d5c5                           // vpmullw    ymm5, ymm5, ymm1
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xe460ddc5                           // vpunpcklbw    ymm4, ymm4, ymm4
	LONG $0xe3d5ddc5                           // vpmullw    ymm4, ymm4, ymm3
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe567ddc5                           // vpackuswb    ymm4, ymm4, ymm5
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm4
	LONG $0x80ee8348                           // sub    rsi, -128
	LONG $0x04c08348                           // add    rax, 4
	JNE  LBB2_652

// LBB2_653: remainder of the byte-wise multiply. r9 is the number of 32-byte
// vectors still to process; when it is zero the loop is skipped entirely.
// Otherwise r9 is negated so the loop can count up to zero, and the operand in
// ymm0 is widened: every byte is duplicated into a 16-bit word (ymm1 = bytes
// from the upper half of each 128-bit lane, ymm0 = bytes from the lower half).
// ymm2 is loaded from the constant at [rbp]; vmovdqa requires that address to
// be 32-byte aligned.
// NOTE(review): ymm0 presumably holds the broadcast scalar and ymm2 a
// 0x00FF-per-word mask; both come from outside this excerpt -- confirm.
LBB2_653:
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	JE   LBB2_656
	WORD $0xf749; BYTE $0xd9     // neg    r9
	LONG $0xc868fdc5             // vpunpckhbw    ymm1, ymm0, ymm0
	LONG $0x556ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0xc060fdc5             // vpunpcklbw    ymm0, ymm0, ymm0

// LBB2_655: one 32-byte vector per iteration. The input bytes at [rcx + rsi]
// are widened the same way as the operand, multiplied as 16-bit words
// (vpmullw), ANDed with ymm2, and packed back to bytes (vpackuswb) before
// being stored to [r8 + rsi]. rsi advances by 32 bytes and r9 counts up
// until it reaches zero.
LBB2_655:
	LONG $0x1c6ffec5; BYTE $0x31   // vmovdqu    ymm3, yword [rcx + rsi]
	LONG $0xe368e5c5               // vpunpckhbw    ymm4, ymm3, ymm3
	LONG $0xe1d5ddc5               // vpmullw    ymm4, ymm4, ymm1
	LONG $0xe2dbddc5               // vpand    ymm4, ymm4, ymm2
	LONG $0xdb60e5c5               // vpunpcklbw    ymm3, ymm3, ymm3
	LONG $0xd8d5e5c5               // vpmullw    ymm3, ymm3, ymm0
	LONG $0xdadbe5c5               // vpand    ymm3, ymm3, ymm2
	LONG $0xdc67e5c5               // vpackuswb    ymm3, ymm3, ymm4
	LONG $0x7f7ec1c4; WORD $0x301c // vmovdqu    yword [r8 + rsi], ymm3
	LONG $0x20c68348               // add    rsi, 32
	WORD $0xff49; BYTE $0xc1       // inc    r9
	JNE  LBB2_655

// LBB2_656: branch to LBB2_1109 when rdi == r10, otherwise to LBB2_657.
// NOTE(review): presumably rdi is the element count covered by the vector
// path and r10 the total length, making LBB2_657 the scalar tail -- confirm.
LBB2_656:
	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
	JE   LBB2_1109
	JMP  LBB2_657

// LBB2_661: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_661:
	WORD $0xff31 // xor    edi, edi

// LBB2_662: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (32 int32 lanes) starting at element index rdi:
//   [r8 + 4*rdi + k] = ymm0 * [rcx + 4*rdi + k]   for k = 0, 32, 64, 96
// vpmulld keeps the low 32 bits of each product. The fourth multiply writes
// its result into ymm0, so ymm0 no longer holds the operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_662:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_664
	LONG $0x407de2c4; WORD $0xb90c             // vpmulld    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x407de2c4; WORD $0xb954; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xb95c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xb944; BYTE $0x60 // vpmulld    ymm0, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

// LBB2_664: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_665.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_665 the scalar tail -- confirm.
LBB2_664:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_665

// LBB2_669: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_669:
	WORD $0xff31 // xor    edi, edi

// LBB2_670: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (32 int32 lanes) starting at element index rdi:
//   [r8 + 4*rdi + k] = ymm0 * [rcx + 4*rdi + k]   for k = 0, 32, 64, 96
// vpmulld keeps the low 32 bits of each product. The fourth multiply writes
// its result into ymm0, so ymm0 no longer holds the operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_670:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_672
	LONG $0x407de2c4; WORD $0xb90c             // vpmulld    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x407de2c4; WORD $0xb954; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xb95c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xb944; BYTE $0x60 // vpmulld    ymm0, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

// LBB2_672: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_673.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_673 the scalar tail -- confirm.
LBB2_672:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_673

// LBB2_677: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_677:
	WORD $0xff31 // xor    edi, edi

// LBB2_678: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (32 int32 lanes) starting at element index rdi:
//   [r8 + 4*rdi + k] = ymm0 + [rcx + 4*rdi + k]   for k = 0, 32, 64, 96
// The fourth add writes its result into ymm0, so ymm0 no longer holds the
// operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_678:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_680
	LONG $0x0cfefdc5; BYTE $0xb9               // vpaddd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20b9             // vpaddd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40b9             // vpaddd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x44fefdc5; WORD $0x60b9             // vpaddd    ymm0, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

// LBB2_680: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_681.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_681 the scalar tail -- confirm.
LBB2_680:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_681

// LBB2_685: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_685:
	WORD $0xff31 // xor    edi, edi

// LBB2_686: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (32 int32 lanes) starting at element index rdi:
//   [r8 + 4*rdi + k] = ymm0 - [rcx + 4*rdi + k]   for k = 0, 32, 64, 96
// Operand order matters: the memory operand is subtracted from ymm0. The
// fourth subtract writes its result into ymm0, so ymm0 no longer holds the
// operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_686:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_688
	LONG $0x0cfafdc5; BYTE $0xb9               // vpsubd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20b9             // vpsubd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40b9             // vpsubd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x44fafdc5; WORD $0x60b9             // vpsubd    ymm0, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

// LBB2_688: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_689.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_689 the scalar tail -- confirm.
LBB2_688:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_689

// LBB2_693: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_693:
	WORD $0xff31 // xor    edi, edi

// LBB2_694: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (32 int32 lanes) starting at element index rdi:
//   [r8 + 4*rdi + k] = ymm0 + [rcx + 4*rdi + k]   for k = 0, 32, 64, 96
// The fourth add writes its result into ymm0, so ymm0 no longer holds the
// operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_694:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_696
	LONG $0x0cfefdc5; BYTE $0xb9               // vpaddd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20b9             // vpaddd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40b9             // vpaddd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x44fefdc5; WORD $0x60b9             // vpaddd    ymm0, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

// LBB2_696: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_697.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_697 the scalar tail -- confirm.
LBB2_696:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_697

// LBB2_701: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_701:
	WORD $0xff31 // xor    edi, edi

// LBB2_702: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (32 int32 lanes) starting at element index rdi:
//   [r8 + 4*rdi + k] = ymm0 - [rcx + 4*rdi + k]   for k = 0, 32, 64, 96
// Operand order matters: the memory operand is subtracted from ymm0. The
// fourth subtract writes its result into ymm0, so ymm0 no longer holds the
// operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_702:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_704
	LONG $0x0cfafdc5; BYTE $0xb9               // vpsubd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20b9             // vpsubd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40b9             // vpsubd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x44fafdc5; WORD $0x60b9             // vpsubd    ymm0, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

// LBB2_704: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_705.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_705 the scalar tail -- confirm.
LBB2_704:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_705

// LBB2_709: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_709:
	WORD $0xff31 // xor    edi, edi

// LBB2_710: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (16 float64 lanes) starting at element index rdi:
//   [r8 + 8*rdi + k] = ymm1 * [rcx + 8*rdi + k]   for k = 0, 32, 64, 96
// The fourth multiply writes its result into ymm1, so ymm1 no longer holds
// the operand afterwards.
// NOTE(review): ymm1 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_710:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_712
	LONG $0x1459f5c5; BYTE $0xf9               // vmulpd    ymm2, ymm1, yword [rcx + 8*rdi]
	LONG $0x5c59f5c5; WORD $0x20f9             // vmulpd    ymm3, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x6459f5c5; WORD $0x40f9             // vmulpd    ymm4, ymm1, yword [rcx + 8*rdi + 64]
	LONG $0x4c59f5c5; WORD $0x60f9             // vmulpd    ymm1, ymm1, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm1

// LBB2_712: branch to LBB2_1109 when rdx == rax, otherwise to LBB2_713.
// NOTE(review): presumably rdx is the element count covered by the vector
// path and rax the total length, making LBB2_713 the scalar tail -- confirm.
LBB2_712:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB2_1109
	JMP  LBB2_713

// LBB2_717: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_717:
	WORD $0xff31 // xor    edi, edi

// LBB2_718: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (16 float64 lanes) starting at element index rdi:
//   [r8 + 8*rdi + k] = ymm1 * [rcx + 8*rdi + k]   for k = 0, 32, 64, 96
// The fourth multiply writes its result into ymm1, so ymm1 no longer holds
// the operand afterwards.
// NOTE(review): ymm1 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_718:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_720
	LONG $0x1459f5c5; BYTE $0xf9               // vmulpd    ymm2, ymm1, yword [rcx + 8*rdi]
	LONG $0x5c59f5c5; WORD $0x20f9             // vmulpd    ymm3, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x6459f5c5; WORD $0x40f9             // vmulpd    ymm4, ymm1, yword [rcx + 8*rdi + 64]
	LONG $0x4c59f5c5; WORD $0x60f9             // vmulpd    ymm1, ymm1, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm1

// LBB2_720: branch to LBB2_1109 when rdx == rax, otherwise to LBB2_721.
// NOTE(review): presumably rdx is the element count covered by the vector
// path and rax the total length, making LBB2_721 the scalar tail -- confirm.
LBB2_720:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB2_1109
	JMP  LBB2_721

// LBB2_725: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_725:
	WORD $0xff31 // xor    edi, edi

// LBB2_726: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (16 float64 lanes) starting at element index rdi:
//   [r8 + 8*rdi + k] = ymm1 + [rcx + 8*rdi + k]   for k = 0, 32, 64, 96
// The fourth add writes its result into ymm1, so ymm1 no longer holds the
// operand afterwards.
// NOTE(review): ymm1 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_726:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_728
	LONG $0x1458f5c5; BYTE $0xf9               // vaddpd    ymm2, ymm1, yword [rcx + 8*rdi]
	LONG $0x5c58f5c5; WORD $0x20f9             // vaddpd    ymm3, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x6458f5c5; WORD $0x40f9             // vaddpd    ymm4, ymm1, yword [rcx + 8*rdi + 64]
	LONG $0x4c58f5c5; WORD $0x60f9             // vaddpd    ymm1, ymm1, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm1

// LBB2_728: branch to LBB2_1109 when rdx == rax, otherwise to LBB2_729.
// NOTE(review): presumably rdx is the element count covered by the vector
// path and rax the total length, making LBB2_729 the scalar tail -- confirm.
LBB2_728:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB2_1109
	JMP  LBB2_729

// LBB2_733: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_733:
	WORD $0xff31 // xor    edi, edi

// LBB2_734: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (16 float64 lanes) starting at element index rdi:
//   [r8 + 8*rdi + k] = ymm1 - [rcx + 8*rdi + k]   for k = 0, 32, 64, 96
// Operand order matters: the memory operand is subtracted from ymm1. The
// fourth subtract writes its result into ymm1, so ymm1 no longer holds the
// operand afterwards.
// NOTE(review): ymm1 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_734:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_736
	LONG $0x145cf5c5; BYTE $0xf9               // vsubpd    ymm2, ymm1, yword [rcx + 8*rdi]
	LONG $0x5c5cf5c5; WORD $0x20f9             // vsubpd    ymm3, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x645cf5c5; WORD $0x40f9             // vsubpd    ymm4, ymm1, yword [rcx + 8*rdi + 64]
	LONG $0x4c5cf5c5; WORD $0x60f9             // vsubpd    ymm1, ymm1, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm1

// LBB2_736: branch to LBB2_1109 when rdx == rax, otherwise to LBB2_737.
// NOTE(review): presumably rdx is the element count covered by the vector
// path and rax the total length, making LBB2_737 the scalar tail -- confirm.
LBB2_736:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB2_1109
	JMP  LBB2_737

// LBB2_741: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_741:
	WORD $0xff31 // xor    edi, edi

// LBB2_742: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (16 float64 lanes) starting at element index rdi:
//   [r8 + 8*rdi + k] = ymm1 + [rcx + 8*rdi + k]   for k = 0, 32, 64, 96
// The fourth add writes its result into ymm1, so ymm1 no longer holds the
// operand afterwards.
// NOTE(review): ymm1 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_742:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_744
	LONG $0x1458f5c5; BYTE $0xf9               // vaddpd    ymm2, ymm1, yword [rcx + 8*rdi]
	LONG $0x5c58f5c5; WORD $0x20f9             // vaddpd    ymm3, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x6458f5c5; WORD $0x40f9             // vaddpd    ymm4, ymm1, yword [rcx + 8*rdi + 64]
	LONG $0x4c58f5c5; WORD $0x60f9             // vaddpd    ymm1, ymm1, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm1

// LBB2_744: branch to LBB2_1109 when rdx == rax, otherwise to LBB2_745.
// NOTE(review): presumably rdx is the element count covered by the vector
// path and rax the total length, making LBB2_745 the scalar tail -- confirm.
LBB2_744:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB2_1109
	JMP  LBB2_745

// LBB2_749: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_749:
	WORD $0xff31 // xor    edi, edi

// LBB2_750: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (16 float64 lanes) starting at element index rdi:
//   [r8 + 8*rdi + k] = ymm1 - [rcx + 8*rdi + k]   for k = 0, 32, 64, 96
// Operand order matters: the memory operand is subtracted from ymm1. The
// fourth subtract writes its result into ymm1, so ymm1 no longer holds the
// operand afterwards.
// NOTE(review): ymm1 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_750:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_752
	LONG $0x145cf5c5; BYTE $0xf9               // vsubpd    ymm2, ymm1, yword [rcx + 8*rdi]
	LONG $0x5c5cf5c5; WORD $0x20f9             // vsubpd    ymm3, ymm1, yword [rcx + 8*rdi + 32]
	LONG $0x645cf5c5; WORD $0x40f9             // vsubpd    ymm4, ymm1, yword [rcx + 8*rdi + 64]
	LONG $0x4c5cf5c5; WORD $0x60f9             // vsubpd    ymm1, ymm1, yword [rcx + 8*rdi + 96]
	LONG $0x117dc1c4; WORD $0xf814             // vmovupd    yword [r8 + 8*rdi], ymm2
	LONG $0x117dc1c4; WORD $0xf85c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf864; BYTE $0x40 // vmovupd    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x60 // vmovupd    yword [r8 + 8*rdi + 96], ymm1

// LBB2_752: branch to LBB2_1109 when rdx == rax, otherwise to LBB2_753.
// NOTE(review): presumably rdx is the element count covered by the vector
// path and rax the total length, making LBB2_753 the scalar tail -- confirm.
LBB2_752:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB2_1109
	JMP  LBB2_753

// LBB2_757: start at byte index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_757:
	WORD $0xff31 // xor    edi, edi

// LBB2_758: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (128 byte lanes) starting at byte index rdi:
//   [r8 + rdi + k] = ymm0 + [rcx + rdi + k]   for k = 0, 32, 64, 96
// vpaddb wraps on overflow. The fourth add writes its result into ymm0, so
// ymm0 no longer holds the operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_758:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_760
	LONG $0x0cfcfdc5; BYTE $0x39               // vpaddb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54fcfdc5; WORD $0x2039             // vpaddb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x4039             // vpaddb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x44fcfdc5; WORD $0x6039             // vpaddb    ymm0, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

// LBB2_760: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_761.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_761 the scalar tail -- confirm.
LBB2_760:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_761

// LBB2_765: start at byte index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_765:
	WORD $0xff31 // xor    edi, edi

// LBB2_766: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (128 byte lanes) starting at byte index rdi:
//   [r8 + rdi + k] = ymm0 - [rcx + rdi + k]   for k = 0, 32, 64, 96
// Operand order matters: the memory operand is subtracted from ymm0, and
// vpsubb wraps on overflow. The fourth subtract writes its result into ymm0,
// so ymm0 no longer holds the operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_766:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_768
	LONG $0x0cf8fdc5; BYTE $0x39               // vpsubb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54f8fdc5; WORD $0x2039             // vpsubb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x4039             // vpsubb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x44f8fdc5; WORD $0x6039             // vpsubb    ymm0, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

// LBB2_768: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_769.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_769 the scalar tail -- confirm.
LBB2_768:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_769

// LBB2_773: start at byte index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_773:
	WORD $0xff31 // xor    edi, edi

// LBB2_774: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (128 byte lanes) starting at byte index rdi:
//   [r8 + rdi + k] = ymm0 + [rcx + rdi + k]   for k = 0, 32, 64, 96
// vpaddb wraps on overflow. The fourth add writes its result into ymm0, so
// ymm0 no longer holds the operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_774:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_776
	LONG $0x0cfcfdc5; BYTE $0x39               // vpaddb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54fcfdc5; WORD $0x2039             // vpaddb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x4039             // vpaddb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x44fcfdc5; WORD $0x6039             // vpaddb    ymm0, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

// LBB2_776: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_777.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_777 the scalar tail -- confirm.
LBB2_776:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_777

// LBB2_781: start at byte index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_781:
	WORD $0xff31 // xor    edi, edi

// LBB2_782: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (128 byte lanes) starting at byte index rdi:
//   [r8 + rdi + k] = ymm0 - [rcx + rdi + k]   for k = 0, 32, 64, 96
// Operand order matters: the memory operand is subtracted from ymm0, and
// vpsubb wraps on overflow. The fourth subtract writes its result into ymm0,
// so ymm0 no longer holds the operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_782:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_784
	LONG $0x0cf8fdc5; BYTE $0x39               // vpsubb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54f8fdc5; WORD $0x2039             // vpsubb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x4039             // vpsubb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x44f8fdc5; WORD $0x6039             // vpsubb    ymm0, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

// LBB2_784: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_785.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_785 the scalar tail -- confirm.
LBB2_784:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_785

// LBB2_789: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_789:
	WORD $0xff31 // xor    edi, edi

// LBB2_790: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (16 64-bit lanes) starting at element index rdi. AVX2 has no packed
// 64-bit multiply, so each lane x is combined from 32x32->64 products
// (vpmuludq reads only the low 32 bits of each 64-bit lane):
//   result = lo(x)*lo(ymm0) + ((lo(x)*lo(ymm1) + hi(x)*lo(ymm0)) << 32)
// The fourth vector's arithmetic overwrites ymm1 and then ymm0, so neither
// register holds its operand afterwards.
// NOTE(review): this equals x * ymm0 modulo 2^64 only if ymm1 holds ymm0
// shifted right by 32 bits; ymm0/ymm1 (presumably the broadcast scalar and
// its high halves) and r9 are set outside this excerpt -- confirm.
LBB2_790:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_792
	LONG $0x146ffec5; BYTE $0xf9               // vmovdqu    ymm2, yword [rcx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20f9             // vmovdqu    ymm3, yword [rcx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40f9             // vmovdqu    ymm4, yword [rcx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60f9             // vmovdqu    ymm5, yword [rcx + 8*rdi + 96]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xc9f4d5c5                           // vpmuludq    ymm1, ymm5, ymm1
	LONG $0xd573cdc5; BYTE $0x20               // vpsrlq    ymm6, ymm5, 32
	LONG $0xf0f4cdc5                           // vpmuludq    ymm6, ymm6, ymm0
	LONG $0xced4f5c5                           // vpaddq    ymm1, ymm1, ymm6
	LONG $0xf173f5c5; BYTE $0x20               // vpsllq    ymm1, ymm1, 32
	LONG $0xc0f4d5c5                           // vpmuludq    ymm0, ymm5, ymm0
	LONG $0xc1d4fdc5                           // vpaddq    ymm0, ymm0, ymm1
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB2_792: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_793.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_793 the scalar tail -- confirm.
LBB2_792:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_793

// LBB2_797: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_797:
	WORD $0xff31 // xor    edi, edi

// LBB2_798: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (16 64-bit lanes) starting at element index rdi. AVX2 has no packed
// 64-bit multiply, so each lane x is combined from 32x32->64 products
// (vpmuludq reads only the low 32 bits of each 64-bit lane):
//   result = lo(x)*lo(ymm0) + ((lo(x)*lo(ymm1) + hi(x)*lo(ymm0)) << 32)
// The fourth vector's arithmetic overwrites ymm1 and then ymm0, so neither
// register holds its operand afterwards.
// NOTE(review): this equals x * ymm0 modulo 2^64 only if ymm1 holds ymm0
// shifted right by 32 bits; ymm0/ymm1 (presumably the broadcast scalar and
// its high halves) and r9 are set outside this excerpt -- confirm.
LBB2_798:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_800
	LONG $0x146ffec5; BYTE $0xf9               // vmovdqu    ymm2, yword [rcx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20f9             // vmovdqu    ymm3, yword [rcx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40f9             // vmovdqu    ymm4, yword [rcx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60f9             // vmovdqu    ymm5, yword [rcx + 8*rdi + 96]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xc9f4d5c5                           // vpmuludq    ymm1, ymm5, ymm1
	LONG $0xd573cdc5; BYTE $0x20               // vpsrlq    ymm6, ymm5, 32
	LONG $0xf0f4cdc5                           // vpmuludq    ymm6, ymm6, ymm0
	LONG $0xced4f5c5                           // vpaddq    ymm1, ymm1, ymm6
	LONG $0xf173f5c5; BYTE $0x20               // vpsllq    ymm1, ymm1, 32
	LONG $0xc0f4d5c5                           // vpmuludq    ymm0, ymm5, ymm0
	LONG $0xc1d4fdc5                           // vpaddq    ymm0, ymm0, ymm1
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB2_800: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_801.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_801 the scalar tail -- confirm.
LBB2_800:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_801

// LBB2_805: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_805:
	WORD $0xff31 // xor    edi, edi

// LBB2_806: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (16 int64 lanes) starting at element index rdi:
//   [r8 + 8*rdi + k] = ymm0 + [rcx + 8*rdi + k]   for k = 0, 32, 64, 96
// The fourth add writes its result into ymm0, so ymm0 no longer holds the
// operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_806:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_808
	LONG $0x0cd4fdc5; BYTE $0xf9               // vpaddq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20f9             // vpaddq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40f9             // vpaddq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x44d4fdc5; WORD $0x60f9             // vpaddq    ymm0, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB2_808: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_809.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_809 the scalar tail -- confirm.
LBB2_808:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_809

// LBB2_813: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_813:
	WORD $0xff31 // xor    edi, edi

// LBB2_814: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (16 int64 lanes) starting at element index rdi:
//   [r8 + 8*rdi + k] = ymm0 - [rcx + 8*rdi + k]   for k = 0, 32, 64, 96
// Operand order matters: the memory operand is subtracted from ymm0. The
// fourth subtract writes its result into ymm0, so ymm0 no longer holds the
// operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_814:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_816
	LONG $0x0cfbfdc5; BYTE $0xf9               // vpsubq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20f9             // vpsubq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40f9             // vpsubq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x44fbfdc5; WORD $0x60f9             // vpsubq    ymm0, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB2_816: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_817.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_817 the scalar tail -- confirm.
LBB2_816:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_817

// LBB2_821: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_821:
	WORD $0xff31 // xor    edi, edi

// LBB2_822: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (16 int64 lanes) starting at element index rdi:
//   [r8 + 8*rdi + k] = ymm0 + [rcx + 8*rdi + k]   for k = 0, 32, 64, 96
// The fourth add writes its result into ymm0, so ymm0 no longer holds the
// operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_822:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_824
	LONG $0x0cd4fdc5; BYTE $0xf9               // vpaddq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20f9             // vpaddq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40f9             // vpaddq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x44d4fdc5; WORD $0x60f9             // vpaddq    ymm0, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB2_824: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_825.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_825 the scalar tail -- confirm.
LBB2_824:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_825

// LBB2_829: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_829:
	WORD $0xff31 // xor    edi, edi

// LBB2_830: when bit 0 of r9 is set, process one more group of four 32-byte
// vectors (16 int64 lanes) starting at element index rdi:
//   [r8 + 8*rdi + k] = ymm0 - [rcx + 8*rdi + k]   for k = 0, 32, 64, 96
// Operand order matters: the memory operand is subtracted from ymm0. The
// fourth subtract writes its result into ymm0, so ymm0 no longer holds the
// operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 128-byte groups; both are set outside this excerpt -- confirm.
LBB2_830:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_832
	LONG $0x0cfbfdc5; BYTE $0xf9               // vpsubq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20f9             // vpsubq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40f9             // vpsubq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x44fbfdc5; WORD $0x60f9             // vpsubq    ymm0, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

// LBB2_832: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_833.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_833 the scalar tail -- confirm.
LBB2_832:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_833

// LBB2_837: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_837:
	WORD $0xff31 // xor    edi, edi

// LBB2_838: when bit 0 of r9 is set, process one more group of two 32-byte
// vectors (32 int16 lanes) starting at element index rdi:
//   [r8 + 2*rdi + k] = ymm0 * [rcx + 2*rdi + k]   for k = 0, 32
// vpmullw keeps the low 16 bits of each product. The second multiply writes
// its result into ymm0, so ymm0 no longer holds the operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 64-byte groups; both are set outside this excerpt -- confirm.
LBB2_838:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_840
	LONG $0x0cd5fdc5; BYTE $0x79               // vpmullw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x44d5fdc5; WORD $0x2079             // vpmullw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB2_840: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_841.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_841 the scalar tail -- confirm.
LBB2_840:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_841

// LBB2_845: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_845:
	WORD $0xff31 // xor    edi, edi

// LBB2_846: when bit 0 of r9 is set, process one more group of two 32-byte
// vectors (32 int16 lanes) starting at element index rdi:
//   [r8 + 2*rdi + k] = ymm0 * [rcx + 2*rdi + k]   for k = 0, 32
// vpmullw keeps the low 16 bits of each product. The second multiply writes
// its result into ymm0, so ymm0 no longer holds the operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 64-byte groups; both are set outside this excerpt -- confirm.
LBB2_846:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_848
	LONG $0x0cd5fdc5; BYTE $0x79               // vpmullw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x44d5fdc5; WORD $0x2079             // vpmullw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB2_848: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_849.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_849 the scalar tail -- confirm.
LBB2_848:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_849

// LBB2_853: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_853:
	WORD $0xff31 // xor    edi, edi

// LBB2_854: when bit 0 of r9 is set, process one more group of two 32-byte
// vectors (32 int16 lanes) starting at element index rdi:
//   [r8 + 2*rdi + k] = ymm0 * [rcx + 2*rdi + k]   for k = 0, 32
// vpmullw keeps the low 16 bits of each product. The second multiply writes
// its result into ymm0, so ymm0 no longer holds the operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 64-byte groups; both are set outside this excerpt -- confirm.
LBB2_854:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_856
	LONG $0x0cd5fdc5; BYTE $0x79               // vpmullw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x44d5fdc5; WORD $0x2079             // vpmullw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB2_856: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_857.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_857 the scalar tail -- confirm.
LBB2_856:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_857

// LBB2_861: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_861:
	WORD $0xff31 // xor    edi, edi

// LBB2_862: when bit 0 of r9 is set, process one more group of two 32-byte
// vectors (32 int16 lanes) starting at element index rdi:
//   [r8 + 2*rdi + k] = ymm0 * [rcx + 2*rdi + k]   for k = 0, 32
// vpmullw keeps the low 16 bits of each product. The second multiply writes
// its result into ymm0, so ymm0 no longer holds the operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 64-byte groups; both are set outside this excerpt -- confirm.
LBB2_862:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_864
	LONG $0x0cd5fdc5; BYTE $0x79               // vpmullw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x44d5fdc5; WORD $0x2079             // vpmullw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB2_864: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_865.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_865 the scalar tail -- confirm.
LBB2_864:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_865

// LBB2_869: start at element index 0 (the 32-bit write also clears the upper
// half of rdi) and fall through to the leftover-group check.
LBB2_869:
	WORD $0xff31 // xor    edi, edi

// LBB2_870: when bit 0 of r9 is set, process one more group of two 32-byte
// vectors (32 int16 lanes) starting at element index rdi:
//   [r8 + 2*rdi + k] = ymm0 + [rcx + 2*rdi + k]   for k = 0, 32
// vpaddw wraps on overflow. The second add writes its result into ymm0, so
// ymm0 no longer holds the operand afterwards.
// NOTE(review): ymm0 presumably holds the broadcast scalar and r9 a count of
// 64-byte groups; both are set outside this excerpt -- confirm.
LBB2_870:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_872
	LONG $0x0cfdfdc5; BYTE $0x79               // vpaddw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x44fdfdc5; WORD $0x2079             // vpaddw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// LBB2_872: branch to LBB2_1109 when rsi == r10, otherwise to LBB2_873.
// NOTE(review): presumably rsi is the element count covered by the vector
// path and r10 the total length, making LBB2_873 the scalar tail -- confirm.
LBB2_872:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_873

LBB2_877:
	WORD $0xff31 // xor    edi, edi

LBB2_878:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_880
	LONG $0x0cfdfdc5; BYTE $0x79               // vpaddw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x44fdfdc5; WORD $0x2079             // vpaddw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

LBB2_880:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_881

LBB2_885:
	WORD $0xff31 // xor    edi, edi

LBB2_886:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_888
	LONG $0x0cf9fdc5; BYTE $0x79               // vpsubw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x44f9fdc5; WORD $0x2079             // vpsubw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

LBB2_888:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_889

LBB2_893:
	WORD $0xff31 // xor    edi, edi

LBB2_894:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_896
	LONG $0x0cf9fdc5; BYTE $0x79               // vpsubw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x44f9fdc5; WORD $0x2079             // vpsubw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

LBB2_896:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_897

LBB2_901:
	WORD $0xff31 // xor    edi, edi

LBB2_902:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_904
	LONG $0x0cfdfdc5; BYTE $0x79               // vpaddw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x44fdfdc5; WORD $0x2079             // vpaddw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

LBB2_904:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_905

LBB2_909:
	WORD $0xff31 // xor    edi, edi

LBB2_910:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_912
	LONG $0x0cfdfdc5; BYTE $0x79               // vpaddw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x44fdfdc5; WORD $0x2079             // vpaddw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

LBB2_912:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_913

LBB2_917:
	WORD $0xff31 // xor    edi, edi

LBB2_918:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_920
	LONG $0x0cf9fdc5; BYTE $0x79               // vpsubw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x44f9fdc5; WORD $0x2079             // vpsubw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

LBB2_920:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_921

LBB2_925:
	WORD $0xff31 // xor    edi, edi

LBB2_926:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_928
	LONG $0x0cf9fdc5; BYTE $0x79               // vpsubw    ymm1, ymm0, yword [rcx + 2*rdi]
	LONG $0x44f9fdc5; WORD $0x2079             // vpsubw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

LBB2_928:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_929

LBB2_933:
	WORD $0xff31 // xor    edi, edi

LBB2_934:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_936
	LONG $0x146ffec5; BYTE $0xf9               // vmovdqu    ymm2, yword [rcx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20f9             // vmovdqu    ymm3, yword [rcx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40f9             // vmovdqu    ymm4, yword [rcx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60f9             // vmovdqu    ymm5, yword [rcx + 8*rdi + 96]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xc9f4d5c5                           // vpmuludq    ymm1, ymm5, ymm1
	LONG $0xd573cdc5; BYTE $0x20               // vpsrlq    ymm6, ymm5, 32
	LONG $0xf0f4cdc5                           // vpmuludq    ymm6, ymm6, ymm0
	LONG $0xced4f5c5                           // vpaddq    ymm1, ymm1, ymm6
	LONG $0xf173f5c5; BYTE $0x20               // vpsllq    ymm1, ymm1, 32
	LONG $0xc0f4d5c5                           // vpmuludq    ymm0, ymm5, ymm0
	LONG $0xc1d4fdc5                           // vpaddq    ymm0, ymm0, ymm1
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

LBB2_936:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_937

LBB2_941:
	WORD $0xff31 // xor    edi, edi

LBB2_942:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_944
	LONG $0x1459f4c5; BYTE $0xb9               // vmulps    ymm2, ymm1, yword [rcx + 4*rdi]
	LONG $0x5c59f4c5; WORD $0x20b9             // vmulps    ymm3, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x6459f4c5; WORD $0x40b9             // vmulps    ymm4, ymm1, yword [rcx + 4*rdi + 64]
	LONG $0x4c59f4c5; WORD $0x60b9             // vmulps    ymm1, ymm1, yword [rcx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm1

LBB2_944:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB2_1109
	JMP  LBB2_945

LBB2_949:
	WORD $0xff31 // xor    edi, edi

LBB2_950:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_952
	LONG $0x146ffec5; BYTE $0xf9               // vmovdqu    ymm2, yword [rcx + 8*rdi]
	LONG $0x5c6ffec5; WORD $0x20f9             // vmovdqu    ymm3, yword [rcx + 8*rdi + 32]
	LONG $0x646ffec5; WORD $0x40f9             // vmovdqu    ymm4, yword [rcx + 8*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60f9             // vmovdqu    ymm5, yword [rcx + 8*rdi + 96]
	LONG $0xf1f4edc5                           // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                           // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                           // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                           // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                           // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                           // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                           // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                           // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xc9f4d5c5                           // vpmuludq    ymm1, ymm5, ymm1
	LONG $0xd573cdc5; BYTE $0x20               // vpsrlq    ymm6, ymm5, 32
	LONG $0xf0f4cdc5                           // vpmuludq    ymm6, ymm6, ymm0
	LONG $0xced4f5c5                           // vpaddq    ymm1, ymm1, ymm6
	LONG $0xf173f5c5; BYTE $0x20               // vpsllq    ymm1, ymm1, 32
	LONG $0xc0f4d5c5                           // vpmuludq    ymm0, ymm5, ymm0
	LONG $0xc1d4fdc5                           // vpaddq    ymm0, ymm0, ymm1
	LONG $0x7f7ec1c4; WORD $0xf814             // vmovdqu    yword [r8 + 8*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf864; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

LBB2_952:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_953

LBB2_957:
	WORD $0xff31 // xor    edi, edi

LBB2_958:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_960
	LONG $0x1459f4c5; BYTE $0xb9               // vmulps    ymm2, ymm1, yword [rcx + 4*rdi]
	LONG $0x5c59f4c5; WORD $0x20b9             // vmulps    ymm3, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x6459f4c5; WORD $0x40b9             // vmulps    ymm4, ymm1, yword [rcx + 4*rdi + 64]
	LONG $0x4c59f4c5; WORD $0x60b9             // vmulps    ymm1, ymm1, yword [rcx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm1

LBB2_960:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB2_1109
	JMP  LBB2_961

LBB2_965:
	WORD $0xff31 // xor    edi, edi

LBB2_966:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_968
	LONG $0x0cd4fdc5; BYTE $0xf9               // vpaddq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20f9             // vpaddq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40f9             // vpaddq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x44d4fdc5; WORD $0x60f9             // vpaddq    ymm0, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

LBB2_968:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_969

LBB2_973:
	WORD $0xff31 // xor    edi, edi

LBB2_974:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_976
	LONG $0x1458f4c5; BYTE $0xb9               // vaddps    ymm2, ymm1, yword [rcx + 4*rdi]
	LONG $0x5c58f4c5; WORD $0x20b9             // vaddps    ymm3, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x6458f4c5; WORD $0x40b9             // vaddps    ymm4, ymm1, yword [rcx + 4*rdi + 64]
	LONG $0x4c58f4c5; WORD $0x60b9             // vaddps    ymm1, ymm1, yword [rcx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm1

LBB2_976:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB2_1109
	JMP  LBB2_977

LBB2_981:
	WORD $0xff31 // xor    edi, edi

LBB2_982:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_984
	LONG $0x0cfbfdc5; BYTE $0xf9               // vpsubq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20f9             // vpsubq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40f9             // vpsubq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x44fbfdc5; WORD $0x60f9             // vpsubq    ymm0, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

LBB2_984:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_985

LBB2_989:
	WORD $0xff31 // xor    edi, edi

LBB2_990:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_992
	LONG $0x145cf4c5; BYTE $0xb9               // vsubps    ymm2, ymm1, yword [rcx + 4*rdi]
	LONG $0x5c5cf4c5; WORD $0x20b9             // vsubps    ymm3, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x645cf4c5; WORD $0x40b9             // vsubps    ymm4, ymm1, yword [rcx + 4*rdi + 64]
	LONG $0x4c5cf4c5; WORD $0x60b9             // vsubps    ymm1, ymm1, yword [rcx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm1

LBB2_992:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB2_1109
	JMP  LBB2_993

LBB2_997:
	WORD $0xff31 // xor    edi, edi

LBB2_998:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1000
	LONG $0x0cd4fdc5; BYTE $0xf9               // vpaddq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54d4fdc5; WORD $0x20f9             // vpaddq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cd4fdc5; WORD $0x40f9             // vpaddq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x44d4fdc5; WORD $0x60f9             // vpaddq    ymm0, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

LBB2_1000:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_1001

LBB2_1005:
	WORD $0xff31 // xor    edi, edi

LBB2_1006:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1008
	LONG $0x1458f4c5; BYTE $0xb9               // vaddps    ymm2, ymm1, yword [rcx + 4*rdi]
	LONG $0x5c58f4c5; WORD $0x20b9             // vaddps    ymm3, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x6458f4c5; WORD $0x40b9             // vaddps    ymm4, ymm1, yword [rcx + 4*rdi + 64]
	LONG $0x4c58f4c5; WORD $0x60b9             // vaddps    ymm1, ymm1, yword [rcx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm1

LBB2_1008:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB2_1109
	JMP  LBB2_1009

LBB2_1013:
	WORD $0xff31 // xor    edi, edi

LBB2_1014:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1016
	LONG $0x0cfbfdc5; BYTE $0xf9               // vpsubq    ymm1, ymm0, yword [rcx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20f9             // vpsubq    ymm2, ymm0, yword [rcx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40f9             // vpsubq    ymm3, ymm0, yword [rcx + 8*rdi + 64]
	LONG $0x44fbfdc5; WORD $0x60f9             // vpsubq    ymm0, ymm0, yword [rcx + 8*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xf80c             // vmovdqu    yword [r8 + 8*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xf854; BYTE $0x20 // vmovdqu    yword [r8 + 8*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xf85c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xf844; BYTE $0x60 // vmovdqu    yword [r8 + 8*rdi + 96], ymm0

LBB2_1016:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_1017

LBB2_1021:
	WORD $0xff31 // xor    edi, edi

LBB2_1022:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1024
	LONG $0x145cf4c5; BYTE $0xb9               // vsubps    ymm2, ymm1, yword [rcx + 4*rdi]
	LONG $0x5c5cf4c5; WORD $0x20b9             // vsubps    ymm3, ymm1, yword [rcx + 4*rdi + 32]
	LONG $0x645cf4c5; WORD $0x40b9             // vsubps    ymm4, ymm1, yword [rcx + 4*rdi + 64]
	LONG $0x4c5cf4c5; WORD $0x60b9             // vsubps    ymm1, ymm1, yword [rcx + 4*rdi + 96]
	LONG $0x117cc1c4; WORD $0xb814             // vmovups    yword [r8 + 4*rdi], ymm2
	LONG $0x117cc1c4; WORD $0xb85c; BYTE $0x20 // vmovups    yword [r8 + 4*rdi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb864; BYTE $0x40 // vmovups    yword [r8 + 4*rdi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb84c; BYTE $0x60 // vmovups    yword [r8 + 4*rdi + 96], ymm1

LBB2_1024:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB2_1109
	JMP  LBB2_1025

LBB2_1029:
	WORD $0xff31 // xor    edi, edi

LBB2_1030:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1032
	LONG $0x0cfcfdc5; BYTE $0x39               // vpaddb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54fcfdc5; WORD $0x2039             // vpaddb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x4039             // vpaddb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x44fcfdc5; WORD $0x6039             // vpaddb    ymm0, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

LBB2_1032:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_1033

LBB2_1037:
	WORD $0xff31 // xor    edi, edi

LBB2_1038:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1040
	LONG $0x0cf8fdc5; BYTE $0x39               // vpsubb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54f8fdc5; WORD $0x2039             // vpsubb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x4039             // vpsubb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x44f8fdc5; WORD $0x6039             // vpsubb    ymm0, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

LBB2_1040:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_1041

LBB2_1045:
	WORD $0xff31 // xor    edi, edi

LBB2_1046:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1048
	LONG $0x0cfcfdc5; BYTE $0x39               // vpaddb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54fcfdc5; WORD $0x2039             // vpaddb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cfcfdc5; WORD $0x4039             // vpaddb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x44fcfdc5; WORD $0x6039             // vpaddb    ymm0, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

LBB2_1048:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_1049

LBB2_1053:
	WORD $0xff31 // xor    edi, edi

LBB2_1054:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1056
	LONG $0x0cf8fdc5; BYTE $0x39               // vpsubb    ymm1, ymm0, yword [rcx + rdi]
	LONG $0x54f8fdc5; WORD $0x2039             // vpsubb    ymm2, ymm0, yword [rcx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x4039             // vpsubb    ymm3, ymm0, yword [rcx + rdi + 64]
	LONG $0x44f8fdc5; WORD $0x6039             // vpsubb    ymm0, ymm0, yword [rcx + rdi + 96]
	LONG $0x7f7ec1c4; WORD $0x380c             // vmovdqu    yword [r8 + rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x3854; BYTE $0x20 // vmovdqu    yword [r8 + rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0x385c; BYTE $0x40 // vmovdqu    yword [r8 + rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x3844; BYTE $0x60 // vmovdqu    yword [r8 + rdi + 96], ymm0

LBB2_1056:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_1057

LBB2_1061:
	WORD $0xff31 // xor    edi, edi

LBB2_1062:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1064
	LONG $0x407de2c4; WORD $0xb90c             // vpmulld    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x407de2c4; WORD $0xb954; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xb95c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xb944; BYTE $0x60 // vpmulld    ymm0, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

LBB2_1064:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_1065

LBB2_1069:
	WORD $0xff31 // xor    edi, edi

LBB2_1070:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1072
	LONG $0x407de2c4; WORD $0xb90c             // vpmulld    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x407de2c4; WORD $0xb954; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x407de2c4; WORD $0xb95c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x407de2c4; WORD $0xb944; BYTE $0x60 // vpmulld    ymm0, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

LBB2_1072:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_1073

LBB2_1077:
	WORD $0xff31 // xor    edi, edi

LBB2_1078:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1080
	LONG $0x0cfefdc5; BYTE $0xb9               // vpaddd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20b9             // vpaddd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40b9             // vpaddd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x44fefdc5; WORD $0x60b9             // vpaddd    ymm0, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

LBB2_1080:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_1081

LBB2_1085:
	WORD $0xff31 // xor    edi, edi

LBB2_1086:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1088
	LONG $0x0cfafdc5; BYTE $0xb9               // vpsubd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20b9             // vpsubd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40b9             // vpsubd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x44fafdc5; WORD $0x60b9             // vpsubd    ymm0, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

LBB2_1088:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_1089

LBB2_1093:
	WORD $0xff31 // xor    edi, edi

LBB2_1094:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1096
	LONG $0x0cfefdc5; BYTE $0xb9               // vpaddd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fefdc5; WORD $0x20b9             // vpaddd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfefdc5; WORD $0x40b9             // vpaddd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x44fefdc5; WORD $0x60b9             // vpaddd    ymm0, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

LBB2_1096:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB2_1109
	JMP  LBB2_1097

LBB2_1101:
	WORD $0xff31 // xor    edi, edi

LBB2_1102:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB2_1104
	LONG $0x0cfafdc5; BYTE $0xb9               // vpsubd    ymm1, ymm0, yword [rcx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20b9             // vpsubd    ymm2, ymm0, yword [rcx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40b9             // vpsubd    ymm3, ymm0, yword [rcx + 4*rdi + 64]
	LONG $0x44fafdc5; WORD $0x60b9             // vpsubd    ymm0, ymm0, yword [rcx + 4*rdi + 96]
	LONG $0x7f7ec1c4; WORD $0xb80c             // vmovdqu    yword [r8 + 4*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0xb854; BYTE $0x20 // vmovdqu    yword [r8 + 4*rdi + 32], ymm2
	LONG $0x7f7ec1c4; WORD $0xb85c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rdi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0xb844; BYTE $0x60 // vmovdqu    yword [r8 + 4*rdi + 96], ymm0

LBB2_1104:
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JNE  LBB2_1105

LBB2_1109:
	VZEROUPPER
	RET

// LCDATA4: 192-byte file-local (<>) constant table, defined with GLOBL flag 8.
// Its base address is loaded into BP at the top of
// _arithmetic_unary_same_types_avx2; the raw-encoded instructions in that
// routine presumably address individual constants as [rbp + offset].
// NOTE(review): the per-entry "presumably" notes below are inferred from the
// bit patterns only -- confirm against the instructions that index BP.
// All values are stored little-endian, so the low byte of each 8-byte literal
// sits at the lowest address.
//
// 0x000: only bit 63 set (presumably a 64-bit sign-bit mask).
DATA LCDATA4<>+0x000(SB)/8, $0x8000000000000000
// 0x008: bit pattern of IEEE-754 binary64 1.0.
DATA LCDATA4<>+0x008(SB)/8, $0x3ff0000000000000
// 0x010: 64-bit integer 1.
DATA LCDATA4<>+0x010(SB)/8, $0x0000000000000001
// 0x018: every bit except bit 63 set (presumably a 64-bit "clear sign" mask).
DATA LCDATA4<>+0x018(SB)/8, $0x7fffffffffffffff
// 0x020-0x02f: the bit-63 pattern repeated twice, i.e. one 16-byte vector.
DATA LCDATA4<>+0x020(SB)/8, $0x8000000000000000
DATA LCDATA4<>+0x028(SB)/8, $0x8000000000000000
// 0x030-0x03f: bytes 00 04 08 0c followed by twelve zero bytes
// (presumably a byte-shuffle control selecting the low byte of each dword).
DATA LCDATA4<>+0x030(SB)/8, $0x000000000c080400
DATA LCDATA4<>+0x038(SB)/8, $0x0000000000000000
// 0x040-0x04f: read as 32-bit words: 0x00000001, 0x80000000, 0x7fffffff, 0
// (presumably the 32-bit counterparts of the one / sign-bit / clear-sign
// constants above).
DATA LCDATA4<>+0x040(SB)/8, $0x8000000000000001
DATA LCDATA4<>+0x048(SB)/8, $0x000000007fffffff
// 0x050-0x05f: zero filler; the next entry starts at 0x60, a multiple of 32.
DATA LCDATA4<>+0x050(SB)/8, $0x0000000000000000
DATA LCDATA4<>+0x058(SB)/8, $0x0000000000000000
// 0x060-0x07f: sixteen 16-bit words, each equal to 1 (32 bytes, one ymm width).
DATA LCDATA4<>+0x060(SB)/8, $0x0001000100010001
DATA LCDATA4<>+0x068(SB)/8, $0x0001000100010001
DATA LCDATA4<>+0x070(SB)/8, $0x0001000100010001
DATA LCDATA4<>+0x078(SB)/8, $0x0001000100010001
// 0x080-0x09f: thirty-two bytes, each equal to 1 (32 bytes, one ymm width).
DATA LCDATA4<>+0x080(SB)/8, $0x0101010101010101
DATA LCDATA4<>+0x088(SB)/8, $0x0101010101010101
DATA LCDATA4<>+0x090(SB)/8, $0x0101010101010101
DATA LCDATA4<>+0x098(SB)/8, $0x0101010101010101
// 0x0a0-0x0bf: 32 byte indices. The first eight of each 16-byte half are
// 00 01 04 05 08 09 0c 0d (+0x10 in the upper half), i.e. the low two bytes of
// each dword (presumably a byte-shuffle control for dword -> word narrowing).
DATA LCDATA4<>+0x0a0(SB)/8, $0x0d0c090805040100
DATA LCDATA4<>+0x0a8(SB)/8, $0x0f0e0d0c0d0c0908
DATA LCDATA4<>+0x0b0(SB)/8, $0x1d1c191815141110
DATA LCDATA4<>+0x0b8(SB)/8, $0x1f1e1d1c1d1c1918
// Declare the symbol: flag 8, total size 192 bytes (0x000-0x0bf as laid out above).
GLOBL LCDATA4<>(SB), 8, $192

TEXT ยท_arithmetic_unary_same_types_avx2(SB), $0-40

	MOVQ typ+0(FP), DI
	MOVQ op+8(FP), SI
	MOVQ input+16(FP), DX
	MOVQ output+24(FP), CX
	MOVQ len+32(FP), R8
	LEAQ LCDATA4<>(SB), BP

	LONG $0x13fe8040         // cmp    sil, 19
	JLE  LBB3_12
	LONG $0x14fe8040         // cmp    sil, 20
	JE   LBB3_22
	LONG $0x19fe8040         // cmp    sil, 25
	JE   LBB3_30
	LONG $0x1afe8040         // cmp    sil, 26
	JNE  LBB3_865
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB3_46
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB3_81
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB3_131
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB3_134
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB3_865
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JAE  LBB3_221
	WORD $0xd231             // xor    edx, edx
	JMP  LBB3_373

// LBB3_12: reached from the entry dispatch when op <= 19.
//   op == 4 -> LBB3_38; op == 5 -> handled here; anything else -> LBB3_865.
// For op == 5 the element type selects the path:
//   typ > 6 -> LBB3_53, typ <= 3 -> LBB3_86, typ == 4 -> LBB3_137,
//   typ == 5 -> LBB3_140, typ == 6 -> 32-bit elements, out[i] = 0 - in[i].
LBB3_12:
	LONG $0x04fe8040         // cmp    sil, 4
	JE   LBB3_38
	LONG $0x05fe8040         // cmp    sil, 5
	JNE  LBB3_865
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB3_53
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB3_86
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB3_137
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB3_140
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB3_865
	// typ == 6: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	// r9 = n (len zero-extended). Fewer than 32 elements: scalar path only.
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_21
	// Branch to LBB3_374 (outside this view; presumably the vectorized path)
	// only when the ranges do not overlap: in + 4n <= out or out + 4n <= in.
	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_374
	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_374

// Scalar path from the beginning: rsi (element index) = 0.
LBB3_21:
	WORD $0xf631 // xor    esi, esi

// r8 = n - 1 - rsi; rdi = n & 3 = number of elements the peel loop handles.
LBB3_616:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_618

// Peel loop: one 32-bit element per iteration, out[rsi] = 0 - in[rsi].
LBB3_617:
	WORD $0xc031             // xor    eax, eax
	WORD $0x042b; BYTE $0xb2 // sub    eax, dword [rdx + 4*rsi]
	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB3_617

// Skip the unrolled loop when r8 (n - 1 - starting index) is below 3.
LBB3_618:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main scalar loop, unrolled 4x; runs until rsi == r9 (n).
LBB3_619:
	WORD $0xc031             // xor    eax, eax
	WORD $0x042b; BYTE $0xb2 // sub    eax, dword [rdx + 4*rsi]
	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
	WORD $0xc031             // xor    eax, eax
	LONG $0x04b2442b         // sub    eax, dword [rdx + 4*rsi + 4]
	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
	WORD $0xc031             // xor    eax, eax
	LONG $0x08b2442b         // sub    eax, dword [rdx + 4*rsi + 8]
	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
	WORD $0xc031             // xor    eax, eax
	LONG $0x0cb2442b         // sub    eax, dword [rdx + 4*rsi + 12]
	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
	LONG $0x04c68348         // add    rsi, 4
	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
	JNE  LBB3_619
	JMP  LBB3_865

// LBB3_22: reached from the entry dispatch when op == 20.
// Dispatch on typ (edi):
//   typ > 6 -> LBB3_60, typ <= 3 -> LBB3_91, typ == 4 -> LBB3_143,
//   typ == 5 -> LBB3_146, typ == 6 -> 32-bit elements,
//   out[i] = (in[i] != 0) ? 1 : 0.
LBB3_22:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB3_60
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB3_91
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB3_143
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB3_146
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB3_865
	// typ == 6: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	// r9 = n (len zero-extended). Fewer than 32 elements: scalar path only.
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_29
	// Branch to LBB3_377 (outside this view; presumably the vectorized path)
	// only when in + 4n <= out or out + 4n <= in, i.e. no overlap.
	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_377
	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_377

// Scalar path from the beginning: rsi (element index) = 0.
LBB3_29:
	WORD $0xf631 // xor    esi, esi

// r8 = n - 1 - rsi; rax = n & 3 = number of elements the peel loop handles.
LBB3_380:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
	LONG $0x03e08348         // and    rax, 3
	JE   LBB3_382

// Peel loop: one element per iteration; edi is zeroed first so that setne
// leaves a clean 0/1 in the full 32-bit register.
LBB3_381:
	WORD $0xff31             // xor    edi, edi
	LONG $0x00b23c83         // cmp    dword [rdx + 4*rsi], 0
	LONG $0xd7950f40         // setne    dil
	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB3_381

// Skip the unrolled loop when r8 (n - 1 - starting index) is below 3.
LBB3_382:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main scalar loop, unrolled 4x; runs until rsi == r9 (n).
LBB3_383:
	WORD $0xc031                 // xor    eax, eax
	LONG $0x00b23c83             // cmp    dword [rdx + 4*rsi], 0
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
	WORD $0xc031                 // xor    eax, eax
	LONG $0x04b27c83; BYTE $0x00 // cmp    dword [rdx + 4*rsi + 4], 0
	WORD $0x950f; BYTE $0xd0     // setne    al
	LONG $0x04b14489             // mov    dword [rcx + 4*rsi + 4], eax
	WORD $0xc031                 // xor    eax, eax
	LONG $0x08b27c83; BYTE $0x00 // cmp    dword [rdx + 4*rsi + 8], 0
	WORD $0x950f; BYTE $0xd0     // setne    al
	LONG $0x08b14489             // mov    dword [rcx + 4*rsi + 8], eax
	WORD $0xc031                 // xor    eax, eax
	LONG $0x0cb27c83; BYTE $0x00 // cmp    dword [rdx + 4*rsi + 12], 0
	WORD $0x950f; BYTE $0xd0     // setne    al
	LONG $0x0cb14489             // mov    dword [rcx + 4*rsi + 12], eax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_383
	JMP  LBB3_865

// LBB3_30: reached from the entry dispatch when op == 25.
// Dispatch on typ (edi):
//   typ > 6 -> LBB3_67, typ <= 3 -> LBB3_96, typ == 4 -> LBB3_149,
//   typ == 5 -> LBB3_152, typ == 6 -> 32-bit elements copied unchanged,
//   out[i] = in[i].
LBB3_30:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB3_67
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB3_96
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB3_149
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB3_152
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB3_865
	// typ == 6: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	// r9 = n (len zero-extended). Fewer than 32 elements: scalar path only.
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_37
	// Branch to LBB3_384 (outside this view; presumably the vectorized path)
	// only when in + 4n <= out or out + 4n <= in, i.e. no overlap.
	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_384
	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_384

// Scalar path from the beginning: rsi (element index) = 0.
LBB3_37:
	WORD $0xf631 // xor    esi, esi

// r8 = n - 1 - rsi; rdi = n & 3 = number of elements the peel loop handles.
LBB3_624:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_626

// Peel loop: copy one 32-bit element per iteration.
LBB3_625:
	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB3_625

// Skip the unrolled loop when r8 (n - 1 - starting index) is below 3.
LBB3_626:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main copy loop, unrolled 4x; runs until rsi == r9 (n).
LBB3_627:
	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
	LONG $0x04c68348         // add    rsi, 4
	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
	JNE  LBB3_627
	JMP  LBB3_865

// LBB3_38: reached from LBB3_12 when op == 4.
// Dispatch on typ (edi):
//   typ > 6 -> LBB3_74, typ <= 3 -> LBB3_101, typ == 4 -> LBB3_155,
//   typ == 5 -> LBB3_158, typ == 6 -> 32-bit elements copied unchanged,
//   out[i] = in[i] (same code shape as the op == 25 / typ == 6 path).
LBB3_38:
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB3_74
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB3_101
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB3_155
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB3_158
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB3_865
	// typ == 6: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	// r9 = n (len zero-extended). Fewer than 32 elements: scalar path only.
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_45
	// Branch to LBB3_387 (outside this view; presumably the vectorized path)
	// only when in + 4n <= out or out + 4n <= in, i.e. no overlap.
	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_387
	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_387

// Scalar path from the beginning: rsi (element index) = 0.
LBB3_45:
	WORD $0xf631 // xor    esi, esi

// r8 = n - 1 - rsi; rdi = n & 3 = number of elements the peel loop handles.
LBB3_632:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_634

// Peel loop: copy one 32-bit element per iteration.
LBB3_633:
	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB3_633

// Skip the unrolled loop when r8 (n - 1 - starting index) is below 3.
LBB3_634:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main copy loop, unrolled 4x; runs until rsi == r9 (n).
LBB3_635:
	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
	LONG $0x04c68348         // add    rsi, 4
	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
	JNE  LBB3_635
	JMP  LBB3_865

// LBB3_46: reached from the entry dispatch when op == 26 and typ > 6.
// Dispatch on typ (edi):
//   typ <= 8 -> LBB3_106, typ == 9 -> LBB3_161, typ == 11 -> LBB3_164,
//   typ == 12 -> 64-bit elements, out[i] = in[i] XOR (low qword of the
//   16-byte constant at LCDATA4+32); any other value -> LBB3_865.
// NOTE(review): that constant is defined outside this view; presumably it is
// the IEEE-754 double sign-bit mask, making this a negation -- confirm.
LBB3_46:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB3_106
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB3_161
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB3_164
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB3_865
	// typ == 12: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	// r9 = n (len zero-extended). Fewer than 16 elements: scalar path only.
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_52
	// Branch to LBB3_390 (outside this view; presumably the vectorized path)
	// only when in + 8n <= out or out + 8n <= in, i.e. no overlap.
	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_390
	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_390

// Scalar path from the beginning: rsi (element index) = 0.
LBB3_52:
	WORD $0xf631 // xor    esi, esi

// rax = n - 1 - rsi; rdi = n & 3 = number of elements the peel loop handles.
// xmm0 is loaded with the XOR constant only if the peel loop will run.
LBB3_640:
	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
	WORD $0xf748; BYTE $0xd0     // not    rax
	WORD $0x014c; BYTE $0xc8     // add    rax, r9
	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
	LONG $0x03e78348             // and    rdi, 3
	JE   LBB3_643
	LONG $0x4528f9c5; BYTE $0x20 // vmovapd    xmm0, oword 32[rbp] /* [rip + .LCPI3_2] */

// Peel loop: one 64-bit element per iteration.
LBB3_642:
	LONG $0x0c10fbc5; BYTE $0xf2 // vmovsd    xmm1, qword [rdx + 8*rsi]
	LONG $0xc857f1c5             // vxorpd    xmm1, xmm1, xmm0
	LONG $0x0c13f9c5; BYTE $0xf1 // vmovlpd    qword [rcx + 8*rsi], xmm1
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB3_642

// Skip the unrolled loop when rax (n - 1 - starting index) is below 3;
// otherwise (re)load the XOR constant, since the peel loop may not have run.
LBB3_643:
	LONG $0x03f88348             // cmp    rax, 3
	JB   LBB3_865
	LONG $0x4528f9c5; BYTE $0x20 // vmovapd    xmm0, oword 32[rbp] /* [rip + .LCPI3_2] */

// Main scalar loop, unrolled 4x; runs until rsi == r9 (n).
LBB3_645:
	LONG $0x0c10fbc5; BYTE $0xf2   // vmovsd    xmm1, qword [rdx + 8*rsi]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x0c13f9c5; BYTE $0xf1   // vmovlpd    qword [rcx + 8*rsi], xmm1
	LONG $0x4c10fbc5; WORD $0x08f2 // vmovsd    xmm1, qword [rdx + 8*rsi + 8]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x4c13f9c5; WORD $0x08f1 // vmovlpd    qword [rcx + 8*rsi + 8], xmm1
	LONG $0x4c10fbc5; WORD $0x10f2 // vmovsd    xmm1, qword [rdx + 8*rsi + 16]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x4c13f9c5; WORD $0x10f1 // vmovlpd    qword [rcx + 8*rsi + 16], xmm1
	LONG $0x4c10fbc5; WORD $0x18f2 // vmovsd    xmm1, qword [rdx + 8*rsi + 24]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x4c13f9c5; WORD $0x18f1 // vmovlpd    qword [rcx + 8*rsi + 24], xmm1
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
	JNE  LBB3_645
	JMP  LBB3_865

// LBB3_53: reached from LBB3_12 when op == 5 and typ > 6.
// Dispatch on typ (edi):
//   typ <= 8 -> LBB3_111, typ == 9 -> LBB3_167, typ == 11 -> LBB3_170,
//   typ == 12 -> 64-bit elements, out[i] = in[i] XOR (low qword of the
//   16-byte constant at LCDATA4+32); any other value -> LBB3_865.
// NOTE(review): the constant lives outside this view; presumably it is the
// IEEE-754 double sign-bit mask (negation) -- confirm.
LBB3_53:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB3_111
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB3_167
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB3_170
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB3_865
	// typ == 12: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	// r9 = n (len zero-extended). Fewer than 16 elements: scalar path only.
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_59
	// Branch to LBB3_393 (outside this view; presumably the vectorized path)
	// only when in + 8n <= out or out + 8n <= in, i.e. no overlap.
	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_393
	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_393

// Scalar path from the beginning: rsi (element index) = 0.
LBB3_59:
	WORD $0xf631 // xor    esi, esi

// rax = n - 1 - rsi; rdi = n & 3 = number of elements the peel loop handles.
// xmm0 is loaded with the XOR constant only if the peel loop will run.
LBB3_650:
	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
	WORD $0xf748; BYTE $0xd0     // not    rax
	WORD $0x014c; BYTE $0xc8     // add    rax, r9
	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
	LONG $0x03e78348             // and    rdi, 3
	JE   LBB3_653
	LONG $0x4528f9c5; BYTE $0x20 // vmovapd    xmm0, oword 32[rbp] /* [rip + .LCPI3_2] */

// Peel loop: one 64-bit element per iteration.
LBB3_652:
	LONG $0x0c10fbc5; BYTE $0xf2 // vmovsd    xmm1, qword [rdx + 8*rsi]
	LONG $0xc857f1c5             // vxorpd    xmm1, xmm1, xmm0
	LONG $0x0c13f9c5; BYTE $0xf1 // vmovlpd    qword [rcx + 8*rsi], xmm1
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB3_652

// Skip the unrolled loop when rax (n - 1 - starting index) is below 3;
// otherwise (re)load the XOR constant, since the peel loop may not have run.
LBB3_653:
	LONG $0x03f88348             // cmp    rax, 3
	JB   LBB3_865
	LONG $0x4528f9c5; BYTE $0x20 // vmovapd    xmm0, oword 32[rbp] /* [rip + .LCPI3_2] */

// Main scalar loop, unrolled 4x; runs until rsi == r9 (n).
LBB3_655:
	LONG $0x0c10fbc5; BYTE $0xf2   // vmovsd    xmm1, qword [rdx + 8*rsi]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x0c13f9c5; BYTE $0xf1   // vmovlpd    qword [rcx + 8*rsi], xmm1
	LONG $0x4c10fbc5; WORD $0x08f2 // vmovsd    xmm1, qword [rdx + 8*rsi + 8]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x4c13f9c5; WORD $0x08f1 // vmovlpd    qword [rcx + 8*rsi + 8], xmm1
	LONG $0x4c10fbc5; WORD $0x10f2 // vmovsd    xmm1, qword [rdx + 8*rsi + 16]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x4c13f9c5; WORD $0x10f1 // vmovlpd    qword [rcx + 8*rsi + 16], xmm1
	LONG $0x4c10fbc5; WORD $0x18f2 // vmovsd    xmm1, qword [rdx + 8*rsi + 24]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x4c13f9c5; WORD $0x18f1 // vmovlpd    qword [rcx + 8*rsi + 24], xmm1
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
	JNE  LBB3_655
	JMP  LBB3_865

// LBB3_60: reached from LBB3_22 when op == 20 and typ > 6.
// Dispatch on typ (edi):
//   typ <= 8 -> LBB3_116, typ == 9 -> LBB3_173, typ == 11 -> LBB3_176,
//   typ == 12 -> 64-bit elements handled below; anything else -> LBB3_865.
// Per element x the typ == 12 code computes
//   t   = (x AND C32) OR C8      (C32 = 16 bytes at LCDATA4+32,
//                                 C8  = qword at LCDATA4+8)
//   out = (x == 0.0) ? 0 : t     (vcmpeqsd predicate 0, then vandnpd)
// NOTE(review): both constants are defined outside this view; presumably C32
// is the double sign-bit mask and C8 is 1.0, which would make this
// sign(x) = copysign(1.0, x) with zero mapped to +0.0 -- confirm.
LBB3_60:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB3_116
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB3_173
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB3_176
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB3_865
	// typ == 12: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	// rax = n (len zero-extended). Fewer than 16 elements: scalar path only.
	WORD $0x8944; BYTE $0xc0 // mov    eax, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_66
	// Branch to LBB3_396 (outside this view; presumably the vectorized path)
	// only when in + 8n <= out or out + 8n <= in, i.e. no overlap.
	LONG $0xc2348d48         // lea    rsi, [rdx + 8*rax]
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	JBE  LBB3_396
	LONG $0xc1348d48         // lea    rsi, [rcx + 8*rax]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	JBE  LBB3_396

// Scalar path from the beginning: rsi (element index) = 0.
LBB3_66:
	WORD $0xf631 // xor    esi, esi

// rdi = ~rsi (becomes n - 1 - rsi at LBB3_401). When n is odd, one element
// is processed here and rsi's low bit is set so the 2x loop stays in step.
LBB3_399:
	WORD $0x8948; BYTE $0xf7     // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7     // not    rdi
	WORD $0x01a8                 // test    al, 1
	JE   LBB3_401
	LONG $0x0410fbc5; BYTE $0xf2 // vmovsd    xmm0, qword [rdx + 8*rsi]
	LONG $0x4d54f9c5; BYTE $0x20 // vandpd    xmm1, xmm0, oword 32[rbp] /* [rip + .LCPI3_2] */
	LONG $0x5512fbc5; BYTE $0x08 // vmovddup    xmm2, qword 8[rbp] /* [rip + .LCPI3_1] */
	LONG $0xc956e9c5             // vorpd    xmm1, xmm2, xmm1
	LONG $0xd257e9c5             // vxorpd    xmm2, xmm2, xmm2
	LONG $0xc2c2fbc5; BYTE $0x00 // vcmpeqsd    xmm0, xmm0, xmm2
	LONG $0xc155f9c5             // vandnpd    xmm0, xmm0, xmm1
	LONG $0x0413f9c5; BYTE $0xf1 // vmovlpd    qword [rcx + 8*rsi], xmm0
	LONG $0x01ce8348             // or    rsi, 1

// rdi = n - 1 - (starting index); zero means nothing is left. Otherwise
// hoist the loop invariants: xmm0 = C32, xmm1 = C8 (duplicated), xmm2 = 0.
LBB3_401:
	WORD $0x0148; BYTE $0xc7     // add    rdi, rax
	JE   LBB3_865
	LONG $0x4528f9c5; BYTE $0x20 // vmovapd    xmm0, oword 32[rbp] /* [rip + .LCPI3_2] */
	LONG $0x4d12fbc5; BYTE $0x08 // vmovddup    xmm1, qword 8[rbp] /* [rip + .LCPI3_1] */
	LONG $0xd257e9c5             // vxorpd    xmm2, xmm2, xmm2

// Main scalar loop, unrolled 2x; runs until rsi == rax (n).
LBB3_403:
	LONG $0x1c10fbc5; BYTE $0xf2   // vmovsd    xmm3, qword [rdx + 8*rsi]
	LONG $0xe054e1c5               // vandpd    xmm4, xmm3, xmm0
	LONG $0xe456f1c5               // vorpd    xmm4, xmm1, xmm4
	LONG $0xdac2e3c5; BYTE $0x00   // vcmpeqsd    xmm3, xmm3, xmm2
	LONG $0xdc55e1c5               // vandnpd    xmm3, xmm3, xmm4
	LONG $0x1c13f9c5; BYTE $0xf1   // vmovlpd    qword [rcx + 8*rsi], xmm3
	LONG $0x5c10fbc5; WORD $0x08f2 // vmovsd    xmm3, qword [rdx + 8*rsi + 8]
	LONG $0xe054e1c5               // vandpd    xmm4, xmm3, xmm0
	LONG $0xe456f1c5               // vorpd    xmm4, xmm1, xmm4
	LONG $0xdac2e3c5; BYTE $0x00   // vcmpeqsd    xmm3, xmm3, xmm2
	LONG $0xdc55e1c5               // vandnpd    xmm3, xmm3, xmm4
	LONG $0x5c13f9c5; WORD $0x08f1 // vmovlpd    qword [rcx + 8*rsi + 8], xmm3
	LONG $0x02c68348               // add    rsi, 2
	WORD $0x3948; BYTE $0xf0       // cmp    rax, rsi
	JNE  LBB3_403
	JMP  LBB3_865

// LBB3_67: reached from LBB3_30 when op == 25 and typ > 6.
// Dispatch on typ (edi):
//   typ <= 8 -> LBB3_121, typ == 9 -> LBB3_179, typ == 11 -> LBB3_182,
//   typ == 12 -> 64-bit elements, out[i] = in[i] AND 0x7fffffffffffffff
//   (clears bit 63; this is fabs if the elements are IEEE-754 doubles);
//   anything else -> LBB3_865.
LBB3_67:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB3_121
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB3_179
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB3_182
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB3_865
	// typ == 12: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	// r9 = n (len zero-extended). Fewer than 16 elements: scalar path only.
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_73
	// Branch to LBB3_404 (outside this view; presumably the vectorized path)
	// only when in + 8n <= out or out + 8n <= in, i.e. no overlap.
	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_404
	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_404

// Scalar path from the beginning: rsi (element index) = 0.
LBB3_73:
	WORD $0xf631 // xor    esi, esi

// r10 = mask with every bit set except bit 63; r8 = n - 1 - rsi;
// rax = n & 3 = number of elements the peel loop handles.
LBB3_660:
	QUAD $0xffffffffffffba49; WORD $0x7fff // mov    r10, 9223372036854775807
	WORD $0x8949; BYTE $0xf0               // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0               // not    r8
	WORD $0x014d; BYTE $0xc8               // add    r8, r9
	WORD $0x894c; BYTE $0xc8               // mov    rax, r9
	LONG $0x03e08348                       // and    rax, 3
	JE   LBB3_662

// Peel loop: one 64-bit element per iteration.
LBB3_661:
	LONG $0xf23c8b48         // mov    rdi, qword [rdx + 8*rsi]
	WORD $0x214c; BYTE $0xd7 // and    rdi, r10
	LONG $0xf13c8948         // mov    qword [rcx + 8*rsi], rdi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB3_661

// Skip the unrolled loop when r8 (n - 1 - starting index) is below 3.
LBB3_662:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main scalar loop, unrolled 4x; runs until rsi == r9 (n).
LBB3_663:
	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
	WORD $0x214c; BYTE $0xd0     // and    rax, r10
	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
	WORD $0x214c; BYTE $0xd0     // and    rax, r10
	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
	WORD $0x214c; BYTE $0xd0     // and    rax, r10
	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
	WORD $0x214c; BYTE $0xd0     // and    rax, r10
	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_663
	JMP  LBB3_865

// LBB3_74: reached from LBB3_38 when op == 4 and typ > 6.
// Dispatch on typ (edi):
//   typ <= 8 -> LBB3_126, typ == 9 -> LBB3_185, typ == 11 -> LBB3_188,
//   typ == 12 -> 64-bit elements, out[i] = in[i] AND 0x7fffffffffffffff
//   (clears bit 63; same code shape as the op == 25 / typ == 12 path);
//   anything else -> LBB3_865.
LBB3_74:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB3_126
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB3_185
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB3_188
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB3_865
	// typ == 12: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	// r9 = n (len zero-extended). Fewer than 16 elements: scalar path only.
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_80
	// Branch to LBB3_407 (outside this view; presumably the vectorized path)
	// only when in + 8n <= out or out + 8n <= in, i.e. no overlap.
	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_407
	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_407

// Scalar path from the beginning: rsi (element index) = 0.
LBB3_80:
	WORD $0xf631 // xor    esi, esi

// r10 = mask with every bit set except bit 63; r8 = n - 1 - rsi;
// rax = n & 3 = number of elements the peel loop handles.
LBB3_668:
	QUAD $0xffffffffffffba49; WORD $0x7fff // mov    r10, 9223372036854775807
	WORD $0x8949; BYTE $0xf0               // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0               // not    r8
	WORD $0x014d; BYTE $0xc8               // add    r8, r9
	WORD $0x894c; BYTE $0xc8               // mov    rax, r9
	LONG $0x03e08348                       // and    rax, 3
	JE   LBB3_670

// Peel loop: one 64-bit element per iteration.
LBB3_669:
	LONG $0xf23c8b48         // mov    rdi, qword [rdx + 8*rsi]
	WORD $0x214c; BYTE $0xd7 // and    rdi, r10
	LONG $0xf13c8948         // mov    qword [rcx + 8*rsi], rdi
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB3_669

// Skip the unrolled loop when r8 (n - 1 - starting index) is below 3.
LBB3_670:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main scalar loop, unrolled 4x; runs until rsi == r9 (n).
LBB3_671:
	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
	WORD $0x214c; BYTE $0xd0     // and    rax, r10
	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
	WORD $0x214c; BYTE $0xd0     // and    rax, r10
	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
	WORD $0x214c; BYTE $0xd0     // and    rax, r10
	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
	WORD $0x214c; BYTE $0xd0     // and    rax, r10
	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_671
	JMP  LBB3_865

// LBB3_81: reached from the entry dispatch when op == 26 and typ <= 3.
//   typ == 2 -> LBB3_191
//   typ == 3 -> 8-bit elements, out[i] = 0 - in[i] (wraps modulo 256)
//   anything else -> LBB3_865.
LBB3_81:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB3_191
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB3_865
	// typ == 3: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0                   // test    r8d, r8d
	JLE  LBB3_865
	// r9 = n (len zero-extended). Fewer than 128 elements: scalar path only.
	WORD $0x8945; BYTE $0xc1                   // mov    r9d, r8d
	LONG $0x80f88141; WORD $0x0000; BYTE $0x00 // cmp    r8d, 128
	JB   LBB3_85
	// Branch to LBB3_410 (outside this view; presumably the vectorized path)
	// only when in + n <= out or out + n <= in, i.e. no overlap.
	LONG $0x0a048d4a                           // lea    rax, [rdx + r9]
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JBE  LBB3_410
	LONG $0x09048d4a                           // lea    rax, [rcx + r9]
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JBE  LBB3_410

// Scalar path from the beginning: rsi (byte index) = 0.
LBB3_85:
	WORD $0xf631 // xor    esi, esi

// r8 = n - 1 - rsi; rdi = n & 3 = number of bytes the peel loop handles.
LBB3_676:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_678

// Peel loop: one byte per iteration, out[rsi] = 0 - in[rsi].
LBB3_677:
	LONG $0x14b60f44; BYTE $0x32 // movzx    r10d, byte [rdx + rsi]
	WORD $0xc031                 // xor    eax, eax
	WORD $0x2844; BYTE $0xd0     // sub    al, r10b
	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB3_677

// Skip the unrolled loop when r8 (n - 1 - starting index) is below 3.
LBB3_678:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main scalar loop, unrolled 4x; runs until rsi == r9 (n). The fourth byte
// is negated through edi/dil instead of eax/al; the result is the same.
LBB3_679:
	WORD $0xc031                 // xor    eax, eax
	WORD $0x042a; BYTE $0x32     // sub    al, byte [rdx + rsi]
	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
	WORD $0xc031                 // xor    eax, eax
	LONG $0x0132442a             // sub    al, byte [rdx + rsi + 1]
	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
	WORD $0xc031                 // xor    eax, eax
	LONG $0x0232442a             // sub    al, byte [rdx + rsi + 2]
	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
	WORD $0xff31                 // xor    edi, edi
	WORD $0x2840; BYTE $0xc7     // sub    dil, al
	LONG $0x317c8840; BYTE $0x03 // mov    byte [rcx + rsi + 3], dil
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_679
	JMP  LBB3_865

// LBB3_86: reached from LBB3_12 when op == 5 and typ <= 3.
//   typ == 2 -> LBB3_194
//   typ == 3 -> 8-bit elements, out[i] = 0 - in[i] (wraps modulo 256;
//               same code shape as the op == 26 / typ == 3 path)
//   anything else -> LBB3_865.
LBB3_86:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB3_194
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB3_865
	// typ == 3: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0                   // test    r8d, r8d
	JLE  LBB3_865
	// r9 = n (len zero-extended). Fewer than 128 elements: scalar path only.
	WORD $0x8945; BYTE $0xc1                   // mov    r9d, r8d
	LONG $0x80f88141; WORD $0x0000; BYTE $0x00 // cmp    r8d, 128
	JB   LBB3_90
	// Branch to LBB3_413 (outside this view; presumably the vectorized path)
	// only when in + n <= out or out + n <= in, i.e. no overlap.
	LONG $0x0a048d4a                           // lea    rax, [rdx + r9]
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JBE  LBB3_413
	LONG $0x09048d4a                           // lea    rax, [rcx + r9]
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JBE  LBB3_413

// Scalar path from the beginning: rsi (byte index) = 0.
LBB3_90:
	WORD $0xf631 // xor    esi, esi

// r8 = n - 1 - rsi; rdi = n & 3 = number of bytes the peel loop handles.
LBB3_684:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_686

// Peel loop: one byte per iteration, out[rsi] = 0 - in[rsi].
LBB3_685:
	LONG $0x14b60f44; BYTE $0x32 // movzx    r10d, byte [rdx + rsi]
	WORD $0xc031                 // xor    eax, eax
	WORD $0x2844; BYTE $0xd0     // sub    al, r10b
	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB3_685

// Skip the unrolled loop when r8 (n - 1 - starting index) is below 3.
LBB3_686:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main scalar loop, unrolled 4x; runs until rsi == r9 (n). The fourth byte
// is negated through edi/dil instead of eax/al; the result is the same.
LBB3_687:
	WORD $0xc031                 // xor    eax, eax
	WORD $0x042a; BYTE $0x32     // sub    al, byte [rdx + rsi]
	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
	WORD $0xc031                 // xor    eax, eax
	LONG $0x0132442a             // sub    al, byte [rdx + rsi + 1]
	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
	WORD $0xc031                 // xor    eax, eax
	LONG $0x0232442a             // sub    al, byte [rdx + rsi + 2]
	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
	WORD $0xff31                 // xor    edi, edi
	WORD $0x2840; BYTE $0xc7     // sub    dil, al
	LONG $0x317c8840; BYTE $0x03 // mov    byte [rcx + rsi + 3], dil
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_687
	JMP  LBB3_865

// LBB3_91: reached from LBB3_22 when op == 20 and typ <= 3.
//   typ == 2 -> LBB3_197
//   typ == 3 -> 8-bit elements treated as signed:
//               out[i] = 1 if in[i] > 0, 0 if in[i] == 0, 0xFF (-1) otherwise
//   anything else -> LBB3_865.
LBB3_91:
	WORD $0xff83; BYTE $0x02                   // cmp    edi, 2
	JE   LBB3_197
	WORD $0xff83; BYTE $0x03                   // cmp    edi, 3
	JNE  LBB3_865
	// typ == 3: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0                   // test    r8d, r8d
	JLE  LBB3_865
	// r11 = n (len zero-extended). Fewer than 128 elements: scalar path only.
	WORD $0x8945; BYTE $0xc3                   // mov    r11d, r8d
	LONG $0x80f88141; WORD $0x0000; BYTE $0x00 // cmp    r8d, 128
	JB   LBB3_95
	// Branch to LBB3_416 (outside this view; presumably the vectorized path)
	// only when in + n <= out or out + n <= in, i.e. no overlap.
	LONG $0x1a348d4a                           // lea    rsi, [rdx + r11]
	WORD $0x3948; BYTE $0xce                   // cmp    rsi, rcx
	JBE  LBB3_416
	LONG $0x19348d4a                           // lea    rsi, [rcx + r11]
	WORD $0x3948; BYTE $0xd6                   // cmp    rsi, rdx
	JBE  LBB3_416

// Scalar path from the beginning: rsi (byte index) = 0.
LBB3_95:
	WORD $0xf631 // xor    esi, esi

// r10 = ~rsi (becomes n - 1 - rsi at LBB3_421). When n is odd, one byte is
// processed here: r9b = -(x != 0) gives 0 or 0xFF, and edi stays 1 unless
// x <= 0 (signed), in which case it takes r9b's value.
LBB3_419:
	WORD $0x8949; BYTE $0xf2     // mov    r10, rsi
	WORD $0xf749; BYTE $0xd2     // not    r10
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB3_421
	LONG $0x32048a44             // mov    r8b, byte [rdx + rsi]
	WORD $0x8445; BYTE $0xc0     // test    r8b, r8b
	LONG $0xd1950f41             // setne    r9b
	WORD $0xf641; BYTE $0xd9     // neg    r9b
	WORD $0x8445; BYTE $0xc0     // test    r8b, r8b
	LONG $0xc1b60f45             // movzx    r8d, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xf84e0f41             // cmovle    edi, r8d
	LONG $0x313c8840             // mov    byte [rcx + rsi], dil
	LONG $0x01ce8348             // or    rsi, 1

// r10 = n - 1 - (starting index); zero means nothing is left.
// edi = 1 is the loop-invariant "positive" result used by cmovg below.
LBB3_421:
	WORD $0x014d; BYTE $0xda     // add    r10, r11
	JE   LBB3_865
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1

// Main scalar loop, unrolled 2x; runs until rsi == r11 (n).
// Per byte: al = -(x != 0); if x > 0 (signed) al is replaced by 1.
LBB3_423:
	LONG $0x04b60f44; BYTE $0x32   // movzx    r8d, byte [rdx + rsi]
	WORD $0x8445; BYTE $0xc0       // test    r8b, r8b
	WORD $0x950f; BYTE $0xd0       // setne    al
	WORD $0xd8f6                   // neg    al
	WORD $0x8445; BYTE $0xc0       // test    r8b, r8b
	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
	WORD $0x4f0f; BYTE $0xc7       // cmovg    eax, edi
	WORD $0x0488; BYTE $0x31       // mov    byte [rcx + rsi], al
	LONG $0x44b60f44; WORD $0x0132 // movzx    r8d, byte [rdx + rsi + 1]
	WORD $0x8445; BYTE $0xc0       // test    r8b, r8b
	WORD $0x950f; BYTE $0xd0       // setne    al
	WORD $0xd8f6                   // neg    al
	WORD $0x8445; BYTE $0xc0       // test    r8b, r8b
	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
	WORD $0x4f0f; BYTE $0xc7       // cmovg    eax, edi
	LONG $0x01314488               // mov    byte [rcx + rsi + 1], al
	LONG $0x02c68348               // add    rsi, 2
	WORD $0x3949; BYTE $0xf3       // cmp    r11, rsi
	JNE  LBB3_423
	JMP  LBB3_865

// LBB3_96: reached from LBB3_30 when op == 25 and typ <= 3.
//   typ == 2 -> LBB3_200
//   typ == 3 -> 8-bit elements treated as signed, out[i] = |in[i]| computed
//               as (x + s) ^ s with s = x >> 7 (0 or -1); the low byte is
//               stored, so 0x80 (-128) maps to 0x80
//   anything else -> LBB3_865.
LBB3_96:
	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
	JE   LBB3_200
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JNE  LBB3_865
	// typ == 3: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	// r10 = n (len zero-extended). Fewer than 32 elements: scalar path only.
	WORD $0x8945; BYTE $0xc2 // mov    r10d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_100
	// Branch to LBB3_424 (outside this view; presumably the vectorized path)
	// only when in + n <= out or out + n <= in, i.e. no overlap.
	LONG $0x12348d4a         // lea    rsi, [rdx + r10]
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	JBE  LBB3_424
	LONG $0x11348d4a         // lea    rsi, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	JBE  LBB3_424

// Scalar path from the beginning: rsi (byte index) = 0.
LBB3_100:
	WORD $0xf631 // xor    esi, esi

// r8 = ~rsi (becomes n - 1 - rsi at LBB3_429). When n is odd, one byte is
// processed here and rsi's low bit is set so the 2x loop stays in step.
LBB3_427:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	LONG $0x01c2f641         // test    r10b, 1
	JE   LBB3_429
	LONG $0x323cbe0f         // movsx    edi, byte [rdx + rsi]
	WORD $0x8941; BYTE $0xf9 // mov    r9d, edi
	LONG $0x07f9c141         // sar    r9d, 7
	WORD $0x0144; BYTE $0xcf // add    edi, r9d
	WORD $0x3144; BYTE $0xcf // xor    edi, r9d
	LONG $0x313c8840         // mov    byte [rcx + rsi], dil
	LONG $0x01ce8348         // or    rsi, 1

// r8 = n - 1 - (starting index); zero means nothing is left.
LBB3_429:
	WORD $0x014d; BYTE $0xd0 // add    r8, r10
	JE   LBB3_865

// Main scalar loop, unrolled 2x; runs until rsi == r10 (n).
LBB3_430:
	LONG $0x323cbe0f             // movsx    edi, byte [rdx + rsi]
	WORD $0xf889                 // mov    eax, edi
	WORD $0xf8c1; BYTE $0x07     // sar    eax, 7
	WORD $0xc701                 // add    edi, eax
	WORD $0xc731                 // xor    edi, eax
	LONG $0x313c8840             // mov    byte [rcx + rsi], dil
	LONG $0x3244be0f; BYTE $0x01 // movsx    eax, byte [rdx + rsi + 1]
	WORD $0xc789                 // mov    edi, eax
	WORD $0xffc1; BYTE $0x07     // sar    edi, 7
	WORD $0xf801                 // add    eax, edi
	WORD $0xf831                 // xor    eax, edi
	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
	LONG $0x02c68348             // add    rsi, 2
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB3_430
	JMP  LBB3_865

// LBB3_101: reached from LBB3_38 when op == 4 and typ <= 3.
//   typ == 2 -> LBB3_203
//   typ == 3 -> 8-bit elements treated as signed, out[i] = |in[i]| computed
//               as (x + s) ^ s with s = x >> 7 (0 or -1); the low byte is
//               stored, so 0x80 (-128) maps to 0x80
//               (same code shape as the op == 25 / typ == 3 path)
//   anything else -> LBB3_865.
LBB3_101:
	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
	JE   LBB3_203
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JNE  LBB3_865
	// typ == 3: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	// r10 = n (len zero-extended). Fewer than 32 elements: scalar path only.
	WORD $0x8945; BYTE $0xc2 // mov    r10d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_105
	// Branch to LBB3_431 (outside this view; presumably the vectorized path)
	// only when in + n <= out or out + n <= in, i.e. no overlap.
	LONG $0x12348d4a         // lea    rsi, [rdx + r10]
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	JBE  LBB3_431
	LONG $0x11348d4a         // lea    rsi, [rcx + r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	JBE  LBB3_431

// Scalar path from the beginning: rsi (byte index) = 0.
LBB3_105:
	WORD $0xf631 // xor    esi, esi

// r8 = ~rsi (becomes n - 1 - rsi at LBB3_436). When n is odd, one byte is
// processed here and rsi's low bit is set so the 2x loop stays in step.
LBB3_434:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	LONG $0x01c2f641         // test    r10b, 1
	JE   LBB3_436
	LONG $0x323cbe0f         // movsx    edi, byte [rdx + rsi]
	WORD $0x8941; BYTE $0xf9 // mov    r9d, edi
	LONG $0x07f9c141         // sar    r9d, 7
	WORD $0x0144; BYTE $0xcf // add    edi, r9d
	WORD $0x3144; BYTE $0xcf // xor    edi, r9d
	LONG $0x313c8840         // mov    byte [rcx + rsi], dil
	LONG $0x01ce8348         // or    rsi, 1

// r8 = n - 1 - (starting index); zero means nothing is left.
LBB3_436:
	WORD $0x014d; BYTE $0xd0 // add    r8, r10
	JE   LBB3_865

// Main scalar loop, unrolled 2x; runs until rsi == r10 (n).
LBB3_437:
	LONG $0x323cbe0f             // movsx    edi, byte [rdx + rsi]
	WORD $0xf889                 // mov    eax, edi
	WORD $0xf8c1; BYTE $0x07     // sar    eax, 7
	WORD $0xc701                 // add    edi, eax
	WORD $0xc731                 // xor    edi, eax
	LONG $0x313c8840             // mov    byte [rcx + rsi], dil
	LONG $0x3244be0f; BYTE $0x01 // movsx    eax, byte [rdx + rsi + 1]
	WORD $0xc789                 // mov    edi, eax
	WORD $0xffc1; BYTE $0x07     // sar    edi, 7
	WORD $0xf801                 // add    eax, edi
	WORD $0xf831                 // xor    eax, edi
	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
	LONG $0x02c68348             // add    rsi, 2
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB3_437
	JMP  LBB3_865

// LBB3_106: reached from LBB3_46 when op == 26 and typ is 7 or 8.
//   typ == 7 -> LBB3_206
//   typ == 8 -> handled via LBB3_265 (16 or more elements) or LBB3_444
//   anything else -> LBB3_865.
LBB3_106:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB3_206
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB3_865
	// typ == 8: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	// r9 = n (len zero-extended).
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JAE  LBB3_265
	// NOTE(review): this zeroes DX, which held the input pointer. LBB3_444 is
	// outside this view; presumably that path does not read the input and
	// uses rdx as an index -- confirm.
	WORD $0xd231             // xor    edx, edx
	JMP  LBB3_444

// LBB3_111: reached from LBB3_53 when op == 5 and typ is 7 or 8.
//   typ == 7 -> LBB3_209
//   typ == 8 -> 64-bit elements, out[i] = 0 - in[i] (wraps modulo 2^64)
//   anything else -> LBB3_865.
LBB3_111:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB3_209
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB3_865
	// typ == 8: return early when the 32-bit length is <= 0 (signed).
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	// r9 = n (len zero-extended). Fewer than 16 elements: scalar path only.
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_115
	// Branch to LBB3_445 (outside this view; presumably the vectorized path)
	// only when in + 8n <= out or out + 8n <= in, i.e. no overlap.
	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_445
	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_445

// Scalar path from the beginning: rsi (element index) = 0.
LBB3_115:
	WORD $0xf631 // xor    esi, esi

// r8 = n - 1 - rsi; rdi = n & 3 = number of elements the peel loop handles.
LBB3_692:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_694

// Peel loop: one 64-bit element per iteration. xor eax, eax clears all of
// rax (a 32-bit write zero-extends), so the 64-bit sub computes 0 - in[rsi].
LBB3_693:
	WORD $0xc031     // xor    eax, eax
	LONG $0xf2042b48 // sub    rax, qword [rdx + 8*rsi]
	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB3_693

// Skip the unrolled loop when r8 (n - 1 - starting index) is below 3.
LBB3_694:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main scalar loop, unrolled 4x; runs until rsi == r9 (n).
LBB3_695:
	WORD $0xc031                 // xor    eax, eax
	LONG $0xf2042b48             // sub    rax, qword [rdx + 8*rsi]
	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
	WORD $0xc031                 // xor    eax, eax
	LONG $0xf2442b48; BYTE $0x08 // sub    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
	WORD $0xc031                 // xor    eax, eax
	LONG $0xf2442b48; BYTE $0x10 // sub    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
	WORD $0xc031                 // xor    eax, eax
	LONG $0xf2442b48; BYTE $0x18 // sub    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_695
	JMP  LBB3_865

// LBB3_116: 64-bit "is non-zero" map. For edi == 8, writes 1 to out[i] when
// the qword in[i] is non-zero and 0 otherwise, for each of the r8d elements
// (in = rdx, out = rcx, results stored as qwords).
//   edi == 7 -> LBB3_212; any other value than 8 -> LBB3_865.
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 16, control goes to LBB3_448 when the two ranges do not
// overlap (in + 8*n <= out, or out + 8*n <= in). LBB3_448 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar loops below.
LBB3_116:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB3_212
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB3_865
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_120
	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_448
	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_448

// Scalar path starts at element index rsi = 0.
LBB3_120:
	WORD $0xf631 // xor    esi, esi

// r8 = r9 - rsi - 1 (computed as ~rsi + r9); rax = r9 & 3 is the number of
// single-element iterations run before the 4x-unrolled loop.
LBB3_451:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
	LONG $0x03e08348         // and    rax, 3
	JE   LBB3_453

// One element per iteration, rax times; edi is zeroed first so that setne
// leaves a clean 0/1 in rdi.
LBB3_452:
	WORD $0xff31                 // xor    edi, edi
	LONG $0xf23c8348; BYTE $0x00 // cmp    qword [rdx + 8*rsi], 0
	LONG $0xd7950f40             // setne    dil
	LONG $0xf13c8948             // mov    qword [rcx + 8*rsi], rdi
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB3_452

// Done when r8 (computed at LBB3_451) is below 3, unsigned.
LBB3_453:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Four elements per iteration until rsi reaches the count in r9.
LBB3_454:
	WORD $0xc031                   // xor    eax, eax
	LONG $0xf23c8348; BYTE $0x00   // cmp    qword [rdx + 8*rsi], 0
	WORD $0x950f; BYTE $0xd0       // setne    al
	LONG $0xf1048948               // mov    qword [rcx + 8*rsi], rax
	WORD $0xc031                   // xor    eax, eax
	LONG $0xf27c8348; WORD $0x0008 // cmp    qword [rdx + 8*rsi + 8], 0
	WORD $0x950f; BYTE $0xd0       // setne    al
	LONG $0xf1448948; BYTE $0x08   // mov    qword [rcx + 8*rsi + 8], rax
	WORD $0xc031                   // xor    eax, eax
	LONG $0xf27c8348; WORD $0x0010 // cmp    qword [rdx + 8*rsi + 16], 0
	WORD $0x950f; BYTE $0xd0       // setne    al
	LONG $0xf1448948; BYTE $0x10   // mov    qword [rcx + 8*rsi + 16], rax
	WORD $0xc031                   // xor    eax, eax
	LONG $0xf27c8348; WORD $0x0018 // cmp    qword [rdx + 8*rsi + 24], 0
	WORD $0x950f; BYTE $0xd0       // setne    al
	LONG $0xf1448948; BYTE $0x18   // mov    qword [rcx + 8*rsi + 24], rax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
	JNE  LBB3_454
	JMP  LBB3_865

// LBB3_121: 64-bit element copy. For edi == 8, copies each of the r8d qwords
// from in (rdx) to out (rcx) unchanged.
//   edi == 7 -> LBB3_215; any other value than 8 -> LBB3_865.
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 16, control goes to LBB3_455 when the two ranges do not
// overlap (in + 8*n <= out, or out + 8*n <= in). LBB3_455 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar loops below.
LBB3_121:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB3_215
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB3_865
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_125
	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_455
	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_455

// Scalar path starts at element index rsi = 0.
LBB3_125:
	WORD $0xf631 // xor    esi, esi

// r8 = r9 - rsi - 1 (computed as ~rsi + r9); rdi = r9 & 3 is the number of
// single-element iterations run before the 4x-unrolled loop.
LBB3_700:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_702

// One element per iteration, rdi times.
LBB3_701:
	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB3_701

// Done when r8 (computed at LBB3_700) is below 3, unsigned.
LBB3_702:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Four elements per iteration until rsi reaches the count in r9.
LBB3_703:
	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_703
	JMP  LBB3_865

// LBB3_126: 64-bit element copy. For edi == 8, copies each of the r8d qwords
// from in (rdx) to out (rcx) unchanged.
//   edi == 7 -> LBB3_218; any other value than 8 -> LBB3_865.
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 16, control goes to LBB3_458 when the two ranges do not
// overlap (in + 8*n <= out, or out + 8*n <= in). LBB3_458 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar loops below.
LBB3_126:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB3_218
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB3_865
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_130
	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_458
	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_458

// Scalar path starts at element index rsi = 0.
LBB3_130:
	WORD $0xf631 // xor    esi, esi

// r8 = r9 - rsi - 1 (computed as ~rsi + r9); rdi = r9 & 3 is the number of
// single-element iterations run before the 4x-unrolled loop.
LBB3_708:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_710

// One element per iteration, rdi times.
LBB3_709:
	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB3_709

// Done when r8 (computed at LBB3_708) is below 3, unsigned.
LBB3_710:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Four elements per iteration until rsi reaches the count in r9.
LBB3_711:
	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_711
	JMP  LBB3_865

// LBB3_131: length gate for a case whose body lies outside this view.
// Leaves via LBB3_865 when the signed 32-bit count in r8d is <= 0; otherwise
// r9 = zero-extended count, counts >= 64 go to LBB3_279, and shorter counts
// clear edx and go to LBB3_467.
LBB3_131:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x40f88341         // cmp    r8d, 64
	JAE  LBB3_279
	WORD $0xd231             // xor    edx, edx
	JMP  LBB3_467

// LBB3_134: 16-bit integer negate. Writes 0 - in[i] to out[i] for each of the
// r8d elements, reading words from rdx and storing words to rcx (the
// subtraction wraps on overflow).
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 32, control goes to LBB3_468 when the two ranges do not
// overlap (in + 2*n <= out, or out + 2*n <= in). LBB3_468 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar loops below.
LBB3_134:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_136
	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_468
	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_468

// Scalar path starts at element index rsi = 0.
LBB3_136:
	WORD $0xf631 // xor    esi, esi

// r8 = r9 - rsi - 1 (computed as ~rsi + r9); rax = r9 & 3 is the number of
// single-element iterations run before the 4x-unrolled loop.
LBB3_716:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
	LONG $0x03e08348         // and    rax, 3
	JE   LBB3_718

// One element per iteration, rax times.
LBB3_717:
	WORD $0xff31     // xor    edi, edi
	LONG $0x723c2b66 // sub    di, word [rdx + 2*rsi]
	LONG $0x713c8966 // mov    word [rcx + 2*rsi], di
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc08348 // add    rax, -1
	JNE  LBB3_717

// Done when r8 (computed at LBB3_716) is below 3, unsigned.
LBB3_718:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Four elements per iteration until rsi reaches the count in r9.
LBB3_719:
	WORD $0xc031                 // xor    eax, eax
	LONG $0x72042b66             // sub    ax, word [rdx + 2*rsi]
	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
	WORD $0xc031                 // xor    eax, eax
	LONG $0x72442b66; BYTE $0x02 // sub    ax, word [rdx + 2*rsi + 2]
	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
	WORD $0xc031                 // xor    eax, eax
	LONG $0x72442b66; BYTE $0x04 // sub    ax, word [rdx + 2*rsi + 4]
	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
	WORD $0xc031                 // xor    eax, eax
	LONG $0x72442b66; BYTE $0x06 // sub    ax, word [rdx + 2*rsi + 6]
	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_719
	JMP  LBB3_865

// LBB3_137: 16-bit integer negate. Writes 0 - in[i] to out[i] for each of the
// r8d elements, reading words from rdx and storing words to rcx (the
// subtraction wraps on overflow).
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 32, control goes to LBB3_471 when the two ranges do not
// overlap (in + 2*n <= out, or out + 2*n <= in). LBB3_471 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar loops below.
LBB3_137:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_139
	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_471
	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_471

// Scalar path starts at element index rsi = 0.
LBB3_139:
	WORD $0xf631 // xor    esi, esi

// r8 = r9 - rsi - 1 (computed as ~rsi + r9); rax = r9 & 3 is the number of
// single-element iterations run before the 4x-unrolled loop.
LBB3_724:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
	LONG $0x03e08348         // and    rax, 3
	JE   LBB3_726

// One element per iteration, rax times.
LBB3_725:
	WORD $0xff31     // xor    edi, edi
	LONG $0x723c2b66 // sub    di, word [rdx + 2*rsi]
	LONG $0x713c8966 // mov    word [rcx + 2*rsi], di
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc08348 // add    rax, -1
	JNE  LBB3_725

// Done when r8 (computed at LBB3_724) is below 3, unsigned.
LBB3_726:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Four elements per iteration until rsi reaches the count in r9.
LBB3_727:
	WORD $0xc031                 // xor    eax, eax
	LONG $0x72042b66             // sub    ax, word [rdx + 2*rsi]
	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
	WORD $0xc031                 // xor    eax, eax
	LONG $0x72442b66; BYTE $0x02 // sub    ax, word [rdx + 2*rsi + 2]
	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
	WORD $0xc031                 // xor    eax, eax
	LONG $0x72442b66; BYTE $0x04 // sub    ax, word [rdx + 2*rsi + 4]
	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
	WORD $0xc031                 // xor    eax, eax
	LONG $0x72442b66; BYTE $0x06 // sub    ax, word [rdx + 2*rsi + 6]
	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_727
	JMP  LBB3_865

// LBB3_140: 16-bit integer negate. Writes 0 - in[i] to out[i] for each of the
// r8d elements, reading words from rdx and storing words to rcx (the
// subtraction wraps on overflow).
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 32, control goes to LBB3_474 when the two ranges do not
// overlap (in + 2*n <= out, or out + 2*n <= in). LBB3_474 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar loops below.
LBB3_140:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_142
	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_474
	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_474

// Scalar path starts at element index rsi = 0.
LBB3_142:
	WORD $0xf631 // xor    esi, esi

// r8 = r9 - rsi - 1 (computed as ~rsi + r9); rax = r9 & 3 is the number of
// single-element iterations run before the 4x-unrolled loop.
LBB3_732:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
	LONG $0x03e08348         // and    rax, 3
	JE   LBB3_734

// One element per iteration, rax times.
LBB3_733:
	WORD $0xff31     // xor    edi, edi
	LONG $0x723c2b66 // sub    di, word [rdx + 2*rsi]
	LONG $0x713c8966 // mov    word [rcx + 2*rsi], di
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc08348 // add    rax, -1
	JNE  LBB3_733

// Done when r8 (computed at LBB3_732) is below 3, unsigned.
LBB3_734:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Four elements per iteration until rsi reaches the count in r9.
LBB3_735:
	WORD $0xc031                 // xor    eax, eax
	LONG $0x72042b66             // sub    ax, word [rdx + 2*rsi]
	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
	WORD $0xc031                 // xor    eax, eax
	LONG $0x72442b66; BYTE $0x02 // sub    ax, word [rdx + 2*rsi + 2]
	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
	WORD $0xc031                 // xor    eax, eax
	LONG $0x72442b66; BYTE $0x04 // sub    ax, word [rdx + 2*rsi + 4]
	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
	WORD $0xc031                 // xor    eax, eax
	LONG $0x72442b66; BYTE $0x06 // sub    ax, word [rdx + 2*rsi + 6]
	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_735
	JMP  LBB3_865

// LBB3_143: 16-bit "is non-zero" map. Writes 1 to out[i] when the word in[i]
// is non-zero and 0 otherwise, for each of the r8d elements (in = rdx,
// out = rcx, results stored as words).
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 32, control goes to LBB3_477 when the two ranges do not
// overlap (in + 2*n <= out, or out + 2*n <= in). LBB3_477 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar loops below.
LBB3_143:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_145
	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_477
	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_477

// Scalar path starts at element index rsi = 0.
LBB3_145:
	WORD $0xf631 // xor    esi, esi

// r8 = r9 - rsi - 1 (computed as ~rsi + r9); rax = r9 & 3 is the number of
// single-element iterations run before the 4x-unrolled loop.
LBB3_740:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
	LONG $0x03e08348         // and    rax, 3
	JE   LBB3_742

// One element per iteration, rax times; edi is zeroed first so that setne
// leaves a clean 0/1 in di.
LBB3_741:
	WORD $0xff31                 // xor    edi, edi
	LONG $0x723c8366; BYTE $0x00 // cmp    word [rdx + 2*rsi], 0
	LONG $0xd7950f40             // setne    dil
	LONG $0x713c8966             // mov    word [rcx + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc08348             // add    rax, -1
	JNE  LBB3_741

// Done when r8 (computed at LBB3_740) is below 3, unsigned.
LBB3_742:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Four elements per iteration until rsi reaches the count in r9.
LBB3_743:
	WORD $0xc031                   // xor    eax, eax
	LONG $0x723c8366; BYTE $0x00   // cmp    word [rdx + 2*rsi], 0
	WORD $0x950f; BYTE $0xd0       // setne    al
	LONG $0x71048966               // mov    word [rcx + 2*rsi], ax
	WORD $0xc031                   // xor    eax, eax
	LONG $0x727c8366; WORD $0x0002 // cmp    word [rdx + 2*rsi + 2], 0
	WORD $0x950f; BYTE $0xd0       // setne    al
	LONG $0x71448966; BYTE $0x02   // mov    word [rcx + 2*rsi + 2], ax
	WORD $0xc031                   // xor    eax, eax
	LONG $0x727c8366; WORD $0x0004 // cmp    word [rdx + 2*rsi + 4], 0
	WORD $0x950f; BYTE $0xd0       // setne    al
	LONG $0x71448966; BYTE $0x04   // mov    word [rcx + 2*rsi + 4], ax
	WORD $0xc031                   // xor    eax, eax
	LONG $0x727c8366; WORD $0x0006 // cmp    word [rdx + 2*rsi + 6], 0
	WORD $0x950f; BYTE $0xd0       // setne    al
	LONG $0x71448966; BYTE $0x06   // mov    word [rcx + 2*rsi + 6], ax
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
	JNE  LBB3_743
	JMP  LBB3_865

// LBB3_146: 16-bit signed sign function. For each of the r8d elements, writes
// 1 when the word in[i] is > 0, -1 (0xFFFF) when it is < 0 and 0 when it is
// zero (in = rdx, out = rcx; the comparisons use the signed condition codes).
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 32, control goes to LBB3_480 when the two ranges do not
// overlap (in + 2*n <= out, or out + 2*n <= in). LBB3_480 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar code below.
LBB3_146:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_148
	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_480
	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_480

// Scalar path starts at element index rsi = 0.
LBB3_148:
	WORD $0xf631 // xor    esi, esi

// rax = ~rsi, kept for the remaining-count check at LBB3_750. When the count
// is odd (bit 0 of r9 set), one element is processed here first:
// r10d = -(x != 0), edi = 1, and cmovle replaces edi with r10d when x <= 0.
// "or rsi, 1" then steps the index past that element.
LBB3_748:
	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
	WORD $0xf748; BYTE $0xd0     // not    rax
	LONG $0x01c1f641             // test    r9b, 1
	JE   LBB3_750
	LONG $0x04b70f44; BYTE $0x72 // movzx    r8d, word [rdx + 2*rsi]
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	LONG $0xc0854566             // test    r8w, r8w
	LONG $0xd2950f41             // setne    r10b
	WORD $0xf741; BYTE $0xda     // neg    r10d
	LONG $0xc0854566             // test    r8w, r8w
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xfa4e0f41             // cmovle    edi, r10d
	LONG $0x713c8966             // mov    word [rcx + 2*rsi], di
	LONG $0x01ce8348             // or    rsi, 1

// rax + r9 = r9 - 1 - (rsi as it was at LBB3_748); zero means nothing is
// left. r8d = 1 is the constant used by cmovg in the loop below.
LBB3_750:
	WORD $0x014c; BYTE $0xc8       // add    rax, r9
	JE   LBB3_865
	LONG $0x0001b841; WORD $0x0000 // mov    r8d, 1

// Two elements per iteration until rsi reaches the count in r9: the result
// starts as -(x != 0) and is replaced by 1 when x > 0.
LBB3_752:
	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
	WORD $0xc031                 // xor    eax, eax
	WORD $0x8566; BYTE $0xff     // test    di, di
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0xd8f7                 // neg    eax
	WORD $0x8566; BYTE $0xff     // test    di, di
	LONG $0xc04f0f41             // cmovg    eax, r8d
	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
	WORD $0xff31                 // xor    edi, edi
	WORD $0x8566; BYTE $0xc0     // test    ax, ax
	LONG $0xd7950f40             // setne    dil
	WORD $0xdff7                 // neg    edi
	WORD $0x8566; BYTE $0xc0     // test    ax, ax
	LONG $0xf84f0f41             // cmovg    edi, r8d
	LONG $0x717c8966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], di
	LONG $0x02c68348             // add    rsi, 2
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_752
	JMP  LBB3_865

// LBB3_149: 16-bit element copy. Copies each of the r8d words from in (rdx)
// to out (rcx) unchanged.
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 32, control goes to LBB3_483 when the two ranges do not
// overlap (in + 2*n <= out, or out + 2*n <= in). LBB3_483 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar loops below.
LBB3_149:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_151
	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_483
	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_483

// Scalar path starts at element index rsi = 0.
LBB3_151:
	WORD $0xf631 // xor    esi, esi

// r8 = r9 - rsi - 1 (computed as ~rsi + r9); rdi = r9 & 3 is the number of
// single-element iterations run before the 4x-unrolled loop.
LBB3_598:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_600

// One element per iteration, rdi times.
LBB3_599:
	LONG $0x7204b70f // movzx    eax, word [rdx + 2*rsi]
	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB3_599

// Done when r8 (computed at LBB3_598) is below 3, unsigned.
LBB3_600:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Four elements per iteration until rsi reaches the count in r9.
LBB3_601:
	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
	LONG $0x7244b70f; BYTE $0x04 // movzx    eax, word [rdx + 2*rsi + 4]
	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
	LONG $0x7244b70f; BYTE $0x06 // movzx    eax, word [rdx + 2*rsi + 6]
	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_601
	JMP  LBB3_865

// LBB3_152: 16-bit signed absolute value. For each of the r8d elements,
// sign-extends the word in[i], builds m = x >> 15 (arithmetic shift: 0 for
// non-negative x, -1 for negative x), and stores the low 16 bits of
// (x + m) ^ m to out[i] (in = rdx, out = rcx). The most negative input,
// 0x8000, is stored back as 0x8000.
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 16, control goes to LBB3_485 when the two ranges do not
// overlap (in + 2*n <= out, or out + 2*n <= in). LBB3_485 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar code below.
LBB3_152:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_154
	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_485
	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_485

// Scalar path starts at element index rsi = 0.
LBB3_154:
	WORD $0xf631 // xor    esi, esi

// rax = ~rsi, kept for the remaining-count check at LBB3_759. When the count
// is odd (bit 0 of r9 set), one element is processed here first and
// "or rsi, 1" steps the index past it.
LBB3_757:
	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
	WORD $0xf748; BYTE $0xd0 // not    rax
	LONG $0x01c1f641         // test    r9b, 1
	JE   LBB3_759
	LONG $0x723cbf0f         // movsx    edi, word [rdx + 2*rsi]
	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
	LONG $0x0ff8c141         // sar    r8d, 15
	WORD $0x0144; BYTE $0xc7 // add    edi, r8d
	WORD $0x3144; BYTE $0xc7 // xor    edi, r8d
	LONG $0x713c8966         // mov    word [rcx + 2*rsi], di
	LONG $0x01ce8348         // or    rsi, 1

// rax + r9 = r9 - 1 - (rsi as it was at LBB3_757); zero means nothing is
// left.
LBB3_759:
	WORD $0x014c; BYTE $0xc8 // add    rax, r9
	JE   LBB3_865

// Two elements per iteration until rsi reaches the count in r9.
LBB3_760:
	LONG $0x7204bf0f             // movsx    eax, word [rdx + 2*rsi]
	WORD $0xc789                 // mov    edi, eax
	WORD $0xffc1; BYTE $0x0f     // sar    edi, 15
	WORD $0xf801                 // add    eax, edi
	WORD $0xf831                 // xor    eax, edi
	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
	LONG $0x7244bf0f; BYTE $0x02 // movsx    eax, word [rdx + 2*rsi + 2]
	WORD $0xc789                 // mov    edi, eax
	WORD $0xffc1; BYTE $0x0f     // sar    edi, 15
	WORD $0xf801                 // add    eax, edi
	WORD $0xf831                 // xor    eax, edi
	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
	LONG $0x02c68348             // add    rsi, 2
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_760
	JMP  LBB3_865

// LBB3_155: 16-bit element copy. Copies each of the r8d words from in (rdx)
// to out (rcx) unchanged.
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 32, control goes to LBB3_488 when the two ranges do not
// overlap (in + 2*n <= out, or out + 2*n <= in). LBB3_488 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar loops below.
LBB3_155:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_157
	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_488
	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_488

// Scalar path starts at element index rsi = 0.
LBB3_157:
	WORD $0xf631 // xor    esi, esi

// r8 = r9 - rsi - 1 (computed as ~rsi + r9); rdi = r9 & 3 is the number of
// single-element iterations run before the 4x-unrolled loop.
LBB3_608:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_610

// One element per iteration, rdi times.
LBB3_609:
	LONG $0x7204b70f // movzx    eax, word [rdx + 2*rsi]
	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB3_609

// Done when r8 (computed at LBB3_608) is below 3, unsigned.
LBB3_610:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Four elements per iteration until rsi reaches the count in r9.
LBB3_611:
	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
	LONG $0x7244b70f; BYTE $0x04 // movzx    eax, word [rdx + 2*rsi + 4]
	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
	LONG $0x7244b70f; BYTE $0x06 // movzx    eax, word [rdx + 2*rsi + 6]
	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_611
	JMP  LBB3_865

// LBB3_158: 16-bit signed absolute value. For each of the r8d elements,
// sign-extends the word in[i], builds m = x >> 15 (arithmetic shift: 0 for
// non-negative x, -1 for negative x), and stores the low 16 bits of
// (x + m) ^ m to out[i] (in = rdx, out = rcx). The most negative input,
// 0x8000, is stored back as 0x8000.
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 16, control goes to LBB3_490 when the two ranges do not
// overlap (in + 2*n <= out, or out + 2*n <= in). LBB3_490 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar code below.
LBB3_158:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_160
	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_490
	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_490

// Scalar path starts at element index rsi = 0.
LBB3_160:
	WORD $0xf631 // xor    esi, esi

// rax = ~rsi, kept for the remaining-count check at LBB3_767. When the count
// is odd (bit 0 of r9 set), one element is processed here first and
// "or rsi, 1" steps the index past it.
LBB3_765:
	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
	WORD $0xf748; BYTE $0xd0 // not    rax
	LONG $0x01c1f641         // test    r9b, 1
	JE   LBB3_767
	LONG $0x723cbf0f         // movsx    edi, word [rdx + 2*rsi]
	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
	LONG $0x0ff8c141         // sar    r8d, 15
	WORD $0x0144; BYTE $0xc7 // add    edi, r8d
	WORD $0x3144; BYTE $0xc7 // xor    edi, r8d
	LONG $0x713c8966         // mov    word [rcx + 2*rsi], di
	LONG $0x01ce8348         // or    rsi, 1

// rax + r9 = r9 - 1 - (rsi as it was at LBB3_765); zero means nothing is
// left.
LBB3_767:
	WORD $0x014c; BYTE $0xc8 // add    rax, r9
	JE   LBB3_865

// Two elements per iteration until rsi reaches the count in r9.
LBB3_768:
	LONG $0x7204bf0f             // movsx    eax, word [rdx + 2*rsi]
	WORD $0xc789                 // mov    edi, eax
	WORD $0xffc1; BYTE $0x0f     // sar    edi, 15
	WORD $0xf801                 // add    eax, edi
	WORD $0xf831                 // xor    eax, edi
	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
	LONG $0x7244bf0f; BYTE $0x02 // movsx    eax, word [rdx + 2*rsi + 2]
	WORD $0xc789                 // mov    edi, eax
	WORD $0xffc1; BYTE $0x0f     // sar    edi, 15
	WORD $0xf801                 // add    eax, edi
	WORD $0xf831                 // xor    eax, edi
	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
	LONG $0x02c68348             // add    rsi, 2
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_768
	JMP  LBB3_865

// LBB3_161: 64-bit integer negate. Writes 0 - in[i] to out[i] for each of the
// r8d elements, reading qwords from rdx and storing qwords to rcx (the
// subtraction wraps on overflow).
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 16, control goes to LBB3_493 when the two ranges do not
// overlap (in + 8*n <= out, or out + 8*n <= in). LBB3_493 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar loops below.
LBB3_161:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_163
	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_493
	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_493

// Scalar path starts at element index rsi = 0.
LBB3_163:
	WORD $0xf631 // xor    esi, esi

// r8 = r9 - rsi - 1 (computed as ~rsi + r9); rdi = r9 & 3 is the number of
// single-element iterations run before the 4x-unrolled loop.
LBB3_773:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_775

// One element per iteration, rdi times.
LBB3_774:
	WORD $0xc031     // xor    eax, eax
	LONG $0xf2042b48 // sub    rax, qword [rdx + 8*rsi]
	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB3_774

// Done when r8 (computed at LBB3_773) is below 3, unsigned.
LBB3_775:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Four elements per iteration until rsi reaches the count in r9.
LBB3_776:
	WORD $0xc031                 // xor    eax, eax
	LONG $0xf2042b48             // sub    rax, qword [rdx + 8*rsi]
	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
	WORD $0xc031                 // xor    eax, eax
	LONG $0xf2442b48; BYTE $0x08 // sub    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
	WORD $0xc031                 // xor    eax, eax
	LONG $0xf2442b48; BYTE $0x10 // sub    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
	WORD $0xc031                 // xor    eax, eax
	LONG $0xf2442b48; BYTE $0x18 // sub    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_776
	JMP  LBB3_865

// LBB3_164: 32-bit float map that XORs every element with a broadcast
// constant. For each of the r8d elements, loads the dword in[i] with vmovss,
// XORs it with xmm0 and stores the low dword to out[i] (in = rdx, out = rcx).
// xmm0 is filled by vbroadcastss from offset 68 off rbp (.LCPI3_7 in the
// original listing). NOTE(review): that constant is defined outside this
// view; presumably it is the float32 sign-bit mask, which would make this a
// negate -- confirm against the DATA section.
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 32, control goes to LBB3_496 when the two ranges do not
// overlap (in + 4*n <= out, or out + 4*n <= in). LBB3_496 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar loops below.
LBB3_164:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_166
	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_496
	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_496

// Scalar path starts at element index rsi = 0.
LBB3_166:
	WORD $0xf631 // xor    esi, esi

// rax = r9 - rsi - 1 (computed as ~rsi + r9); rdi = r9 & 3 is the number of
// single-element iterations run before the 4x-unrolled loop. The constant is
// only loaded when that count is non-zero.
LBB3_781:
	WORD $0x8948; BYTE $0xf0       // mov    rax, rsi
	WORD $0xf748; BYTE $0xd0       // not    rax
	WORD $0x014c; BYTE $0xc8       // add    rax, r9
	WORD $0x894c; BYTE $0xcf       // mov    rdi, r9
	LONG $0x03e78348               // and    rdi, 3
	JE   LBB3_784
	LONG $0x1879e2c4; WORD $0x4445 // vbroadcastss    xmm0, dword 68[rbp] /* [rip + .LCPI3_7] */

// One element per iteration, rdi times.
LBB3_783:
	LONG $0x0c10fac5; BYTE $0xb2 // vmovss    xmm1, dword [rdx + 4*rsi]
	LONG $0xc857f1c5             // vxorpd    xmm1, xmm1, xmm0
	LONG $0x0c11fac5; BYTE $0xb1 // vmovss    dword [rcx + 4*rsi], xmm1
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB3_783

// Done when rax (computed at LBB3_781) is below 3, unsigned; otherwise the
// constant is (re)loaded because this label is also reached without passing
// through the load above.
LBB3_784:
	LONG $0x03f88348               // cmp    rax, 3
	JB   LBB3_865
	LONG $0x1879e2c4; WORD $0x4445 // vbroadcastss    xmm0, dword 68[rbp] /* [rip + .LCPI3_7] */

// Four elements per iteration until rsi reaches the count in r9.
LBB3_786:
	LONG $0x0c10fac5; BYTE $0xb2   // vmovss    xmm1, dword [rdx + 4*rsi]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x0c11fac5; BYTE $0xb1   // vmovss    dword [rcx + 4*rsi], xmm1
	LONG $0x4c10fac5; WORD $0x04b2 // vmovss    xmm1, dword [rdx + 4*rsi + 4]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x4c11fac5; WORD $0x04b1 // vmovss    dword [rcx + 4*rsi + 4], xmm1
	LONG $0x4c10fac5; WORD $0x08b2 // vmovss    xmm1, dword [rdx + 4*rsi + 8]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x4c11fac5; WORD $0x08b1 // vmovss    dword [rcx + 4*rsi + 8], xmm1
	LONG $0x4c10fac5; WORD $0x0cb2 // vmovss    xmm1, dword [rdx + 4*rsi + 12]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x4c11fac5; WORD $0x0cb1 // vmovss    dword [rcx + 4*rsi + 12], xmm1
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
	JNE  LBB3_786
	JMP  LBB3_865

// LBB3_167: 64-bit integer negate. Writes 0 - in[i] to out[i] for each of the
// r8d elements, reading qwords from rdx and storing qwords to rcx (the
// subtraction wraps on overflow).
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 16, control goes to LBB3_499 when the two ranges do not
// overlap (in + 8*n <= out, or out + 8*n <= in). LBB3_499 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar loops below.
LBB3_167:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_169
	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_499
	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_499

// Scalar path starts at element index rsi = 0.
LBB3_169:
	WORD $0xf631 // xor    esi, esi

// r8 = r9 - rsi - 1 (computed as ~rsi + r9); rdi = r9 & 3 is the number of
// single-element iterations run before the 4x-unrolled loop.
LBB3_791:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_793

// One element per iteration, rdi times.
LBB3_792:
	WORD $0xc031     // xor    eax, eax
	LONG $0xf2042b48 // sub    rax, qword [rdx + 8*rsi]
	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB3_792

// Done when r8 (computed at LBB3_791) is below 3, unsigned.
LBB3_793:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Four elements per iteration until rsi reaches the count in r9.
LBB3_794:
	WORD $0xc031                 // xor    eax, eax
	LONG $0xf2042b48             // sub    rax, qword [rdx + 8*rsi]
	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
	WORD $0xc031                 // xor    eax, eax
	LONG $0xf2442b48; BYTE $0x08 // sub    rax, qword [rdx + 8*rsi + 8]
	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
	WORD $0xc031                 // xor    eax, eax
	LONG $0xf2442b48; BYTE $0x10 // sub    rax, qword [rdx + 8*rsi + 16]
	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
	WORD $0xc031                 // xor    eax, eax
	LONG $0xf2442b48; BYTE $0x18 // sub    rax, qword [rdx + 8*rsi + 24]
	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_794
	JMP  LBB3_865

// LBB3_170: 32-bit float map that XORs every element with a broadcast
// constant. For each of the r8d elements, loads the dword in[i] with vmovss,
// XORs it with xmm0 and stores the low dword to out[i] (in = rdx, out = rcx).
// xmm0 is filled by vbroadcastss from offset 68 off rbp (.LCPI3_7 in the
// original listing). NOTE(review): that constant is defined outside this
// view; presumably it is the float32 sign-bit mask, which would make this a
// negate -- confirm against the DATA section.
//   A signed count <= 0 goes straight to LBB3_865.
// With a count >= 32, control goes to LBB3_502 when the two ranges do not
// overlap (in + 4*n <= out, or out + 4*n <= in). LBB3_502 is outside this
// view; presumably it is the vectorized path. Everything else uses the
// scalar loops below.
LBB3_170:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_172
	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_502
	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_502

// Scalar path starts at element index rsi = 0.
LBB3_172:
	WORD $0xf631 // xor    esi, esi

// rax = r9 - rsi - 1 (computed as ~rsi + r9); rdi = r9 & 3 is the number of
// single-element iterations run before the 4x-unrolled loop. The constant is
// only loaded when that count is non-zero.
LBB3_799:
	WORD $0x8948; BYTE $0xf0       // mov    rax, rsi
	WORD $0xf748; BYTE $0xd0       // not    rax
	WORD $0x014c; BYTE $0xc8       // add    rax, r9
	WORD $0x894c; BYTE $0xcf       // mov    rdi, r9
	LONG $0x03e78348               // and    rdi, 3
	JE   LBB3_802
	LONG $0x1879e2c4; WORD $0x4445 // vbroadcastss    xmm0, dword 68[rbp] /* [rip + .LCPI3_7] */

// One element per iteration, rdi times.
LBB3_801:
	LONG $0x0c10fac5; BYTE $0xb2 // vmovss    xmm1, dword [rdx + 4*rsi]
	LONG $0xc857f1c5             // vxorpd    xmm1, xmm1, xmm0
	LONG $0x0c11fac5; BYTE $0xb1 // vmovss    dword [rcx + 4*rsi], xmm1
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB3_801

// Done when rax (computed at LBB3_799) is below 3, unsigned; otherwise the
// constant is (re)loaded because this label is also reached without passing
// through the load above.
LBB3_802:
	LONG $0x03f88348               // cmp    rax, 3
	JB   LBB3_865
	LONG $0x1879e2c4; WORD $0x4445 // vbroadcastss    xmm0, dword 68[rbp] /* [rip + .LCPI3_7] */

// Four elements per iteration until rsi reaches the count in r9.
LBB3_804:
	LONG $0x0c10fac5; BYTE $0xb2   // vmovss    xmm1, dword [rdx + 4*rsi]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x0c11fac5; BYTE $0xb1   // vmovss    dword [rcx + 4*rsi], xmm1
	LONG $0x4c10fac5; WORD $0x04b2 // vmovss    xmm1, dword [rdx + 4*rsi + 4]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x4c11fac5; WORD $0x04b1 // vmovss    dword [rcx + 4*rsi + 4], xmm1
	LONG $0x4c10fac5; WORD $0x08b2 // vmovss    xmm1, dword [rdx + 4*rsi + 8]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x4c11fac5; WORD $0x08b1 // vmovss    dword [rcx + 4*rsi + 8], xmm1
	LONG $0x4c10fac5; WORD $0x0cb2 // vmovss    xmm1, dword [rdx + 4*rsi + 12]
	LONG $0xc857f1c5               // vxorpd    xmm1, xmm1, xmm0
	LONG $0x4c11fac5; WORD $0x0cb1 // vmovss    dword [rcx + 4*rsi + 12], xmm1
	LONG $0x04c68348               // add    rsi, 4
	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
	JNE  LBB3_804
	JMP  LBB3_865

// LBB3_173: scalar sign kernel over 64-bit signed integers.
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// Each output is 1 when the input is > 0, otherwise -(input != 0),
// i.e. -1 for negative inputs and 0 for zero.
// Flow: exit if count <= 0; if count >= 16 and the buffers do not overlap,
// branch to LBB3_505 (outside this view, presumably the vector path);
// otherwise run the scalar code below starting at index 0.
LBB3_173:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc3 // mov    r11d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_175
	LONG $0xda348d4a         // lea    rsi, [rdx + 8*r11]
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	JBE  LBB3_505
	LONG $0xd9348d4a         // lea    rsi, [rcx + 8*r11]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	JBE  LBB3_505

LBB3_175:
	WORD $0xf631 // xor    esi, esi

// rsi = current index, r11 = count. If count is odd, handle one element here
// so the main loop can step by two.
LBB3_508:
	WORD $0x8949; BYTE $0xf2     // mov    r10, rsi
	WORD $0xf749; BYTE $0xd2     // not    r10
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB3_510
	LONG $0xf2048b4c             // mov    r8, qword [rdx + 8*rsi]
	WORD $0x3145; BYTE $0xc9     // xor    r9d, r9d
	WORD $0x854d; BYTE $0xc0     // test    r8, r8
	LONG $0xd1950f41             // setne    r9b
	WORD $0xf749; BYTE $0xd9     // neg    r9
	WORD $0x854d; BYTE $0xc0     // test    r8, r8
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xf94e0f49             // cmovle    rdi, r9
	LONG $0xf13c8948             // mov    qword [rcx + 8*rsi], rdi
	LONG $0x01ce8348             // or    rsi, 1

// r10 becomes count - (index on entry to LBB3_508) - 1; zero means done.
LBB3_510:
	WORD $0x014d; BYTE $0xda       // add    r10, r11
	JE   LBB3_865
	LONG $0x0001b841; WORD $0x0000 // mov    r8d, 1

// Main loop: two elements per iteration until rsi == count (r11).
LBB3_512:
	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
	WORD $0xff31                 // xor    edi, edi
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	LONG $0xd7950f40             // setne    dil
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	LONG $0xf84f0f49             // cmovg    rdi, r8
	LONG $0xf13c8948             // mov    qword [rcx + 8*rsi], rdi
	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
	WORD $0xff31                 // xor    edi, edi
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	LONG $0xd7950f40             // setne    dil
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	LONG $0xf84f0f49             // cmovg    rdi, r8
	LONG $0xf17c8948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rdi
	LONG $0x02c68348             // add    rsi, 2
	WORD $0x3949; BYTE $0xf3     // cmp    r11, rsi
	JNE  LBB3_512
	JMP  LBB3_865

// LBB3_176: scalar sign kernel over 32-bit floats.
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// Per element: take the sign bit with vmovmskps, turn it into the integer
// +1 or -1 (neg, then or 1), convert that to float, and zero the result
// when the input compares equal to 0.0 (vcmpeqss mask + vandnps).
// Flow: exit if count <= 0; if count >= 32 and the buffers do not overlap,
// branch to LBB3_513 (outside this view, presumably the vector path);
// otherwise run the scalar code below starting at index 0.
LBB3_176:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8944; BYTE $0xc0 // mov    eax, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_178
	LONG $0x82348d48         // lea    rsi, [rdx + 4*rax]
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	JBE  LBB3_513
	LONG $0x81348d48         // lea    rsi, [rcx + 4*rax]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	JBE  LBB3_513

LBB3_178:
	WORD $0xf631 // xor    esi, esi

// rsi = current index, rax = count. If count is odd, handle one element here
// so the main loop can step by two.
LBB3_516:
	WORD $0x8949; BYTE $0xf0     // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0     // not    r8
	WORD $0x01a8                 // test    al, 1
	JE   LBB3_518
	LONG $0x0410fac5; BYTE $0xb2 // vmovss    xmm0, dword [rdx + 4*rsi]
	LONG $0xf850f8c5             // vmovmskps    edi, xmm0
	WORD $0xe783; BYTE $0x01     // and    edi, 1
	WORD $0xdff7                 // neg    edi
	WORD $0xcf83; BYTE $0x01     // or    edi, 1
	LONG $0xcf2aaac5             // vcvtsi2ss    xmm1, xmm10, edi
	LONG $0xd257e8c5             // vxorps    xmm2, xmm2, xmm2
	LONG $0xc2c2fac5; BYTE $0x00 // vcmpeqss    xmm0, xmm0, xmm2
	LONG $0xc155f8c5             // vandnps    xmm0, xmm0, xmm1
	LONG $0x0411fac5; BYTE $0xb1 // vmovss    dword [rcx + 4*rsi], xmm0
	LONG $0x01ce8348             // or    rsi, 1

// r8 becomes count - (index on entry to LBB3_516) - 1; zero means done.
// xmm0 is zeroed to serve as the 0.0 comparand in the loop.
LBB3_518:
	WORD $0x0149; BYTE $0xc0 // add    r8, rax
	JE   LBB3_865
	LONG $0xc057f8c5         // vxorps    xmm0, xmm0, xmm0

// Main loop: two elements per iteration until rsi == count (rax).
LBB3_520:
	LONG $0x0c10fac5; BYTE $0xb2   // vmovss    xmm1, dword [rdx + 4*rsi]
	LONG $0xf950f8c5               // vmovmskps    edi, xmm1
	WORD $0xe783; BYTE $0x01       // and    edi, 1
	WORD $0xdff7                   // neg    edi
	WORD $0xcf83; BYTE $0x01       // or    edi, 1
	LONG $0xd72aaac5               // vcvtsi2ss    xmm2, xmm10, edi
	LONG $0xc8c2f2c5; BYTE $0x00   // vcmpeqss    xmm1, xmm1, xmm0
	LONG $0xca55f0c5               // vandnps    xmm1, xmm1, xmm2
	LONG $0x0c11fac5; BYTE $0xb1   // vmovss    dword [rcx + 4*rsi], xmm1
	LONG $0x4c10fac5; WORD $0x04b2 // vmovss    xmm1, dword [rdx + 4*rsi + 4]
	LONG $0xf950f8c5               // vmovmskps    edi, xmm1
	WORD $0xe783; BYTE $0x01       // and    edi, 1
	WORD $0xdff7                   // neg    edi
	WORD $0xcf83; BYTE $0x01       // or    edi, 1
	LONG $0xd72aaac5               // vcvtsi2ss    xmm2, xmm10, edi
	LONG $0xc8c2f2c5; BYTE $0x00   // vcmpeqss    xmm1, xmm1, xmm0
	LONG $0xca55f0c5               // vandnps    xmm1, xmm1, xmm2
	LONG $0x4c11fac5; WORD $0x04b1 // vmovss    dword [rcx + 4*rsi + 4], xmm1
	LONG $0x02c68348               // add    rsi, 2
	WORD $0x3948; BYTE $0xf0       // cmp    rax, rsi
	JNE  LBB3_520
	JMP  LBB3_865

// LBB3_179: scalar absolute-value kernel over 64-bit signed integers.
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// Per element: compute -x, then keep x instead when the negation came out
// "less" (cmovl), i.e. when x was positive. The minimum int64 value is its
// own negation and is stored unchanged.
// Flow: exit if count <= 0; if count >= 16 and the buffers do not overlap,
// branch to LBB3_521 (outside this view, presumably the vector path);
// otherwise run the scalar code below starting at index 0.
LBB3_179:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc2 // mov    r10d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_181
	LONG $0xd2348d4a         // lea    rsi, [rdx + 8*r10]
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	JBE  LBB3_521
	LONG $0xd1348d4a         // lea    rsi, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	JBE  LBB3_521

LBB3_181:
	WORD $0xf631 // xor    esi, esi

// rsi = current index, r10 = count. If count is odd, handle one element here
// so the main loop can step by two.
LBB3_524:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	LONG $0x01c2f641         // test    r10b, 1
	JE   LBB3_526
	LONG $0xf2048b4c         // mov    r8, qword [rdx + 8*rsi]
	WORD $0x894c; BYTE $0xc7 // mov    rdi, r8
	WORD $0xf748; BYTE $0xdf // neg    rdi
	LONG $0xf84c0f49         // cmovl    rdi, r8
	LONG $0xf13c8948         // mov    qword [rcx + 8*rsi], rdi
	LONG $0x01ce8348         // or    rsi, 1

// r9 becomes count - (index on entry to LBB3_524) - 1; zero means done.
LBB3_526:
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	JE   LBB3_865

// Main loop: two elements per iteration until rsi == count (r10).
LBB3_527:
	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
	WORD $0x8948; BYTE $0xc7     // mov    rdi, rax
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	LONG $0xf84c0f48             // cmovl    rdi, rax
	LONG $0xf13c8948             // mov    qword [rcx + 8*rsi], rdi
	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
	WORD $0x8948; BYTE $0xc7     // mov    rdi, rax
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	LONG $0xf84c0f48             // cmovl    rdi, rax
	LONG $0xf17c8948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rdi
	LONG $0x02c68348             // add    rsi, 2
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB3_527
	JMP  LBB3_865

// LBB3_182: scalar kernel that clears bit 31 of every 32-bit element
// (AND with 0x7fffffff) and stores the result.
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// NOTE(review): presumably absolute value of float32 data -- the element
// type is not visible here, confirm against the dispatch code.
// Flow: exit if count <= 0; if count >= 32 and the buffers do not overlap,
// branch to LBB3_528 (outside this view, presumably the vector path);
// otherwise run the scalar loops below starting at index 0.
LBB3_182:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_184
	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_528
	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_528

LBB3_184:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r8 = count - rsi - 1, rdi = count & 3 (peel count).
LBB3_809:
	WORD $0x8949; BYTE $0xf0       // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0       // not    r8
	WORD $0x014d; BYTE $0xc8       // add    r8, r9
	WORD $0x894c; BYTE $0xcf       // mov    rdi, r9
	LONG $0x03e78348               // and    rdi, 3
	JE   LBB3_812
	LONG $0xffffba41; WORD $0x7fff // mov    r10d, 2147483647

// Peel loop: one element per iteration, rdi iterations.
LBB3_811:
	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
	WORD $0x2144; BYTE $0xd0 // and    eax, r10d
	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB3_811

// Fewer than four elements were left at LBB3_809: the peel loop covered them.
LBB3_812:
	LONG $0x03f88349             // cmp    r8, 3
	JB   LBB3_865
	LONG $0xffffffb8; BYTE $0x7f // mov    eax, 2147483647

// Main loop: four elements per iteration until rsi == count (r9).
LBB3_814:
	WORD $0x3c8b; BYTE $0xb2 // mov    edi, dword [rdx + 4*rsi]
	WORD $0xc721             // and    edi, eax
	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
	LONG $0x04b27c8b         // mov    edi, dword [rdx + 4*rsi + 4]
	WORD $0xc721             // and    edi, eax
	LONG $0x04b17c89         // mov    dword [rcx + 4*rsi + 4], edi
	LONG $0x08b27c8b         // mov    edi, dword [rdx + 4*rsi + 8]
	WORD $0xc721             // and    edi, eax
	LONG $0x08b17c89         // mov    dword [rcx + 4*rsi + 8], edi
	LONG $0x0cb27c8b         // mov    edi, dword [rdx + 4*rsi + 12]
	WORD $0xc721             // and    edi, eax
	LONG $0x0cb17c89         // mov    dword [rcx + 4*rsi + 12], edi
	LONG $0x04c68348         // add    rsi, 4
	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
	JNE  LBB3_814
	JMP  LBB3_865

// LBB3_185: scalar absolute-value kernel over 64-bit signed integers.
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// Per element: compute -x, then keep x instead when the negation came out
// "less" (cmovl), i.e. when x was positive. The minimum int64 value is its
// own negation and is stored unchanged.
// Flow: exit if count <= 0; if count >= 16 and the buffers do not overlap,
// branch to LBB3_531 (outside this view, presumably the vector path);
// otherwise run the scalar code below starting at index 0.
LBB3_185:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc2 // mov    r10d, r8d
	LONG $0x10f88341         // cmp    r8d, 16
	JB   LBB3_187
	LONG $0xd2348d4a         // lea    rsi, [rdx + 8*r10]
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	JBE  LBB3_531
	LONG $0xd1348d4a         // lea    rsi, [rcx + 8*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	JBE  LBB3_531

LBB3_187:
	WORD $0xf631 // xor    esi, esi

// rsi = current index, r10 = count. If count is odd, handle one element here
// so the main loop can step by two.
LBB3_534:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	LONG $0x01c2f641         // test    r10b, 1
	JE   LBB3_536
	LONG $0xf2048b4c         // mov    r8, qword [rdx + 8*rsi]
	WORD $0x894c; BYTE $0xc7 // mov    rdi, r8
	WORD $0xf748; BYTE $0xdf // neg    rdi
	LONG $0xf84c0f49         // cmovl    rdi, r8
	LONG $0xf13c8948         // mov    qword [rcx + 8*rsi], rdi
	LONG $0x01ce8348         // or    rsi, 1

// r9 becomes count - (index on entry to LBB3_534) - 1; zero means done.
LBB3_536:
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	JE   LBB3_865

// Main loop: two elements per iteration until rsi == count (r10).
LBB3_537:
	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
	WORD $0x8948; BYTE $0xc7     // mov    rdi, rax
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	LONG $0xf84c0f48             // cmovl    rdi, rax
	LONG $0xf13c8948             // mov    qword [rcx + 8*rsi], rdi
	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
	WORD $0x8948; BYTE $0xc7     // mov    rdi, rax
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	LONG $0xf84c0f48             // cmovl    rdi, rax
	LONG $0xf17c8948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rdi
	LONG $0x02c68348             // add    rsi, 2
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB3_537
	JMP  LBB3_865

// LBB3_188: scalar kernel that clears bit 31 of every 32-bit element
// (AND with 0x7fffffff) and stores the result.
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// NOTE(review): presumably absolute value of float32 data -- the element
// type is not visible here, confirm against the dispatch code.
// Flow: exit if count <= 0; if count >= 32 and the buffers do not overlap,
// branch to LBB3_538 (outside this view, presumably the vector path);
// otherwise run the scalar loops below starting at index 0.
LBB3_188:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_190
	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_538
	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_538

LBB3_190:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r8 = count - rsi - 1, rdi = count & 3 (peel count).
LBB3_819:
	WORD $0x8949; BYTE $0xf0       // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0       // not    r8
	WORD $0x014d; BYTE $0xc8       // add    r8, r9
	WORD $0x894c; BYTE $0xcf       // mov    rdi, r9
	LONG $0x03e78348               // and    rdi, 3
	JE   LBB3_822
	LONG $0xffffba41; WORD $0x7fff // mov    r10d, 2147483647

// Peel loop: one element per iteration, rdi iterations.
LBB3_821:
	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
	WORD $0x2144; BYTE $0xd0 // and    eax, r10d
	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB3_821

// Fewer than four elements were left at LBB3_819: the peel loop covered them.
LBB3_822:
	LONG $0x03f88349             // cmp    r8, 3
	JB   LBB3_865
	LONG $0xffffffb8; BYTE $0x7f // mov    eax, 2147483647

// Main loop: four elements per iteration until rsi == count (r9).
LBB3_824:
	WORD $0x3c8b; BYTE $0xb2 // mov    edi, dword [rdx + 4*rsi]
	WORD $0xc721             // and    edi, eax
	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
	LONG $0x04b27c8b         // mov    edi, dword [rdx + 4*rsi + 4]
	WORD $0xc721             // and    edi, eax
	LONG $0x04b17c89         // mov    dword [rcx + 4*rsi + 4], edi
	LONG $0x08b27c8b         // mov    edi, dword [rdx + 4*rsi + 8]
	WORD $0xc721             // and    edi, eax
	LONG $0x08b17c89         // mov    dword [rcx + 4*rsi + 8], edi
	LONG $0x0cb27c8b         // mov    edi, dword [rdx + 4*rsi + 12]
	WORD $0xc721             // and    edi, eax
	LONG $0x0cb17c89         // mov    dword [rcx + 4*rsi + 12], edi
	LONG $0x04c68348         // add    rsi, 4
	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
	JNE  LBB3_824
	JMP  LBB3_865

// LBB3_191: length dispatch stub. Exits when the count in r8d is <= 0,
// copies the count to r9d, and then either branches to LBB3_338 (count >= 128,
// where the 128-element chunk counts are computed) or zeroes edx and jumps to
// LBB3_547 (outside this view).
// NOTE(review): rdx is overwritten here, so on this path it is evidently not
// a source pointer as in the neighbouring kernels; the loop that consumes it
// is not visible in this chunk.
LBB3_191:
	WORD $0x8545; BYTE $0xc0                   // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1                   // mov    r9d, r8d
	LONG $0x80f88141; WORD $0x0000; BYTE $0x00 // cmp    r8d, 128
	JAE  LBB3_338
	WORD $0xd231                               // xor    edx, edx
	JMP  LBB3_547

// LBB3_194: scalar negate kernel over single bytes (out[i] = 0 - in[i]).
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// Flow: exit if count <= 0; if count >= 128 and the buffers do not overlap,
// branch to LBB3_548 (outside this view, presumably the vector path);
// otherwise run the scalar loops below starting at index 0.
LBB3_194:
	WORD $0x8545; BYTE $0xc0                   // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1                   // mov    r9d, r8d
	LONG $0x80f88141; WORD $0x0000; BYTE $0x00 // cmp    r8d, 128
	JB   LBB3_196
	LONG $0x0a048d4a                           // lea    rax, [rdx + r9]
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JBE  LBB3_548
	LONG $0x09048d4a                           // lea    rax, [rcx + r9]
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JBE  LBB3_548

LBB3_196:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r8 = count - rsi - 1, rdi = count & 3 (peel count).
LBB3_829:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_831

// Peel loop: one byte per iteration, rdi iterations.
LBB3_830:
	LONG $0x14b60f44; BYTE $0x32 // movzx    r10d, byte [rdx + rsi]
	WORD $0xc031                 // xor    eax, eax
	WORD $0x2844; BYTE $0xd0     // sub    al, r10b
	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
	LONG $0x01c68348             // add    rsi, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB3_830

// Fewer than four bytes were left at LBB3_829: the peel loop covered them.
LBB3_831:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main loop: four bytes per iteration until rsi == count (r9).
LBB3_832:
	WORD $0xc031                 // xor    eax, eax
	WORD $0x042a; BYTE $0x32     // sub    al, byte [rdx + rsi]
	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
	WORD $0xc031                 // xor    eax, eax
	LONG $0x0132442a             // sub    al, byte [rdx + rsi + 1]
	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
	WORD $0xc031                 // xor    eax, eax
	LONG $0x0232442a             // sub    al, byte [rdx + rsi + 2]
	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
	WORD $0xff31                 // xor    edi, edi
	WORD $0x2840; BYTE $0xc7     // sub    dil, al
	LONG $0x317c8840; BYTE $0x03 // mov    byte [rcx + rsi + 3], dil
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_832
	JMP  LBB3_865

// LBB3_197: scalar kernel over single bytes that writes 1 when the input
// byte is non-zero and 0 otherwise (cmp with 0, then setne to memory).
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// NOTE(review): presumably the sign operation for unsigned 8-bit data --
// confirm against the dispatch code.
// Flow: exit if count <= 0; if count >= 128 and the buffers do not overlap,
// branch to LBB3_551 (outside this view, presumably the vector path);
// otherwise run the scalar loops below starting at index 0.
LBB3_197:
	WORD $0x8545; BYTE $0xc0                   // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1                   // mov    r9d, r8d
	LONG $0x80f88141; WORD $0x0000; BYTE $0x00 // cmp    r8d, 128
	JB   LBB3_199
	LONG $0x0a048d4a                           // lea    rax, [rdx + r9]
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JBE  LBB3_551
	LONG $0x09048d4a                           // lea    rax, [rcx + r9]
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JBE  LBB3_551

LBB3_199:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. rdi = count - rsi - 1, rax = count & 3 (peel count).
LBB3_554:
	WORD $0x8948; BYTE $0xf7 // mov    rdi, rsi
	WORD $0xf748; BYTE $0xd7 // not    rdi
	WORD $0x014c; BYTE $0xcf // add    rdi, r9
	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
	LONG $0x03e08348         // and    rax, 3
	JE   LBB3_556

// Peel loop: one byte per iteration, rax iterations.
LBB3_555:
	LONG $0x00323c80 // cmp    byte [rdx + rsi], 0
	LONG $0x3114950f // setne    byte [rcx + rsi]
	LONG $0x01c68348 // add    rsi, 1
	LONG $0xffc08348 // add    rax, -1
	JNE  LBB3_555

// Fewer than four bytes were left at LBB3_554: the peel loop covered them.
LBB3_556:
	LONG $0x03ff8348 // cmp    rdi, 3
	JB   LBB3_865

// Main loop: four bytes per iteration until rsi == count (r9).
LBB3_557:
	LONG $0x00323c80             // cmp    byte [rdx + rsi], 0
	LONG $0x3114950f             // setne    byte [rcx + rsi]
	LONG $0x01327c80; BYTE $0x00 // cmp    byte [rdx + rsi + 1], 0
	LONG $0x3154950f; BYTE $0x01 // setne    byte [rcx + rsi + 1]
	LONG $0x02327c80; BYTE $0x00 // cmp    byte [rdx + rsi + 2], 0
	LONG $0x3154950f; BYTE $0x02 // setne    byte [rcx + rsi + 2]
	LONG $0x03327c80; BYTE $0x00 // cmp    byte [rdx + rsi + 3], 0
	LONG $0x3154950f; BYTE $0x03 // setne    byte [rcx + rsi + 3]
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_557
	JMP  LBB3_865

// LBB3_200: scalar byte copy (out[i] = in[i]).
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// NOTE(review): presumably an identity operation on unsigned 8-bit data
// (e.g. absolute value) -- the operation selector is not visible here.
// Flow: exit if count <= 0; if count >= 128 and the buffers do not overlap,
// branch to LBB3_558 (outside this view, presumably the vector path);
// otherwise run the scalar loops below starting at index 0.
LBB3_200:
	WORD $0x8545; BYTE $0xc0                   // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1                   // mov    r9d, r8d
	LONG $0x80f88141; WORD $0x0000; BYTE $0x00 // cmp    r8d, 128
	JB   LBB3_202
	LONG $0x0a048d4a                           // lea    rax, [rdx + r9]
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JBE  LBB3_558
	LONG $0x09048d4a                           // lea    rax, [rcx + r9]
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JBE  LBB3_558

LBB3_202:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r8 = count - rsi - 1, rdi = count & 3 (peel count).
LBB3_837:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_839

// Peel loop: one byte per iteration, rdi iterations.
LBB3_838:
	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB3_838

// Fewer than four bytes were left at LBB3_837: the peel loop covered them.
LBB3_839:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main loop: four bytes per iteration until rsi == count (r9).
LBB3_840:
	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_840
	JMP  LBB3_865

// LBB3_203: scalar byte copy (out[i] = in[i]).
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// NOTE(review): presumably an identity operation on unsigned 8-bit data
// (e.g. absolute value) -- the operation selector is not visible here.
// Flow: exit if count <= 0; if count >= 128 and the buffers do not overlap,
// branch to LBB3_561 (outside this view, presumably the vector path);
// otherwise run the scalar loops below starting at index 0.
LBB3_203:
	WORD $0x8545; BYTE $0xc0                   // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1                   // mov    r9d, r8d
	LONG $0x80f88141; WORD $0x0000; BYTE $0x00 // cmp    r8d, 128
	JB   LBB3_205
	LONG $0x0a048d4a                           // lea    rax, [rdx + r9]
	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
	JBE  LBB3_561
	LONG $0x09048d4a                           // lea    rax, [rcx + r9]
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JBE  LBB3_561

LBB3_205:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r8 = count - rsi - 1, rdi = count & 3 (peel count).
LBB3_845:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_847

// Peel loop: one byte per iteration, rdi iterations.
LBB3_846:
	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB3_846

// Fewer than four bytes were left at LBB3_845: the peel loop covered them.
LBB3_847:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main loop: four bytes per iteration until rsi == count (r9).
LBB3_848:
	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
	LONG $0x04c68348             // add    rsi, 4
	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
	JNE  LBB3_848
	JMP  LBB3_865

// LBB3_206: scalar negate kernel over 32-bit integers (out[i] = 0 - in[i]).
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// Flow: exit if count <= 0; if count >= 32 and the buffers do not overlap,
// branch to LBB3_564 (outside this view, presumably the vector path);
// otherwise run the scalar loops below starting at index 0.
LBB3_206:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_208
	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_564
	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_564

LBB3_208:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r8 = count - rsi - 1, rdi = count & 3 (peel count).
LBB3_853:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_855

// Peel loop: one element per iteration, rdi iterations.
LBB3_854:
	WORD $0xc031             // xor    eax, eax
	WORD $0x042b; BYTE $0xb2 // sub    eax, dword [rdx + 4*rsi]
	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB3_854

// Fewer than four elements were left at LBB3_853: the peel loop covered them.
LBB3_855:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main loop: four elements per iteration until rsi == count (r9).
LBB3_856:
	WORD $0xc031             // xor    eax, eax
	WORD $0x042b; BYTE $0xb2 // sub    eax, dword [rdx + 4*rsi]
	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
	WORD $0xc031             // xor    eax, eax
	LONG $0x04b2442b         // sub    eax, dword [rdx + 4*rsi + 4]
	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
	WORD $0xc031             // xor    eax, eax
	LONG $0x08b2442b         // sub    eax, dword [rdx + 4*rsi + 8]
	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
	WORD $0xc031             // xor    eax, eax
	LONG $0x0cb2442b         // sub    eax, dword [rdx + 4*rsi + 12]
	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
	LONG $0x04c68348         // add    rsi, 4
	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
	JNE  LBB3_856
	JMP  LBB3_865

// LBB3_209: scalar negate kernel over 32-bit integers (out[i] = 0 - in[i]).
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// Flow: exit if count <= 0; if count >= 32 and the buffers do not overlap,
// branch to LBB3_567 (outside this view, presumably the vector path);
// otherwise run the scalar loops below starting at index 0.
LBB3_209:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_211
	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
	JBE  LBB3_567
	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JBE  LBB3_567

LBB3_211:
	WORD $0xf631 // xor    esi, esi

// rsi = current index. r8 = count - rsi - 1, rdi = count & 3 (peel count).
LBB3_861:
	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB3_863

// Peel loop: one element per iteration, rdi iterations.
LBB3_862:
	WORD $0xc031             // xor    eax, eax
	WORD $0x042b; BYTE $0xb2 // sub    eax, dword [rdx + 4*rsi]
	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
	LONG $0x01c68348         // add    rsi, 1
	LONG $0xffc78348         // add    rdi, -1
	JNE  LBB3_862

// Fewer than four elements were left at LBB3_861: the peel loop covered them.
LBB3_863:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_865

// Main loop: four elements per iteration until rsi == count (r9).
LBB3_864:
	WORD $0xc031             // xor    eax, eax
	WORD $0x042b; BYTE $0xb2 // sub    eax, dword [rdx + 4*rsi]
	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
	WORD $0xc031             // xor    eax, eax
	LONG $0x04b2442b         // sub    eax, dword [rdx + 4*rsi + 4]
	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
	WORD $0xc031             // xor    eax, eax
	LONG $0x08b2442b         // sub    eax, dword [rdx + 4*rsi + 8]
	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
	WORD $0xc031             // xor    eax, eax
	LONG $0x0cb2442b         // sub    eax, dword [rdx + 4*rsi + 12]
	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
	LONG $0x04c68348         // add    rsi, 4
	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
	JNE  LBB3_864
	JMP  LBB3_865

// LBB3_212: scalar sign kernel over 32-bit signed integers.
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// Each output is 1 when the input is > 0, otherwise -(input != 0),
// i.e. -1 for negative inputs and 0 for zero.
// Flow: exit if count <= 0; if count >= 32 and the buffers do not overlap,
// branch to LBB3_570 (outside this view, presumably the vector path);
// otherwise run the scalar code below starting at index 0.
LBB3_212:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc3 // mov    r11d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_214
	LONG $0x9a348d4a         // lea    rsi, [rdx + 4*r11]
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	JBE  LBB3_570
	LONG $0x99348d4a         // lea    rsi, [rcx + 4*r11]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	JBE  LBB3_570

LBB3_214:
	WORD $0xf631 // xor    esi, esi

// rsi = current index, r11 = count. If count is odd, handle one element here
// so the main loop can step by two.
LBB3_573:
	WORD $0x8949; BYTE $0xf2     // mov    r10, rsi
	WORD $0xf749; BYTE $0xd2     // not    r10
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB3_575
	LONG $0xb2048b44             // mov    r8d, dword [rdx + 4*rsi]
	WORD $0x3145; BYTE $0xc9     // xor    r9d, r9d
	WORD $0x8545; BYTE $0xc0     // test    r8d, r8d
	LONG $0xd1950f41             // setne    r9b
	WORD $0xf741; BYTE $0xd9     // neg    r9d
	WORD $0x8545; BYTE $0xc0     // test    r8d, r8d
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xf94e0f41             // cmovle    edi, r9d
	WORD $0x3c89; BYTE $0xb1     // mov    dword [rcx + 4*rsi], edi
	LONG $0x01ce8348             // or    rsi, 1

// r10 becomes count - (index on entry to LBB3_573) - 1; zero means done.
LBB3_575:
	WORD $0x014d; BYTE $0xda       // add    r10, r11
	JE   LBB3_865
	LONG $0x0001b841; WORD $0x0000 // mov    r8d, 1

// Main loop: two elements per iteration until rsi == count (r11).
LBB3_577:
	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
	WORD $0xff31             // xor    edi, edi
	WORD $0xc085             // test    eax, eax
	LONG $0xd7950f40         // setne    dil
	WORD $0xdff7             // neg    edi
	WORD $0xc085             // test    eax, eax
	LONG $0xf84f0f41         // cmovg    edi, r8d
	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
	WORD $0xff31             // xor    edi, edi
	WORD $0xc085             // test    eax, eax
	LONG $0xd7950f40         // setne    dil
	WORD $0xdff7             // neg    edi
	WORD $0xc085             // test    eax, eax
	LONG $0xf84f0f41         // cmovg    edi, r8d
	LONG $0x04b17c89         // mov    dword [rcx + 4*rsi + 4], edi
	LONG $0x02c68348         // add    rsi, 2
	WORD $0x3949; BYTE $0xf3 // cmp    r11, rsi
	JNE  LBB3_577
	JMP  LBB3_865

// LBB3_215: scalar absolute-value kernel over 32-bit signed integers.
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// Per element: compute -x, then keep x instead when the negation came out
// "less" (cmovl), i.e. when x was positive. The minimum int32 value is its
// own negation and is stored unchanged.
// Flow: exit if count <= 0; if count >= 32 and the buffers do not overlap,
// branch to LBB3_578 (outside this view, presumably the vector path);
// otherwise run the scalar code below starting at index 0.
LBB3_215:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc2 // mov    r10d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_217
	LONG $0x92348d4a         // lea    rsi, [rdx + 4*r10]
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	JBE  LBB3_578
	LONG $0x91348d4a         // lea    rsi, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	JBE  LBB3_578

LBB3_217:
	WORD $0xf631 // xor    esi, esi

// rsi = current index, r10 = count. If count is odd, handle one element here
// so the main loop can step by two.
LBB3_581:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	LONG $0x01c2f641         // test    r10b, 1
	JE   LBB3_583
	LONG $0xb2048b44         // mov    r8d, dword [rdx + 4*rsi]
	WORD $0x8944; BYTE $0xc7 // mov    edi, r8d
	WORD $0xdff7             // neg    edi
	LONG $0xf84c0f41         // cmovl    edi, r8d
	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
	LONG $0x01ce8348         // or    rsi, 1

// r9 becomes count - (index on entry to LBB3_581) - 1; zero means done.
LBB3_583:
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	JE   LBB3_865

// Main loop: two elements per iteration until rsi == count (r10).
LBB3_584:
	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
	WORD $0xc789             // mov    edi, eax
	WORD $0xdff7             // neg    edi
	WORD $0x4c0f; BYTE $0xf8 // cmovl    edi, eax
	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
	WORD $0xc789             // mov    edi, eax
	WORD $0xdff7             // neg    edi
	WORD $0x4c0f; BYTE $0xf8 // cmovl    edi, eax
	LONG $0x04b17c89         // mov    dword [rcx + 4*rsi + 4], edi
	LONG $0x02c68348         // add    rsi, 2
	WORD $0x3949; BYTE $0xf2 // cmp    r10, rsi
	JNE  LBB3_584
	JMP  LBB3_865

// LBB3_218: scalar absolute-value kernel over 32-bit signed integers.
//   rdx = source pointer, rcx = destination pointer, r8d = element count.
// Per element: compute -x, then keep x instead when the negation came out
// "less" (cmovl), i.e. when x was positive. The minimum int32 value is its
// own negation and is stored unchanged.
// Flow: exit if count <= 0; if count >= 32 and the buffers do not overlap,
// branch to LBB3_585 (outside this view, presumably the vector path);
// otherwise run the scalar code below starting at index 0.
LBB3_218:
	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
	JLE  LBB3_865
	WORD $0x8945; BYTE $0xc2 // mov    r10d, r8d
	LONG $0x20f88341         // cmp    r8d, 32
	JB   LBB3_220
	LONG $0x92348d4a         // lea    rsi, [rdx + 4*r10]
	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
	JBE  LBB3_585
	LONG $0x91348d4a         // lea    rsi, [rcx + 4*r10]
	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
	JBE  LBB3_585

LBB3_220:
	WORD $0xf631 // xor    esi, esi

// rsi = current index, r10 = count. If count is odd, handle one element here
// so the main loop can step by two.
LBB3_588:
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	WORD $0xf749; BYTE $0xd1 // not    r9
	LONG $0x01c2f641         // test    r10b, 1
	JE   LBB3_590
	LONG $0xb2048b44         // mov    r8d, dword [rdx + 4*rsi]
	WORD $0x8944; BYTE $0xc7 // mov    edi, r8d
	WORD $0xdff7             // neg    edi
	LONG $0xf84c0f41         // cmovl    edi, r8d
	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
	LONG $0x01ce8348         // or    rsi, 1

// r9 becomes count - (index on entry to LBB3_588) - 1; zero means done.
LBB3_590:
	WORD $0x014d; BYTE $0xd1 // add    r9, r10
	JE   LBB3_865

// Main loop: two elements per iteration until rsi == count (r10).
LBB3_591:
	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
	WORD $0xc789             // mov    edi, eax
	WORD $0xdff7             // neg    edi
	WORD $0x4c0f; BYTE $0xf8 // cmovl    edi, eax
	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
	WORD $0xc789             // mov    edi, eax
	WORD $0xdff7             // neg    edi
	WORD $0x4c0f; BYTE $0xf8 // cmovl    edi, eax
	LONG $0x04b17c89         // mov    dword [rcx + 4*rsi + 4], edi
	LONG $0x02c68348         // add    rsi, 2
	WORD $0x3949; BYTE $0xf2 // cmp    r10, rsi
	JNE  LBB3_591
	JMP  LBB3_865

// LBB3_221: chunk-count setup for a loop that consumes 32 elements per chunk
// (the loop itself is outside this view).
//   edx = r9d rounded down to a multiple of 32
//   rdi = number of 32-element chunks ((rdx - 32) >> 5, plus 1)
//   esi = that chunk count modulo 4
// With four or more chunks (rdx - 32 >= 96) control goes to LBB3_367;
// otherwise eax is zeroed and control goes to LBB3_369.
// NOTE(review): r9d is presumably the element count set up by the caller
// block -- confirm at the branch site.
LBB3_221:
	WORD $0x8944; BYTE $0xca // mov    edx, r9d
	WORD $0xe283; BYTE $0xe0 // and    edx, -32
	LONG $0xe0428d48         // lea    rax, [rdx - 32]
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x05efc148         // shr    rdi, 5
	LONG $0x01c78348         // add    rdi, 1
	WORD $0xfe89             // mov    esi, edi
	WORD $0xe683; BYTE $0x03 // and    esi, 3
	LONG $0x60f88348         // cmp    rax, 96
	JAE  LBB3_367
	WORD $0xc031             // xor    eax, eax
	JMP  LBB3_369

// LBB3_265: chunk-count setup for a loop that consumes 16 elements per chunk
// (the loop itself is outside this view).
//   edx = r9d rounded down to a multiple of 16
//   rdi = number of 16-element chunks ((rdx - 16) >> 4, plus 1)
//   esi = that chunk count modulo 4
// With four or more chunks (rdx - 16 >= 48) control goes to LBB3_438;
// otherwise eax is zeroed and control goes to LBB3_440.
// NOTE(review): r9d is presumably the element count set up by the caller
// block -- confirm at the branch site.
LBB3_265:
	WORD $0x8944; BYTE $0xca // mov    edx, r9d
	WORD $0xe283; BYTE $0xf0 // and    edx, -16
	LONG $0xf0428d48         // lea    rax, [rdx - 16]
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x04efc148         // shr    rdi, 4
	LONG $0x01c78348         // add    rdi, 1
	WORD $0xfe89             // mov    esi, edi
	WORD $0xe683; BYTE $0x03 // and    esi, 3
	LONG $0x30f88348         // cmp    rax, 48
	JAE  LBB3_438
	WORD $0xc031             // xor    eax, eax
	JMP  LBB3_440

// LBB3_279: chunk-count setup for a loop that consumes 64 elements per chunk
// (the loop itself is outside this view).
//   edx = r9d rounded down to a multiple of 64
//   rdi = number of 64-element chunks ((rdx - 64) >> 6, plus 1)
//   esi = that chunk count modulo 4
// With four or more chunks (rdx - 64 >= 192) control goes to LBB3_461;
// otherwise eax is zeroed and control goes to LBB3_463.
// NOTE(review): r9d is presumably the element count set up by the caller
// block -- confirm at the branch site.
LBB3_279:
	WORD $0x8944; BYTE $0xca       // mov    edx, r9d
	WORD $0xe283; BYTE $0xc0       // and    edx, -64
	LONG $0xc0428d48               // lea    rax, [rdx - 64]
	WORD $0x8948; BYTE $0xc7       // mov    rdi, rax
	LONG $0x06efc148               // shr    rdi, 6
	LONG $0x01c78348               // add    rdi, 1
	WORD $0xfe89                   // mov    esi, edi
	WORD $0xe683; BYTE $0x03       // and    esi, 3
	LONG $0x00c03d48; WORD $0x0000 // cmp    rax, 192
	JAE  LBB3_461
	WORD $0xc031                   // xor    eax, eax
	JMP  LBB3_463

// LBB3_338: chunk-count setup for a loop that consumes 128 elements per chunk
// (the loop itself is outside this view). Reached from LBB3_191 when the
// count is at least 128.
//   edx = r9d rounded down to a multiple of 128
//   rdi = number of 128-element chunks ((rdx - 128) >> 7, plus 1)
//   esi = that chunk count modulo 4
// With four or more chunks (rdx - 128 >= 384) control goes to LBB3_541;
// otherwise eax is zeroed and control goes to LBB3_543.
LBB3_338:
	WORD $0x8944; BYTE $0xca       // mov    edx, r9d
	WORD $0xe283; BYTE $0x80       // and    edx, -128
	LONG $0x80428d48               // lea    rax, [rdx - 128]
	WORD $0x8948; BYTE $0xc7       // mov    rdi, rax
	LONG $0x07efc148               // shr    rdi, 7
	LONG $0x01c78348               // add    rdi, 1
	WORD $0xfe89                   // mov    esi, edi
	WORD $0xe683; BYTE $0x03       // and    esi, 3
	LONG $0x01803d48; WORD $0x0000 // cmp    rax, 384
	JAE  LBB3_541
	WORD $0xc031                   // xor    eax, eax
	JMP  LBB3_543

// LBB3_374: AVX2 negate over 32-bit integers (out = 0 - in), 32 elements per
// chunk, with the loop unrolled to two chunks (64 elements) per iteration.
//   rdx = source pointer, rcx = destination pointer.
//   esi = r9d rounded down to a multiple of 32
//   r8  = number of 32-element chunks ((rsi - 32) >> 5, plus 1)
// If there is exactly one chunk (rsi - 32 == 0) control goes straight to
// LBB3_612 (outside this view). Otherwise rax = -(r8 & -2) counts up to zero
// by two per iteration, rdi is the element index, and ymm0 is all zeros.
// After the loop control goes to LBB3_613 (outside this view).
// NOTE(review): presumably LBB3_612/LBB3_613 handle an odd final chunk --
// confirm there. r9d is presumably the element count.
LBB3_374:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	LONG $0xe0468d48         // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x05e8c149         // shr    r8, 5
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_612
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

// Loop body: load-and-negate four 8-lane vectors, store them, then repeat for
// the next four vectors at byte offset +128.
LBB3_376:
	LONG $0x0cfafdc5; BYTE $0xba         // vpsubd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20ba       // vpsubd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40ba       // vpsubd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x64fafdc5; WORD $0x60ba       // vpsubd    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c7ffec5; BYTE $0xb9         // vmovdqu    yword [rcx + 4*rdi], ymm1
	LONG $0x547ffec5; WORD $0x20b9       // vmovdqu    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x40b9       // vmovdqu    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x647ffec5; WORD $0x60b9       // vmovdqu    yword [rcx + 4*rdi + 96], ymm4
	QUAD $0x000080ba8cfafdc5; BYTE $0x00 // vpsubd    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba94fafdc5; BYTE $0x00 // vpsubd    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9cfafdc5; BYTE $0x00 // vpsubd    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa4fafdc5; BYTE $0x00 // vpsubd    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x000080b98c7ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 4*rdi + 128], ymm1
	QUAD $0x0000a0b9947ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 4*rdi + 160], ymm2
	QUAD $0x0000c0b99c7ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 4*rdi + 192], ymm3
	QUAD $0x0000e0b9a47ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 4*rdi + 224], ymm4
	LONG $0x40c78348                     // add    rdi, 64
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_376
	JMP  LBB3_613

// LBB3_377 / LBB3_378: per 32-bit lane, dst[i] = (src[i] != 0) ? C : 0, where
// C is the dword broadcast from offset 64 of the table addressed by rbp.
//   rdx = source base, rcx = destination base (both addressed as 4*rdi)
//   esi = r9d & -32   count rounded down to a multiple of 32
//   ymm0 = 0, ymm1 = broadcast C
// NOTE(review): C (.LCPI3_3) is defined outside this excerpt; presumably it is
// 1, making this a sign() kernel for unsigned 32-bit values -- confirm.
LBB3_377:
	WORD $0x8944; BYTE $0xce       // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0       // and    esi, -32
	WORD $0xff31                   // xor    edi, edi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x587de2c4; WORD $0x404d // vpbroadcastd    ymm1, dword 64[rbp] /* [rip + .LCPI3_3] */

// Loop body: vpcmpeqd builds an all-ones mask where the input equals zero;
// vpandn then computes (~mask & C). 32 elements are handled per pass.
// After the loop, an exact multiple of 32 (rsi == r9) goes to LBB3_865,
// anything else to LBB3_380; both targets are outside this excerpt.
LBB3_378:
	LONG $0x1476fdc5; BYTE $0xba   // vpcmpeqd    ymm2, ymm0, yword [rdx + 4*rdi]
	LONG $0xd1dfedc5               // vpandn    ymm2, ymm2, ymm1
	LONG $0x5c76fdc5; WORD $0x20ba // vpcmpeqd    ymm3, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0xd9dfe5c5               // vpandn    ymm3, ymm3, ymm1
	LONG $0x6476fdc5; WORD $0x40ba // vpcmpeqd    ymm4, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x6c76fdc5; WORD $0x60ba // vpcmpeqd    ymm5, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0xe1dfddc5               // vpandn    ymm4, ymm4, ymm1
	LONG $0xe9dfd5c5               // vpandn    ymm5, ymm5, ymm1
	LONG $0x147ffec5; BYTE $0xb9   // vmovdqu    yword [rcx + 4*rdi], ymm2
	LONG $0x5c7ffec5; WORD $0x20b9 // vmovdqu    yword [rcx + 4*rdi + 32], ymm3
	LONG $0x647ffec5; WORD $0x40b9 // vmovdqu    yword [rcx + 4*rdi + 64], ymm4
	LONG $0x6c7ffec5; WORD $0x60b9 // vmovdqu    yword [rcx + 4*rdi + 96], ymm5
	LONG $0x20c78348               // add    rdi, 32
	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
	JNE  LBB3_378
	WORD $0x394c; BYTE $0xce       // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_380

// LBB3_384 / LBB3_386: straight copy of 4-byte elements, dst[i] = src[i].
//   rdx = source base, rcx = destination base (both addressed as 4*rdi)
//   esi = r9d & -32               count rounded down to a multiple of 32
//   r8  = ((rsi - 32) >> 5) + 1   number of 32-element groups
// When rsi - 32 == 0 (exactly one group) the unrolled loop is bypassed via
// LBB3_620. Otherwise rax = -(r8 & -2) counts up towards zero in steps of 2
// while each loop pass copies two groups (64 elements), then control goes to
// LBB3_621. LBB3_620 / LBB3_621 are outside this excerpt.
LBB3_384:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	LONG $0xe0468d48         // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x05e8c149         // shr    r8, 5
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_620
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi

// Loop body: eight unaligned 32-byte loads and stores (256 bytes) per pass.
LBB3_386:
	LONG $0x0410fcc5; BYTE $0xba         // vmovups    ymm0, yword [rdx + 4*rdi]
	LONG $0x4c10fcc5; WORD $0x20ba       // vmovups    ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x5410fcc5; WORD $0x40ba       // vmovups    ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c10fcc5; WORD $0x60ba       // vmovups    ymm3, yword [rdx + 4*rdi + 96]
	LONG $0x0411fcc5; BYTE $0xb9         // vmovups    yword [rcx + 4*rdi], ymm0
	LONG $0x4c11fcc5; WORD $0x20b9       // vmovups    yword [rcx + 4*rdi + 32], ymm1
	LONG $0x5411fcc5; WORD $0x40b9       // vmovups    yword [rcx + 4*rdi + 64], ymm2
	LONG $0x5c11fcc5; WORD $0x60b9       // vmovups    yword [rcx + 4*rdi + 96], ymm3
	QUAD $0x000080ba8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0ba9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 4*rdi + 224]
	QUAD $0x000080b98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 128], ymm0
	QUAD $0x0000a0b98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 160], ymm1
	QUAD $0x0000c0b99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 192], ymm2
	QUAD $0x0000e0b99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 224], ymm3
	LONG $0x40c78348                     // add    rdi, 64
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_386
	JMP  LBB3_621

// LBB3_387 / LBB3_389: straight copy of 4-byte elements, dst[i] = src[i].
// Instruction-for-instruction the same kernel as LBB3_384 / LBB3_386; only
// the branch targets differ.
//   rdx = source base, rcx = destination base (both addressed as 4*rdi)
//   esi = r9d & -32               count rounded down to a multiple of 32
//   r8  = ((rsi - 32) >> 5) + 1   number of 32-element groups
// When rsi - 32 == 0 (exactly one group) the unrolled loop is bypassed via
// LBB3_628. Otherwise rax = -(r8 & -2) counts up towards zero in steps of 2
// while each loop pass copies two groups (64 elements), then control goes to
// LBB3_629. LBB3_628 / LBB3_629 are outside this excerpt.
LBB3_387:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	LONG $0xe0468d48         // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x05e8c149         // shr    r8, 5
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_628
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi

// Loop body: eight unaligned 32-byte loads and stores (256 bytes) per pass.
LBB3_389:
	LONG $0x0410fcc5; BYTE $0xba         // vmovups    ymm0, yword [rdx + 4*rdi]
	LONG $0x4c10fcc5; WORD $0x20ba       // vmovups    ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x5410fcc5; WORD $0x40ba       // vmovups    ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c10fcc5; WORD $0x60ba       // vmovups    ymm3, yword [rdx + 4*rdi + 96]
	LONG $0x0411fcc5; BYTE $0xb9         // vmovups    yword [rcx + 4*rdi], ymm0
	LONG $0x4c11fcc5; WORD $0x20b9       // vmovups    yword [rcx + 4*rdi + 32], ymm1
	LONG $0x5411fcc5; WORD $0x40b9       // vmovups    yword [rcx + 4*rdi + 64], ymm2
	LONG $0x5c11fcc5; WORD $0x60b9       // vmovups    yword [rcx + 4*rdi + 96], ymm3
	QUAD $0x000080ba8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0ba9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 4*rdi + 224]
	QUAD $0x000080b98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 128], ymm0
	QUAD $0x0000a0b98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 160], ymm1
	QUAD $0x0000c0b99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 192], ymm2
	QUAD $0x0000e0b99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 224], ymm3
	LONG $0x40c78348                     // add    rdi, 64
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_389
	JMP  LBB3_629

// LBB3_390 / LBB3_392: per 64-bit lane, dst[i] = src[i] XOR K, where K is the
// qword broadcast from offset 0 of the table addressed by rbp.
//   rdx = source base, rcx = destination base (both addressed as 8*rdi)
//   esi = r9d & -16               count rounded down to a multiple of 16
//   r8  = ((rsi - 16) >> 4) + 1   number of 16-element groups
// When rsi - 16 == 0 (exactly one group) the unrolled loop is bypassed via
// LBB3_636. Otherwise rax = -(r8 & -2) counts up towards zero in steps of 2
// while each loop pass handles two groups (32 elements), then control goes to
// LBB3_637. LBB3_636 / LBB3_637 are outside this excerpt.
// NOTE(review): K (.LCPI3_0) is defined outside this excerpt; presumably it is
// the IEEE-754 sign-bit mask, making this a float64 negation -- confirm.
LBB3_390:
	WORD $0x8944; BYTE $0xce       // mov    esi, r9d
	WORD $0xe683; BYTE $0xf0       // and    esi, -16
	LONG $0xf0468d48               // lea    rax, [rsi - 16]
	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
	LONG $0x04e8c149               // shr    r8, 4
	LONG $0x01c08349               // add    r8, 1
	WORD $0x8548; BYTE $0xc0       // test    rax, rax
	JE   LBB3_636
	WORD $0x894c; BYTE $0xc0       // mov    rax, r8
	LONG $0xfee08348               // and    rax, -2
	WORD $0xf748; BYTE $0xd8       // neg    rax
	WORD $0xff31                   // xor    edi, edi
	LONG $0x197de2c4; WORD $0x0045 // vbroadcastsd    ymm0, qword 0[rbp] /* [rip + .LCPI3_0] */

// Loop body: eight 32-byte XORs against ymm0 and eight stores per pass.
// rdi is the element index (advanced by 32), rax the negative pass counter.
LBB3_392:
	LONG $0x0c57fdc5; BYTE $0xfa         // vxorpd    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x5457fdc5; WORD $0x20fa       // vxorpd    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5c57fdc5; WORD $0x40fa       // vxorpd    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x6457fdc5; WORD $0x60fa       // vxorpd    ymm4, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xf9         // vmovupd    yword [rcx + 8*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20f9       // vmovupd    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40f9       // vmovupd    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x6411fdc5; WORD $0x60f9       // vmovupd    yword [rcx + 8*rdi + 96], ymm4
	QUAD $0x000080fa8c57fdc5; BYTE $0x00 // vxorpd    ymm1, ymm0, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa9457fdc5; BYTE $0x00 // vxorpd    ymm2, ymm0, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9c57fdc5; BYTE $0x00 // vxorpd    ymm3, ymm0, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa457fdc5; BYTE $0x00 // vxorpd    ymm4, ymm0, yword [rdx + 8*rdi + 224]
	QUAD $0x000080f98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm1
	QUAD $0x0000a0f99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm2
	QUAD $0x0000c0f99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm3
	QUAD $0x0000e0f9a411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm4
	LONG $0x20c78348                     // add    rdi, 32
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_392
	JMP  LBB3_637

// LBB3_393 / LBB3_395: per 64-bit lane, dst[i] = src[i] XOR K, where K is the
// qword broadcast from offset 0 of the table addressed by rbp. Same kernel as
// LBB3_390 / LBB3_392; only the branch targets differ.
//   rdx = source base, rcx = destination base (both addressed as 8*rdi)
//   esi = r9d & -16               count rounded down to a multiple of 16
//   r8  = ((rsi - 16) >> 4) + 1   number of 16-element groups
// When rsi - 16 == 0 (exactly one group) the unrolled loop is bypassed via
// LBB3_646. Otherwise rax = -(r8 & -2) counts up towards zero in steps of 2
// while each loop pass handles two groups (32 elements), then control goes to
// LBB3_647. LBB3_646 / LBB3_647 are outside this excerpt.
// NOTE(review): K (.LCPI3_0) is defined outside this excerpt; presumably it is
// the IEEE-754 sign-bit mask, making this a float64 negation -- confirm.
LBB3_393:
	WORD $0x8944; BYTE $0xce       // mov    esi, r9d
	WORD $0xe683; BYTE $0xf0       // and    esi, -16
	LONG $0xf0468d48               // lea    rax, [rsi - 16]
	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
	LONG $0x04e8c149               // shr    r8, 4
	LONG $0x01c08349               // add    r8, 1
	WORD $0x8548; BYTE $0xc0       // test    rax, rax
	JE   LBB3_646
	WORD $0x894c; BYTE $0xc0       // mov    rax, r8
	LONG $0xfee08348               // and    rax, -2
	WORD $0xf748; BYTE $0xd8       // neg    rax
	WORD $0xff31                   // xor    edi, edi
	LONG $0x197de2c4; WORD $0x0045 // vbroadcastsd    ymm0, qword 0[rbp] /* [rip + .LCPI3_0] */

// Loop body: eight 32-byte XORs against ymm0 and eight stores per pass.
// rdi is the element index (advanced by 32), rax the negative pass counter.
LBB3_395:
	LONG $0x0c57fdc5; BYTE $0xfa         // vxorpd    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x5457fdc5; WORD $0x20fa       // vxorpd    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5c57fdc5; WORD $0x40fa       // vxorpd    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x6457fdc5; WORD $0x60fa       // vxorpd    ymm4, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xf9         // vmovupd    yword [rcx + 8*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20f9       // vmovupd    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40f9       // vmovupd    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x6411fdc5; WORD $0x60f9       // vmovupd    yword [rcx + 8*rdi + 96], ymm4
	QUAD $0x000080fa8c57fdc5; BYTE $0x00 // vxorpd    ymm1, ymm0, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa9457fdc5; BYTE $0x00 // vxorpd    ymm2, ymm0, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9c57fdc5; BYTE $0x00 // vxorpd    ymm3, ymm0, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa457fdc5; BYTE $0x00 // vxorpd    ymm4, ymm0, yword [rdx + 8*rdi + 224]
	QUAD $0x000080f98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm1
	QUAD $0x0000a0f99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm2
	QUAD $0x0000c0f99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm3
	QUAD $0x0000e0f9a411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm4
	LONG $0x20c78348                     // add    rdi, 32
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_395
	JMP  LBB3_647

// LBB3_396 / LBB3_397: per float64 lane,
//   dst[i] = (src[i] != 0.0) ? ((src[i] AND K0) OR K1) : 0
// where K0 / K1 are the qwords broadcast from offsets 0 and 8 of the table
// addressed by rbp.
//   rdx = source base, rcx = destination base (both addressed as 8*rdi)
//   esi = eax & -16   count (held in eax here) rounded down to a multiple of 16
//   ymm0 = 0.0, ymm1 = broadcast K0, ymm2 = broadcast K1
// NOTE(review): K0 (.LCPI3_0) and K1 (.LCPI3_1) are defined outside this
// excerpt; presumably the sign-bit mask and 1.0, making this a float64 sign()
// kernel -- confirm.
LBB3_396:
	WORD $0xc689                   // mov    esi, eax
	WORD $0xe683; BYTE $0xf0       // and    esi, -16
	WORD $0xff31                   // xor    edi, edi
	LONG $0xc057f9c5               // vxorpd    xmm0, xmm0, xmm0
	LONG $0x197de2c4; WORD $0x004d // vbroadcastsd    ymm1, qword 0[rbp] /* [rip + .LCPI3_0] */
	LONG $0x197de2c4; WORD $0x0855 // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI3_1] */

// Loop body, 16 elements per pass: load four vectors, form (x AND K0) OR K1
// for each, build the x != 0 mask with vcmpneqpd (predicate 4) and AND it in,
// then store. After the loop, an exact multiple of 16 (rsi == rax) goes to
// LBB3_865, anything else to LBB3_399; both targets are outside this excerpt.
LBB3_397:
	LONG $0x1c10fdc5; BYTE $0xfa   // vmovupd    ymm3, yword [rdx + 8*rdi]
	LONG $0x6410fdc5; WORD $0x20fa // vmovupd    ymm4, yword [rdx + 8*rdi + 32]
	LONG $0x6c10fdc5; WORD $0x40fa // vmovupd    ymm5, yword [rdx + 8*rdi + 64]
	LONG $0x7410fdc5; WORD $0x60fa // vmovupd    ymm6, yword [rdx + 8*rdi + 96]
	LONG $0xf954e5c5               // vandpd    ymm7, ymm3, ymm1
	LONG $0xff56edc5               // vorpd    ymm7, ymm2, ymm7
	LONG $0xc1545dc5               // vandpd    ymm8, ymm4, ymm1
	LONG $0xc2563dc5               // vorpd    ymm8, ymm8, ymm2
	LONG $0xc95455c5               // vandpd    ymm9, ymm5, ymm1
	LONG $0xca5635c5               // vorpd    ymm9, ymm9, ymm2
	LONG $0xd1544dc5               // vandpd    ymm10, ymm6, ymm1
	LONG $0xd2562dc5               // vorpd    ymm10, ymm10, ymm2
	LONG $0xd8c2e5c5; BYTE $0x04   // vcmpneqpd    ymm3, ymm3, ymm0
	LONG $0xdf54e5c5               // vandpd    ymm3, ymm3, ymm7
	LONG $0xe0c2ddc5; BYTE $0x04   // vcmpneqpd    ymm4, ymm4, ymm0
	LONG $0xe454bdc5               // vandpd    ymm4, ymm8, ymm4
	LONG $0xe8c2d5c5; BYTE $0x04   // vcmpneqpd    ymm5, ymm5, ymm0
	LONG $0xed54b5c5               // vandpd    ymm5, ymm9, ymm5
	LONG $0xf0c2cdc5; BYTE $0x04   // vcmpneqpd    ymm6, ymm6, ymm0
	LONG $0xf654adc5               // vandpd    ymm6, ymm10, ymm6
	LONG $0x1c11fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm3
	LONG $0x6411fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm4
	LONG $0x6c11fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm5
	LONG $0x7411fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm6
	LONG $0x10c78348               // add    rdi, 16
	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
	JNE  LBB3_397
	WORD $0x3948; BYTE $0xc6       // cmp    rsi, rax
	JE   LBB3_865
	JMP  LBB3_399

// LBB3_404 / LBB3_406: per 64-bit lane, dst[i] = src[i] AND M, where M is the
// qword broadcast from offset 24 of the table addressed by rbp.
//   rdx = source base, rcx = destination base (both addressed as 8*rdi)
//   esi = r9d & -16               count rounded down to a multiple of 16
//   r8  = ((rsi - 16) >> 4) + 1   number of 16-element groups
// When rsi - 16 == 0 (exactly one group) the unrolled loop is bypassed via
// LBB3_656. Otherwise rax = -(r8 & -2) counts up towards zero in steps of 2
// while each loop pass handles two groups (32 elements), then control goes to
// LBB3_657. LBB3_656 / LBB3_657 are outside this excerpt.
// NOTE(review): M (.LCPI3_8) is defined outside this excerpt; presumably it
// clears the IEEE-754 sign bit, making this a float64 abs() -- confirm.
LBB3_404:
	WORD $0x8944; BYTE $0xce       // mov    esi, r9d
	WORD $0xe683; BYTE $0xf0       // and    esi, -16
	LONG $0xf0468d48               // lea    rax, [rsi - 16]
	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
	LONG $0x04e8c149               // shr    r8, 4
	LONG $0x01c08349               // add    r8, 1
	WORD $0x8548; BYTE $0xc0       // test    rax, rax
	JE   LBB3_656
	WORD $0x894c; BYTE $0xc0       // mov    rax, r8
	LONG $0xfee08348               // and    rax, -2
	WORD $0xf748; BYTE $0xd8       // neg    rax
	WORD $0xff31                   // xor    edi, edi
	LONG $0x197de2c4; WORD $0x1845 // vbroadcastsd    ymm0, qword 24[rbp] /* [rip + .LCPI3_8] */

// Loop body: eight 32-byte ANDs against ymm0 and eight stores per pass.
// rdi is the element index (advanced by 32), rax the negative pass counter.
LBB3_406:
	LONG $0x0c54fdc5; BYTE $0xfa         // vandpd    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x5454fdc5; WORD $0x20fa       // vandpd    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5c54fdc5; WORD $0x40fa       // vandpd    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x6454fdc5; WORD $0x60fa       // vandpd    ymm4, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xf9         // vmovupd    yword [rcx + 8*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20f9       // vmovupd    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40f9       // vmovupd    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x6411fdc5; WORD $0x60f9       // vmovupd    yword [rcx + 8*rdi + 96], ymm4
	QUAD $0x000080fa8c54fdc5; BYTE $0x00 // vandpd    ymm1, ymm0, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa9454fdc5; BYTE $0x00 // vandpd    ymm2, ymm0, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9c54fdc5; BYTE $0x00 // vandpd    ymm3, ymm0, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa454fdc5; BYTE $0x00 // vandpd    ymm4, ymm0, yword [rdx + 8*rdi + 224]
	QUAD $0x000080f98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm1
	QUAD $0x0000a0f99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm2
	QUAD $0x0000c0f99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm3
	QUAD $0x0000e0f9a411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm4
	LONG $0x20c78348                     // add    rdi, 32
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_406
	JMP  LBB3_657

// LBB3_407 / LBB3_409: per 64-bit lane, dst[i] = src[i] AND M, where M is the
// qword broadcast from offset 24 of the table addressed by rbp. Same kernel as
// LBB3_404 / LBB3_406; only the branch targets differ.
//   rdx = source base, rcx = destination base (both addressed as 8*rdi)
//   esi = r9d & -16               count rounded down to a multiple of 16
//   r8  = ((rsi - 16) >> 4) + 1   number of 16-element groups
// When rsi - 16 == 0 (exactly one group) the unrolled loop is bypassed via
// LBB3_664. Otherwise rax = -(r8 & -2) counts up towards zero in steps of 2
// while each loop pass handles two groups (32 elements), then control goes to
// LBB3_665. LBB3_664 / LBB3_665 are outside this excerpt.
// NOTE(review): M (.LCPI3_8) is defined outside this excerpt; presumably it
// clears the IEEE-754 sign bit, making this a float64 abs() -- confirm.
LBB3_407:
	WORD $0x8944; BYTE $0xce       // mov    esi, r9d
	WORD $0xe683; BYTE $0xf0       // and    esi, -16
	LONG $0xf0468d48               // lea    rax, [rsi - 16]
	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
	LONG $0x04e8c149               // shr    r8, 4
	LONG $0x01c08349               // add    r8, 1
	WORD $0x8548; BYTE $0xc0       // test    rax, rax
	JE   LBB3_664
	WORD $0x894c; BYTE $0xc0       // mov    rax, r8
	LONG $0xfee08348               // and    rax, -2
	WORD $0xf748; BYTE $0xd8       // neg    rax
	WORD $0xff31                   // xor    edi, edi
	LONG $0x197de2c4; WORD $0x1845 // vbroadcastsd    ymm0, qword 24[rbp] /* [rip + .LCPI3_8] */

// Loop body: eight 32-byte ANDs against ymm0 and eight stores per pass.
// rdi is the element index (advanced by 32), rax the negative pass counter.
LBB3_409:
	LONG $0x0c54fdc5; BYTE $0xfa         // vandpd    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x5454fdc5; WORD $0x20fa       // vandpd    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5c54fdc5; WORD $0x40fa       // vandpd    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x6454fdc5; WORD $0x60fa       // vandpd    ymm4, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xf9         // vmovupd    yword [rcx + 8*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20f9       // vmovupd    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40f9       // vmovupd    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x6411fdc5; WORD $0x60f9       // vmovupd    yword [rcx + 8*rdi + 96], ymm4
	QUAD $0x000080fa8c54fdc5; BYTE $0x00 // vandpd    ymm1, ymm0, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa9454fdc5; BYTE $0x00 // vandpd    ymm2, ymm0, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9c54fdc5; BYTE $0x00 // vandpd    ymm3, ymm0, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa454fdc5; BYTE $0x00 // vandpd    ymm4, ymm0, yword [rdx + 8*rdi + 224]
	QUAD $0x000080f98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm1
	QUAD $0x0000a0f99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm2
	QUAD $0x0000c0f99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm3
	QUAD $0x0000e0f9a411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm4
	LONG $0x20c78348                     // add    rdi, 32
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_409
	JMP  LBB3_665

// LBB3_410 / LBB3_412: negate packed bytes, dst[i] = 0 - src[i].
//   rdx = source base, rcx = destination base (both addressed as base + rdi)
//   esi = r9d & -128              count rounded down to a multiple of 128
//   r8  = ((rsi - 128) >> 7) + 1  number of 128-byte groups
// When rsi - 128 == 0 (exactly one group) the unrolled loop is bypassed via
// LBB3_672. Otherwise rax = -(r8 & -2) counts up towards zero in steps of 2
// while each loop pass handles two groups (256 bytes), then control goes to
// LBB3_673. LBB3_672 / LBB3_673 are outside this excerpt.
LBB3_410:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	LONG $0x80468d48         // lea    rax, [rsi - 128]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x07e8c149         // shr    r8, 7
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_672
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

// Loop body: ymm0 is all zero, so each vpsubb yields the negated byte lanes.
// rdi is the byte index (advanced by 256), rax the negative pass counter.
LBB3_412:
	LONG $0x0cf8fdc5; BYTE $0x3a               // vpsubb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54f8fdc5; WORD $0x203a             // vpsubb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x403a             // vpsubb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x64f8fdc5; WORD $0x603a             // vpsubb    ymm4, ymm0, yword [rdx + rdi + 96]
	LONG $0x0c7ffec5; BYTE $0x39               // vmovdqu    yword [rcx + rdi], ymm1
	LONG $0x547ffec5; WORD $0x2039             // vmovdqu    yword [rcx + rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x4039             // vmovdqu    yword [rcx + rdi + 64], ymm3
	LONG $0x647ffec5; WORD $0x6039             // vmovdqu    yword [rcx + rdi + 96], ymm4
	QUAD $0x0000803a8cf8fdc5; BYTE $0x00       // vpsubb    ymm1, ymm0, yword [rdx + rdi + 128]
	QUAD $0x0000a03a94f8fdc5; BYTE $0x00       // vpsubb    ymm2, ymm0, yword [rdx + rdi + 160]
	QUAD $0x0000c03a9cf8fdc5; BYTE $0x00       // vpsubb    ymm3, ymm0, yword [rdx + rdi + 192]
	QUAD $0x0000e03aa4f8fdc5; BYTE $0x00       // vpsubb    ymm4, ymm0, yword [rdx + rdi + 224]
	QUAD $0x000080398c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + rdi + 128], ymm1
	QUAD $0x0000a039947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + rdi + 160], ymm2
	QUAD $0x0000c0399c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + rdi + 192], ymm3
	QUAD $0x0000e039a47ffec5; BYTE $0x00       // vmovdqu    yword [rcx + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c08348                           // add    rax, 2
	JNE  LBB3_412
	JMP  LBB3_673

// LBB3_413 / LBB3_415: negate packed bytes, dst[i] = 0 - src[i]. Same kernel
// as LBB3_410 / LBB3_412; only the branch targets differ.
//   rdx = source base, rcx = destination base (both addressed as base + rdi)
//   esi = r9d & -128              count rounded down to a multiple of 128
//   r8  = ((rsi - 128) >> 7) + 1  number of 128-byte groups
// When rsi - 128 == 0 (exactly one group) the unrolled loop is bypassed via
// LBB3_680. Otherwise rax = -(r8 & -2) counts up towards zero in steps of 2
// while each loop pass handles two groups (256 bytes), then control goes to
// LBB3_681. LBB3_680 / LBB3_681 are outside this excerpt.
LBB3_413:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	LONG $0x80468d48         // lea    rax, [rsi - 128]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x07e8c149         // shr    r8, 7
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_680
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

// Loop body: ymm0 is all zero, so each vpsubb yields the negated byte lanes.
// rdi is the byte index (advanced by 256), rax the negative pass counter.
LBB3_415:
	LONG $0x0cf8fdc5; BYTE $0x3a               // vpsubb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54f8fdc5; WORD $0x203a             // vpsubb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x403a             // vpsubb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x64f8fdc5; WORD $0x603a             // vpsubb    ymm4, ymm0, yword [rdx + rdi + 96]
	LONG $0x0c7ffec5; BYTE $0x39               // vmovdqu    yword [rcx + rdi], ymm1
	LONG $0x547ffec5; WORD $0x2039             // vmovdqu    yword [rcx + rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x4039             // vmovdqu    yword [rcx + rdi + 64], ymm3
	LONG $0x647ffec5; WORD $0x6039             // vmovdqu    yword [rcx + rdi + 96], ymm4
	QUAD $0x0000803a8cf8fdc5; BYTE $0x00       // vpsubb    ymm1, ymm0, yword [rdx + rdi + 128]
	QUAD $0x0000a03a94f8fdc5; BYTE $0x00       // vpsubb    ymm2, ymm0, yword [rdx + rdi + 160]
	QUAD $0x0000c03a9cf8fdc5; BYTE $0x00       // vpsubb    ymm3, ymm0, yword [rdx + rdi + 192]
	QUAD $0x0000e03aa4f8fdc5; BYTE $0x00       // vpsubb    ymm4, ymm0, yword [rdx + rdi + 224]
	QUAD $0x000080398c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + rdi + 128], ymm1
	QUAD $0x0000a039947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + rdi + 160], ymm2
	QUAD $0x0000c0399c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + rdi + 192], ymm3
	QUAD $0x0000e039a47ffec5; BYTE $0x00       // vmovdqu    yword [rcx + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c08348                           // add    rax, 2
	JNE  LBB3_415
	JMP  LBB3_681

// LBB3_416 / LBB3_417: per signed byte lane, with T the 32-byte vector loaded
// from offset 128 of the table addressed by rbp:
//   dst[i] = (T[i] > src[i]) ? ((src[i] != 0) ? 0xFF : 0x00) : T[i]
//   rdx = source base, rcx = destination base (both addressed as base + rdi)
//   esi = r11d & -128   count (held in r11 here) rounded down to 128
//   ymm0 = 0, ymm1 = all ones, ymm2 = T
// The table load is vmovdqa, so rbp + 128 has to be 32-byte aligned.
// NOTE(review): T (.LCPI3_6) is defined outside this excerpt; presumably every
// byte is 1, making this an int8 sign() kernel (-1 / 0 / +1) -- confirm.
LBB3_416:
	WORD $0x8944; BYTE $0xde // mov    esi, r11d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x00000080956ffdc5 // vmovdqa    ymm2, yword 128[rbp] /* [rip + .LCPI3_6] */

// Loop body, 128 bytes per pass:
//   ymm7..ymm10 = NOT (x == 0)        -> 0xFF where the input byte is non-zero
//   ymm3..ymm6  = (T > x), signed     -> blend selector
//   vpblendvb takes ymm7..ymm10 where the selector is set, T elsewhere.
// "sub rdi, -128" advances rdi by 128. After the loop, an exact multiple of
// 128 (rsi == r11) goes to LBB3_865, anything else to LBB3_419; both targets
// are outside this excerpt.
LBB3_417:
	LONG $0x1c6ffec5; BYTE $0x3a   // vmovdqu    ymm3, yword [rdx + rdi]
	LONG $0x646ffec5; WORD $0x203a // vmovdqu    ymm4, yword [rdx + rdi + 32]
	LONG $0x6c6ffec5; WORD $0x403a // vmovdqu    ymm5, yword [rdx + rdi + 64]
	LONG $0x746ffec5; WORD $0x603a // vmovdqu    ymm6, yword [rdx + rdi + 96]
	LONG $0xf874e5c5               // vpcmpeqb    ymm7, ymm3, ymm0
	LONG $0xf9efc5c5               // vpxor    ymm7, ymm7, ymm1
	LONG $0xc0745dc5               // vpcmpeqb    ymm8, ymm4, ymm0
	LONG $0xc1ef3dc5               // vpxor    ymm8, ymm8, ymm1
	LONG $0xc87455c5               // vpcmpeqb    ymm9, ymm5, ymm0
	LONG $0xc9ef35c5               // vpxor    ymm9, ymm9, ymm1
	LONG $0xd0744dc5               // vpcmpeqb    ymm10, ymm6, ymm0
	LONG $0xd1ef2dc5               // vpxor    ymm10, ymm10, ymm1
	LONG $0xdb64edc5               // vpcmpgtb    ymm3, ymm2, ymm3
	LONG $0xe464edc5               // vpcmpgtb    ymm4, ymm2, ymm4
	LONG $0xed64edc5               // vpcmpgtb    ymm5, ymm2, ymm5
	LONG $0xf664edc5               // vpcmpgtb    ymm6, ymm2, ymm6
	LONG $0x4c6de3c4; WORD $0x30df // vpblendvb    ymm3, ymm2, ymm7, ymm3
	LONG $0x4c6dc3c4; WORD $0x40e0 // vpblendvb    ymm4, ymm2, ymm8, ymm4
	LONG $0x4c6dc3c4; WORD $0x50e9 // vpblendvb    ymm5, ymm2, ymm9, ymm5
	LONG $0x4c6dc3c4; WORD $0x60f2 // vpblendvb    ymm6, ymm2, ymm10, ymm6
	LONG $0x1c7ffec5; BYTE $0x39   // vmovdqu    yword [rcx + rdi], ymm3
	LONG $0x647ffec5; WORD $0x2039 // vmovdqu    yword [rcx + rdi + 32], ymm4
	LONG $0x6c7ffec5; WORD $0x4039 // vmovdqu    yword [rcx + rdi + 64], ymm5
	LONG $0x747ffec5; WORD $0x6039 // vmovdqu    yword [rcx + rdi + 96], ymm6
	LONG $0x80ef8348               // sub    rdi, -128
	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
	JNE  LBB3_417
	WORD $0x394c; BYTE $0xde       // cmp    rsi, r11
	JE   LBB3_865
	JMP  LBB3_419

// LBB3_424 / LBB3_425: absolute value of signed bytes, computed in 32-bit
// lanes and narrowed back to bytes; 32 bytes are produced per pass.
//   rdx = source base, rcx = destination base (both addressed as base + rdi)
//   esi = r10d & -32   count (held in r10 here) rounded down to 32
//   xmm0 = 16-byte vpshufb control loaded from offset 48 of the table
//          addressed by rbp (vmovdqa, so rbp + 48 has to be 16-byte aligned)
// NOTE(review): the shuffle control (.LCPI3_11) is defined outside this
// excerpt; presumably it gathers the low byte of each dword -- confirm.
LBB3_424:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	WORD $0xff31                 // xor    edi, edi
	LONG $0x456ff9c5; BYTE $0x30 // vmovdqa    xmm0, oword 48[rbp] /* [rip + .LCPI3_11] */

// Loop body:
//   1. vpmovsxbd sign-extends 4 x 8 input bytes into dword lanes.
//   2. s = x >> 7 (arithmetic) is 0 or -1; (x + s) XOR s gives |x|.
//   3. Each 128-bit half is shuffled with xmm0, the halves are recombined
//      (vpunpckldq / vinserti128 / vpunpcklqdq) and vpermq 216 puts the
//      quadwords in order before the single 32-byte store.
// After the loop, an exact multiple of 32 (rsi == r10) goes to LBB3_865,
// anything else to LBB3_427; both targets are outside this excerpt.
LBB3_425:
	LONG $0x217de2c4; WORD $0x3a0c             // vpmovsxbd    ymm1, qword [rdx + rdi]
	LONG $0x217de2c4; WORD $0x3a54; BYTE $0x08 // vpmovsxbd    ymm2, qword [rdx + rdi + 8]
	LONG $0x217de2c4; WORD $0x3a5c; BYTE $0x10 // vpmovsxbd    ymm3, qword [rdx + rdi + 16]
	LONG $0x217de2c4; WORD $0x3a64; BYTE $0x18 // vpmovsxbd    ymm4, qword [rdx + rdi + 24]
	LONG $0xe172d5c5; BYTE $0x07               // vpsrad    ymm5, ymm1, 7
	LONG $0xe272cdc5; BYTE $0x07               // vpsrad    ymm6, ymm2, 7
	LONG $0xe372c5c5; BYTE $0x07               // vpsrad    ymm7, ymm3, 7
	LONG $0xe472bdc5; BYTE $0x07               // vpsrad    ymm8, ymm4, 7
	LONG $0xc9fed5c5                           // vpaddd    ymm1, ymm5, ymm1
	LONG $0xd2fecdc5                           // vpaddd    ymm2, ymm6, ymm2
	LONG $0xdbfec5c5                           // vpaddd    ymm3, ymm7, ymm3
	LONG $0xe4febdc5                           // vpaddd    ymm4, ymm8, ymm4
	LONG $0xcdeff5c5                           // vpxor    ymm1, ymm1, ymm5
	LONG $0xd6efedc5                           // vpxor    ymm2, ymm2, ymm6
	LONG $0xdfefe5c5                           // vpxor    ymm3, ymm3, ymm7
	LONG $0xe4efbdc5                           // vpxor    ymm4, ymm8, ymm4
	LONG $0x397de3c4; WORD $0x01cd             // vextracti128    xmm5, ymm1, 1
	LONG $0x0051e2c4; BYTE $0xe8               // vpshufb    xmm5, xmm5, xmm0
	LONG $0x0071e2c4; BYTE $0xc8               // vpshufb    xmm1, xmm1, xmm0
	LONG $0xcd62f1c5                           // vpunpckldq    xmm1, xmm1, xmm5
	LONG $0x397de3c4; WORD $0x01d5             // vextracti128    xmm5, ymm2, 1
	LONG $0x0051e2c4; BYTE $0xe8               // vpshufb    xmm5, xmm5, xmm0
	LONG $0x0069e2c4; BYTE $0xd0               // vpshufb    xmm2, xmm2, xmm0
	LONG $0xd562e9c5                           // vpunpckldq    xmm2, xmm2, xmm5
	LONG $0x397de3c4; WORD $0x01dd             // vextracti128    xmm5, ymm3, 1
	LONG $0x0051e2c4; BYTE $0xe8               // vpshufb    xmm5, xmm5, xmm0
	LONG $0x0061e2c4; BYTE $0xd8               // vpshufb    xmm3, xmm3, xmm0
	LONG $0xdd62e1c5                           // vpunpckldq    xmm3, xmm3, xmm5
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0x0051e2c4; BYTE $0xe8               // vpshufb    xmm5, xmm5, xmm0
	LONG $0x0059e2c4; BYTE $0xe0               // vpshufb    xmm4, xmm4, xmm0
	LONG $0xe562d9c5                           // vpunpckldq    xmm4, xmm4, xmm5
	LONG $0x3865e3c4; WORD $0x01dc             // vinserti128    ymm3, ymm3, xmm4, 1
	LONG $0x3875e3c4; WORD $0x01ca             // vinserti128    ymm1, ymm1, xmm2, 1
	LONG $0xcb6cf5c5                           // vpunpcklqdq    ymm1, ymm1, ymm3
	LONG $0x00fde3c4; WORD $0xd8c9             // vpermq    ymm1, ymm1, 216
	LONG $0x0c7ffec5; BYTE $0x39               // vmovdqu    yword [rcx + rdi], ymm1
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB3_425
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB3_865
	JMP  LBB3_427

// LBB3_431 / LBB3_432: absolute value of signed bytes, computed in 32-bit
// lanes and narrowed back to bytes; 32 bytes are produced per pass. Same
// kernel as LBB3_424 / LBB3_425; only the branch targets differ.
//   rdx = source base, rcx = destination base (both addressed as base + rdi)
//   esi = r10d & -32   count (held in r10 here) rounded down to 32
//   xmm0 = 16-byte vpshufb control loaded from offset 48 of the table
//          addressed by rbp (vmovdqa, so rbp + 48 has to be 16-byte aligned)
// NOTE(review): the shuffle control (.LCPI3_11) is defined outside this
// excerpt; presumably it gathers the low byte of each dword -- confirm.
LBB3_431:
	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	WORD $0xff31                 // xor    edi, edi
	LONG $0x456ff9c5; BYTE $0x30 // vmovdqa    xmm0, oword 48[rbp] /* [rip + .LCPI3_11] */

// Loop body:
//   1. vpmovsxbd sign-extends 4 x 8 input bytes into dword lanes.
//   2. s = x >> 7 (arithmetic) is 0 or -1; (x + s) XOR s gives |x|.
//   3. Each 128-bit half is shuffled with xmm0, the halves are recombined
//      (vpunpckldq / vinserti128 / vpunpcklqdq) and vpermq 216 puts the
//      quadwords in order before the single 32-byte store.
// After the loop, an exact multiple of 32 (rsi == r10) goes to LBB3_865,
// anything else to LBB3_434; both targets are outside this excerpt.
LBB3_432:
	LONG $0x217de2c4; WORD $0x3a0c             // vpmovsxbd    ymm1, qword [rdx + rdi]
	LONG $0x217de2c4; WORD $0x3a54; BYTE $0x08 // vpmovsxbd    ymm2, qword [rdx + rdi + 8]
	LONG $0x217de2c4; WORD $0x3a5c; BYTE $0x10 // vpmovsxbd    ymm3, qword [rdx + rdi + 16]
	LONG $0x217de2c4; WORD $0x3a64; BYTE $0x18 // vpmovsxbd    ymm4, qword [rdx + rdi + 24]
	LONG $0xe172d5c5; BYTE $0x07               // vpsrad    ymm5, ymm1, 7
	LONG $0xe272cdc5; BYTE $0x07               // vpsrad    ymm6, ymm2, 7
	LONG $0xe372c5c5; BYTE $0x07               // vpsrad    ymm7, ymm3, 7
	LONG $0xe472bdc5; BYTE $0x07               // vpsrad    ymm8, ymm4, 7
	LONG $0xc9fed5c5                           // vpaddd    ymm1, ymm5, ymm1
	LONG $0xd2fecdc5                           // vpaddd    ymm2, ymm6, ymm2
	LONG $0xdbfec5c5                           // vpaddd    ymm3, ymm7, ymm3
	LONG $0xe4febdc5                           // vpaddd    ymm4, ymm8, ymm4
	LONG $0xcdeff5c5                           // vpxor    ymm1, ymm1, ymm5
	LONG $0xd6efedc5                           // vpxor    ymm2, ymm2, ymm6
	LONG $0xdfefe5c5                           // vpxor    ymm3, ymm3, ymm7
	LONG $0xe4efbdc5                           // vpxor    ymm4, ymm8, ymm4
	LONG $0x397de3c4; WORD $0x01cd             // vextracti128    xmm5, ymm1, 1
	LONG $0x0051e2c4; BYTE $0xe8               // vpshufb    xmm5, xmm5, xmm0
	LONG $0x0071e2c4; BYTE $0xc8               // vpshufb    xmm1, xmm1, xmm0
	LONG $0xcd62f1c5                           // vpunpckldq    xmm1, xmm1, xmm5
	LONG $0x397de3c4; WORD $0x01d5             // vextracti128    xmm5, ymm2, 1
	LONG $0x0051e2c4; BYTE $0xe8               // vpshufb    xmm5, xmm5, xmm0
	LONG $0x0069e2c4; BYTE $0xd0               // vpshufb    xmm2, xmm2, xmm0
	LONG $0xd562e9c5                           // vpunpckldq    xmm2, xmm2, xmm5
	LONG $0x397de3c4; WORD $0x01dd             // vextracti128    xmm5, ymm3, 1
	LONG $0x0051e2c4; BYTE $0xe8               // vpshufb    xmm5, xmm5, xmm0
	LONG $0x0061e2c4; BYTE $0xd8               // vpshufb    xmm3, xmm3, xmm0
	LONG $0xdd62e1c5                           // vpunpckldq    xmm3, xmm3, xmm5
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0x0051e2c4; BYTE $0xe8               // vpshufb    xmm5, xmm5, xmm0
	LONG $0x0059e2c4; BYTE $0xe0               // vpshufb    xmm4, xmm4, xmm0
	LONG $0xe562d9c5                           // vpunpckldq    xmm4, xmm4, xmm5
	LONG $0x3865e3c4; WORD $0x01dc             // vinserti128    ymm3, ymm3, xmm4, 1
	LONG $0x3875e3c4; WORD $0x01ca             // vinserti128    ymm1, ymm1, xmm2, 1
	LONG $0xcb6cf5c5                           // vpunpcklqdq    ymm1, ymm1, ymm3
	LONG $0x00fde3c4; WORD $0xd8c9             // vpermq    ymm1, ymm1, 216
	LONG $0x0c7ffec5; BYTE $0x39               // vmovdqu    yword [rcx + rdi], ymm1
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB3_432
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB3_865
	JMP  LBB3_434

// LBB3_445 / LBB3_447: negate packed 64-bit integers, dst[i] = 0 - src[i].
//   rdx = source base, rcx = destination base (both addressed as 8*rdi)
//   esi = r9d & -16               count rounded down to a multiple of 16
//   r8  = ((rsi - 16) >> 4) + 1   number of 16-element groups
// When rsi - 16 == 0 (exactly one group) the unrolled loop is bypassed via
// LBB3_688. Otherwise rax = -(r8 & -2) counts up towards zero in steps of 2
// while each loop pass handles two groups (32 elements), then control goes to
// LBB3_689. LBB3_688 / LBB3_689 are outside this excerpt.
LBB3_445:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	LONG $0xf0468d48         // lea    rax, [rsi - 16]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x04e8c149         // shr    r8, 4
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_688
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

// Loop body: ymm0 is all zero, so each vpsubq yields the negated lane values.
// rdi is the element index (advanced by 32), rax the negative pass counter.
LBB3_447:
	LONG $0x0cfbfdc5; BYTE $0xfa         // vpsubq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20fa       // vpsubq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40fa       // vpsubq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x64fbfdc5; WORD $0x60fa       // vpsubq    ymm4, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c7ffec5; BYTE $0xf9         // vmovdqu    yword [rcx + 8*rdi], ymm1
	LONG $0x547ffec5; WORD $0x20f9       // vmovdqu    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x40f9       // vmovdqu    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x647ffec5; WORD $0x60f9       // vmovdqu    yword [rcx + 8*rdi + 96], ymm4
	QUAD $0x000080fa8cfbfdc5; BYTE $0x00 // vpsubq    ymm1, ymm0, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa94fbfdc5; BYTE $0x00 // vpsubq    ymm2, ymm0, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9cfbfdc5; BYTE $0x00 // vpsubq    ymm3, ymm0, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa4fbfdc5; BYTE $0x00 // vpsubq    ymm4, ymm0, yword [rdx + 8*rdi + 224]
	QUAD $0x000080f98c7ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 8*rdi + 128], ymm1
	QUAD $0x0000a0f9947ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 8*rdi + 160], ymm2
	QUAD $0x0000c0f99c7ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 8*rdi + 192], ymm3
	QUAD $0x0000e0f9a47ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 8*rdi + 224], ymm4
	LONG $0x20c78348                     // add    rdi, 32
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_447
	JMP  LBB3_689

// LBB3_448 / LBB3_449: vector path over 64-bit lanes that writes the broadcast
// constant where the input lane is non-zero and 0 where it is zero.
// NOTE(review): r9 is treated as the element count (set outside this chunk).
//   esi  = r9d & -16   elements handled by the vector loop
//   rdi  = element index, ymm0 = all zeroes
//   ymm1 = qword at 16[rbp] broadcast to all lanes (.LCPI3_4; its value is
//          defined outside this chunk - presumably 1, confirm)
LBB3_448:
	WORD $0x8944; BYTE $0xce       // mov    esi, r9d
	WORD $0xe683; BYTE $0xf0       // and    esi, -16
	WORD $0xff31                   // xor    edi, edi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x597de2c4; WORD $0x104d // vpbroadcastq    ymm1, qword 16[rbp] /* [rip + .LCPI3_4] */

// 16 qwords per iteration: mask = (x == 0); result = ~mask & ymm1.
LBB3_449:
	LONG $0x297de2c4; WORD $0xfa14             // vpcmpeqq    ymm2, ymm0, yword [rdx + 8*rdi]
	LONG $0xd1dfedc5                           // vpandn    ymm2, ymm2, ymm1
	LONG $0x297de2c4; WORD $0xfa5c; BYTE $0x20 // vpcmpeqq    ymm3, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0xd9dfe5c5                           // vpandn    ymm3, ymm3, ymm1
	LONG $0x297de2c4; WORD $0xfa64; BYTE $0x40 // vpcmpeqq    ymm4, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x297de2c4; WORD $0xfa6c; BYTE $0x60 // vpcmpeqq    ymm5, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0xe1dfddc5                           // vpandn    ymm4, ymm4, ymm1
	LONG $0xe9dfd5c5                           // vpandn    ymm5, ymm5, ymm1
	LONG $0x147ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm2
	LONG $0x5c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm3
	LONG $0x647ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm4
	LONG $0x6c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm5
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB3_449
	// If the vector loop covered all r9 elements go to LBB3_865, otherwise to
	// LBB3_451 (both outside this chunk; presumably exit and scalar remainder).
	WORD $0x394c; BYTE $0xce                   // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_451

// LBB3_455 / LBB3_457: vector path that copies 8-byte elements unchanged from
// [rdx + 8*rdi] to [rcx + 8*rdi].
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -16           elements coverable by whole 16-element chunks
//   r8  = ((rsi - 16) >> 4)+1 number of 16-element chunks
// A single chunk (rsi == 16) bypasses the 2x-unrolled loop via LBB3_696.
LBB3_455:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	LONG $0xf0468d48         // lea    rax, [rsi - 16]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x04e8c149         // shr    r8, 4
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_696
	// rax = -(r8 & ~1): negative, even chunk counter; rdi = element index.
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi

// Each iteration moves two chunks: 32 qwords (256 bytes) with unaligned loads/stores.
LBB3_457:
	LONG $0x0410fcc5; BYTE $0xfa         // vmovups    ymm0, yword [rdx + 8*rdi]
	LONG $0x4c10fcc5; WORD $0x20fa       // vmovups    ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x5410fcc5; WORD $0x40fa       // vmovups    ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c10fcc5; WORD $0x60fa       // vmovups    ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x0411fcc5; BYTE $0xf9         // vmovups    yword [rcx + 8*rdi], ymm0
	LONG $0x4c11fcc5; WORD $0x20f9       // vmovups    yword [rcx + 8*rdi + 32], ymm1
	LONG $0x5411fcc5; WORD $0x40f9       // vmovups    yword [rcx + 8*rdi + 64], ymm2
	LONG $0x5c11fcc5; WORD $0x60f9       // vmovups    yword [rcx + 8*rdi + 96], ymm3
	QUAD $0x000080fa8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0fa9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 8*rdi + 224]
	QUAD $0x000080f98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm0
	QUAD $0x0000a0f98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm1
	QUAD $0x0000c0f99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm2
	QUAD $0x0000e0f99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm3
	LONG $0x20c78348                     // add    rdi, 32
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_457
	// LBB3_697 is outside this chunk; presumably it handles an odd trailing chunk.
	JMP  LBB3_697

// LBB3_458 / LBB3_460: vector path that copies 8-byte elements unchanged from
// [rdx + 8*rdi] to [rcx + 8*rdi].
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -16           elements coverable by whole 16-element chunks
//   r8  = ((rsi - 16) >> 4)+1 number of 16-element chunks
// A single chunk (rsi == 16) bypasses the 2x-unrolled loop via LBB3_704.
LBB3_458:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	LONG $0xf0468d48         // lea    rax, [rsi - 16]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x04e8c149         // shr    r8, 4
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_704
	// rax = -(r8 & ~1): negative, even chunk counter; rdi = element index.
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi

// Each iteration moves two chunks: 32 qwords (256 bytes) with unaligned loads/stores.
LBB3_460:
	LONG $0x0410fcc5; BYTE $0xfa         // vmovups    ymm0, yword [rdx + 8*rdi]
	LONG $0x4c10fcc5; WORD $0x20fa       // vmovups    ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x5410fcc5; WORD $0x40fa       // vmovups    ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c10fcc5; WORD $0x60fa       // vmovups    ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x0411fcc5; BYTE $0xf9         // vmovups    yword [rcx + 8*rdi], ymm0
	LONG $0x4c11fcc5; WORD $0x20f9       // vmovups    yword [rcx + 8*rdi + 32], ymm1
	LONG $0x5411fcc5; WORD $0x40f9       // vmovups    yword [rcx + 8*rdi + 64], ymm2
	LONG $0x5c11fcc5; WORD $0x60f9       // vmovups    yword [rcx + 8*rdi + 96], ymm3
	QUAD $0x000080fa8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0fa9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 8*rdi + 224]
	QUAD $0x000080f98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm0
	QUAD $0x0000a0f98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm1
	QUAD $0x0000c0f99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm2
	QUAD $0x0000e0f99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm3
	LONG $0x20c78348                     // add    rdi, 32
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_460
	// LBB3_705 is outside this chunk; presumably it handles an odd trailing chunk.
	JMP  LBB3_705

// LBB3_468 / LBB3_470: vector path that stores (0 - x) for every 16-bit lane,
// reading from [rdx + 2*rdi] and writing to [rcx + 2*rdi].
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -32           elements coverable by whole 32-element chunks
//   r8  = ((rsi - 32) >> 5)+1 number of 32-element chunks
// A single chunk (rsi == 32) bypasses the 2x-unrolled loop via LBB3_712.
LBB3_468:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	LONG $0xe0468d48         // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x05e8c149         // shr    r8, 5
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_712
	// rax = -(r8 & ~1): negative, even chunk counter; rdi = element index;
	// ymm0 = all zeroes (VEX xmm write clears the upper half).
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

// Each iteration handles two chunks: 64 words (128 bytes).
LBB3_470:
	LONG $0x0cf9fdc5; BYTE $0x7a   // vpsubw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x54f9fdc5; WORD $0x207a // vpsubw    ymm2, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x0c7ffec5; BYTE $0x79   // vmovdqu    yword [rcx + 2*rdi], ymm1
	LONG $0x547ffec5; WORD $0x2079 // vmovdqu    yword [rcx + 2*rdi + 32], ymm2
	LONG $0x4cf9fdc5; WORD $0x407a // vpsubw    ymm1, ymm0, yword [rdx + 2*rdi + 64]
	LONG $0x54f9fdc5; WORD $0x607a // vpsubw    ymm2, ymm0, yword [rdx + 2*rdi + 96]
	LONG $0x4c7ffec5; WORD $0x4079 // vmovdqu    yword [rcx + 2*rdi + 64], ymm1
	LONG $0x547ffec5; WORD $0x6079 // vmovdqu    yword [rcx + 2*rdi + 96], ymm2
	LONG $0x40c78348               // add    rdi, 64
	LONG $0x02c08348               // add    rax, 2
	JNE  LBB3_470
	// LBB3_713 is outside this chunk; presumably it handles an odd trailing chunk.
	JMP  LBB3_713

// LBB3_471 / LBB3_473: vector path that stores (0 - x) for every 16-bit lane,
// reading from [rdx + 2*rdi] and writing to [rcx + 2*rdi].
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -32           elements coverable by whole 32-element chunks
//   r8  = ((rsi - 32) >> 5)+1 number of 32-element chunks
// A single chunk (rsi == 32) bypasses the 2x-unrolled loop via LBB3_720.
LBB3_471:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	LONG $0xe0468d48         // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x05e8c149         // shr    r8, 5
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_720
	// rax = -(r8 & ~1): negative, even chunk counter; rdi = element index;
	// ymm0 = all zeroes (VEX xmm write clears the upper half).
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

// Each iteration handles two chunks: 64 words (128 bytes).
LBB3_473:
	LONG $0x0cf9fdc5; BYTE $0x7a   // vpsubw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x54f9fdc5; WORD $0x207a // vpsubw    ymm2, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x0c7ffec5; BYTE $0x79   // vmovdqu    yword [rcx + 2*rdi], ymm1
	LONG $0x547ffec5; WORD $0x2079 // vmovdqu    yword [rcx + 2*rdi + 32], ymm2
	LONG $0x4cf9fdc5; WORD $0x407a // vpsubw    ymm1, ymm0, yword [rdx + 2*rdi + 64]
	LONG $0x54f9fdc5; WORD $0x607a // vpsubw    ymm2, ymm0, yword [rdx + 2*rdi + 96]
	LONG $0x4c7ffec5; WORD $0x4079 // vmovdqu    yword [rcx + 2*rdi + 64], ymm1
	LONG $0x547ffec5; WORD $0x6079 // vmovdqu    yword [rcx + 2*rdi + 96], ymm2
	LONG $0x40c78348               // add    rdi, 64
	LONG $0x02c08348               // add    rax, 2
	JNE  LBB3_473
	// LBB3_721 is outside this chunk; presumably it handles an odd trailing chunk.
	JMP  LBB3_721

// LBB3_474 / LBB3_476: vector path that stores (0 - x) for every 16-bit lane,
// reading from [rdx + 2*rdi] and writing to [rcx + 2*rdi].
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -32           elements coverable by whole 32-element chunks
//   r8  = ((rsi - 32) >> 5)+1 number of 32-element chunks
// A single chunk (rsi == 32) bypasses the 2x-unrolled loop via LBB3_728.
LBB3_474:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	LONG $0xe0468d48         // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x05e8c149         // shr    r8, 5
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_728
	// rax = -(r8 & ~1): negative, even chunk counter; rdi = element index;
	// ymm0 = all zeroes (VEX xmm write clears the upper half).
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

// Each iteration handles two chunks: 64 words (128 bytes).
LBB3_476:
	LONG $0x0cf9fdc5; BYTE $0x7a   // vpsubw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x54f9fdc5; WORD $0x207a // vpsubw    ymm2, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x0c7ffec5; BYTE $0x79   // vmovdqu    yword [rcx + 2*rdi], ymm1
	LONG $0x547ffec5; WORD $0x2079 // vmovdqu    yword [rcx + 2*rdi + 32], ymm2
	LONG $0x4cf9fdc5; WORD $0x407a // vpsubw    ymm1, ymm0, yword [rdx + 2*rdi + 64]
	LONG $0x54f9fdc5; WORD $0x607a // vpsubw    ymm2, ymm0, yword [rdx + 2*rdi + 96]
	LONG $0x4c7ffec5; WORD $0x4079 // vmovdqu    yword [rcx + 2*rdi + 64], ymm1
	LONG $0x547ffec5; WORD $0x6079 // vmovdqu    yword [rcx + 2*rdi + 96], ymm2
	LONG $0x40c78348               // add    rdi, 64
	LONG $0x02c08348               // add    rax, 2
	JNE  LBB3_476
	// LBB3_729 is outside this chunk; presumably it handles an odd trailing chunk.
	JMP  LBB3_729

// LBB3_477 / LBB3_479: vector path over 16-bit lanes that writes the constant
// vector where the input lane is non-zero and 0 where it is zero.
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -32           elements coverable by whole 32-element chunks
//   r8  = ((rsi - 32) >> 5)+1 number of 32-element chunks
// A single chunk (rsi == 32) bypasses the 2x-unrolled loop via LBB3_736.
LBB3_477:
	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xe0468d48             // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
	LONG $0x05e8c149             // shr    r8, 5
	LONG $0x01c08349             // add    r8, 1
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	JE   LBB3_736
	// rax = -(r8 & ~1): negative, even chunk counter; rdi = element index;
	// ymm0 = all zeroes; ymm1 = 32-byte constant at 96[rbp] (.LCPI3_5; value
	// defined outside this chunk - presumably 1 in every word, confirm).
	// vmovdqa is an aligned load, so rbp + 96 must be 32-byte aligned.
	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
	LONG $0xfee08348             // and    rax, -2
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0xff31                 // xor    edi, edi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x4d6ffdc5; BYTE $0x60 // vmovdqa    ymm1, yword 96[rbp] /* [rip + .LCPI3_5] */

// 64 words per iteration: mask = (x == 0); result = ~mask & ymm1.
LBB3_479:
	LONG $0x1475fdc5; BYTE $0x7a   // vpcmpeqw    ymm2, ymm0, yword [rdx + 2*rdi]
	LONG $0xd1dfedc5               // vpandn    ymm2, ymm2, ymm1
	LONG $0x5c75fdc5; WORD $0x207a // vpcmpeqw    ymm3, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0xd9dfe5c5               // vpandn    ymm3, ymm3, ymm1
	LONG $0x147ffec5; BYTE $0x79   // vmovdqu    yword [rcx + 2*rdi], ymm2
	LONG $0x5c7ffec5; WORD $0x2079 // vmovdqu    yword [rcx + 2*rdi + 32], ymm3
	LONG $0x5475fdc5; WORD $0x407a // vpcmpeqw    ymm2, ymm0, yword [rdx + 2*rdi + 64]
	LONG $0xd1dfedc5               // vpandn    ymm2, ymm2, ymm1
	LONG $0x5c75fdc5; WORD $0x607a // vpcmpeqw    ymm3, ymm0, yword [rdx + 2*rdi + 96]
	LONG $0xd9dfe5c5               // vpandn    ymm3, ymm3, ymm1
	LONG $0x547ffec5; WORD $0x4079 // vmovdqu    yword [rcx + 2*rdi + 64], ymm2
	LONG $0x5c7ffec5; WORD $0x6079 // vmovdqu    yword [rcx + 2*rdi + 96], ymm3
	LONG $0x40c78348               // add    rdi, 64
	LONG $0x02c08348               // add    rax, 2
	JNE  LBB3_479
	// LBB3_737 is outside this chunk; presumably it handles an odd trailing chunk.
	JMP  LBB3_737

// LBB3_480 / LBB3_482: vector path over signed 16-bit lanes computing a
// three-way select against the constant C held in ymm2:
//   x >= C           -> C
//   x <  C, x != 0   -> all-ones (-1)
//   x <  C, x == 0   -> 0
// NOTE(review): C is the 32-byte constant at 96[rbp] (.LCPI3_5), defined outside
// this chunk; if it is 1 in every word this is signum(x) - confirm.
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -32           elements coverable by whole 32-element chunks
//   r8  = ((rsi - 32) >> 5)+1 number of 32-element chunks
// A single chunk (rsi == 32) bypasses the 2x-unrolled loop via LBB3_744.
LBB3_480:
	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	LONG $0xe0468d48             // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
	LONG $0x05e8c149             // shr    r8, 5
	LONG $0x01c08349             // add    r8, 1
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	JE   LBB3_744
	// rax = -(r8 & ~1): negative, even chunk counter; rdi = element index;
	// ymm0 = all zeroes, ymm1 = all ones (compare of a register with itself),
	// ymm2 = C (aligned load, so rbp + 96 must be 32-byte aligned).
	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
	LONG $0xfee08348             // and    rax, -2
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0xff31                 // xor    edi, edi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5             // vpcmpeqd    ymm1, ymm1, ymm1
	LONG $0x556ffdc5; BYTE $0x60 // vmovdqa    ymm2, yword 96[rbp] /* [rip + .LCPI3_5] */

// 64 words per iteration, processed as two identical halves of 32 words.
// Per lane: nz = ~(x == 0); lt = (C > x); result = lt ? nz : C.
LBB3_482:
	LONG $0x1c6ffec5; BYTE $0x7a   // vmovdqu    ymm3, yword [rdx + 2*rdi]
	LONG $0x646ffec5; WORD $0x207a // vmovdqu    ymm4, yword [rdx + 2*rdi + 32]
	LONG $0xe875e5c5               // vpcmpeqw    ymm5, ymm3, ymm0
	LONG $0xe9efd5c5               // vpxor    ymm5, ymm5, ymm1
	LONG $0xf075ddc5               // vpcmpeqw    ymm6, ymm4, ymm0
	LONG $0xf1efcdc5               // vpxor    ymm6, ymm6, ymm1
	LONG $0xdb65edc5               // vpcmpgtw    ymm3, ymm2, ymm3
	LONG $0xe465edc5               // vpcmpgtw    ymm4, ymm2, ymm4
	LONG $0x4c6de3c4; WORD $0x30dd // vpblendvb    ymm3, ymm2, ymm5, ymm3
	LONG $0x4c6de3c4; WORD $0x40e6 // vpblendvb    ymm4, ymm2, ymm6, ymm4
	LONG $0x1c7ffec5; BYTE $0x79   // vmovdqu    yword [rcx + 2*rdi], ymm3
	LONG $0x647ffec5; WORD $0x2079 // vmovdqu    yword [rcx + 2*rdi + 32], ymm4
	LONG $0x5c6ffec5; WORD $0x407a // vmovdqu    ymm3, yword [rdx + 2*rdi + 64]
	LONG $0x646ffec5; WORD $0x607a // vmovdqu    ymm4, yword [rdx + 2*rdi + 96]
	LONG $0xe875e5c5               // vpcmpeqw    ymm5, ymm3, ymm0
	LONG $0xe9efd5c5               // vpxor    ymm5, ymm5, ymm1
	LONG $0xf075ddc5               // vpcmpeqw    ymm6, ymm4, ymm0
	LONG $0xf1efcdc5               // vpxor    ymm6, ymm6, ymm1
	LONG $0xdb65edc5               // vpcmpgtw    ymm3, ymm2, ymm3
	LONG $0xe465edc5               // vpcmpgtw    ymm4, ymm2, ymm4
	LONG $0x4c6de3c4; WORD $0x30dd // vpblendvb    ymm3, ymm2, ymm5, ymm3
	LONG $0x4c6de3c4; WORD $0x40e6 // vpblendvb    ymm4, ymm2, ymm6, ymm4
	LONG $0x5c7ffec5; WORD $0x4079 // vmovdqu    yword [rcx + 2*rdi + 64], ymm3
	LONG $0x647ffec5; WORD $0x6079 // vmovdqu    yword [rcx + 2*rdi + 96], ymm4
	LONG $0x40c78348               // add    rdi, 64
	LONG $0x02c08348               // add    rax, 2
	JNE  LBB3_482
	// LBB3_745 is outside this chunk; presumably it handles an odd trailing chunk.
	JMP  LBB3_745

// LBB3_483: chunk bookkeeping for a vector loop that lives outside this chunk
// (LBB3_592 / LBB3_594).
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -32            elements coverable by whole 32-element chunks
//   rdi = ((rsi - 32) >> 5)+1  number of 32-element chunks
//   r8d = rdi & 3              chunks left over after groups of four
// With rsi - 32 >= 96 (at least four chunks) control goes to LBB3_592;
// otherwise eax is zeroed and control goes straight to LBB3_594.
LBB3_483:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	LONG $0xe0468d48         // lea    rax, [rsi - 32]
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x05efc148         // shr    rdi, 5
	LONG $0x01c78348         // add    rdi, 1
	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
	LONG $0x03e08341         // and    r8d, 3
	LONG $0x60f88348         // cmp    rax, 96
	JAE  LBB3_592
	WORD $0xc031             // xor    eax, eax
	JMP  LBB3_594

// LBB3_485 / LBB3_487: vector path computing the absolute value of signed
// 16-bit elements, reading from [rdx + 2*rdi] and writing to [rcx + 2*rdi].
// Each group of 8 words is sign-extended to 8 dwords, then with
// s = x >> 15 (arithmetic: 0 or -1) the code forms (x + s) ^ s, and finally
// narrows the dwords back to 8 words before the 128-bit store.
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -16           elements coverable by whole 16-element chunks
//   r8  = ((rsi - 16) >> 4)+1 number of 16-element chunks
// A single chunk (rsi == 16) bypasses the 2x-unrolled loop via LBB3_753.
LBB3_485:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	LONG $0xf0468d48         // lea    rax, [rsi - 16]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x04e8c149         // shr    r8, 4
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_753
	// rax = -(r8 & ~1): negative, even chunk counter; rdi = element index.
	// ymm0 = byte-shuffle control from 160[rbp] (.LCPI3_10; defined outside this
	// chunk - presumably it gathers the low word of each dword, confirm).
	// vmovdqa is an aligned load, so rbp + 160 must be 32-byte aligned.
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	QUAD $0x000000a0856ffdc5 // vmovdqa    ymm0, yword 160[rbp] /* [rip + .LCPI3_10] */

// 32 words per iteration (two chunks of 16). vpermq 232 places qwords 0 and 2
// of the shuffled vector in the low 128 bits, which is the part that is stored.
LBB3_487:
	LONG $0x237de2c4; WORD $0x7a0c             // vpmovsxwd    ymm1, oword [rdx + 2*rdi]
	LONG $0x237de2c4; WORD $0x7a54; BYTE $0x10 // vpmovsxwd    ymm2, oword [rdx + 2*rdi + 16]
	LONG $0xe272e5c5; BYTE $0x0f               // vpsrad    ymm3, ymm2, 15
	LONG $0xe172ddc5; BYTE $0x0f               // vpsrad    ymm4, ymm1, 15
	LONG $0xc9feddc5                           // vpaddd    ymm1, ymm4, ymm1
	LONG $0xd2fee5c5                           // vpaddd    ymm2, ymm3, ymm2
	LONG $0xd3efedc5                           // vpxor    ymm2, ymm2, ymm3
	LONG $0xcceff5c5                           // vpxor    ymm1, ymm1, ymm4
	LONG $0x0075e2c4; BYTE $0xc8               // vpshufb    ymm1, ymm1, ymm0
	LONG $0x00fde3c4; WORD $0xe8c9             // vpermq    ymm1, ymm1, 232
	LONG $0x006de2c4; BYTE $0xd0               // vpshufb    ymm2, ymm2, ymm0
	LONG $0x00fde3c4; WORD $0xe8d2             // vpermq    ymm2, ymm2, 232
	LONG $0x547ffac5; WORD $0x1079             // vmovdqu    oword [rcx + 2*rdi + 16], xmm2
	LONG $0x0c7ffac5; BYTE $0x79               // vmovdqu    oword [rcx + 2*rdi], xmm1
	LONG $0x237de2c4; WORD $0x7a4c; BYTE $0x20 // vpmovsxwd    ymm1, oword [rdx + 2*rdi + 32]
	LONG $0x237de2c4; WORD $0x7a54; BYTE $0x30 // vpmovsxwd    ymm2, oword [rdx + 2*rdi + 48]
	LONG $0xe272e5c5; BYTE $0x0f               // vpsrad    ymm3, ymm2, 15
	LONG $0xe172ddc5; BYTE $0x0f               // vpsrad    ymm4, ymm1, 15
	LONG $0xc9feddc5                           // vpaddd    ymm1, ymm4, ymm1
	LONG $0xd2fee5c5                           // vpaddd    ymm2, ymm3, ymm2
	LONG $0xd3efedc5                           // vpxor    ymm2, ymm2, ymm3
	LONG $0xcceff5c5                           // vpxor    ymm1, ymm1, ymm4
	LONG $0x0075e2c4; BYTE $0xc8               // vpshufb    ymm1, ymm1, ymm0
	LONG $0x00fde3c4; WORD $0xe8c9             // vpermq    ymm1, ymm1, 232
	LONG $0x006de2c4; BYTE $0xd0               // vpshufb    ymm2, ymm2, ymm0
	LONG $0x00fde3c4; WORD $0xe8d2             // vpermq    ymm2, ymm2, 232
	LONG $0x547ffac5; WORD $0x3079             // vmovdqu    oword [rcx + 2*rdi + 48], xmm2
	LONG $0x4c7ffac5; WORD $0x2079             // vmovdqu    oword [rcx + 2*rdi + 32], xmm1
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c08348                           // add    rax, 2
	JNE  LBB3_487
	// LBB3_754 is outside this chunk; presumably it handles an odd trailing chunk.
	JMP  LBB3_754

// LBB3_488: chunk bookkeeping for a vector loop that lives outside this chunk
// (LBB3_602 / LBB3_604).
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -32            elements coverable by whole 32-element chunks
//   rdi = ((rsi - 32) >> 5)+1  number of 32-element chunks
//   r8d = rdi & 3              chunks left over after groups of four
// With rsi - 32 >= 96 (at least four chunks) control goes to LBB3_602;
// otherwise eax is zeroed and control goes straight to LBB3_604.
LBB3_488:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	LONG $0xe0468d48         // lea    rax, [rsi - 32]
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x05efc148         // shr    rdi, 5
	LONG $0x01c78348         // add    rdi, 1
	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
	LONG $0x03e08341         // and    r8d, 3
	LONG $0x60f88348         // cmp    rax, 96
	JAE  LBB3_602
	WORD $0xc031             // xor    eax, eax
	JMP  LBB3_604

// LBB3_490 / LBB3_492: vector path computing the absolute value of signed
// 16-bit elements, reading from [rdx + 2*rdi] and writing to [rcx + 2*rdi].
// Each group of 8 words is sign-extended to 8 dwords, then with
// s = x >> 15 (arithmetic: 0 or -1) the code forms (x + s) ^ s, and finally
// narrows the dwords back to 8 words before the 128-bit store.
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -16           elements coverable by whole 16-element chunks
//   r8  = ((rsi - 16) >> 4)+1 number of 16-element chunks
// A single chunk (rsi == 16) bypasses the 2x-unrolled loop via LBB3_761.
LBB3_490:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	LONG $0xf0468d48         // lea    rax, [rsi - 16]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x04e8c149         // shr    r8, 4
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_761
	// rax = -(r8 & ~1): negative, even chunk counter; rdi = element index.
	// ymm0 = byte-shuffle control from 160[rbp] (.LCPI3_10; defined outside this
	// chunk - presumably it gathers the low word of each dword, confirm).
	// vmovdqa is an aligned load, so rbp + 160 must be 32-byte aligned.
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	QUAD $0x000000a0856ffdc5 // vmovdqa    ymm0, yword 160[rbp] /* [rip + .LCPI3_10] */

// 32 words per iteration (two chunks of 16). vpermq 232 places qwords 0 and 2
// of the shuffled vector in the low 128 bits, which is the part that is stored.
LBB3_492:
	LONG $0x237de2c4; WORD $0x7a0c             // vpmovsxwd    ymm1, oword [rdx + 2*rdi]
	LONG $0x237de2c4; WORD $0x7a54; BYTE $0x10 // vpmovsxwd    ymm2, oword [rdx + 2*rdi + 16]
	LONG $0xe272e5c5; BYTE $0x0f               // vpsrad    ymm3, ymm2, 15
	LONG $0xe172ddc5; BYTE $0x0f               // vpsrad    ymm4, ymm1, 15
	LONG $0xc9feddc5                           // vpaddd    ymm1, ymm4, ymm1
	LONG $0xd2fee5c5                           // vpaddd    ymm2, ymm3, ymm2
	LONG $0xd3efedc5                           // vpxor    ymm2, ymm2, ymm3
	LONG $0xcceff5c5                           // vpxor    ymm1, ymm1, ymm4
	LONG $0x0075e2c4; BYTE $0xc8               // vpshufb    ymm1, ymm1, ymm0
	LONG $0x00fde3c4; WORD $0xe8c9             // vpermq    ymm1, ymm1, 232
	LONG $0x006de2c4; BYTE $0xd0               // vpshufb    ymm2, ymm2, ymm0
	LONG $0x00fde3c4; WORD $0xe8d2             // vpermq    ymm2, ymm2, 232
	LONG $0x547ffac5; WORD $0x1079             // vmovdqu    oword [rcx + 2*rdi + 16], xmm2
	LONG $0x0c7ffac5; BYTE $0x79               // vmovdqu    oword [rcx + 2*rdi], xmm1
	LONG $0x237de2c4; WORD $0x7a4c; BYTE $0x20 // vpmovsxwd    ymm1, oword [rdx + 2*rdi + 32]
	LONG $0x237de2c4; WORD $0x7a54; BYTE $0x30 // vpmovsxwd    ymm2, oword [rdx + 2*rdi + 48]
	LONG $0xe272e5c5; BYTE $0x0f               // vpsrad    ymm3, ymm2, 15
	LONG $0xe172ddc5; BYTE $0x0f               // vpsrad    ymm4, ymm1, 15
	LONG $0xc9feddc5                           // vpaddd    ymm1, ymm4, ymm1
	LONG $0xd2fee5c5                           // vpaddd    ymm2, ymm3, ymm2
	LONG $0xd3efedc5                           // vpxor    ymm2, ymm2, ymm3
	LONG $0xcceff5c5                           // vpxor    ymm1, ymm1, ymm4
	LONG $0x0075e2c4; BYTE $0xc8               // vpshufb    ymm1, ymm1, ymm0
	LONG $0x00fde3c4; WORD $0xe8c9             // vpermq    ymm1, ymm1, 232
	LONG $0x006de2c4; BYTE $0xd0               // vpshufb    ymm2, ymm2, ymm0
	LONG $0x00fde3c4; WORD $0xe8d2             // vpermq    ymm2, ymm2, 232
	LONG $0x547ffac5; WORD $0x3079             // vmovdqu    oword [rcx + 2*rdi + 48], xmm2
	LONG $0x4c7ffac5; WORD $0x2079             // vmovdqu    oword [rcx + 2*rdi + 32], xmm1
	LONG $0x20c78348                           // add    rdi, 32
	LONG $0x02c08348                           // add    rax, 2
	JNE  LBB3_492
	// LBB3_762 is outside this chunk; presumably it handles an odd trailing chunk.
	JMP  LBB3_762

// LBB3_493 / LBB3_495: vector path that stores (0 - x) for every 64-bit lane,
// reading from [rdx + 8*rdi] and writing to [rcx + 8*rdi].
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -16           elements coverable by whole 16-element chunks
//   r8  = ((rsi - 16) >> 4)+1 number of 16-element chunks
// A single chunk (rsi == 16) bypasses the 2x-unrolled loop via LBB3_769.
LBB3_493:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	LONG $0xf0468d48         // lea    rax, [rsi - 16]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x04e8c149         // shr    r8, 4
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_769
	// rax = -(r8 & ~1): negative, even chunk counter; rdi = element index;
	// ymm0 = all zeroes (VEX xmm write clears the upper half).
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

// Each iteration handles two chunks: 32 qwords (256 bytes).
LBB3_495:
	LONG $0x0cfbfdc5; BYTE $0xfa         // vpsubq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20fa       // vpsubq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40fa       // vpsubq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x64fbfdc5; WORD $0x60fa       // vpsubq    ymm4, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c7ffec5; BYTE $0xf9         // vmovdqu    yword [rcx + 8*rdi], ymm1
	LONG $0x547ffec5; WORD $0x20f9       // vmovdqu    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x40f9       // vmovdqu    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x647ffec5; WORD $0x60f9       // vmovdqu    yword [rcx + 8*rdi + 96], ymm4
	QUAD $0x000080fa8cfbfdc5; BYTE $0x00 // vpsubq    ymm1, ymm0, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa94fbfdc5; BYTE $0x00 // vpsubq    ymm2, ymm0, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9cfbfdc5; BYTE $0x00 // vpsubq    ymm3, ymm0, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa4fbfdc5; BYTE $0x00 // vpsubq    ymm4, ymm0, yword [rdx + 8*rdi + 224]
	QUAD $0x000080f98c7ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 8*rdi + 128], ymm1
	QUAD $0x0000a0f9947ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 8*rdi + 160], ymm2
	QUAD $0x0000c0f99c7ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 8*rdi + 192], ymm3
	QUAD $0x0000e0f9a47ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 8*rdi + 224], ymm4
	LONG $0x20c78348                     // add    rdi, 32
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_495
	// LBB3_770 is outside this chunk; presumably it handles an odd trailing chunk.
	JMP  LBB3_770

// LBB3_496 / LBB3_498: vector path that XORs every 32-bit element with a
// broadcast constant, reading from [rdx + 4*rdi] and writing to [rcx + 4*rdi].
// NOTE(review): the constant is the dword at 68[rbp] (.LCPI3_7), defined outside
// this chunk; if it is the sign-bit mask this is a float32 negate - confirm.
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -32           elements coverable by whole 32-element chunks
//   r8  = ((rsi - 32) >> 5)+1 number of 32-element chunks
// A single chunk (rsi == 32) bypasses the 2x-unrolled loop via LBB3_777.
LBB3_496:
	WORD $0x8944; BYTE $0xce       // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0       // and    esi, -32
	LONG $0xe0468d48               // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
	LONG $0x05e8c149               // shr    r8, 5
	LONG $0x01c08349               // add    r8, 1
	WORD $0x8548; BYTE $0xc0       // test    rax, rax
	JE   LBB3_777
	// rax = -(r8 & ~1): negative, even chunk counter; rdi = element index;
	// ymm0 = the constant replicated into all eight dword lanes.
	WORD $0x894c; BYTE $0xc0       // mov    rax, r8
	LONG $0xfee08348               // and    rax, -2
	WORD $0xf748; BYTE $0xd8       // neg    rax
	WORD $0xff31                   // xor    edi, edi
	LONG $0x187de2c4; WORD $0x4445 // vbroadcastss    ymm0, dword 68[rbp] /* [rip + .LCPI3_7] */

// Each iteration handles two chunks: 64 dwords (256 bytes).
LBB3_498:
	LONG $0x0c57fdc5; BYTE $0xba         // vxorpd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x5457fdc5; WORD $0x20ba       // vxorpd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5c57fdc5; WORD $0x40ba       // vxorpd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x6457fdc5; WORD $0x60ba       // vxorpd    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xb9         // vmovupd    yword [rcx + 4*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20b9       // vmovupd    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40b9       // vmovupd    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x6411fdc5; WORD $0x60b9       // vmovupd    yword [rcx + 4*rdi + 96], ymm4
	QUAD $0x000080ba8c57fdc5; BYTE $0x00 // vxorpd    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba9457fdc5; BYTE $0x00 // vxorpd    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9c57fdc5; BYTE $0x00 // vxorpd    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa457fdc5; BYTE $0x00 // vxorpd    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x000080b98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 128], ymm1
	QUAD $0x0000a0b99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 160], ymm2
	QUAD $0x0000c0b99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 192], ymm3
	QUAD $0x0000e0b9a411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 224], ymm4
	LONG $0x40c78348                     // add    rdi, 64
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_498
	// LBB3_778 is outside this chunk; presumably it handles an odd trailing chunk.
	JMP  LBB3_778

// LBB3_499 / LBB3_501: vector path that stores (0 - x) for every 64-bit lane,
// reading from [rdx + 8*rdi] and writing to [rcx + 8*rdi].
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -16           elements coverable by whole 16-element chunks
//   r8  = ((rsi - 16) >> 4)+1 number of 16-element chunks
// A single chunk (rsi == 16) bypasses the 2x-unrolled loop via LBB3_787.
LBB3_499:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	LONG $0xf0468d48         // lea    rax, [rsi - 16]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x04e8c149         // shr    r8, 4
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_787
	// rax = -(r8 & ~1): negative, even chunk counter; rdi = element index;
	// ymm0 = all zeroes (VEX xmm write clears the upper half).
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

// Each iteration handles two chunks: 32 qwords (256 bytes).
LBB3_501:
	LONG $0x0cfbfdc5; BYTE $0xfa         // vpsubq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20fa       // vpsubq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40fa       // vpsubq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x64fbfdc5; WORD $0x60fa       // vpsubq    ymm4, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c7ffec5; BYTE $0xf9         // vmovdqu    yword [rcx + 8*rdi], ymm1
	LONG $0x547ffec5; WORD $0x20f9       // vmovdqu    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x40f9       // vmovdqu    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x647ffec5; WORD $0x60f9       // vmovdqu    yword [rcx + 8*rdi + 96], ymm4
	QUAD $0x000080fa8cfbfdc5; BYTE $0x00 // vpsubq    ymm1, ymm0, yword [rdx + 8*rdi + 128]
	QUAD $0x0000a0fa94fbfdc5; BYTE $0x00 // vpsubq    ymm2, ymm0, yword [rdx + 8*rdi + 160]
	QUAD $0x0000c0fa9cfbfdc5; BYTE $0x00 // vpsubq    ymm3, ymm0, yword [rdx + 8*rdi + 192]
	QUAD $0x0000e0faa4fbfdc5; BYTE $0x00 // vpsubq    ymm4, ymm0, yword [rdx + 8*rdi + 224]
	QUAD $0x000080f98c7ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 8*rdi + 128], ymm1
	QUAD $0x0000a0f9947ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 8*rdi + 160], ymm2
	QUAD $0x0000c0f99c7ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 8*rdi + 192], ymm3
	QUAD $0x0000e0f9a47ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 8*rdi + 224], ymm4
	LONG $0x20c78348                     // add    rdi, 32
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_501
	// LBB3_788 is outside this chunk; presumably it handles an odd trailing chunk.
	JMP  LBB3_788

// LBB3_502 / LBB3_504: vector path that XORs every 32-bit element with a
// broadcast constant, reading from [rdx + 4*rdi] and writing to [rcx + 4*rdi].
// NOTE(review): the constant is the dword at 68[rbp] (.LCPI3_7), defined outside
// this chunk; if it is the sign-bit mask this is a float32 negate - confirm.
// NOTE(review): r9d is treated as the element count (set outside this chunk).
//   esi = r9d & -32           elements coverable by whole 32-element chunks
//   r8  = ((rsi - 32) >> 5)+1 number of 32-element chunks
// A single chunk (rsi == 32) bypasses the 2x-unrolled loop via LBB3_795.
LBB3_502:
	WORD $0x8944; BYTE $0xce       // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0       // and    esi, -32
	LONG $0xe0468d48               // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
	LONG $0x05e8c149               // shr    r8, 5
	LONG $0x01c08349               // add    r8, 1
	WORD $0x8548; BYTE $0xc0       // test    rax, rax
	JE   LBB3_795
	// rax = -(r8 & ~1): negative, even chunk counter; rdi = element index;
	// ymm0 = the constant replicated into all eight dword lanes.
	WORD $0x894c; BYTE $0xc0       // mov    rax, r8
	LONG $0xfee08348               // and    rax, -2
	WORD $0xf748; BYTE $0xd8       // neg    rax
	WORD $0xff31                   // xor    edi, edi
	LONG $0x187de2c4; WORD $0x4445 // vbroadcastss    ymm0, dword 68[rbp] /* [rip + .LCPI3_7] */

// Each iteration handles two chunks: 64 dwords (256 bytes).
LBB3_504:
	LONG $0x0c57fdc5; BYTE $0xba         // vxorpd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x5457fdc5; WORD $0x20ba       // vxorpd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5c57fdc5; WORD $0x40ba       // vxorpd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x6457fdc5; WORD $0x60ba       // vxorpd    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xb9         // vmovupd    yword [rcx + 4*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20b9       // vmovupd    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40b9       // vmovupd    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x6411fdc5; WORD $0x60b9       // vmovupd    yword [rcx + 4*rdi + 96], ymm4
	QUAD $0x000080ba8c57fdc5; BYTE $0x00 // vxorpd    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba9457fdc5; BYTE $0x00 // vxorpd    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9c57fdc5; BYTE $0x00 // vxorpd    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa457fdc5; BYTE $0x00 // vxorpd    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x000080b98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 128], ymm1
	QUAD $0x0000a0b99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 160], ymm2
	QUAD $0x0000c0b99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 192], ymm3
	QUAD $0x0000e0b9a411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 224], ymm4
	LONG $0x40c78348                     // add    rdi, 64
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_504
	// LBB3_796 is outside this chunk; presumably it handles an odd trailing chunk.
	JMP  LBB3_796

// LBB3_505 / LBB3_506: vector path over signed 64-bit lanes computing a
// three-way select against the constant C held in ymm2:
//   x >= C           -> C
//   x <  C, x != 0   -> all-ones (-1)
//   x <  C, x == 0   -> 0
// NOTE(review): C is the qword at 16[rbp] (.LCPI3_4), defined outside this
// chunk; if it is 1 this is signum(x) - confirm.
// NOTE(review): r11 is treated as the element count here (set outside this chunk).
//   esi  = r11d & -16  elements handled by the vector loop
//   rdi  = element index
//   ymm0 = all zeroes, ymm1 = all ones, ymm2 = C in every qword lane
LBB3_505:
	WORD $0x8944; BYTE $0xde       // mov    esi, r11d
	WORD $0xe683; BYTE $0xf0       // and    esi, -16
	WORD $0xff31                   // xor    edi, edi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5               // vpcmpeqd    ymm1, ymm1, ymm1
	LONG $0x597de2c4; WORD $0x1055 // vpbroadcastq    ymm2, qword 16[rbp] /* [rip + .LCPI3_4] */

// 16 qwords per iteration.
// Per lane: nz = ~(x == 0); lt = (C > x); result = lt ? nz : C.
// vblendvpd selects on the top bit of each 64-bit mask lane, which is enough
// because the compare result is all-ones or all-zeroes per lane.
LBB3_506:
	LONG $0x1c6ffec5; BYTE $0xfa   // vmovdqu    ymm3, yword [rdx + 8*rdi]
	LONG $0x646ffec5; WORD $0x20fa // vmovdqu    ymm4, yword [rdx + 8*rdi + 32]
	LONG $0x6c6ffec5; WORD $0x40fa // vmovdqu    ymm5, yword [rdx + 8*rdi + 64]
	LONG $0x746ffec5; WORD $0x60fa // vmovdqu    ymm6, yword [rdx + 8*rdi + 96]
	LONG $0x2965e2c4; BYTE $0xf8   // vpcmpeqq    ymm7, ymm3, ymm0
	LONG $0xf9efc5c5               // vpxor    ymm7, ymm7, ymm1
	LONG $0x295d62c4; BYTE $0xc0   // vpcmpeqq    ymm8, ymm4, ymm0
	LONG $0xc1ef3dc5               // vpxor    ymm8, ymm8, ymm1
	LONG $0x295562c4; BYTE $0xc8   // vpcmpeqq    ymm9, ymm5, ymm0
	LONG $0xc9ef35c5               // vpxor    ymm9, ymm9, ymm1
	LONG $0x294d62c4; BYTE $0xd0   // vpcmpeqq    ymm10, ymm6, ymm0
	LONG $0xd1ef2dc5               // vpxor    ymm10, ymm10, ymm1
	LONG $0x376de2c4; BYTE $0xdb   // vpcmpgtq    ymm3, ymm2, ymm3
	LONG $0x376de2c4; BYTE $0xe4   // vpcmpgtq    ymm4, ymm2, ymm4
	LONG $0x376de2c4; BYTE $0xed   // vpcmpgtq    ymm5, ymm2, ymm5
	LONG $0x376de2c4; BYTE $0xf6   // vpcmpgtq    ymm6, ymm2, ymm6
	LONG $0x4b6de3c4; WORD $0x30df // vblendvpd    ymm3, ymm2, ymm7, ymm3
	LONG $0x4b6dc3c4; WORD $0x40e0 // vblendvpd    ymm4, ymm2, ymm8, ymm4
	LONG $0x4b6dc3c4; WORD $0x50e9 // vblendvpd    ymm5, ymm2, ymm9, ymm5
	LONG $0x4b6dc3c4; WORD $0x60f2 // vblendvpd    ymm6, ymm2, ymm10, ymm6
	LONG $0x1c11fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm3
	LONG $0x6411fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm4
	LONG $0x6c11fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm5
	LONG $0x7411fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm6
	LONG $0x10c78348               // add    rdi, 16
	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
	JNE  LBB3_506
	// If the vector loop covered all r11 elements go to LBB3_865, otherwise to
	// LBB3_508 (both outside this chunk; presumably exit and scalar remainder).
	WORD $0x394c; BYTE $0xde       // cmp    rsi, r11
	JE   LBB3_865
	JMP  LBB3_508

// LBB3_513 / LBB3_514: AVX2 loop over 32-bit float lanes, 32 elements
// (4 x ymm) per iteration. Reads [rdx + 4*rdi], writes [rcx + 4*rdi].
// Per lane x, with C = the dword broadcast from 64[rbp] (.LCPI3_3):
//   t   = float((bits(x) >> 31, arithmetic) | C)
//   out = (x != 0.0) ? t : +0.0
// vcmpneqps (predicate 4) is also true for NaN inputs, so NaN lanes yield t.
// NOTE(review): the value of C is not visible in this chunk; if C == 1 then t
// is -1.0 or +1.0 and this is a float sign function -- confirm.
// rsi = eax rounded down to a multiple of 32 is the vectorized element count.
LBB3_513:
	WORD $0xc689                   // mov    esi, eax
	WORD $0xe683; BYTE $0xe0       // and    esi, -32
	WORD $0xff31                   // xor    edi, edi
	LONG $0xc057f8c5               // vxorps    xmm0, xmm0, xmm0
	LONG $0x587de2c4; WORD $0x404d // vpbroadcastd    ymm1, dword 64[rbp] /* [rip + .LCPI3_3] */

	// Loop invariants: ymm0 = 0.0, ymm1 = C, rdi = element index.
LBB3_514:
	LONG $0x146ffec5; BYTE $0xba   // vmovdqu    ymm2, yword [rdx + 4*rdi]
	LONG $0x5c6ffec5; WORD $0x20ba // vmovdqu    ymm3, yword [rdx + 4*rdi + 32]
	LONG $0x646ffec5; WORD $0x40ba // vmovdqu    ymm4, yword [rdx + 4*rdi + 64]
	LONG $0x6c6ffec5; WORD $0x60ba // vmovdqu    ymm5, yword [rdx + 4*rdi + 96]
	// Smear the sign bit across each lane (all ones or zero) and OR in C.
	LONG $0xe272cdc5; BYTE $0x1f   // vpsrad    ymm6, ymm2, 31
	LONG $0xf1ebcdc5               // vpor    ymm6, ymm6, ymm1
	LONG $0xe372c5c5; BYTE $0x1f   // vpsrad    ymm7, ymm3, 31
	LONG $0xf9ebc5c5               // vpor    ymm7, ymm7, ymm1
	LONG $0xe472bdc5; BYTE $0x1f   // vpsrad    ymm8, ymm4, 31
	LONG $0xc1eb3dc5               // vpor    ymm8, ymm8, ymm1
	LONG $0xe572b5c5; BYTE $0x1f   // vpsrad    ymm9, ymm5, 31
	LONG $0xc9eb35c5               // vpor    ymm9, ymm9, ymm1
	// Convert the resulting 32-bit integers to float.
	LONG $0xf65bfcc5               // vcvtdq2ps    ymm6, ymm6
	LONG $0xff5bfcc5               // vcvtdq2ps    ymm7, ymm7
	LONG $0x5b7c41c4; BYTE $0xc0   // vcvtdq2ps    ymm8, ymm8
	LONG $0x5b7c41c4; BYTE $0xc9   // vcvtdq2ps    ymm9, ymm9
	// Keep the converted value only where the input compares not-equal to 0.0.
	LONG $0xd0c2ecc5; BYTE $0x04   // vcmpneqps    ymm2, ymm2, ymm0
	LONG $0xd654ecc5               // vandps    ymm2, ymm2, ymm6
	LONG $0xd8c2e4c5; BYTE $0x04   // vcmpneqps    ymm3, ymm3, ymm0
	LONG $0xdf54e4c5               // vandps    ymm3, ymm3, ymm7
	LONG $0xe0c2dcc5; BYTE $0x04   // vcmpneqps    ymm4, ymm4, ymm0
	LONG $0xe454bcc5               // vandps    ymm4, ymm8, ymm4
	LONG $0xe8c2d4c5; BYTE $0x04   // vcmpneqps    ymm5, ymm5, ymm0
	LONG $0xed54b4c5               // vandps    ymm5, ymm9, ymm5
	LONG $0x1411fcc5; BYTE $0xb9   // vmovups    yword [rcx + 4*rdi], ymm2
	LONG $0x5c11fcc5; WORD $0x20b9 // vmovups    yword [rcx + 4*rdi + 32], ymm3
	LONG $0x6411fcc5; WORD $0x40b9 // vmovups    yword [rcx + 4*rdi + 64], ymm4
	LONG $0x6c11fcc5; WORD $0x60b9 // vmovups    yword [rcx + 4*rdi + 96], ymm5
	LONG $0x20c78348               // add    rdi, 32
	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
	JNE  LBB3_514
	// If the rounded count equals rax nothing is left over; otherwise continue
	// at LBB3_516 (outside this chunk; presumably the scalar remainder loop).
	WORD $0x3948; BYTE $0xc6       // cmp    rsi, rax
	JE   LBB3_865
	JMP  LBB3_516

// LBB3_521 / LBB3_522: AVX2 absolute value over 64-bit integer lanes,
// 16 elements (4 x ymm) per iteration. Reads [rdx + 8*rdi], writes
// [rcx + 8*rdi]. Each lane computes 0 - x and then selects it when the sign
// bit of x is set (vblendvpd keyed on x itself), otherwise keeps x. The
// subtraction wraps, so the most negative value maps to itself.
// rsi = r10d rounded down to a multiple of 16 is the vectorized element count.
LBB3_521:
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

	// Loop invariants: ymm0 = 0, rdi = element index.
LBB3_522:
	LONG $0x0c6ffec5; BYTE $0xfa   // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0xe1fbfdc5               // vpsubq    ymm4, ymm0, ymm1
	LONG $0x4b75e3c4; WORD $0x10cc // vblendvpd    ymm1, ymm1, ymm4, ymm1
	LONG $0x646ffec5; WORD $0x60fa // vmovdqu    ymm4, yword [rdx + 8*rdi + 96]
	LONG $0xeafbfdc5               // vpsubq    ymm5, ymm0, ymm2
	LONG $0x4b6de3c4; WORD $0x20d5 // vblendvpd    ymm2, ymm2, ymm5, ymm2
	LONG $0xebfbfdc5               // vpsubq    ymm5, ymm0, ymm3
	LONG $0x4b65e3c4; WORD $0x30dd // vblendvpd    ymm3, ymm3, ymm5, ymm3
	LONG $0xecfbfdc5               // vpsubq    ymm5, ymm0, ymm4
	LONG $0x4b5de3c4; WORD $0x40e5 // vblendvpd    ymm4, ymm4, ymm5, ymm4
	LONG $0x0c11fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x6411fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm4
	LONG $0x10c78348               // add    rdi, 16
	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
	JNE  LBB3_522
	// If the rounded count equals r10 nothing is left over; otherwise continue
	// at LBB3_524 (outside this chunk; presumably the scalar remainder loop).
	WORD $0x394c; BYTE $0xd6       // cmp    rsi, r10
	JE   LBB3_865
	JMP  LBB3_524

// LBB3_528 / LBB3_530: AVX2 bitwise AND of 32-bit lanes with a broadcast
// constant M loaded from 72[rbp] (.LCPI3_9). Reads [rdx + 4*rdi], writes
// [rcx + 4*rdi].
// NOTE(review): M is not visible in this chunk; presumably 0x7fffffff, which
// would make this a float32 absolute value -- confirm.
// Trip-count setup:
//   rsi = r9d rounded down to a multiple of 32 (vectorized element count)
//   r8  = number of 32-element groups = ((rsi - 32) >> 5) + 1
//   rax = -(r8 & -2), counted up by 2 to zero; each pass handles two groups
//         (64 elements).
// When there is exactly one group (rsi - 32 == 0) the unrolled loop is skipped
// via LBB3_805. LBB3_805/LBB3_806 are outside this chunk; presumably they
// handle an odd leftover group and the remainder.
LBB3_528:
	WORD $0x8944; BYTE $0xce       // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0       // and    esi, -32
	LONG $0xe0468d48               // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
	LONG $0x05e8c149               // shr    r8, 5
	LONG $0x01c08349               // add    r8, 1
	WORD $0x8548; BYTE $0xc0       // test    rax, rax
	JE   LBB3_805
	WORD $0x894c; BYTE $0xc0       // mov    rax, r8
	LONG $0xfee08348               // and    rax, -2
	WORD $0xf748; BYTE $0xd8       // neg    rax
	WORD $0xff31                   // xor    edi, edi
	LONG $0x187de2c4; WORD $0x4845 // vbroadcastss    ymm0, dword 72[rbp] /* [rip + .LCPI3_9] */

	// Loop invariants: ymm0 = M, rdi = element index, rax = negative pass counter.
LBB3_530:
	LONG $0x0c54fdc5; BYTE $0xba         // vandpd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x5454fdc5; WORD $0x20ba       // vandpd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5c54fdc5; WORD $0x40ba       // vandpd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x6454fdc5; WORD $0x60ba       // vandpd    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xb9         // vmovupd    yword [rcx + 4*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20b9       // vmovupd    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40b9       // vmovupd    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x6411fdc5; WORD $0x60b9       // vmovupd    yword [rcx + 4*rdi + 96], ymm4
	QUAD $0x000080ba8c54fdc5; BYTE $0x00 // vandpd    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba9454fdc5; BYTE $0x00 // vandpd    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9c54fdc5; BYTE $0x00 // vandpd    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa454fdc5; BYTE $0x00 // vandpd    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x000080b98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 128], ymm1
	QUAD $0x0000a0b99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 160], ymm2
	QUAD $0x0000c0b99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 192], ymm3
	QUAD $0x0000e0b9a411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 224], ymm4
	LONG $0x40c78348                     // add    rdi, 64
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_530
	JMP  LBB3_806

// LBB3_531 / LBB3_532: AVX2 absolute value over 64-bit integer lanes,
// 16 elements (4 x ymm) per iteration; instruction-for-instruction the same
// as the LBB3_521 loop but exiting to LBB3_534. Reads [rdx + 8*rdi], writes
// [rcx + 8*rdi]. Each lane computes 0 - x and selects it when the sign bit of
// x is set, otherwise keeps x; the most negative value maps to itself.
// rsi = r10d rounded down to a multiple of 16 is the vectorized element count.
LBB3_531:
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xf0 // and    esi, -16
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

	// Loop invariants: ymm0 = 0, rdi = element index.
LBB3_532:
	LONG $0x0c6ffec5; BYTE $0xfa   // vmovdqu    ymm1, yword [rdx + 8*rdi]
	LONG $0x546ffec5; WORD $0x20fa // vmovdqu    ymm2, yword [rdx + 8*rdi + 32]
	LONG $0x5c6ffec5; WORD $0x40fa // vmovdqu    ymm3, yword [rdx + 8*rdi + 64]
	LONG $0xe1fbfdc5               // vpsubq    ymm4, ymm0, ymm1
	LONG $0x4b75e3c4; WORD $0x10cc // vblendvpd    ymm1, ymm1, ymm4, ymm1
	LONG $0x646ffec5; WORD $0x60fa // vmovdqu    ymm4, yword [rdx + 8*rdi + 96]
	LONG $0xeafbfdc5               // vpsubq    ymm5, ymm0, ymm2
	LONG $0x4b6de3c4; WORD $0x20d5 // vblendvpd    ymm2, ymm2, ymm5, ymm2
	LONG $0xebfbfdc5               // vpsubq    ymm5, ymm0, ymm3
	LONG $0x4b65e3c4; WORD $0x30dd // vblendvpd    ymm3, ymm3, ymm5, ymm3
	LONG $0xecfbfdc5               // vpsubq    ymm5, ymm0, ymm4
	LONG $0x4b5de3c4; WORD $0x40e5 // vblendvpd    ymm4, ymm4, ymm5, ymm4
	LONG $0x0c11fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x6411fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm4
	LONG $0x10c78348               // add    rdi, 16
	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
	JNE  LBB3_532
	// If the rounded count equals r10 nothing is left over; otherwise continue
	// at LBB3_534 (outside this chunk; presumably the scalar remainder loop).
	WORD $0x394c; BYTE $0xd6       // cmp    rsi, r10
	JE   LBB3_865
	JMP  LBB3_534

// LBB3_538 / LBB3_540: AVX2 bitwise AND of 32-bit lanes with a broadcast
// constant M loaded from 72[rbp] (.LCPI3_9); instruction-for-instruction the
// same as the LBB3_528 loop but exiting to LBB3_815/LBB3_816. Reads
// [rdx + 4*rdi], writes [rcx + 4*rdi].
// NOTE(review): M is not visible in this chunk; presumably 0x7fffffff
// (float32 absolute value) -- confirm.
// Trip-count setup:
//   rsi = r9d rounded down to a multiple of 32 (vectorized element count)
//   r8  = number of 32-element groups = ((rsi - 32) >> 5) + 1
//   rax = -(r8 & -2), counted up by 2 to zero; each pass handles two groups.
// With exactly one group the unrolled loop is skipped via LBB3_815
// (outside this chunk).
LBB3_538:
	WORD $0x8944; BYTE $0xce       // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0       // and    esi, -32
	LONG $0xe0468d48               // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
	LONG $0x05e8c149               // shr    r8, 5
	LONG $0x01c08349               // add    r8, 1
	WORD $0x8548; BYTE $0xc0       // test    rax, rax
	JE   LBB3_815
	WORD $0x894c; BYTE $0xc0       // mov    rax, r8
	LONG $0xfee08348               // and    rax, -2
	WORD $0xf748; BYTE $0xd8       // neg    rax
	WORD $0xff31                   // xor    edi, edi
	LONG $0x187de2c4; WORD $0x4845 // vbroadcastss    ymm0, dword 72[rbp] /* [rip + .LCPI3_9] */

	// Loop invariants: ymm0 = M, rdi = element index, rax = negative pass counter.
LBB3_540:
	LONG $0x0c54fdc5; BYTE $0xba         // vandpd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x5454fdc5; WORD $0x20ba       // vandpd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5c54fdc5; WORD $0x40ba       // vandpd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x6454fdc5; WORD $0x60ba       // vandpd    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xb9         // vmovupd    yword [rcx + 4*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20b9       // vmovupd    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40b9       // vmovupd    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x6411fdc5; WORD $0x60b9       // vmovupd    yword [rcx + 4*rdi + 96], ymm4
	QUAD $0x000080ba8c54fdc5; BYTE $0x00 // vandpd    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba9454fdc5; BYTE $0x00 // vandpd    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9c54fdc5; BYTE $0x00 // vandpd    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa454fdc5; BYTE $0x00 // vandpd    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x000080b98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 128], ymm1
	QUAD $0x0000a0b99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 160], ymm2
	QUAD $0x0000c0b99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 192], ymm3
	QUAD $0x0000e0b9a411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 224], ymm4
	LONG $0x40c78348                     // add    rdi, 64
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_540
	JMP  LBB3_816

// LBB3_548 / LBB3_550: AVX2 negation of 8-bit lanes (0 - x, wrapping).
// Reads [rdx + rdi], writes [rcx + rdi].
// Trip-count setup:
//   rsi = r9d rounded down to a multiple of 128 (vectorized byte count)
//   r8  = number of 128-byte groups = ((rsi - 128) >> 7) + 1
//   rax = -(r8 & -2), counted up by 2 to zero; each pass handles two groups
//         (256 bytes).
// With exactly one group the unrolled loop is skipped via LBB3_825.
// LBB3_825/LBB3_826 are outside this chunk; presumably they handle an odd
// leftover group and the remainder.
LBB3_548:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	LONG $0x80468d48         // lea    rax, [rsi - 128]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x07e8c149         // shr    r8, 7
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_825
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

	// Loop invariants: ymm0 = 0, rdi = byte index, rax = negative pass counter.
LBB3_550:
	LONG $0x0cf8fdc5; BYTE $0x3a               // vpsubb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54f8fdc5; WORD $0x203a             // vpsubb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x403a             // vpsubb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x64f8fdc5; WORD $0x603a             // vpsubb    ymm4, ymm0, yword [rdx + rdi + 96]
	LONG $0x0c7ffec5; BYTE $0x39               // vmovdqu    yword [rcx + rdi], ymm1
	LONG $0x547ffec5; WORD $0x2039             // vmovdqu    yword [rcx + rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x4039             // vmovdqu    yword [rcx + rdi + 64], ymm3
	LONG $0x647ffec5; WORD $0x6039             // vmovdqu    yword [rcx + rdi + 96], ymm4
	QUAD $0x0000803a8cf8fdc5; BYTE $0x00       // vpsubb    ymm1, ymm0, yword [rdx + rdi + 128]
	QUAD $0x0000a03a94f8fdc5; BYTE $0x00       // vpsubb    ymm2, ymm0, yword [rdx + rdi + 160]
	QUAD $0x0000c03a9cf8fdc5; BYTE $0x00       // vpsubb    ymm3, ymm0, yword [rdx + rdi + 192]
	QUAD $0x0000e03aa4f8fdc5; BYTE $0x00       // vpsubb    ymm4, ymm0, yword [rdx + rdi + 224]
	QUAD $0x000080398c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + rdi + 128], ymm1
	QUAD $0x0000a039947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + rdi + 160], ymm2
	QUAD $0x0000c0399c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + rdi + 192], ymm3
	QUAD $0x0000e039a47ffec5; BYTE $0x00       // vmovdqu    yword [rcx + rdi + 224], ymm4
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c08348                           // add    rax, 2
	JNE  LBB3_550
	JMP  LBB3_826

// LBB3_551 / LBB3_552: AVX2 loop over 8-bit lanes, 128 bytes (4 x ymm) per
// iteration. Reads [rdx + rdi], writes [rcx + rdi].
// Per byte x, with K = the 32-byte constant at 128[rbp] (.LCPI3_6):
//   out = (x == 0) ? 0 : K
// NOTE(review): K is not visible in this chunk; presumably every byte is 1,
// which would make this the sign of an unsigned byte -- confirm.
// NOTE(review): K is loaded with vmovdqa, an aligned load, so rbp + 128 must
// be 32-byte aligned -- confirm the constant table's alignment.
// rsi = r9d rounded down to a multiple of 128 is the vectorized byte count.
LBB3_551:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	QUAD $0x000000808d6ffdc5 // vmovdqa    ymm1, yword 128[rbp] /* [rip + .LCPI3_6] */

	// Loop invariants: ymm0 = 0, ymm1 = K, rdi = byte index.
	// vpcmpeqb yields all ones where x == 0; vpandn then computes (~mask) & K.
LBB3_552:
	LONG $0x1474fdc5; BYTE $0x3a   // vpcmpeqb    ymm2, ymm0, yword [rdx + rdi]
	LONG $0xd1dfedc5               // vpandn    ymm2, ymm2, ymm1
	LONG $0x5c74fdc5; WORD $0x203a // vpcmpeqb    ymm3, ymm0, yword [rdx + rdi + 32]
	LONG $0xd9dfe5c5               // vpandn    ymm3, ymm3, ymm1
	LONG $0x6474fdc5; WORD $0x403a // vpcmpeqb    ymm4, ymm0, yword [rdx + rdi + 64]
	LONG $0x6c74fdc5; WORD $0x603a // vpcmpeqb    ymm5, ymm0, yword [rdx + rdi + 96]
	LONG $0xe1dfddc5               // vpandn    ymm4, ymm4, ymm1
	LONG $0xe9dfd5c5               // vpandn    ymm5, ymm5, ymm1
	LONG $0x147ffec5; BYTE $0x39   // vmovdqu    yword [rcx + rdi], ymm2
	LONG $0x5c7ffec5; WORD $0x2039 // vmovdqu    yword [rcx + rdi + 32], ymm3
	LONG $0x647ffec5; WORD $0x4039 // vmovdqu    yword [rcx + rdi + 64], ymm4
	LONG $0x6c7ffec5; WORD $0x6039 // vmovdqu    yword [rcx + rdi + 96], ymm5
	// "sub rdi, -128" advances rdi by 128; -128 fits the sign-extended 8-bit
	// immediate form, +128 would not.
	LONG $0x80ef8348               // sub    rdi, -128
	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
	JNE  LBB3_552
	// If the rounded count equals r9 nothing is left over; otherwise continue
	// at LBB3_554 (outside this chunk; presumably the scalar remainder loop).
	WORD $0x394c; BYTE $0xce       // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_554

// LBB3_558 / LBB3_560: AVX2 straight copy from [rdx + rdi] to [rcx + rdi];
// the data is moved unchanged, with unaligned loads and stores.
// Trip-count setup:
//   rsi = r9d rounded down to a multiple of 128 (vectorized byte count)
//   r8  = number of 128-byte groups = ((rsi - 128) >> 7) + 1
//   rax = -(r8 & -2), counted up by 2 to zero; each pass copies two groups
//         (256 bytes).
// With exactly one group the unrolled loop is skipped via LBB3_833.
// LBB3_833/LBB3_834 are outside this chunk; presumably they handle an odd
// leftover group and the remainder.
LBB3_558:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	LONG $0x80468d48         // lea    rax, [rsi - 128]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x07e8c149         // shr    r8, 7
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_833
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi

	// Loop invariants: rdi = byte index, rax = negative pass counter.
LBB3_560:
	LONG $0x0410fcc5; BYTE $0x3a               // vmovups    ymm0, yword [rdx + rdi]
	LONG $0x4c10fcc5; WORD $0x203a             // vmovups    ymm1, yword [rdx + rdi + 32]
	LONG $0x5410fcc5; WORD $0x403a             // vmovups    ymm2, yword [rdx + rdi + 64]
	LONG $0x5c10fcc5; WORD $0x603a             // vmovups    ymm3, yword [rdx + rdi + 96]
	LONG $0x0411fcc5; BYTE $0x39               // vmovups    yword [rcx + rdi], ymm0
	LONG $0x4c11fcc5; WORD $0x2039             // vmovups    yword [rcx + rdi + 32], ymm1
	LONG $0x5411fcc5; WORD $0x4039             // vmovups    yword [rcx + rdi + 64], ymm2
	LONG $0x5c11fcc5; WORD $0x6039             // vmovups    yword [rcx + rdi + 96], ymm3
	QUAD $0x0000803a8410fdc5; BYTE $0x00       // vmovupd    ymm0, yword [rdx + rdi + 128]
	QUAD $0x0000a03a8c10fdc5; BYTE $0x00       // vmovupd    ymm1, yword [rdx + rdi + 160]
	QUAD $0x0000c03a9410fdc5; BYTE $0x00       // vmovupd    ymm2, yword [rdx + rdi + 192]
	QUAD $0x0000e03a9c10fdc5; BYTE $0x00       // vmovupd    ymm3, yword [rdx + rdi + 224]
	QUAD $0x000080398411fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 128], ymm0
	QUAD $0x0000a0398c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 160], ymm1
	QUAD $0x0000c0399411fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 192], ymm2
	QUAD $0x0000e0399c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 224], ymm3
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c08348                           // add    rax, 2
	JNE  LBB3_560
	JMP  LBB3_834

// LBB3_561 / LBB3_563: AVX2 straight copy from [rdx + rdi] to [rcx + rdi];
// instruction-for-instruction the same as the LBB3_558 loop but exiting to
// LBB3_841/LBB3_842 (both outside this chunk).
// Trip-count setup:
//   rsi = r9d rounded down to a multiple of 128 (vectorized byte count)
//   r8  = number of 128-byte groups = ((rsi - 128) >> 7) + 1
//   rax = -(r8 & -2), counted up by 2 to zero; each pass copies two groups
//         (256 bytes).
// With exactly one group the unrolled loop is skipped via LBB3_841.
LBB3_561:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0x80 // and    esi, -128
	LONG $0x80468d48         // lea    rax, [rsi - 128]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x07e8c149         // shr    r8, 7
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_841
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi

	// Loop invariants: rdi = byte index, rax = negative pass counter.
LBB3_563:
	LONG $0x0410fcc5; BYTE $0x3a               // vmovups    ymm0, yword [rdx + rdi]
	LONG $0x4c10fcc5; WORD $0x203a             // vmovups    ymm1, yword [rdx + rdi + 32]
	LONG $0x5410fcc5; WORD $0x403a             // vmovups    ymm2, yword [rdx + rdi + 64]
	LONG $0x5c10fcc5; WORD $0x603a             // vmovups    ymm3, yword [rdx + rdi + 96]
	LONG $0x0411fcc5; BYTE $0x39               // vmovups    yword [rcx + rdi], ymm0
	LONG $0x4c11fcc5; WORD $0x2039             // vmovups    yword [rcx + rdi + 32], ymm1
	LONG $0x5411fcc5; WORD $0x4039             // vmovups    yword [rcx + rdi + 64], ymm2
	LONG $0x5c11fcc5; WORD $0x6039             // vmovups    yword [rcx + rdi + 96], ymm3
	QUAD $0x0000803a8410fdc5; BYTE $0x00       // vmovupd    ymm0, yword [rdx + rdi + 128]
	QUAD $0x0000a03a8c10fdc5; BYTE $0x00       // vmovupd    ymm1, yword [rdx + rdi + 160]
	QUAD $0x0000c03a9410fdc5; BYTE $0x00       // vmovupd    ymm2, yword [rdx + rdi + 192]
	QUAD $0x0000e03a9c10fdc5; BYTE $0x00       // vmovupd    ymm3, yword [rdx + rdi + 224]
	QUAD $0x000080398411fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 128], ymm0
	QUAD $0x0000a0398c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 160], ymm1
	QUAD $0x0000c0399411fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 192], ymm2
	QUAD $0x0000e0399c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 224], ymm3
	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
	LONG $0x02c08348                           // add    rax, 2
	JNE  LBB3_563
	JMP  LBB3_842

// LBB3_564 / LBB3_566: AVX2 negation of 32-bit integer lanes (0 - x, wrapping).
// Reads [rdx + 4*rdi], writes [rcx + 4*rdi].
// Trip-count setup:
//   rsi = r9d rounded down to a multiple of 32 (vectorized element count)
//   r8  = number of 32-element groups = ((rsi - 32) >> 5) + 1
//   rax = -(r8 & -2), counted up by 2 to zero; each pass handles two groups
//         (64 elements).
// With exactly one group the unrolled loop is skipped via LBB3_849.
// LBB3_849/LBB3_850 are outside this chunk; presumably they handle an odd
// leftover group and the remainder.
LBB3_564:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	LONG $0xe0468d48         // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x05e8c149         // shr    r8, 5
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_849
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

	// Loop invariants: ymm0 = 0, rdi = element index, rax = negative pass counter.
LBB3_566:
	LONG $0x0cfafdc5; BYTE $0xba         // vpsubd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20ba       // vpsubd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40ba       // vpsubd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x64fafdc5; WORD $0x60ba       // vpsubd    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c7ffec5; BYTE $0xb9         // vmovdqu    yword [rcx + 4*rdi], ymm1
	LONG $0x547ffec5; WORD $0x20b9       // vmovdqu    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x40b9       // vmovdqu    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x647ffec5; WORD $0x60b9       // vmovdqu    yword [rcx + 4*rdi + 96], ymm4
	QUAD $0x000080ba8cfafdc5; BYTE $0x00 // vpsubd    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba94fafdc5; BYTE $0x00 // vpsubd    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9cfafdc5; BYTE $0x00 // vpsubd    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa4fafdc5; BYTE $0x00 // vpsubd    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x000080b98c7ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 4*rdi + 128], ymm1
	QUAD $0x0000a0b9947ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 4*rdi + 160], ymm2
	QUAD $0x0000c0b99c7ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 4*rdi + 192], ymm3
	QUAD $0x0000e0b9a47ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 4*rdi + 224], ymm4
	LONG $0x40c78348                     // add    rdi, 64
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_566
	JMP  LBB3_850

// LBB3_567 / LBB3_569: AVX2 negation of 32-bit integer lanes (0 - x, wrapping);
// instruction-for-instruction the same as the LBB3_564 loop but exiting to
// LBB3_857/LBB3_858 (both outside this chunk). Reads [rdx + 4*rdi], writes
// [rcx + 4*rdi].
// Trip-count setup:
//   rsi = r9d rounded down to a multiple of 32 (vectorized element count)
//   r8  = number of 32-element groups = ((rsi - 32) >> 5) + 1
//   rax = -(r8 & -2), counted up by 2 to zero; each pass handles two groups
//         (64 elements).
// With exactly one group the unrolled loop is skipped via LBB3_857.
LBB3_567:
	WORD $0x8944; BYTE $0xce // mov    esi, r9d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	LONG $0xe0468d48         // lea    rax, [rsi - 32]
	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
	LONG $0x05e8c149         // shr    r8, 5
	LONG $0x01c08349         // add    r8, 1
	WORD $0x8548; BYTE $0xc0 // test    rax, rax
	JE   LBB3_857
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0xfee08348         // and    rax, -2
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0

	// Loop invariants: ymm0 = 0, rdi = element index, rax = negative pass counter.
LBB3_569:
	LONG $0x0cfafdc5; BYTE $0xba         // vpsubd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20ba       // vpsubd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40ba       // vpsubd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x64fafdc5; WORD $0x60ba       // vpsubd    ymm4, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c7ffec5; BYTE $0xb9         // vmovdqu    yword [rcx + 4*rdi], ymm1
	LONG $0x547ffec5; WORD $0x20b9       // vmovdqu    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x40b9       // vmovdqu    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x647ffec5; WORD $0x60b9       // vmovdqu    yword [rcx + 4*rdi + 96], ymm4
	QUAD $0x000080ba8cfafdc5; BYTE $0x00 // vpsubd    ymm1, ymm0, yword [rdx + 4*rdi + 128]
	QUAD $0x0000a0ba94fafdc5; BYTE $0x00 // vpsubd    ymm2, ymm0, yword [rdx + 4*rdi + 160]
	QUAD $0x0000c0ba9cfafdc5; BYTE $0x00 // vpsubd    ymm3, ymm0, yword [rdx + 4*rdi + 192]
	QUAD $0x0000e0baa4fafdc5; BYTE $0x00 // vpsubd    ymm4, ymm0, yword [rdx + 4*rdi + 224]
	QUAD $0x000080b98c7ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 4*rdi + 128], ymm1
	QUAD $0x0000a0b9947ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 4*rdi + 160], ymm2
	QUAD $0x0000c0b99c7ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 4*rdi + 192], ymm3
	QUAD $0x0000e0b9a47ffec5; BYTE $0x00 // vmovdqu    yword [rcx + 4*rdi + 224], ymm4
	LONG $0x40c78348                     // add    rdi, 64
	LONG $0x02c08348                     // add    rax, 2
	JNE  LBB3_569
	JMP  LBB3_858

// LBB3_570 / LBB3_571: AVX2 loop over 32-bit integer lanes, 32 elements
// (4 x ymm) per iteration. Reads [rdx + 4*rdi], writes [rcx + 4*rdi].
// Per lane x, with C = the dword broadcast from 64[rbp] (.LCPI3_3):
//   out = (C > x, signed compare) ? (x != 0 ? -1 : 0) : C
// NOTE(review): the value of C is not visible in this chunk; if C == 1 this
// computes signum(x) -- confirm against the constant table.
// rsi = r11d rounded down to a multiple of 32 is the vectorized element count.
LBB3_570:
	WORD $0x8944; BYTE $0xde       // mov    esi, r11d
	WORD $0xe683; BYTE $0xe0       // and    esi, -32
	WORD $0xff31                   // xor    edi, edi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5               // vpcmpeqd    ymm1, ymm1, ymm1
	LONG $0x587de2c4; WORD $0x4055 // vpbroadcastd    ymm2, dword 64[rbp] /* [rip + .LCPI3_3] */

	// Loop invariants: ymm0 = 0, ymm1 = all ones, ymm2 = C, rdi = element index.
LBB3_571:
	LONG $0x1c6ffec5; BYTE $0xba   // vmovdqu    ymm3, yword [rdx + 4*rdi]
	LONG $0x646ffec5; WORD $0x20ba // vmovdqu    ymm4, yword [rdx + 4*rdi + 32]
	LONG $0x6c6ffec5; WORD $0x40ba // vmovdqu    ymm5, yword [rdx + 4*rdi + 64]
	LONG $0x746ffec5; WORD $0x60ba // vmovdqu    ymm6, yword [rdx + 4*rdi + 96]
	// ymm7..ymm10 = NOT(x == 0): all ones (-1) where x is non-zero, else 0.
	LONG $0xf876e5c5               // vpcmpeqd    ymm7, ymm3, ymm0
	LONG $0xf9efc5c5               // vpxor    ymm7, ymm7, ymm1
	LONG $0xc0765dc5               // vpcmpeqd    ymm8, ymm4, ymm0
	LONG $0xc1ef3dc5               // vpxor    ymm8, ymm8, ymm1
	LONG $0xc87655c5               // vpcmpeqd    ymm9, ymm5, ymm0
	LONG $0xc9ef35c5               // vpxor    ymm9, ymm9, ymm1
	LONG $0xd0764dc5               // vpcmpeqd    ymm10, ymm6, ymm0
	LONG $0xd1ef2dc5               // vpxor    ymm10, ymm10, ymm1
	// ymm3..ymm6 = (C > x) mask, then used as the blend selector below.
	LONG $0xdb66edc5               // vpcmpgtd    ymm3, ymm2, ymm3
	LONG $0xe466edc5               // vpcmpgtd    ymm4, ymm2, ymm4
	LONG $0xed66edc5               // vpcmpgtd    ymm5, ymm2, ymm5
	LONG $0xf666edc5               // vpcmpgtd    ymm6, ymm2, ymm6
	// Selector set -> take the non-zero mask; selector clear -> take C.
	LONG $0x4a6de3c4; WORD $0x30df // vblendvps    ymm3, ymm2, ymm7, ymm3
	LONG $0x4a6dc3c4; WORD $0x40e0 // vblendvps    ymm4, ymm2, ymm8, ymm4
	LONG $0x4a6dc3c4; WORD $0x50e9 // vblendvps    ymm5, ymm2, ymm9, ymm5
	LONG $0x4a6dc3c4; WORD $0x60f2 // vblendvps    ymm6, ymm2, ymm10, ymm6
	LONG $0x1c11fcc5; BYTE $0xb9   // vmovups    yword [rcx + 4*rdi], ymm3
	LONG $0x6411fcc5; WORD $0x20b9 // vmovups    yword [rcx + 4*rdi + 32], ymm4
	LONG $0x6c11fcc5; WORD $0x40b9 // vmovups    yword [rcx + 4*rdi + 64], ymm5
	LONG $0x7411fcc5; WORD $0x60b9 // vmovups    yword [rcx + 4*rdi + 96], ymm6
	LONG $0x20c78348               // add    rdi, 32
	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
	JNE  LBB3_571
	// If the rounded count equals r11 nothing is left over; otherwise continue
	// at LBB3_573 (outside this chunk; presumably the scalar remainder loop).
	WORD $0x394c; BYTE $0xde       // cmp    rsi, r11
	JE   LBB3_865
	JMP  LBB3_573

// LBB3_578 / LBB3_579: AVX2 absolute value of 32-bit integer lanes using
// vpabsd, 32 elements (4 x ymm) per iteration. Reads [rdx + 4*rdi], writes
// [rcx + 4*rdi].
// rsi = r10d rounded down to a multiple of 32 is the vectorized element count.
LBB3_578:
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

	// Loop invariant: rdi = element index.
LBB3_579:
	LONG $0x1e7de2c4; WORD $0xba04             // vpabsd    ymm0, yword [rdx + 4*rdi]
	LONG $0x1e7de2c4; WORD $0xba4c; BYTE $0x20 // vpabsd    ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x1e7de2c4; WORD $0xba54; BYTE $0x40 // vpabsd    ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x1e7de2c4; WORD $0xba5c; BYTE $0x60 // vpabsd    ymm3, yword [rdx + 4*rdi + 96]
	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB3_579
	// If the rounded count equals r10 nothing is left over; otherwise continue
	// at LBB3_581 (outside this chunk; presumably the scalar remainder loop).
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB3_865
	JMP  LBB3_581

// LBB3_585 / LBB3_586: AVX2 absolute value of 32-bit integer lanes using
// vpabsd, 32 elements (4 x ymm) per iteration; instruction-for-instruction the
// same as the LBB3_578 loop but exiting to LBB3_588. Reads [rdx + 4*rdi],
// writes [rcx + 4*rdi].
// rsi = r10d rounded down to a multiple of 32 is the vectorized element count.
LBB3_585:
	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
	WORD $0xe683; BYTE $0xe0 // and    esi, -32
	WORD $0xff31             // xor    edi, edi

	// Loop invariant: rdi = element index.
LBB3_586:
	LONG $0x1e7de2c4; WORD $0xba04             // vpabsd    ymm0, yword [rdx + 4*rdi]
	LONG $0x1e7de2c4; WORD $0xba4c; BYTE $0x20 // vpabsd    ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x1e7de2c4; WORD $0xba54; BYTE $0x40 // vpabsd    ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x1e7de2c4; WORD $0xba5c; BYTE $0x60 // vpabsd    ymm3, yword [rdx + 4*rdi + 96]
	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB3_586
	// If the rounded count equals r10 nothing is left over; otherwise continue
	// at LBB3_588 (outside this chunk; presumably the scalar remainder loop).
	WORD $0x394c; BYTE $0xd6                   // cmp    rsi, r10
	JE   LBB3_865
	JMP  LBB3_588

// LBB3_367 .. LBB3_373: zero fill of 32-bit elements at rcx, in three stages.
// No input is read here; rdx is used as an element index, not a pointer.
//   LBB3_367/368: bulk loop, 16 ymm stores (512 bytes = 128 elements) per pass.
//                 rdi is masked to a multiple of 4 and negated, then counted
//                 up by 4 to zero; rax is the element index.
//   LBB3_369/371: rsi further 128-byte (4 x ymm) stores, continuing at
//                 rcx + 4*rax; skipped when rsi == 0.
//   LBB3_372/373: scalar dword stores for indices rdx .. r9 - 1; skipped when
//                 rdx == r9.
// NOTE(review): rdi, rsi, rdx and r9 are set up by code outside this chunk;
// presumably rdi/rsi are 128-byte group counts, rdx the vectorized element
// count and r9 the total element count -- confirm against the preheader.
// LBB3_369 is a separate label, so it may also be entered directly with rax
// already set.
LBB3_367:
	LONG $0xfce78348         // and    rdi, -4
	WORD $0xf748; BYTE $0xdf // neg    rdi
	WORD $0xc031             // xor    eax, eax
	LONG $0xc057f9c5         // vxorpd    xmm0, xmm0, xmm0

LBB3_368:
	LONG $0x0411fdc5; BYTE $0x81         // vmovupd    yword [rcx + 4*rax], ymm0
	LONG $0x4411fdc5; WORD $0x2081       // vmovupd    yword [rcx + 4*rax + 32], ymm0
	LONG $0x4411fdc5; WORD $0x4081       // vmovupd    yword [rcx + 4*rax + 64], ymm0
	LONG $0x4411fdc5; WORD $0x6081       // vmovupd    yword [rcx + 4*rax + 96], ymm0
	QUAD $0x000080818411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rax + 128], ymm0
	QUAD $0x0000a0818411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rax + 160], ymm0
	QUAD $0x0000c0818411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rax + 192], ymm0
	QUAD $0x0000e0818411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rax + 224], ymm0
	QUAD $0x000100818411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rax + 256], ymm0
	QUAD $0x000120818411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rax + 288], ymm0
	QUAD $0x000140818411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rax + 320], ymm0
	QUAD $0x000160818411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rax + 352], ymm0
	QUAD $0x000180818411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rax + 384], ymm0
	QUAD $0x0001a0818411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rax + 416], ymm0
	QUAD $0x0001c0818411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rax + 448], ymm0
	QUAD $0x0001e0818411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rax + 480], ymm0
	// rax += 128 elements (encoded as "sub -128" to fit an 8-bit immediate).
	LONG $0x80e88348                     // sub    rax, -128
	LONG $0x04c78348                     // add    rdi, 4
	JNE  LBB3_368

LBB3_369:
	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
	JE   LBB3_372
	// rax becomes a byte pointer biased by +96 so the four stores below use
	// displacements -96, -64, -32 and 0; rsi is negated to count up to zero.
	LONG $0x81048d48         // lea    rax, [rcx + 4*rax]
	LONG $0x60c08348         // add    rax, 96
	WORD $0xf748; BYTE $0xde // neg    rsi
	LONG $0xc057f9c5         // vxorpd    xmm0, xmm0, xmm0

LBB3_371:
	LONG $0x4011fdc5; BYTE $0xa0 // vmovupd    yword [rax - 96], ymm0
	LONG $0x4011fdc5; BYTE $0xc0 // vmovupd    yword [rax - 64], ymm0
	LONG $0x4011fdc5; BYTE $0xe0 // vmovupd    yword [rax - 32], ymm0
	LONG $0x0011fdc5             // vmovupd    yword [rax], ymm0
	LONG $0x80e88348             // sub    rax, -128
	WORD $0xff48; BYTE $0xc6     // inc    rsi
	JNE  LBB3_371

LBB3_372:
	WORD $0x394c; BYTE $0xca // cmp    rdx, r9
	JE   LBB3_865

	// Scalar tail: one zero dword per iteration until rdx reaches r9.
LBB3_373:
	LONG $0x009104c7; WORD $0x0000; BYTE $0x00 // mov    dword [rcx + 4*rdx], 0
	LONG $0x01c28348                           // add    rdx, 1
	WORD $0x3949; BYTE $0xd1                   // cmp    r9, rdx
	JNE  LBB3_373
	JMP  LBB3_865

// LBB3_438 .. LBB3_444: zero fill of 64-bit elements at rcx, in three stages.
// No input is read here; rdx is used as an element index, not a pointer.
//   LBB3_438/439: bulk loop, 16 ymm stores (512 bytes = 64 elements) per pass.
//                 rdi is masked to a multiple of 4 and negated, then counted
//                 up by 4 to zero; rax is the element index.
//   LBB3_440/442: rsi further 128-byte (4 x ymm) stores, continuing at
//                 rcx + 8*rax; skipped when rsi == 0.
//   LBB3_443/444: scalar qword stores for indices rdx .. r9 - 1; skipped when
//                 rdx == r9.
// NOTE(review): rdi, rsi, rdx and r9 are set up by code outside this chunk;
// presumably rdi/rsi are 128-byte group counts, rdx the vectorized element
// count and r9 the total element count -- confirm against the preheader.
// LBB3_440 is a separate label, so it may also be entered directly with rax
// already set.
LBB3_438:
	LONG $0xfce78348         // and    rdi, -4
	WORD $0xf748; BYTE $0xdf // neg    rdi
	WORD $0xc031             // xor    eax, eax
	LONG $0xc057f9c5         // vxorpd    xmm0, xmm0, xmm0

LBB3_439:
	LONG $0x0411fdc5; BYTE $0xc1         // vmovupd    yword [rcx + 8*rax], ymm0
	LONG $0x4411fdc5; WORD $0x20c1       // vmovupd    yword [rcx + 8*rax + 32], ymm0
	LONG $0x4411fdc5; WORD $0x40c1       // vmovupd    yword [rcx + 8*rax + 64], ymm0
	LONG $0x4411fdc5; WORD $0x60c1       // vmovupd    yword [rcx + 8*rax + 96], ymm0
	QUAD $0x000080c18411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rax + 128], ymm0
	QUAD $0x0000a0c18411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rax + 160], ymm0
	QUAD $0x0000c0c18411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rax + 192], ymm0
	QUAD $0x0000e0c18411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rax + 224], ymm0
	QUAD $0x000100c18411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rax + 256], ymm0
	QUAD $0x000120c18411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rax + 288], ymm0
	QUAD $0x000140c18411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rax + 320], ymm0
	QUAD $0x000160c18411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rax + 352], ymm0
	QUAD $0x000180c18411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rax + 384], ymm0
	QUAD $0x0001a0c18411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rax + 416], ymm0
	QUAD $0x0001c0c18411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rax + 448], ymm0
	QUAD $0x0001e0c18411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rax + 480], ymm0
	LONG $0x40c08348                     // add    rax, 64
	LONG $0x04c78348                     // add    rdi, 4
	JNE  LBB3_439

LBB3_440:
	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
	JE   LBB3_443
	// rax becomes a byte pointer biased by +96 so the four stores below use
	// displacements -96, -64, -32 and 0; rsi is negated to count up to zero.
	LONG $0xc1048d48         // lea    rax, [rcx + 8*rax]
	LONG $0x60c08348         // add    rax, 96
	WORD $0xf748; BYTE $0xde // neg    rsi
	LONG $0xc057f9c5         // vxorpd    xmm0, xmm0, xmm0

LBB3_442:
	LONG $0x4011fdc5; BYTE $0xa0 // vmovupd    yword [rax - 96], ymm0
	LONG $0x4011fdc5; BYTE $0xc0 // vmovupd    yword [rax - 64], ymm0
	LONG $0x4011fdc5; BYTE $0xe0 // vmovupd    yword [rax - 32], ymm0
	LONG $0x0011fdc5             // vmovupd    yword [rax], ymm0
	LONG $0x80e88348             // sub    rax, -128
	WORD $0xff48; BYTE $0xc6     // inc    rsi
	JNE  LBB3_442

LBB3_443:
	WORD $0x394c; BYTE $0xca // cmp    rdx, r9
	JE   LBB3_865

	// Scalar tail: one zero qword per iteration until rdx reaches r9.
LBB3_444:
	QUAD $0x00000000d104c748 // mov    qword [rcx + 8*rdx], 0
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3949; BYTE $0xd1 // cmp    r9, rdx
	JNE  LBB3_444
	JMP  LBB3_865

// LBB3_461 .. LBB3_463: zero fill of 16-bit elements at rcx (bulk stage and
// the setup for the following 128-byte stage).
//   LBB3_461/462: bulk loop, 16 ymm stores (512 bytes = 256 elements) per pass.
//                 rdi is masked to a multiple of 4 and negated, then counted
//                 up by 4 to zero; rax is the element index.
//   LBB3_463:     when rsi != 0, prepares rax as a byte pointer
//                 (rcx + 2*rax + 96) and negates rsi for the loop at LBB3_465;
//                 when rsi == 0, jumps to LBB3_466 (outside this chunk).
// NOTE(review): rdi and rsi are set up by code outside this chunk; presumably
// they are 128-byte group counts -- confirm against the preheader.
LBB3_461:
	LONG $0xfce78348         // and    rdi, -4
	WORD $0xf748; BYTE $0xdf // neg    rdi
	WORD $0xc031             // xor    eax, eax
	LONG $0xc057f9c5         // vxorpd    xmm0, xmm0, xmm0

LBB3_462:
	LONG $0x0411fdc5; BYTE $0x41         // vmovupd    yword [rcx + 2*rax], ymm0
	LONG $0x4411fdc5; WORD $0x2041       // vmovupd    yword [rcx + 2*rax + 32], ymm0
	LONG $0x4411fdc5; WORD $0x4041       // vmovupd    yword [rcx + 2*rax + 64], ymm0
	LONG $0x4411fdc5; WORD $0x6041       // vmovupd    yword [rcx + 2*rax + 96], ymm0
	QUAD $0x000080418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 128], ymm0
	QUAD $0x0000a0418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 160], ymm0
	QUAD $0x0000c0418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 192], ymm0
	QUAD $0x0000e0418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 224], ymm0
	QUAD $0x000100418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 256], ymm0
	QUAD $0x000120418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 288], ymm0
	QUAD $0x000140418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 320], ymm0
	QUAD $0x000160418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 352], ymm0
	QUAD $0x000180418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 384], ymm0
	QUAD $0x0001a0418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 416], ymm0
	QUAD $0x0001c0418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 448], ymm0
	QUAD $0x0001e0418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 480], ymm0
	LONG $0x01000548; WORD $0x0000       // add    rax, 256
	LONG $0x04c78348                     // add    rdi, 4
	JNE  LBB3_462

LBB3_463:
	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
	JE   LBB3_466
	// Bias the pointer by +96 so the following stores can use displacements
	// -96, -64, -32 and 0; rsi is negated to count up to zero.
	LONG $0x41048d48         // lea    rax, [rcx + 2*rax]
	LONG $0x60c08348         // add    rax, 96
	WORD $0xf748; BYTE $0xde // neg    rsi
	LONG $0xc057f9c5         // vxorpd    xmm0, xmm0, xmm0

LBB3_465:
	LONG $0x4011fdc5; BYTE $0xa0 // vmovupd    yword [rax - 96], ymm0
	LONG $0x4011fdc5; BYTE $0xc0 // vmovupd    yword [rax - 64], ymm0
	LONG $0x4011fdc5; BYTE $0xe0 // vmovupd    yword [rax - 32], ymm0
	LONG $0x0011fdc5             // vmovupd    yword [rax], ymm0
	LONG $0x80e88348             // sub    rax, -128
	WORD $0xff48; BYTE $0xc6     // inc    rsi
	JNE  LBB3_465

LBB3_466:
	WORD $0x394c; BYTE $0xca // cmp    rdx, r9
	JE   LBB3_865

LBB3_467:
	LONG $0x5104c766; WORD $0x0000 // mov    word [rcx + 2*rdx], 0
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3949; BYTE $0xd1       // cmp    r9, rdx
	JNE  LBB3_467
	JMP  LBB3_865

// LBB3_541..LBB3_547: zero fill of the buffer at rcx, indexed in bytes.
// rdi arrives as a group count; it is rounded down to a multiple of 4 and
// negated so the unrolled loop can count it up to zero in steps of 4.
// rax is the running byte offset, starting at 0.
LBB3_541:
	LONG $0xfce78348         // and    rdi, -4
	WORD $0xf748; BYTE $0xdf // neg    rdi
	WORD $0xc031             // xor    eax, eax
	LONG $0xc057f9c5         // vxorpd    xmm0, xmm0, xmm0

// Unrolled body: sixteen 32-byte stores (512 bytes) per pass.
LBB3_542:
	LONG $0x0411fdc5; BYTE $0x01         // vmovupd    yword [rcx + rax], ymm0
	LONG $0x4411fdc5; WORD $0x2001       // vmovupd    yword [rcx + rax + 32], ymm0
	LONG $0x4411fdc5; WORD $0x4001       // vmovupd    yword [rcx + rax + 64], ymm0
	LONG $0x4411fdc5; WORD $0x6001       // vmovupd    yword [rcx + rax + 96], ymm0
	QUAD $0x000080018411fdc5; BYTE $0x00 // vmovupd    yword [rcx + rax + 128], ymm0
	QUAD $0x0000a0018411fdc5; BYTE $0x00 // vmovupd    yword [rcx + rax + 160], ymm0
	QUAD $0x0000c0018411fdc5; BYTE $0x00 // vmovupd    yword [rcx + rax + 192], ymm0
	QUAD $0x0000e0018411fdc5; BYTE $0x00 // vmovupd    yword [rcx + rax + 224], ymm0
	QUAD $0x000100018411fdc5; BYTE $0x00 // vmovupd    yword [rcx + rax + 256], ymm0
	QUAD $0x000120018411fdc5; BYTE $0x00 // vmovupd    yword [rcx + rax + 288], ymm0
	QUAD $0x000140018411fdc5; BYTE $0x00 // vmovupd    yword [rcx + rax + 320], ymm0
	QUAD $0x000160018411fdc5; BYTE $0x00 // vmovupd    yword [rcx + rax + 352], ymm0
	QUAD $0x000180018411fdc5; BYTE $0x00 // vmovupd    yword [rcx + rax + 384], ymm0
	QUAD $0x0001a0018411fdc5; BYTE $0x00 // vmovupd    yword [rcx + rax + 416], ymm0
	QUAD $0x0001c0018411fdc5; BYTE $0x00 // vmovupd    yword [rcx + rax + 448], ymm0
	QUAD $0x0001e0018411fdc5; BYTE $0x00 // vmovupd    yword [rcx + rax + 480], ymm0
	LONG $0x02000548; WORD $0x0000       // add    rax, 512
	LONG $0x04c78348                     // add    rdi, 4
	JNE  LBB3_542

// rsi holds the number of 128-byte groups still to clear (presumably the
// remainder of the group count modulo 4 - TODO confirm where rsi is computed).
LBB3_543:
	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
	JE   LBB3_546
	// Pointer biased by +96 so the loop can use [rax-96] .. [rax].
	WORD $0x0148; BYTE $0xc8 // add    rax, rcx
	LONG $0x60c08348         // add    rax, 96
	WORD $0xf748; BYTE $0xde // neg    rsi
	LONG $0xc057f9c5         // vxorpd    xmm0, xmm0, xmm0

// Clears 128 bytes per pass while rsi counts up to zero.
LBB3_545:
	LONG $0x4011fdc5; BYTE $0xa0 // vmovupd    yword [rax - 96], ymm0
	LONG $0x4011fdc5; BYTE $0xc0 // vmovupd    yword [rax - 64], ymm0
	LONG $0x4011fdc5; BYTE $0xe0 // vmovupd    yword [rax - 32], ymm0
	LONG $0x0011fdc5             // vmovupd    yword [rax], ymm0
	LONG $0x80e88348             // sub    rax, -128
	WORD $0xff48; BYTE $0xc6     // inc    rsi
	JNE  LBB3_545

// Scalar clean-up is skipped when index rdx already equals the bound in r9.
LBB3_546:
	WORD $0x394c; BYTE $0xca // cmp    rdx, r9
	JE   LBB3_865

// One zero byte per pass until rdx reaches r9. There is no jump after the
// loop: control falls through into the label that follows (LBB3_865).
LBB3_547:
	LONG $0x001104c6         // mov    byte [rcx + rdx], 0
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3949; BYTE $0xd1 // cmp    r9, rdx
	JNE  LBB3_547

// LBB3_865: shared exit of this function; the surrounding paths jump here
// (or fall through from the preceding loop) once their work is complete.
// VZEROUPPER clears the upper halves of the ymm registers before returning,
// so code that runs afterwards does not pay the AVX-to-SSE transition penalty.
LBB3_865:
	VZEROUPPER
	RET

// LBB3_592..LBB3_597: vector copy from the buffer at rdx to the buffer at rcx,
// indexed in 2-byte elements.
// rdi arrives as a group count; it is rounded down to a multiple of 4 and
// negated so the unrolled loop can count it up to zero in steps of 4.
// rax is the running element index, starting at 0.
LBB3_592:
	LONG $0xfce78348         // and    rdi, -4
	WORD $0xf748; BYTE $0xdf // neg    rdi
	WORD $0xc031             // xor    eax, eax

// Unrolled body: 256 bytes (eight 32-byte load/store pairs) per pass; rax
// advances by 128 elements, i.e. the same 256 bytes. The vmovups/vmovupd mix
// is kept exactly as generated; both are unaligned 256-bit moves.
LBB3_593:
	LONG $0x0410fcc5; BYTE $0x42         // vmovups    ymm0, yword [rdx + 2*rax]
	LONG $0x4c10fcc5; WORD $0x2042       // vmovups    ymm1, yword [rdx + 2*rax + 32]
	LONG $0x0411fcc5; BYTE $0x41         // vmovups    yword [rcx + 2*rax], ymm0
	LONG $0x4c11fcc5; WORD $0x2041       // vmovups    yword [rcx + 2*rax + 32], ymm1
	LONG $0x4410fcc5; WORD $0x4042       // vmovups    ymm0, yword [rdx + 2*rax + 64]
	LONG $0x4c10fcc5; WORD $0x6042       // vmovups    ymm1, yword [rdx + 2*rax + 96]
	LONG $0x4411fcc5; WORD $0x4041       // vmovups    yword [rcx + 2*rax + 64], ymm0
	LONG $0x4c11fcc5; WORD $0x6041       // vmovups    yword [rcx + 2*rax + 96], ymm1
	QUAD $0x000080428410fcc5; BYTE $0x00 // vmovups    ymm0, yword [rdx + 2*rax + 128]
	QUAD $0x0000a0428c10fcc5; BYTE $0x00 // vmovups    ymm1, yword [rdx + 2*rax + 160]
	QUAD $0x000080418411fcc5; BYTE $0x00 // vmovups    yword [rcx + 2*rax + 128], ymm0
	QUAD $0x0000a0418c11fcc5; BYTE $0x00 // vmovups    yword [rcx + 2*rax + 160], ymm1
	QUAD $0x0000c0428410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 2*rax + 192]
	QUAD $0x0000e0428c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 2*rax + 224]
	QUAD $0x0000c0418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 192], ymm0
	QUAD $0x0000e0418c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 224], ymm1
	LONG $0x80e88348                     // sub    rax, -128
	LONG $0x04c78348                     // add    rdi, 4
	JNE  LBB3_593

// r8 holds the number of 64-byte groups still to copy (presumably the
// remainder of the group count modulo 4 - TODO confirm where r8 is computed).
LBB3_594:
	WORD $0x854d; BYTE $0xc0 // test    r8, r8
	JE   LBB3_597
	// Convert the element index to a byte offset (rax * 2) and bias it by +32
	// so the loop below can address [.. + rax - 32] and [.. + rax].
	WORD $0x0148; BYTE $0xc0 // add    rax, rax
	LONG $0x20c08348         // add    rax, 32
	WORD $0xf749; BYTE $0xd8 // neg    r8

// Copies 64 bytes per pass while r8 counts up to zero.
LBB3_596:
	LONG $0x4410fdc5; WORD $0xe002 // vmovupd    ymm0, yword [rdx + rax - 32]
	LONG $0x0c10fdc5; BYTE $0x02   // vmovupd    ymm1, yword [rdx + rax]
	LONG $0x4411fdc5; WORD $0xe001 // vmovupd    yword [rcx + rax - 32], ymm0
	LONG $0x0c11fdc5; BYTE $0x01   // vmovupd    yword [rcx + rax], ymm1
	LONG $0x40c08348               // add    rax, 64
	WORD $0xff49; BYTE $0xc0       // inc    r8
	JNE  LBB3_596

// Exit when rsi equals r9; otherwise continue at LBB3_598, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_597:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_598

// LBB3_602..LBB3_607: vector copy from the buffer at rdx to the buffer at rcx,
// indexed in 2-byte elements. The instruction sequence is the same as in
// LBB3_592..LBB3_597; only the continuation label at the end differs.
// rdi arrives as a group count; it is rounded down to a multiple of 4 and
// negated so the unrolled loop can count it up to zero in steps of 4.
LBB3_602:
	LONG $0xfce78348         // and    rdi, -4
	WORD $0xf748; BYTE $0xdf // neg    rdi
	WORD $0xc031             // xor    eax, eax

// Unrolled body: 256 bytes (eight 32-byte load/store pairs) per pass; rax
// advances by 128 elements, i.e. the same 256 bytes.
LBB3_603:
	LONG $0x0410fcc5; BYTE $0x42         // vmovups    ymm0, yword [rdx + 2*rax]
	LONG $0x4c10fcc5; WORD $0x2042       // vmovups    ymm1, yword [rdx + 2*rax + 32]
	LONG $0x0411fcc5; BYTE $0x41         // vmovups    yword [rcx + 2*rax], ymm0
	LONG $0x4c11fcc5; WORD $0x2041       // vmovups    yword [rcx + 2*rax + 32], ymm1
	LONG $0x4410fcc5; WORD $0x4042       // vmovups    ymm0, yword [rdx + 2*rax + 64]
	LONG $0x4c10fcc5; WORD $0x6042       // vmovups    ymm1, yword [rdx + 2*rax + 96]
	LONG $0x4411fcc5; WORD $0x4041       // vmovups    yword [rcx + 2*rax + 64], ymm0
	LONG $0x4c11fcc5; WORD $0x6041       // vmovups    yword [rcx + 2*rax + 96], ymm1
	QUAD $0x000080428410fcc5; BYTE $0x00 // vmovups    ymm0, yword [rdx + 2*rax + 128]
	QUAD $0x0000a0428c10fcc5; BYTE $0x00 // vmovups    ymm1, yword [rdx + 2*rax + 160]
	QUAD $0x000080418411fcc5; BYTE $0x00 // vmovups    yword [rcx + 2*rax + 128], ymm0
	QUAD $0x0000a0418c11fcc5; BYTE $0x00 // vmovups    yword [rcx + 2*rax + 160], ymm1
	QUAD $0x0000c0428410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 2*rax + 192]
	QUAD $0x0000e0428c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 2*rax + 224]
	QUAD $0x0000c0418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 192], ymm0
	QUAD $0x0000e0418c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 224], ymm1
	LONG $0x80e88348                     // sub    rax, -128
	LONG $0x04c78348                     // add    rdi, 4
	JNE  LBB3_603

// r8 holds the number of 64-byte groups still to copy (presumably the
// remainder of the group count modulo 4 - TODO confirm where r8 is computed).
LBB3_604:
	WORD $0x854d; BYTE $0xc0 // test    r8, r8
	JE   LBB3_607
	// Element index -> byte offset (rax * 2), biased by +32 for the loop below.
	WORD $0x0148; BYTE $0xc0 // add    rax, rax
	LONG $0x20c08348         // add    rax, 32
	WORD $0xf749; BYTE $0xd8 // neg    r8

// Copies 64 bytes per pass while r8 counts up to zero.
LBB3_606:
	LONG $0x4410fdc5; WORD $0xe002 // vmovupd    ymm0, yword [rdx + rax - 32]
	LONG $0x0c10fdc5; BYTE $0x02   // vmovupd    ymm1, yword [rdx + rax]
	LONG $0x4411fdc5; WORD $0xe001 // vmovupd    yword [rcx + rax - 32], ymm0
	LONG $0x0c11fdc5; BYTE $0x01   // vmovupd    yword [rcx + rax], ymm1
	LONG $0x40c08348               // add    rax, 64
	WORD $0xff49; BYTE $0xc0       // inc    r8
	JNE  LBB3_606

// Exit when rsi equals r9; otherwise continue at LBB3_608, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_607:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_608

// LBB3_612..LBB3_615: optional single vector step that negates 32-bit lanes.
// LBB3_612 sets the element index rdi to 0 and falls through into LBB3_613
// (NOTE(review): LBB3_613 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_612:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, 32 dwords at [rdx + 4*rdi] are replaced by 0 - x
// (vpsubd from a zeroed register) and stored to [rcx + 4*rdi].
LBB3_613:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_615
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x0cfafdc5; BYTE $0xba   // vpsubd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20ba // vpsubd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40ba // vpsubd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x44fafdc5; WORD $0x60ba // vpsubd    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c7ffec5; BYTE $0xb9   // vmovdqu    yword [rcx + 4*rdi], ymm1
	LONG $0x547ffec5; WORD $0x20b9 // vmovdqu    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x40b9 // vmovdqu    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x447ffec5; WORD $0x60b9 // vmovdqu    yword [rcx + 4*rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_616, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_615:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_616

// LBB3_620..LBB3_623: optional single vector step that copies 128 bytes,
// indexed in 4-byte elements.
// LBB3_620 sets the element index rdi to 0 and falls through into LBB3_621
// (NOTE(review): LBB3_621 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_620:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, four ymm registers are loaded from [rdx + 4*rdi]
// and stored unchanged to [rcx + 4*rdi].
LBB3_621:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_623
	LONG $0x0410fdc5; BYTE $0xba   // vmovupd    ymm0, yword [rdx + 4*rdi]
	LONG $0x4c10fdc5; WORD $0x20ba // vmovupd    ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x5410fdc5; WORD $0x40ba // vmovupd    ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c10fdc5; WORD $0x60ba // vmovupd    ymm3, yword [rdx + 4*rdi + 96]
	LONG $0x0411fdc5; BYTE $0xb9   // vmovupd    yword [rcx + 4*rdi], ymm0
	LONG $0x4c11fdc5; WORD $0x20b9 // vmovupd    yword [rcx + 4*rdi + 32], ymm1
	LONG $0x5411fdc5; WORD $0x40b9 // vmovupd    yword [rcx + 4*rdi + 64], ymm2
	LONG $0x5c11fdc5; WORD $0x60b9 // vmovupd    yword [rcx + 4*rdi + 96], ymm3

// Exit when rsi equals r9; otherwise continue at LBB3_624, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_623:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_624

// LBB3_628..LBB3_631: optional single vector step that copies 128 bytes,
// indexed in 4-byte elements.
// LBB3_628 sets the element index rdi to 0 and falls through into LBB3_629
// (NOTE(review): LBB3_629 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_628:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, four ymm registers are loaded from [rdx + 4*rdi]
// and stored unchanged to [rcx + 4*rdi].
LBB3_629:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_631
	LONG $0x0410fdc5; BYTE $0xba   // vmovupd    ymm0, yword [rdx + 4*rdi]
	LONG $0x4c10fdc5; WORD $0x20ba // vmovupd    ymm1, yword [rdx + 4*rdi + 32]
	LONG $0x5410fdc5; WORD $0x40ba // vmovupd    ymm2, yword [rdx + 4*rdi + 64]
	LONG $0x5c10fdc5; WORD $0x60ba // vmovupd    ymm3, yword [rdx + 4*rdi + 96]
	LONG $0x0411fdc5; BYTE $0xb9   // vmovupd    yword [rcx + 4*rdi], ymm0
	LONG $0x4c11fdc5; WORD $0x20b9 // vmovupd    yword [rcx + 4*rdi + 32], ymm1
	LONG $0x5411fdc5; WORD $0x40b9 // vmovupd    yword [rcx + 4*rdi + 64], ymm2
	LONG $0x5c11fdc5; WORD $0x60b9 // vmovupd    yword [rcx + 4*rdi + 96], ymm3

// Exit when rsi equals r9; otherwise continue at LBB3_632, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_631:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_632

// LBB3_636..LBB3_639: optional single vector step that XORs 8-byte lanes with
// a broadcast constant.
// LBB3_636 sets the element index rdi to 0 and falls through into LBB3_637
// (NOTE(review): LBB3_637 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_636:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, the qword at [rbp + 0] is broadcast to ymm0 and
// XORed into 16 qwords from [rdx + 8*rdi]; results go to [rcx + 8*rdi].
// NOTE(review): the constant's value is not visible here; an XOR mask on
// 8-byte lanes is presumably the float64 sign bit (negation) - confirm
// against this function's constant table.
LBB3_637:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_639
	LONG $0x197de2c4; WORD $0x0045 // vbroadcastsd    ymm0, qword 0[rbp] /* [rip + .LCPI3_0] */
	LONG $0x0c57fdc5; BYTE $0xfa   // vxorpd    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x5457fdc5; WORD $0x20fa // vxorpd    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5c57fdc5; WORD $0x40fa // vxorpd    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x4457fdc5; WORD $0x60fa // vxorpd    ymm0, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x4411fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_640, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_639:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_640

// LBB3_646..LBB3_649: optional single vector step that XORs 8-byte lanes with
// a broadcast constant.
// LBB3_646 sets the element index rdi to 0 and falls through into LBB3_647
// (NOTE(review): LBB3_647 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_646:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, the qword at [rbp + 0] is broadcast to ymm0 and
// XORed into 16 qwords from [rdx + 8*rdi]; results go to [rcx + 8*rdi].
// NOTE(review): the constant's value is not visible here; presumably the
// float64 sign-bit mask - confirm against this function's constant table.
LBB3_647:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_649
	LONG $0x197de2c4; WORD $0x0045 // vbroadcastsd    ymm0, qword 0[rbp] /* [rip + .LCPI3_0] */
	LONG $0x0c57fdc5; BYTE $0xfa   // vxorpd    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x5457fdc5; WORD $0x20fa // vxorpd    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5c57fdc5; WORD $0x40fa // vxorpd    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x4457fdc5; WORD $0x60fa // vxorpd    ymm0, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x4411fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_650, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_649:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_650

// LBB3_656..LBB3_659: optional single vector step that ANDs 8-byte lanes with
// a broadcast constant.
// LBB3_656 sets the element index rdi to 0 and falls through into LBB3_657
// (NOTE(review): LBB3_657 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_656:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, the qword at [rbp + 24] is broadcast to ymm0 and
// ANDed with 16 qwords from [rdx + 8*rdi]; results go to [rcx + 8*rdi].
// NOTE(review): the constant's value is not visible here; an AND mask on
// 8-byte lanes is presumably "all bits except the float64 sign bit"
// (absolute value) - confirm against this function's constant table.
LBB3_657:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_659
	LONG $0x197de2c4; WORD $0x1845 // vbroadcastsd    ymm0, qword 24[rbp] /* [rip + .LCPI3_8] */
	LONG $0x0c54fdc5; BYTE $0xfa   // vandpd    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x5454fdc5; WORD $0x20fa // vandpd    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5c54fdc5; WORD $0x40fa // vandpd    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x4454fdc5; WORD $0x60fa // vandpd    ymm0, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x4411fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_660, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_659:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_660

// LBB3_664..LBB3_667: optional single vector step that ANDs 8-byte lanes with
// a broadcast constant.
// LBB3_664 sets the element index rdi to 0 and falls through into LBB3_665
// (NOTE(review): LBB3_665 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_664:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, the qword at [rbp + 24] is broadcast to ymm0 and
// ANDed with 16 qwords from [rdx + 8*rdi]; results go to [rcx + 8*rdi].
// NOTE(review): the constant's value is not visible here; presumably the
// float64 absolute-value mask - confirm against this function's constant table.
LBB3_665:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_667
	LONG $0x197de2c4; WORD $0x1845 // vbroadcastsd    ymm0, qword 24[rbp] /* [rip + .LCPI3_8] */
	LONG $0x0c54fdc5; BYTE $0xfa   // vandpd    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x5454fdc5; WORD $0x20fa // vandpd    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5c54fdc5; WORD $0x40fa // vandpd    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x4454fdc5; WORD $0x60fa // vandpd    ymm0, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x4411fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_668, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_667:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_668

// LBB3_672..LBB3_675: optional single vector step that negates 8-bit lanes.
// LBB3_672 sets the byte index rdi to 0 and falls through into LBB3_673
// (NOTE(review): LBB3_673 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_672:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, 128 bytes at [rdx + rdi] are replaced by 0 - x
// (vpsubb from a zeroed register) and stored to [rcx + rdi].
LBB3_673:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_675
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x0cf8fdc5; BYTE $0x3a   // vpsubb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54f8fdc5; WORD $0x203a // vpsubb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x403a // vpsubb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x44f8fdc5; WORD $0x603a // vpsubb    ymm0, ymm0, yword [rdx + rdi + 96]
	LONG $0x0c7ffec5; BYTE $0x39   // vmovdqu    yword [rcx + rdi], ymm1
	LONG $0x547ffec5; WORD $0x2039 // vmovdqu    yword [rcx + rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x4039 // vmovdqu    yword [rcx + rdi + 64], ymm3
	LONG $0x447ffec5; WORD $0x6039 // vmovdqu    yword [rcx + rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_676, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_675:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_676

// LBB3_680..LBB3_683: optional single vector step that negates 8-bit lanes.
// LBB3_680 sets the byte index rdi to 0 and falls through into LBB3_681
// (NOTE(review): LBB3_681 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_680:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, 128 bytes at [rdx + rdi] are replaced by 0 - x
// (vpsubb from a zeroed register) and stored to [rcx + rdi].
LBB3_681:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_683
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x0cf8fdc5; BYTE $0x3a   // vpsubb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54f8fdc5; WORD $0x203a // vpsubb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x403a // vpsubb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x44f8fdc5; WORD $0x603a // vpsubb    ymm0, ymm0, yword [rdx + rdi + 96]
	LONG $0x0c7ffec5; BYTE $0x39   // vmovdqu    yword [rcx + rdi], ymm1
	LONG $0x547ffec5; WORD $0x2039 // vmovdqu    yword [rcx + rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x4039 // vmovdqu    yword [rcx + rdi + 64], ymm3
	LONG $0x447ffec5; WORD $0x6039 // vmovdqu    yword [rcx + rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_684, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_683:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_684

// LBB3_688..LBB3_691: optional single vector step that negates 64-bit lanes.
// LBB3_688 sets the element index rdi to 0 and falls through into LBB3_689
// (NOTE(review): LBB3_689 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_688:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, 16 qwords at [rdx + 8*rdi] are replaced by 0 - x
// (vpsubq from a zeroed register) and stored to [rcx + 8*rdi].
LBB3_689:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_691
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x0cfbfdc5; BYTE $0xfa   // vpsubq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20fa // vpsubq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40fa // vpsubq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x44fbfdc5; WORD $0x60fa // vpsubq    ymm0, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c7ffec5; BYTE $0xf9   // vmovdqu    yword [rcx + 8*rdi], ymm1
	LONG $0x547ffec5; WORD $0x20f9 // vmovdqu    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x40f9 // vmovdqu    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x447ffec5; WORD $0x60f9 // vmovdqu    yword [rcx + 8*rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_692, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_691:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_692

// LBB3_696..LBB3_699: optional single vector step that copies 128 bytes,
// indexed in 8-byte elements.
// LBB3_696 sets the element index rdi to 0 and falls through into LBB3_697
// (NOTE(review): LBB3_697 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_696:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, four ymm registers are loaded from [rdx + 8*rdi]
// and stored unchanged to [rcx + 8*rdi].
LBB3_697:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_699
	LONG $0x0410fdc5; BYTE $0xfa   // vmovupd    ymm0, yword [rdx + 8*rdi]
	LONG $0x4c10fdc5; WORD $0x20fa // vmovupd    ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x5410fdc5; WORD $0x40fa // vmovupd    ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c10fdc5; WORD $0x60fa // vmovupd    ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x0411fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm0
	LONG $0x4c11fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm1
	LONG $0x5411fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm2
	LONG $0x5c11fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm3

// Exit when rsi equals r9; otherwise continue at LBB3_700, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_699:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_700

// LBB3_704..LBB3_707: optional single vector step that copies 128 bytes,
// indexed in 8-byte elements.
// LBB3_704 sets the element index rdi to 0 and falls through into LBB3_705
// (NOTE(review): LBB3_705 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_704:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, four ymm registers are loaded from [rdx + 8*rdi]
// and stored unchanged to [rcx + 8*rdi].
LBB3_705:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_707
	LONG $0x0410fdc5; BYTE $0xfa   // vmovupd    ymm0, yword [rdx + 8*rdi]
	LONG $0x4c10fdc5; WORD $0x20fa // vmovupd    ymm1, yword [rdx + 8*rdi + 32]
	LONG $0x5410fdc5; WORD $0x40fa // vmovupd    ymm2, yword [rdx + 8*rdi + 64]
	LONG $0x5c10fdc5; WORD $0x60fa // vmovupd    ymm3, yword [rdx + 8*rdi + 96]
	LONG $0x0411fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm0
	LONG $0x4c11fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm1
	LONG $0x5411fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm2
	LONG $0x5c11fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm3

// Exit when rsi equals r9; otherwise continue at LBB3_708, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_707:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_708

// LBB3_712..LBB3_715: optional single vector step that negates 16-bit lanes.
// LBB3_712 sets the element index rdi to 0 and falls through into LBB3_713
// (NOTE(review): LBB3_713 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_712:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, 32 words at [rdx + 2*rdi] are replaced by 0 - x
// (vpsubw from a zeroed register) and stored to [rcx + 2*rdi].
LBB3_713:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_715
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x0cf9fdc5; BYTE $0x7a   // vpsubw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x44f9fdc5; WORD $0x207a // vpsubw    ymm0, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x0c7ffec5; BYTE $0x79   // vmovdqu    yword [rcx + 2*rdi], ymm1
	LONG $0x447ffec5; WORD $0x2079 // vmovdqu    yword [rcx + 2*rdi + 32], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_716, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_715:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_716

// LBB3_720..LBB3_723: optional single vector step that negates 16-bit lanes.
// LBB3_720 sets the element index rdi to 0 and falls through into LBB3_721
// (NOTE(review): LBB3_721 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_720:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, 32 words at [rdx + 2*rdi] are replaced by 0 - x
// (vpsubw from a zeroed register) and stored to [rcx + 2*rdi].
LBB3_721:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_723
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x0cf9fdc5; BYTE $0x7a   // vpsubw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x44f9fdc5; WORD $0x207a // vpsubw    ymm0, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x0c7ffec5; BYTE $0x79   // vmovdqu    yword [rcx + 2*rdi], ymm1
	LONG $0x447ffec5; WORD $0x2079 // vmovdqu    yword [rcx + 2*rdi + 32], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_724, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_723:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_724

// LBB3_728..LBB3_731: optional single vector step that negates 16-bit lanes.
// LBB3_728 sets the element index rdi to 0 and falls through into LBB3_729
// (NOTE(review): LBB3_729 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_728:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, 32 words at [rdx + 2*rdi] are replaced by 0 - x
// (vpsubw from a zeroed register) and stored to [rcx + 2*rdi].
LBB3_729:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_731
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x0cf9fdc5; BYTE $0x7a   // vpsubw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x44f9fdc5; WORD $0x207a // vpsubw    ymm0, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0x0c7ffec5; BYTE $0x79   // vmovdqu    yword [rcx + 2*rdi], ymm1
	LONG $0x447ffec5; WORD $0x2079 // vmovdqu    yword [rcx + 2*rdi + 32], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_732, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_731:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_732

// LBB3_736..LBB3_739: optional single vector step on 16-bit lanes that yields
// a constant where the input is non-zero and zero where the input is zero.
// LBB3_736 sets the element index rdi to 0 and falls through into LBB3_737
// (NOTE(review): LBB3_737 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_736:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, 32 words from [rdx + 2*rdi] are processed:
//   mask   = (x == 0)                 per lane, via vpcmpeqw against zero
//   result = ~mask & C                via vpandn, C = 32 bytes at [rbp + 96]
// and the result is stored to [rcx + 2*rdi].
// NOTE(review): the contents of C are not visible here - presumably every
// word is 1; confirm against this function's constant table. The vmovdqa
// load also requires [rbp + 96] to be 32-byte aligned.
LBB3_737:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_739
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x0c75fdc5; BYTE $0x7a   // vpcmpeqw    ymm1, ymm0, yword [rdx + 2*rdi]
	LONG $0x556ffdc5; BYTE $0x60   // vmovdqa    ymm2, yword 96[rbp] /* [rip + .LCPI3_5] */
	LONG $0x4475fdc5; WORD $0x207a // vpcmpeqw    ymm0, ymm0, yword [rdx + 2*rdi + 32]
	LONG $0xcadff5c5               // vpandn    ymm1, ymm1, ymm2
	LONG $0xc2dffdc5               // vpandn    ymm0, ymm0, ymm2
	LONG $0x0c7ffec5; BYTE $0x79   // vmovdqu    yword [rcx + 2*rdi], ymm1
	LONG $0x447ffec5; WORD $0x2079 // vmovdqu    yword [rcx + 2*rdi + 32], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_740, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_739:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_740

// LBB3_744..LBB3_747: optional single vector step on 16-bit lanes that
// selects between all-ones, zero and a constant depending on the input.
// LBB3_744 sets the element index rdi to 0 and falls through into LBB3_745
// (NOTE(review): LBB3_745 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_744:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, 32 words x from [rdx + 2*rdi] are processed.
// With C = the 32 bytes at [rbp + 96], each lane of the result is:
//   C > x (signed)  ->  all ones if x != 0, zero if x == 0
//   otherwise       ->  the corresponding lane of C
// The result is stored to [rcx + 2*rdi].
// NOTE(review): the contents of C are not visible here - if every word is 1
// this is the signed sign function (-1 / 0 / +1); confirm against this
// function's constant table.
LBB3_745:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_747
	LONG $0x046ffec5; BYTE $0x7a   // vmovdqu    ymm0, yword [rdx + 2*rdi]
	LONG $0x4c6ffec5; WORD $0x207a // vmovdqu    ymm1, yword [rdx + 2*rdi + 32]
	LONG $0xd2efe9c5               // vpxor    xmm2, xmm2, xmm2
	// ymm3 / ymm2 = NOT (x == 0): compare with zero, then XOR with all ones
	// (vpcmpeqd of a register with itself produces all ones).
	LONG $0xda75fdc5               // vpcmpeqw    ymm3, ymm0, ymm2
	LONG $0xe476ddc5               // vpcmpeqd    ymm4, ymm4, ymm4
	LONG $0xdcefe5c5               // vpxor    ymm3, ymm3, ymm4
	LONG $0xd275f5c5               // vpcmpeqw    ymm2, ymm1, ymm2
	LONG $0xd4efedc5               // vpxor    ymm2, ymm2, ymm4
	LONG $0x656ffdc5; BYTE $0x60   // vmovdqa    ymm4, yword 96[rbp] /* [rip + .LCPI3_5] */
	// ymm0 / ymm1 become the selection masks (C > x).
	LONG $0xc065ddc5               // vpcmpgtw    ymm0, ymm4, ymm0
	LONG $0xc965ddc5               // vpcmpgtw    ymm1, ymm4, ymm1
	// Blend: take the "non-zero" mask where the selection mask is set, else C.
	LONG $0x4c5de3c4; WORD $0x00c3 // vpblendvb    ymm0, ymm4, ymm3, ymm0
	LONG $0x4c5de3c4; WORD $0x10ca // vpblendvb    ymm1, ymm4, ymm2, ymm1
	LONG $0x047ffec5; BYTE $0x79   // vmovdqu    yword [rcx + 2*rdi], ymm0
	LONG $0x4c7ffec5; WORD $0x2079 // vmovdqu    yword [rcx + 2*rdi + 32], ymm1

// Exit when rsi equals r9; otherwise continue at LBB3_748, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_747:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_748

// LBB3_753..LBB3_756: optional single vector step that computes the absolute
// value of sixteen signed 16-bit elements by widening them to 32 bits.
// LBB3_753 sets the element index rdi to 0 and falls through into LBB3_754
// (NOTE(review): LBB3_754 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_753:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set:
//   1. two groups of 8 words at [rdx + 2*rdi] are sign-extended to dwords;
//   2. m = x >> 15 (arithmetic) is 0 or -1 per lane, because the upper bits of
//      a sign-extended word all equal its sign bit;
//   3. (x + m) ^ m is then |x|;
//   4. vpshufb with the table at [rbp + 160] followed by vpermq 232 moves the
//      results into the low 128 bits, which are stored as 16 bytes each.
// NOTE(review): the shuffle table is not visible here - presumably it picks
// the low word of every dword; confirm against this function's constant table.
LBB3_754:
	LONG $0x01c0f641                           // test    r8b, 1
	JE   LBB3_756
	LONG $0x237de2c4; WORD $0x7a04             // vpmovsxwd    ymm0, oword [rdx + 2*rdi]
	LONG $0x237de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovsxwd    ymm1, oword [rdx + 2*rdi + 16]
	LONG $0xe172edc5; BYTE $0x0f               // vpsrad    ymm2, ymm1, 15
	LONG $0xe072e5c5; BYTE $0x0f               // vpsrad    ymm3, ymm0, 15
	LONG $0xc0fee5c5                           // vpaddd    ymm0, ymm3, ymm0
	LONG $0xc9feedc5                           // vpaddd    ymm1, ymm2, ymm1
	LONG $0xcaeff5c5                           // vpxor    ymm1, ymm1, ymm2
	LONG $0xc3effdc5                           // vpxor    ymm0, ymm0, ymm3
	QUAD $0x000000a0956ffdc5                   // vmovdqa    ymm2, yword 160[rbp] /* [rip + .LCPI3_10] */
	LONG $0x007de2c4; BYTE $0xc2               // vpshufb    ymm0, ymm0, ymm2
	LONG $0x00fde3c4; WORD $0xe8c0             // vpermq    ymm0, ymm0, 232
	LONG $0x0075e2c4; BYTE $0xca               // vpshufb    ymm1, ymm1, ymm2
	LONG $0x00fde3c4; WORD $0xe8c9             // vpermq    ymm1, ymm1, 232
	LONG $0x4c7ffac5; WORD $0x1079             // vmovdqu    oword [rcx + 2*rdi + 16], xmm1
	LONG $0x047ffac5; BYTE $0x79               // vmovdqu    oword [rcx + 2*rdi], xmm0

// Exit when rsi equals r9; otherwise continue at LBB3_757, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_756:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_757

// LBB3_761..LBB3_764: optional single vector step that computes the absolute
// value of sixteen signed 16-bit elements by widening them to 32 bits.
// LBB3_761 sets the element index rdi to 0 and falls through into LBB3_762
// (NOTE(review): LBB3_762 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_761:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set: sign-extend 2 x 8 words from [rdx + 2*rdi] to
// dwords, form m = x >> 15 (0 or -1 per lane), compute (x + m) ^ m = |x|,
// then use vpshufb (table at [rbp + 160]) and vpermq 232 to move the results
// into the low 128 bits and store 16 bytes each to [rcx + 2*rdi].
// NOTE(review): the shuffle table is not visible here - presumably it picks
// the low word of every dword; confirm against this function's constant table.
LBB3_762:
	LONG $0x01c0f641                           // test    r8b, 1
	JE   LBB3_764
	LONG $0x237de2c4; WORD $0x7a04             // vpmovsxwd    ymm0, oword [rdx + 2*rdi]
	LONG $0x237de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovsxwd    ymm1, oword [rdx + 2*rdi + 16]
	LONG $0xe172edc5; BYTE $0x0f               // vpsrad    ymm2, ymm1, 15
	LONG $0xe072e5c5; BYTE $0x0f               // vpsrad    ymm3, ymm0, 15
	LONG $0xc0fee5c5                           // vpaddd    ymm0, ymm3, ymm0
	LONG $0xc9feedc5                           // vpaddd    ymm1, ymm2, ymm1
	LONG $0xcaeff5c5                           // vpxor    ymm1, ymm1, ymm2
	LONG $0xc3effdc5                           // vpxor    ymm0, ymm0, ymm3
	QUAD $0x000000a0956ffdc5                   // vmovdqa    ymm2, yword 160[rbp] /* [rip + .LCPI3_10] */
	LONG $0x007de2c4; BYTE $0xc2               // vpshufb    ymm0, ymm0, ymm2
	LONG $0x00fde3c4; WORD $0xe8c0             // vpermq    ymm0, ymm0, 232
	LONG $0x0075e2c4; BYTE $0xca               // vpshufb    ymm1, ymm1, ymm2
	LONG $0x00fde3c4; WORD $0xe8c9             // vpermq    ymm1, ymm1, 232
	LONG $0x4c7ffac5; WORD $0x1079             // vmovdqu    oword [rcx + 2*rdi + 16], xmm1
	LONG $0x047ffac5; BYTE $0x79               // vmovdqu    oword [rcx + 2*rdi], xmm0

// Exit when rsi equals r9; otherwise continue at LBB3_765, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_764:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_765

// LBB3_769..LBB3_772: optional single vector step that negates 64-bit lanes.
// LBB3_769 sets the element index rdi to 0 and falls through into LBB3_770
// (NOTE(review): LBB3_770 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_769:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, 16 qwords at [rdx + 8*rdi] are replaced by 0 - x
// (vpsubq from a zeroed register) and stored to [rcx + 8*rdi].
LBB3_770:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_772
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x0cfbfdc5; BYTE $0xfa   // vpsubq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20fa // vpsubq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40fa // vpsubq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x44fbfdc5; WORD $0x60fa // vpsubq    ymm0, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c7ffec5; BYTE $0xf9   // vmovdqu    yword [rcx + 8*rdi], ymm1
	LONG $0x547ffec5; WORD $0x20f9 // vmovdqu    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x40f9 // vmovdqu    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x447ffec5; WORD $0x60f9 // vmovdqu    yword [rcx + 8*rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_773, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_772:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_773

// LBB3_777..LBB3_780: optional single vector step that XORs 4-byte lanes with
// a broadcast constant.
// LBB3_777 sets the element index rdi to 0 and falls through into LBB3_778
// (NOTE(review): LBB3_778 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_777:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, the dword at [rbp + 68] is broadcast to ymm0 and
// XORed into 32 dwords from [rdx + 4*rdi]; results go to [rcx + 4*rdi].
// NOTE(review): the constant's value is not visible here; an XOR mask on
// 4-byte lanes is presumably the float32 sign bit (negation) - confirm
// against this function's constant table.
LBB3_778:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_780
	LONG $0x187de2c4; WORD $0x4445 // vbroadcastss    ymm0, dword 68[rbp] /* [rip + .LCPI3_7] */
	LONG $0x0c57fdc5; BYTE $0xba   // vxorpd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x5457fdc5; WORD $0x20ba // vxorpd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5c57fdc5; WORD $0x40ba // vxorpd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x4457fdc5; WORD $0x60ba // vxorpd    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xb9   // vmovupd    yword [rcx + 4*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20b9 // vmovupd    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40b9 // vmovupd    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x4411fdc5; WORD $0x60b9 // vmovupd    yword [rcx + 4*rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_781, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_780:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_781

// LBB3_787..LBB3_790: optional single vector step that negates 64-bit lanes.
// LBB3_787 sets the element index rdi to 0 and falls through into LBB3_788
// (NOTE(review): LBB3_788 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_787:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, 16 qwords at [rdx + 8*rdi] are replaced by 0 - x
// (vpsubq from a zeroed register) and stored to [rcx + 8*rdi].
LBB3_788:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_790
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x0cfbfdc5; BYTE $0xfa   // vpsubq    ymm1, ymm0, yword [rdx + 8*rdi]
	LONG $0x54fbfdc5; WORD $0x20fa // vpsubq    ymm2, ymm0, yword [rdx + 8*rdi + 32]
	LONG $0x5cfbfdc5; WORD $0x40fa // vpsubq    ymm3, ymm0, yword [rdx + 8*rdi + 64]
	LONG $0x44fbfdc5; WORD $0x60fa // vpsubq    ymm0, ymm0, yword [rdx + 8*rdi + 96]
	LONG $0x0c7ffec5; BYTE $0xf9   // vmovdqu    yword [rcx + 8*rdi], ymm1
	LONG $0x547ffec5; WORD $0x20f9 // vmovdqu    yword [rcx + 8*rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x40f9 // vmovdqu    yword [rcx + 8*rdi + 64], ymm3
	LONG $0x447ffec5; WORD $0x60f9 // vmovdqu    yword [rcx + 8*rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_791, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_790:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_791

// LBB3_795..LBB3_798: optional single vector step that XORs 4-byte lanes with
// a broadcast constant.
// LBB3_795 sets the element index rdi to 0 and falls through into LBB3_796
// (NOTE(review): LBB3_796 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_795:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, the dword at [rbp + 68] is broadcast to ymm0 and
// XORed into 32 dwords from [rdx + 4*rdi]; results go to [rcx + 4*rdi].
// NOTE(review): the constant's value is not visible here; presumably the
// float32 sign-bit mask - confirm against this function's constant table.
LBB3_796:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_798
	LONG $0x187de2c4; WORD $0x4445 // vbroadcastss    ymm0, dword 68[rbp] /* [rip + .LCPI3_7] */
	LONG $0x0c57fdc5; BYTE $0xba   // vxorpd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x5457fdc5; WORD $0x20ba // vxorpd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5c57fdc5; WORD $0x40ba // vxorpd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x4457fdc5; WORD $0x60ba // vxorpd    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xb9   // vmovupd    yword [rcx + 4*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20b9 // vmovupd    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40b9 // vmovupd    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x4411fdc5; WORD $0x60b9 // vmovupd    yword [rcx + 4*rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_799, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_798:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_799

// LBB3_805..LBB3_808: optional single vector step that ANDs 4-byte lanes with
// a broadcast constant.
// LBB3_805 sets the element index rdi to 0 and falls through into LBB3_806
// (NOTE(review): LBB3_806 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_805:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, the dword at [rbp + 72] is broadcast to ymm0 and
// ANDed with 32 dwords from [rdx + 4*rdi]; results go to [rcx + 4*rdi].
// NOTE(review): the constant's value is not visible here; an AND mask on
// 4-byte lanes is presumably "all bits except the float32 sign bit"
// (absolute value) - confirm against this function's constant table.
LBB3_806:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_808
	LONG $0x187de2c4; WORD $0x4845 // vbroadcastss    ymm0, dword 72[rbp] /* [rip + .LCPI3_9] */
	LONG $0x0c54fdc5; BYTE $0xba   // vandpd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x5454fdc5; WORD $0x20ba // vandpd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5c54fdc5; WORD $0x40ba // vandpd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x4454fdc5; WORD $0x60ba // vandpd    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xb9   // vmovupd    yword [rcx + 4*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20b9 // vmovupd    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40b9 // vmovupd    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x4411fdc5; WORD $0x60b9 // vmovupd    yword [rcx + 4*rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_809, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_808:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_809

// LBB3_815..LBB3_818: optional single vector step that ANDs 4-byte lanes with
// a broadcast constant.
// LBB3_815 sets the element index rdi to 0 and falls through into LBB3_816
// (NOTE(review): LBB3_816 is presumably also entered from a loop outside this
// excerpt with rdi already advanced - confirm).
LBB3_815:
	WORD $0xff31 // xor    edi, edi

// When bit 0 of r8 is set, the dword at [rbp + 72] is broadcast to ymm0 and
// ANDed with 32 dwords from [rdx + 4*rdi]; results go to [rcx + 4*rdi].
// NOTE(review): the constant's value is not visible here; presumably the
// float32 absolute-value mask - confirm against this function's constant table.
LBB3_816:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_818
	LONG $0x187de2c4; WORD $0x4845 // vbroadcastss    ymm0, dword 72[rbp] /* [rip + .LCPI3_9] */
	LONG $0x0c54fdc5; BYTE $0xba   // vandpd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x5454fdc5; WORD $0x20ba // vandpd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5c54fdc5; WORD $0x40ba // vandpd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x4454fdc5; WORD $0x60ba // vandpd    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c11fdc5; BYTE $0xb9   // vmovupd    yword [rcx + 4*rdi], ymm1
	LONG $0x5411fdc5; WORD $0x20b9 // vmovupd    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c11fdc5; WORD $0x40b9 // vmovupd    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x4411fdc5; WORD $0x60b9 // vmovupd    yword [rcx + 4*rdi + 96], ymm0

// Exit when rsi equals r9; otherwise continue at LBB3_819, which is not part
// of this excerpt (presumably the scalar remainder loop - TODO confirm).
LBB3_818:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_819

LBB3_825:
	WORD $0xff31 // xor    edi, edi

LBB3_826:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_828
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x0cf8fdc5; BYTE $0x3a   // vpsubb    ymm1, ymm0, yword [rdx + rdi]
	LONG $0x54f8fdc5; WORD $0x203a // vpsubb    ymm2, ymm0, yword [rdx + rdi + 32]
	LONG $0x5cf8fdc5; WORD $0x403a // vpsubb    ymm3, ymm0, yword [rdx + rdi + 64]
	LONG $0x44f8fdc5; WORD $0x603a // vpsubb    ymm0, ymm0, yword [rdx + rdi + 96]
	LONG $0x0c7ffec5; BYTE $0x39   // vmovdqu    yword [rcx + rdi], ymm1
	LONG $0x547ffec5; WORD $0x2039 // vmovdqu    yword [rcx + rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x4039 // vmovdqu    yword [rcx + rdi + 64], ymm3
	LONG $0x447ffec5; WORD $0x6039 // vmovdqu    yword [rcx + rdi + 96], ymm0

LBB3_828:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_829

LBB3_833:
	WORD $0xff31 // xor    edi, edi

LBB3_834:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_836
	LONG $0x0410fdc5; BYTE $0x3a   // vmovupd    ymm0, yword [rdx + rdi]
	LONG $0x4c10fdc5; WORD $0x203a // vmovupd    ymm1, yword [rdx + rdi + 32]
	LONG $0x5410fdc5; WORD $0x403a // vmovupd    ymm2, yword [rdx + rdi + 64]
	LONG $0x5c10fdc5; WORD $0x603a // vmovupd    ymm3, yword [rdx + rdi + 96]
	LONG $0x0411fdc5; BYTE $0x39   // vmovupd    yword [rcx + rdi], ymm0
	LONG $0x4c11fdc5; WORD $0x2039 // vmovupd    yword [rcx + rdi + 32], ymm1
	LONG $0x5411fdc5; WORD $0x4039 // vmovupd    yword [rcx + rdi + 64], ymm2
	LONG $0x5c11fdc5; WORD $0x6039 // vmovupd    yword [rcx + rdi + 96], ymm3

LBB3_836:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_837

LBB3_841:
	WORD $0xff31 // xor    edi, edi

LBB3_842:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_844
	LONG $0x0410fdc5; BYTE $0x3a   // vmovupd    ymm0, yword [rdx + rdi]
	LONG $0x4c10fdc5; WORD $0x203a // vmovupd    ymm1, yword [rdx + rdi + 32]
	LONG $0x5410fdc5; WORD $0x403a // vmovupd    ymm2, yword [rdx + rdi + 64]
	LONG $0x5c10fdc5; WORD $0x603a // vmovupd    ymm3, yword [rdx + rdi + 96]
	LONG $0x0411fdc5; BYTE $0x39   // vmovupd    yword [rcx + rdi], ymm0
	LONG $0x4c11fdc5; WORD $0x2039 // vmovupd    yword [rcx + rdi + 32], ymm1
	LONG $0x5411fdc5; WORD $0x4039 // vmovupd    yword [rcx + rdi + 64], ymm2
	LONG $0x5c11fdc5; WORD $0x6039 // vmovupd    yword [rcx + rdi + 96], ymm3

LBB3_844:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_845

LBB3_849:
	WORD $0xff31 // xor    edi, edi

LBB3_850:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_852
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x0cfafdc5; BYTE $0xba   // vpsubd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20ba // vpsubd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40ba // vpsubd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x44fafdc5; WORD $0x60ba // vpsubd    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c7ffec5; BYTE $0xb9   // vmovdqu    yword [rcx + 4*rdi], ymm1
	LONG $0x547ffec5; WORD $0x20b9 // vmovdqu    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x40b9 // vmovdqu    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x447ffec5; WORD $0x60b9 // vmovdqu    yword [rcx + 4*rdi + 96], ymm0

LBB3_852:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_853

LBB3_857:
	WORD $0xff31 // xor    edi, edi

LBB3_858:
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB3_860
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x0cfafdc5; BYTE $0xba   // vpsubd    ymm1, ymm0, yword [rdx + 4*rdi]
	LONG $0x54fafdc5; WORD $0x20ba // vpsubd    ymm2, ymm0, yword [rdx + 4*rdi + 32]
	LONG $0x5cfafdc5; WORD $0x40ba // vpsubd    ymm3, ymm0, yword [rdx + 4*rdi + 64]
	LONG $0x44fafdc5; WORD $0x60ba // vpsubd    ymm0, ymm0, yword [rdx + 4*rdi + 96]
	LONG $0x0c7ffec5; BYTE $0xb9   // vmovdqu    yword [rcx + 4*rdi], ymm1
	LONG $0x547ffec5; WORD $0x20b9 // vmovdqu    yword [rcx + 4*rdi + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x40b9 // vmovdqu    yword [rcx + 4*rdi + 64], ymm3
	LONG $0x447ffec5; WORD $0x60b9 // vmovdqu    yword [rcx + 4*rdi + 96], ymm0

LBB3_860:
	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
	JE   LBB3_865
	JMP  LBB3_861

// LCDATA5 is a 256-byte read-only constant pool (32 little-endian quadwords).
// The TEXT routine that follows loads its address into BP and reads entries
// as fixed displacements off rbp, so the offsets below are baked into the
// encoded instructions: never reorder, insert, or resize entries by hand.
//
// Only two entries are referenced in the portion of the routine reviewed
// alongside this table: +0x008 (.LCPI4_1) and +0x030 (.LCPI4_2). The notes on
// every other entry describe the bit pattern only; their consumers were not
// confirmed -- NOTE(review): verify against the full routine before relying
// on any implied purpose.
DATA LCDATA5<>+0x000(SB)/8, $0x8000000000000000 // only bit 63 set
DATA LCDATA5<>+0x008(SB)/8, $0x3ff0000000000000 // .LCPI4_1: IEEE-754 binary64 1.0 (read via vmovddup/vmovq qword 8[rbp])
DATA LCDATA5<>+0x010(SB)/8, $0x43e0000000000000 // binary64 bit pattern of 2^63
DATA LCDATA5<>+0x018(SB)/8, $0x41e0000000000000 // binary64 bit pattern of 2^31
DATA LCDATA5<>+0x020(SB)/8, $0xbff0000000000000 // binary64 bit pattern of -1.0
DATA LCDATA5<>+0x028(SB)/8, $0x0000000000000001 // 64-bit integer 1
// .LCPI4_2 (16 bytes at +0x030): bit 63 set in each quadword. The routine
// ANDs doubles with it (vandpd ..., oword 48[rbp]) to keep only the sign bit,
// then ORs in 1.0 from +0x008 to form a value of magnitude 1 with the input's sign.
DATA LCDATA5<>+0x030(SB)/8, $0x8000000000000000
DATA LCDATA5<>+0x038(SB)/8, $0x8000000000000000
// 16 bytes at +0x040: 0x0001 in every 16-bit lane.
DATA LCDATA5<>+0x040(SB)/8, $0x0001000100010001
DATA LCDATA5<>+0x048(SB)/8, $0x0001000100010001
// 16 bytes at +0x050: 0x01 in each of the low 8 bytes, upper 8 bytes zero.
DATA LCDATA5<>+0x050(SB)/8, $0x0101010101010101
DATA LCDATA5<>+0x058(SB)/8, $0x0000000000000000
// 16 bytes at +0x060: 0x0001 in each of the low four 16-bit lanes, upper 8 bytes zero.
DATA LCDATA5<>+0x060(SB)/8, $0x0001000100010001
DATA LCDATA5<>+0x068(SB)/8, $0x0000000000000000
// 16 bytes at +0x070: 0x01 in each of the low 4 bytes, remaining 12 bytes zero.
DATA LCDATA5<>+0x070(SB)/8, $0x0000000001010101
DATA LCDATA5<>+0x078(SB)/8, $0x0000000000000000
// 16 bytes at +0x080: 0x01 in every byte.
DATA LCDATA5<>+0x080(SB)/8, $0x0101010101010101
DATA LCDATA5<>+0x088(SB)/8, $0x0101010101010101
// +0x090..+0x0af: each quadword packs two 32-bit values; the low dword sits
// at the lower address (little-endian).
DATA LCDATA5<>+0x090(SB)/8, $0x800000007fffffff // dwords: 0x7fffffff (all bits but bit 31), 0x80000000 (bit 31 only)
DATA LCDATA5<>+0x098(SB)/8, $0x000000013f800000 // dwords: 0x3f800000 (binary32 1.0), 0x00000001 (integer 1)
DATA LCDATA5<>+0x0a0(SB)/8, $0x4f0000005f000000 // dwords: 0x5f000000 (binary32 2^63), 0x4f000000 (binary32 2^31)
DATA LCDATA5<>+0x0a8(SB)/8, $0x00000000bf800000 // dwords: 0xbf800000 (binary32 -1.0), 0
// 16 zero bytes; presumably padding so the 32-byte entries below start on a
// 32-byte offset -- TODO confirm.
DATA LCDATA5<>+0x0b0(SB)/8, $0x0000000000000000
DATA LCDATA5<>+0x0b8(SB)/8, $0x0000000000000000
// 32 bytes at +0x0c0: 0x0001 in every 16-bit lane.
DATA LCDATA5<>+0x0c0(SB)/8, $0x0001000100010001
DATA LCDATA5<>+0x0c8(SB)/8, $0x0001000100010001
DATA LCDATA5<>+0x0d0(SB)/8, $0x0001000100010001
DATA LCDATA5<>+0x0d8(SB)/8, $0x0001000100010001
// 32 bytes at +0x0e0: 0x01 in every byte.
DATA LCDATA5<>+0x0e0(SB)/8, $0x0101010101010101
DATA LCDATA5<>+0x0e8(SB)/8, $0x0101010101010101
DATA LCDATA5<>+0x0f0(SB)/8, $0x0101010101010101
DATA LCDATA5<>+0x0f8(SB)/8, $0x0101010101010101
// Flag 8 is RODATA in Go's textflag.h; $256 is the symbol size in bytes
// (32 entries x 8 bytes, matching offsets 0x000..0x0f8 above).
GLOBL LCDATA5<>(SB), 8, $256

TEXT ยท_arithmetic_unary_diff_type_avx2(SB), $0-48

	MOVQ itype+0(FP), DI
	MOVQ otype+8(FP), SI
	MOVQ op+16(FP), DX
	MOVQ input+24(FP), CX
	MOVQ output+32(FP), R8
	MOVQ len+40(FP), R9
	LEAQ LCDATA5<>(SB), BP

	WORD $0xfa80; BYTE $0x14 // cmp    dl, 20
	JNE  LBB4_1351
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JG   LBB4_14
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JLE  LBB4_26
	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
	JE   LBB4_46
	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
	JE   LBB4_54
	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
	JNE  LBB4_1351
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JG   LBB4_94
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JLE  LBB4_164
	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
	JE   LBB4_267
	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
	JE   LBB4_270
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_13
	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_870
	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_870

LBB4_13:
	WORD $0xd231 // xor    edx, edx

LBB4_873:
	WORD $0x8949; BYTE $0xd1 // mov    r9, rdx
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x0149; BYTE $0xc1 // add    r9, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_875

LBB4_874:
	WORD $0xf631     // xor    esi, esi
	LONG $0x00913c83 // cmp    dword [rcx + 4*rdx], 0
	LONG $0xd6950f40 // setne    sil
	LONG $0x90348941 // mov    dword [r8 + 4*rdx], esi
	LONG $0x01c28348 // add    rdx, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB4_874

LBB4_875:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB4_1351

LBB4_876:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x00913c83             // cmp    dword [rcx + 4*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90348941             // mov    dword [r8 + 4*rdx], esi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x04917c83; BYTE $0x00 // cmp    dword [rcx + 4*rdx + 4], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90748941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], esi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x08917c83; BYTE $0x00 // cmp    dword [rcx + 4*rdx + 8], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90748941; BYTE $0x08 // mov    dword [r8 + 4*rdx + 8], esi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x0c917c83; BYTE $0x00 // cmp    dword [rcx + 4*rdx + 12], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90748941; BYTE $0x0c // mov    dword [r8 + 4*rdx + 12], esi
	LONG $0x04c28348             // add    rdx, 4
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_876
	JMP  LBB4_1351

LBB4_14:
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JLE  LBB4_36
	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
	JE   LBB4_62
	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
	JE   LBB4_70
	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
	JNE  LBB4_1351
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JG   LBB4_101
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JLE  LBB4_169
	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
	JE   LBB4_273
	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
	JE   LBB4_276
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
	LONG $0x04f98341         // cmp    r9d, 4
	JAE  LBB4_450
	WORD $0xf631             // xor    esi, esi
	JMP  LBB4_1292

LBB4_26:
	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
	JE   LBB4_78
	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
	JNE  LBB4_1351
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JG   LBB4_108
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JLE  LBB4_174
	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
	JE   LBB4_279
	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
	JE   LBB4_282
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_35
	LONG $0x19148d4a         // lea    rdx, [rcx + r11]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_877
	LONG $0x98148d4b         // lea    rdx, [r8 + 4*r11]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_877

LBB4_35:
	WORD $0xd231 // xor    edx, edx

LBB4_880:
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB4_882
	LONG $0x110c8a44             // mov    r9b, byte [rcx + rdx]
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
	LONG $0xd2950f41             // setne    r10b
	WORD $0xf741; BYTE $0xda     // neg    r10d
	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xfa4e0f41             // cmovle    edi, r10d
	LONG $0x903c8941             // mov    dword [r8 + 4*rdx], edi
	LONG $0x01ca8348             // or    rdx, 1

LBB4_882:
	WORD $0x014c; BYTE $0xde     // add    rsi, r11
	JE   LBB4_1351
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

LBB4_884:
	LONG $0x1104b60f             // movzx    eax, byte [rcx + rdx]
	WORD $0xff31                 // xor    edi, edi
	WORD $0xc084                 // test    al, al
	LONG $0xd7950f40             // setne    dil
	WORD $0xdff7                 // neg    edi
	WORD $0xc084                 // test    al, al
	WORD $0x4f0f; BYTE $0xfe     // cmovg    edi, esi
	LONG $0x903c8941             // mov    dword [r8 + 4*rdx], edi
	LONG $0x1144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdx + 1]
	WORD $0xff31                 // xor    edi, edi
	WORD $0xc084                 // test    al, al
	LONG $0xd7950f40             // setne    dil
	WORD $0xdff7                 // neg    edi
	WORD $0xc084                 // test    al, al
	WORD $0x4f0f; BYTE $0xfe     // cmovg    edi, esi
	LONG $0x907c8941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], edi
	LONG $0x02c28348             // add    rdx, 2
	WORD $0x3949; BYTE $0xd3     // cmp    r11, rdx
	JNE  LBB4_884
	JMP  LBB4_1351

LBB4_36:
	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
	JE   LBB4_86
	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
	JNE  LBB4_1351
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JG   LBB4_115
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JLE  LBB4_179
	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
	JE   LBB4_285
	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
	JE   LBB4_288
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_456
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_459

LBB4_46:
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JG   LBB4_122
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JLE  LBB4_184
	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
	JE   LBB4_291
	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
	JE   LBB4_294
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_460
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_463

LBB4_54:
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JG   LBB4_129
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JLE  LBB4_189
	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
	JE   LBB4_297
	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
	JE   LBB4_300
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_464
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_467

LBB4_62:
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JG   LBB4_136
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JLE  LBB4_194
	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
	JE   LBB4_303
	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
	JE   LBB4_306
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_469
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_472

LBB4_70:
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JG   LBB4_143
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JLE  LBB4_199
	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
	JE   LBB4_309
	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
	JE   LBB4_312
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x08f98341         // cmp    r9d, 8
	JAE  LBB4_474
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_1298

LBB4_78:
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JG   LBB4_150
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JLE  LBB4_204
	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
	JE   LBB4_315
	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
	JE   LBB4_318
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_85
	LONG $0x01148d48         // lea    rdx, [rcx + rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_885
	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_885

LBB4_85:
	WORD $0xd231 // xor    edx, edx

LBB4_888:
	WORD $0x8949; BYTE $0xd1 // mov    r9, rdx
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x0149; BYTE $0xc1 // add    r9, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_890

LBB4_889:
	WORD $0xf631     // xor    esi, esi
	LONG $0x00113c80 // cmp    byte [rcx + rdx], 0
	LONG $0xd6950f40 // setne    sil
	LONG $0x90348941 // mov    dword [r8 + 4*rdx], esi
	LONG $0x01c28348 // add    rdx, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB4_889

LBB4_890:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB4_1351

LBB4_891:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90348941             // mov    dword [r8 + 4*rdx], esi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x01117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 1], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90748941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], esi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x02117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 2], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90748941; BYTE $0x08 // mov    dword [r8 + 4*rdx + 8], esi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x03117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 3], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90748941; BYTE $0x0c // mov    dword [r8 + 4*rdx + 12], esi
	LONG $0x04c28348             // add    rdx, 4
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_891
	JMP  LBB4_1351

LBB4_86:
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JG   LBB4_157
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JLE  LBB4_209
	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
	JE   LBB4_321
	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
	JE   LBB4_324
	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_93
	LONG $0x99148d4a         // lea    rdx, [rcx + 4*r11]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_892
	LONG $0x98148d4b         // lea    rdx, [r8 + 4*r11]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_892

LBB4_93:
	WORD $0xd231 // xor    edx, edx

LBB4_895:
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB4_897
	LONG $0x910c8b44             // mov    r9d, dword [rcx + 4*rdx]
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	WORD $0x8545; BYTE $0xc9     // test    r9d, r9d
	LONG $0xd2950f41             // setne    r10b
	WORD $0xf741; BYTE $0xda     // neg    r10d
	WORD $0x8545; BYTE $0xc9     // test    r9d, r9d
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xfa4e0f41             // cmovle    edi, r10d
	LONG $0x903c8941             // mov    dword [r8 + 4*rdx], edi
	LONG $0x01ca8348             // or    rdx, 1

LBB4_897:
	WORD $0x014c; BYTE $0xde     // add    rsi, r11
	JE   LBB4_1351
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

LBB4_899:
	WORD $0x3c8b; BYTE $0x91     // mov    edi, dword [rcx + 4*rdx]
	WORD $0xc031                 // xor    eax, eax
	WORD $0xff85                 // test    edi, edi
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0xd8f7                 // neg    eax
	WORD $0xff85                 // test    edi, edi
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x90048941             // mov    dword [r8 + 4*rdx], eax
	LONG $0x0491448b             // mov    eax, dword [rcx + 4*rdx + 4]
	WORD $0xff31                 // xor    edi, edi
	WORD $0xc085                 // test    eax, eax
	LONG $0xd7950f40             // setne    dil
	WORD $0xdff7                 // neg    edi
	WORD $0xc085                 // test    eax, eax
	WORD $0x4f0f; BYTE $0xfe     // cmovg    edi, esi
	LONG $0x907c8941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], edi
	LONG $0x02c28348             // add    rdx, 2
	WORD $0x3949; BYTE $0xd3     // cmp    r11, rdx
	JNE  LBB4_899
	JMP  LBB4_1351

LBB4_94:
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JLE  LBB4_214
	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
	JE   LBB4_327
	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
	JE   LBB4_330
	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_483
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_486

LBB4_101:
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JLE  LBB4_219
	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
	JE   LBB4_333
	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
	JE   LBB4_336
	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_107
	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_900
	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_900

LBB4_107:
	WORD $0xd231 // xor    edx, edx

LBB4_903:
	WORD $0x8948; BYTE $0xd6       // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6       // not    rsi
	WORD $0x01a8                   // test    al, 1
	JE   LBB4_905
	LONG $0x0410fbc5; BYTE $0xd1   // vmovsd    xmm0, qword [rcx + 8*rdx]
	LONG $0x4d54f9c5; BYTE $0x30   // vandpd    xmm1, xmm0, oword 48[rbp] /* [rip + .LCPI4_2] */
	LONG $0x5512fbc5; BYTE $0x08   // vmovddup    xmm2, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0xc956e9c5               // vorpd    xmm1, xmm2, xmm1
	LONG $0xd257e9c5               // vxorpd    xmm2, xmm2, xmm2
	LONG $0xc2c2fbc5; BYTE $0x00   // vcmpeqsd    xmm0, xmm0, xmm2
	LONG $0xc155f9c5               // vandnpd    xmm0, xmm0, xmm1
	LONG $0x1379c1c4; WORD $0xd004 // vmovlpd    qword [r8 + 8*rdx], xmm0
	LONG $0x01ca8348               // or    rdx, 1

LBB4_905:
	WORD $0x0148; BYTE $0xc6     // add    rsi, rax
	JE   LBB4_1351
	LONG $0x4528f9c5; BYTE $0x30 // vmovapd    xmm0, oword 48[rbp] /* [rip + .LCPI4_2] */
	LONG $0x4d12fbc5; BYTE $0x08 // vmovddup    xmm1, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0xd257e9c5             // vxorpd    xmm2, xmm2, xmm2

LBB4_907:
	LONG $0x1c10fbc5; BYTE $0xd1               // vmovsd    xmm3, qword [rcx + 8*rdx]
	LONG $0xe054e1c5                           // vandpd    xmm4, xmm3, xmm0
	LONG $0xe456f1c5                           // vorpd    xmm4, xmm1, xmm4
	LONG $0xdac2e3c5; BYTE $0x00               // vcmpeqsd    xmm3, xmm3, xmm2
	LONG $0xdc55e1c5                           // vandnpd    xmm3, xmm3, xmm4
	LONG $0x1379c1c4; WORD $0xd01c             // vmovlpd    qword [r8 + 8*rdx], xmm3
	LONG $0x5c10fbc5; WORD $0x08d1             // vmovsd    xmm3, qword [rcx + 8*rdx + 8]
	LONG $0xe054e1c5                           // vandpd    xmm4, xmm3, xmm0
	LONG $0xe456f1c5                           // vorpd    xmm4, xmm1, xmm4
	LONG $0xdac2e3c5; BYTE $0x00               // vcmpeqsd    xmm3, xmm3, xmm2
	LONG $0xdc55e1c5                           // vandnpd    xmm3, xmm3, xmm4
	LONG $0x1379c1c4; WORD $0xd05c; BYTE $0x08 // vmovlpd    qword [r8 + 8*rdx + 8], xmm3
	LONG $0x02c28348                           // add    rdx, 2
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB4_907
	JMP  LBB4_1351

LBB4_108:
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JLE  LBB4_224
	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
	JE   LBB4_339
	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
	JE   LBB4_342
	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_114
	LONG $0x01148d48         // lea    rdx, [rcx + rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_908
	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_908

LBB4_114:
	WORD $0xd231 // xor    edx, edx

LBB4_911:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x01a8             // test    al, 1
	JE   LBB4_1254
	LONG $0x00113c80         // cmp    byte [rcx + rdx], 0
	JNE  LBB4_1250
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	JMP  LBB4_1251

LBB4_115:
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JLE  LBB4_229
	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
	JE   LBB4_345
	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
	JE   LBB4_348
	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_496
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_499

LBB4_122:
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JLE  LBB4_234
	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
	JE   LBB4_351
	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
	JE   LBB4_354
	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_503
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_506

LBB4_129:
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JLE  LBB4_239
	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
	JE   LBB4_357
	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
	JE   LBB4_360
	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_510
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_513

LBB4_136:
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JLE  LBB4_244
	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
	JE   LBB4_363
	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
	JE   LBB4_366
	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_519
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_522

LBB4_143:
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JLE  LBB4_249
	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
	JE   LBB4_369
	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
	JE   LBB4_372
	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_528
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_531

LBB4_150:
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JLE  LBB4_257
	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
	JE   LBB4_375
	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
	JE   LBB4_378
	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_156
	LONG $0x01148d48         // lea    rdx, [rcx + rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_914
	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_914

LBB4_156:
	WORD $0xd231 // xor    edx, edx

LBB4_917:
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	WORD $0x0148; BYTE $0xc6     // add    rsi, rax
	WORD $0x8948; BYTE $0xc7     // mov    rdi, rax
	LONG $0x03e78348             // and    rdi, 3
	JE   LBB4_922
	LONG $0x457efac5; BYTE $0x08 // vmovq    xmm0, qword 8[rbp] /* [rip + .LCPI4_1] */
	JMP  LBB4_920

LBB4_919:
	LONG $0xd679c1c4; WORD $0xd00c // vmovq    qword [r8 + 8*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JE   LBB4_922

LBB4_920:
	LONG $0x00113c80 // cmp    byte [rcx + rdx], 0
	LONG $0xc86ff9c5 // vmovdqa    xmm1, xmm0
	JNE  LBB4_919
	LONG $0xc9eff1c5 // vpxor    xmm1, xmm1, xmm1
	JMP  LBB4_919

LBB4_157:
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JLE  LBB4_262
	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
	JE   LBB4_381
	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
	JE   LBB4_384
	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_538
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_541

LBB4_164:
	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
	JE   LBB4_387
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_168
	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_933
	LONG $0x00148d49         // lea    rdx, [r8 + rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_933

LBB4_168:
	WORD $0xd231 // xor    edx, edx

LBB4_936:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_938

LBB4_937:
	LONG $0x00913c83             // cmp    dword [rcx + 4*rdx], 0
	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_937

LBB4_938:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB4_1351

LBB4_939:
	LONG $0x00913c83               // cmp    dword [rcx + 4*rdx], 0
	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
	LONG $0x04917c83; BYTE $0x00   // cmp    dword [rcx + 4*rdx + 4], 0
	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
	LONG $0x08917c83; BYTE $0x00   // cmp    dword [rcx + 4*rdx + 8], 0
	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
	LONG $0x0c917c83; BYTE $0x00   // cmp    dword [rcx + 4*rdx + 12], 0
	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_939
	JMP  LBB4_1351

// LBB4_169: dispatch on esi, then scalar loop over 8-byte float64 inputs
// producing one byte per element.
//   esi == 2 -> LBB4_390, esi == 3 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes straight to LBB4_1351),
//   rcx as the input base and r8 as the output base.
//   With >= 16 elements and input/output ranges that do not overlap, control
//   leaves for LBB4_940 (outside this view; presumably the vectorized
//   path -- TODO confirm).
//   NOTE(review): the meaning of the esi codes and the values of the
//   constants at 8[rbp] / 48[rbp] are not visible in this chunk.
LBB4_169:
	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
	JE   LBB4_390
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_173
	// Overlap test: end of input <= start of output, or end of output <=
	// start of input, means the two ranges are disjoint.
	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_940
	LONG $0x00148d49         // lea    rdx, [r8 + rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_940

// Scalar path starts at index 0.
LBB4_173:
	WORD $0xd231 // xor    edx, edx

// rdx = index of the next element, rax = element count.
// If the count is odd, one element is handled here so the main loop can
// step by two.
LBB4_943:
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	WORD $0x01a8                 // test    al, 1
	JE   LBB4_945
	// Per element: compare x against 0.0, build (x AND 48[rbp]) OR 8[rbp],
	// truncate that to an integer, and substitute 0 when the compare set ZF
	// (vucomisd also sets ZF for an unordered/NaN operand).
	LONG $0x0410fbc5; BYTE $0xd1 // vmovsd    xmm0, qword [rcx + 8*rdx]
	WORD $0xff31                 // xor    edi, edi
	LONG $0xc9eff1c5             // vpxor    xmm1, xmm1, xmm1
	LONG $0xc82ef9c5             // vucomisd    xmm1, xmm0
	LONG $0x4554f9c5; BYTE $0x30 // vandpd    xmm0, xmm0, oword 48[rbp] /* [rip + .LCPI4_2] */
	LONG $0x4d12fbc5; BYTE $0x08 // vmovddup    xmm1, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0xc056f1c5             // vorpd    xmm0, xmm1, xmm0
	LONG $0xd82cfbc5             // vcvttsd2si    ebx, xmm0
	WORD $0x440f; BYTE $0xdf     // cmove    ebx, edi
	LONG $0x101c8841             // mov    byte [r8 + rdx], bl
	LONG $0x01ca8348             // or    rdx, 1

// rsi = count - (index on entry to LBB4_943) - 1; zero means the element
// above was the last one.
LBB4_945:
	WORD $0x0148; BYTE $0xc6     // add    rsi, rax
	JE   LBB4_1351
	// Hoist loop invariants: esi = 0 (cmove source), xmm0 = 0.0,
	// xmm1 / xmm2 = the two constants used by the and/or step.
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc057f9c5             // vxorpd    xmm0, xmm0, xmm0
	LONG $0x4d28f9c5; BYTE $0x30 // vmovapd    xmm1, oword 48[rbp] /* [rip + .LCPI4_2] */
	LONG $0x5512fbc5; BYTE $0x08 // vmovddup    xmm2, qword 8[rbp] /* [rip + .LCPI4_1] */

// Main scalar loop, two elements per iteration, until rdx == rax.
LBB4_947:
	LONG $0x1c10fbc5; BYTE $0xd1   // vmovsd    xmm3, qword [rcx + 8*rdx]
	LONG $0xc32ef9c5               // vucomisd    xmm0, xmm3
	LONG $0xd954e1c5               // vandpd    xmm3, xmm3, xmm1
	LONG $0xdb56e9c5               // vorpd    xmm3, xmm2, xmm3
	LONG $0xfb2cfbc5               // vcvttsd2si    edi, xmm3
	WORD $0x440f; BYTE $0xfe       // cmove    edi, esi
	LONG $0x103c8841               // mov    byte [r8 + rdx], dil
	LONG $0x5c10fbc5; WORD $0x08d1 // vmovsd    xmm3, qword [rcx + 8*rdx + 8]
	LONG $0xc32ef9c5               // vucomisd    xmm0, xmm3
	LONG $0xd954e1c5               // vandpd    xmm3, xmm3, xmm1
	LONG $0xdb56e9c5               // vorpd    xmm3, xmm2, xmm3
	LONG $0xfb2cfbc5               // vcvttsd2si    edi, xmm3
	WORD $0x440f; BYTE $0xfe       // cmove    edi, esi
	LONG $0x107c8841; BYTE $0x01   // mov    byte [r8 + rdx + 1], dil
	LONG $0x02c28348               // add    rdx, 2
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_947
	JMP  LBB4_1351

// LBB4_174: dispatch on esi, then scalar loop over 1-byte signed inputs
// producing one byte per element: 1 if x > 0, 0 if x == 0, 0xFF if x < 0.
//   esi == 2 -> LBB4_393, esi == 3 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351), rcx as the
//   input base and r8 as the output base.
//   With >= 128 elements and non-overlapping ranges control leaves for
//   LBB4_948 (outside this view; presumably the vectorized path -- confirm).
LBB4_174:
	WORD $0xfe83; BYTE $0x02                   // cmp    esi, 2
	JE   LBB4_393
	WORD $0xfe83; BYTE $0x03                   // cmp    esi, 3
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB4_178
	// Overlap test: disjoint ranges take the LBB4_948 path.
	LONG $0x11148d4a                           // lea    rdx, [rcx + r10]
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JBE  LBB4_948
	LONG $0x10148d4b                           // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca                   // cmp    rdx, rcx
	JBE  LBB4_948

// Scalar path starts at index 0.
LBB4_178:
	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d

// r11 = index of the next element, r10 = element count.
// An odd count is evened out by handling one element first.
LBB4_951:
	WORD $0x894c; BYTE $0xde     // mov    rsi, r11
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c2f641             // test    r10b, 1
	JE   LBB4_953
	// r9b = -(x != 0); the result is 1 unless x <= 0, in which case r9b
	// (0 or 0xFF) is stored instead.
	LONG $0x193c8a42             // mov    dil, byte [rcx + r11]
	WORD $0x8440; BYTE $0xff     // test    dil, dil
	LONG $0xd1950f41             // setne    r9b
	WORD $0xf641; BYTE $0xd9     // neg    r9b
	WORD $0x8440; BYTE $0xff     // test    dil, dil
	LONG $0xc9b60f45             // movzx    r9d, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xf94e0f41             // cmovle    edi, r9d
	LONG $0x183c8843             // mov    byte [r8 + r11], dil
	LONG $0x01cb8349             // or    r11, 1

// rsi = count - (index on entry to LBB4_951) - 1; zero means finished.
LBB4_953:
	WORD $0x014c; BYTE $0xd6     // add    rsi, r10
	JE   LBB4_1351
	// esi = 1 is the cmovg source for positive inputs.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop, two elements per iteration, until r11 == r10.
LBB4_955:
	LONG $0x04b60f42; BYTE $0x19   // movzx    eax, byte [rcx + r11]
	WORD $0xc084                   // test    al, al
	WORD $0x950f; BYTE $0xd2       // setne    dl
	WORD $0xdaf6                   // neg    dl
	WORD $0xc084                   // test    al, al
	WORD $0xb60f; BYTE $0xc2       // movzx    eax, dl
	WORD $0x4f0f; BYTE $0xc6       // cmovg    eax, esi
	LONG $0x18048843               // mov    byte [r8 + r11], al
	LONG $0x44b60f42; WORD $0x0119 // movzx    eax, byte [rcx + r11 + 1]
	WORD $0xc084                   // test    al, al
	WORD $0x950f; BYTE $0xd2       // setne    dl
	WORD $0xdaf6                   // neg    dl
	WORD $0xc084                   // test    al, al
	WORD $0xb60f; BYTE $0xc2       // movzx    eax, dl
	WORD $0x4f0f; BYTE $0xc6       // cmovg    eax, esi
	LONG $0x18448843; BYTE $0x01   // mov    byte [r8 + r11 + 1], al
	LONG $0x02c38349               // add    r11, 2
	WORD $0x394d; BYTE $0xda       // cmp    r10, r11
	JNE  LBB4_955
	JMP  LBB4_1351

// LBB4_179: dispatch on esi, then scalar loop over 8-byte integer inputs
// producing one byte per element: out = (x != 0) ? 1 : 0.
//   esi == 2 -> LBB4_396, esi == 3 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351), rcx as the
//   input base and r8 as the output base.
//   With >= 16 elements and non-overlapping ranges control leaves for
//   LBB4_956 (outside this view; presumably the vectorized path -- confirm).
LBB4_179:
	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
	JE   LBB4_396
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_183
	// Overlap test: disjoint ranges take the LBB4_956 path.
	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_956
	LONG $0x00148d49         // lea    rdx, [r8 + rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_956

// Scalar path starts at index 0.
LBB4_183:
	WORD $0xd231 // xor    edx, edx

// rdx = index of the next element, rax = element count.
// rsi = count - rdx - 1 (elements remaining, minus one);
// rdi = count & 3 = number of single-step iterations before the 4x loop.
LBB4_959:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_961

// Remainder loop: one element per iteration, rdi times.
LBB4_960:
	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_960

// Fewer than four elements were outstanding in total: nothing left.
LBB4_961:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB4_1351

// Main scalar loop, four elements per iteration, until rdx == rax.
LBB4_962:
	LONG $0xd13c8348; BYTE $0x00   // cmp    qword [rcx + 8*rdx], 0
	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
	LONG $0xd17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rdx + 8], 0
	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
	LONG $0xd17c8348; WORD $0x0010 // cmp    qword [rcx + 8*rdx + 16], 0
	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
	LONG $0xd17c8348; WORD $0x0018 // cmp    qword [rcx + 8*rdx + 24], 0
	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_962
	JMP  LBB4_1351

// LBB4_184: dispatch on esi, then scalar loop over 2-byte integer inputs
// producing one byte per element: out = (x != 0) ? 1 : 0.
//   esi == 2 -> LBB4_399, esi == 3 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351), rcx as the
//   input base and r8 as the output base.
//   With >= 64 elements and non-overlapping ranges control leaves for
//   LBB4_963 (outside this view; presumably the vectorized path -- confirm).
LBB4_184:
	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
	JE   LBB4_399
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JB   LBB4_188
	// Overlap test: disjoint ranges take the LBB4_963 path.
	LONG $0x41148d48         // lea    rdx, [rcx + 2*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_963
	LONG $0x00148d49         // lea    rdx, [r8 + rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_963

// Scalar path starts at index 0.
LBB4_188:
	WORD $0xd231 // xor    edx, edx

// rdx = index of the next element, rax = element count.
// rsi = count - rdx - 1; rdi = count & 3 single-step iterations.
LBB4_966:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_968

// Remainder loop: one element per iteration, rdi times.
LBB4_967:
	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_967

// Fewer than four elements were outstanding in total: nothing left.
LBB4_968:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB4_1351

// Main scalar loop, four elements per iteration, until rdx == rax.
LBB4_969:
	LONG $0x513c8366; BYTE $0x00   // cmp    word [rcx + 2*rdx], 0
	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
	LONG $0x517c8366; WORD $0x0002 // cmp    word [rcx + 2*rdx + 2], 0
	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
	LONG $0x517c8366; WORD $0x0004 // cmp    word [rcx + 2*rdx + 4], 0
	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
	LONG $0x517c8366; WORD $0x0006 // cmp    word [rcx + 2*rdx + 6], 0
	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_969
	JMP  LBB4_1351

// LBB4_189: dispatch on esi, then scalar loop over 2-byte signed inputs
// producing one byte per element: 1 if x > 0, 0 if x == 0, 0xFF if x < 0.
//   esi == 2 -> LBB4_402, esi == 3 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351), rcx as the
//   input base and r8 as the output base.
//   With >= 64 elements and non-overlapping ranges control leaves for
//   LBB4_970 (outside this view; presumably the vectorized path -- confirm).
LBB4_189:
	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
	JE   LBB4_402
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JB   LBB4_193
	// Overlap test: disjoint ranges take the LBB4_970 path.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_970
	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_970

// Scalar path starts at index 0.
LBB4_193:
	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d

// r11 = index of the next element, r10 = element count.
// An odd count is evened out by handling one element first.
LBB4_973:
	WORD $0x894c; BYTE $0xde     // mov    rsi, r11
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c2f641             // test    r10b, 1
	JE   LBB4_975
	// r9b = -(x != 0); store 1 unless x <= 0, then store r9b (0 or 0xFF).
	LONG $0x3cb70f42; BYTE $0x59 // movzx    edi, word [rcx + 2*r11]
	WORD $0x8566; BYTE $0xff     // test    di, di
	LONG $0xd1950f41             // setne    r9b
	WORD $0xf641; BYTE $0xd9     // neg    r9b
	WORD $0x8566; BYTE $0xff     // test    di, di
	LONG $0xc9b60f45             // movzx    r9d, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xf94e0f41             // cmovle    edi, r9d
	LONG $0x183c8843             // mov    byte [r8 + r11], dil
	LONG $0x01cb8349             // or    r11, 1

// rsi = count - (index on entry to LBB4_973) - 1; zero means finished.
LBB4_975:
	WORD $0x014c; BYTE $0xd6     // add    rsi, r10
	JE   LBB4_1351
	// esi = 1 is the cmovg source for positive inputs.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop, two elements per iteration, until r11 == r10.
LBB4_977:
	LONG $0x3cb70f42; BYTE $0x59   // movzx    edi, word [rcx + 2*r11]
	WORD $0x8566; BYTE $0xff       // test    di, di
	WORD $0x950f; BYTE $0xd0       // setne    al
	WORD $0xd8f6                   // neg    al
	WORD $0x8566; BYTE $0xff       // test    di, di
	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
	WORD $0x4f0f; BYTE $0xc6       // cmovg    eax, esi
	LONG $0x18048843               // mov    byte [r8 + r11], al
	LONG $0x44b70f42; WORD $0x0259 // movzx    eax, word [rcx + 2*r11 + 2]
	WORD $0x8566; BYTE $0xc0       // test    ax, ax
	WORD $0x950f; BYTE $0xd2       // setne    dl
	WORD $0xdaf6                   // neg    dl
	WORD $0x8566; BYTE $0xc0       // test    ax, ax
	WORD $0xb60f; BYTE $0xc2       // movzx    eax, dl
	WORD $0x4f0f; BYTE $0xc6       // cmovg    eax, esi
	LONG $0x18448843; BYTE $0x01   // mov    byte [r8 + r11 + 1], al
	LONG $0x02c38349               // add    r11, 2
	WORD $0x394d; BYTE $0xda       // cmp    r10, r11
	JNE  LBB4_977
	JMP  LBB4_1351

// LBB4_194: dispatch on esi, then scalar loop over 8-byte signed inputs
// producing one byte per element: 1 if x > 0, 0 if x == 0, 0xFF if x < 0.
//   esi == 2 -> LBB4_405, esi == 3 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351), rcx as the
//   input base and r8 as the output base.
//   With >= 16 elements and non-overlapping ranges control leaves for
//   LBB4_978 (outside this view; presumably the vectorized path -- confirm).
LBB4_194:
	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
	JE   LBB4_405
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_198
	// Overlap test: disjoint ranges take the LBB4_978 path.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_978
	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_978

// Scalar path starts at index 0.
LBB4_198:
	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d

// r11 = index of the next element, r10 = element count.
// An odd count is evened out by handling one element first.
LBB4_981:
	WORD $0x894c; BYTE $0xde     // mov    rsi, r11
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c2f641             // test    r10b, 1
	JE   LBB4_983
	// r9b = -(x != 0); store 1 unless x <= 0, then store r9b (0 or 0xFF).
	LONG $0xd93c8b4a             // mov    rdi, qword [rcx + 8*r11]
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	LONG $0xd1950f41             // setne    r9b
	WORD $0xf641; BYTE $0xd9     // neg    r9b
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	LONG $0xc9b60f45             // movzx    r9d, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xf94e0f41             // cmovle    edi, r9d
	LONG $0x183c8843             // mov    byte [r8 + r11], dil
	LONG $0x01cb8349             // or    r11, 1

// rsi = count - (index on entry to LBB4_981) - 1; zero means finished.
LBB4_983:
	WORD $0x014c; BYTE $0xd6     // add    rsi, r10
	JE   LBB4_1351
	// esi = 1 is the cmovg source for positive inputs.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop, two elements per iteration, until r11 == r10.
LBB4_985:
	LONG $0xd93c8b4a             // mov    rdi, qword [rcx + 8*r11]
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0xd8f6                 // neg    al
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x18048843             // mov    byte [r8 + r11], al
	LONG $0xd9448b4a; BYTE $0x08 // mov    rax, qword [rcx + 8*r11 + 8]
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	WORD $0x950f; BYTE $0xd2     // setne    dl
	WORD $0xdaf6                 // neg    dl
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x18448843; BYTE $0x01 // mov    byte [r8 + r11 + 1], al
	LONG $0x02c38349             // add    r11, 2
	WORD $0x394d; BYTE $0xda     // cmp    r10, r11
	JNE  LBB4_985
	JMP  LBB4_1351

// LBB4_199: dispatch on esi, then scalar loop over 4-byte float32 inputs
// producing one byte per element: 0 when x compares equal to 0.0
// (vucomiss also sets ZF for an unordered/NaN operand), otherwise 1 if the
// sign bit of x is clear and 0xFF if it is set.
//   esi == 2 -> LBB4_408, esi == 3 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351), rcx as the
//   input base and r8 as the output base.
//   With >= 32 elements and non-overlapping ranges control leaves for
//   LBB4_986 (outside this view; presumably the vectorized path -- confirm).
LBB4_199:
	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
	JE   LBB4_408
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_203
	// Overlap test: disjoint ranges take the LBB4_986 path.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_986
	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_986

// Scalar path starts at index 0.
LBB4_203:
	WORD $0xd231 // xor    edx, edx

// rdx = index of the next element, r10 = element count.
// An odd count is evened out by handling one element first.
LBB4_989:
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c2f641             // test    r10b, 1
	JE   LBB4_991
	// dil = 2*(sign bit clear) - 1, i.e. +1 or -1 from the raw bit pattern;
	// the float compare against 0.0 then overrides it with 0 via cmove.
	LONG $0x046ef9c5; BYTE $0x91 // vmovd    xmm0, dword [rcx + 4*rdx]
	LONG $0xc77ef9c5             // vmovd    edi, xmm0
	WORD $0xff85                 // test    edi, edi
	LONG $0xd7990f40             // setns    dil
	WORD $0x0040; BYTE $0xff     // add    dil, dil
	LONG $0xffc78040             // add    dil, -1
	WORD $0x3145; BYTE $0xc9     // xor    r9d, r9d
	LONG $0xc9eff1c5             // vpxor    xmm1, xmm1, xmm1
	LONG $0xc82ef8c5             // vucomiss    xmm1, xmm0
	LONG $0xffb60f40             // movzx    edi, dil
	LONG $0xf9440f41             // cmove    edi, r9d
	LONG $0x103c8841             // mov    byte [r8 + rdx], dil
	LONG $0x01ca8348             // or    rdx, 1

// rsi = count - (index on entry to LBB4_989) - 1; zero means finished.
LBB4_991:
	WORD $0x014c; BYTE $0xd6 // add    rsi, r10
	JE   LBB4_1351
	// Loop invariants: esi = 0 (cmove source), xmm0 = 0.0.
	WORD $0xf631             // xor    esi, esi
	LONG $0xc057f8c5         // vxorps    xmm0, xmm0, xmm0

// Main scalar loop, two elements per iteration, until rdx == r10.
LBB4_993:
	LONG $0x0c6ef9c5; BYTE $0x91   // vmovd    xmm1, dword [rcx + 4*rdx]
	LONG $0xcf7ef9c5               // vmovd    edi, xmm1
	WORD $0xff85                   // test    edi, edi
	WORD $0x990f; BYTE $0xd0       // setns    al
	WORD $0xc000                   // add    al, al
	WORD $0xff04                   // add    al, -1
	LONG $0xc12ef8c5               // vucomiss    xmm0, xmm1
	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
	WORD $0x440f; BYTE $0xc6       // cmove    eax, esi
	LONG $0x10048841               // mov    byte [r8 + rdx], al
	LONG $0x4c6ef9c5; WORD $0x0491 // vmovd    xmm1, dword [rcx + 4*rdx + 4]
	LONG $0xc87ef9c5               // vmovd    eax, xmm1
	WORD $0xc085                   // test    eax, eax
	WORD $0x990f; BYTE $0xd0       // setns    al
	WORD $0xc000                   // add    al, al
	WORD $0xff04                   // add    al, -1
	LONG $0xc12ef8c5               // vucomiss    xmm0, xmm1
	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
	WORD $0x440f; BYTE $0xc6       // cmove    eax, esi
	LONG $0x10448841; BYTE $0x01   // mov    byte [r8 + rdx + 1], al
	LONG $0x02c28348               // add    rdx, 2
	WORD $0x3949; BYTE $0xd2       // cmp    r10, rdx
	JNE  LBB4_993
	JMP  LBB4_1351

// LBB4_204: dispatch on esi, then scalar loop over 1-byte inputs producing
// one byte per element: out = (x != 0) ? 1 : 0.
//   esi == 2 -> LBB4_411, esi == 3 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351), rcx as the
//   input base and r8 as the output base.
//   With >= 128 elements and non-overlapping ranges control leaves for
//   LBB4_994 (outside this view; presumably the vectorized path -- confirm).
LBB4_204:
	WORD $0xfe83; BYTE $0x02                   // cmp    esi, 2
	JE   LBB4_411
	WORD $0xfe83; BYTE $0x03                   // cmp    esi, 3
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8                   // mov    eax, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB4_208
	// Overlap test: disjoint ranges take the LBB4_994 path.
	LONG $0x01148d48                           // lea    rdx, [rcx + rax]
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JBE  LBB4_994
	LONG $0x00148d49                           // lea    rdx, [r8 + rax]
	WORD $0x3948; BYTE $0xca                   // cmp    rdx, rcx
	JBE  LBB4_994

// Scalar path starts at index 0.
LBB4_208:
	WORD $0xd231 // xor    edx, edx

// rdx = index of the next element, rax = element count.
// rsi = count - rdx - 1; rdi = count & 3 single-step iterations.
LBB4_997:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_999

// Remainder loop: one element per iteration, rdi times.
LBB4_998:
	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_998

// Fewer than four elements were outstanding in total: nothing left.
LBB4_999:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB4_1351

// Main scalar loop, four elements per iteration, until rdx == rax.
LBB4_1000:
	LONG $0x00113c80               // cmp    byte [rcx + rdx], 0
	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
	LONG $0x02117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 2], 0
	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
	LONG $0x03117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 3], 0
	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_1000
	JMP  LBB4_1351

// LBB4_209: dispatch on esi, then scalar loop over 4-byte signed inputs
// producing one byte per element: 1 if x > 0, 0 if x == 0, 0xFF if x < 0.
//   esi == 2 -> LBB4_414, esi == 3 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351), rcx as the
//   input base and r8 as the output base.
//   With >= 32 elements and non-overlapping ranges control leaves for
//   LBB4_1001 (outside this view; presumably the vectorized path -- confirm).
LBB4_209:
	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
	JE   LBB4_414
	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_213
	// Overlap test: disjoint ranges take the LBB4_1001 path.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1001
	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1001

// Scalar path starts at index 0.
LBB4_213:
	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d

// r11 = index of the next element, r10 = element count.
// An odd count is evened out by handling one element first.
LBB4_1004:
	WORD $0x894c; BYTE $0xde     // mov    rsi, r11
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c2f641             // test    r10b, 1
	JE   LBB4_1006
	// r9b = -(x != 0); store 1 unless x <= 0, then store r9b (0 or 0xFF).
	LONG $0x993c8b42             // mov    edi, dword [rcx + 4*r11]
	WORD $0xff85                 // test    edi, edi
	LONG $0xd1950f41             // setne    r9b
	WORD $0xf641; BYTE $0xd9     // neg    r9b
	WORD $0xff85                 // test    edi, edi
	LONG $0xc9b60f45             // movzx    r9d, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xf94e0f41             // cmovle    edi, r9d
	LONG $0x183c8843             // mov    byte [r8 + r11], dil
	LONG $0x01cb8349             // or    r11, 1

// rsi = count - (index on entry to LBB4_1004) - 1; zero means finished.
LBB4_1006:
	WORD $0x014c; BYTE $0xd6     // add    rsi, r10
	JE   LBB4_1351
	// esi = 1 is the cmovg source for positive inputs.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop, two elements per iteration, until r11 == r10.
LBB4_1008:
	LONG $0x993c8b42             // mov    edi, dword [rcx + 4*r11]
	WORD $0xff85                 // test    edi, edi
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0xd8f6                 // neg    al
	WORD $0xff85                 // test    edi, edi
	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x18048843             // mov    byte [r8 + r11], al
	LONG $0x99448b42; BYTE $0x04 // mov    eax, dword [rcx + 4*r11 + 4]
	WORD $0xc085                 // test    eax, eax
	WORD $0x950f; BYTE $0xd2     // setne    dl
	WORD $0xdaf6                 // neg    dl
	WORD $0xc085                 // test    eax, eax
	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x18448843; BYTE $0x01 // mov    byte [r8 + r11 + 1], al
	LONG $0x02c38349             // add    r11, 2
	WORD $0x394d; BYTE $0xda     // cmp    r10, r11
	JNE  LBB4_1008
	JMP  LBB4_1351

// LBB4_214: dispatch stub.
//   esi == 7 -> LBB4_417, esi == 8 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351) and is copied
//   to eax. Counts >= 16 go to LBB4_577; smaller counts go to LBB4_580 with
//   rdx = 0. Both targets are outside this view (presumably the wide path
//   and the scalar tail starting at index rdx -- TODO confirm).
LBB4_214:
	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
	JE   LBB4_417
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_577
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_580

// LBB4_219: dispatch stub.
//   esi == 7 -> LBB4_420, esi == 8 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351) and is copied
//   to r10d. r11 is loaded with 0x8000000000000000 (only bit 63 set) for use
//   by the targets. Counts >= 4 go to LBB4_581; smaller counts go to
//   LBB4_1286 with esi = 0. Both targets are outside this view.
LBB4_219:
	WORD $0xfe83; BYTE $0x07               // cmp    esi, 7
	JE   LBB4_420
	WORD $0xfe83; BYTE $0x08               // cmp    esi, 8
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9               // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca               // mov    r10d, r9d
	QUAD $0x000000000000bb49; WORD $0x8000 // mov    r11, -9223372036854775808
	LONG $0x04f98341                       // cmp    r9d, 4
	JAE  LBB4_581
	WORD $0xf631                           // xor    esi, esi
	JMP  LBB4_1286

// LBB4_224: dispatch on esi, then scalar loop over 1-byte signed inputs
// producing one 8-byte value per element: 1 if x > 0, 0 if x == 0,
// -1 (all ones) if x < 0.
//   esi == 7 -> LBB4_423, esi == 8 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351), rcx as the
//   input base and r8 as the output base.
//   With >= 16 elements and non-overlapping ranges control leaves for
//   LBB4_1009 (outside this view; presumably the vectorized path -- confirm).
LBB4_224:
	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
	JE   LBB4_423
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_228
	// Overlap test: disjoint ranges take the LBB4_1009 path.
	LONG $0x19148d4a         // lea    rdx, [rcx + r11]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1009
	LONG $0xd8148d4b         // lea    rdx, [r8 + 8*r11]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1009

// Scalar path starts at index 0.
LBB4_228:
	WORD $0xd231 // xor    edx, edx

// rdx = index of the next element, r11 = element count.
// An odd count is evened out by handling one element first.
LBB4_1012:
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB4_1014
	// r10 = -(x != 0) as a 64-bit value; store 1 unless x <= 0, then r10.
	LONG $0x110c8a44             // mov    r9b, byte [rcx + rdx]
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
	LONG $0xd2950f41             // setne    r10b
	WORD $0xf749; BYTE $0xda     // neg    r10
	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xfa4e0f49             // cmovle    rdi, r10
	LONG $0xd03c8949             // mov    qword [r8 + 8*rdx], rdi
	LONG $0x01ca8348             // or    rdx, 1

// rsi = count - (index on entry to LBB4_1012) - 1; zero means finished.
LBB4_1014:
	WORD $0x014c; BYTE $0xde     // add    rsi, r11
	JE   LBB4_1351
	// rsi = 1 is the cmovg source for positive inputs.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop, two elements per iteration, until rdx == r11.
LBB4_1016:
	LONG $0x1104b60f             // movzx    eax, byte [rcx + rdx]
	WORD $0xff31                 // xor    edi, edi
	WORD $0xc084                 // test    al, al
	LONG $0xd7950f40             // setne    dil
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	WORD $0xc084                 // test    al, al
	LONG $0xfe4f0f48             // cmovg    rdi, rsi
	LONG $0xd03c8949             // mov    qword [r8 + 8*rdx], rdi
	LONG $0x1144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdx + 1]
	WORD $0xff31                 // xor    edi, edi
	WORD $0xc084                 // test    al, al
	LONG $0xd7950f40             // setne    dil
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	WORD $0xc084                 // test    al, al
	LONG $0xfe4f0f48             // cmovg    rdi, rsi
	LONG $0xd07c8949; BYTE $0x08 // mov    qword [r8 + 8*rdx + 8], rdi
	LONG $0x02c28348             // add    rdx, 2
	WORD $0x3949; BYTE $0xd3     // cmp    r11, rdx
	JNE  LBB4_1016
	JMP  LBB4_1351

// LBB4_229: dispatch on esi, then scalar loop over 8-byte integer inputs
// producing one 8-byte value per element: out = (x != 0) ? 1 : 0.
//   esi == 7 -> LBB4_426, esi == 8 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351), rcx as the
//   input base and r8 as the output base.
//   With >= 16 elements and non-overlapping ranges control leaves for
//   LBB4_1017 (outside this view; presumably the vectorized path -- confirm).
LBB4_229:
	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
	JE   LBB4_426
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_233
	// Overlap test: disjoint ranges take the LBB4_1017 path.
	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1017
	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1017

// Scalar path starts at index 0.
LBB4_233:
	WORD $0xd231 // xor    edx, edx

// rdx = index of the next element, rax = element count.
// r9 = count - rdx - 1; rdi = count & 3 single-step iterations.
LBB4_1020:
	WORD $0x8949; BYTE $0xd1 // mov    r9, rdx
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x0149; BYTE $0xc1 // add    r9, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1022

// Remainder loop: one element per iteration, rdi times.
// esi is cleared before each setne so the stored qword is exactly 0 or 1.
LBB4_1021:
	WORD $0xf631                 // xor    esi, esi
	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0xd0348949             // mov    qword [r8 + 8*rdx], rsi
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_1021

// Fewer than four elements were outstanding in total: nothing left.
LBB4_1022:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB4_1351

// Main scalar loop, four elements per iteration, until rdx == rax.
LBB4_1023:
	WORD $0xf631                   // xor    esi, esi
	LONG $0xd13c8348; BYTE $0x00   // cmp    qword [rcx + 8*rdx], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0xd0348949               // mov    qword [r8 + 8*rdx], rsi
	WORD $0xf631                   // xor    esi, esi
	LONG $0xd17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rdx + 8], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0xd0748949; BYTE $0x08   // mov    qword [r8 + 8*rdx + 8], rsi
	WORD $0xf631                   // xor    esi, esi
	LONG $0xd17c8348; WORD $0x0010 // cmp    qword [rcx + 8*rdx + 16], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0xd0748949; BYTE $0x10   // mov    qword [r8 + 8*rdx + 16], rsi
	WORD $0xf631                   // xor    esi, esi
	LONG $0xd17c8348; WORD $0x0018 // cmp    qword [rcx + 8*rdx + 24], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0xd0748949; BYTE $0x18   // mov    qword [r8 + 8*rdx + 24], rsi
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_1023
	JMP  LBB4_1351

// LBB4_234: dispatch stub.
//   esi == 7 -> LBB4_429, esi == 8 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351) and is copied
//   to eax. Counts >= 16 go to LBB4_590; smaller counts go to LBB4_593 with
//   rdx = 0. Both targets are outside this view (presumably the wide path
//   and the scalar tail starting at index rdx -- TODO confirm).
LBB4_234:
	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
	JE   LBB4_429
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_590
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_593

// LBB4_239: dispatch stub.
//   esi == 7 -> LBB4_432, esi == 8 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351) and is copied
//   to r10d. Counts >= 16 go to LBB4_594; smaller counts go to LBB4_597 with
//   rdx = 0. Both targets are outside this view (presumably the wide path
//   and the scalar tail starting at index rdx -- TODO confirm).
LBB4_239:
	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
	JE   LBB4_432
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_594
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_597

// LBB4_244: dispatch on esi, then scalar loop over 8-byte signed inputs
// producing one 8-byte value per element: 1 if x > 0, 0 if x == 0,
// -1 (all ones) if x < 0.
//   esi == 7 -> LBB4_435, esi == 8 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351), rcx as the
//   input base and r8 as the output base.
//   With >= 16 elements and non-overlapping ranges control leaves for
//   LBB4_1024 (outside this view; presumably the vectorized path -- confirm).
LBB4_244:
	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
	JE   LBB4_435
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_248
	// Overlap test: disjoint ranges take the LBB4_1024 path.
	LONG $0xd9148d4a         // lea    rdx, [rcx + 8*r11]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1024
	LONG $0xd8148d4b         // lea    rdx, [r8 + 8*r11]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1024

// Scalar path starts at index 0.
LBB4_248:
	WORD $0xd231 // xor    edx, edx

// rdx = index of the next element, r11 = element count.
// An odd count is evened out by handling one element first.
LBB4_1027:
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB4_1029
	// r10 = -(x != 0) as a 64-bit value; store 1 unless x <= 0, then r10.
	LONG $0xd10c8b4c             // mov    r9, qword [rcx + 8*rdx]
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	LONG $0xd2950f41             // setne    r10b
	WORD $0xf749; BYTE $0xda     // neg    r10
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xfa4e0f49             // cmovle    rdi, r10
	LONG $0xd03c8949             // mov    qword [r8 + 8*rdx], rdi
	LONG $0x01ca8348             // or    rdx, 1

// rsi = count - (index on entry to LBB4_1027) - 1; zero means finished.
LBB4_1029:
	WORD $0x014c; BYTE $0xde     // add    rsi, r11
	JE   LBB4_1351
	// rsi = 1 is the cmovg source for positive inputs.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop, two elements per iteration, until rdx == r11.
LBB4_1031:
	LONG $0xd13c8b48             // mov    rdi, qword [rcx + 8*rdx]
	WORD $0xc031                 // xor    eax, eax
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	LONG $0xc64f0f48             // cmovg    rax, rsi
	LONG $0xd0048949             // mov    qword [r8 + 8*rdx], rax
	LONG $0xd1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rdx + 8]
	WORD $0xff31                 // xor    edi, edi
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	LONG $0xd7950f40             // setne    dil
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	LONG $0xfe4f0f48             // cmovg    rdi, rsi
	LONG $0xd07c8949; BYTE $0x08 // mov    qword [r8 + 8*rdx + 8], rdi
	LONG $0x02c28348             // add    rdx, 2
	WORD $0x3949; BYTE $0xd3     // cmp    r11, rdx
	JNE  LBB4_1031
	JMP  LBB4_1351

// LBB4_249: dispatch stub.
//   esi == 7 -> LBB4_438, esi == 8 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351) and is copied
//   to r10d. A count other than 1 goes to LBB4_602; a count of exactly 1
//   goes to LBB4_254 with eax = 0. Both targets are outside this view
//   (presumably an unrolled loop and its single-element tail -- TODO confirm).
LBB4_249:
	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
	JE   LBB4_438
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x01f98341         // cmp    r9d, 1
	JNE  LBB4_602
	WORD $0xc031             // xor    eax, eax
	JMP  LBB4_254

// LBB4_257: dispatch on esi, then scalar loop over 1-byte inputs producing
// one 8-byte value per element: out = (x != 0) ? 1 : 0.
//   esi == 7 -> LBB4_441, esi == 8 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351), rcx as the
//   input base and r8 as the output base.
//   With >= 16 elements and non-overlapping ranges control leaves for
//   LBB4_1032 (outside this view; presumably the vectorized path -- confirm).
LBB4_257:
	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
	JE   LBB4_441
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_261
	// Overlap test: disjoint ranges take the LBB4_1032 path.
	LONG $0x01148d48         // lea    rdx, [rcx + rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1032
	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1032

// Scalar path starts at index 0.
LBB4_261:
	WORD $0xd231 // xor    edx, edx

// rdx = index of the next element, rax = element count.
// r9 = count - rdx - 1; rdi = count & 3 single-step iterations.
LBB4_1035:
	WORD $0x8949; BYTE $0xd1 // mov    r9, rdx
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x0149; BYTE $0xc1 // add    r9, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1037

// Remainder loop: one element per iteration, rdi times.
// esi is cleared before each setne so the stored qword is exactly 0 or 1.
LBB4_1036:
	WORD $0xf631     // xor    esi, esi
	LONG $0x00113c80 // cmp    byte [rcx + rdx], 0
	LONG $0xd6950f40 // setne    sil
	LONG $0xd0348949 // mov    qword [r8 + 8*rdx], rsi
	LONG $0x01c28348 // add    rdx, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB4_1036

// Fewer than four elements were outstanding in total: nothing left.
LBB4_1037:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB4_1351

// Main scalar loop, four elements per iteration, until rdx == rax.
LBB4_1038:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0xd0348949             // mov    qword [r8 + 8*rdx], rsi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x01117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 1], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0xd0748949; BYTE $0x08 // mov    qword [r8 + 8*rdx + 8], rsi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x02117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 2], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0xd0748949; BYTE $0x10 // mov    qword [r8 + 8*rdx + 16], rsi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x03117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 3], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0xd0748949; BYTE $0x18 // mov    qword [r8 + 8*rdx + 24], rsi
	LONG $0x04c28348             // add    rdx, 4
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_1038
	JMP  LBB4_1351

// LBB4_262: dispatch stub.
//   esi == 7 -> LBB4_444, esi == 8 -> handled here, otherwise -> LBB4_1351.
//   r9d is used as the element count (<= 0 goes to LBB4_1351) and is copied
//   to r10d. Counts >= 16 go to LBB4_613; smaller counts go to LBB4_616 with
//   rdx = 0. Both targets are outside this view (presumably the wide path
//   and the scalar tail starting at index rdx -- TODO confirm).
LBB4_262:
	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
	JE   LBB4_444
	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
	JNE  LBB4_1351
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_613
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_616

// LBB4_267: entry stub with no esi dispatch.
//   r9d is used as the element count (<= 0 goes to LBB4_1351) and is copied
//   to eax. Counts >= 32 go to LBB4_618; smaller counts go to LBB4_621 with
//   rdx = 0. Both targets are outside this view (presumably the wide path
//   and the scalar tail starting at index rdx -- TODO confirm).
LBB4_267:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_618
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_621

// LBB4_270: entry stub with no esi dispatch.
//   r9d is used as the element count (<= 0 goes to LBB4_1351) and is copied
//   to eax. Counts >= 32 go to LBB4_622; smaller counts go to LBB4_625 with
//   rdx = 0. Both targets are outside this view (presumably the wide path
//   and the scalar tail starting at index rdx -- TODO confirm).
LBB4_270:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_622
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_625

// LBB4_273: entry stub with no esi dispatch.
//   r9d is used as the element count (<= 0 goes to LBB4_1351) and is copied
//   to eax; edx is zeroed on both outgoing paths. Counts >= 16 go to
//   LBB4_626; smaller counts go to LBB4_629 with esi = 0 as well. Both
//   targets are outside this view, so the roles of edx/esi there are not
//   visible here.
LBB4_273:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	WORD $0xd231             // xor    edx, edx
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_626
	WORD $0xf631             // xor    esi, esi
	JMP  LBB4_629

// LBB4_276: entry stub with no esi dispatch.
//   r9d is used as the element count (<= 0 goes to LBB4_1351) and is copied
//   to eax; edx is zeroed on both outgoing paths. Counts >= 16 go to
//   LBB4_631; smaller counts go to LBB4_634 with esi = 0 as well. Both
//   targets are outside this view, so the roles of edx/esi there are not
//   visible here.
LBB4_276:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	WORD $0xd231             // xor    edx, edx
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_631
	WORD $0xf631             // xor    esi, esi
	JMP  LBB4_634

// LBB4_279: scalar loop over 1-byte signed inputs producing one 2-byte
// value per element: 1 if x > 0, 0 if x == 0, 0xFFFF if x < 0.
//   No esi dispatch at this entry. r9d is used as the element count
//   (<= 0 goes to LBB4_1351), rcx as the input base and r8 as the output
//   base.
//   With >= 64 elements and non-overlapping ranges control leaves for
//   LBB4_1039 (outside this view; presumably the vectorized path -- confirm).
LBB4_279:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JB   LBB4_281
	// Overlap test: disjoint ranges take the LBB4_1039 path.
	LONG $0x19148d4a         // lea    rdx, [rcx + r11]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1039
	LONG $0x58148d4b         // lea    rdx, [r8 + 2*r11]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1039

// Scalar path starts at index 0.
LBB4_281:
	WORD $0xd231 // xor    edx, edx

// rdx = index of the next element, r11 = element count.
// An odd count is evened out by handling one element first.
LBB4_1042:
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB4_1044
	// r10d = -(x != 0); store 1 unless x <= 0, then the low word of r10d.
	LONG $0x110c8a44             // mov    r9b, byte [rcx + rdx]
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
	LONG $0xd2950f41             // setne    r10b
	WORD $0xf741; BYTE $0xda     // neg    r10d
	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xfa4e0f41             // cmovle    edi, r10d
	LONG $0x3c894166; BYTE $0x50 // mov    word [r8 + 2*rdx], di
	LONG $0x01ca8348             // or    rdx, 1

// rsi = count - (index on entry to LBB4_1042) - 1; zero means finished.
LBB4_1044:
	WORD $0x014c; BYTE $0xde     // add    rsi, r11
	JE   LBB4_1351
	// esi = 1 is the cmovg source for positive inputs.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop, two elements per iteration, until rdx == r11.
LBB4_1046:
	LONG $0x1104b60f               // movzx    eax, byte [rcx + rdx]
	WORD $0xff31                   // xor    edi, edi
	WORD $0xc084                   // test    al, al
	LONG $0xd7950f40               // setne    dil
	WORD $0xdff7                   // neg    edi
	WORD $0xc084                   // test    al, al
	WORD $0x4f0f; BYTE $0xfe       // cmovg    edi, esi
	LONG $0x3c894166; BYTE $0x50   // mov    word [r8 + 2*rdx], di
	LONG $0x1144b60f; BYTE $0x01   // movzx    eax, byte [rcx + rdx + 1]
	WORD $0xff31                   // xor    edi, edi
	WORD $0xc084                   // test    al, al
	LONG $0xd7950f40               // setne    dil
	WORD $0xdff7                   // neg    edi
	WORD $0xc084                   // test    al, al
	WORD $0x4f0f; BYTE $0xfe       // cmovg    edi, esi
	LONG $0x7c894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], di
	LONG $0x02c28348               // add    rdx, 2
	WORD $0x3949; BYTE $0xd3       // cmp    r11, rdx
	JNE  LBB4_1046
	JMP  LBB4_1351

// LBB4_282: sign of signed bytes, widened to 16 bits.
// For each index i the byte at [rcx + i] is read and a word is stored at
// [r8 + 2*i]: 1 if the byte is > 0 (signed), -1 (0xFFFF) if it is negative,
// 0 if it is zero. r9d is the element count (copied to r11), rdx the index.
LBB4_282:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JB   LBB4_284
	// 64+ elements: branch to LBB4_1047 only when the buffers do not overlap,
	// i.e. input end (rcx + n) <= output start, or output end (r8 + 2n) <=
	// input start. LBB4_1047 is not visible here (presumably the wide loop).
	LONG $0x19148d4a         // lea    rdx, [rcx + r11]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1047
	LONG $0x58148d4b         // lea    rdx, [r8 + 2*r11]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1047

// Short or overlapping input: run the scalar code from index 0.
LBB4_284:
	WORD $0xd231 // xor    edx, edx

// Scalar code, starting at index rdx.
LBB4_1050:
	// rsi = ~rdx = -rdx - 1; adding the count at LBB4_1052 gives count - rdx - 1.
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	// Odd count: do one element here so the main loop can take two at a time.
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB4_1052
	LONG $0x110c8a44             // mov    r9b, byte [rcx + rdx]
	// r10d = -(byte != 0): 0 for zero, -1 otherwise.
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
	LONG $0xd2950f41             // setne    r10b
	WORD $0xf741; BYTE $0xda     // neg    r10d
	// edi = 1 when the byte is > 0, otherwise the 0 / -1 value from r10d.
	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xfa4e0f41             // cmovle    edi, r10d
	LONG $0x3c894166; BYTE $0x50 // mov    word [r8 + 2*rdx], di
	// Advance past the element just written by setting bit 0 of the index.
	LONG $0x01ca8348             // or    rdx, 1

LBB4_1052:
	// rsi = count - (index at LBB4_1050) - 1; zero means nothing is left.
	WORD $0x014c; BYTE $0xde     // add    rsi, r11
	JE   LBB4_1351
	// esi = 1, the "positive" result selected by cmovg in the loop.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop: two elements per iteration until rdx reaches the count.
LBB4_1054:
	LONG $0x1104b60f               // movzx    eax, byte [rcx + rdx]
	WORD $0xff31                   // xor    edi, edi
	WORD $0xc084                   // test    al, al
	LONG $0xd7950f40               // setne    dil
	WORD $0xdff7                   // neg    edi
	WORD $0xc084                   // test    al, al
	WORD $0x4f0f; BYTE $0xfe       // cmovg    edi, esi
	LONG $0x3c894166; BYTE $0x50   // mov    word [r8 + 2*rdx], di
	LONG $0x1144b60f; BYTE $0x01   // movzx    eax, byte [rcx + rdx + 1]
	WORD $0xff31                   // xor    edi, edi
	WORD $0xc084                   // test    al, al
	LONG $0xd7950f40               // setne    dil
	WORD $0xdff7                   // neg    edi
	WORD $0xc084                   // test    al, al
	WORD $0x4f0f; BYTE $0xfe       // cmovg    edi, esi
	LONG $0x7c894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], di
	LONG $0x02c28348               // add    rdx, 2
	WORD $0x3949; BYTE $0xd3       // cmp    r11, rdx
	JNE  LBB4_1054
	JMP  LBB4_1351

// LBB4_285: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in eax and goes to LBB4_642 for
// 16 or more elements, or to LBB4_645 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_285:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_642
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_645

// LBB4_288: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in eax and goes to LBB4_646 for
// 16 or more elements, or to LBB4_649 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_288:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_646
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_649

// LBB4_291: non-zero flag for 16-bit elements.
// For each index i, stores 1 in the word at [r8 + 2*i] when the word at
// [rcx + 2*i] is non-zero and 0 when it is zero. r9d is the element count
// (copied to rax), rdx is the running index.
LBB4_291:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_293
	// 32+ elements: branch to LBB4_1055 only when input [rcx, rcx + 2n) and
	// output [r8, r8 + 2n) do not overlap. LBB4_1055 is not visible here
	// (presumably the wide loop).
	LONG $0x41148d48         // lea    rdx, [rcx + 2*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1055
	LONG $0x40148d49         // lea    rdx, [r8 + 2*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1055

// Short or overlapping input: run the scalar code from index 0.
LBB4_293:
	WORD $0xd231 // xor    edx, edx

// Scalar code, starting at index rdx.
LBB4_1321:
	// r9 = count - rdx - 1 (elements outstanding, minus one).
	WORD $0x8949; BYTE $0xd1 // mov    r9, rdx
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x0149; BYTE $0xc1 // add    r9, rax
	// rdi = count & 3: elements to peel off one at a time before the 4-wide loop.
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1323

// Peel loop: one element per iteration, rdi times.
LBB4_1322:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x34894166; BYTE $0x50 // mov    word [r8 + 2*rdx], si
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_1322

LBB4_1323:
	// r9 < 3 means fewer than four elements were outstanding at LBB4_1321,
	// so the 4-wide loop is skipped.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB4_1351

// Unrolled loop: four elements per iteration until rdx reaches the count.
LBB4_1324:
	WORD $0xf631                   // xor    esi, esi
	LONG $0x513c8366; BYTE $0x00   // cmp    word [rcx + 2*rdx], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x34894166; BYTE $0x50   // mov    word [r8 + 2*rdx], si
	WORD $0xf631                   // xor    esi, esi
	LONG $0x517c8366; WORD $0x0002 // cmp    word [rcx + 2*rdx + 2], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x74894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], si
	WORD $0xf631                   // xor    esi, esi
	LONG $0x517c8366; WORD $0x0004 // cmp    word [rcx + 2*rdx + 4], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x74894166; WORD $0x0450 // mov    word [r8 + 2*rdx + 4], si
	WORD $0xf631                   // xor    esi, esi
	LONG $0x517c8366; WORD $0x0006 // cmp    word [rcx + 2*rdx + 6], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x74894166; WORD $0x0650 // mov    word [r8 + 2*rdx + 6], si
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_1324
	JMP  LBB4_1351

// LBB4_294: non-zero flag for 16-bit elements.
// For each index i, stores 1 in the word at [r8 + 2*i] when the word at
// [rcx + 2*i] is non-zero and 0 when it is zero. r9d is the element count
// (copied to rax), rdx is the running index.
LBB4_294:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_296
	// 32+ elements: branch to LBB4_1058 only when input [rcx, rcx + 2n) and
	// output [r8, r8 + 2n) do not overlap. LBB4_1058 is not visible here
	// (presumably the wide loop).
	LONG $0x41148d48         // lea    rdx, [rcx + 2*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1058
	LONG $0x40148d49         // lea    rdx, [r8 + 2*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1058

// Short or overlapping input: run the scalar code from index 0.
LBB4_296:
	WORD $0xd231 // xor    edx, edx

// Scalar code, starting at index rdx.
LBB4_1329:
	// r9 = count - rdx - 1 (elements outstanding, minus one).
	WORD $0x8949; BYTE $0xd1 // mov    r9, rdx
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x0149; BYTE $0xc1 // add    r9, rax
	// rdi = count & 3: elements to peel off one at a time before the 4-wide loop.
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1331

// Peel loop: one element per iteration, rdi times.
LBB4_1330:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x34894166; BYTE $0x50 // mov    word [r8 + 2*rdx], si
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_1330

LBB4_1331:
	// r9 < 3 means fewer than four elements were outstanding at LBB4_1329,
	// so the 4-wide loop is skipped.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB4_1351

// Unrolled loop: four elements per iteration until rdx reaches the count.
LBB4_1332:
	WORD $0xf631                   // xor    esi, esi
	LONG $0x513c8366; BYTE $0x00   // cmp    word [rcx + 2*rdx], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x34894166; BYTE $0x50   // mov    word [r8 + 2*rdx], si
	WORD $0xf631                   // xor    esi, esi
	LONG $0x517c8366; WORD $0x0002 // cmp    word [rcx + 2*rdx + 2], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x74894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], si
	WORD $0xf631                   // xor    esi, esi
	LONG $0x517c8366; WORD $0x0004 // cmp    word [rcx + 2*rdx + 4], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x74894166; WORD $0x0450 // mov    word [r8 + 2*rdx + 4], si
	WORD $0xf631                   // xor    esi, esi
	LONG $0x517c8366; WORD $0x0006 // cmp    word [rcx + 2*rdx + 6], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x74894166; WORD $0x0650 // mov    word [r8 + 2*rdx + 6], si
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_1332
	JMP  LBB4_1351

// LBB4_297: sign of signed 16-bit elements.
// For each index i the word at [rcx + 2*i] is read and a word is stored at
// [r8 + 2*i]: 1 if the input is > 0 (signed), -1 (0xFFFF) if it is negative,
// 0 if it is zero. r9d is the element count (copied to r11), rdx the index.
LBB4_297:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_299
	// 32+ elements: branch to LBB4_1061 only when input [rcx, rcx + 2n) and
	// output [r8, r8 + 2n) do not overlap. LBB4_1061 is not visible here
	// (presumably the wide loop).
	LONG $0x59148d4a         // lea    rdx, [rcx + 2*r11]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1061
	LONG $0x58148d4b         // lea    rdx, [r8 + 2*r11]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1061

// Short or overlapping input: run the scalar code from index 0.
LBB4_299:
	WORD $0xd231 // xor    edx, edx

// Scalar code, starting at index rdx.
LBB4_1337:
	// rsi = ~rdx = -rdx - 1; adding the count at LBB4_1339 gives count - rdx - 1.
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	// Odd count: do one element here so the main loop can take two at a time.
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB4_1339
	LONG $0x0cb70f44; BYTE $0x51 // movzx    r9d, word [rcx + 2*rdx]
	// r10d = -(word != 0): 0 for zero, -1 otherwise.
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	LONG $0xc9854566             // test    r9w, r9w
	LONG $0xd2950f41             // setne    r10b
	WORD $0xf741; BYTE $0xda     // neg    r10d
	// edi = 1 when the word is > 0, otherwise the 0 / -1 value from r10d.
	LONG $0xc9854566             // test    r9w, r9w
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xfa4e0f41             // cmovle    edi, r10d
	LONG $0x3c894166; BYTE $0x50 // mov    word [r8 + 2*rdx], di
	// Advance past the element just written by setting bit 0 of the index.
	LONG $0x01ca8348             // or    rdx, 1

LBB4_1339:
	// rsi = count - (index at LBB4_1337) - 1; zero means nothing is left.
	WORD $0x014c; BYTE $0xde     // add    rsi, r11
	JE   LBB4_1351
	// esi = 1, the "positive" result selected by cmovg in the loop.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop: two elements per iteration until rdx reaches the count.
// The first element builds its result in eax, the second in edi.
LBB4_1341:
	LONG $0x513cb70f               // movzx    edi, word [rcx + 2*rdx]
	WORD $0xc031                   // xor    eax, eax
	WORD $0x8566; BYTE $0xff       // test    di, di
	WORD $0x950f; BYTE $0xd0       // setne    al
	WORD $0xd8f7                   // neg    eax
	WORD $0x8566; BYTE $0xff       // test    di, di
	WORD $0x4f0f; BYTE $0xc6       // cmovg    eax, esi
	LONG $0x04894166; BYTE $0x50   // mov    word [r8 + 2*rdx], ax
	LONG $0x5144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rdx + 2]
	WORD $0xff31                   // xor    edi, edi
	WORD $0x8566; BYTE $0xc0       // test    ax, ax
	LONG $0xd7950f40               // setne    dil
	WORD $0xdff7                   // neg    edi
	WORD $0x8566; BYTE $0xc0       // test    ax, ax
	WORD $0x4f0f; BYTE $0xfe       // cmovg    edi, esi
	LONG $0x7c894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], di
	LONG $0x02c28348               // add    rdx, 2
	WORD $0x3949; BYTE $0xd3       // cmp    r11, rdx
	JNE  LBB4_1341
	JMP  LBB4_1351

// LBB4_300: sign of signed 16-bit elements.
// For each index i the word at [rcx + 2*i] is read and a word is stored at
// [r8 + 2*i]: 1 if the input is > 0 (signed), -1 (0xFFFF) if it is negative,
// 0 if it is zero. r9d is the element count (copied to r11), rdx the index.
LBB4_300:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_302
	// 32+ elements: branch to LBB4_1064 only when input [rcx, rcx + 2n) and
	// output [r8, r8 + 2n) do not overlap. LBB4_1064 is not visible here
	// (presumably the wide loop).
	LONG $0x59148d4a         // lea    rdx, [rcx + 2*r11]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1064
	LONG $0x58148d4b         // lea    rdx, [r8 + 2*r11]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1064

// Short or overlapping input: run the scalar code from index 0.
LBB4_302:
	WORD $0xd231 // xor    edx, edx

// Scalar code, starting at index rdx.
LBB4_1346:
	// rsi = ~rdx = -rdx - 1; adding the count at LBB4_1348 gives count - rdx - 1.
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	// Odd count: do one element here so the main loop can take two at a time.
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB4_1348
	LONG $0x0cb70f44; BYTE $0x51 // movzx    r9d, word [rcx + 2*rdx]
	// r10d = -(word != 0): 0 for zero, -1 otherwise.
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	LONG $0xc9854566             // test    r9w, r9w
	LONG $0xd2950f41             // setne    r10b
	WORD $0xf741; BYTE $0xda     // neg    r10d
	// edi = 1 when the word is > 0, otherwise the 0 / -1 value from r10d.
	LONG $0xc9854566             // test    r9w, r9w
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xfa4e0f41             // cmovle    edi, r10d
	LONG $0x3c894166; BYTE $0x50 // mov    word [r8 + 2*rdx], di
	// Advance past the element just written by setting bit 0 of the index.
	LONG $0x01ca8348             // or    rdx, 1

LBB4_1348:
	// rsi = count - (index at LBB4_1346) - 1; zero means nothing is left.
	WORD $0x014c; BYTE $0xde     // add    rsi, r11
	JE   LBB4_1351
	// esi = 1, the "positive" result selected by cmovg in the loop.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop: two elements per iteration until rdx reaches the count.
// The first element builds its result in eax, the second in edi.
LBB4_1350:
	LONG $0x513cb70f               // movzx    edi, word [rcx + 2*rdx]
	WORD $0xc031                   // xor    eax, eax
	WORD $0x8566; BYTE $0xff       // test    di, di
	WORD $0x950f; BYTE $0xd0       // setne    al
	WORD $0xd8f7                   // neg    eax
	WORD $0x8566; BYTE $0xff       // test    di, di
	WORD $0x4f0f; BYTE $0xc6       // cmovg    eax, esi
	LONG $0x04894166; BYTE $0x50   // mov    word [r8 + 2*rdx], ax
	LONG $0x5144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rdx + 2]
	WORD $0xff31                   // xor    edi, edi
	WORD $0x8566; BYTE $0xc0       // test    ax, ax
	LONG $0xd7950f40               // setne    dil
	WORD $0xdff7                   // neg    edi
	WORD $0x8566; BYTE $0xc0       // test    ax, ax
	WORD $0x4f0f; BYTE $0xfe       // cmovg    edi, esi
	LONG $0x7c894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], di
	LONG $0x02c28348               // add    rdx, 2
	WORD $0x3949; BYTE $0xd3       // cmp    r11, rdx
	JNE  LBB4_1350
	JMP  LBB4_1351

// LBB4_303: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in r10d and goes to LBB4_662 for
// 16 or more elements, or to LBB4_665 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_303:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_662
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_665

// LBB4_306: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in r10d and goes to LBB4_667 for
// 16 or more elements, or to LBB4_670 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_306:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_667
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_670

// LBB4_309: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed). Otherwise eax = count and r10d = 0 on both paths; 32 or
// more elements go to LBB4_672, fewer go to LBB4_675 with esi also cleared.
// Both targets lie outside this part of the file.
LBB4_309:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	// r10d is cleared before the compare, so the flags tested by JAE come from cmp.
	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_672
	WORD $0xf631             // xor    esi, esi
	JMP  LBB4_675

// LBB4_312: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed). Otherwise eax = count and r10d = 0 on both paths; 32 or
// more elements go to LBB4_677, fewer go to LBB4_680 with esi also cleared.
// Both targets lie outside this part of the file.
LBB4_312:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	// r10d is cleared before the compare, so the flags tested by JAE come from cmp.
	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_677
	WORD $0xf631             // xor    esi, esi
	JMP  LBB4_680

// LBB4_315: non-zero flag for bytes, widened to 16 bits.
// For each index i, stores 1 in the word at [r8 + 2*i] when the byte at
// [rcx + i] is non-zero and 0 when it is zero. r9d is the element count
// (copied to rax), rdx is the running index.
LBB4_315:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JB   LBB4_317
	// 64+ elements: branch to LBB4_1067 only when input [rcx, rcx + n) and
	// output [r8, r8 + 2n) do not overlap. LBB4_1067 is not visible here
	// (presumably the wide loop).
	LONG $0x01148d48         // lea    rdx, [rcx + rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1067
	LONG $0x40148d49         // lea    rdx, [r8 + 2*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1067

// Short or overlapping input: run the scalar code from index 0.
LBB4_317:
	WORD $0xd231 // xor    edx, edx

// Scalar code, starting at index rdx.
LBB4_1070:
	// r9 = count - rdx - 1 (elements outstanding, minus one).
	WORD $0x8949; BYTE $0xd1 // mov    r9, rdx
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x0149; BYTE $0xc1 // add    r9, rax
	// rdi = count & 3: elements to peel off one at a time before the 4-wide loop.
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1072

// Peel loop: one element per iteration, rdi times.
LBB4_1071:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x34894166; BYTE $0x50 // mov    word [r8 + 2*rdx], si
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_1071

LBB4_1072:
	// r9 < 3 means fewer than four elements were outstanding at LBB4_1070,
	// so the 4-wide loop is skipped.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB4_1351

// Unrolled loop: four elements per iteration until rdx reaches the count.
LBB4_1073:
	WORD $0xf631                   // xor    esi, esi
	LONG $0x00113c80               // cmp    byte [rcx + rdx], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x34894166; BYTE $0x50   // mov    word [r8 + 2*rdx], si
	WORD $0xf631                   // xor    esi, esi
	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x74894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], si
	WORD $0xf631                   // xor    esi, esi
	LONG $0x02117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 2], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x74894166; WORD $0x0450 // mov    word [r8 + 2*rdx + 4], si
	WORD $0xf631                   // xor    esi, esi
	LONG $0x03117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 3], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x74894166; WORD $0x0650 // mov    word [r8 + 2*rdx + 6], si
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_1073
	JMP  LBB4_1351

// LBB4_318: non-zero flag for bytes, widened to 16 bits.
// For each index i, stores 1 in the word at [r8 + 2*i] when the byte at
// [rcx + i] is non-zero and 0 when it is zero. r9d is the element count
// (copied to rax), rdx is the running index.
LBB4_318:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JB   LBB4_320
	// 64+ elements: branch to LBB4_1074 only when input [rcx, rcx + n) and
	// output [r8, r8 + 2n) do not overlap. LBB4_1074 is not visible here
	// (presumably the wide loop).
	LONG $0x01148d48         // lea    rdx, [rcx + rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1074
	LONG $0x40148d49         // lea    rdx, [r8 + 2*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1074

// Short or overlapping input: run the scalar code from index 0.
LBB4_320:
	WORD $0xd231 // xor    edx, edx

// Scalar code, starting at index rdx.
LBB4_1077:
	// r9 = count - rdx - 1 (elements outstanding, minus one).
	WORD $0x8949; BYTE $0xd1 // mov    r9, rdx
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x0149; BYTE $0xc1 // add    r9, rax
	// rdi = count & 3: elements to peel off one at a time before the 4-wide loop.
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1079

// Peel loop: one element per iteration, rdi times.
LBB4_1078:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x34894166; BYTE $0x50 // mov    word [r8 + 2*rdx], si
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_1078

LBB4_1079:
	// r9 < 3 means fewer than four elements were outstanding at LBB4_1077,
	// so the 4-wide loop is skipped.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB4_1351

// Unrolled loop: four elements per iteration until rdx reaches the count.
LBB4_1080:
	WORD $0xf631                   // xor    esi, esi
	LONG $0x00113c80               // cmp    byte [rcx + rdx], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x34894166; BYTE $0x50   // mov    word [r8 + 2*rdx], si
	WORD $0xf631                   // xor    esi, esi
	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x74894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], si
	WORD $0xf631                   // xor    esi, esi
	LONG $0x02117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 2], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x74894166; WORD $0x0450 // mov    word [r8 + 2*rdx + 4], si
	WORD $0xf631                   // xor    esi, esi
	LONG $0x03117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 3], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0x74894166; WORD $0x0650 // mov    word [r8 + 2*rdx + 6], si
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_1080
	JMP  LBB4_1351

// LBB4_321: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in r10d and goes to LBB4_688 for
// 32 or more elements, or to LBB4_691 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_321:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_688
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_691

// LBB4_324: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in r10d and goes to LBB4_693 for
// 32 or more elements, or to LBB4_696 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_324:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_693
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_696

// LBB4_327: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in eax and goes to LBB4_698 for
// 16 or more elements, or to LBB4_701 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_327:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_698
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_701

// LBB4_330: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in eax and goes to LBB4_702 for
// 32 or more elements, or to LBB4_705 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_330:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_702
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_705

// LBB4_333: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in eax and goes to LBB4_709 for
// 4 or more elements, or to LBB4_1306 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_333:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x04f98341         // cmp    r9d, 4
	JAE  LBB4_709
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_1306

// LBB4_336: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in eax and goes to LBB4_712 for
// 16 or more elements, or to LBB4_715 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_336:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_712
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_715

// LBB4_339: sign of signed bytes, widened to 64 bits.
// For each index i the byte at [rcx + i] is read and a qword is stored at
// [r8 + 8*i]: 1 if the byte is > 0 (signed), -1 if it is negative, 0 if it
// is zero. r9d is the element count (copied to r11), rdx the index.
LBB4_339:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_341
	// 16+ elements: branch to LBB4_1081 only when input [rcx, rcx + n) and
	// output [r8, r8 + 8n) do not overlap. LBB4_1081 is not visible here
	// (presumably the wide loop).
	LONG $0x19148d4a         // lea    rdx, [rcx + r11]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1081
	LONG $0xd8148d4b         // lea    rdx, [r8 + 8*r11]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1081

// Short or overlapping input: run the scalar code from index 0.
LBB4_341:
	WORD $0xd231 // xor    edx, edx

// Scalar code, starting at index rdx.
LBB4_1084:
	// rsi = ~rdx = -rdx - 1; adding the count at LBB4_1086 gives count - rdx - 1.
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	// Odd count: do one element here so the main loop can take two at a time.
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB4_1086
	LONG $0x110c8a44             // mov    r9b, byte [rcx + rdx]
	// r10 = -(byte != 0) as a 64-bit value: 0 for zero, -1 otherwise.
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
	LONG $0xd2950f41             // setne    r10b
	WORD $0xf749; BYTE $0xda     // neg    r10
	// rdi = 1 when the byte is > 0, otherwise the 0 / -1 value from r10.
	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xfa4e0f49             // cmovle    rdi, r10
	LONG $0xd03c8949             // mov    qword [r8 + 8*rdx], rdi
	// Advance past the element just written by setting bit 0 of the index.
	LONG $0x01ca8348             // or    rdx, 1

LBB4_1086:
	// rsi = count - (index at LBB4_1084) - 1; zero means nothing is left.
	WORD $0x014c; BYTE $0xde     // add    rsi, r11
	JE   LBB4_1351
	// rsi = 1 (32-bit move zero-extends), the "positive" result for cmovg.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop: two elements per iteration until rdx reaches the count.
LBB4_1088:
	LONG $0x1104b60f             // movzx    eax, byte [rcx + rdx]
	WORD $0xff31                 // xor    edi, edi
	WORD $0xc084                 // test    al, al
	LONG $0xd7950f40             // setne    dil
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	WORD $0xc084                 // test    al, al
	LONG $0xfe4f0f48             // cmovg    rdi, rsi
	LONG $0xd03c8949             // mov    qword [r8 + 8*rdx], rdi
	LONG $0x1144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdx + 1]
	WORD $0xff31                 // xor    edi, edi
	WORD $0xc084                 // test    al, al
	LONG $0xd7950f40             // setne    dil
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	WORD $0xc084                 // test    al, al
	LONG $0xfe4f0f48             // cmovg    rdi, rsi
	LONG $0xd07c8949; BYTE $0x08 // mov    qword [r8 + 8*rdx + 8], rdi
	LONG $0x02c28348             // add    rdx, 2
	WORD $0x3949; BYTE $0xd3     // cmp    r11, rdx
	JNE  LBB4_1088
	JMP  LBB4_1351

// LBB4_342: entry for an arm that reads bytes at [rcx + i]; the overlap check
// below sizes the output as 4 bytes per element at r8. Only the set-up is in
// this part of the file: the element computation continues at LBB4_1265,
// LBB4_1266 and LBB4_1269, which are not visible here.
LBB4_342:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_344
	// 32+ elements: branch to LBB4_1089 only when input [rcx, rcx + n) and
	// output [r8, r8 + 4n) do not overlap. LBB4_1089 is not visible here
	// (presumably the wide loop).
	LONG $0x01148d48         // lea    rdx, [rcx + rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1089
	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1089

// Short or overlapping input: run the scalar code from index 0.
LBB4_344:
	WORD $0xd231 // xor    edx, edx

// Scalar code, starting at index rdx.
LBB4_1092:
	// rsi = ~rdx = -rdx - 1, kept for the code at the out-of-view labels.
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	// Even count: skip the single-element step and go straight to LBB4_1269.
	WORD $0x01a8             // test    al, 1
	JE   LBB4_1269
	// Odd count: a zero byte loads an all-zero xmm0 and joins LBB4_1266;
	// a non-zero byte is handled at LBB4_1265.
	LONG $0x00113c80         // cmp    byte [rcx + rdx], 0
	JNE  LBB4_1265
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	JMP  LBB4_1266

// LBB4_345: non-zero flag for 64-bit elements.
// For each index i, stores 1 in the qword at [r8 + 8*i] when the qword at
// [rcx + 8*i] is non-zero and 0 when it is zero. r9d is the element count
// (copied to rax), rdx is the running index.
LBB4_345:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_347
	// 16+ elements: branch to LBB4_1095 only when input [rcx, rcx + 8n) and
	// output [r8, r8 + 8n) do not overlap. LBB4_1095 is not visible here
	// (presumably the wide loop).
	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1095
	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1095

// Short or overlapping input: run the scalar code from index 0.
LBB4_347:
	WORD $0xd231 // xor    edx, edx

// Scalar code, starting at index rdx.
LBB4_1098:
	// r9 = count - rdx - 1 (elements outstanding, minus one).
	WORD $0x8949; BYTE $0xd1 // mov    r9, rdx
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x0149; BYTE $0xc1 // add    r9, rax
	// rdi = count & 3: elements to peel off one at a time before the 4-wide loop.
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1100

// Peel loop: one element per iteration, rdi times.
LBB4_1099:
	WORD $0xf631                 // xor    esi, esi
	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0xd0348949             // mov    qword [r8 + 8*rdx], rsi
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_1099

LBB4_1100:
	// r9 < 3 means fewer than four elements were outstanding at LBB4_1098,
	// so the 4-wide loop is skipped.
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB4_1351

// Unrolled loop: four elements per iteration until rdx reaches the count.
LBB4_1101:
	WORD $0xf631                   // xor    esi, esi
	LONG $0xd13c8348; BYTE $0x00   // cmp    qword [rcx + 8*rdx], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0xd0348949               // mov    qword [r8 + 8*rdx], rsi
	WORD $0xf631                   // xor    esi, esi
	LONG $0xd17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rdx + 8], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0xd0748949; BYTE $0x08   // mov    qword [r8 + 8*rdx + 8], rsi
	WORD $0xf631                   // xor    esi, esi
	LONG $0xd17c8348; WORD $0x0010 // cmp    qword [rcx + 8*rdx + 16], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0xd0748949; BYTE $0x10   // mov    qword [r8 + 8*rdx + 16], rsi
	WORD $0xf631                   // xor    esi, esi
	LONG $0xd17c8348; WORD $0x0018 // cmp    qword [rcx + 8*rdx + 24], 0
	LONG $0xd6950f40               // setne    sil
	LONG $0xd0748949; BYTE $0x18   // mov    qword [r8 + 8*rdx + 24], rsi
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_1101
	JMP  LBB4_1351

// LBB4_348: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in eax and goes to LBB4_728 for
// 16 or more elements, or to LBB4_731 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_348:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_728
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_731

// LBB4_351: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in eax and goes to LBB4_735 for
// 16 or more elements, or to LBB4_738 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_351:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_735
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_738

// LBB4_354: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in eax and goes to LBB4_739 for
// 32 or more elements, or to LBB4_742 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_354:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_739
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_742

// LBB4_357: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in r10d and goes to LBB4_746 for
// 16 or more elements, or to LBB4_749 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_357:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_746
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_749

// LBB4_360: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in eax and goes to LBB4_751 for
// 32 or more elements, or to LBB4_754 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_360:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_751
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_754

// LBB4_363: sign of signed 64-bit elements.
// For each index i the qword at [rcx + 8*i] is read and a qword is stored at
// [r8 + 8*i]: 1 if the input is > 0 (signed), -1 if it is negative, 0 if it
// is zero. r9d is the element count (copied to r11), rdx the index.
LBB4_363:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_365
	// 16+ elements: branch to LBB4_1102 only when input [rcx, rcx + 8n) and
	// output [r8, r8 + 8n) do not overlap. LBB4_1102 is not visible here
	// (presumably the wide loop).
	LONG $0xd9148d4a         // lea    rdx, [rcx + 8*r11]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1102
	LONG $0xd8148d4b         // lea    rdx, [r8 + 8*r11]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1102

// Short or overlapping input: run the scalar code from index 0.
LBB4_365:
	WORD $0xd231 // xor    edx, edx

// Scalar code, starting at index rdx.
LBB4_1105:
	// rsi = ~rdx = -rdx - 1; adding the count at LBB4_1107 gives count - rdx - 1.
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	// Odd count: do one element here so the main loop can take two at a time.
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB4_1107
	LONG $0xd10c8b4c             // mov    r9, qword [rcx + 8*rdx]
	// r10 = -(value != 0): 0 for zero, -1 otherwise.
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	LONG $0xd2950f41             // setne    r10b
	WORD $0xf749; BYTE $0xda     // neg    r10
	// rdi = 1 when the value is > 0, otherwise the 0 / -1 value from r10.
	WORD $0x854d; BYTE $0xc9     // test    r9, r9
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xfa4e0f49             // cmovle    rdi, r10
	LONG $0xd03c8949             // mov    qword [r8 + 8*rdx], rdi
	// Advance past the element just written by setting bit 0 of the index.
	LONG $0x01ca8348             // or    rdx, 1

LBB4_1107:
	// rsi = count - (index at LBB4_1105) - 1; zero means nothing is left.
	WORD $0x014c; BYTE $0xde     // add    rsi, r11
	JE   LBB4_1351
	// rsi = 1 (32-bit move zero-extends), the "positive" result for cmovg.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop: two elements per iteration until rdx reaches the count.
// The first element builds its result in rax, the second in rdi.
LBB4_1109:
	LONG $0xd13c8b48             // mov    rdi, qword [rcx + 8*rdx]
	WORD $0xc031                 // xor    eax, eax
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0xf748; BYTE $0xd8     // neg    rax
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	LONG $0xc64f0f48             // cmovg    rax, rsi
	LONG $0xd0048949             // mov    qword [r8 + 8*rdx], rax
	LONG $0xd1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rdx + 8]
	WORD $0xff31                 // xor    edi, edi
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	LONG $0xd7950f40             // setne    dil
	WORD $0xf748; BYTE $0xdf     // neg    rdi
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	LONG $0xfe4f0f48             // cmovg    rdi, rsi
	LONG $0xd07c8949; BYTE $0x08 // mov    qword [r8 + 8*rdx + 8], rdi
	LONG $0x02c28348             // add    rdx, 2
	WORD $0x3949; BYTE $0xd3     // cmp    r11, rdx
	JNE  LBB4_1109
	JMP  LBB4_1351

// LBB4_366: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in eax and goes to LBB4_763 for
// 16 or more elements, or to LBB4_766 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_366:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_763
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_766

// LBB4_369: length gate. Exits through LBB4_1351 when the count in r9d is
// <= 0 (signed); otherwise keeps the count in r10d and goes to LBB4_772 for
// 4 or more elements, or to LBB4_1312 with index edx = 0 for fewer.
// Both targets lie outside this part of the file.
LBB4_369:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x04f98341         // cmp    r9d, 4
	JAE  LBB4_772
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_1312

// LBB4_372: sign of 32-bit floats.
// For each index i the float at [rcx + 4*i] is read and a float is stored at
// [r8 + 4*i]: 0.0 when the input compares equal to zero, otherwise +1.0 or
// -1.0 according to the input's sign bit. r9d is the element count (copied
// to rax), rdx is the running index.
LBB4_372:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_374
	// 32+ elements: branch to LBB4_1110 only when input [rcx, rcx + 4n) and
	// output [r8, r8 + 4n) do not overlap. LBB4_1110 is not visible here
	// (presumably the wide loop).
	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1110
	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1110

// Short or overlapping input: run the scalar code from index 0.
LBB4_374:
	WORD $0xd231 // xor    edx, edx

// Scalar code, starting at index rdx.
LBB4_1113:
	// rsi = ~rdx = -rdx - 1; adding the count at LBB4_1115 gives count - rdx - 1.
	WORD $0x8948; BYTE $0xd6       // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6       // not    rsi
	// Odd count: do one element here so the main loop can take two at a time.
	WORD $0x01a8                   // test    al, 1
	JE   LBB4_1115
	LONG $0x0410fac5; BYTE $0x91   // vmovss    xmm0, dword [rcx + 4*rdx]
	// edi = sign bit of the input (bit 0 of the movmskps result).
	LONG $0xf850f8c5               // vmovmskps    edi, xmm0
	WORD $0xe783; BYTE $0x01       // and    edi, 1
	// edi = -sign | 1: +1 when the sign bit is clear, -1 when it is set.
	WORD $0xdff7                   // neg    edi
	WORD $0xcf83; BYTE $0x01       // or    edi, 1
	// xmm1 low lane = (float)edi, i.e. +1.0 or -1.0.
	LONG $0xcf2aaac5               // vcvtsi2ss    xmm1, xmm10, edi
	LONG $0xd257e8c5               // vxorps    xmm2, xmm2, xmm2
	// xmm0 = all-ones mask when the input equals zero; andn then clears the
	// +/-1.0 to 0.0 in that case and passes it through otherwise.
	LONG $0xc2c2fac5; BYTE $0x00   // vcmpeqss    xmm0, xmm0, xmm2
	LONG $0xc155f8c5               // vandnps    xmm0, xmm0, xmm1
	LONG $0x117ac1c4; WORD $0x9004 // vmovss    dword [r8 + 4*rdx], xmm0
	// Advance past the element just written by setting bit 0 of the index.
	LONG $0x01ca8348               // or    rdx, 1

LBB4_1115:
	// rsi = count - (index at LBB4_1113) - 1; zero means nothing is left.
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	JE   LBB4_1351
	// xmm0 = 0.0, the comparison operand for the loop below.
	LONG $0xc057f8c5         // vxorps    xmm0, xmm0, xmm0

// Main scalar loop: two elements per iteration until rdx reaches the count,
// using the same sign-bit / compare-with-zero sequence as above.
LBB4_1117:
	LONG $0x0c10fac5; BYTE $0x91               // vmovss    xmm1, dword [rcx + 4*rdx]
	LONG $0xf150f8c5                           // vmovmskps    esi, xmm1
	WORD $0xe683; BYTE $0x01                   // and    esi, 1
	WORD $0xdef7                               // neg    esi
	WORD $0xce83; BYTE $0x01                   // or    esi, 1
	LONG $0xd62aaac5                           // vcvtsi2ss    xmm2, xmm10, esi
	LONG $0xc8c2f2c5; BYTE $0x00               // vcmpeqss    xmm1, xmm1, xmm0
	LONG $0xca55f0c5                           // vandnps    xmm1, xmm1, xmm2
	LONG $0x117ac1c4; WORD $0x900c             // vmovss    dword [r8 + 4*rdx], xmm1
	LONG $0x4c10fac5; WORD $0x0491             // vmovss    xmm1, dword [rcx + 4*rdx + 4]
	LONG $0xf150f8c5                           // vmovmskps    esi, xmm1
	WORD $0xe683; BYTE $0x01                   // and    esi, 1
	WORD $0xdef7                               // neg    esi
	WORD $0xce83; BYTE $0x01                   // or    esi, 1
	LONG $0xd62aaac5                           // vcvtsi2ss    xmm2, xmm10, esi
	LONG $0xc8c2f2c5; BYTE $0x00               // vcmpeqss    xmm1, xmm1, xmm0
	LONG $0xca55f0c5                           // vandnps    xmm1, xmm1, xmm2
	LONG $0x117ac1c4; WORD $0x904c; BYTE $0x04 // vmovss    dword [r8 + 4*rdx + 4], xmm1
	LONG $0x02c28348                           // add    rdx, 2
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JNE  LBB4_1117
	JMP  LBB4_1351

// Handler: 8-bit input -> 64-bit output, out[i] = (in[i] != 0) ? 1 : 0.
// rcx = input bytes, r8 = output qwords, r9d = element count (copied to rax).
// A count <= 0 leaves via LBB4_1351 (defined outside this view).
// With at least 16 elements and non-overlapping input/output ranges control goes
// to LBB4_1118 (outside this view; presumably the vectorized path -- confirm).
// Otherwise the scalar code below runs.
LBB4_375:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_377
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0x01148d48         // lea    rdx, [rcx + rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1118
	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1118

// Scalar path starts at element index 0.
LBB4_377:
	WORD $0xd231 // xor    edx, edx

// r9 = count - index - 1; rdi = count & 3 is the number of elements peeled
// one at a time before the 4x-unrolled loop.
LBB4_1121:
	WORD $0x8949; BYTE $0xd1 // mov    r9, rdx
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x0149; BYTE $0xc1 // add    r9, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1123

// Peel loop: one element per iteration. rsi is zeroed first so that setne sil
// leaves a clean 0/1 in the full 64-bit register that is stored.
LBB4_1122:
	WORD $0xf631     // xor    esi, esi
	LONG $0x00113c80 // cmp    byte [rcx + rdx], 0
	LONG $0xd6950f40 // setne    sil
	LONG $0xd0348949 // mov    qword [r8 + 8*rdx], rsi
	LONG $0x01c28348 // add    rdx, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB4_1122

// If fewer than 4 elements existed when r9 was computed, the peel loop did them all.
LBB4_1123:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB4_1351

// Main scalar loop, 4 elements per iteration, until rdx == rax.
LBB4_1124:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0xd0348949             // mov    qword [r8 + 8*rdx], rsi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x01117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 1], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0xd0748949; BYTE $0x08 // mov    qword [r8 + 8*rdx + 8], rsi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x02117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 2], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0xd0748949; BYTE $0x10 // mov    qword [r8 + 8*rdx + 16], rsi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x03117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 3], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0xd0748949; BYTE $0x18 // mov    qword [r8 + 8*rdx + 24], rsi
	LONG $0x04c28348             // add    rdx, 4
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_1124
	JMP  LBB4_1351

// Handler: 8-bit input -> 32-bit output. Each output dword is the 32-bit constant
// loaded from 152[rbp] when the input byte is non-zero, and 0 otherwise.
// NOTE(review): the constant's value is not visible here (presumably 1.0f,
// i.e. a float32 result) -- confirm against the constant table.
// rcx = input bytes, r8 = output dwords, r9d = element count (copied to rax).
// With at least 32 elements and non-overlapping ranges control goes to LBB4_1125
// (outside this view; presumably the vectorized path -- confirm).
LBB4_378:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_380
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0x01148d48         // lea    rdx, [rcx + rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1125
	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1125

// Scalar path starts at element index 0.
LBB4_380:
	WORD $0xd231 // xor    edx, edx

// rsi = count - index - 1; rdi = count & 3 elements are peeled one at a time.
// Execution continues at LBB4_1133, which is outside this view.
LBB4_1128:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1133
	QUAD $0x00000098856ef9c5 // vmovd    xmm0, dword 152[rbp] /* [rip + .LCPI4_5] */
	JMP  LBB4_1131

// Store half of the peel loop: write the selected value and advance.
LBB4_1130:
	LONG $0x7e79c1c4; WORD $0x900c // vmovd    dword [r8 + 4*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	LONG $0xffc78348               // add    rdi, -1
	JE   LBB4_1133

// Select half of the peel loop: xmm1 = constant if the byte is non-zero, else 0.
// vmovdqa does not alter the flags set by cmp, so JNE still tests the byte.
LBB4_1131:
	LONG $0x00113c80 // cmp    byte [rcx + rdx], 0
	LONG $0xc86ff9c5 // vmovdqa    xmm1, xmm0
	JNE  LBB4_1130
	LONG $0xc9eff1c5 // vpxor    xmm1, xmm1, xmm1
	JMP  LBB4_1130

// Dispatch stub: exits via LBB4_1351 when the count in r9d is <= 0; otherwise
// r10d = count and control goes to LBB4_784 for counts >= 16, or to LBB4_787
// with rdx = 0 for smaller counts. Both targets are outside this view.
LBB4_381:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_784
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_787

// Dispatch stub: exits via LBB4_1351 when the count in r9d is <= 0; otherwise
// eax = count and control goes to LBB4_789 for counts >= 32, or to LBB4_792
// with rdx = 0 for smaller counts. Both targets are outside this view.
LBB4_384:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_789
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_792

// Handler: 32-bit input -> 8-bit output, out[i] = (in[i] != 0) ? 1 : 0.
// rcx = input dwords, r8 = output bytes, r9d = element count (copied to rax).
// With at least 32 elements and non-overlapping ranges control goes to LBB4_1144
// (outside this view; presumably the vectorized path -- confirm).
LBB4_387:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_389
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1144
	LONG $0x00148d49         // lea    rdx, [r8 + rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1144

// Scalar path starts at element index 0.
LBB4_389:
	WORD $0xd231 // xor    edx, edx

// rsi = count - index - 1; rdi = count & 3 elements are peeled one at a time.
LBB4_1147:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1149

// Peel loop: setne writes the 0/1 result straight to the output byte.
LBB4_1148:
	LONG $0x00913c83             // cmp    dword [rcx + 4*rdx], 0
	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_1148

// If fewer than 4 elements existed when rsi was computed, the peel loop did them all.
LBB4_1149:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB4_1351

// Main scalar loop, 4 elements per iteration, until rdx == rax.
LBB4_1150:
	LONG $0x00913c83               // cmp    dword [rcx + 4*rdx], 0
	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
	LONG $0x04917c83; BYTE $0x00   // cmp    dword [rcx + 4*rdx + 4], 0
	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
	LONG $0x08917c83; BYTE $0x00   // cmp    dword [rcx + 4*rdx + 8], 0
	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
	LONG $0x0c917c83; BYTE $0x00   // cmp    dword [rcx + 4*rdx + 12], 0
	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_1150
	JMP  LBB4_1351

// Handler: float64 input -> 8-bit output. Each element x is masked with the
// constant at 48[rbp], OR-ed with the constant at 8[rbp], truncated to an integer,
// and replaced by 0 when vucomisd reports x equal to 0.0.
// NOTE(review): the constants are presumably the sign-bit mask and 1.0, making the
// result sign(x) in {-1, 0, +1} -- confirm against the constant table.
// Per the ISA, vucomisd also sets ZF for unordered (NaN) operands, so cmove
// selects 0 in that case as well.
// rcx = input qwords, r8 = output bytes, r9d = element count (copied to rax).
// With at least 16 elements and non-overlapping ranges control goes to LBB4_1151
// (outside this view; presumably the vectorized path -- confirm).
LBB4_390:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_392
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1151
	LONG $0x00148d49         // lea    rdx, [r8 + rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1151

// Scalar path starts at element index 0.
LBB4_392:
	WORD $0xd231 // xor    edx, edx

// rsi = ~index. When the count is odd, one element is handled here so the
// remaining work is a multiple of two. The vector ops between vucomisd and cmove
// do not modify flags, so cmove still sees the comparison result.
// This single-element path uses ebx as its scratch register.
LBB4_1154:
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	WORD $0x01a8                 // test    al, 1
	JE   LBB4_1156
	LONG $0x0410fbc5; BYTE $0xd1 // vmovsd    xmm0, qword [rcx + 8*rdx]
	WORD $0xff31                 // xor    edi, edi
	LONG $0xc9eff1c5             // vpxor    xmm1, xmm1, xmm1
	LONG $0xc82ef9c5             // vucomisd    xmm1, xmm0
	LONG $0x4554f9c5; BYTE $0x30 // vandpd    xmm0, xmm0, oword 48[rbp] /* [rip + .LCPI4_2] */
	LONG $0x4d12fbc5; BYTE $0x08 // vmovddup    xmm1, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0xc056f1c5             // vorpd    xmm0, xmm1, xmm0
	LONG $0xd82cfbc5             // vcvttsd2si    ebx, xmm0
	WORD $0x440f; BYTE $0xdf     // cmove    ebx, edi
	LONG $0x101c8841             // mov    byte [r8 + rdx], bl
	LONG $0x01ca8348             // or    rdx, 1

// rsi + count == 0 means only one element remained at LBB4_1154, so we are done.
// Otherwise hoist the loop invariants: esi = 0 (zero result), xmm0 = 0.0,
// xmm1 / xmm2 = the two constants.
LBB4_1156:
	WORD $0x0148; BYTE $0xc6     // add    rsi, rax
	JE   LBB4_1351
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc057f9c5             // vxorpd    xmm0, xmm0, xmm0
	LONG $0x4d28f9c5; BYTE $0x30 // vmovapd    xmm1, oword 48[rbp] /* [rip + .LCPI4_2] */
	LONG $0x5512fbc5; BYTE $0x08 // vmovddup    xmm2, qword 8[rbp] /* [rip + .LCPI4_1] */

// Main scalar loop, 2 elements per iteration, until rdx == rax.
LBB4_1158:
	LONG $0x1c10fbc5; BYTE $0xd1   // vmovsd    xmm3, qword [rcx + 8*rdx]
	LONG $0xc32ef9c5               // vucomisd    xmm0, xmm3
	LONG $0xd954e1c5               // vandpd    xmm3, xmm3, xmm1
	LONG $0xdb56e9c5               // vorpd    xmm3, xmm2, xmm3
	LONG $0xfb2cfbc5               // vcvttsd2si    edi, xmm3
	WORD $0x440f; BYTE $0xfe       // cmove    edi, esi
	LONG $0x103c8841               // mov    byte [r8 + rdx], dil
	LONG $0x5c10fbc5; WORD $0x08d1 // vmovsd    xmm3, qword [rcx + 8*rdx + 8]
	LONG $0xc32ef9c5               // vucomisd    xmm0, xmm3
	LONG $0xd954e1c5               // vandpd    xmm3, xmm3, xmm1
	LONG $0xdb56e9c5               // vorpd    xmm3, xmm2, xmm3
	LONG $0xfb2cfbc5               // vcvttsd2si    edi, xmm3
	WORD $0x440f; BYTE $0xfe       // cmove    edi, esi
	LONG $0x107c8841; BYTE $0x01   // mov    byte [r8 + rdx + 1], dil
	LONG $0x02c28348               // add    rdx, 2
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_1158
	JMP  LBB4_1351

// Handler: signed 8-bit input -> 8-bit output, out[i] = sign(in[i]):
// 0 for zero, 1 for positive, 0xFF (-1) for negative.
// rcx = input bytes, r8 = output bytes, r9d = element count (copied to r10d).
// With at least 128 elements and non-overlapping ranges control goes to LBB4_1159
// (outside this view; presumably the vectorized path -- confirm).
LBB4_393:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca                   // mov    r10d, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB4_395
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0x11148d4a                           // lea    rdx, [rcx + r10]
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JBE  LBB4_1159
	LONG $0x10148d4b                           // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca                   // cmp    rdx, rcx
	JBE  LBB4_1159

// Scalar path starts at element index 0 (index lives in r11).
LBB4_395:
	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d

// rsi = ~index. When the count is odd, one element is handled here.
// r9b = -(x != 0) gives 0 or 0xFF; edi starts at 1 and is replaced by r9d
// when x <= 0 (signed), yielding 1 / 0 / 0xFF.
LBB4_1162:
	WORD $0x894c; BYTE $0xde     // mov    rsi, r11
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c2f641             // test    r10b, 1
	JE   LBB4_1164
	LONG $0x193c8a42             // mov    dil, byte [rcx + r11]
	WORD $0x8440; BYTE $0xff     // test    dil, dil
	LONG $0xd1950f41             // setne    r9b
	WORD $0xf641; BYTE $0xd9     // neg    r9b
	WORD $0x8440; BYTE $0xff     // test    dil, dil
	LONG $0xc9b60f45             // movzx    r9d, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xf94e0f41             // cmovle    edi, r9d
	LONG $0x183c8843             // mov    byte [r8 + r11], dil
	LONG $0x01cb8349             // or    r11, 1

// rsi + count == 0 means only one element remained, so we are done.
// esi = 1 is the "positive" result used by cmovg in the loop.
LBB4_1164:
	WORD $0x014c; BYTE $0xd6     // add    rsi, r10
	JE   LBB4_1351
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop, 2 elements per iteration, until r11 == r10.
// The second test re-establishes flags for cmovg because neg clobbers them.
LBB4_1166:
	LONG $0x04b60f42; BYTE $0x19   // movzx    eax, byte [rcx + r11]
	WORD $0xc084                   // test    al, al
	WORD $0x950f; BYTE $0xd2       // setne    dl
	WORD $0xdaf6                   // neg    dl
	WORD $0xc084                   // test    al, al
	WORD $0xb60f; BYTE $0xc2       // movzx    eax, dl
	WORD $0x4f0f; BYTE $0xc6       // cmovg    eax, esi
	LONG $0x18048843               // mov    byte [r8 + r11], al
	LONG $0x44b60f42; WORD $0x0119 // movzx    eax, byte [rcx + r11 + 1]
	WORD $0xc084                   // test    al, al
	WORD $0x950f; BYTE $0xd2       // setne    dl
	WORD $0xdaf6                   // neg    dl
	WORD $0xc084                   // test    al, al
	WORD $0xb60f; BYTE $0xc2       // movzx    eax, dl
	WORD $0x4f0f; BYTE $0xc6       // cmovg    eax, esi
	LONG $0x18448843; BYTE $0x01   // mov    byte [r8 + r11 + 1], al
	LONG $0x02c38349               // add    r11, 2
	WORD $0x394d; BYTE $0xda       // cmp    r10, r11
	JNE  LBB4_1166
	JMP  LBB4_1351

// Handler: 64-bit input -> 8-bit output, out[i] = (in[i] != 0) ? 1 : 0.
// rcx = input qwords, r8 = output bytes, r9d = element count (copied to rax).
// With at least 16 elements and non-overlapping ranges control goes to LBB4_1167
// (outside this view; presumably the vectorized path -- confirm).
LBB4_396:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_398
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1167
	LONG $0x00148d49         // lea    rdx, [r8 + rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1167

// Scalar path starts at element index 0.
LBB4_398:
	WORD $0xd231 // xor    edx, edx

// rsi = count - index - 1; rdi = count & 3 elements are peeled one at a time.
LBB4_1170:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1172

// Peel loop: setne writes the 0/1 result straight to the output byte.
LBB4_1171:
	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_1171

// If fewer than 4 elements existed when rsi was computed, the peel loop did them all.
LBB4_1172:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB4_1351

// Main scalar loop, 4 elements per iteration, until rdx == rax.
LBB4_1173:
	LONG $0xd13c8348; BYTE $0x00   // cmp    qword [rcx + 8*rdx], 0
	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
	LONG $0xd17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rdx + 8], 0
	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
	LONG $0xd17c8348; WORD $0x0010 // cmp    qword [rcx + 8*rdx + 16], 0
	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
	LONG $0xd17c8348; WORD $0x0018 // cmp    qword [rcx + 8*rdx + 24], 0
	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_1173
	JMP  LBB4_1351

// Handler: 16-bit input -> 8-bit output, out[i] = (in[i] != 0) ? 1 : 0.
// rcx = input words, r8 = output bytes, r9d = element count (copied to rax).
// With at least 64 elements and non-overlapping ranges control goes to LBB4_1174
// (outside this view; presumably the vectorized path -- confirm).
LBB4_399:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JB   LBB4_401
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0x41148d48         // lea    rdx, [rcx + 2*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1174
	LONG $0x00148d49         // lea    rdx, [r8 + rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1174

// Scalar path starts at element index 0.
LBB4_401:
	WORD $0xd231 // xor    edx, edx

// rsi = count - index - 1; rdi = count & 3 elements are peeled one at a time.
LBB4_1177:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1179

// Peel loop: setne writes the 0/1 result straight to the output byte.
LBB4_1178:
	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_1178

// If fewer than 4 elements existed when rsi was computed, the peel loop did them all.
LBB4_1179:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB4_1351

// Main scalar loop, 4 elements per iteration, until rdx == rax.
LBB4_1180:
	LONG $0x513c8366; BYTE $0x00   // cmp    word [rcx + 2*rdx], 0
	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
	LONG $0x517c8366; WORD $0x0002 // cmp    word [rcx + 2*rdx + 2], 0
	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
	LONG $0x517c8366; WORD $0x0004 // cmp    word [rcx + 2*rdx + 4], 0
	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
	LONG $0x517c8366; WORD $0x0006 // cmp    word [rcx + 2*rdx + 6], 0
	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_1180
	JMP  LBB4_1351

// Handler: signed 16-bit input -> 8-bit output, out[i] = sign(in[i]):
// 0 for zero, 1 for positive, 0xFF (-1) for negative.
// rcx = input words, r8 = output bytes, r9d = element count (copied to r10d).
// With at least 64 elements and non-overlapping ranges control goes to LBB4_1181
// (outside this view; presumably the vectorized path -- confirm).
LBB4_402:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x40f98341         // cmp    r9d, 64
	JB   LBB4_404
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1181
	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1181

// Scalar path starts at element index 0 (index lives in r11).
LBB4_404:
	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d

// rsi = ~index. When the count is odd, one element is handled here.
// r9b = -(x != 0) gives 0 or 0xFF; edi starts at 1 and is replaced by r9d
// when x <= 0 (signed), yielding 1 / 0 / 0xFF.
LBB4_1184:
	WORD $0x894c; BYTE $0xde     // mov    rsi, r11
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c2f641             // test    r10b, 1
	JE   LBB4_1186
	LONG $0x3cb70f42; BYTE $0x59 // movzx    edi, word [rcx + 2*r11]
	WORD $0x8566; BYTE $0xff     // test    di, di
	LONG $0xd1950f41             // setne    r9b
	WORD $0xf641; BYTE $0xd9     // neg    r9b
	WORD $0x8566; BYTE $0xff     // test    di, di
	LONG $0xc9b60f45             // movzx    r9d, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xf94e0f41             // cmovle    edi, r9d
	LONG $0x183c8843             // mov    byte [r8 + r11], dil
	LONG $0x01cb8349             // or    r11, 1

// rsi + count == 0 means only one element remained, so we are done.
// esi = 1 is the "positive" result used by cmovg in the loop.
LBB4_1186:
	WORD $0x014c; BYTE $0xd6     // add    rsi, r10
	JE   LBB4_1351
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop, 2 elements per iteration, until r11 == r10.
// The second test re-establishes flags for cmovg because neg clobbers them.
LBB4_1188:
	LONG $0x3cb70f42; BYTE $0x59   // movzx    edi, word [rcx + 2*r11]
	WORD $0x8566; BYTE $0xff       // test    di, di
	WORD $0x950f; BYTE $0xd0       // setne    al
	WORD $0xd8f6                   // neg    al
	WORD $0x8566; BYTE $0xff       // test    di, di
	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
	WORD $0x4f0f; BYTE $0xc6       // cmovg    eax, esi
	LONG $0x18048843               // mov    byte [r8 + r11], al
	LONG $0x44b70f42; WORD $0x0259 // movzx    eax, word [rcx + 2*r11 + 2]
	WORD $0x8566; BYTE $0xc0       // test    ax, ax
	WORD $0x950f; BYTE $0xd2       // setne    dl
	WORD $0xdaf6                   // neg    dl
	WORD $0x8566; BYTE $0xc0       // test    ax, ax
	WORD $0xb60f; BYTE $0xc2       // movzx    eax, dl
	WORD $0x4f0f; BYTE $0xc6       // cmovg    eax, esi
	LONG $0x18448843; BYTE $0x01   // mov    byte [r8 + r11 + 1], al
	LONG $0x02c38349               // add    r11, 2
	WORD $0x394d; BYTE $0xda       // cmp    r10, r11
	JNE  LBB4_1188
	JMP  LBB4_1351

// Handler: signed 64-bit input -> 8-bit output, out[i] = sign(in[i]):
// 0 for zero, 1 for positive, 0xFF (-1) for negative.
// rcx = input qwords, r8 = output bytes, r9d = element count (copied to r10d).
// With at least 16 elements and non-overlapping ranges control goes to LBB4_1189
// (outside this view; presumably the vectorized path -- confirm).
LBB4_405:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JB   LBB4_407
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1189
	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1189

// Scalar path starts at element index 0 (index lives in r11).
LBB4_407:
	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d

// rsi = ~index. When the count is odd, one element is handled here.
// r9b = -(x != 0) gives 0 or 0xFF; edi starts at 1 and is replaced by r9d
// when x <= 0 (signed), yielding 1 / 0 / 0xFF.
LBB4_1192:
	WORD $0x894c; BYTE $0xde     // mov    rsi, r11
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c2f641             // test    r10b, 1
	JE   LBB4_1194
	LONG $0xd93c8b4a             // mov    rdi, qword [rcx + 8*r11]
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	LONG $0xd1950f41             // setne    r9b
	WORD $0xf641; BYTE $0xd9     // neg    r9b
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	LONG $0xc9b60f45             // movzx    r9d, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xf94e0f41             // cmovle    edi, r9d
	LONG $0x183c8843             // mov    byte [r8 + r11], dil
	LONG $0x01cb8349             // or    r11, 1

// rsi + count == 0 means only one element remained, so we are done.
// esi = 1 is the "positive" result used by cmovg in the loop.
LBB4_1194:
	WORD $0x014c; BYTE $0xd6     // add    rsi, r10
	JE   LBB4_1351
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop, 2 elements per iteration, until r11 == r10.
// The second test re-establishes flags for cmovg because neg clobbers them.
LBB4_1196:
	LONG $0xd93c8b4a             // mov    rdi, qword [rcx + 8*r11]
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0xd8f6                 // neg    al
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x18048843             // mov    byte [r8 + r11], al
	LONG $0xd9448b4a; BYTE $0x08 // mov    rax, qword [rcx + 8*r11 + 8]
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	WORD $0x950f; BYTE $0xd2     // setne    dl
	WORD $0xdaf6                 // neg    dl
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x18448843; BYTE $0x01 // mov    byte [r8 + r11 + 1], al
	LONG $0x02c38349             // add    r11, 2
	WORD $0x394d; BYTE $0xda     // cmp    r10, r11
	JNE  LBB4_1196
	JMP  LBB4_1351

// Handler: float32 input -> 8-bit output, out[i] = sign(in[i]):
// 0 when in[i] compares equal to 0.0, otherwise 1 if the sign bit is clear and
// 0xFF (-1) if it is set. Per the ISA, vucomiss also sets ZF for unordered (NaN)
// operands, so cmove selects 0 in that case as well.
// rcx = input dwords, r8 = output bytes, r9d = element count (copied to r10d).
// With at least 32 elements and non-overlapping ranges control goes to LBB4_1197
// (outside this view; presumably the vectorized path -- confirm).
LBB4_408:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_410
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1197
	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1197

// Scalar path starts at element index 0.
LBB4_410:
	WORD $0xd231 // xor    edx, edx

// rsi = ~index. When the count is odd, one element is handled here.
// dil = 2*(sign bit clear) - 1 gives +1 or -1 from the raw float bits; the
// compare against 0.0 is issued after that arithmetic so its flags reach cmove
// (movzx does not modify flags).
LBB4_1200:
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c2f641             // test    r10b, 1
	JE   LBB4_1202
	LONG $0x046ef9c5; BYTE $0x91 // vmovd    xmm0, dword [rcx + 4*rdx]
	LONG $0xc77ef9c5             // vmovd    edi, xmm0
	WORD $0xff85                 // test    edi, edi
	LONG $0xd7990f40             // setns    dil
	WORD $0x0040; BYTE $0xff     // add    dil, dil
	LONG $0xffc78040             // add    dil, -1
	WORD $0x3145; BYTE $0xc9     // xor    r9d, r9d
	LONG $0xc9eff1c5             // vpxor    xmm1, xmm1, xmm1
	LONG $0xc82ef8c5             // vucomiss    xmm1, xmm0
	LONG $0xffb60f40             // movzx    edi, dil
	LONG $0xf9440f41             // cmove    edi, r9d
	LONG $0x103c8841             // mov    byte [r8 + rdx], dil
	LONG $0x01ca8348             // or    rdx, 1

// rsi + count == 0 means only one element remained, so we are done.
// Loop invariants: esi = 0 (zero result), xmm0 = 0.0.
LBB4_1202:
	WORD $0x014c; BYTE $0xd6 // add    rsi, r10
	JE   LBB4_1351
	WORD $0xf631             // xor    esi, esi
	LONG $0xc057f8c5         // vxorps    xmm0, xmm0, xmm0

// Main scalar loop, 2 elements per iteration, until rdx == r10.
LBB4_1204:
	LONG $0x0c6ef9c5; BYTE $0x91   // vmovd    xmm1, dword [rcx + 4*rdx]
	LONG $0xcf7ef9c5               // vmovd    edi, xmm1
	WORD $0xff85                   // test    edi, edi
	WORD $0x990f; BYTE $0xd0       // setns    al
	WORD $0xc000                   // add    al, al
	WORD $0xff04                   // add    al, -1
	LONG $0xc12ef8c5               // vucomiss    xmm0, xmm1
	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
	WORD $0x440f; BYTE $0xc6       // cmove    eax, esi
	LONG $0x10048841               // mov    byte [r8 + rdx], al
	LONG $0x4c6ef9c5; WORD $0x0491 // vmovd    xmm1, dword [rcx + 4*rdx + 4]
	LONG $0xc87ef9c5               // vmovd    eax, xmm1
	WORD $0xc085                   // test    eax, eax
	WORD $0x990f; BYTE $0xd0       // setns    al
	WORD $0xc000                   // add    al, al
	WORD $0xff04                   // add    al, -1
	LONG $0xc12ef8c5               // vucomiss    xmm0, xmm1
	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
	WORD $0x440f; BYTE $0xc6       // cmove    eax, esi
	LONG $0x10448841; BYTE $0x01   // mov    byte [r8 + rdx + 1], al
	LONG $0x02c28348               // add    rdx, 2
	WORD $0x3949; BYTE $0xd2       // cmp    r10, rdx
	JNE  LBB4_1204
	JMP  LBB4_1351

// Handler: 8-bit input -> 8-bit output, out[i] = (in[i] != 0) ? 1 : 0.
// rcx = input bytes, r8 = output bytes, r9d = element count (copied to rax).
// With at least 128 elements and non-overlapping ranges control goes to LBB4_1205
// (outside this view; presumably the vectorized path -- confirm).
LBB4_411:
	WORD $0x8545; BYTE $0xc9                   // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8                   // mov    eax, r9d
	LONG $0x80f98141; WORD $0x0000; BYTE $0x00 // cmp    r9d, 128
	JB   LBB4_413
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0x01148d48                           // lea    rdx, [rcx + rax]
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JBE  LBB4_1205
	LONG $0x00148d49                           // lea    rdx, [r8 + rax]
	WORD $0x3948; BYTE $0xca                   // cmp    rdx, rcx
	JBE  LBB4_1205

// Scalar path starts at element index 0.
LBB4_413:
	WORD $0xd231 // xor    edx, edx

// rsi = count - index - 1; rdi = count & 3 elements are peeled one at a time.
LBB4_1208:
	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6 // not    rsi
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1210

// Peel loop: setne writes the 0/1 result straight to the output byte.
LBB4_1209:
	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
	LONG $0x01c28348             // add    rdx, 1
	LONG $0xffc78348             // add    rdi, -1
	JNE  LBB4_1209

// If fewer than 4 elements existed when rsi was computed, the peel loop did them all.
LBB4_1210:
	LONG $0x03fe8348 // cmp    rsi, 3
	JB   LBB4_1351

// Main scalar loop, 4 elements per iteration, until rdx == rax.
LBB4_1211:
	LONG $0x00113c80               // cmp    byte [rcx + rdx], 0
	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
	LONG $0x02117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 2], 0
	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
	LONG $0x03117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 3], 0
	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
	LONG $0x04c28348               // add    rdx, 4
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JNE  LBB4_1211
	JMP  LBB4_1351

// Handler: signed 32-bit input -> 8-bit output, out[i] = sign(in[i]):
// 0 for zero, 1 for positive, 0xFF (-1) for negative.
// rcx = input dwords, r8 = output bytes, r9d = element count (copied to r10d).
// With at least 32 elements and non-overlapping ranges control goes to LBB4_1212
// (outside this view; presumably the vectorized path -- confirm).
LBB4_414:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_416
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1212
	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1212

// Scalar path starts at element index 0 (index lives in r11).
LBB4_416:
	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d

// rsi = ~index. When the count is odd, one element is handled here.
// r9b = -(x != 0) gives 0 or 0xFF; edi starts at 1 and is replaced by r9d
// when x <= 0 (signed), yielding 1 / 0 / 0xFF.
LBB4_1215:
	WORD $0x894c; BYTE $0xde     // mov    rsi, r11
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c2f641             // test    r10b, 1
	JE   LBB4_1217
	LONG $0x993c8b42             // mov    edi, dword [rcx + 4*r11]
	WORD $0xff85                 // test    edi, edi
	LONG $0xd1950f41             // setne    r9b
	WORD $0xf641; BYTE $0xd9     // neg    r9b
	WORD $0xff85                 // test    edi, edi
	LONG $0xc9b60f45             // movzx    r9d, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xf94e0f41             // cmovle    edi, r9d
	LONG $0x183c8843             // mov    byte [r8 + r11], dil
	LONG $0x01cb8349             // or    r11, 1

// rsi + count == 0 means only one element remained, so we are done.
// esi = 1 is the "positive" result used by cmovg in the loop.
LBB4_1217:
	WORD $0x014c; BYTE $0xd6     // add    rsi, r10
	JE   LBB4_1351
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop, 2 elements per iteration, until r11 == r10.
// The second test re-establishes flags for cmovg because neg clobbers them.
LBB4_1219:
	LONG $0x993c8b42             // mov    edi, dword [rcx + 4*r11]
	WORD $0xff85                 // test    edi, edi
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0xd8f6                 // neg    al
	WORD $0xff85                 // test    edi, edi
	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x18048843             // mov    byte [r8 + r11], al
	LONG $0x99448b42; BYTE $0x04 // mov    eax, dword [rcx + 4*r11 + 4]
	WORD $0xc085                 // test    eax, eax
	WORD $0x950f; BYTE $0xd2     // setne    dl
	WORD $0xdaf6                 // neg    dl
	WORD $0xc085                 // test    eax, eax
	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x18448843; BYTE $0x01 // mov    byte [r8 + r11 + 1], al
	LONG $0x02c38349             // add    r11, 2
	WORD $0x394d; BYTE $0xda     // cmp    r10, r11
	JNE  LBB4_1219
	JMP  LBB4_1351

// Handler: 32-bit input -> 32-bit output, out[i] = (in[i] != 0) ? 1 : 0.
// rcx = input dwords, r8 = output dwords, r9d = element count (copied to rax).
// With at least 32 elements and non-overlapping ranges control goes to LBB4_1220
// (outside this view; presumably the vectorized path -- confirm).
LBB4_417:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_419
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1220
	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1220

// Scalar path starts at element index 0.
LBB4_419:
	WORD $0xd231 // xor    edx, edx

// r9 = count - index - 1; rdi = count & 3 elements are peeled one at a time.
LBB4_1223:
	WORD $0x8949; BYTE $0xd1 // mov    r9, rdx
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x0149; BYTE $0xc1 // add    r9, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1225

// Peel loop: esi is zeroed first so setne sil leaves a clean 0/1 dword to store.
LBB4_1224:
	WORD $0xf631     // xor    esi, esi
	LONG $0x00913c83 // cmp    dword [rcx + 4*rdx], 0
	LONG $0xd6950f40 // setne    sil
	LONG $0x90348941 // mov    dword [r8 + 4*rdx], esi
	LONG $0x01c28348 // add    rdx, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB4_1224

// If fewer than 4 elements existed when r9 was computed, the peel loop did them all.
LBB4_1225:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB4_1351

// Main scalar loop, 4 elements per iteration, until rdx == rax.
LBB4_1226:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x00913c83             // cmp    dword [rcx + 4*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90348941             // mov    dword [r8 + 4*rdx], esi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x04917c83; BYTE $0x00 // cmp    dword [rcx + 4*rdx + 4], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90748941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], esi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x08917c83; BYTE $0x00 // cmp    dword [rcx + 4*rdx + 8], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90748941; BYTE $0x08 // mov    dword [r8 + 4*rdx + 8], esi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x0c917c83; BYTE $0x00 // cmp    dword [rcx + 4*rdx + 12], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90748941; BYTE $0x0c // mov    dword [r8 + 4*rdx + 12], esi
	LONG $0x04c28348             // add    rdx, 4
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_1226
	JMP  LBB4_1351

// Dispatch stub: exits via LBB4_1351 when the count in r9d is <= 0; otherwise
// eax = count and edx = 0, then control goes to LBB4_831 for counts >= 16, or to
// LBB4_834 with esi = 0 for smaller counts. Both targets are outside this view.
// The xor of edx precedes the cmp, so the flags tested by JAE come from the cmp.
LBB4_420:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	WORD $0xd231             // xor    edx, edx
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_831
	WORD $0xf631             // xor    esi, esi
	JMP  LBB4_834

// Handler: signed 8-bit input -> 32-bit output, out[i] = sign(in[i]):
// 0 for zero, 1 for positive, -1 (0xFFFFFFFF) for negative.
// rcx = input bytes, r8 = output dwords, r9d = element count (copied to r11d).
// With at least 32 elements and non-overlapping ranges control goes to LBB4_1227
// (outside this view; presumably the vectorized path -- confirm).
LBB4_423:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_425
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0x19148d4a         // lea    rdx, [rcx + r11]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1227
	LONG $0x98148d4b         // lea    rdx, [r8 + 4*r11]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1227

// Scalar path starts at element index 0.
LBB4_425:
	WORD $0xd231 // xor    edx, edx

// rsi = ~index. When the count is odd, one element is handled here.
// r10d = -(x != 0) gives 0 or -1 as a full dword; edi starts at 1 and is
// replaced by r10d when x <= 0 (signed).
LBB4_1230:
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB4_1232
	LONG $0x110c8a44             // mov    r9b, byte [rcx + rdx]
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
	LONG $0xd2950f41             // setne    r10b
	WORD $0xf741; BYTE $0xda     // neg    r10d
	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xfa4e0f41             // cmovle    edi, r10d
	LONG $0x903c8941             // mov    dword [r8 + 4*rdx], edi
	LONG $0x01ca8348             // or    rdx, 1

// rsi + count == 0 means only one element remained, so we are done.
// esi = 1 is the "positive" result used by cmovg in the loop.
LBB4_1232:
	WORD $0x014c; BYTE $0xde     // add    rsi, r11
	JE   LBB4_1351
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Main scalar loop, 2 elements per iteration, until rdx == r11.
// The second test re-establishes flags for cmovg because neg clobbers them.
LBB4_1234:
	LONG $0x1104b60f             // movzx    eax, byte [rcx + rdx]
	WORD $0xff31                 // xor    edi, edi
	WORD $0xc084                 // test    al, al
	LONG $0xd7950f40             // setne    dil
	WORD $0xdff7                 // neg    edi
	WORD $0xc084                 // test    al, al
	WORD $0x4f0f; BYTE $0xfe     // cmovg    edi, esi
	LONG $0x903c8941             // mov    dword [r8 + 4*rdx], edi
	LONG $0x1144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdx + 1]
	WORD $0xff31                 // xor    edi, edi
	WORD $0xc084                 // test    al, al
	LONG $0xd7950f40             // setne    dil
	WORD $0xdff7                 // neg    edi
	WORD $0xc084                 // test    al, al
	WORD $0x4f0f; BYTE $0xfe     // cmovg    edi, esi
	LONG $0x907c8941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], edi
	LONG $0x02c28348             // add    rdx, 2
	WORD $0x3949; BYTE $0xd3     // cmp    r11, rdx
	JNE  LBB4_1234
	JMP  LBB4_1351

// Dispatch stub: exits via LBB4_1351 when the count in r9d is <= 0; otherwise
// eax = count and control goes to LBB4_839 for counts >= 16, or to LBB4_842
// with rdx = 0 for smaller counts. Both targets are outside this view.
LBB4_426:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_839
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_842

// Dispatch stub: exits via LBB4_1351 when the count in r9d is <= 0; otherwise
// eax = count and control goes to LBB4_843 for counts >= 32, or to LBB4_846
// with rdx = 0 for smaller counts. Both targets are outside this view.
LBB4_429:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_843
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_846

// Dispatch stub: exits via LBB4_1351 when the count in r9d is <= 0; otherwise
// r10d = count and control goes to LBB4_847 for counts >= 32, or to LBB4_850
// with rdx = 0 for smaller counts. Both targets are outside this view.
LBB4_432:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_847
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_850

// Dispatch stub: exits via LBB4_1351 when the count in r9d is <= 0; otherwise
// r10d = count and control goes to LBB4_852 for counts >= 16, or to LBB4_855
// with rdx = 0 for smaller counts. Both targets are outside this view.
LBB4_435:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0x10f98341         // cmp    r9d, 16
	JAE  LBB4_852
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_855

// Dispatch stub: exits via LBB4_1351 when the count in r9d is <= 0; otherwise
// eax = count and control goes to LBB4_857 for counts >= 32, or to LBB4_860
// with rdx = 0 for smaller counts. Both targets are outside this view.
LBB4_438:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JAE  LBB4_857
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_860

// Handler: 8-bit input -> 32-bit output, out[i] = (in[i] != 0) ? 1 : 0.
// rcx = input bytes, r8 = output dwords, r9d = element count (copied to rax).
// With at least 32 elements and non-overlapping ranges control goes to LBB4_1235
// (outside this view; presumably the vectorized path -- confirm).
LBB4_441:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_443
	// Overlap check: input end <= output start, or output end <= input start.
	LONG $0x01148d48         // lea    rdx, [rcx + rax]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1235
	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1235

// Scalar path starts at element index 0.
LBB4_443:
	WORD $0xd231 // xor    edx, edx

// r9 = count - index - 1; rdi = count & 3 elements are peeled one at a time.
LBB4_1238:
	WORD $0x8949; BYTE $0xd1 // mov    r9, rdx
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x0149; BYTE $0xc1 // add    r9, rax
	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
	LONG $0x03e78348         // and    rdi, 3
	JE   LBB4_1240

// Peel loop: esi is zeroed first so setne sil leaves a clean 0/1 dword to store.
LBB4_1239:
	WORD $0xf631     // xor    esi, esi
	LONG $0x00113c80 // cmp    byte [rcx + rdx], 0
	LONG $0xd6950f40 // setne    sil
	LONG $0x90348941 // mov    dword [r8 + 4*rdx], esi
	LONG $0x01c28348 // add    rdx, 1
	LONG $0xffc78348 // add    rdi, -1
	JNE  LBB4_1239

// If fewer than 4 elements existed when r9 was computed, the peel loop did them all.
LBB4_1240:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB4_1351

// Main scalar loop, 4 elements per iteration, until rdx == rax.
LBB4_1241:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90348941             // mov    dword [r8 + 4*rdx], esi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x01117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 1], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90748941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], esi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x02117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 2], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90748941; BYTE $0x08 // mov    dword [r8 + 4*rdx + 8], esi
	WORD $0xf631                 // xor    esi, esi
	LONG $0x03117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 3], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90748941; BYTE $0x0c // mov    dword [r8 + 4*rdx + 12], esi
	LONG $0x04c28348             // add    rdx, 4
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_1241
	JMP  LBB4_1351

// LBB4_444: signed 32-bit -> dword sign conversion, scalar path.
// Reads the dword at [rcx + 4*i] and writes 1 if it is > 0 (signed), 0 if it
// is zero and -1 if it is negative to [r8 + 4*i]. r11 holds the
// zero-extended count taken from r9d.
//
// For counts >= 32 the two ranges (4*r11 bytes each) are tested for overlap
// first: if they are disjoint, control goes to LBB4_1242 (outside this
// chunk; presumably the vectorised variant -- confirm). Otherwise, and for
// counts < 32, the scalar code below runs starting from index rdx = 0.
LBB4_444:
	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
	JLE  LBB4_1351
	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
	LONG $0x20f98341         // cmp    r9d, 32
	JB   LBB4_446
	LONG $0x99148d4a         // lea    rdx, [rcx + 4*r11]
	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
	JBE  LBB4_1242
	LONG $0x98148d4b         // lea    rdx, [r8 + 4*r11]
	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
	JBE  LBB4_1242

LBB4_446:
	WORD $0xd231 // xor    edx, edx

// LBB4_1245: scalar remainder set-up. rsi = ~rdx (= -rdx - 1) is kept for
// the "exactly one element left" test in LBB4_1247. When the count r11 is
// odd, one element is converted here so the 2x loop sees an even remainder:
// r10d = -(value != 0), edi = 1, and cmovle replaces edi with r10d when the
// value is <= 0.
LBB4_1245:
	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
	WORD $0xf748; BYTE $0xd6     // not    rsi
	LONG $0x01c3f641             // test    r11b, 1
	JE   LBB4_1247
	LONG $0x910c8b44             // mov    r9d, dword [rcx + 4*rdx]
	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
	WORD $0x8545; BYTE $0xc9     // test    r9d, r9d
	LONG $0xd2950f41             // setne    r10b
	WORD $0xf741; BYTE $0xda     // neg    r10d
	WORD $0x8545; BYTE $0xc9     // test    r9d, r9d
	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
	LONG $0xfa4e0f41             // cmovle    edi, r10d
	LONG $0x903c8941             // mov    dword [r8 + 4*rdx], edi
	LONG $0x01ca8348             // or    rdx, 1

// rsi + r11 = r11 - (rdx on entry to LBB4_1245) - 1; zero means that only a
// single element was left, so there is nothing for the 2x loop to do.
LBB4_1247:
	WORD $0x014c; BYTE $0xde     // add    rsi, r11
	JE   LBB4_1351
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Two elements per iteration; esi holds the constant 1 used by cmovg.
// Runs until rdx == r11.
LBB4_1249:
	WORD $0x3c8b; BYTE $0x91     // mov    edi, dword [rcx + 4*rdx]
	WORD $0xc031                 // xor    eax, eax
	WORD $0xff85                 // test    edi, edi
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0xd8f7                 // neg    eax
	WORD $0xff85                 // test    edi, edi
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x90048941             // mov    dword [r8 + 4*rdx], eax
	LONG $0x0491448b             // mov    eax, dword [rcx + 4*rdx + 4]
	WORD $0xff31                 // xor    edi, edi
	WORD $0xc085                 // test    eax, eax
	LONG $0xd7950f40             // setne    dil
	WORD $0xdff7                 // neg    edi
	WORD $0xc085                 // test    eax, eax
	WORD $0x4f0f; BYTE $0xfe     // cmovg    edi, esi
	LONG $0x907c8941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], edi
	LONG $0x02c28348             // add    rdx, 2
	WORD $0x3949; BYTE $0xd3     // cmp    r11, rdx
	JNE  LBB4_1249
	JMP  LBB4_1351

// LBB4_922: byte -> 8-byte "constant or zero" conversion, 4x unrolled.
// For each byte at [rcx + i], stores the qword held in xmm0 (loaded from
// 8[rbp], .LCPI4_1) to [r8 + 8*i] when the byte is non-zero, and an
// all-zero qword when it is zero. The constant's value is not visible in
// this chunk (presumably the double 1.0 -- confirm against the LCDATA table).
//
// rsi, rdx (start index) and rax (end index) are set by code outside this
// chunk; the loop is skipped entirely when rsi < 3.
//
// Layout: each element picks its value with a branch rather than a blend, so
// the four steps exist in two copies. LBB4_926 / LBB4_927 / LBB4_928 are
// entered with xmm1 = xmm0 (previous byte non-zero); the fall-through path
// and LBB4_930 / LBB4_931 / LBB4_932 zero xmm1 first (previous byte zero).
// Every path ends in LBB4_924, which stores the fourth element and advances.
LBB4_922:
	LONG $0x03fe8348             // cmp    rsi, 3
	JB   LBB4_1351
	LONG $0x457efac5; BYTE $0x08 // vmovq    xmm0, qword 8[rbp] /* [rip + .LCPI4_1] */
	JMP  LBB4_925

// Store element 3, advance by four, leave when rdx == rax.
LBB4_924:
	LONG $0xd679c1c4; WORD $0xd04c; BYTE $0x18 // vmovq    qword [r8 + 8*rdx + 24], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JE   LBB4_1351

// Loop head: test element 0.
LBB4_925:
	LONG $0x00113c80               // cmp    byte [rcx + rdx], 0
	LONG $0xc86ff9c5               // vmovdqa    xmm1, xmm0
	JNE  LBB4_926
	LONG $0xc9eff1c5               // vpxor    xmm1, xmm1, xmm1
	LONG $0xd679c1c4; WORD $0xd00c // vmovq    qword [r8 + 8*rdx], xmm1
	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
	LONG $0xc86ff9c5               // vmovdqa    xmm1, xmm0
	JE   LBB4_930

LBB4_927:
	LONG $0xd679c1c4; WORD $0xd04c; BYTE $0x08 // vmovq    qword [r8 + 8*rdx + 8], xmm1
	LONG $0x02117c80; BYTE $0x00               // cmp    byte [rcx + rdx + 2], 0
	LONG $0xc86ff9c5                           // vmovdqa    xmm1, xmm0
	JNE  LBB4_928

LBB4_931:
	LONG $0xc9eff1c5                           // vpxor    xmm1, xmm1, xmm1
	LONG $0xd679c1c4; WORD $0xd04c; BYTE $0x10 // vmovq    qword [r8 + 8*rdx + 16], xmm1
	LONG $0x03117c80; BYTE $0x00               // cmp    byte [rcx + rdx + 3], 0
	LONG $0xc86ff9c5                           // vmovdqa    xmm1, xmm0
	JNE  LBB4_924
	JMP  LBB4_932

LBB4_926:
	LONG $0xd679c1c4; WORD $0xd00c // vmovq    qword [r8 + 8*rdx], xmm1
	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
	LONG $0xc86ff9c5               // vmovdqa    xmm1, xmm0
	JNE  LBB4_927

LBB4_930:
	LONG $0xc9eff1c5                           // vpxor    xmm1, xmm1, xmm1
	LONG $0xd679c1c4; WORD $0xd04c; BYTE $0x08 // vmovq    qword [r8 + 8*rdx + 8], xmm1
	LONG $0x02117c80; BYTE $0x00               // cmp    byte [rcx + rdx + 2], 0
	LONG $0xc86ff9c5                           // vmovdqa    xmm1, xmm0
	JE   LBB4_931

LBB4_928:
	LONG $0xd679c1c4; WORD $0xd04c; BYTE $0x10 // vmovq    qword [r8 + 8*rdx + 16], xmm1
	LONG $0x03117c80; BYTE $0x00               // cmp    byte [rcx + rdx + 3], 0
	LONG $0xc86ff9c5                           // vmovdqa    xmm1, xmm0
	JNE  LBB4_924

LBB4_932:
	LONG $0xc9eff1c5 // vpxor    xmm1, xmm1, xmm1
	JMP  LBB4_924

// LBB4_1133: byte -> 4-byte "constant or zero" conversion, 4x unrolled.
// For each byte at [rcx + i], stores the dword held in xmm0 (loaded from
// 152[rbp], .LCPI4_5) to [r8 + 4*i] when the byte is non-zero, and an
// all-zero dword when it is zero. The constant's value is not visible in
// this chunk (presumably the float 1.0 -- confirm against the LCDATA table).
//
// rsi, rdx (start index) and rax (end index) are set by code outside this
// chunk; the loop is skipped entirely when rsi < 3.
//
// Layout: LBB4_1137 / LBB4_1138 / LBB4_1139 are entered with xmm1 = xmm0
// (previous byte non-zero); the fall-through path and LBB4_1141 / LBB4_1142 /
// LBB4_1143 zero xmm1 first (previous byte zero). Every path ends in
// LBB4_1135, which stores the fourth element and advances.
LBB4_1133:
	LONG $0x03fe8348         // cmp    rsi, 3
	JB   LBB4_1351
	QUAD $0x00000098856ef9c5 // vmovd    xmm0, dword 152[rbp] /* [rip + .LCPI4_5] */
	JMP  LBB4_1136

// Store element 3, advance by four, leave when rdx == rax.
LBB4_1135:
	LONG $0x7e79c1c4; WORD $0x904c; BYTE $0x0c // vmovd    dword [r8 + 4*rdx + 12], xmm1
	LONG $0x04c28348                           // add    rdx, 4
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JE   LBB4_1351

// Loop head: test element 0.
LBB4_1136:
	LONG $0x00113c80               // cmp    byte [rcx + rdx], 0
	LONG $0xc86ff9c5               // vmovdqa    xmm1, xmm0
	JNE  LBB4_1137
	LONG $0xc9eff1c5               // vpxor    xmm1, xmm1, xmm1
	LONG $0x7e79c1c4; WORD $0x900c // vmovd    dword [r8 + 4*rdx], xmm1
	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
	LONG $0xc86ff9c5               // vmovdqa    xmm1, xmm0
	JE   LBB4_1141

LBB4_1138:
	LONG $0x7e79c1c4; WORD $0x904c; BYTE $0x04 // vmovd    dword [r8 + 4*rdx + 4], xmm1
	LONG $0x02117c80; BYTE $0x00               // cmp    byte [rcx + rdx + 2], 0
	LONG $0xc86ff9c5                           // vmovdqa    xmm1, xmm0
	JNE  LBB4_1139

LBB4_1142:
	LONG $0xc9eff1c5                           // vpxor    xmm1, xmm1, xmm1
	LONG $0x7e79c1c4; WORD $0x904c; BYTE $0x08 // vmovd    dword [r8 + 4*rdx + 8], xmm1
	LONG $0x03117c80; BYTE $0x00               // cmp    byte [rcx + rdx + 3], 0
	LONG $0xc86ff9c5                           // vmovdqa    xmm1, xmm0
	JNE  LBB4_1135
	JMP  LBB4_1143

LBB4_1137:
	LONG $0x7e79c1c4; WORD $0x900c // vmovd    dword [r8 + 4*rdx], xmm1
	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
	LONG $0xc86ff9c5               // vmovdqa    xmm1, xmm0
	JNE  LBB4_1138

LBB4_1141:
	LONG $0xc9eff1c5                           // vpxor    xmm1, xmm1, xmm1
	LONG $0x7e79c1c4; WORD $0x904c; BYTE $0x04 // vmovd    dword [r8 + 4*rdx + 4], xmm1
	LONG $0x02117c80; BYTE $0x00               // cmp    byte [rcx + rdx + 2], 0
	LONG $0xc86ff9c5                           // vmovdqa    xmm1, xmm0
	JE   LBB4_1142

LBB4_1139:
	LONG $0x7e79c1c4; WORD $0x904c; BYTE $0x08 // vmovd    dword [r8 + 4*rdx + 8], xmm1
	LONG $0x03117c80; BYTE $0x00               // cmp    byte [rcx + rdx + 3], 0
	LONG $0xc86ff9c5                           // vmovdqa    xmm1, xmm0
	JNE  LBB4_1135

LBB4_1143:
	LONG $0xc9eff1c5 // vpxor    xmm1, xmm1, xmm1
	JMP  LBB4_1135

// LBB4_450: vector loop set-up, packed double at [rcx + 8*i] -> dword at
// [r8 + 4*i]; rax holds the element count (set outside this chunk).
//   rsi = count rounded down to a multiple of 4
//   r9  = rsi / 4, the number of 4-element groups
//   rdx = -(r9 & -2): negative counter stepped by +2, so the loop below
//         handles two groups (8 elements) per iteration until it reaches 0
//   rdi = element index, starting at 0
// When there is exactly one group (rsi - 4 == 0) the loop is bypassed via
// LBB4_1288 (outside this chunk).
// Constants are broadcast from the rbp-relative table; their values are not
// visible in this chunk.
LBB4_450:
	WORD $0xc689                         // mov    esi, eax
	WORD $0xe683; BYTE $0xfc             // and    esi, -4
	LONG $0xfc568d48                     // lea    rdx, [rsi - 4]
	WORD $0x8949; BYTE $0xd1             // mov    r9, rdx
	LONG $0x02e9c149                     // shr    r9, 2
	LONG $0x01c18349                     // add    r9, 1
	WORD $0x8548; BYTE $0xd2             // test    rdx, rdx
	JE   LBB4_1288
	WORD $0x894c; BYTE $0xca             // mov    rdx, r9
	LONG $0xfee28348                     // and    rdx, -2
	WORD $0xf748; BYTE $0xda             // neg    rdx
	WORD $0xff31                         // xor    edi, edi
	LONG $0x197de2c4; WORD $0x0045       // vbroadcastsd    ymm0, qword 0[rbp] /* [rip + .LCPI4_0] */
	LONG $0x197de2c4; WORD $0x084d       // vbroadcastsd    ymm1, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0x573941c4; BYTE $0xc0         // vxorpd    xmm8, xmm8, xmm8
	LONG $0x197de2c4; WORD $0x185d       // vbroadcastsd    ymm3, qword 24[rbp] /* [rip + .LCPI4_7] */
	QUAD $0x000094a51879e2c4; BYTE $0x00 // vbroadcastss    xmm4, dword 148[rbp] /* [rip + .LCPI4_4] */

// Per 4-double half (done twice per iteration):
//   zero mask = (x == 0.0)
//   v         = (x AND ymm0) OR ymm1
//   result    = v < ymm3 ? cvtt(v) : cvtt(v - ymm3) XOR xmm4
//   result    = result AND NOT zero mask, so zero inputs produce 0
// The 64-bit compare masks are narrowed to 32-bit lanes with
// vextractf128 + vpackssdw before being used as blend / andn masks.
// NOTE(review): the and/or step has the shape of copysign(1.0, x) and the
// compare/subtract/xor step the shape of a double -> unsigned 32-bit
// conversion; confirm against the LCDATA values.
LBB4_452:
	LONG $0x2c10fdc5; BYTE $0xf9               // vmovupd    ymm5, yword [rcx + 8*rdi]
	LONG $0xf5c2bdc5; BYTE $0x00               // vcmpeqpd    ymm6, ymm8, ymm5
	LONG $0xe854d5c5                           // vandpd    ymm5, ymm5, ymm0
	LONG $0xed56f5c5                           // vorpd    ymm5, ymm1, ymm5
	LONG $0xfbc2d5c5; BYTE $0x01               // vcmpltpd    ymm7, ymm5, ymm3
	LONG $0x197de3c4; WORD $0x01fa             // vextractf128    xmm2, ymm7, 1
	LONG $0xd26bc1c5                           // vpackssdw    xmm2, xmm7, xmm2
	LONG $0xfb5cd5c5                           // vsubpd    ymm7, ymm5, ymm3
	LONG $0xffe6fdc5                           // vcvttpd2dq    xmm7, ymm7
	LONG $0xede6fdc5                           // vcvttpd2dq    xmm5, ymm5
	LONG $0xfc57c1c5                           // vxorpd    xmm7, xmm7, xmm4
	LONG $0x4a41e3c4; WORD $0x20d5             // vblendvps    xmm2, xmm7, xmm5, xmm2
	LONG $0x197de3c4; WORD $0x01f5             // vextractf128    xmm5, ymm6, 1
	LONG $0xed6bc9c5                           // vpackssdw    xmm5, xmm6, xmm5
	LONG $0xd2dfd1c5                           // vpandn    xmm2, xmm5, xmm2
	LONG $0x7f7ac1c4; WORD $0xb814             // vmovdqu    oword [r8 + 4*rdi], xmm2
	LONG $0x5410fdc5; WORD $0x20f9             // vmovupd    ymm2, yword [rcx + 8*rdi + 32]
	LONG $0xeac2bdc5; BYTE $0x00               // vcmpeqpd    ymm5, ymm8, ymm2
	LONG $0x197de3c4; WORD $0x01ee             // vextractf128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xd054edc5                           // vandpd    ymm2, ymm2, ymm0
	LONG $0xd256f5c5                           // vorpd    ymm2, ymm1, ymm2
	LONG $0xf3c2edc5; BYTE $0x01               // vcmpltpd    ymm6, ymm2, ymm3
	LONG $0x197de3c4; WORD $0x01f7             // vextractf128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xfb5cedc5                           // vsubpd    ymm7, ymm2, ymm3
	LONG $0xffe6fdc5                           // vcvttpd2dq    xmm7, ymm7
	LONG $0xfc57c1c5                           // vxorpd    xmm7, xmm7, xmm4
	LONG $0xd2e6fdc5                           // vcvttpd2dq    xmm2, ymm2
	LONG $0x4a41e3c4; WORD $0x60d2             // vblendvps    xmm2, xmm7, xmm2, xmm6
	LONG $0xd2dfd1c5                           // vpandn    xmm2, xmm5, xmm2
	LONG $0x7f7ac1c4; WORD $0xb854; BYTE $0x10 // vmovdqu    oword [r8 + 4*rdi + 16], xmm2
	LONG $0x08c78348                           // add    rdi, 8
	LONG $0x02c28348                           // add    rdx, 2
	JNE  LBB4_452
	JMP  LBB4_1289

// LBB4_456: qword -> dword "is non-zero" conversion, vector loop + tail.
// Reads 64-bit elements at [rcx + 8*i] and writes a dword to [r8 + 4*i].
// rax holds the element count (set outside this chunk); rdx = rax rounded
// down to a multiple of 16 is the vector loop bound, rsi the running index.
// xmm0 = 0, ymm1 = all ones (used to invert compare masks), xmm2 = dword
// broadcast from 156[rbp] (.LCPI4_8; value not visible here, presumably 1 to
// match the scalar tail -- confirm).
LBB4_456:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xf0             // and    edx, -16
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5                     // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x00009c955879e2c4; BYTE $0x00 // vpbroadcastd    xmm2, dword 156[rbp] /* [rip + .LCPI4_8] */

// 16 elements per iteration, as four groups of four qwords:
//   mask = NOT (x == 0), narrowed from 64-bit to 32-bit lanes with
//   vextracti128 + vpackssdw, then ANDed with xmm2.
LBB4_457:
	LONG $0x297de2c4; WORD $0xf11c             // vpcmpeqq    ymm3, ymm0, yword [rcx + 8*rsi]
	LONG $0xd9efe5c5                           // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xdadbe1c5                           // vpand    xmm3, xmm3, xmm2
	LONG $0x297de2c4; WORD $0xf164; BYTE $0x20 // vpcmpeqq    ymm4, ymm0, yword [rcx + 8*rsi + 32]
	LONG $0xe1efddc5                           // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0x297de2c4; WORD $0xf16c; BYTE $0x40 // vpcmpeqq    ymm5, ymm0, yword [rcx + 8*rsi + 64]
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xeadbd1c5                           // vpand    xmm5, xmm5, xmm2
	LONG $0x297de2c4; WORD $0xf174; BYTE $0x60 // vpcmpeqq    ymm6, ymm0, yword [rcx + 8*rsi + 96]
	LONG $0xf1efcdc5                           // vpxor    ymm6, ymm6, ymm1
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xf2dbc9c5                           // vpand    xmm6, xmm6, xmm2
	LONG $0x7f7ac1c4; WORD $0xb01c             // vmovdqu    oword [r8 + 4*rsi], xmm3
	LONG $0x7f7ac1c4; WORD $0xb064; BYTE $0x10 // vmovdqu    oword [r8 + 4*rsi + 16], xmm4
	LONG $0x7f7ac1c4; WORD $0xb06c; BYTE $0x20 // vmovdqu    oword [r8 + 4*rsi + 32], xmm5
	LONG $0x7f7ac1c4; WORD $0xb074; BYTE $0x30 // vmovdqu    oword [r8 + 4*rsi + 48], xmm6
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_457
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail: out[rdx] = (in[rdx] != 0), one element at a time from rdx
// until rdx == rax.
LBB4_459:
	WORD $0xf631                 // xor    esi, esi
	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90348941             // mov    dword [r8 + 4*rdx], esi
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_459
	JMP  LBB4_1351

// LBB4_460: 16-bit word -> dword "is non-zero" conversion, vector loop +
// tail. Reads 16-bit elements at [rcx + 2*i] and writes a dword to
// [r8 + 4*i]. rax holds the element count (set outside this chunk);
// rdx = rax rounded down to a multiple of 32 is the vector loop bound, rsi
// the running index. xmm0 = 0, xmm1 = all ones (used to invert compare
// masks), ymm2 = dword broadcast from 156[rbp] (.LCPI4_8; value not visible
// here, presumably 1 to match the scalar tail -- confirm).
LBB4_460:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5                     // vpcmpeqd    xmm1, xmm1, xmm1
	QUAD $0x00009c95587de2c4; BYTE $0x00 // vpbroadcastd    ymm2, dword 156[rbp] /* [rip + .LCPI4_8] */

// 32 elements per iteration, as four groups of eight words:
//   mask = NOT (x == 0) in 16-bit lanes, zero-extended to 32-bit lanes with
//   vpmovzxwd, then ANDed with ymm2.
LBB4_461:
	LONG $0x1c75f9c5; BYTE $0x71               // vpcmpeqw    xmm3, xmm0, oword [rcx + 2*rsi]
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x337de2c4; BYTE $0xdb               // vpmovzxwd    ymm3, xmm3
	LONG $0x6475f9c5; WORD $0x1071             // vpcmpeqw    xmm4, xmm0, oword [rcx + 2*rsi + 16]
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x337de2c4; BYTE $0xe4               // vpmovzxwd    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0x6c75f9c5; WORD $0x2071             // vpcmpeqw    xmm5, xmm0, oword [rcx + 2*rsi + 32]
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x337de2c4; BYTE $0xed               // vpmovzxwd    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0x7475f9c5; WORD $0x3071             // vpcmpeqw    xmm6, xmm0, oword [rcx + 2*rsi + 48]
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x337de2c4; BYTE $0xf6               // vpmovzxwd    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	LONG $0x7f7ec1c4; WORD $0xb01c             // vmovdqu    yword [r8 + 4*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0xb064; BYTE $0x20 // vmovdqu    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0xb06c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0xb074; BYTE $0x60 // vmovdqu    yword [r8 + 4*rsi + 96], ymm6
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_461
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail: out[rdx] = (in[rdx] != 0), one element at a time from rdx
// until rdx == rax.
LBB4_463:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90348941             // mov    dword [r8 + 4*rdx], esi
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_463
	JMP  LBB4_1351

// LBB4_464: signed 16-bit -> dword sign conversion, vector loop + tail.
// Reads 16-bit elements at [rcx + 2*i] and writes a dword to [r8 + 4*i].
// r10 holds the element count (set outside this chunk); rdx = r10 rounded
// down to a multiple of 32 is the vector loop bound, rsi the running index.
// xmm0 = 0, xmm8 = all ones (used to invert compare masks), ymm2 = dword
// broadcast from 156[rbp] (.LCPI4_8; value not visible here, presumably 1 to
// match the scalar tail, which uses the literal 1 -- confirm).
LBB4_464:
	WORD $0x8944; BYTE $0xd2             // mov    edx, r10d
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0         // vpcmpeqd    xmm8, xmm8, xmm8
	QUAD $0x00009c95187de2c4; BYTE $0x00 // vbroadcastss    ymm2, dword 156[rbp] /* [rip + .LCPI4_8] */

// 32 elements per iteration, as four groups of eight words:
//   gt  = (x > 0), signed, sign-extended to 32-bit lanes
//   ne  = NOT (x == 0), sign-extended to 32-bit lanes (-1 or 0)
//   out = gt ? ymm2 : ne   (vblendvps picks ymm2 where the gt mask is set)
// so negative inputs produce -1 and zero inputs produce 0.
LBB4_465:
	LONG $0x1c6ffac5; BYTE $0x71               // vmovdqu    xmm3, oword [rcx + 2*rsi]
	LONG $0x646ffac5; WORD $0x1071             // vmovdqu    xmm4, oword [rcx + 2*rsi + 16]
	LONG $0x6c6ffac5; WORD $0x2071             // vmovdqu    xmm5, oword [rcx + 2*rsi + 32]
	LONG $0x746ffac5; WORD $0x3071             // vmovdqu    xmm6, oword [rcx + 2*rsi + 48]
	LONG $0xf865e1c5                           // vpcmpgtw    xmm7, xmm3, xmm0
	LONG $0x237d62c4; BYTE $0xcf               // vpmovsxwd    ymm9, xmm7
	LONG $0xc865d9c5                           // vpcmpgtw    xmm1, xmm4, xmm0
	LONG $0x237d62c4; BYTE $0xd1               // vpmovsxwd    ymm10, xmm1
	LONG $0xf865d1c5                           // vpcmpgtw    xmm7, xmm5, xmm0
	LONG $0x237de2c4; BYTE $0xff               // vpmovsxwd    ymm7, xmm7
	LONG $0xc865c9c5                           // vpcmpgtw    xmm1, xmm6, xmm0
	LONG $0x237de2c4; BYTE $0xc9               // vpmovsxwd    ymm1, xmm1
	LONG $0xd875e1c5                           // vpcmpeqw    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x237de2c4; BYTE $0xdb               // vpmovsxwd    ymm3, xmm3
	LONG $0xe075d9c5                           // vpcmpeqw    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x237de2c4; BYTE $0xe4               // vpmovsxwd    ymm4, xmm4
	LONG $0xe875d1c5                           // vpcmpeqw    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x237de2c4; BYTE $0xed               // vpmovsxwd    ymm5, xmm5
	LONG $0xf075c9c5                           // vpcmpeqw    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x237de2c4; BYTE $0xf6               // vpmovsxwd    ymm6, xmm6
	LONG $0x4a65e3c4; WORD $0x90da             // vblendvps    ymm3, ymm3, ymm2, ymm9
	LONG $0x4a5de3c4; WORD $0xa0e2             // vblendvps    ymm4, ymm4, ymm2, ymm10
	LONG $0x4a55e3c4; WORD $0x70ea             // vblendvps    ymm5, ymm5, ymm2, ymm7
	LONG $0x4a4de3c4; WORD $0x10ca             // vblendvps    ymm1, ymm6, ymm2, ymm1
	LONG $0x117cc1c4; WORD $0xb01c             // vmovups    yword [r8 + 4*rsi], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x117cc1c4; WORD $0xb04c; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm1
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_465
	WORD $0x394c; BYTE $0xd2                   // cmp    rdx, r10
	JE   LBB4_1351

// Scalar tail set-up: esi = 1 is the value cmovg selects for positive input.
LBB4_467:
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Scalar tail: eax = -(x != 0), replaced by 1 when x > 0 (signed 16-bit
// test); one element at a time from rdx until rdx == r10.
LBB4_468:
	LONG $0x513cb70f         // movzx    edi, word [rcx + 2*rdx]
	WORD $0xc031             // xor    eax, eax
	WORD $0x8566; BYTE $0xff // test    di, di
	WORD $0x950f; BYTE $0xd0 // setne    al
	WORD $0xd8f7             // neg    eax
	WORD $0x8566; BYTE $0xff // test    di, di
	WORD $0x4f0f; BYTE $0xc6 // cmovg    eax, esi
	LONG $0x90048941         // mov    dword [r8 + 4*rdx], eax
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
	JNE  LBB4_468
	JMP  LBB4_1351

// LBB4_469: signed 64-bit -> dword sign conversion, vector loop + tail.
// Reads 64-bit elements at [rcx + 8*i] and writes a dword to [r8 + 4*i].
// r10 holds the element count (set outside this chunk); rdx = r10 rounded
// down to a multiple of 16 is the vector loop bound, rsi the running index.
// xmm0 = 0, ymm8 = all ones (used to invert compare masks), xmm2 = dword
// broadcast from 156[rbp] (.LCPI4_8; value not visible here, presumably 1 to
// match the scalar tail, which uses the literal 1 -- confirm).
LBB4_469:
	WORD $0x8944; BYTE $0xd2             // mov    edx, r10d
	WORD $0xe283; BYTE $0xf0             // and    edx, -16
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0x763d41c4; BYTE $0xc0         // vpcmpeqd    ymm8, ymm8, ymm8
	QUAD $0x00009c951879e2c4; BYTE $0x00 // vbroadcastss    xmm2, dword 156[rbp] /* [rip + .LCPI4_8] */

// 16 elements per iteration, as four groups of four qwords:
//   gt  = (x > 0), signed, narrowed from 64-bit to 32-bit lanes
//   ne  = NOT (x == 0), narrowed the same way (-1 or 0 per lane)
//   out = gt ? xmm2 : ne   (vblendvps picks xmm2 where the gt mask is set)
// Narrowing is done with vextracti128 + vpackssdw.
LBB4_470:
	LONG $0x246ffec5; BYTE $0xf1               // vmovdqu    ymm4, yword [rcx + 8*rsi]
	LONG $0x6c6ffec5; WORD $0x20f1             // vmovdqu    ymm5, yword [rcx + 8*rsi + 32]
	LONG $0x746ffec5; WORD $0x40f1             // vmovdqu    ymm6, yword [rcx + 8*rsi + 64]
	LONG $0x7c6ffec5; WORD $0x60f1             // vmovdqu    ymm7, yword [rcx + 8*rsi + 96]
	LONG $0x375de2c4; BYTE $0xd8               // vpcmpgtq    ymm3, ymm4, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xc96b61c5                           // vpackssdw    xmm9, xmm3, xmm1
	LONG $0x3755e2c4; BYTE $0xc8               // vpcmpgtq    ymm1, ymm5, ymm0
	LONG $0x397de3c4; WORD $0x01cb             // vextracti128    xmm3, ymm1, 1
	LONG $0xd36b71c5                           // vpackssdw    xmm10, xmm1, xmm3
	LONG $0x374de2c4; BYTE $0xd8               // vpcmpgtq    ymm3, ymm6, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xd96b61c5                           // vpackssdw    xmm11, xmm3, xmm1
	LONG $0x3745e2c4; BYTE $0xd8               // vpcmpgtq    ymm3, ymm7, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xc96be1c5                           // vpackssdw    xmm1, xmm3, xmm1
	LONG $0x295de2c4; BYTE $0xd8               // vpcmpeqq    ymm3, ymm4, ymm0
	LONG $0xdbefbdc5                           // vpxor    ymm3, ymm8, ymm3
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0x2955e2c4; BYTE $0xe0               // vpcmpeqq    ymm4, ymm5, ymm0
	LONG $0xe4efbdc5                           // vpxor    ymm4, ymm8, ymm4
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0x294de2c4; BYTE $0xe8               // vpcmpeqq    ymm5, ymm6, ymm0
	LONG $0xedefbdc5                           // vpxor    ymm5, ymm8, ymm5
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0x2945e2c4; BYTE $0xf0               // vpcmpeqq    ymm6, ymm7, ymm0
	LONG $0xf6efbdc5                           // vpxor    ymm6, ymm8, ymm6
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	LONG $0x4a61e3c4; WORD $0x90da             // vblendvps    xmm3, xmm3, xmm2, xmm9
	LONG $0x4a59e3c4; WORD $0xa0e2             // vblendvps    xmm4, xmm4, xmm2, xmm10
	LONG $0x4a51e3c4; WORD $0xb0ea             // vblendvps    xmm5, xmm5, xmm2, xmm11
	LONG $0x4a49e3c4; WORD $0x10ca             // vblendvps    xmm1, xmm6, xmm2, xmm1
	LONG $0x1178c1c4; WORD $0xb01c             // vmovups    oword [r8 + 4*rsi], xmm3
	LONG $0x1178c1c4; WORD $0xb064; BYTE $0x10 // vmovups    oword [r8 + 4*rsi + 16], xmm4
	LONG $0x1178c1c4; WORD $0xb06c; BYTE $0x20 // vmovups    oword [r8 + 4*rsi + 32], xmm5
	LONG $0x1178c1c4; WORD $0xb04c; BYTE $0x30 // vmovups    oword [r8 + 4*rsi + 48], xmm1
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_470
	WORD $0x394c; BYTE $0xd2                   // cmp    rdx, r10
	JE   LBB4_1351

// Scalar tail set-up: esi = 1 is the value cmovg selects for positive input.
LBB4_472:
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Scalar tail: eax = -(x != 0), replaced by 1 when x > 0 (signed 64-bit
// test); one element at a time from rdx until rdx == r10.
LBB4_473:
	LONG $0xd13c8b48         // mov    rdi, qword [rcx + 8*rdx]
	WORD $0xc031             // xor    eax, eax
	WORD $0x8548; BYTE $0xff // test    rdi, rdi
	WORD $0x950f; BYTE $0xd0 // setne    al
	WORD $0xd8f7             // neg    eax
	WORD $0x8548; BYTE $0xff // test    rdi, rdi
	WORD $0x4f0f; BYTE $0xc6 // cmovg    eax, esi
	LONG $0x90048941         // mov    dword [r8 + 4*rdx], eax
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
	JNE  LBB4_473
	JMP  LBB4_1351

// LBB4_474: vector loop set-up, 32-bit lanes at [rcx + 4*i] -> dword at
// [r8 + 4*i]; rax holds the element count (set outside this chunk).
//   rdx = count rounded down to a multiple of 8
//   r9  = rdx / 8, the number of 8-element groups
//   rdi = -(r9 & -2): negative counter stepped by +2, so the loop below
//         handles two groups (16 elements) per iteration until it reaches 0
//   rsi = element index, starting at 0
// When there is exactly one group (rdx - 8 == 0) the loop is bypassed via
// LBB4_1294 (outside this chunk).
// Constants are broadcast from the rbp-relative table; their values are not
// visible in this chunk.
LBB4_474:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xf8             // and    edx, -8
	LONG $0xf8728d48                     // lea    rsi, [rdx - 8]
	WORD $0x8949; BYTE $0xf1             // mov    r9, rsi
	LONG $0x03e9c149                     // shr    r9, 3
	LONG $0x01c18349                     // add    r9, 1
	WORD $0x8548; BYTE $0xf6             // test    rsi, rsi
	JE   LBB4_1294
	WORD $0x894c; BYTE $0xcf             // mov    rdi, r9
	LONG $0xfee78348                     // and    rdi, -2
	WORD $0xf748; BYTE $0xdf             // neg    rdi
	WORD $0xf631                         // xor    esi, esi
	QUAD $0x00009c85587de2c4; BYTE $0x00 // vpbroadcastd    ymm0, dword 156[rbp] /* [rip + .LCPI4_8] */
	LONG $0xc957f0c5                     // vxorps    xmm1, xmm1, xmm1
	QUAD $0x0000a495187de2c4; BYTE $0x00 // vbroadcastss    ymm2, dword 164[rbp] /* [rip + .LCPI4_10] */
	QUAD $0x0000949d187de2c4; BYTE $0x00 // vbroadcastss    ymm3, dword 148[rbp] /* [rip + .LCPI4_4] */

// Per 8-lane half (done twice per iteration):
//   s   = (x >> 31, arithmetic) OR ymm0      -- integer, built from sign bit
//   f   = float(s)                           -- vcvtdq2ps
//   r   = f < ymm2 ? cvtt(f) : cvtt(f - ymm2) XOR ymm3
//   out = r AND (x != 0.0)                   -- x compared as packed single
// NOTE(review): the input is compared with vcmpneqps, so the source elements
// are presumably float32, and the compare/subtract/xor step has the shape of
// a float -> unsigned 32-bit conversion; confirm against the LCDATA values.
LBB4_476:
	LONG $0x246ffec5; BYTE $0xb1               // vmovdqu    ymm4, yword [rcx + 4*rsi]
	LONG $0xe472d5c5; BYTE $0x1f               // vpsrad    ymm5, ymm4, 31
	LONG $0xe8ebd5c5                           // vpor    ymm5, ymm5, ymm0
	LONG $0xed5bfcc5                           // vcvtdq2ps    ymm5, ymm5
	LONG $0xf2c2d4c5; BYTE $0x01               // vcmpltps    ymm6, ymm5, ymm2
	LONG $0xfa5cd4c5                           // vsubps    ymm7, ymm5, ymm2
	LONG $0xff5bfec5                           // vcvttps2dq    ymm7, ymm7
	LONG $0xfb57c4c5                           // vxorps    ymm7, ymm7, ymm3
	LONG $0xed5bfec5                           // vcvttps2dq    ymm5, ymm5
	LONG $0x4a45e3c4; WORD $0x60ed             // vblendvps    ymm5, ymm7, ymm5, ymm6
	LONG $0xe1c2dcc5; BYTE $0x04               // vcmpneqps    ymm4, ymm4, ymm1
	LONG $0xe554dcc5                           // vandps    ymm4, ymm4, ymm5
	LONG $0x117cc1c4; WORD $0xb024             // vmovups    yword [r8 + 4*rsi], ymm4
	LONG $0x646ffec5; WORD $0x20b1             // vmovdqu    ymm4, yword [rcx + 4*rsi + 32]
	LONG $0xe472d5c5; BYTE $0x1f               // vpsrad    ymm5, ymm4, 31
	LONG $0xe8ebd5c5                           // vpor    ymm5, ymm5, ymm0
	LONG $0xed5bfcc5                           // vcvtdq2ps    ymm5, ymm5
	LONG $0xf2c2d4c5; BYTE $0x01               // vcmpltps    ymm6, ymm5, ymm2
	LONG $0xfa5cd4c5                           // vsubps    ymm7, ymm5, ymm2
	LONG $0xff5bfec5                           // vcvttps2dq    ymm7, ymm7
	LONG $0xfb57c4c5                           // vxorps    ymm7, ymm7, ymm3
	LONG $0xed5bfec5                           // vcvttps2dq    ymm5, ymm5
	LONG $0x4a45e3c4; WORD $0x60ed             // vblendvps    ymm5, ymm7, ymm5, ymm6
	LONG $0xe1c2dcc5; BYTE $0x04               // vcmpneqps    ymm4, ymm4, ymm1
	LONG $0xe554dcc5                           // vandps    ymm4, ymm4, ymm5
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x10c68348                           // add    rsi, 16
	LONG $0x02c78348                           // add    rdi, 2
	JNE  LBB4_476
	JMP  LBB4_1295

// LBB4_483: dword -> double "is non-zero" conversion, vector loop + tail.
// Reads 32-bit elements at [rcx + 4*i] and writes 8 bytes to [r8 + 8*i].
// rax holds the element count (set outside this chunk); rdx = rax rounded
// down to a multiple of 16 is the vector loop bound, rsi the running index.
// xmm0 = 0, xmm1 = dword broadcast from 156[rbp] (.LCPI4_8; value not
// visible here, presumably the integer 1 -- confirm).
LBB4_483:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xf0             // and    edx, -16
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	QUAD $0x00009c8d5879e2c4; BYTE $0x00 // vpbroadcastd    xmm1, dword 156[rbp] /* [rip + .LCPI4_8] */

// 16 elements per iteration, as four groups of four dwords:
//   t   = NOT (x == 0) AND xmm1   (vpandn: xmm1 where x != 0, else 0)
//   out = double(t)               (vcvtdq2pd, signed int32 -> double)
LBB4_484:
	LONG $0x1476f9c5; BYTE $0xb1               // vpcmpeqd    xmm2, xmm0, oword [rcx + 4*rsi]
	LONG $0xd1dfe9c5                           // vpandn    xmm2, xmm2, xmm1
	LONG $0xd2e6fec5                           // vcvtdq2pd    ymm2, xmm2
	LONG $0x5c76f9c5; WORD $0x10b1             // vpcmpeqd    xmm3, xmm0, oword [rcx + 4*rsi + 16]
	LONG $0xd9dfe1c5                           // vpandn    xmm3, xmm3, xmm1
	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
	LONG $0x6476f9c5; WORD $0x20b1             // vpcmpeqd    xmm4, xmm0, oword [rcx + 4*rsi + 32]
	LONG $0xe1dfd9c5                           // vpandn    xmm4, xmm4, xmm1
	LONG $0xe4e6fec5                           // vcvtdq2pd    ymm4, xmm4
	LONG $0x6c76f9c5; WORD $0x30b1             // vpcmpeqd    xmm5, xmm0, oword [rcx + 4*rsi + 48]
	LONG $0xe9dfd1c5                           // vpandn    xmm5, xmm5, xmm1
	LONG $0xede6fec5                           // vcvtdq2pd    ymm5, xmm5
	LONG $0x117dc1c4; WORD $0xf014             // vmovupd    yword [r8 + 8*rsi], ymm2
	LONG $0x117dc1c4; WORD $0xf05c; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm5
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_484
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail set-up: xmm0 = qword from 8[rbp] (.LCPI4_1), the value stored
// for non-zero input (not visible here, presumably the double 1.0 -- confirm).
LBB4_486:
	LONG $0x457efac5; BYTE $0x08 // vmovq    xmm0, qword 8[rbp] /* [rip + .LCPI4_1] */
	JMP  LBB4_488

// Store the value chosen in LBB4_488, advance, leave when rdx == rax;
// otherwise fall through to test the next element.
LBB4_487:
	LONG $0xd679c1c4; WORD $0xd00c // vmovq    qword [r8 + 8*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

// xmm1 = xmm0 when the input dword is non-zero, otherwise zero.
LBB4_488:
	LONG $0x00913c83 // cmp    dword [rcx + 4*rdx], 0
	LONG $0xc86ff9c5 // vmovdqa    xmm1, xmm0
	JNE  LBB4_487
	LONG $0xc9eff1c5 // vpxor    xmm1, xmm1, xmm1
	JMP  LBB4_487

// LBB4_496: qword -> double "is non-zero" conversion, vector loop.
// Reads 64-bit elements at [rcx + 8*i] and writes 8 bytes to [r8 + 8*i].
// rax holds the element count (set outside this chunk); rdx = rax rounded
// down to a multiple of 16 is the vector loop bound, rsi the running index.
// xmm0 = 0, ymm1 = all ones (used to invert compare masks), xmm2 = dword
// broadcast from 156[rbp] (.LCPI4_8; value not visible here, presumably the
// integer 1 -- confirm).
LBB4_496:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xf0             // and    edx, -16
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5                     // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x00009c955879e2c4; BYTE $0x00 // vpbroadcastd    xmm2, dword 156[rbp] /* [rip + .LCPI4_8] */

// 16 elements per iteration, as four groups of four qwords:
//   mask = NOT (x == 0), narrowed from 64-bit to 32-bit lanes with
//          vextracti128 + vpackssdw
//   out  = double(mask AND xmm2)   (vcvtdq2pd, signed int32 -> double)
// When rdx != rax after the loop, execution falls through to the scalar
// tail that starts at LBB4_499.
LBB4_497:
	LONG $0x297de2c4; WORD $0xf11c             // vpcmpeqq    ymm3, ymm0, yword [rcx + 8*rsi]
	LONG $0xd9efe5c5                           // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xdadbe1c5                           // vpand    xmm3, xmm3, xmm2
	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
	LONG $0x297de2c4; WORD $0xf164; BYTE $0x20 // vpcmpeqq    ymm4, ymm0, yword [rcx + 8*rsi + 32]
	LONG $0xe1efddc5                           // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0xe4e6fec5                           // vcvtdq2pd    ymm4, xmm4
	LONG $0x297de2c4; WORD $0xf16c; BYTE $0x40 // vpcmpeqq    ymm5, ymm0, yword [rcx + 8*rsi + 64]
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xeadbd1c5                           // vpand    xmm5, xmm5, xmm2
	LONG $0xede6fec5                           // vcvtdq2pd    ymm5, xmm5
	LONG $0x297de2c4; WORD $0xf174; BYTE $0x60 // vpcmpeqq    ymm6, ymm0, yword [rcx + 8*rsi + 96]
	LONG $0xf1efcdc5                           // vpxor    ymm6, ymm6, ymm1
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xf2dbc9c5                           // vpand    xmm6, xmm6, xmm2
	LONG $0xf6e6fec5                           // vcvtdq2pd    ymm6, xmm6
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf074; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm6
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_497
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

LBB4_499:
	LONG $0x457efac5; BYTE $0x08 // vmovq    xmm0, qword 8[rbp] /* [rip + .LCPI4_1] */
	JMP  LBB4_501

LBB4_500:
	LONG $0xd679c1c4; WORD $0xd00c // vmovq    qword [r8 + 8*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

LBB4_501:
	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
	LONG $0xc86ff9c5             // vmovdqa    xmm1, xmm0
	JNE  LBB4_500
	LONG $0xc9eff1c5             // vpxor    xmm1, xmm1, xmm1
	JMP  LBB4_500

// 16-bit input elements, nonzero test only (no sign test), double output.
// Setup: rdx = eax rounded down to a multiple of 16, rsi = 0 (element index),
// xmm0 = 0, xmm1 = all-ones, xmm2 = dword at 156[rbp] broadcast.
// NOTE(review): .LCPI4_8 is presumably the integer 1 - confirm. The loop body
// runs before the first bound check, so the count is presumably >= 16 here.
LBB4_503:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xf0             // and    edx, -16
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5                     // vpcmpeqd    xmm1, xmm1, xmm1
	QUAD $0x00009c955879e2c4; BYTE $0x00 // vpbroadcastd    xmm2, dword 156[rbp] /* [rip + .LCPI4_8] */

// Vector loop, 16 elements per iteration. Each vmovq loads four 16-bit
// elements. Per group: (x == 0) word mask, inverted, zero-extended to dwords,
// ANDed with xmm2, converted from int32 to double, stored at [r8 + 8*rsi].
LBB4_504:
	LONG $0x1c7efac5; BYTE $0x71               // vmovq    xmm3, qword [rcx + 2*rsi]
	LONG $0x647efac5; WORD $0x0871             // vmovq    xmm4, qword [rcx + 2*rsi + 8]
	LONG $0x6c7efac5; WORD $0x1071             // vmovq    xmm5, qword [rcx + 2*rsi + 16]
	LONG $0x747efac5; WORD $0x1871             // vmovq    xmm6, qword [rcx + 2*rsi + 24]
	LONG $0xd875e1c5                           // vpcmpeqw    xmm3, xmm3, xmm0
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x3379e2c4; BYTE $0xdb               // vpmovzxwd    xmm3, xmm3
	LONG $0xdadbe1c5                           // vpand    xmm3, xmm3, xmm2
	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
	LONG $0xe075d9c5                           // vpcmpeqw    xmm4, xmm4, xmm0
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x3379e2c4; BYTE $0xe4               // vpmovzxwd    xmm4, xmm4
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0xe4e6fec5                           // vcvtdq2pd    ymm4, xmm4
	LONG $0xe875d1c5                           // vpcmpeqw    xmm5, xmm5, xmm0
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x3379e2c4; BYTE $0xed               // vpmovzxwd    xmm5, xmm5
	LONG $0xeadbd1c5                           // vpand    xmm5, xmm5, xmm2
	LONG $0xede6fec5                           // vcvtdq2pd    ymm5, xmm5
	LONG $0xf075c9c5                           // vpcmpeqw    xmm6, xmm6, xmm0
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x3379e2c4; BYTE $0xf6               // vpmovzxwd    xmm6, xmm6
	LONG $0xf2dbc9c5                           // vpand    xmm6, xmm6, xmm2
	LONG $0xf6e6fec5                           // vcvtdq2pd    ymm6, xmm6
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf074; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm6
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_504
	// Nothing left for the scalar tail when the rounded-down limit equals rax.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail starting at index rdx: stores the qword from 8[rbp] when the
// input word is nonzero, otherwise zero bits.
// NOTE(review): .LCPI4_1 is presumably the double 1.0 - confirm.
LBB4_506:
	LONG $0x457efac5; BYTE $0x08 // vmovq    xmm0, qword 8[rbp] /* [rip + .LCPI4_1] */
	JMP  LBB4_508

// Store the selected value, advance rdx, exit when rdx reaches rax.
LBB4_507:
	LONG $0xd679c1c4; WORD $0xd00c // vmovq    qword [r8 + 8*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

// VMOVDQA leaves EFLAGS untouched, so JNE tests the CMP result.
LBB4_508:
	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
	LONG $0xc86ff9c5             // vmovdqa    xmm1, xmm0
	JNE  LBB4_507
	LONG $0xc9eff1c5             // vpxor    xmm1, xmm1, xmm1
	JMP  LBB4_507

// Signed 16-bit input elements, three-way result as double:
//   x > 0            -> qword at 8[rbp]
//   x != 0, not > 0  -> -1.0 in the vector loop (all-ones mask converted as
//                       int32 -1); qword at 32[rbp] in the scalar tail
//   x == 0           -> 0.0
// Setup: rdx = eax rounded down to a multiple of 16, rsi = 0 (element index),
// xmm0 = 0, xmm8 = all-ones, ymm2 = qword at 8[rbp] broadcast.
// NOTE(review): .LCPI4_1 is presumably 1.0 and .LCPI4_13 presumably -1.0 (to
// agree with the vector path) - the data table is outside this chunk; confirm.
// The loop body runs before the first bound check, so the count is
// presumably >= 16 here.
LBB4_510:
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0   // vpcmpeqd    xmm8, xmm8, xmm8
	LONG $0x197de2c4; WORD $0x0855 // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI4_1] */

// Vector loop, 16 elements per iteration (four vmovq loads of four words).
// The signed (x > 0) word masks are sign-extended to qwords so they can drive
// vblendvpd; the (x != 0) masks are sign-extended to dwords and converted to
// double, giving -1.0 or 0.0, which the blend overrides with ymm2 where x > 0.
LBB4_511:
	LONG $0x1c7efac5; BYTE $0x71               // vmovq    xmm3, qword [rcx + 2*rsi]
	LONG $0x647efac5; WORD $0x0871             // vmovq    xmm4, qword [rcx + 2*rsi + 8]
	LONG $0x6c7efac5; WORD $0x1071             // vmovq    xmm5, qword [rcx + 2*rsi + 16]
	LONG $0x747efac5; WORD $0x1871             // vmovq    xmm6, qword [rcx + 2*rsi + 24]
	LONG $0xf865e1c5                           // vpcmpgtw    xmm7, xmm3, xmm0
	LONG $0x247d62c4; BYTE $0xcf               // vpmovsxwq    ymm9, xmm7
	LONG $0xc865d9c5                           // vpcmpgtw    xmm1, xmm4, xmm0
	LONG $0x247d62c4; BYTE $0xd1               // vpmovsxwq    ymm10, xmm1
	LONG $0xf865d1c5                           // vpcmpgtw    xmm7, xmm5, xmm0
	LONG $0x247de2c4; BYTE $0xff               // vpmovsxwq    ymm7, xmm7
	LONG $0xc865c9c5                           // vpcmpgtw    xmm1, xmm6, xmm0
	LONG $0xd875e1c5                           // vpcmpeqw    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x2379e2c4; BYTE $0xdb               // vpmovsxwd    xmm3, xmm3
	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
	LONG $0xe075d9c5                           // vpcmpeqw    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x2379e2c4; BYTE $0xe4               // vpmovsxwd    xmm4, xmm4
	LONG $0xe4e6fec5                           // vcvtdq2pd    ymm4, xmm4
	LONG $0xe875d1c5                           // vpcmpeqw    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x2379e2c4; BYTE $0xed               // vpmovsxwd    xmm5, xmm5
	LONG $0xede6fec5                           // vcvtdq2pd    ymm5, xmm5
	LONG $0x247de2c4; BYTE $0xc9               // vpmovsxwq    ymm1, xmm1
	LONG $0xf075c9c5                           // vpcmpeqw    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x2379e2c4; BYTE $0xf6               // vpmovsxwd    xmm6, xmm6
	LONG $0xf6e6fec5                           // vcvtdq2pd    ymm6, xmm6
	LONG $0x4b65e3c4; WORD $0x90da             // vblendvpd    ymm3, ymm3, ymm2, ymm9
	LONG $0x4b5de3c4; WORD $0xa0e2             // vblendvpd    ymm4, ymm4, ymm2, ymm10
	LONG $0x4b55e3c4; WORD $0x70ea             // vblendvpd    ymm5, ymm5, ymm2, ymm7
	LONG $0x4b4de3c4; WORD $0x10ca             // vblendvpd    ymm1, ymm6, ymm2, ymm1
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf04c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm1
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_511
	// Nothing left for the scalar tail when the rounded-down limit equals rax.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail starting at index rdx. xmm0 = value for nonzero non-positive
// inputs (32[rbp]), xmm1 = value for positive inputs (8[rbp]).
LBB4_513:
	LONG $0x4510fbc5; BYTE $0x20 // vmovsd    xmm0, qword 32[rbp] /* [rip + .LCPI4_13] */
	LONG $0x4d10fbc5; BYTE $0x08 // vmovsd    xmm1, qword 8[rbp] /* [rip + .LCPI4_1] */
	JMP  LBB4_515

// Store the selected value, advance rdx, exit when rdx reaches rax.
LBB4_514:
	LONG $0x117bc1c4; WORD $0xd01c // vmovsd    qword [r8 + 8*rdx], xmm3
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

// One CMP feeds both JNE and the later JG: the AVX moves/xor in between do
// not modify EFLAGS. xmm2 = xmm0 if x != 0, else 0.
LBB4_515:
	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
	LONG $0xd028f9c5             // vmovapd    xmm2, xmm0
	JNE  LBB4_517
	LONG $0xd257e9c5             // vxorpd    xmm2, xmm2, xmm2

// xmm3 = xmm1 if x > 0 (signed), else xmm2.
LBB4_517:
	LONG $0xd928f9c5 // vmovapd    xmm3, xmm1
	JG   LBB4_514
	LONG $0xda28f9c5 // vmovapd    xmm3, xmm2
	JMP  LBB4_514

// Signed 64-bit input elements, three-way result as double:
//   x > 0            -> qword at 8[rbp]
//   x != 0, not > 0  -> -1.0 in the vector loop (all-ones mask converted as
//                       int32 -1); qword at 32[rbp] in the scalar tail
//   x == 0           -> 0.0
// Setup: rdx = eax rounded down to a multiple of 16, rsi = 0 (element index),
// xmm0 = 0, ymm11 = all-ones, ymm2 = qword at 8[rbp] broadcast.
// NOTE(review): .LCPI4_1 is presumably 1.0 and .LCPI4_13 presumably -1.0 -
// the data table is outside this chunk; confirm. The loop body runs before
// the first bound check, so the count is presumably >= 16 here.
LBB4_519:
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x762541c4; BYTE $0xdb   // vpcmpeqd    ymm11, ymm11, ymm11
	LONG $0x197de2c4; WORD $0x0855 // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI4_1] */

// Vector loop, 16 elements per iteration in four groups of four qwords.
// ymm7..ymm10 keep the signed (x > 0) masks for the final blends. The
// (x != 0) qword masks are narrowed to dwords (vextracti128 + vpackssdw) and
// converted to double, giving -1.0 or 0.0, then overridden by ymm2 where x > 0.
LBB4_520:
	LONG $0x1c6ffec5; BYTE $0xf1               // vmovdqu    ymm3, yword [rcx + 8*rsi]
	LONG $0x646ffec5; WORD $0x20f1             // vmovdqu    ymm4, yword [rcx + 8*rsi + 32]
	LONG $0x6c6ffec5; WORD $0x40f1             // vmovdqu    ymm5, yword [rcx + 8*rsi + 64]
	LONG $0x746ffec5; WORD $0x60f1             // vmovdqu    ymm6, yword [rcx + 8*rsi + 96]
	LONG $0x3765e2c4; BYTE $0xf8               // vpcmpgtq    ymm7, ymm3, ymm0
	LONG $0x375d62c4; BYTE $0xc0               // vpcmpgtq    ymm8, ymm4, ymm0
	LONG $0x375562c4; BYTE $0xc8               // vpcmpgtq    ymm9, ymm5, ymm0
	LONG $0x374d62c4; BYTE $0xd0               // vpcmpgtq    ymm10, ymm6, ymm0
	LONG $0x2965e2c4; BYTE $0xd8               // vpcmpeqq    ymm3, ymm3, ymm0
	LONG $0xdbefa5c5                           // vpxor    ymm3, ymm11, ymm3
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xc96be1c5                           // vpackssdw    xmm1, xmm3, xmm1
	LONG $0xc9e6fec5                           // vcvtdq2pd    ymm1, xmm1
	LONG $0x295de2c4; BYTE $0xd8               // vpcmpeqq    ymm3, ymm4, ymm0
	LONG $0xdbefa5c5                           // vpxor    ymm3, ymm11, ymm3
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
	LONG $0x2955e2c4; BYTE $0xe0               // vpcmpeqq    ymm4, ymm5, ymm0
	LONG $0xe4efa5c5                           // vpxor    ymm4, ymm11, ymm4
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe4e6fec5                           // vcvtdq2pd    ymm4, xmm4
	LONG $0x294de2c4; BYTE $0xe8               // vpcmpeqq    ymm5, ymm6, ymm0
	LONG $0xedefa5c5                           // vpxor    ymm5, ymm11, ymm5
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xede6fec5                           // vcvtdq2pd    ymm5, xmm5
	LONG $0x4b75e3c4; WORD $0x70ca             // vblendvpd    ymm1, ymm1, ymm2, ymm7
	LONG $0x4b65e3c4; WORD $0x80da             // vblendvpd    ymm3, ymm3, ymm2, ymm8
	LONG $0x4b5de3c4; WORD $0x90e2             // vblendvpd    ymm4, ymm4, ymm2, ymm9
	LONG $0x4b55e3c4; WORD $0xa0ea             // vblendvpd    ymm5, ymm5, ymm2, ymm10
	LONG $0x117dc1c4; WORD $0xf00c             // vmovupd    yword [r8 + 8*rsi], ymm1
	LONG $0x117dc1c4; WORD $0xf05c; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm5
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_520
	// Nothing left for the scalar tail when the rounded-down limit equals rax.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail starting at index rdx. xmm0 = value for nonzero non-positive
// inputs (32[rbp]), xmm1 = value for positive inputs (8[rbp]).
LBB4_522:
	LONG $0x4510fbc5; BYTE $0x20 // vmovsd    xmm0, qword 32[rbp] /* [rip + .LCPI4_13] */
	LONG $0x4d10fbc5; BYTE $0x08 // vmovsd    xmm1, qword 8[rbp] /* [rip + .LCPI4_1] */
	JMP  LBB4_524

// Store the selected value, advance rdx, exit when rdx reaches rax.
LBB4_523:
	LONG $0x117bc1c4; WORD $0xd01c // vmovsd    qword [r8 + 8*rdx], xmm3
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

// One CMP feeds both JNE and the later JG: the AVX moves/xor in between do
// not modify EFLAGS. xmm2 = xmm0 if x != 0, else 0.
LBB4_524:
	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
	LONG $0xd028f9c5             // vmovapd    xmm2, xmm0
	JNE  LBB4_526
	LONG $0xd257e9c5             // vxorpd    xmm2, xmm2, xmm2

// xmm3 = xmm1 if x > 0 (signed), else xmm2.
LBB4_526:
	LONG $0xd928f9c5 // vmovapd    xmm3, xmm1
	JG   LBB4_523
	LONG $0xda28f9c5 // vmovapd    xmm3, xmm2
	JMP  LBB4_523

// Single-precision float input, double output: a value derived from the
// sign bit for nonzero inputs, 0 for inputs equal to zero.
// Setup: rdx = eax rounded down to a multiple of 16, rsi = 0 (element index),
// xmm8 = 0.0, xmm1 = dword at 156[rbp] broadcast.
// NOTE(review): .LCPI4_8 is presumably the integer 1 (the scalar tail below
// uses a literal `or esi, 1` in the same role) - confirm. The loop body runs
// before the first bound check, so the count is presumably >= 16 here.
// NOTE(review): the two paths appear to treat NaN differently. The vector
// loop uses vcmpeqps predicate 0 (ordered equal, false for NaN), so NaN lanes
// keep the sign-derived value; the scalar tail uses VUCOMISS + JE, and an
// unordered compare sets ZF, so NaN stores 0. Confirm whether this matters
// (the original C may have been built with fast-math assumptions).
LBB4_528:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xf0             // and    edx, -16
	WORD $0xf631                         // xor    esi, esi
	LONG $0x573841c4; BYTE $0xc0         // vxorps    xmm8, xmm8, xmm8
	QUAD $0x00009c8d5879e2c4; BYTE $0x00 // vpbroadcastd    xmm1, dword 156[rbp] /* [rip + .LCPI4_8] */

// Vector loop, 16 floats per iteration in four groups of four.
// Per group: (x == 0.0) mask sign-extended to qwords; the raw float bits are
// arithmetic-shifted right by 31 (all-ones when the sign bit is set, else 0),
// ORed with xmm1, converted int32 -> float -> double, then cleared by vpandn
// in the lanes where x == 0.0.
LBB4_529:
	LONG $0x1410f8c5; BYTE $0xb1               // vmovups    xmm2, oword [rcx + 4*rsi]
	LONG $0x5c10f8c5; WORD $0x10b1             // vmovups    xmm3, oword [rcx + 4*rsi + 16]
	LONG $0x6410f8c5; WORD $0x20b1             // vmovups    xmm4, oword [rcx + 4*rsi + 32]
	LONG $0x6c10f8c5; WORD $0x30b1             // vmovups    xmm5, oword [rcx + 4*rsi + 48]
	LONG $0xf2c2b8c5; BYTE $0x00               // vcmpeqps    xmm6, xmm8, xmm2
	LONG $0x257de2c4; BYTE $0xf6               // vpmovsxdq    ymm6, xmm6
	LONG $0xfbc2b8c5; BYTE $0x00               // vcmpeqps    xmm7, xmm8, xmm3
	LONG $0x257de2c4; BYTE $0xff               // vpmovsxdq    ymm7, xmm7
	LONG $0xc4c2b8c5; BYTE $0x00               // vcmpeqps    xmm0, xmm8, xmm4
	LONG $0x257d62c4; BYTE $0xc8               // vpmovsxdq    ymm9, xmm0
	LONG $0xc5c2b8c5; BYTE $0x00               // vcmpeqps    xmm0, xmm8, xmm5
	LONG $0x257de2c4; BYTE $0xc0               // vpmovsxdq    ymm0, xmm0
	LONG $0xe272e9c5; BYTE $0x1f               // vpsrad    xmm2, xmm2, 31
	LONG $0xd1ebe9c5                           // vpor    xmm2, xmm2, xmm1
	LONG $0xe372e1c5; BYTE $0x1f               // vpsrad    xmm3, xmm3, 31
	LONG $0xd9ebe1c5                           // vpor    xmm3, xmm3, xmm1
	LONG $0xe472d9c5; BYTE $0x1f               // vpsrad    xmm4, xmm4, 31
	LONG $0xe1ebd9c5                           // vpor    xmm4, xmm4, xmm1
	LONG $0xe572d1c5; BYTE $0x1f               // vpsrad    xmm5, xmm5, 31
	LONG $0xe9ebd1c5                           // vpor    xmm5, xmm5, xmm1
	LONG $0xd25bf8c5                           // vcvtdq2ps    xmm2, xmm2
	LONG $0xdb5bf8c5                           // vcvtdq2ps    xmm3, xmm3
	LONG $0xe45bf8c5                           // vcvtdq2ps    xmm4, xmm4
	LONG $0xed5bf8c5                           // vcvtdq2ps    xmm5, xmm5
	LONG $0xd25afcc5                           // vcvtps2pd    ymm2, xmm2
	LONG $0xd2dfcdc5                           // vpandn    ymm2, ymm6, ymm2
	LONG $0xdb5afcc5                           // vcvtps2pd    ymm3, xmm3
	LONG $0xdbdfc5c5                           // vpandn    ymm3, ymm7, ymm3
	LONG $0xe45afcc5                           // vcvtps2pd    ymm4, xmm4
	LONG $0xed5afcc5                           // vcvtps2pd    ymm5, xmm5
	LONG $0xe4dfb5c5                           // vpandn    ymm4, ymm9, ymm4
	LONG $0xc5dffdc5                           // vpandn    ymm0, ymm0, ymm5
	LONG $0x7f7ec1c4; WORD $0xf014             // vmovdqu    yword [r8 + 8*rsi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf05c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rsi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf064; BYTE $0x40 // vmovdqu    yword [r8 + 8*rsi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf044; BYTE $0x60 // vmovdqu    yword [r8 + 8*rsi + 96], ymm0
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_529
	// Nothing left for the scalar tail when the rounded-down limit equals rax.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail starting at index rdx; xmm0 = 0.0 is the comparison operand.
LBB4_531:
	LONG $0xc0eff9c5 // vpxor    xmm0, xmm0, xmm0
	JMP  LBB4_533

// Store the low 8 bytes of xmm1, advance rdx, exit when rdx reaches rax.
LBB4_532:
	LONG $0xd679c1c4; WORD $0xd00c // vmovq    qword [r8 + 8*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

// xmm1 defaults to 0 and is stored as-is when the compare reports equal.
// Otherwise esi = -(sign bit) | 1, i.e. -1 when the sign bit is set and 1
// when it is clear, converted int32 -> float -> double. vcvtsi2ss takes its
// upper lanes from xmm10; only the low 8 bytes of xmm1 are stored afterwards.
LBB4_533:
	LONG $0x1410fac5; BYTE $0x91 // vmovss    xmm2, dword [rcx + 4*rdx]
	LONG $0xc9eff1c5             // vpxor    xmm1, xmm1, xmm1
	LONG $0xc22ef8c5             // vucomiss    xmm0, xmm2
	JE   LBB4_532
	LONG $0xf250f8c5             // vmovmskps    esi, xmm2
	WORD $0xe683; BYTE $0x01     // and    esi, 1
	WORD $0xdef7                 // neg    esi
	WORD $0xce83; BYTE $0x01     // or    esi, 1
	LONG $0xce2aaac5             // vcvtsi2ss    xmm1, xmm10, esi
	LONG $0xc95af2c5             // vcvtss2sd    xmm1, xmm1, xmm1
	JMP  LBB4_532

// Signed 32-bit input elements, three-way result as double:
//   x > 0            -> qword at 8[rbp]
//   x != 0, not > 0  -> -1.0 in the vector loop (all-ones mask converted as
//                       int32 -1); qword at 32[rbp] in the scalar tail
//   x == 0           -> 0.0
// Setup: rdx = eax rounded down to a multiple of 16, rsi = 0 (element index),
// xmm0 = 0, xmm8 = all-ones, ymm2 = qword at 8[rbp] broadcast.
// NOTE(review): .LCPI4_1 is presumably 1.0 and .LCPI4_13 presumably -1.0 -
// the data table is outside this chunk; confirm. The loop body runs before
// the first bound check, so the count is presumably >= 16 here.
LBB4_538:
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0   // vpcmpeqd    xmm8, xmm8, xmm8
	LONG $0x197de2c4; WORD $0x0855 // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI4_1] */

// Vector loop, 16 elements per iteration in four groups of four dwords.
// The signed (x > 0) masks are sign-extended to qwords to drive vblendvpd;
// the (x != 0) dword masks are converted directly to double (-1.0 or 0.0)
// and overridden by ymm2 where x > 0.
LBB4_539:
	LONG $0x1c6ffac5; BYTE $0xb1               // vmovdqu    xmm3, oword [rcx + 4*rsi]
	LONG $0x646ffac5; WORD $0x10b1             // vmovdqu    xmm4, oword [rcx + 4*rsi + 16]
	LONG $0x6c6ffac5; WORD $0x20b1             // vmovdqu    xmm5, oword [rcx + 4*rsi + 32]
	LONG $0x746ffac5; WORD $0x30b1             // vmovdqu    xmm6, oword [rcx + 4*rsi + 48]
	LONG $0xf866e1c5                           // vpcmpgtd    xmm7, xmm3, xmm0
	LONG $0x257d62c4; BYTE $0xcf               // vpmovsxdq    ymm9, xmm7
	LONG $0xc866d9c5                           // vpcmpgtd    xmm1, xmm4, xmm0
	LONG $0x257d62c4; BYTE $0xd1               // vpmovsxdq    ymm10, xmm1
	LONG $0xf866d1c5                           // vpcmpgtd    xmm7, xmm5, xmm0
	LONG $0x257de2c4; BYTE $0xff               // vpmovsxdq    ymm7, xmm7
	LONG $0xc866c9c5                           // vpcmpgtd    xmm1, xmm6, xmm0
	LONG $0x257de2c4; BYTE $0xc9               // vpmovsxdq    ymm1, xmm1
	LONG $0xd876e1c5                           // vpcmpeqd    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
	LONG $0xe076d9c5                           // vpcmpeqd    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0xe4e6fec5                           // vcvtdq2pd    ymm4, xmm4
	LONG $0xe876d1c5                           // vpcmpeqd    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0xede6fec5                           // vcvtdq2pd    ymm5, xmm5
	LONG $0xf076c9c5                           // vpcmpeqd    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0xf6e6fec5                           // vcvtdq2pd    ymm6, xmm6
	LONG $0x4b65e3c4; WORD $0x90da             // vblendvpd    ymm3, ymm3, ymm2, ymm9
	LONG $0x4b5de3c4; WORD $0xa0e2             // vblendvpd    ymm4, ymm4, ymm2, ymm10
	LONG $0x4b55e3c4; WORD $0x70ea             // vblendvpd    ymm5, ymm5, ymm2, ymm7
	LONG $0x4b4de3c4; WORD $0x10ca             // vblendvpd    ymm1, ymm6, ymm2, ymm1
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf04c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm1
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_539
	// Nothing left for the scalar tail when the rounded-down limit equals rax.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail starting at index rdx. xmm0 = value for nonzero non-positive
// inputs (32[rbp]), xmm1 = value for positive inputs (8[rbp]).
LBB4_541:
	LONG $0x4510fbc5; BYTE $0x20 // vmovsd    xmm0, qword 32[rbp] /* [rip + .LCPI4_13] */
	LONG $0x4d10fbc5; BYTE $0x08 // vmovsd    xmm1, qword 8[rbp] /* [rip + .LCPI4_1] */
	JMP  LBB4_543

// Store the selected value, advance rdx, exit when rdx reaches rax.
LBB4_542:
	LONG $0x117bc1c4; WORD $0xd01c // vmovsd    qword [r8 + 8*rdx], xmm3
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

// One CMP feeds both JNE and the later JG: the AVX moves/xor in between do
// not modify EFLAGS. xmm2 = xmm0 if x != 0, else 0.
LBB4_543:
	LONG $0x00913c83 // cmp    dword [rcx + 4*rdx], 0
	LONG $0xd028f9c5 // vmovapd    xmm2, xmm0
	JNE  LBB4_545
	LONG $0xd257e9c5 // vxorpd    xmm2, xmm2, xmm2

// xmm3 = xmm1 if x > 0 (signed), else xmm2.
LBB4_545:
	LONG $0xd928f9c5 // vmovapd    xmm3, xmm1
	JG   LBB4_542
	LONG $0xda28f9c5 // vmovapd    xmm3, xmm2
	JMP  LBB4_542

// 32-bit input elements, nonzero test only, 64-bit integer output.
// Setup: rdx = eax rounded down to a multiple of 16, rsi = 0 (element index),
// xmm0 = 0, xmm1 = all-ones, ymm2 = qword at 40[rbp] broadcast.
// NOTE(review): .LCPI4_15 is presumably the integer 1, matching the 0/1 that
// the scalar tail stores via SETNE - confirm. The loop body runs before the
// first bound check, so the count is presumably >= 16 here.
LBB4_577:
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5               // vpcmpeqd    xmm1, xmm1, xmm1
	LONG $0x597de2c4; WORD $0x2855 // vpbroadcastq    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

// Vector loop, 16 elements per iteration in four groups of four dwords.
// Per group: (x == 0) mask, inverted, zero-extended to qwords, ANDed with
// ymm2, stored at [r8 + 8*rsi].
LBB4_578:
	LONG $0x1c76f9c5; BYTE $0xb1               // vpcmpeqd    xmm3, xmm0, oword [rcx + 4*rsi]
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x357de2c4; BYTE $0xdb               // vpmovzxdq    ymm3, xmm3
	LONG $0x6476f9c5; WORD $0x10b1             // vpcmpeqd    xmm4, xmm0, oword [rcx + 4*rsi + 16]
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x357de2c4; BYTE $0xe4               // vpmovzxdq    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0x6c76f9c5; WORD $0x20b1             // vpcmpeqd    xmm5, xmm0, oword [rcx + 4*rsi + 32]
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x357de2c4; BYTE $0xed               // vpmovzxdq    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0x7476f9c5; WORD $0x30b1             // vpcmpeqd    xmm6, xmm0, oword [rcx + 4*rsi + 48]
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x357de2c4; BYTE $0xf6               // vpmovzxdq    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	LONG $0x7f7ec1c4; WORD $0xf01c             // vmovdqu    yword [r8 + 8*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0xf064; BYTE $0x20 // vmovdqu    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0xf06c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0xf074; BYTE $0x60 // vmovdqu    yword [r8 + 8*rsi + 96], ymm6
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_578
	// Nothing left for the scalar tail when the rounded-down limit equals rax.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail starting at index rdx: out64[rdx] = (in32[rdx] != 0) ? 1 : 0.
// rsi is zeroed before the CMP so that SETNE yields a clean 0/1 qword.
LBB4_580:
	WORD $0xf631             // xor    esi, esi
	LONG $0x00913c83         // cmp    dword [rcx + 4*rdx], 0
	LONG $0xd6950f40         // setne    sil
	LONG $0xd0348949         // mov    qword [r8 + 8*rdx], rsi
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JNE  LBB4_580
	JMP  LBB4_1351

// Double-precision input, 64-bit integer output, four elements per group.
// Setup:
//   rsi = r10d rounded down to a multiple of 4
//   r9  = (rsi - 4) / 4 + 1 = number of 4-element groups
//   if rsi - 4 == 0 (a single group) jump to LBB4_1282 (outside this chunk)
//   r14 = -(r9 & ~1): negative count of groups handled by the 2x-unrolled
//         loop; it counts up by 2 until it reaches zero
//   rdi = 0 (element index)
//   ymm0 = qword 0[rbp] broadcast, ymm2 = qword 8[rbp] broadcast,
//   xmm3 = qword 16[rbp], xmm8 = 0.0
// NOTE(review): presumably .LCPI4_0 is the sign-bit mask and .LCPI4_1 is 1.0
// (so (x & ymm0) | ymm2 is +/-1.0 with the sign of x), .LCPI4_6 is 2^63 and
// r11 holds 0x8000000000000000 - the per-lane sequence matches the usual
// double -> uint64 conversion. All of these are defined outside this chunk;
// confirm. An odd leftover group is presumably handled at LBB4_1283.
// NOTE(review): rbx and r14 are used as scratch here; their save/restore is
// not visible in this chunk.
LBB4_581:
	WORD $0x8944; BYTE $0xd6       // mov    esi, r10d
	WORD $0xe683; BYTE $0xfc       // and    esi, -4
	LONG $0xfc468d48               // lea    rax, [rsi - 4]
	WORD $0x8949; BYTE $0xc1       // mov    r9, rax
	LONG $0x02e9c149               // shr    r9, 2
	LONG $0x01c18349               // add    r9, 1
	WORD $0x8548; BYTE $0xc0       // test    rax, rax
	JE   LBB4_1282
	WORD $0x894d; BYTE $0xce       // mov    r14, r9
	LONG $0xfee68349               // and    r14, -2
	WORD $0xf749; BYTE $0xde       // neg    r14
	WORD $0xff31                   // xor    edi, edi
	LONG $0x197de2c4; WORD $0x0045 // vbroadcastsd    ymm0, qword 0[rbp] /* [rip + .LCPI4_0] */
	LONG $0x573941c4; BYTE $0xc0   // vxorpd    xmm8, xmm8, xmm8
	LONG $0x197de2c4; WORD $0x0855 // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0x5d10fbc5; BYTE $0x10   // vmovsd    xmm3, qword 16[rbp] /* [rip + .LCPI4_6] */

// Main loop: two groups of four doubles (8 elements) per iteration.
// For each group: v = (x & ymm0) | ymm2, then every lane of v is converted to
// a 64-bit integer individually:
//   a = cvttsd2si(v - xmm3) ^ r11 ;  b = cvttsd2si(v) ;  lane = (v >= xmm3) ? a : b
// vpermilps with immediate 78 swaps the two 64-bit halves of an xmm register
// to reach the odd lanes. The four integers are reassembled with
// vpunpcklqdq/vinserti128 and ANDed with the (x != 0.0) mask (vcmpneqpd,
// predicate 4) so lanes whose input equals zero store 0.
// The vucomisd/cmovae pairs are interleaved with vmovq/vpunpcklqdq, which do
// not modify EFLAGS.
LBB4_583:
	LONG $0x2410fdc5; BYTE $0xf9               // vmovupd    ymm4, yword [rcx + 8*rdi]
	LONG $0xe854ddc5                           // vandpd    ymm5, ymm4, ymm0
	LONG $0xed56edc5                           // vorpd    ymm5, ymm2, ymm5
	LONG $0x197de3c4; WORD $0x01ee             // vextractf128    xmm6, ymm5, 1
	LONG $0xfb5ccbc5                           // vsubsd    xmm7, xmm6, xmm3
	LONG $0x2cfbe1c4; BYTE $0xdf               // vcvttsd2si    rbx, xmm7
	WORD $0x314c; BYTE $0xdb                   // xor    rbx, r11
	LONG $0x2cfbe1c4; BYTE $0xd6               // vcvttsd2si    rdx, xmm6
	LONG $0xf32ef9c5                           // vucomisd    xmm6, xmm3
	LONG $0xd3430f48                           // cmovae    rdx, rbx
	LONG $0x0479e3c4; WORD $0x4ef6             // vpermilps    xmm6, xmm6, 78
	LONG $0xfb5ccbc5                           // vsubsd    xmm7, xmm6, xmm3
	LONG $0x2cfbe1c4; BYTE $0xdf               // vcvttsd2si    rbx, xmm7
	WORD $0x314c; BYTE $0xdb                   // xor    rbx, r11
	LONG $0x2cfbe1c4; BYTE $0xc6               // vcvttsd2si    rax, xmm6
	LONG $0xf32ef9c5                           // vucomisd    xmm6, xmm3
	LONG $0x6ef9e1c4; BYTE $0xf2               // vmovq    xmm6, rdx
	LONG $0xc3430f48                           // cmovae    rax, rbx
	LONG $0x6ef9e1c4; BYTE $0xf8               // vmovq    xmm7, rax
	LONG $0xcb5cd3c5                           // vsubsd    xmm1, xmm5, xmm3
	LONG $0x2cfbe1c4; BYTE $0xc1               // vcvttsd2si    rax, xmm1
	WORD $0x314c; BYTE $0xd8                   // xor    rax, r11
	LONG $0x2cfbe1c4; BYTE $0xd5               // vcvttsd2si    rdx, xmm5
	LONG $0xeb2ef9c5                           // vucomisd    xmm5, xmm3
	LONG $0xd0430f48                           // cmovae    rdx, rax
	LONG $0x0479e3c4; WORD $0x4ecd             // vpermilps    xmm1, xmm5, 78
	LONG $0xeb5cf3c5                           // vsubsd    xmm5, xmm1, xmm3
	LONG $0x2cfbe1c4; BYTE $0xc5               // vcvttsd2si    rax, xmm5
	LONG $0x6ef9e1c4; BYTE $0xea               // vmovq    xmm5, rdx
	WORD $0x314c; BYTE $0xd8                   // xor    rax, r11
	LONG $0x2cfbe1c4; BYTE $0xd1               // vcvttsd2si    rdx, xmm1
	LONG $0xcb2ef9c5                           // vucomisd    xmm1, xmm3
	LONG $0xcf6cc9c5                           // vpunpcklqdq    xmm1, xmm6, xmm7
	LONG $0xd0430f48                           // cmovae    rdx, rax
	LONG $0x6ef9e1c4; BYTE $0xf2               // vmovq    xmm6, rdx
	LONG $0xee6cd1c5                           // vpunpcklqdq    xmm5, xmm5, xmm6
	LONG $0x3855e3c4; WORD $0x01c9             // vinserti128    ymm1, ymm5, xmm1, 1
	LONG $0xe4c2bdc5; BYTE $0x04               // vcmpneqpd    ymm4, ymm8, ymm4
	LONG $0xc954ddc5                           // vandpd    ymm1, ymm4, ymm1
	LONG $0x117dc1c4; WORD $0xf80c             // vmovupd    yword [r8 + 8*rdi], ymm1
	// Second group of four: same sequence on [rcx + 8*rdi + 32].
	LONG $0x6410fdc5; WORD $0x20f9             // vmovupd    ymm4, yword [rcx + 8*rdi + 32]
	LONG $0xc854ddc5                           // vandpd    ymm1, ymm4, ymm0
	LONG $0xc956edc5                           // vorpd    ymm1, ymm2, ymm1
	LONG $0x197de3c4; WORD $0x01cd             // vextractf128    xmm5, ymm1, 1
	LONG $0xf35cd3c5                           // vsubsd    xmm6, xmm5, xmm3
	LONG $0x2cfbe1c4; BYTE $0xc6               // vcvttsd2si    rax, xmm6
	WORD $0x314c; BYTE $0xd8                   // xor    rax, r11
	LONG $0x2cfbe1c4; BYTE $0xd5               // vcvttsd2si    rdx, xmm5
	LONG $0xeb2ef9c5                           // vucomisd    xmm5, xmm3
	LONG $0xd0430f48                           // cmovae    rdx, rax
	LONG $0x0479e3c4; WORD $0x4eed             // vpermilps    xmm5, xmm5, 78
	LONG $0xf35cd3c5                           // vsubsd    xmm6, xmm5, xmm3
	LONG $0x2cfbe1c4; BYTE $0xc6               // vcvttsd2si    rax, xmm6
	LONG $0x6ef9e1c4; BYTE $0xf2               // vmovq    xmm6, rdx
	WORD $0x314c; BYTE $0xd8                   // xor    rax, r11
	LONG $0x2cfbe1c4; BYTE $0xd5               // vcvttsd2si    rdx, xmm5
	LONG $0xeb2ef9c5                           // vucomisd    xmm5, xmm3
	LONG $0xd0430f48                           // cmovae    rdx, rax
	LONG $0x6ef9e1c4; BYTE $0xea               // vmovq    xmm5, rdx
	LONG $0xfb5cf3c5                           // vsubsd    xmm7, xmm1, xmm3
	LONG $0x2cfbe1c4; BYTE $0xc7               // vcvttsd2si    rax, xmm7
	LONG $0xed6cc9c5                           // vpunpcklqdq    xmm5, xmm6, xmm5
	WORD $0x314c; BYTE $0xd8                   // xor    rax, r11
	LONG $0x2cfbe1c4; BYTE $0xd1               // vcvttsd2si    rdx, xmm1
	LONG $0xcb2ef9c5                           // vucomisd    xmm1, xmm3
	LONG $0xd0430f48                           // cmovae    rdx, rax
	LONG $0x0479e3c4; WORD $0x4ec9             // vpermilps    xmm1, xmm1, 78
	LONG $0xf35cf3c5                           // vsubsd    xmm6, xmm1, xmm3
	LONG $0x2cfbe1c4; BYTE $0xc6               // vcvttsd2si    rax, xmm6
	LONG $0x6ef9e1c4; BYTE $0xf2               // vmovq    xmm6, rdx
	WORD $0x314c; BYTE $0xd8                   // xor    rax, r11
	LONG $0x2cfbe1c4; BYTE $0xd1               // vcvttsd2si    rdx, xmm1
	LONG $0xcb2ef9c5                           // vucomisd    xmm1, xmm3
	LONG $0xd0430f48                           // cmovae    rdx, rax
	LONG $0x6ef9e1c4; BYTE $0xca               // vmovq    xmm1, rdx
	LONG $0xc96cc9c5                           // vpunpcklqdq    xmm1, xmm6, xmm1
	LONG $0x3875e3c4; WORD $0x01cd             // vinserti128    ymm1, ymm1, xmm5, 1
	LONG $0xe4c2bdc5; BYTE $0x04               // vcmpneqpd    ymm4, ymm8, ymm4
	LONG $0xc954ddc5                           // vandpd    ymm1, ymm4, ymm1
	LONG $0x117dc1c4; WORD $0xf84c; BYTE $0x20 // vmovupd    yword [r8 + 8*rdi + 32], ymm1
	// Advance by 8 elements; r14 reaches zero after the last pair of groups
	// (JNE tests the flags of this final ADD).
	LONG $0x08c78348                           // add    rdi, 8
	LONG $0x02c68349                           // add    r14, 2
	JNE  LBB4_583
	JMP  LBB4_1283

// 16-bit input elements, nonzero test only, 64-bit integer output.
// Setup: rdx = eax rounded down to a multiple of 16, rsi = 0 (element index),
// xmm0 = 0, xmm1 = all-ones, ymm2 = qword at 40[rbp] broadcast.
// NOTE(review): .LCPI4_15 is presumably the integer 1, matching the 0/1 that
// the scalar tail stores via SETNE - confirm. The loop body runs before the
// first bound check, so the count is presumably >= 16 here.
LBB4_590:
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5               // vpcmpeqd    xmm1, xmm1, xmm1
	LONG $0x597de2c4; WORD $0x2855 // vpbroadcastq    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

// Vector loop, 16 elements per iteration (four vmovq loads of four words).
// Per group: (x == 0) word mask, inverted, zero-extended to qwords, ANDed
// with ymm2, stored at [r8 + 8*rsi].
LBB4_591:
	LONG $0x1c7efac5; BYTE $0x71               // vmovq    xmm3, qword [rcx + 2*rsi]
	LONG $0x647efac5; WORD $0x0871             // vmovq    xmm4, qword [rcx + 2*rsi + 8]
	LONG $0x6c7efac5; WORD $0x1071             // vmovq    xmm5, qword [rcx + 2*rsi + 16]
	LONG $0x747efac5; WORD $0x1871             // vmovq    xmm6, qword [rcx + 2*rsi + 24]
	LONG $0xd875e1c5                           // vpcmpeqw    xmm3, xmm3, xmm0
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x347de2c4; BYTE $0xdb               // vpmovzxwq    ymm3, xmm3
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xe075d9c5                           // vpcmpeqw    xmm4, xmm4, xmm0
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x347de2c4; BYTE $0xe4               // vpmovzxwq    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe875d1c5                           // vpcmpeqw    xmm5, xmm5, xmm0
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x347de2c4; BYTE $0xed               // vpmovzxwq    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xf075c9c5                           // vpcmpeqw    xmm6, xmm6, xmm0
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x347de2c4; BYTE $0xf6               // vpmovzxwq    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	LONG $0x7f7ec1c4; WORD $0xf01c             // vmovdqu    yword [r8 + 8*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0xf064; BYTE $0x20 // vmovdqu    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0xf06c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0xf074; BYTE $0x60 // vmovdqu    yword [r8 + 8*rsi + 96], ymm6
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_591
	// Nothing left for the scalar tail when the rounded-down limit equals rax.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail starting at index rdx: out64[rdx] = (in16[rdx] != 0) ? 1 : 0.
// rsi is zeroed before the CMP so that SETNE yields a clean 0/1 qword.
LBB4_593:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0xd0348949             // mov    qword [r8 + 8*rdx], rsi
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_593
	JMP  LBB4_1351

// LBB4_594 / LBB4_595 / LBB4_597 / LBB4_598: three-way sign of signed 16-bit
// inputs, written as 64-bit values: positive -> constant, negative -> -1, zero -> 0.
// Register roles as used in this block: rcx = input base (indexed *2),
// r8 = output base (indexed *8), r10 = element count.
// NOTE(review): rcx/r8/r10 are set before this chunk -- confirm against the caller code.
LBB4_594:
	// rdx = r10 rounded down to a multiple of 16; rsi = element index.
	WORD $0x8944; BYTE $0xd2       // mov    edx, r10d
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	// xmm0 = zero; xmm8 = all-ones (used as a NOT mask).
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0   // vpcmpeqd    xmm8, xmm8, xmm8
	// NOTE(review): the qword at 40[rbp] is presumably integer 1 (the scalar tail
	// uses 1 for positive inputs) -- confirm against the constant table.
	LONG $0x197de2c4; WORD $0x2855 // vbroadcastsd    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

// Vector loop: 16 words per iteration, handled as four groups of 4 words.
LBB4_595:
	LONG $0x1c7efac5; BYTE $0x71               // vmovq    xmm3, qword [rcx + 2*rsi]
	LONG $0x647efac5; WORD $0x0871             // vmovq    xmm4, qword [rcx + 2*rsi + 8]
	LONG $0x6c7efac5; WORD $0x1071             // vmovq    xmm5, qword [rcx + 2*rsi + 16]
	LONG $0x747efac5; WORD $0x1871             // vmovq    xmm6, qword [rcx + 2*rsi + 24]
	// "x > 0" masks (signed compare), sign-extended to qwords: ymm9, ymm10, ymm7, ymm1.
	LONG $0xf865e1c5                           // vpcmpgtw    xmm7, xmm3, xmm0
	LONG $0x247d62c4; BYTE $0xcf               // vpmovsxwq    ymm9, xmm7
	LONG $0xc865d9c5                           // vpcmpgtw    xmm1, xmm4, xmm0
	LONG $0x247d62c4; BYTE $0xd1               // vpmovsxwq    ymm10, xmm1
	LONG $0xf865d1c5                           // vpcmpgtw    xmm7, xmm5, xmm0
	LONG $0x247de2c4; BYTE $0xff               // vpmovsxwq    ymm7, xmm7
	LONG $0xc865c9c5                           // vpcmpgtw    xmm1, xmm6, xmm0
	LONG $0x247de2c4; BYTE $0xc9               // vpmovsxwq    ymm1, xmm1
	// "x != 0" masks, sign-extended to qwords: -1 for non-zero inputs, 0 for zero.
	LONG $0xd875e1c5                           // vpcmpeqw    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x247de2c4; BYTE $0xdb               // vpmovsxwq    ymm3, xmm3
	LONG $0xe075d9c5                           // vpcmpeqw    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x247de2c4; BYTE $0xe4               // vpmovsxwq    ymm4, xmm4
	LONG $0xe875d1c5                           // vpcmpeqw    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x247de2c4; BYTE $0xed               // vpmovsxwq    ymm5, xmm5
	LONG $0xf075c9c5                           // vpcmpeqw    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x247de2c4; BYTE $0xf6               // vpmovsxwq    ymm6, xmm6
	// Where "x > 0" pick the broadcast constant, otherwise keep the -1/0 mask value.
	LONG $0x4b65e3c4; WORD $0x90da             // vblendvpd    ymm3, ymm3, ymm2, ymm9
	LONG $0x4b5de3c4; WORD $0xa0e2             // vblendvpd    ymm4, ymm4, ymm2, ymm10
	LONG $0x4b55e3c4; WORD $0x70ea             // vblendvpd    ymm5, ymm5, ymm2, ymm7
	LONG $0x4b4de3c4; WORD $0x10ca             // vblendvpd    ymm1, ymm6, ymm2, ymm1
	// Store 16 qword results (4 x 32 bytes).
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf04c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm1
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_595
	// Nothing left over when the rounded count equals the full count.
	WORD $0x394c; BYTE $0xd2                   // cmp    rdx, r10
	JE   LBB4_1351

// Scalar tail setup: rsi holds the value stored for positive inputs.
LBB4_597:
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Scalar tail: rdx = index, runs until rdx == r10.
LBB4_598:
	LONG $0x513cb70f         // movzx    edi, word [rcx + 2*rdx]
	// rax = -(x != 0): 0 for zero, -1 for any non-zero word.
	WORD $0xc031             // xor    eax, eax
	WORD $0x8566; BYTE $0xff // test    di, di
	WORD $0x950f; BYTE $0xd0 // setne    al
	WORD $0xf748; BYTE $0xd8 // neg    rax
	// The flags are recomputed because neg overwrote them; a signed-positive word yields 1.
	WORD $0x8566; BYTE $0xff // test    di, di
	LONG $0xc64f0f48         // cmovg    rax, rsi
	LONG $0xd0048949         // mov    qword [r8 + 8*rdx], rax
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
	JNE  LBB4_598
	JMP  LBB4_1351

// LBB4_602 .. LBB4_254: sign of float32 inputs written as 64-bit integers,
// two elements per loop iteration. A zero input stores 0; any other input is
// replaced by +1.0 or -1.0 according to its sign bit and then converted to a
// 64-bit integer.
// Register roles as used in this block: rcx = input base (indexed *4),
// r8 = output base (indexed *8), r10 = element count.
// NOTE(review): rcx/r8/r10 are set before this chunk -- confirm against the caller code.
LBB4_602:
	// rsi = r10 rounded down to an even count; rax = element index.
	WORD $0x8944; BYTE $0xd6               // mov    esi, r10d
	WORD $0xe683; BYTE $0xfe               // and    esi, -2
	WORD $0xc031                           // xor    eax, eax
	// xmm0 = 0.0 (comparison operand).
	LONG $0xc057f8c5                       // vxorps    xmm0, xmm0, xmm0
	// NOTE(review): the float at 160[rbp] is presumably 2^63, the threshold of the usual
	// float -> unsigned 64-bit conversion sequence used below -- confirm against the constant table.
	QUAD $0x000000a08d10fac5               // vmovss    xmm1, dword 160[rbp] /* [rip + .LCPI4_9] */
	// r9 = 0x8000000000000000, XORed into the converted value on the "x >= threshold" path.
	QUAD $0x000000000000b949; WORD $0x8000 // mov    r9, -9223372036854775808
	JMP  LBB4_605

// Non-zero path for the SECOND element of the pair (input index rax+1).
LBB4_603:
	// edx = -1 when the sign bit is set, +1 otherwise.
	LONG $0xd250f8c5             // vmovmskps    edx, xmm2
	WORD $0xe283; BYTE $0x01     // and    edx, 1
	WORD $0xdaf7                 // neg    edx
	WORD $0xca83; BYTE $0x01     // or    edx, 1
	// NOTE(review): the upper lanes are merged from xmm4, which is not written in this
	// block; only lane 0 is consumed afterwards.
	LONG $0xd22adac5             // vcvtsi2ss    xmm2, xmm4, edx
	// Two-step conversion: rdx = trunc(x), or trunc(x - xmm1) ^ r9 when x >= xmm1.
	LONG $0xd95ceac5             // vsubss    xmm3, xmm2, xmm1
	LONG $0x2cfae1c4; BYTE $0xfb // vcvttss2si    rdi, xmm3
	WORD $0x314c; BYTE $0xcf     // xor    rdi, r9
	LONG $0x2cfae1c4; BYTE $0xd2 // vcvttss2si    rdx, xmm2
	LONG $0xd12ef8c5             // vucomiss    xmm2, xmm1
	LONG $0xd7430f48             // cmovae    rdx, rdi
	LONG $0xc0548949; BYTE $0x08 // mov    qword [r8 + 8*rax + 8], rdx
	LONG $0x02c08348             // add    rax, 2
	WORD $0x3948; BYTE $0xc6     // cmp    rsi, rax
	JE   LBB4_254

// Loop head: FIRST element of the pair (input index rax).
LBB4_605:
	LONG $0x1410fac5; BYTE $0x81 // vmovss    xmm2, dword [rcx + 4*rax]
	// vucomiss sets ZF for both "equal" and "unordered", so NaN also takes the zero path.
	LONG $0xc22ef8c5             // vucomiss    xmm0, xmm2
	JNE  LBB4_607
	WORD $0xd231                 // xor    edx, edx
	JMP  LBB4_608

// Non-zero path for the first element; same sequence as LBB4_603, result left in rdx.
LBB4_607:
	LONG $0xd250f8c5             // vmovmskps    edx, xmm2
	WORD $0xe283; BYTE $0x01     // and    edx, 1
	WORD $0xdaf7                 // neg    edx
	WORD $0xca83; BYTE $0x01     // or    edx, 1
	LONG $0xd22adac5             // vcvtsi2ss    xmm2, xmm4, edx
	LONG $0xd95ceac5             // vsubss    xmm3, xmm2, xmm1
	LONG $0x2cfae1c4; BYTE $0xfb // vcvttss2si    rdi, xmm3
	WORD $0x314c; BYTE $0xcf     // xor    rdi, r9
	LONG $0x2cfae1c4; BYTE $0xd2 // vcvttss2si    rdx, xmm2
	LONG $0xd12ef8c5             // vucomiss    xmm2, xmm1
	LONG $0xd7430f48             // cmovae    rdx, rdi

// Store the first result, then test the second element of the pair.
LBB4_608:
	LONG $0xc0148949               // mov    qword [r8 + 8*rax], rdx
	LONG $0x5410fac5; WORD $0x0481 // vmovss    xmm2, dword [rcx + 4*rax + 4]
	LONG $0xc22ef8c5               // vucomiss    xmm0, xmm2
	JNE  LBB4_603
	// Second element is zero (or NaN): store 0 and advance by two.
	WORD $0xd231                   // xor    edx, edx
	LONG $0xc0548949; BYTE $0x08   // mov    qword [r8 + 8*rax + 8], rdx
	LONG $0x02c08348               // add    rax, 2
	WORD $0x3948; BYTE $0xc6       // cmp    rsi, rax
	JNE  LBB4_605

// Odd-count tail: if the count is odd, handle the last element at index rax.
// The conversion and store for it are in LBB4_1280 / LBB4_1281, which are outside this chunk.
LBB4_254:
	LONG $0x01c2f641             // test    r10b, 1
	JE   LBB4_1351
	LONG $0x0410fac5; BYTE $0x81 // vmovss    xmm0, dword [rcx + 4*rax]
	LONG $0xc957f0c5             // vxorps    xmm1, xmm1, xmm1
	LONG $0xc82ef8c5             // vucomiss    xmm1, xmm0
	JNE  LBB4_1280
	// Zero (or NaN) input: ecx = 0 is handed to LBB4_1281.
	WORD $0xc931                 // xor    ecx, ecx
	JMP  LBB4_1281

// LBB4_613 / LBB4_614 / LBB4_616 / LBB4_617: three-way sign of signed 32-bit
// inputs, written as 64-bit values: positive -> constant, negative -> -1, zero -> 0.
// Register roles as used in this block: rcx = input base (indexed *4),
// r8 = output base (indexed *8), r10 = element count.
// NOTE(review): rcx/r8/r10 are set before this chunk -- confirm against the caller code.
LBB4_613:
	// rdx = r10 rounded down to a multiple of 16; rsi = element index.
	WORD $0x8944; BYTE $0xd2       // mov    edx, r10d
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	// xmm0 = zero; xmm8 = all-ones (used as a NOT mask).
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0   // vpcmpeqd    xmm8, xmm8, xmm8
	// NOTE(review): the qword at 40[rbp] is presumably integer 1 (the scalar tail
	// uses 1 for positive inputs) -- confirm against the constant table.
	LONG $0x197de2c4; WORD $0x2855 // vbroadcastsd    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

// Vector loop: 16 dwords per iteration, handled as four groups of 4 dwords.
LBB4_614:
	LONG $0x1c6ffac5; BYTE $0xb1               // vmovdqu    xmm3, oword [rcx + 4*rsi]
	LONG $0x646ffac5; WORD $0x10b1             // vmovdqu    xmm4, oword [rcx + 4*rsi + 16]
	LONG $0x6c6ffac5; WORD $0x20b1             // vmovdqu    xmm5, oword [rcx + 4*rsi + 32]
	LONG $0x746ffac5; WORD $0x30b1             // vmovdqu    xmm6, oword [rcx + 4*rsi + 48]
	// "x > 0" masks (signed compare), sign-extended to qwords: ymm9, ymm10, ymm7, ymm1.
	LONG $0xf866e1c5                           // vpcmpgtd    xmm7, xmm3, xmm0
	LONG $0x257d62c4; BYTE $0xcf               // vpmovsxdq    ymm9, xmm7
	LONG $0xc866d9c5                           // vpcmpgtd    xmm1, xmm4, xmm0
	LONG $0x257d62c4; BYTE $0xd1               // vpmovsxdq    ymm10, xmm1
	LONG $0xf866d1c5                           // vpcmpgtd    xmm7, xmm5, xmm0
	LONG $0x257de2c4; BYTE $0xff               // vpmovsxdq    ymm7, xmm7
	LONG $0xc866c9c5                           // vpcmpgtd    xmm1, xmm6, xmm0
	LONG $0x257de2c4; BYTE $0xc9               // vpmovsxdq    ymm1, xmm1
	// "x != 0" masks, sign-extended to qwords: -1 for non-zero inputs, 0 for zero.
	LONG $0xd876e1c5                           // vpcmpeqd    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x257de2c4; BYTE $0xdb               // vpmovsxdq    ymm3, xmm3
	LONG $0xe076d9c5                           // vpcmpeqd    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x257de2c4; BYTE $0xe4               // vpmovsxdq    ymm4, xmm4
	LONG $0xe876d1c5                           // vpcmpeqd    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x257de2c4; BYTE $0xed               // vpmovsxdq    ymm5, xmm5
	LONG $0xf076c9c5                           // vpcmpeqd    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x257de2c4; BYTE $0xf6               // vpmovsxdq    ymm6, xmm6
	// Where "x > 0" pick the broadcast constant, otherwise keep the -1/0 mask value.
	LONG $0x4b65e3c4; WORD $0x90da             // vblendvpd    ymm3, ymm3, ymm2, ymm9
	LONG $0x4b5de3c4; WORD $0xa0e2             // vblendvpd    ymm4, ymm4, ymm2, ymm10
	LONG $0x4b55e3c4; WORD $0x70ea             // vblendvpd    ymm5, ymm5, ymm2, ymm7
	LONG $0x4b4de3c4; WORD $0x10ca             // vblendvpd    ymm1, ymm6, ymm2, ymm1
	// Store 16 qword results (4 x 32 bytes).
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf04c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm1
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_614
	// Nothing left over when the rounded count equals the full count.
	WORD $0x394c; BYTE $0xd2                   // cmp    rdx, r10
	JE   LBB4_1351

// Scalar tail setup: rsi holds the value stored for positive inputs.
LBB4_616:
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Scalar tail: rdx = index, runs until rdx == r10.
LBB4_617:
	WORD $0x3c8b; BYTE $0x91 // mov    edi, dword [rcx + 4*rdx]
	// rax = -(x != 0): 0 for zero, -1 for any non-zero dword.
	WORD $0xc031             // xor    eax, eax
	WORD $0xff85             // test    edi, edi
	WORD $0x950f; BYTE $0xd0 // setne    al
	WORD $0xf748; BYTE $0xd8 // neg    rax
	// The flags are recomputed because neg overwrote them; a signed-positive dword yields 1.
	WORD $0xff85             // test    edi, edi
	LONG $0xc64f0f48         // cmovg    rax, rsi
	LONG $0xd0048949         // mov    qword [r8 + 8*rdx], rax
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
	JNE  LBB4_617
	JMP  LBB4_1351

// LBB4_618 / LBB4_619 / LBB4_621: for every 32-bit input, store a 16-bit 1 when
// the input is non-zero and 0 otherwise.
// Register roles as used in this block: rcx = input base (indexed *4),
// r8 = output base (indexed *2), rax = element count.
// NOTE(review): rcx/r8/rax are set before this chunk -- confirm against the caller code.
LBB4_618:
	// rdx = rax rounded down to a multiple of 32; rsi = element index.
	WORD $0xc289             // mov    edx, eax
	WORD $0xe283; BYTE $0xe0 // and    edx, -32
	WORD $0xf631             // xor    esi, esi
	// The VEX-encoded xmm write also clears the upper half, so ymm0 is all zero; ymm1 = all-ones.
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1

// Vector loop: 32 dwords per iteration, handled as four groups of 8 dwords.
// Per group: invert (x == 0) to get the non-zero mask, pack the dword masks to
// words, then shift each word right by 15 so 0xFFFF becomes 1.
LBB4_619:
	LONG $0x1476fdc5; BYTE $0xb1               // vpcmpeqd    ymm2, ymm0, yword [rcx + 4*rsi]
	LONG $0xd1efedc5                           // vpxor    ymm2, ymm2, ymm1
	LONG $0x397de3c4; WORD $0x01d3             // vextracti128    xmm3, ymm2, 1
	LONG $0xd36be9c5                           // vpackssdw    xmm2, xmm2, xmm3
	LONG $0xd271e9c5; BYTE $0x0f               // vpsrlw    xmm2, xmm2, 15
	LONG $0x5c76fdc5; WORD $0x20b1             // vpcmpeqd    ymm3, ymm0, yword [rcx + 4*rsi + 32]
	LONG $0xd9efe5c5                           // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xd371e1c5; BYTE $0x0f               // vpsrlw    xmm3, xmm3, 15
	LONG $0x6476fdc5; WORD $0x40b1             // vpcmpeqd    ymm4, ymm0, yword [rcx + 4*rsi + 64]
	LONG $0xe1efddc5                           // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xd471d9c5; BYTE $0x0f               // vpsrlw    xmm4, xmm4, 15
	LONG $0x6c76fdc5; WORD $0x60b1             // vpcmpeqd    ymm5, ymm0, yword [rcx + 4*rsi + 96]
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xd571d1c5; BYTE $0x0f               // vpsrlw    xmm5, xmm5, 15
	// Store 32 word results (4 x 16 bytes).
	LONG $0x7f7ac1c4; WORD $0x7014             // vmovdqu    oword [r8 + 2*rsi], xmm2
	LONG $0x7f7ac1c4; WORD $0x705c; BYTE $0x10 // vmovdqu    oword [r8 + 2*rsi + 16], xmm3
	LONG $0x7f7ac1c4; WORD $0x7064; BYTE $0x20 // vmovdqu    oword [r8 + 2*rsi + 32], xmm4
	LONG $0x7f7ac1c4; WORD $0x706c; BYTE $0x30 // vmovdqu    oword [r8 + 2*rsi + 48], xmm5
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_619
	// Nothing left over when the rounded count equals the full count.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail: rdx = index, runs until rdx == rax; stores (dword != 0) as a word.
LBB4_621:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x00913c83             // cmp    dword [rcx + 4*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x34894166; BYTE $0x50 // mov    word [r8 + 2*rdx], si
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_621
	JMP  LBB4_1351

// LBB4_622 / LBB4_623 / LBB4_625: for every 32-bit input, store a 16-bit 1 when
// the input is non-zero and 0 otherwise.
// Register roles as used in this block: rcx = input base (indexed *4),
// r8 = output base (indexed *2), rax = element count.
// NOTE(review): rcx/r8/rax are set before this chunk -- confirm against the caller code.
LBB4_622:
	// rdx = rax rounded down to a multiple of 32; rsi = element index.
	WORD $0xc289             // mov    edx, eax
	WORD $0xe283; BYTE $0xe0 // and    edx, -32
	WORD $0xf631             // xor    esi, esi
	// The VEX-encoded xmm write also clears the upper half, so ymm0 is all zero; ymm1 = all-ones.
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1

// Vector loop: 32 dwords per iteration, handled as four groups of 8 dwords.
// Per group: invert (x == 0) to get the non-zero mask, pack the dword masks to
// words, then shift each word right by 15 so 0xFFFF becomes 1.
LBB4_623:
	LONG $0x1476fdc5; BYTE $0xb1               // vpcmpeqd    ymm2, ymm0, yword [rcx + 4*rsi]
	LONG $0xd1efedc5                           // vpxor    ymm2, ymm2, ymm1
	LONG $0x397de3c4; WORD $0x01d3             // vextracti128    xmm3, ymm2, 1
	LONG $0xd36be9c5                           // vpackssdw    xmm2, xmm2, xmm3
	LONG $0xd271e9c5; BYTE $0x0f               // vpsrlw    xmm2, xmm2, 15
	LONG $0x5c76fdc5; WORD $0x20b1             // vpcmpeqd    ymm3, ymm0, yword [rcx + 4*rsi + 32]
	LONG $0xd9efe5c5                           // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xd371e1c5; BYTE $0x0f               // vpsrlw    xmm3, xmm3, 15
	LONG $0x6476fdc5; WORD $0x40b1             // vpcmpeqd    ymm4, ymm0, yword [rcx + 4*rsi + 64]
	LONG $0xe1efddc5                           // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xd471d9c5; BYTE $0x0f               // vpsrlw    xmm4, xmm4, 15
	LONG $0x6c76fdc5; WORD $0x60b1             // vpcmpeqd    ymm5, ymm0, yword [rcx + 4*rsi + 96]
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xd571d1c5; BYTE $0x0f               // vpsrlw    xmm5, xmm5, 15
	// Store 32 word results (4 x 16 bytes).
	LONG $0x7f7ac1c4; WORD $0x7014             // vmovdqu    oword [r8 + 2*rsi], xmm2
	LONG $0x7f7ac1c4; WORD $0x705c; BYTE $0x10 // vmovdqu    oword [r8 + 2*rsi + 16], xmm3
	LONG $0x7f7ac1c4; WORD $0x7064; BYTE $0x20 // vmovdqu    oword [r8 + 2*rsi + 32], xmm4
	LONG $0x7f7ac1c4; WORD $0x706c; BYTE $0x30 // vmovdqu    oword [r8 + 2*rsi + 48], xmm5
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_623
	// Nothing left over when the rounded count equals the full count.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail: rdx = index, runs until rdx == rax; stores (dword != 0) as a word.
LBB4_625:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x00913c83             // cmp    dword [rcx + 4*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x34894166; BYTE $0x50 // mov    word [r8 + 2*rdx], si
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_625
	JMP  LBB4_1351

// LBB4_626 / LBB4_627 / LBB4_629 / LBB4_630: sign of float64 inputs written as
// 16-bit values, using an UNSIGNED-saturating pack (vpackusdw) in the vector path.
// Each input is rewritten as (x AND mask) OR constant, truncated to an integer,
// and replaced by 0 where the input compared equal to 0.0.
// Register roles as used in this block: rcx = input base (indexed *8),
// r8 = output base (indexed *2), rax = element count.
// NOTE(review): rcx/r8/rax are set before this chunk -- confirm against the caller code.
LBB4_626:
	// rsi = rax rounded down to a multiple of 16; rdi = element index.
	WORD $0xc689                   // mov    esi, eax
	WORD $0xe683; BYTE $0xf0       // and    esi, -16
	WORD $0xff31                   // xor    edi, edi
	// NOTE(review): 0[rbp] is presumably the float64 sign-bit mask and 8[rbp] presumably 1.0,
	// which would make the AND/OR pair below copysign(1.0, x) -- confirm against the constant table.
	LONG $0x197de2c4; WORD $0x0045 // vbroadcastsd    ymm0, qword 0[rbp] /* [rip + .LCPI4_0] */
	// ymm9 = 0.0 (compare operand); xmm10 = 0 (value blended in for zero inputs).
	LONG $0x573141c4; BYTE $0xc9   // vxorpd    xmm9, xmm9, xmm9
	LONG $0x197de2c4; WORD $0x0855 // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0xef2941c4; BYTE $0xd2   // vpxor    xmm10, xmm10, xmm10

// Vector loop: 16 doubles per iteration, handled as four groups of 4 doubles.
LBB4_627:
	LONG $0x2410fdc5; BYTE $0xf9   // vmovupd    ymm4, yword [rcx + 8*rdi]
	LONG $0x6c10fdc5; WORD $0x20f9 // vmovupd    ymm5, yword [rcx + 8*rdi + 32]
	LONG $0x7410fdc5; WORD $0x40f9 // vmovupd    ymm6, yword [rcx + 8*rdi + 64]
	LONG $0x7c10fdc5; WORD $0x60f9 // vmovupd    ymm7, yword [rcx + 8*rdi + 96]
	// "x == 0.0" masks (predicate 0: ordered equal, so NaN lanes are NOT flagged),
	// narrowed from qword to word width by two packs: xmm11, xmm12, xmm1, xmm3.
	LONG $0xc4c235c5; BYTE $0x00   // vcmpeqpd    ymm8, ymm9, ymm4
	LONG $0x197d63c4; WORD $0x01c1 // vextractf128    xmm1, ymm8, 1
	LONG $0xc96bb9c5               // vpackssdw    xmm1, xmm8, xmm1
	LONG $0xd96b71c5               // vpackssdw    xmm11, xmm1, xmm1
	LONG $0xc5c235c5; BYTE $0x00   // vcmpeqpd    ymm8, ymm9, ymm5
	LONG $0x197d63c4; WORD $0x01c3 // vextractf128    xmm3, ymm8, 1
	LONG $0xdb6bb9c5               // vpackssdw    xmm3, xmm8, xmm3
	LONG $0xe36b61c5               // vpackssdw    xmm12, xmm3, xmm3
	LONG $0xc6c235c5; BYTE $0x00   // vcmpeqpd    ymm8, ymm9, ymm6
	LONG $0x197d63c4; WORD $0x01c1 // vextractf128    xmm1, ymm8, 1
	LONG $0xc96bb9c5               // vpackssdw    xmm1, xmm8, xmm1
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xc7c235c5; BYTE $0x00   // vcmpeqpd    ymm8, ymm9, ymm7
	LONG $0x197d63c4; WORD $0x01c3 // vextractf128    xmm3, ymm8, 1
	LONG $0xdb6bb9c5               // vpackssdw    xmm3, xmm8, xmm3
	LONG $0xdb6be1c5               // vpackssdw    xmm3, xmm3, xmm3
	// x = (x AND ymm0) OR ymm2 for each group.
	LONG $0xe054ddc5               // vandpd    ymm4, ymm4, ymm0
	LONG $0xe456edc5               // vorpd    ymm4, ymm2, ymm4
	LONG $0xe854d5c5               // vandpd    ymm5, ymm5, ymm0
	LONG $0xed56edc5               // vorpd    ymm5, ymm2, ymm5
	LONG $0xf054cdc5               // vandpd    ymm6, ymm6, ymm0
	LONG $0xf656edc5               // vorpd    ymm6, ymm2, ymm6
	LONG $0xf854c5c5               // vandpd    ymm7, ymm7, ymm0
	LONG $0xff56edc5               // vorpd    ymm7, ymm2, ymm7
	// Truncate to int32, then pack to words with unsigned saturation (negative values clamp to 0).
	LONG $0xe4e6fdc5               // vcvttpd2dq    xmm4, ymm4
	LONG $0xede6fdc5               // vcvttpd2dq    xmm5, ymm5
	LONG $0x2b59e2c4; BYTE $0xe4   // vpackusdw    xmm4, xmm4, xmm4
	LONG $0x2b51e2c4; BYTE $0xed   // vpackusdw    xmm5, xmm5, xmm5
	LONG $0xf6e6fdc5               // vcvttpd2dq    xmm6, ymm6
	LONG $0x2b49e2c4; BYTE $0xf6   // vpackusdw    xmm6, xmm6, xmm6
	LONG $0xffe6fdc5               // vcvttpd2dq    xmm7, ymm7
	LONG $0x2b41e2c4; BYTE $0xff   // vpackusdw    xmm7, xmm7, xmm7
	// Force the result to 0 wherever the "x == 0.0" mask is set.
	LONG $0x4c59c3c4; WORD $0xb0e2 // vpblendvb    xmm4, xmm4, xmm10, xmm11
	LONG $0x4c51c3c4; WORD $0xc0ea // vpblendvb    xmm5, xmm5, xmm10, xmm12
	LONG $0x4c49c3c4; WORD $0x10ca // vpblendvb    xmm1, xmm6, xmm10, xmm1
	LONG $0x4c41c3c4; WORD $0x30da // vpblendvb    xmm3, xmm7, xmm10, xmm3
	// Gather the low 64 bits of the four groups into one ymm; vpermq 216 (qword order
	// 0,2,1,3) restores element order after the in-lane unpack.
	LONG $0x3875e3c4; WORD $0x01cb // vinserti128    ymm1, ymm1, xmm3, 1
	LONG $0x385de3c4; WORD $0x01dd // vinserti128    ymm3, ymm4, xmm5, 1
	LONG $0xc96ce5c5               // vpunpcklqdq    ymm1, ymm3, ymm1
	LONG $0x00fde3c4; WORD $0xd8c9 // vpermq    ymm1, ymm1, 216
	// Store 16 word results (32 bytes).
	LONG $0x7f7ec1c4; WORD $0x780c // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x10c78348               // add    rdi, 16
	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
	JNE  LBB4_627
	// Nothing left over when the rounded count equals the full count.
	WORD $0x3948; BYTE $0xc6       // cmp    rsi, rax
	JE   LBB4_1351

// Scalar tail setup: xmm0 = 0.0, xmm1 = 16-byte mask at 48[rbp], xmm2 = qword 8[rbp] duplicated.
// NOTE(review): vmovapd is an aligned load, so 48[rbp] must be 16-byte aligned.
LBB4_629:
	LONG $0xc057f9c5             // vxorpd    xmm0, xmm0, xmm0
	LONG $0x4d28f9c5; BYTE $0x30 // vmovapd    xmm1, oword 48[rbp] /* [rip + .LCPI4_2] */
	LONG $0x5512fbc5; BYTE $0x08 // vmovddup    xmm2, qword 8[rbp] /* [rip + .LCPI4_1] */

// Scalar tail: rsi = index, runs until rsi == rax.
LBB4_630:
	LONG $0x1c10fbc5; BYTE $0xf1 // vmovsd    xmm3, qword [rcx + 8*rsi]
	// The flags set here are consumed by the cmove below; the vector ops and the
	// conversion in between do not modify them.
	LONG $0xc32ef9c5             // vucomisd    xmm0, xmm3
	LONG $0xd954e1c5             // vandpd    xmm3, xmm3, xmm1
	LONG $0xdb56e9c5             // vorpd    xmm3, xmm2, xmm3
	LONG $0xfb2cfbc5             // vcvttsd2si    edi, xmm3
	// vucomisd sets ZF for "equal" and for "unordered", so zero and NaN inputs take edx.
	// NOTE(review): edx is not written in this block -- presumably zeroed earlier; confirm.
	WORD $0x440f; BYTE $0xfa     // cmove    edi, edx
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
	JNE  LBB4_630
	JMP  LBB4_1351

// LBB4_631 / LBB4_632 / LBB4_634 / LBB4_635: sign of float64 inputs written as
// 16-bit values, using a SIGNED-saturating pack (vpackssdw) in the vector path.
// Each input is rewritten as (x AND mask) OR constant, truncated to an integer,
// and replaced by 0 where the input compared equal to 0.0.
// Register roles as used in this block: rcx = input base (indexed *8),
// r8 = output base (indexed *2), rax = element count.
// NOTE(review): rcx/r8/rax are set before this chunk -- confirm against the caller code.
LBB4_631:
	// rsi = rax rounded down to a multiple of 16; rdi = element index.
	WORD $0xc689                   // mov    esi, eax
	WORD $0xe683; BYTE $0xf0       // and    esi, -16
	WORD $0xff31                   // xor    edi, edi
	// NOTE(review): 0[rbp] is presumably the float64 sign-bit mask and 8[rbp] presumably 1.0,
	// which would make the AND/OR pair below copysign(1.0, x) -- confirm against the constant table.
	LONG $0x197de2c4; WORD $0x0045 // vbroadcastsd    ymm0, qword 0[rbp] /* [rip + .LCPI4_0] */
	// ymm9 = 0.0 (compare operand); xmm10 = 0 (value blended in for zero inputs).
	LONG $0x573141c4; BYTE $0xc9   // vxorpd    xmm9, xmm9, xmm9
	LONG $0x197de2c4; WORD $0x0855 // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0xef2941c4; BYTE $0xd2   // vpxor    xmm10, xmm10, xmm10

// Vector loop: 16 doubles per iteration, handled as four groups of 4 doubles.
LBB4_632:
	LONG $0x2410fdc5; BYTE $0xf9   // vmovupd    ymm4, yword [rcx + 8*rdi]
	LONG $0x6c10fdc5; WORD $0x20f9 // vmovupd    ymm5, yword [rcx + 8*rdi + 32]
	LONG $0x7410fdc5; WORD $0x40f9 // vmovupd    ymm6, yword [rcx + 8*rdi + 64]
	LONG $0x7c10fdc5; WORD $0x60f9 // vmovupd    ymm7, yword [rcx + 8*rdi + 96]
	// "x == 0.0" masks (predicate 0: ordered equal, so NaN lanes are NOT flagged),
	// narrowed from qword to word width by two packs: xmm11, xmm12, xmm1, xmm3.
	LONG $0xc4c235c5; BYTE $0x00   // vcmpeqpd    ymm8, ymm9, ymm4
	LONG $0x197d63c4; WORD $0x01c1 // vextractf128    xmm1, ymm8, 1
	LONG $0xc96bb9c5               // vpackssdw    xmm1, xmm8, xmm1
	LONG $0xd96b71c5               // vpackssdw    xmm11, xmm1, xmm1
	LONG $0xc5c235c5; BYTE $0x00   // vcmpeqpd    ymm8, ymm9, ymm5
	LONG $0x197d63c4; WORD $0x01c3 // vextractf128    xmm3, ymm8, 1
	LONG $0xdb6bb9c5               // vpackssdw    xmm3, xmm8, xmm3
	LONG $0xe36b61c5               // vpackssdw    xmm12, xmm3, xmm3
	LONG $0xc6c235c5; BYTE $0x00   // vcmpeqpd    ymm8, ymm9, ymm6
	LONG $0x197d63c4; WORD $0x01c1 // vextractf128    xmm1, ymm8, 1
	LONG $0xc96bb9c5               // vpackssdw    xmm1, xmm8, xmm1
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xc7c235c5; BYTE $0x00   // vcmpeqpd    ymm8, ymm9, ymm7
	LONG $0x197d63c4; WORD $0x01c3 // vextractf128    xmm3, ymm8, 1
	LONG $0xdb6bb9c5               // vpackssdw    xmm3, xmm8, xmm3
	LONG $0xdb6be1c5               // vpackssdw    xmm3, xmm3, xmm3
	// x = (x AND ymm0) OR ymm2 for each group.
	LONG $0xe054ddc5               // vandpd    ymm4, ymm4, ymm0
	LONG $0xe456edc5               // vorpd    ymm4, ymm2, ymm4
	LONG $0xe854d5c5               // vandpd    ymm5, ymm5, ymm0
	LONG $0xed56edc5               // vorpd    ymm5, ymm2, ymm5
	LONG $0xf054cdc5               // vandpd    ymm6, ymm6, ymm0
	LONG $0xf656edc5               // vorpd    ymm6, ymm2, ymm6
	LONG $0xf854c5c5               // vandpd    ymm7, ymm7, ymm0
	LONG $0xff56edc5               // vorpd    ymm7, ymm2, ymm7
	// Truncate to int32, then pack to words with signed saturation.
	LONG $0xe4e6fdc5               // vcvttpd2dq    xmm4, ymm4
	LONG $0xede6fdc5               // vcvttpd2dq    xmm5, ymm5
	LONG $0xe46bd9c5               // vpackssdw    xmm4, xmm4, xmm4
	LONG $0xed6bd1c5               // vpackssdw    xmm5, xmm5, xmm5
	LONG $0xf6e6fdc5               // vcvttpd2dq    xmm6, ymm6
	LONG $0xf66bc9c5               // vpackssdw    xmm6, xmm6, xmm6
	LONG $0xffe6fdc5               // vcvttpd2dq    xmm7, ymm7
	LONG $0xff6bc1c5               // vpackssdw    xmm7, xmm7, xmm7
	// Force the result to 0 wherever the "x == 0.0" mask is set.
	LONG $0x4c59c3c4; WORD $0xb0e2 // vpblendvb    xmm4, xmm4, xmm10, xmm11
	LONG $0x4c51c3c4; WORD $0xc0ea // vpblendvb    xmm5, xmm5, xmm10, xmm12
	LONG $0x4c49c3c4; WORD $0x10ca // vpblendvb    xmm1, xmm6, xmm10, xmm1
	LONG $0x4c41c3c4; WORD $0x30da // vpblendvb    xmm3, xmm7, xmm10, xmm3
	// Gather the low 64 bits of the four groups into one ymm; vpermq 216 (qword order
	// 0,2,1,3) restores element order after the in-lane unpack.
	LONG $0x3875e3c4; WORD $0x01cb // vinserti128    ymm1, ymm1, xmm3, 1
	LONG $0x385de3c4; WORD $0x01dd // vinserti128    ymm3, ymm4, xmm5, 1
	LONG $0xc96ce5c5               // vpunpcklqdq    ymm1, ymm3, ymm1
	LONG $0x00fde3c4; WORD $0xd8c9 // vpermq    ymm1, ymm1, 216
	// Store 16 word results (32 bytes).
	LONG $0x7f7ec1c4; WORD $0x780c // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x10c78348               // add    rdi, 16
	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
	JNE  LBB4_632
	// Nothing left over when the rounded count equals the full count.
	WORD $0x3948; BYTE $0xc6       // cmp    rsi, rax
	JE   LBB4_1351

// Scalar tail setup: xmm0 = 0.0, xmm1 = 16-byte mask at 48[rbp], xmm2 = qword 8[rbp] duplicated.
// NOTE(review): vmovapd is an aligned load, so 48[rbp] must be 16-byte aligned.
LBB4_634:
	LONG $0xc057f9c5             // vxorpd    xmm0, xmm0, xmm0
	LONG $0x4d28f9c5; BYTE $0x30 // vmovapd    xmm1, oword 48[rbp] /* [rip + .LCPI4_2] */
	LONG $0x5512fbc5; BYTE $0x08 // vmovddup    xmm2, qword 8[rbp] /* [rip + .LCPI4_1] */

// Scalar tail: rsi = index, runs until rsi == rax.
LBB4_635:
	LONG $0x1c10fbc5; BYTE $0xf1 // vmovsd    xmm3, qword [rcx + 8*rsi]
	// The flags set here are consumed by the cmove below; the vector ops and the
	// conversion in between do not modify them.
	LONG $0xc32ef9c5             // vucomisd    xmm0, xmm3
	LONG $0xd954e1c5             // vandpd    xmm3, xmm3, xmm1
	LONG $0xdb56e9c5             // vorpd    xmm3, xmm2, xmm3
	LONG $0xfb2cfbc5             // vcvttsd2si    edi, xmm3
	// vucomisd sets ZF for "equal" and for "unordered", so zero and NaN inputs take edx.
	// NOTE(review): edx is not written in this block -- presumably zeroed earlier; confirm.
	WORD $0x440f; BYTE $0xfa     // cmove    edi, edx
	LONG $0x3c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], di
	LONG $0x01c68348             // add    rsi, 1
	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
	JNE  LBB4_635
	JMP  LBB4_1351

// LBB4_642 / LBB4_643 / LBB4_645: for every 64-bit input, store a 16-bit value
// that is non-zero only when the input is non-zero.
// Register roles as used in this block: rcx = input base (indexed *8),
// r8 = output base (indexed *2), rax = element count.
// NOTE(review): rcx/r8/rax are set before this chunk -- confirm against the caller code.
LBB4_642:
	// rdx = rax rounded down to a multiple of 16; rsi = element index.
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xf0     // and    edx, -16
	WORD $0xf631                 // xor    esi, esi
	// The VEX-encoded xmm write also clears the upper half, so ymm0 is all zero; ymm1 = all-ones.
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5             // vpcmpeqd    ymm1, ymm1, ymm1
	// NOTE(review): the 16 bytes at 96[rbp] are presumably words of 1, matching the 0/1
	// results of the scalar tail -- confirm against the constant table. vmovdqa is an
	// aligned load, so 96[rbp] must be 16-byte aligned.
	LONG $0x556ff9c5; BYTE $0x60 // vmovdqa    xmm2, oword 96[rbp] /* [rip + .LCPI4_16] */

// Vector loop: 16 qwords per iteration, handled as four groups of 4 qwords.
// Per group: invert (x == 0), narrow the qword masks to words with two packs,
// then AND with the constant in xmm2.
LBB4_643:
	LONG $0x297de2c4; WORD $0xf11c             // vpcmpeqq    ymm3, ymm0, yword [rcx + 8*rsi]
	LONG $0xd9efe5c5                           // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xdb6be1c5                           // vpackssdw    xmm3, xmm3, xmm3
	LONG $0xdadbe1c5                           // vpand    xmm3, xmm3, xmm2
	LONG $0x297de2c4; WORD $0xf164; BYTE $0x20 // vpcmpeqq    ymm4, ymm0, yword [rcx + 8*rsi + 32]
	LONG $0xe1efddc5                           // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe46bd9c5                           // vpackssdw    xmm4, xmm4, xmm4
	LONG $0x297de2c4; WORD $0xf16c; BYTE $0x40 // vpcmpeqq    ymm5, ymm0, yword [rcx + 8*rsi + 64]
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xed6bd1c5                           // vpackssdw    xmm5, xmm5, xmm5
	LONG $0xeadbd1c5                           // vpand    xmm5, xmm5, xmm2
	LONG $0x297de2c4; WORD $0xf174; BYTE $0x60 // vpcmpeqq    ymm6, ymm0, yword [rcx + 8*rsi + 96]
	LONG $0xf1efcdc5                           // vpxor    ymm6, ymm6, ymm1
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xf66bc9c5                           // vpackssdw    xmm6, xmm6, xmm6
	LONG $0xf2dbc9c5                           // vpand    xmm6, xmm6, xmm2
	// Gather the low 64 bits of the four groups into one ymm; vpermq 216 (qword order
	// 0,2,1,3) restores element order after the in-lane unpack.
	LONG $0x3855e3c4; WORD $0x01ee             // vinserti128    ymm5, ymm5, xmm6, 1
	LONG $0x3865e3c4; WORD $0x01dc             // vinserti128    ymm3, ymm3, xmm4, 1
	LONG $0xdd6ce5c5                           // vpunpcklqdq    ymm3, ymm3, ymm5
	LONG $0x00fde3c4; WORD $0xd8db             // vpermq    ymm3, ymm3, 216
	// Store 16 word results (32 bytes).
	LONG $0x7f7ec1c4; WORD $0x701c             // vmovdqu    yword [r8 + 2*rsi], ymm3
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_643
	// Nothing left over when the rounded count equals the full count.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail: rdx = index, runs until rdx == rax; stores (qword != 0) as a word.
LBB4_645:
	WORD $0xf631                 // xor    esi, esi
	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x34894166; BYTE $0x50 // mov    word [r8 + 2*rdx], si
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_645
	JMP  LBB4_1351

LBB4_646:
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xf0     // and    edx, -16
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5             // vpcmpeqd    ymm1, ymm1, ymm1
	LONG $0x556ff9c5; BYTE $0x60 // vmovdqa    xmm2, oword 96[rbp] /* [rip + .LCPI4_16] */

LBB4_647:
	LONG $0x297de2c4; WORD $0xf11c             // vpcmpeqq    ymm3, ymm0, yword [rcx + 8*rsi]
	LONG $0xd9efe5c5                           // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xdb6be1c5                           // vpackssdw    xmm3, xmm3, xmm3
	LONG $0xdadbe1c5                           // vpand    xmm3, xmm3, xmm2
	LONG $0x297de2c4; WORD $0xf164; BYTE $0x20 // vpcmpeqq    ymm4, ymm0, yword [rcx + 8*rsi + 32]
	LONG $0xe1efddc5                           // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe46bd9c5                           // vpackssdw    xmm4, xmm4, xmm4
	LONG $0x297de2c4; WORD $0xf16c; BYTE $0x40 // vpcmpeqq    ymm5, ymm0, yword [rcx + 8*rsi + 64]
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xed6bd1c5                           // vpackssdw    xmm5, xmm5, xmm5
	LONG $0xeadbd1c5                           // vpand    xmm5, xmm5, xmm2
	LONG $0x297de2c4; WORD $0xf174; BYTE $0x60 // vpcmpeqq    ymm6, ymm0, yword [rcx + 8*rsi + 96]
	LONG $0xf1efcdc5                           // vpxor    ymm6, ymm6, ymm1
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xf66bc9c5                           // vpackssdw    xmm6, xmm6, xmm6
	LONG $0xf2dbc9c5                           // vpand    xmm6, xmm6, xmm2
	LONG $0x3855e3c4; WORD $0x01ee             // vinserti128    ymm5, ymm5, xmm6, 1
	LONG $0x3865e3c4; WORD $0x01dc             // vinserti128    ymm3, ymm3, xmm4, 1
	LONG $0xdd6ce5c5                           // vpunpcklqdq    ymm3, ymm3, ymm5
	LONG $0x00fde3c4; WORD $0xd8db             // vpermq    ymm3, ymm3, 216
	LONG $0x7f7ec1c4; WORD $0x701c             // vmovdqu    yword [r8 + 2*rsi], ymm3
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_647
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// LBB4_649: scalar loop. For each index rdx (starting at its value on entry)
// it stores a 16-bit 1 at r8 + 2*rdx when the 64-bit value at rcx + 8*rdx is
// non-zero, and a 16-bit 0 otherwise. The loop ends when rdx reaches rax and
// then jumps to LBB4_1351 (outside this view; presumably the common exit).
LBB4_649:
	WORD $0xf631                 // xor    esi, esi
	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x34894166; BYTE $0x50 // mov    word [r8 + 2*rdx], si
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_649
	JMP  LBB4_1351

// LBB4_662 / LBB4_663: vectorised sign computation, signed 64-bit input at rcx,
// 16-bit output at r8, 16 elements per iteration.
//   rdx  = r10d rounded down to a multiple of 16 (vector loop bound)
//   rsi  = element index, starts at 0
//   ymm0 = zero, ymm9 = all ones
//   xmm10 = 16-byte constant loaded from 96[rbp] (value not visible here;
//           presumably 1 in every 16-bit lane, matching the scalar tail - confirm)
// The loop only terminates when rsi == rdx, so it assumes rdx >= 16 on entry
// (presumably guaranteed by the code that jumps here - not visible).
LBB4_662:
	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d
	WORD $0xe283; BYTE $0xf0     // and    edx, -16
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x763541c4; BYTE $0xc9 // vpcmpeqd    ymm9, ymm9, ymm9
	LONG $0x556f79c5; BYTE $0x60 // vmovdqa    xmm10, oword 96[rbp] /* [rip + .LCPI4_16] */

LBB4_663:
	// Load four vectors of four 64-bit elements each.
	LONG $0x3c6ffec5; BYTE $0xf1   // vmovdqu    ymm7, yword [rcx + 8*rsi]
	LONG $0x446f7ec5; WORD $0x20f1 // vmovdqu    ymm8, yword [rcx + 8*rsi + 32]
	LONG $0x746ffec5; WORD $0x40f1 // vmovdqu    ymm6, yword [rcx + 8*rsi + 64]
	LONG $0x646ffec5; WORD $0x60f1 // vmovdqu    ymm4, yword [rcx + 8*rsi + 96]
	// (x > 0) masks, narrowed from 64-bit to 16-bit lanes by two saturating
	// packs; results end up in xmm11, xmm12, xmm1 and xmm2.
	LONG $0x3745e2c4; BYTE $0xd8   // vpcmpgtq    ymm3, ymm7, ymm0
	LONG $0x397de3c4; WORD $0x01dd // vextracti128    xmm5, ymm3, 1
	LONG $0xdd6be1c5               // vpackssdw    xmm3, xmm3, xmm5
	LONG $0xdb6b61c5               // vpackssdw    xmm11, xmm3, xmm3
	LONG $0x373de2c4; BYTE $0xe8   // vpcmpgtq    ymm5, ymm8, ymm0
	LONG $0x397de3c4; WORD $0x01e9 // vextracti128    xmm1, ymm5, 1
	LONG $0xc96bd1c5               // vpackssdw    xmm1, xmm5, xmm1
	LONG $0xe16b71c5               // vpackssdw    xmm12, xmm1, xmm1
	LONG $0x374de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm6, ymm0
	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
	LONG $0xca6bf1c5               // vpackssdw    xmm1, xmm1, xmm2
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0x375de2c4; BYTE $0xd0   // vpcmpgtq    ymm2, ymm4, ymm0
	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
	LONG $0xd36be9c5               // vpackssdw    xmm2, xmm2, xmm3
	LONG $0xd26be9c5               // vpackssdw    xmm2, xmm2, xmm2
	// (x != 0) masks: compare-equal with zero, inverted via XOR with all ones,
	// then narrowed the same way; results in xmm3, xmm5, xmm6 and xmm4.
	// A set mask lane is 0xFFFF, i.e. -1 as a 16-bit integer.
	LONG $0x2945e2c4; BYTE $0xd8   // vpcmpeqq    ymm3, ymm7, ymm0
	LONG $0xdbefb5c5               // vpxor    ymm3, ymm9, ymm3
	LONG $0x397de3c4; WORD $0x01df // vextracti128    xmm7, ymm3, 1
	LONG $0xdf6be1c5               // vpackssdw    xmm3, xmm3, xmm7
	LONG $0xdb6be1c5               // vpackssdw    xmm3, xmm3, xmm3
	LONG $0x293de2c4; BYTE $0xf8   // vpcmpeqq    ymm7, ymm8, ymm0
	LONG $0xffefb5c5               // vpxor    ymm7, ymm9, ymm7
	LONG $0x397de3c4; WORD $0x01fd // vextracti128    xmm5, ymm7, 1
	LONG $0xed6bc1c5               // vpackssdw    xmm5, xmm7, xmm5
	LONG $0xed6bd1c5               // vpackssdw    xmm5, xmm5, xmm5
	LONG $0x294de2c4; BYTE $0xf0   // vpcmpeqq    ymm6, ymm6, ymm0
	LONG $0xf6efb5c5               // vpxor    ymm6, ymm9, ymm6
	LONG $0x397de3c4; WORD $0x01f7 // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5               // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xf66bc9c5               // vpackssdw    xmm6, xmm6, xmm6
	LONG $0x295de2c4; BYTE $0xe0   // vpcmpeqq    ymm4, ymm4, ymm0
	LONG $0xe4efb5c5               // vpxor    ymm4, ymm9, ymm4
	LONG $0x397de3c4; WORD $0x01e7 // vextracti128    xmm7, ymm4, 1
	LONG $0xe76bd9c5               // vpackssdw    xmm4, xmm4, xmm7
	LONG $0xe46bd9c5               // vpackssdw    xmm4, xmm4, xmm4
	// Select per lane: the xmm10 constant where x > 0, otherwise the (x != 0)
	// mask (-1 for negative values, 0 for zero).
	LONG $0x4c61c3c4; WORD $0xb0da // vpblendvb    xmm3, xmm3, xmm10, xmm11
	LONG $0x4c51c3c4; WORD $0xc0ea // vpblendvb    xmm5, xmm5, xmm10, xmm12
	LONG $0x4c49c3c4; WORD $0x10ca // vpblendvb    xmm1, xmm6, xmm10, xmm1
	LONG $0x4c59c3c4; WORD $0x20d2 // vpblendvb    xmm2, xmm4, xmm10, xmm2
	// Gather the low 64 bits of the four results into one 32-byte vector; the
	// vpermq (216 = qword order 0,2,1,3) restores source element order.
	LONG $0x3875e3c4; WORD $0x01ca // vinserti128    ymm1, ymm1, xmm2, 1
	LONG $0x3865e3c4; WORD $0x01d5 // vinserti128    ymm2, ymm3, xmm5, 1
	LONG $0xc96cedc5               // vpunpcklqdq    ymm1, ymm2, ymm1
	LONG $0x00fde3c4; WORD $0xd8c9 // vpermq    ymm1, ymm1, 216
	LONG $0x7f7ec1c4; WORD $0x700c // vmovdqu    yword [r8 + 2*rsi], ymm1
	LONG $0x10c68348               // add    rsi, 16
	WORD $0x3948; BYTE $0xf2       // cmp    rdx, rsi
	JNE  LBB4_663
	// Done if the vector bound equals the full count in r10; otherwise fall
	// through to the scalar loop with rdx as the first unprocessed index.
	WORD $0x394c; BYTE $0xd2       // cmp    rdx, r10
	JE   LBB4_1351

// LBB4_665 / LBB4_666: scalar sign loop, signed 64-bit input at rcx, 16-bit
// output at r8. Starts at the index held in rdx on entry and runs until rdx
// reaches r10. For each element it stores 1 if the value is > 0, -1 if it is
// < 0, and 0 if it is zero, then jumps to LBB4_1351 (outside this view;
// presumably the common exit).
LBB4_665:
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

LBB4_666:
	LONG $0xd13c8b48             // mov    rdi, qword [rcx + 8*rdx]
	// eax = -(x != 0): 0 for zero, -1 for any non-zero value.
	WORD $0xc031                 // xor    eax, eax
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0xd8f7                 // neg    eax
	// neg clobbered the flags, so test again; positive values become esi (= 1).
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
	JNE  LBB4_666
	JMP  LBB4_1351

// LBB4_667 / LBB4_668: vectorised sign computation, signed 64-bit input at rcx,
// 16-bit output at r8, 16 elements per iteration. The instruction sequence is
// the same as LBB4_662 / LBB4_663; which dispatch case reaches this copy is
// not visible here.
//   rdx  = r10d rounded down to a multiple of 16 (vector loop bound)
//   rsi  = element index, starts at 0
//   ymm0 = zero, ymm9 = all ones
//   xmm10 = 16-byte constant loaded from 96[rbp] (value not visible here;
//           presumably 1 in every 16-bit lane, matching the scalar tail - confirm)
// The loop only terminates when rsi == rdx, so it assumes rdx >= 16 on entry
// (presumably guaranteed by the code that jumps here - not visible).
LBB4_667:
	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d
	WORD $0xe283; BYTE $0xf0     // and    edx, -16
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x763541c4; BYTE $0xc9 // vpcmpeqd    ymm9, ymm9, ymm9
	LONG $0x556f79c5; BYTE $0x60 // vmovdqa    xmm10, oword 96[rbp] /* [rip + .LCPI4_16] */

LBB4_668:
	// Load four vectors of four 64-bit elements each.
	LONG $0x3c6ffec5; BYTE $0xf1   // vmovdqu    ymm7, yword [rcx + 8*rsi]
	LONG $0x446f7ec5; WORD $0x20f1 // vmovdqu    ymm8, yword [rcx + 8*rsi + 32]
	LONG $0x746ffec5; WORD $0x40f1 // vmovdqu    ymm6, yword [rcx + 8*rsi + 64]
	LONG $0x646ffec5; WORD $0x60f1 // vmovdqu    ymm4, yword [rcx + 8*rsi + 96]
	// (x > 0) masks, narrowed from 64-bit to 16-bit lanes by two saturating
	// packs; results end up in xmm11, xmm12, xmm1 and xmm2.
	LONG $0x3745e2c4; BYTE $0xd8   // vpcmpgtq    ymm3, ymm7, ymm0
	LONG $0x397de3c4; WORD $0x01dd // vextracti128    xmm5, ymm3, 1
	LONG $0xdd6be1c5               // vpackssdw    xmm3, xmm3, xmm5
	LONG $0xdb6b61c5               // vpackssdw    xmm11, xmm3, xmm3
	LONG $0x373de2c4; BYTE $0xe8   // vpcmpgtq    ymm5, ymm8, ymm0
	LONG $0x397de3c4; WORD $0x01e9 // vextracti128    xmm1, ymm5, 1
	LONG $0xc96bd1c5               // vpackssdw    xmm1, xmm5, xmm1
	LONG $0xe16b71c5               // vpackssdw    xmm12, xmm1, xmm1
	LONG $0x374de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm6, ymm0
	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
	LONG $0xca6bf1c5               // vpackssdw    xmm1, xmm1, xmm2
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0x375de2c4; BYTE $0xd0   // vpcmpgtq    ymm2, ymm4, ymm0
	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
	LONG $0xd36be9c5               // vpackssdw    xmm2, xmm2, xmm3
	LONG $0xd26be9c5               // vpackssdw    xmm2, xmm2, xmm2
	// (x != 0) masks: compare-equal with zero, inverted via XOR with all ones,
	// then narrowed the same way; results in xmm3, xmm5, xmm6 and xmm4.
	// A set mask lane is 0xFFFF, i.e. -1 as a 16-bit integer.
	LONG $0x2945e2c4; BYTE $0xd8   // vpcmpeqq    ymm3, ymm7, ymm0
	LONG $0xdbefb5c5               // vpxor    ymm3, ymm9, ymm3
	LONG $0x397de3c4; WORD $0x01df // vextracti128    xmm7, ymm3, 1
	LONG $0xdf6be1c5               // vpackssdw    xmm3, xmm3, xmm7
	LONG $0xdb6be1c5               // vpackssdw    xmm3, xmm3, xmm3
	LONG $0x293de2c4; BYTE $0xf8   // vpcmpeqq    ymm7, ymm8, ymm0
	LONG $0xffefb5c5               // vpxor    ymm7, ymm9, ymm7
	LONG $0x397de3c4; WORD $0x01fd // vextracti128    xmm5, ymm7, 1
	LONG $0xed6bc1c5               // vpackssdw    xmm5, xmm7, xmm5
	LONG $0xed6bd1c5               // vpackssdw    xmm5, xmm5, xmm5
	LONG $0x294de2c4; BYTE $0xf0   // vpcmpeqq    ymm6, ymm6, ymm0
	LONG $0xf6efb5c5               // vpxor    ymm6, ymm9, ymm6
	LONG $0x397de3c4; WORD $0x01f7 // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5               // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xf66bc9c5               // vpackssdw    xmm6, xmm6, xmm6
	LONG $0x295de2c4; BYTE $0xe0   // vpcmpeqq    ymm4, ymm4, ymm0
	LONG $0xe4efb5c5               // vpxor    ymm4, ymm9, ymm4
	LONG $0x397de3c4; WORD $0x01e7 // vextracti128    xmm7, ymm4, 1
	LONG $0xe76bd9c5               // vpackssdw    xmm4, xmm4, xmm7
	LONG $0xe46bd9c5               // vpackssdw    xmm4, xmm4, xmm4
	// Select per lane: the xmm10 constant where x > 0, otherwise the (x != 0)
	// mask (-1 for negative values, 0 for zero).
	LONG $0x4c61c3c4; WORD $0xb0da // vpblendvb    xmm3, xmm3, xmm10, xmm11
	LONG $0x4c51c3c4; WORD $0xc0ea // vpblendvb    xmm5, xmm5, xmm10, xmm12
	LONG $0x4c49c3c4; WORD $0x10ca // vpblendvb    xmm1, xmm6, xmm10, xmm1
	LONG $0x4c59c3c4; WORD $0x20d2 // vpblendvb    xmm2, xmm4, xmm10, xmm2
	// Gather the low 64 bits of the four results into one 32-byte vector; the
	// vpermq (216 = qword order 0,2,1,3) restores source element order.
	LONG $0x3875e3c4; WORD $0x01ca // vinserti128    ymm1, ymm1, xmm2, 1
	LONG $0x3865e3c4; WORD $0x01d5 // vinserti128    ymm2, ymm3, xmm5, 1
	LONG $0xc96cedc5               // vpunpcklqdq    ymm1, ymm2, ymm1
	LONG $0x00fde3c4; WORD $0xd8c9 // vpermq    ymm1, ymm1, 216
	LONG $0x7f7ec1c4; WORD $0x700c // vmovdqu    yword [r8 + 2*rsi], ymm1
	LONG $0x10c68348               // add    rsi, 16
	WORD $0x3948; BYTE $0xf2       // cmp    rdx, rsi
	JNE  LBB4_668
	// Done if the vector bound equals the full count in r10; otherwise fall
	// through to the scalar loop with rdx as the first unprocessed index.
	WORD $0x394c; BYTE $0xd2       // cmp    rdx, r10
	JE   LBB4_1351

// LBB4_670 / LBB4_671: scalar sign loop, signed 64-bit input at rcx, 16-bit
// output at r8 (same instruction sequence as LBB4_665 / LBB4_666). Starts at
// the index held in rdx on entry and runs until rdx reaches r10. For each
// element it stores 1 if the value is > 0, -1 if it is < 0, and 0 if it is
// zero, then jumps to LBB4_1351 (outside this view; presumably the common exit).
LBB4_670:
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

LBB4_671:
	LONG $0xd13c8b48             // mov    rdi, qword [rcx + 8*rdx]
	// eax = -(x != 0): 0 for zero, -1 for any non-zero value.
	WORD $0xc031                 // xor    eax, eax
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0xd8f7                 // neg    eax
	// neg clobbered the flags, so test again; positive values become esi (= 1).
	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
	JNE  LBB4_671
	JMP  LBB4_1351

// LBB4_672 / LBB4_673: vectorised sign computation, single-precision float
// input at rcx, 16-bit output at r8, 32 elements per iteration.
//   rsi   = eax rounded down to a multiple of 32 (vector loop bound)
//   rdi   = element index, starts at 0
//   xmm9  = zero (used as ymm9 in the loop), ymm10 = all ones (-1 per dword)
//   xmm11 = 16-byte constant loaded from 64[rbp] (value not visible here;
//           presumably 1 in every 16-bit lane - confirm)
//   xmm12 = all ones (-1 in every 16-bit lane)
// The loop only terminates when rdi == rsi, so it assumes rsi >= 32 on entry
// (presumably guaranteed by the code that jumps here - not visible).
LBB4_672:
	WORD $0xc689                 // mov    esi, eax
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	WORD $0xff31                 // xor    edi, edi
	LONG $0x573041c4; BYTE $0xc9 // vxorps    xmm9, xmm9, xmm9
	LONG $0x762d41c4; BYTE $0xd2 // vpcmpeqd    ymm10, ymm10, ymm10
	LONG $0x5d6f79c5; BYTE $0x40 // vmovdqa    xmm11, oword 64[rbp] /* [rip + .LCPI4_11] */
	LONG $0x761941c4; BYTE $0xe4 // vpcmpeqd    xmm12, xmm12, xmm12

LBB4_673:
	// Load four vectors of eight floats each.
	LONG $0x2410fcc5; BYTE $0xb9               // vmovups    ymm4, yword [rcx + 4*rdi]
	LONG $0x6c10fcc5; WORD $0x20b9             // vmovups    ymm5, yword [rcx + 4*rdi + 32]
	LONG $0x7410fcc5; WORD $0x40b9             // vmovups    ymm6, yword [rcx + 4*rdi + 64]
	LONG $0x7c10fcc5; WORD $0x60b9             // vmovups    ymm7, yword [rcx + 4*rdi + 96]
	// (x == 0.0) masks (compare predicate 0), narrowed from 32-bit to 16-bit
	// lanes; results in xmm13, xmm1, xmm2 and xmm3.
	LONG $0xc4c234c5; BYTE $0x00               // vcmpeqps    ymm8, ymm9, ymm4
	LONG $0x197d63c4; WORD $0x01c0             // vextractf128    xmm0, ymm8, 1
	LONG $0xe86b39c5                           // vpackssdw    xmm13, xmm8, xmm0
	LONG $0xc5c234c5; BYTE $0x00               // vcmpeqps    ymm8, ymm9, ymm5
	LONG $0x197d63c4; WORD $0x01c1             // vextractf128    xmm1, ymm8, 1
	LONG $0xc96bb9c5                           // vpackssdw    xmm1, xmm8, xmm1
	LONG $0xc6c234c5; BYTE $0x00               // vcmpeqps    ymm8, ymm9, ymm6
	LONG $0x197d63c4; WORD $0x01c2             // vextractf128    xmm2, ymm8, 1
	LONG $0xd26bb9c5                           // vpackssdw    xmm2, xmm8, xmm2
	LONG $0xc7c234c5; BYTE $0x00               // vcmpeqps    ymm8, ymm9, ymm7
	LONG $0x197d63c4; WORD $0x01c3             // vextractf128    xmm3, ymm8, 1
	LONG $0xdb6bb9c5                           // vpackssdw    xmm3, xmm8, xmm3
	// Sign-bit-clear masks: the raw float bits compared as signed int32
	// against -1; narrowed to 16-bit lanes in xmm0, xmm4, xmm5 and xmm6.
	LONG $0x665dc1c4; BYTE $0xe2               // vpcmpgtd    ymm4, ymm4, ymm10
	LONG $0x397de3c4; WORD $0x01e0             // vextracti128    xmm0, ymm4, 1
	LONG $0xc06bd9c5                           // vpackssdw    xmm0, xmm4, xmm0
	LONG $0x6655c1c4; BYTE $0xe2               // vpcmpgtd    ymm4, ymm5, ymm10
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0x664dc1c4; BYTE $0xea               // vpcmpgtd    ymm5, ymm6, ymm10
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0x6645c1c4; BYTE $0xf2               // vpcmpgtd    ymm6, ymm7, ymm10
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	// Per lane: pick the xmm11 constant where the sign bit is clear, else -1
	// (xmm12); then vpandn forces lanes where x == 0.0 to 0.
	LONG $0x4c19c3c4; WORD $0x00c3             // vpblendvb    xmm0, xmm12, xmm11, xmm0
	LONG $0xc0df91c5                           // vpandn    xmm0, xmm13, xmm0
	LONG $0x4c19c3c4; WORD $0x40e3             // vpblendvb    xmm4, xmm12, xmm11, xmm4
	LONG $0xccdff1c5                           // vpandn    xmm1, xmm1, xmm4
	LONG $0x4c19c3c4; WORD $0x50e3             // vpblendvb    xmm4, xmm12, xmm11, xmm5
	LONG $0x4c19c3c4; WORD $0x60eb             // vpblendvb    xmm5, xmm12, xmm11, xmm6
	LONG $0xd4dfe9c5                           // vpandn    xmm2, xmm2, xmm4
	LONG $0xdddfe1c5                           // vpandn    xmm3, xmm3, xmm5
	// Store 4 x 8 16-bit results.
	LONG $0x7f7ac1c4; WORD $0x7804             // vmovdqu    oword [r8 + 2*rdi], xmm0
	LONG $0x7f7ac1c4; WORD $0x784c; BYTE $0x10 // vmovdqu    oword [r8 + 2*rdi + 16], xmm1
	LONG $0x7f7ac1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    oword [r8 + 2*rdi + 32], xmm2
	LONG $0x7f7ac1c4; WORD $0x785c; BYTE $0x30 // vmovdqu    oword [r8 + 2*rdi + 48], xmm3
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB4_673
	// Done if the vector bound equals the full count in rax; otherwise fall
	// through to the scalar loop with rsi as the first unprocessed index.
	WORD $0x3948; BYTE $0xc6                   // cmp    rsi, rax
	JE   LBB4_1351

// LBB4_675 / LBB4_676: scalar sign loop, single-precision float input at rcx,
// 16-bit output at r8. Starts at the index held in rsi on entry and runs until
// rsi reaches rax. For each element it stores +1 when the sign bit is clear,
// -1 when it is set, and the low word of r10d when the value compares equal to
// 0.0. r10d is set before this block (not visible here; presumably 0 - confirm).
// NOTE(review): vucomiss also sets ZF for unordered (NaN) operands, so a NaN
// input stores r10w here, whereas the vector loop LBB4_673 uses an ordered
// equality compare and would store +/-1 - confirm NaN inputs are not expected.
LBB4_675:
	LONG $0xc0eff9c5 // vpxor    xmm0, xmm0, xmm0

LBB4_676:
	LONG $0x0c6ef9c5; BYTE $0xb1 // vmovd    xmm1, dword [rcx + 4*rsi]
	LONG $0xcf7ef9c5             // vmovd    edi, xmm1
	// dl = 1 when the sign bit of the raw float bits is clear.
	WORD $0xd231                 // xor    edx, edx
	WORD $0xff85                 // test    edi, edi
	WORD $0x990f; BYTE $0xd2     // setns    dl
	// Compare against 0.0; the following lea maps dl {0,1} to {-1,+1} without
	// touching the flags that cmove consumes.
	LONG $0xc12ef8c5             // vucomiss    xmm0, xmm1
	LONG $0xff12548d             // lea    edx, [rdx + rdx - 1]
	LONG $0xd2440f41             // cmove    edx, r10d
	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
	LONG $0x01c68348             // add    rsi, 1
	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
	JNE  LBB4_676
	JMP  LBB4_1351

// LBB4_677 / LBB4_678: vectorised sign computation, single-precision float
// input at rcx, 16-bit output at r8, 32 elements per iteration. The
// instruction sequence is the same as LBB4_672 / LBB4_673; which dispatch case
// reaches this copy is not visible here.
//   rsi   = eax rounded down to a multiple of 32 (vector loop bound)
//   rdi   = element index, starts at 0
//   xmm9  = zero (used as ymm9 in the loop), ymm10 = all ones (-1 per dword)
//   xmm11 = 16-byte constant loaded from 64[rbp] (value not visible here;
//           presumably 1 in every 16-bit lane - confirm)
//   xmm12 = all ones (-1 in every 16-bit lane)
// The loop only terminates when rdi == rsi, so it assumes rsi >= 32 on entry
// (presumably guaranteed by the code that jumps here - not visible).
LBB4_677:
	WORD $0xc689                 // mov    esi, eax
	WORD $0xe683; BYTE $0xe0     // and    esi, -32
	WORD $0xff31                 // xor    edi, edi
	LONG $0x573041c4; BYTE $0xc9 // vxorps    xmm9, xmm9, xmm9
	LONG $0x762d41c4; BYTE $0xd2 // vpcmpeqd    ymm10, ymm10, ymm10
	LONG $0x5d6f79c5; BYTE $0x40 // vmovdqa    xmm11, oword 64[rbp] /* [rip + .LCPI4_11] */
	LONG $0x761941c4; BYTE $0xe4 // vpcmpeqd    xmm12, xmm12, xmm12

LBB4_678:
	// Load four vectors of eight floats each.
	LONG $0x2410fcc5; BYTE $0xb9               // vmovups    ymm4, yword [rcx + 4*rdi]
	LONG $0x6c10fcc5; WORD $0x20b9             // vmovups    ymm5, yword [rcx + 4*rdi + 32]
	LONG $0x7410fcc5; WORD $0x40b9             // vmovups    ymm6, yword [rcx + 4*rdi + 64]
	LONG $0x7c10fcc5; WORD $0x60b9             // vmovups    ymm7, yword [rcx + 4*rdi + 96]
	// (x == 0.0) masks (compare predicate 0), narrowed from 32-bit to 16-bit
	// lanes; results in xmm13, xmm1, xmm2 and xmm3.
	LONG $0xc4c234c5; BYTE $0x00               // vcmpeqps    ymm8, ymm9, ymm4
	LONG $0x197d63c4; WORD $0x01c0             // vextractf128    xmm0, ymm8, 1
	LONG $0xe86b39c5                           // vpackssdw    xmm13, xmm8, xmm0
	LONG $0xc5c234c5; BYTE $0x00               // vcmpeqps    ymm8, ymm9, ymm5
	LONG $0x197d63c4; WORD $0x01c1             // vextractf128    xmm1, ymm8, 1
	LONG $0xc96bb9c5                           // vpackssdw    xmm1, xmm8, xmm1
	LONG $0xc6c234c5; BYTE $0x00               // vcmpeqps    ymm8, ymm9, ymm6
	LONG $0x197d63c4; WORD $0x01c2             // vextractf128    xmm2, ymm8, 1
	LONG $0xd26bb9c5                           // vpackssdw    xmm2, xmm8, xmm2
	LONG $0xc7c234c5; BYTE $0x00               // vcmpeqps    ymm8, ymm9, ymm7
	LONG $0x197d63c4; WORD $0x01c3             // vextractf128    xmm3, ymm8, 1
	LONG $0xdb6bb9c5                           // vpackssdw    xmm3, xmm8, xmm3
	// Sign-bit-clear masks: the raw float bits compared as signed int32
	// against -1; narrowed to 16-bit lanes in xmm0, xmm4, xmm5 and xmm6.
	LONG $0x665dc1c4; BYTE $0xe2               // vpcmpgtd    ymm4, ymm4, ymm10
	LONG $0x397de3c4; WORD $0x01e0             // vextracti128    xmm0, ymm4, 1
	LONG $0xc06bd9c5                           // vpackssdw    xmm0, xmm4, xmm0
	LONG $0x6655c1c4; BYTE $0xe2               // vpcmpgtd    ymm4, ymm5, ymm10
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0x664dc1c4; BYTE $0xea               // vpcmpgtd    ymm5, ymm6, ymm10
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0x6645c1c4; BYTE $0xf2               // vpcmpgtd    ymm6, ymm7, ymm10
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	// Per lane: pick the xmm11 constant where the sign bit is clear, else -1
	// (xmm12); then vpandn forces lanes where x == 0.0 to 0.
	LONG $0x4c19c3c4; WORD $0x00c3             // vpblendvb    xmm0, xmm12, xmm11, xmm0
	LONG $0xc0df91c5                           // vpandn    xmm0, xmm13, xmm0
	LONG $0x4c19c3c4; WORD $0x40e3             // vpblendvb    xmm4, xmm12, xmm11, xmm4
	LONG $0xccdff1c5                           // vpandn    xmm1, xmm1, xmm4
	LONG $0x4c19c3c4; WORD $0x50e3             // vpblendvb    xmm4, xmm12, xmm11, xmm5
	LONG $0x4c19c3c4; WORD $0x60eb             // vpblendvb    xmm5, xmm12, xmm11, xmm6
	LONG $0xd4dfe9c5                           // vpandn    xmm2, xmm2, xmm4
	LONG $0xdddfe1c5                           // vpandn    xmm3, xmm3, xmm5
	// Store 4 x 8 16-bit results.
	LONG $0x7f7ac1c4; WORD $0x7804             // vmovdqu    oword [r8 + 2*rdi], xmm0
	LONG $0x7f7ac1c4; WORD $0x784c; BYTE $0x10 // vmovdqu    oword [r8 + 2*rdi + 16], xmm1
	LONG $0x7f7ac1c4; WORD $0x7854; BYTE $0x20 // vmovdqu    oword [r8 + 2*rdi + 32], xmm2
	LONG $0x7f7ac1c4; WORD $0x785c; BYTE $0x30 // vmovdqu    oword [r8 + 2*rdi + 48], xmm3
	LONG $0x20c78348                           // add    rdi, 32
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB4_678
	// Done if the vector bound equals the full count in rax; otherwise fall
	// through to the scalar loop with rsi as the first unprocessed index.
	WORD $0x3948; BYTE $0xc6                   // cmp    rsi, rax
	JE   LBB4_1351

// LBB4_680 / LBB4_681: scalar sign loop, single-precision float input at rcx,
// 16-bit output at r8 (same instruction sequence as LBB4_675 / LBB4_676).
// Starts at the index held in rsi on entry and runs until rsi reaches rax.
// For each element it stores +1 when the sign bit is clear, -1 when it is set,
// and the low word of r10d when the value compares equal to 0.0. r10d is set
// before this block (not visible here; presumably 0 - confirm).
// NOTE(review): vucomiss also sets ZF for unordered (NaN) operands, so a NaN
// input stores r10w here, whereas the vector loop LBB4_678 uses an ordered
// equality compare and would store +/-1 - confirm NaN inputs are not expected.
LBB4_680:
	LONG $0xc0eff9c5 // vpxor    xmm0, xmm0, xmm0

LBB4_681:
	LONG $0x0c6ef9c5; BYTE $0xb1 // vmovd    xmm1, dword [rcx + 4*rsi]
	LONG $0xcf7ef9c5             // vmovd    edi, xmm1
	// dl = 1 when the sign bit of the raw float bits is clear.
	WORD $0xd231                 // xor    edx, edx
	WORD $0xff85                 // test    edi, edi
	WORD $0x990f; BYTE $0xd2     // setns    dl
	// Compare against 0.0; the following lea maps dl {0,1} to {-1,+1} without
	// touching the flags that cmove consumes.
	LONG $0xc12ef8c5             // vucomiss    xmm0, xmm1
	LONG $0xff12548d             // lea    edx, [rdx + rdx - 1]
	LONG $0xd2440f41             // cmove    edx, r10d
	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
	LONG $0x01c68348             // add    rsi, 1
	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
	JNE  LBB4_681
	JMP  LBB4_1351

// LBB4_688 / LBB4_689: vectorised sign computation, signed 32-bit input at rcx,
// 16-bit output at r8, 32 elements per iteration.
//   rdx  = r10d rounded down to a multiple of 32 (vector loop bound)
//   rsi  = element index, starts at 0
//   ymm0 = zero, ymm8 = all ones
//   xmm9 = 16-byte constant loaded from 64[rbp] (value not visible here;
//          presumably 1 in every 16-bit lane, matching the scalar tail - confirm)
// The loop only terminates when rsi == rdx, so it assumes rdx >= 32 on entry
// (presumably guaranteed by the code that jumps here - not visible).
LBB4_688:
	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d
	WORD $0xe283; BYTE $0xe0     // and    edx, -32
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x763d41c4; BYTE $0xc0 // vpcmpeqd    ymm8, ymm8, ymm8
	LONG $0x4d6f79c5; BYTE $0x40 // vmovdqa    xmm9, oword 64[rbp] /* [rip + .LCPI4_11] */

LBB4_689:
	// Load four vectors of eight 32-bit elements each.
	LONG $0x246ffec5; BYTE $0xb1               // vmovdqu    ymm4, yword [rcx + 4*rsi]
	LONG $0x6c6ffec5; WORD $0x20b1             // vmovdqu    ymm5, yword [rcx + 4*rsi + 32]
	LONG $0x746ffec5; WORD $0x40b1             // vmovdqu    ymm6, yword [rcx + 4*rsi + 64]
	LONG $0x7c6ffec5; WORD $0x60b1             // vmovdqu    ymm7, yword [rcx + 4*rsi + 96]
	// (x > 0) masks, narrowed from 32-bit to 16-bit lanes; results in xmm10,
	// xmm11, xmm2 and xmm1.
	LONG $0xd866ddc5                           // vpcmpgtd    ymm3, ymm4, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xd16b61c5                           // vpackssdw    xmm10, xmm3, xmm1
	LONG $0xc866d5c5                           // vpcmpgtd    ymm1, ymm5, ymm0
	LONG $0x397de3c4; WORD $0x01ca             // vextracti128    xmm2, ymm1, 1
	LONG $0xda6b71c5                           // vpackssdw    xmm11, xmm1, xmm2
	LONG $0xd066cdc5                           // vpcmpgtd    ymm2, ymm6, ymm0
	LONG $0x397de3c4; WORD $0x01d3             // vextracti128    xmm3, ymm2, 1
	LONG $0xd36be9c5                           // vpackssdw    xmm2, xmm2, xmm3
	LONG $0xd866c5c5                           // vpcmpgtd    ymm3, ymm7, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xc96be1c5                           // vpackssdw    xmm1, xmm3, xmm1
	// (x != 0) masks: compare-equal with zero, inverted via XOR with all ones,
	// then narrowed; results in xmm3, xmm4, xmm5 and xmm6. A set mask lane is
	// 0xFFFF, i.e. -1 as a 16-bit integer.
	LONG $0xd876ddc5                           // vpcmpeqd    ymm3, ymm4, ymm0
	LONG $0xdbefbdc5                           // vpxor    ymm3, ymm8, ymm3
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xe076d5c5                           // vpcmpeqd    ymm4, ymm5, ymm0
	LONG $0xe4efbdc5                           // vpxor    ymm4, ymm8, ymm4
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe876cdc5                           // vpcmpeqd    ymm5, ymm6, ymm0
	LONG $0xedefbdc5                           // vpxor    ymm5, ymm8, ymm5
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xf076c5c5                           // vpcmpeqd    ymm6, ymm7, ymm0
	LONG $0xf6efbdc5                           // vpxor    ymm6, ymm8, ymm6
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	// Select per lane: the xmm9 constant where x > 0, otherwise the (x != 0)
	// mask (-1 for negative values, 0 for zero).
	LONG $0x4c61c3c4; WORD $0xa0d9             // vpblendvb    xmm3, xmm3, xmm9, xmm10
	LONG $0x4c59c3c4; WORD $0xb0e1             // vpblendvb    xmm4, xmm4, xmm9, xmm11
	LONG $0x4c51c3c4; WORD $0x20d1             // vpblendvb    xmm2, xmm5, xmm9, xmm2
	LONG $0x4c49c3c4; WORD $0x10c9             // vpblendvb    xmm1, xmm6, xmm9, xmm1
	// Store 4 x 8 16-bit results.
	LONG $0x7f7ac1c4; WORD $0x701c             // vmovdqu    oword [r8 + 2*rsi], xmm3
	LONG $0x7f7ac1c4; WORD $0x7064; BYTE $0x10 // vmovdqu    oword [r8 + 2*rsi + 16], xmm4
	LONG $0x7f7ac1c4; WORD $0x7054; BYTE $0x20 // vmovdqu    oword [r8 + 2*rsi + 32], xmm2
	LONG $0x7f7ac1c4; WORD $0x704c; BYTE $0x30 // vmovdqu    oword [r8 + 2*rsi + 48], xmm1
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_689
	// Done if the vector bound equals the full count in r10; otherwise fall
	// through to the scalar loop with rdx as the first unprocessed index.
	WORD $0x394c; BYTE $0xd2                   // cmp    rdx, r10
	JE   LBB4_1351

// LBB4_691 / LBB4_692: scalar sign loop, signed 32-bit input at rcx, 16-bit
// output at r8. Starts at the index held in rdx on entry and runs until rdx
// reaches r10. For each element it stores 1 if the value is > 0, -1 if it is
// < 0, and 0 if it is zero, then jumps to LBB4_1351 (outside this view;
// presumably the common exit).
LBB4_691:
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

LBB4_692:
	WORD $0x3c8b; BYTE $0x91     // mov    edi, dword [rcx + 4*rdx]
	// eax = -(x != 0): 0 for zero, -1 for any non-zero value.
	WORD $0xc031                 // xor    eax, eax
	WORD $0xff85                 // test    edi, edi
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0xd8f7                 // neg    eax
	// neg clobbered the flags, so test again; positive values become esi (= 1).
	WORD $0xff85                 // test    edi, edi
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
	JNE  LBB4_692
	JMP  LBB4_1351

// LBB4_693 / LBB4_694: vectorised sign computation, signed 32-bit input at rcx,
// 16-bit output at r8, 32 elements per iteration. The instruction sequence is
// the same as LBB4_688 / LBB4_689; which dispatch case reaches this copy is
// not visible here.
//   rdx  = r10d rounded down to a multiple of 32 (vector loop bound)
//   rsi  = element index, starts at 0
//   ymm0 = zero, ymm8 = all ones
//   xmm9 = 16-byte constant loaded from 64[rbp] (value not visible here;
//          presumably 1 in every 16-bit lane, matching the scalar tail - confirm)
// The loop only terminates when rsi == rdx, so it assumes rdx >= 32 on entry
// (presumably guaranteed by the code that jumps here - not visible).
LBB4_693:
	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d
	WORD $0xe283; BYTE $0xe0     // and    edx, -32
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x763d41c4; BYTE $0xc0 // vpcmpeqd    ymm8, ymm8, ymm8
	LONG $0x4d6f79c5; BYTE $0x40 // vmovdqa    xmm9, oword 64[rbp] /* [rip + .LCPI4_11] */

LBB4_694:
	// Load four vectors of eight 32-bit elements each.
	LONG $0x246ffec5; BYTE $0xb1               // vmovdqu    ymm4, yword [rcx + 4*rsi]
	LONG $0x6c6ffec5; WORD $0x20b1             // vmovdqu    ymm5, yword [rcx + 4*rsi + 32]
	LONG $0x746ffec5; WORD $0x40b1             // vmovdqu    ymm6, yword [rcx + 4*rsi + 64]
	LONG $0x7c6ffec5; WORD $0x60b1             // vmovdqu    ymm7, yword [rcx + 4*rsi + 96]
	// (x > 0) masks, narrowed from 32-bit to 16-bit lanes; results in xmm10,
	// xmm11, xmm2 and xmm1.
	LONG $0xd866ddc5                           // vpcmpgtd    ymm3, ymm4, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xd16b61c5                           // vpackssdw    xmm10, xmm3, xmm1
	LONG $0xc866d5c5                           // vpcmpgtd    ymm1, ymm5, ymm0
	LONG $0x397de3c4; WORD $0x01ca             // vextracti128    xmm2, ymm1, 1
	LONG $0xda6b71c5                           // vpackssdw    xmm11, xmm1, xmm2
	LONG $0xd066cdc5                           // vpcmpgtd    ymm2, ymm6, ymm0
	LONG $0x397de3c4; WORD $0x01d3             // vextracti128    xmm3, ymm2, 1
	LONG $0xd36be9c5                           // vpackssdw    xmm2, xmm2, xmm3
	LONG $0xd866c5c5                           // vpcmpgtd    ymm3, ymm7, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xc96be1c5                           // vpackssdw    xmm1, xmm3, xmm1
	// (x != 0) masks: compare-equal with zero, inverted via XOR with all ones,
	// then narrowed; results in xmm3, xmm4, xmm5 and xmm6. A set mask lane is
	// 0xFFFF, i.e. -1 as a 16-bit integer.
	LONG $0xd876ddc5                           // vpcmpeqd    ymm3, ymm4, ymm0
	LONG $0xdbefbdc5                           // vpxor    ymm3, ymm8, ymm3
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xe076d5c5                           // vpcmpeqd    ymm4, ymm5, ymm0
	LONG $0xe4efbdc5                           // vpxor    ymm4, ymm8, ymm4
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe876cdc5                           // vpcmpeqd    ymm5, ymm6, ymm0
	LONG $0xedefbdc5                           // vpxor    ymm5, ymm8, ymm5
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xf076c5c5                           // vpcmpeqd    ymm6, ymm7, ymm0
	LONG $0xf6efbdc5                           // vpxor    ymm6, ymm8, ymm6
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	// Select per lane: the xmm9 constant where x > 0, otherwise the (x != 0)
	// mask (-1 for negative values, 0 for zero).
	LONG $0x4c61c3c4; WORD $0xa0d9             // vpblendvb    xmm3, xmm3, xmm9, xmm10
	LONG $0x4c59c3c4; WORD $0xb0e1             // vpblendvb    xmm4, xmm4, xmm9, xmm11
	LONG $0x4c51c3c4; WORD $0x20d1             // vpblendvb    xmm2, xmm5, xmm9, xmm2
	LONG $0x4c49c3c4; WORD $0x10c9             // vpblendvb    xmm1, xmm6, xmm9, xmm1
	// Store 4 x 8 16-bit results.
	LONG $0x7f7ac1c4; WORD $0x701c             // vmovdqu    oword [r8 + 2*rsi], xmm3
	LONG $0x7f7ac1c4; WORD $0x7064; BYTE $0x10 // vmovdqu    oword [r8 + 2*rsi + 16], xmm4
	LONG $0x7f7ac1c4; WORD $0x7054; BYTE $0x20 // vmovdqu    oword [r8 + 2*rsi + 32], xmm2
	LONG $0x7f7ac1c4; WORD $0x704c; BYTE $0x30 // vmovdqu    oword [r8 + 2*rsi + 48], xmm1
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_694
	// Done if the vector bound equals the full count in r10; otherwise fall
	// through to the scalar loop with rdx as the first unprocessed index.
	WORD $0x394c; BYTE $0xd2                   // cmp    rdx, r10
	JE   LBB4_1351

// LBB4_696 / LBB4_697: scalar sign loop, signed 32-bit input at rcx, 16-bit
// output at r8 (same instruction sequence as LBB4_691 / LBB4_692). Starts at
// the index held in rdx on entry and runs until rdx reaches r10. For each
// element it stores 1 if the value is > 0, -1 if it is < 0, and 0 if it is
// zero, then jumps to LBB4_1351 (outside this view; presumably the common exit).
LBB4_696:
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

LBB4_697:
	WORD $0x3c8b; BYTE $0x91     // mov    edi, dword [rcx + 4*rdx]
	// eax = -(x != 0): 0 for zero, -1 for any non-zero value.
	WORD $0xc031                 // xor    eax, eax
	WORD $0xff85                 // test    edi, edi
	WORD $0x950f; BYTE $0xd0     // setne    al
	WORD $0xd8f7                 // neg    eax
	// neg clobbered the flags, so test again; positive values become esi (= 1).
	WORD $0xff85                 // test    edi, edi
	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
	JNE  LBB4_697
	JMP  LBB4_1351

// LBB4_698 / LBB4_699: vectorised "non-zero" test, 32-bit input at rcx, 64-bit
// output at r8, 16 elements per iteration.
//   rdx  = eax rounded down to a multiple of 16 (vector loop bound)
//   rsi  = element index, starts at 0
//   xmm0 = zero, xmm1 = all ones
//   ymm2 = qword at 40[rbp] broadcast to every 64-bit lane (value not visible
//          here; presumably 1, matching the scalar tail - confirm)
// Each output lane is ymm2's value ANDed with 0x00000000FFFFFFFF where the
// input is non-zero, and 0 where the input is zero.
// The loop only terminates when rsi == rdx, so it assumes rdx >= 16 on entry
// (presumably guaranteed by the code that jumps here - not visible).
LBB4_698:
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5               // vpcmpeqd    xmm1, xmm1, xmm1
	LONG $0x597de2c4; WORD $0x2855 // vpbroadcastq    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

LBB4_699:
	// Four groups of four elements: (x == 0) mask, inverted with XOR against
	// all ones, zero-extended from 32-bit to 64-bit lanes, then ANDed with ymm2.
	LONG $0x1c76f9c5; BYTE $0xb1               // vpcmpeqd    xmm3, xmm0, oword [rcx + 4*rsi]
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x357de2c4; BYTE $0xdb               // vpmovzxdq    ymm3, xmm3
	LONG $0x6476f9c5; WORD $0x10b1             // vpcmpeqd    xmm4, xmm0, oword [rcx + 4*rsi + 16]
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x357de2c4; BYTE $0xe4               // vpmovzxdq    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0x6c76f9c5; WORD $0x20b1             // vpcmpeqd    xmm5, xmm0, oword [rcx + 4*rsi + 32]
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x357de2c4; BYTE $0xed               // vpmovzxdq    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0x7476f9c5; WORD $0x30b1             // vpcmpeqd    xmm6, xmm0, oword [rcx + 4*rsi + 48]
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x357de2c4; BYTE $0xf6               // vpmovzxdq    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	// Store 4 x 4 64-bit results.
	LONG $0x7f7ec1c4; WORD $0xf01c             // vmovdqu    yword [r8 + 8*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0xf064; BYTE $0x20 // vmovdqu    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0xf06c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0xf074; BYTE $0x60 // vmovdqu    yword [r8 + 8*rsi + 96], ymm6
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_699
	// Done if the vector bound equals the full count in rax; otherwise fall
	// through to the scalar loop with rdx as the first unprocessed index.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// LBB4_701: scalar loop. For each index rdx (starting at its value on entry)
// it stores a 64-bit 1 at r8 + 8*rdx when the 32-bit value at rcx + 4*rdx is
// non-zero, and a 64-bit 0 otherwise. The loop ends when rdx reaches rax and
// then jumps to LBB4_1351 (outside this view; presumably the common exit).
LBB4_701:
	// Clearing esi first makes the full rsi equal to the setne result (0 or 1).
	WORD $0xf631             // xor    esi, esi
	LONG $0x00913c83         // cmp    dword [rcx + 4*rdx], 0
	LONG $0xd6950f40         // setne    sil
	LONG $0xd0348949         // mov    qword [r8 + 8*rdx], rsi
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JNE  LBB4_701
	JMP  LBB4_1351

// LBB4_702 / LBB4_703: vectorised "non-zero" test, 32-bit input at rcx, 32-bit
// output at r8, 32 elements per iteration.
//   rdx  = eax rounded down to a multiple of 32 (vector loop bound)
//   rsi  = element index, starts at 0
//   ymm0 = zero
//   ymm1 = dword at 152[rbp] broadcast to every 32-bit lane (value not visible
//          here; presumably the "one" of the output type - confirm)
// Each output lane is ymm1's value where the input bits are non-zero and 0
// where they are zero (the input is compared as integer bits).
// The loop only terminates when rsi == rdx, so it assumes rdx >= 32 on entry
// (presumably guaranteed by the code that jumps here - not visible).
LBB4_702:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	QUAD $0x0000988d587de2c4; BYTE $0x00 // vpbroadcastd    ymm1, dword 152[rbp] /* [rip + .LCPI4_5] */

LBB4_703:
	// (x == 0) mask, then vpandn computes (~mask & ymm1): the constant survives
	// only in lanes where x != 0.
	LONG $0x1476fdc5; BYTE $0xb1               // vpcmpeqd    ymm2, ymm0, yword [rcx + 4*rsi]
	LONG $0xd1dfedc5                           // vpandn    ymm2, ymm2, ymm1
	LONG $0x5c76fdc5; WORD $0x20b1             // vpcmpeqd    ymm3, ymm0, yword [rcx + 4*rsi + 32]
	LONG $0xd9dfe5c5                           // vpandn    ymm3, ymm3, ymm1
	LONG $0x6476fdc5; WORD $0x40b1             // vpcmpeqd    ymm4, ymm0, yword [rcx + 4*rsi + 64]
	LONG $0x6c76fdc5; WORD $0x60b1             // vpcmpeqd    ymm5, ymm0, yword [rcx + 4*rsi + 96]
	LONG $0xe1dfddc5                           // vpandn    ymm4, ymm4, ymm1
	LONG $0xe9dfd5c5                           // vpandn    ymm5, ymm5, ymm1
	// Store 4 x 8 32-bit results.
	LONG $0x7f7ec1c4; WORD $0xb014             // vmovdqu    yword [r8 + 4*rsi], ymm2
	LONG $0x7f7ec1c4; WORD $0xb05c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rsi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xb064; BYTE $0x40 // vmovdqu    yword [r8 + 4*rsi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xb06c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rsi + 96], ymm5
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_703
	// Done if the vector bound equals the full count in rax; otherwise fall
	// through to the scalar loop with rdx as the first unprocessed index.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// LBB4_705 / LBB4_706 / LBB4_707: scalar "non-zero" loop, 32-bit input at rcx,
// 32-bit output at r8. Starts at the index held in rdx on entry and runs until
// rdx reaches rax. For each element it stores the dword constant from 152[rbp]
// when the input bits are non-zero and 0 when they are zero. The constant's
// value is not visible here (presumably the "one" of the output type - confirm).
LBB4_705:
	// xmm0 keeps the constant for the whole loop; enter at the test in LBB4_707.
	QUAD $0x00000098856ef9c5 // vmovd    xmm0, dword 152[rbp] /* [rip + .LCPI4_5] */
	JMP  LBB4_707

LBB4_706:
	// Store the value chosen in LBB4_707, advance, and exit once rdx == rax.
	LONG $0x7e79c1c4; WORD $0x900c // vmovd    dword [r8 + 4*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

LBB4_707:
	// Default the result to the constant; the vmovdqa leaves the cmp flags
	// intact, so JNE still reflects (x != 0). A zero input replaces xmm1 with 0.
	LONG $0x00913c83 // cmp    dword [rcx + 4*rdx], 0
	LONG $0xc86ff9c5 // vmovdqa    xmm1, xmm0
	JNE  LBB4_706
	LONG $0xc9eff1c5 // vpxor    xmm1, xmm1, xmm1
	JMP  LBB4_706

// Preamble for the 8-byte-float -> 8-byte-integer vector loop LBB4_711.
//   rdx = eax & -4            (element count rounded down to a multiple of 4)
//   r9  = ((rdx - 4) >> 2) + 1 (number of 4-element groups)
//   If rdx - 4 == 0 there is only one group and control goes to LBB4_1302.
//   rdi = -(r9 & -2)          (negative count of groups handled in pairs)
//   xmm0 = 0, ymm1 = broadcast qword 0[rbp], ymm2 = broadcast qword 8[rbp].
// The constants themselves are not visible here; from their use
// ((x & ymm1) | ymm2) they look like a sign-bit mask and 1.0 — TODO confirm.
LBB4_709:
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xfc       // and    edx, -4
	LONG $0xfc728d48               // lea    rsi, [rdx - 4]
	WORD $0x8949; BYTE $0xf1       // mov    r9, rsi
	LONG $0x02e9c149               // shr    r9, 2
	LONG $0x01c18349               // add    r9, 1
	WORD $0x8548; BYTE $0xf6       // test    rsi, rsi
	JE   LBB4_1302
	WORD $0x894c; BYTE $0xcf       // mov    rdi, r9
	LONG $0xfee78348               // and    rdi, -2
	WORD $0xf748; BYTE $0xdf       // neg    rdi
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc057f9c5               // vxorpd    xmm0, xmm0, xmm0
	LONG $0x197de2c4; WORD $0x004d // vbroadcastsd    ymm1, qword 0[rbp] /* [rip + .LCPI4_0] */
	LONG $0x197de2c4; WORD $0x0855 // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI4_1] */

// Loop body, unrolled twice (two groups of 4 doubles per iteration).
// For each group loaded from [rcx + 8*rsi]:
//   t = (x & ymm1) | ymm2
//   each of the four lanes of t is converted to a 64-bit integer with
//   truncation (vcvttsd2si through rbx; vpermilps 78 swaps the two qwords of
//   an xmm to reach the upper lane) and the lanes are reassembled in order
//   with vpunpcklqdq / vinserti128;
//   the result is ANDed with the (x != 0) mask so zero inputs store 0.
// The group is stored as four qwords at [r8 + 8*rsi]. rbx is used as scratch.
LBB4_711:
	LONG $0x1c10fdc5; BYTE $0xf1               // vmovupd    ymm3, yword [rcx + 8*rsi]
	LONG $0xe154e5c5                           // vandpd    ymm4, ymm3, ymm1
	LONG $0xe456edc5                           // vorpd    ymm4, ymm2, ymm4
	LONG $0x197de3c4; WORD $0x01e5             // vextractf128    xmm5, ymm4, 1
	LONG $0x2cfbe1c4; BYTE $0xdd               // vcvttsd2si    rbx, xmm5
	LONG $0x6ef9e1c4; BYTE $0xf3               // vmovq    xmm6, rbx
	LONG $0x0479e3c4; WORD $0x4eed             // vpermilps    xmm5, xmm5, 78
	LONG $0x2cfbe1c4; BYTE $0xdd               // vcvttsd2si    rbx, xmm5
	LONG $0x6ef9e1c4; BYTE $0xeb               // vmovq    xmm5, rbx
	LONG $0xed6cc9c5                           // vpunpcklqdq    xmm5, xmm6, xmm5
	LONG $0x2cfbe1c4; BYTE $0xdc               // vcvttsd2si    rbx, xmm4
	LONG $0x6ef9e1c4; BYTE $0xf3               // vmovq    xmm6, rbx
	LONG $0x0479e3c4; WORD $0x4ee4             // vpermilps    xmm4, xmm4, 78
	LONG $0x2cfbe1c4; BYTE $0xdc               // vcvttsd2si    rbx, xmm4
	LONG $0x6ef9e1c4; BYTE $0xe3               // vmovq    xmm4, rbx
	LONG $0xe46cc9c5                           // vpunpcklqdq    xmm4, xmm6, xmm4
	LONG $0x385de3c4; WORD $0x01e5             // vinserti128    ymm4, ymm4, xmm5, 1
	LONG $0xd8c2e5c5; BYTE $0x04               // vcmpneqpd    ymm3, ymm3, ymm0
	LONG $0xdc54e5c5                           // vandpd    ymm3, ymm3, ymm4
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	// Second group of the pair: same sequence at byte offset +32.
	LONG $0x5c10fdc5; WORD $0x20f1             // vmovupd    ymm3, yword [rcx + 8*rsi + 32]
	LONG $0xe154e5c5                           // vandpd    ymm4, ymm3, ymm1
	LONG $0xe456edc5                           // vorpd    ymm4, ymm2, ymm4
	LONG $0x197de3c4; WORD $0x01e5             // vextractf128    xmm5, ymm4, 1
	LONG $0x2cfbe1c4; BYTE $0xdd               // vcvttsd2si    rbx, xmm5
	LONG $0x6ef9e1c4; BYTE $0xf3               // vmovq    xmm6, rbx
	LONG $0x0479e3c4; WORD $0x4eed             // vpermilps    xmm5, xmm5, 78
	LONG $0x2cfbe1c4; BYTE $0xdd               // vcvttsd2si    rbx, xmm5
	LONG $0x6ef9e1c4; BYTE $0xeb               // vmovq    xmm5, rbx
	LONG $0xed6cc9c5                           // vpunpcklqdq    xmm5, xmm6, xmm5
	LONG $0x2cfbe1c4; BYTE $0xdc               // vcvttsd2si    rbx, xmm4
	LONG $0x6ef9e1c4; BYTE $0xf3               // vmovq    xmm6, rbx
	LONG $0x0479e3c4; WORD $0x4ee4             // vpermilps    xmm4, xmm4, 78
	LONG $0x2cfbe1c4; BYTE $0xdc               // vcvttsd2si    rbx, xmm4
	LONG $0x6ef9e1c4; BYTE $0xe3               // vmovq    xmm4, rbx
	LONG $0xe46cc9c5                           // vpunpcklqdq    xmm4, xmm6, xmm4
	LONG $0x385de3c4; WORD $0x01e5             // vinserti128    ymm4, ymm4, xmm5, 1
	LONG $0xd8c2e5c5; BYTE $0x04               // vcmpneqpd    ymm3, ymm3, ymm0
	LONG $0xdc54e5c5                           // vandpd    ymm3, ymm3, ymm4
	LONG $0x117dc1c4; WORD $0xf05c; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm3
	// Advance by 8 elements; rdi counts up by 2 groups towards zero.
	LONG $0x08c68348                           // add    rsi, 8
	LONG $0x02c78348                           // add    rdi, 2
	JNE  LBB4_711
	JMP  LBB4_1303

// Preamble for the 8-byte-float -> 4-byte-float vector loop LBB4_713.
//   rdx  = eax & -16 (element count rounded down to a multiple of 16)
//   rsi  = 0         (element index)
//   xmm8 = 0
//   xmm1 = broadcast dword 144[rbp]
//   xmm2 = (qword 8[rbp] broadcast and narrowed to single precision) & xmm1
// The constant values are not visible here; the use below (~xmm1 & x) | xmm2
// suggests xmm1 is an abs-value mask and xmm2 a magnitude of 1.0f — TODO confirm.
LBB4_712:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xf0             // and    edx, -16
	WORD $0xf631                         // xor    esi, esi
	LONG $0x197de2c4; WORD $0x0845       // vbroadcastsd    ymm0, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0xd05afdc5                     // vcvtpd2ps    xmm2, ymm0
	LONG $0x573941c4; BYTE $0xc0         // vxorpd    xmm8, xmm8, xmm8
	QUAD $0x0000908d1879e2c4; BYTE $0x00 // vbroadcastss    xmm1, dword 144[rbp] /* [rip + .LCPI4_3] */
	LONG $0xd154e9c5                     // vandpd    xmm2, xmm2, xmm1

// Loop body: 16 doubles per iteration (four groups of 4) from [rcx + 8*rsi],
// producing 16 four-byte results at [r8 + 4*rsi].
// Per group:
//   zmask = (x == 0) per qword lane, narrowed to four dword lanes with
//           vextractf128 + vpackssdw (xmm9, xmm10, xmm11, xmm0);
//   v     = (~xmm1 & (float)x) | xmm2;
//   out   = ~zmask & v, so zero inputs store 0.
LBB4_713:
	LONG $0x1c10fdc5; BYTE $0xf1               // vmovupd    ymm3, yword [rcx + 8*rsi]
	LONG $0x6410fdc5; WORD $0x20f1             // vmovupd    ymm4, yword [rcx + 8*rsi + 32]
	LONG $0x6c10fdc5; WORD $0x40f1             // vmovupd    ymm5, yword [rcx + 8*rsi + 64]
	LONG $0x7410fdc5; WORD $0x60f1             // vmovupd    ymm6, yword [rcx + 8*rsi + 96]
	LONG $0xfbc2bdc5; BYTE $0x00               // vcmpeqpd    ymm7, ymm8, ymm3
	LONG $0x197de3c4; WORD $0x01f8             // vextractf128    xmm0, ymm7, 1
	LONG $0xc86b41c5                           // vpackssdw    xmm9, xmm7, xmm0
	LONG $0xfcc2bdc5; BYTE $0x00               // vcmpeqpd    ymm7, ymm8, ymm4
	LONG $0x197de3c4; WORD $0x01f8             // vextractf128    xmm0, ymm7, 1
	LONG $0xd06b41c5                           // vpackssdw    xmm10, xmm7, xmm0
	LONG $0xfdc2bdc5; BYTE $0x00               // vcmpeqpd    ymm7, ymm8, ymm5
	LONG $0x197de3c4; WORD $0x01f8             // vextractf128    xmm0, ymm7, 1
	LONG $0xd86b41c5                           // vpackssdw    xmm11, xmm7, xmm0
	LONG $0xfec2bdc5; BYTE $0x00               // vcmpeqpd    ymm7, ymm8, ymm6
	LONG $0x197de3c4; WORD $0x01f8             // vextractf128    xmm0, ymm7, 1
	LONG $0xc06bc1c5                           // vpackssdw    xmm0, xmm7, xmm0
	LONG $0xdb5afdc5                           // vcvtpd2ps    xmm3, ymm3
	LONG $0xdb55f1c5                           // vandnpd    xmm3, xmm1, xmm3
	LONG $0xdb56e9c5                           // vorpd    xmm3, xmm2, xmm3
	LONG $0xe45afdc5                           // vcvtpd2ps    xmm4, ymm4
	LONG $0xdbdfb1c5                           // vpandn    xmm3, xmm9, xmm3
	LONG $0xe455f1c5                           // vandnpd    xmm4, xmm1, xmm4
	LONG $0xe456e9c5                           // vorpd    xmm4, xmm2, xmm4
	LONG $0xe4dfa9c5                           // vpandn    xmm4, xmm10, xmm4
	LONG $0xed5afdc5                           // vcvtpd2ps    xmm5, ymm5
	LONG $0xed55f1c5                           // vandnpd    xmm5, xmm1, xmm5
	LONG $0xed56e9c5                           // vorpd    xmm5, xmm2, xmm5
	LONG $0xeddfa1c5                           // vpandn    xmm5, xmm11, xmm5
	LONG $0xf65afdc5                           // vcvtpd2ps    xmm6, ymm6
	LONG $0xf655f1c5                           // vandnpd    xmm6, xmm1, xmm6
	LONG $0xf656e9c5                           // vorpd    xmm6, xmm2, xmm6
	LONG $0xc6dff9c5                           // vpandn    xmm0, xmm0, xmm6
	LONG $0x7f7ac1c4; WORD $0xb01c             // vmovdqu    oword [r8 + 4*rsi], xmm3
	LONG $0x7f7ac1c4; WORD $0xb064; BYTE $0x10 // vmovdqu    oword [r8 + 4*rsi + 16], xmm4
	LONG $0x7f7ac1c4; WORD $0xb06c; BYTE $0x20 // vmovdqu    oword [r8 + 4*rsi + 32], xmm5
	LONG $0x7f7ac1c4; WORD $0xb044; BYTE $0x30 // vmovdqu    oword [r8 + 4*rsi + 48], xmm0
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_713
	// Vector part finished; skip the scalar tail when rdx == rax.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail for 8-byte-float input / 4-byte-float output, index rdx up to rax.
//   xmm0 = 0.0, xmm1 = dword 148[rbp], xmm2 = dword 152[rbp].
// The loop is entered at LBB4_717 (compute); LBB4_716 stores and advances.
LBB4_715:
	LONG $0xc057f9c5                     // vxorpd    xmm0, xmm0, xmm0
	QUAD $0x0000948d5879e2c4; BYTE $0x00 // vpbroadcastd    xmm1, dword 148[rbp] /* [rip + .LCPI4_4] */
	QUAD $0x000098955879e2c4; BYTE $0x00 // vpbroadcastd    xmm2, dword 152[rbp] /* [rip + .LCPI4_5] */
	JMP  LBB4_717

// Store the low dword of xmm3 for element rdx, advance, exit when rdx == rax.
LBB4_716:
	LONG $0x7e79c1c4; WORD $0x901c // vmovd    dword [r8 + 4*rdx], xmm3
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

// xmm3 = 0 when the input compares equal to 0.0, otherwise
// ((float)x & xmm1) | xmm2. The vpxor between vucomisd and JE leaves EFLAGS
// untouched, so JE tests the floating-point compare.
// NOTE(review): JE is also taken when the compare is unordered (x is NaN), so
// a NaN input stores 0 here, whereas the ordered-equal compare in the
// LBB4_713 vector loop does not zero NaN lanes — confirm NaN inputs are out
// of scope for this kernel.
LBB4_717:
	LONG $0x2410fbc5; BYTE $0xd1 // vmovsd    xmm4, qword [rcx + 8*rdx]
	LONG $0xc42ef9c5             // vucomisd    xmm0, xmm4
	LONG $0xdbefe1c5             // vpxor    xmm3, xmm3, xmm3
	JE   LBB4_716
	LONG $0xdc5adbc5             // vcvtsd2ss    xmm3, xmm4, xmm4
	LONG $0xd9dbe1c5             // vpand    xmm3, xmm3, xmm1
	LONG $0xdbebe9c5             // vpor    xmm3, xmm2, xmm3
	JMP  LBB4_716

// Preamble for the qword-input / dword-output vector loop LBB4_729.
//   rdx  = eax & -16 (element count rounded down to a multiple of 16)
//   rsi  = 0, ymm0 = 0, ymm1 = all ones
//   xmm2 = broadcast dword 152[rbp]
LBB4_728:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xf0             // and    edx, -16
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5                     // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x000098955879e2c4; BYTE $0x00 // vpbroadcastd    xmm2, dword 152[rbp] /* [rip + .LCPI4_5] */

// Loop body: 16 qwords per iteration (four groups of 4) from [rcx + 8*rsi],
// producing 16 dwords at [r8 + 4*rsi].
// Per group: (x == 0) mask, inverted by XOR with all ones to get (x != 0),
// narrowed from four qword lanes to four dword lanes (vextracti128 +
// vpackssdw), then ANDed with xmm2. Result: xmm2 for non-zero inputs, else 0.
LBB4_729:
	LONG $0x297de2c4; WORD $0xf11c             // vpcmpeqq    ymm3, ymm0, yword [rcx + 8*rsi]
	LONG $0xd9efe5c5                           // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xdadbe1c5                           // vpand    xmm3, xmm3, xmm2
	LONG $0x297de2c4; WORD $0xf164; BYTE $0x20 // vpcmpeqq    ymm4, ymm0, yword [rcx + 8*rsi + 32]
	LONG $0xe1efddc5                           // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0x297de2c4; WORD $0xf16c; BYTE $0x40 // vpcmpeqq    ymm5, ymm0, yword [rcx + 8*rsi + 64]
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xeadbd1c5                           // vpand    xmm5, xmm5, xmm2
	LONG $0x297de2c4; WORD $0xf174; BYTE $0x60 // vpcmpeqq    ymm6, ymm0, yword [rcx + 8*rsi + 96]
	LONG $0xf1efcdc5                           // vpxor    ymm6, ymm6, ymm1
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xf2dbc9c5                           // vpand    xmm6, xmm6, xmm2
	LONG $0x7f7ac1c4; WORD $0xb01c             // vmovdqu    oword [r8 + 4*rsi], xmm3
	LONG $0x7f7ac1c4; WORD $0xb064; BYTE $0x10 // vmovdqu    oword [r8 + 4*rsi + 16], xmm4
	LONG $0x7f7ac1c4; WORD $0xb06c; BYTE $0x20 // vmovdqu    oword [r8 + 4*rsi + 32], xmm5
	LONG $0x7f7ac1c4; WORD $0xb074; BYTE $0x30 // vmovdqu    oword [r8 + 4*rsi + 48], xmm6
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_729
	// Vector part finished; skip the scalar tail when rdx == rax.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail for qword input / dword output, index rdx up to rax.
// xmm0 holds the dword constant from 152[rbp]. Entered at LBB4_733 (test);
// LBB4_732 stores and advances.
LBB4_731:
	QUAD $0x00000098856ef9c5 // vmovd    xmm0, dword 152[rbp] /* [rip + .LCPI4_5] */
	JMP  LBB4_733

// Store the selected dword for element rdx, advance, exit when rdx == rax.
LBB4_732:
	LONG $0x7e79c1c4; WORD $0x900c // vmovd    dword [r8 + 4*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

// xmm1 = constant if the input qword is non-zero, otherwise 0. vmovdqa does
// not modify EFLAGS, so JNE still tests the cmp.
LBB4_733:
	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
	LONG $0xc86ff9c5             // vmovdqa    xmm1, xmm0
	JNE  LBB4_732
	LONG $0xc9eff1c5             // vpxor    xmm1, xmm1, xmm1
	JMP  LBB4_732

// Preamble for the word-input / qword-output vector loop LBB4_736.
//   rdx  = eax & -16 (element count rounded down to a multiple of 16)
//   rsi  = 0, xmm0 = 0, xmm1 = all ones
//   ymm2 = broadcast qword 40[rbp]
LBB4_735:
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5               // vpcmpeqd    xmm1, xmm1, xmm1
	LONG $0x597de2c4; WORD $0x2855 // vpbroadcastq    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

// Loop body: 16 words per iteration (four 8-byte loads of 4 words each) from
// [rcx + 2*rsi], producing 16 qwords at [r8 + 8*rsi].
// Per group: (x == 0) word mask, inverted with XOR all-ones to (x != 0),
// zero-extended from words to qwords (0xFFFF or 0), then ANDed with ymm2.
// The scalar tail at LBB4_738 writes 1 / 0 for the same test, so ymm2 is
// presumably the integer 1 — TODO confirm against the constant table.
LBB4_736:
	LONG $0x1c7efac5; BYTE $0x71               // vmovq    xmm3, qword [rcx + 2*rsi]
	LONG $0x647efac5; WORD $0x0871             // vmovq    xmm4, qword [rcx + 2*rsi + 8]
	LONG $0x6c7efac5; WORD $0x1071             // vmovq    xmm5, qword [rcx + 2*rsi + 16]
	LONG $0x747efac5; WORD $0x1871             // vmovq    xmm6, qword [rcx + 2*rsi + 24]
	LONG $0xd875e1c5                           // vpcmpeqw    xmm3, xmm3, xmm0
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x347de2c4; BYTE $0xdb               // vpmovzxwq    ymm3, xmm3
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xe075d9c5                           // vpcmpeqw    xmm4, xmm4, xmm0
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x347de2c4; BYTE $0xe4               // vpmovzxwq    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe875d1c5                           // vpcmpeqw    xmm5, xmm5, xmm0
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x347de2c4; BYTE $0xed               // vpmovzxwq    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xf075c9c5                           // vpcmpeqw    xmm6, xmm6, xmm0
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x347de2c4; BYTE $0xf6               // vpmovzxwq    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	LONG $0x7f7ec1c4; WORD $0xf01c             // vmovdqu    yword [r8 + 8*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0xf064; BYTE $0x20 // vmovdqu    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0xf06c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0xf074; BYTE $0x60 // vmovdqu    yword [r8 + 8*rsi + 96], ymm6
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_736
	// Vector part finished; skip the scalar tail when rdx == rax.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail for word input / qword output, index rdx up to rax.
// Each iteration stores 1 if the input word is non-zero and 0 otherwise:
// rsi is cleared, setne writes its low byte, and the full qword is stored.
LBB4_738:
	WORD $0xf631                 // xor    esi, esi
	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0xd0348949             // mov    qword [r8 + 8*rdx], rsi
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_738
	JMP  LBB4_1351

// Preamble for the word-input / 4-byte-float-output vector loop LBB4_740.
//   rdx  = eax & -32 (element count rounded down to a multiple of 32)
//   rsi  = 0, xmm0 = 0, xmm1 = all ones
//   ymm2 = broadcast dword 156[rbp] (used as an integer AND mask below)
LBB4_739:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5                     // vpcmpeqd    xmm1, xmm1, xmm1
	QUAD $0x00009c95587de2c4; BYTE $0x00 // vpbroadcastd    ymm2, dword 156[rbp] /* [rip + .LCPI4_8] */

// Loop body: 32 words per iteration (four groups of 8) from [rcx + 2*rsi],
// producing 32 single-precision values at [r8 + 4*rsi].
// Per group: (x == 0) word mask, inverted to (x != 0), zero-extended to
// dwords, ANDed with ymm2, then converted from int32 to float (vcvtdq2ps).
LBB4_740:
	LONG $0x1c75f9c5; BYTE $0x71               // vpcmpeqw    xmm3, xmm0, oword [rcx + 2*rsi]
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x337de2c4; BYTE $0xdb               // vpmovzxwd    ymm3, xmm3
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
	LONG $0x6475f9c5; WORD $0x1071             // vpcmpeqw    xmm4, xmm0, oword [rcx + 2*rsi + 16]
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x337de2c4; BYTE $0xe4               // vpmovzxwd    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe45bfcc5                           // vcvtdq2ps    ymm4, ymm4
	LONG $0x6c75f9c5; WORD $0x2071             // vpcmpeqw    xmm5, xmm0, oword [rcx + 2*rsi + 32]
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x337de2c4; BYTE $0xed               // vpmovzxwd    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xed5bfcc5                           // vcvtdq2ps    ymm5, ymm5
	LONG $0x7475f9c5; WORD $0x3071             // vpcmpeqw    xmm6, xmm0, oword [rcx + 2*rsi + 48]
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x337de2c4; BYTE $0xf6               // vpmovzxwd    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	LONG $0xf65bfcc5                           // vcvtdq2ps    ymm6, ymm6
	LONG $0x117cc1c4; WORD $0xb01c             // vmovups    yword [r8 + 4*rsi], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x117cc1c4; WORD $0xb074; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm6
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_740
	// Vector part finished; skip the scalar tail when rdx == rax.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail for word input / dword output, index rdx up to rax.
// xmm0 holds the dword constant from 152[rbp]. Entered at LBB4_744 (test);
// LBB4_743 stores and advances.
LBB4_742:
	QUAD $0x00000098856ef9c5 // vmovd    xmm0, dword 152[rbp] /* [rip + .LCPI4_5] */
	JMP  LBB4_744

// Store the selected dword for element rdx, advance, exit when rdx == rax.
LBB4_743:
	LONG $0x7e79c1c4; WORD $0x900c // vmovd    dword [r8 + 4*rdx], xmm1
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

// xmm1 = constant if the input word is non-zero, otherwise 0. vmovdqa does
// not modify EFLAGS, so JNE still tests the cmp.
LBB4_744:
	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
	LONG $0xc86ff9c5             // vmovdqa    xmm1, xmm0
	JNE  LBB4_743
	LONG $0xc9eff1c5             // vpxor    xmm1, xmm1, xmm1
	JMP  LBB4_743

// Preamble for the signed-word-input / qword-output three-way vector loop
// LBB4_747. Note the element count lives in r10 here, not rax.
//   rdx  = r10d & -16 (count rounded down to a multiple of 16)
//   rsi  = 0, xmm0 = 0, xmm8 = all ones
//   ymm2 = broadcast qword 40[rbp] (value selected for positive inputs)
LBB4_746:
	WORD $0x8944; BYTE $0xd2       // mov    edx, r10d
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0   // vpcmpeqd    xmm8, xmm8, xmm8
	LONG $0x197de2c4; WORD $0x2855 // vbroadcastsd    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

// Loop body: 16 words per iteration (four 8-byte loads of 4 words each) from
// [rcx + 2*rsi], producing 16 qwords at [r8 + 8*rsi].
// Per group:
//   gt  = (x > 0), signed word compare, sign-extended to qword lanes
//         (ymm9, ymm10, ymm7, ymm1);
//   nz  = ~(x == 0), sign-extended to qword lanes: -1 for non-zero, 0 for zero;
//   out = gt ? ymm2 : nz   (vblendvpd selects on the sign bit of gt).
// So the stored value is ymm2 for x > 0, -1 for x < 0 and 0 for x == 0.
LBB4_747:
	LONG $0x1c7efac5; BYTE $0x71               // vmovq    xmm3, qword [rcx + 2*rsi]
	LONG $0x647efac5; WORD $0x0871             // vmovq    xmm4, qword [rcx + 2*rsi + 8]
	LONG $0x6c7efac5; WORD $0x1071             // vmovq    xmm5, qword [rcx + 2*rsi + 16]
	LONG $0x747efac5; WORD $0x1871             // vmovq    xmm6, qword [rcx + 2*rsi + 24]
	LONG $0xf865e1c5                           // vpcmpgtw    xmm7, xmm3, xmm0
	LONG $0x247d62c4; BYTE $0xcf               // vpmovsxwq    ymm9, xmm7
	LONG $0xc865d9c5                           // vpcmpgtw    xmm1, xmm4, xmm0
	LONG $0x247d62c4; BYTE $0xd1               // vpmovsxwq    ymm10, xmm1
	LONG $0xf865d1c5                           // vpcmpgtw    xmm7, xmm5, xmm0
	LONG $0x247de2c4; BYTE $0xff               // vpmovsxwq    ymm7, xmm7
	LONG $0xc865c9c5                           // vpcmpgtw    xmm1, xmm6, xmm0
	LONG $0x247de2c4; BYTE $0xc9               // vpmovsxwq    ymm1, xmm1
	LONG $0xd875e1c5                           // vpcmpeqw    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x247de2c4; BYTE $0xdb               // vpmovsxwq    ymm3, xmm3
	LONG $0xe075d9c5                           // vpcmpeqw    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x247de2c4; BYTE $0xe4               // vpmovsxwq    ymm4, xmm4
	LONG $0xe875d1c5                           // vpcmpeqw    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x247de2c4; BYTE $0xed               // vpmovsxwq    ymm5, xmm5
	LONG $0xf075c9c5                           // vpcmpeqw    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x247de2c4; BYTE $0xf6               // vpmovsxwq    ymm6, xmm6
	LONG $0x4b65e3c4; WORD $0x90da             // vblendvpd    ymm3, ymm3, ymm2, ymm9
	LONG $0x4b5de3c4; WORD $0xa0e2             // vblendvpd    ymm4, ymm4, ymm2, ymm10
	LONG $0x4b55e3c4; WORD $0x70ea             // vblendvpd    ymm5, ymm5, ymm2, ymm7
	LONG $0x4b4de3c4; WORD $0x10ca             // vblendvpd    ymm1, ymm6, ymm2, ymm1
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf04c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm1
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_747
	// Vector part finished; skip the scalar tail when rdx == r10.
	WORD $0x394c; BYTE $0xd2                   // cmp    rdx, r10
	JE   LBB4_1351

// Scalar tail for signed-word input / qword output, index rdx up to r10.
// rsi is loaded with the constant 1 used for positive inputs.
LBB4_749:
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

// Per element: rax = -(x != 0), i.e. -1 for non-zero and 0 for zero; then
// cmovg (signed, after re-testing x) replaces it with 1 when x > 0.
// Stored qword is therefore 1, -1 or 0 for x > 0, x < 0, x == 0.
LBB4_750:
	LONG $0x513cb70f         // movzx    edi, word [rcx + 2*rdx]
	WORD $0xc031             // xor    eax, eax
	WORD $0x8566; BYTE $0xff // test    di, di
	WORD $0x950f; BYTE $0xd0 // setne    al
	WORD $0xf748; BYTE $0xd8 // neg    rax
	WORD $0x8566; BYTE $0xff // test    di, di
	LONG $0xc64f0f48         // cmovg    rax, rsi
	LONG $0xd0048949         // mov    qword [r8 + 8*rdx], rax
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
	JNE  LBB4_750
	JMP  LBB4_1351

// Preamble for the signed-word-input / 4-byte-float-output three-way vector
// loop LBB4_752.
//   rdx  = eax & -32 (element count rounded down to a multiple of 32)
//   rsi  = 0, xmm0 = 0, xmm8 = all ones
//   ymm2 = broadcast dword 152[rbp] (value selected for positive inputs)
LBB4_751:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0         // vpcmpeqd    xmm8, xmm8, xmm8
	QUAD $0x00009895187de2c4; BYTE $0x00 // vbroadcastss    ymm2, dword 152[rbp] /* [rip + .LCPI4_5] */

// Loop body: 32 words per iteration (four groups of 8) from [rcx + 2*rsi],
// producing 32 single-precision values at [r8 + 4*rsi].
// Per group:
//   gt  = (x > 0), signed word compare, sign-extended to dword lanes
//         (ymm9, ymm10, ymm7, ymm1);
//   nz  = ~(x == 0) sign-extended to dwords (-1 / 0) and converted to float,
//         giving -1.0 for non-zero and 0.0 for zero;
//   out = gt ? ymm2 : nz   (vblendvps selects on the sign bit of gt).
LBB4_752:
	LONG $0x1c6ffac5; BYTE $0x71               // vmovdqu    xmm3, oword [rcx + 2*rsi]
	LONG $0x646ffac5; WORD $0x1071             // vmovdqu    xmm4, oword [rcx + 2*rsi + 16]
	LONG $0x6c6ffac5; WORD $0x2071             // vmovdqu    xmm5, oword [rcx + 2*rsi + 32]
	LONG $0x746ffac5; WORD $0x3071             // vmovdqu    xmm6, oword [rcx + 2*rsi + 48]
	LONG $0xf865e1c5                           // vpcmpgtw    xmm7, xmm3, xmm0
	LONG $0x237d62c4; BYTE $0xcf               // vpmovsxwd    ymm9, xmm7
	LONG $0xc865d9c5                           // vpcmpgtw    xmm1, xmm4, xmm0
	LONG $0x237d62c4; BYTE $0xd1               // vpmovsxwd    ymm10, xmm1
	LONG $0xf865d1c5                           // vpcmpgtw    xmm7, xmm5, xmm0
	LONG $0x237de2c4; BYTE $0xff               // vpmovsxwd    ymm7, xmm7
	LONG $0xc865c9c5                           // vpcmpgtw    xmm1, xmm6, xmm0
	LONG $0x237de2c4; BYTE $0xc9               // vpmovsxwd    ymm1, xmm1
	LONG $0xd875e1c5                           // vpcmpeqw    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x237de2c4; BYTE $0xdb               // vpmovsxwd    ymm3, xmm3
	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
	LONG $0xe075d9c5                           // vpcmpeqw    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x237de2c4; BYTE $0xe4               // vpmovsxwd    ymm4, xmm4
	LONG $0xe45bfcc5                           // vcvtdq2ps    ymm4, ymm4
	LONG $0xe875d1c5                           // vpcmpeqw    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x237de2c4; BYTE $0xed               // vpmovsxwd    ymm5, xmm5
	LONG $0xed5bfcc5                           // vcvtdq2ps    ymm5, ymm5
	LONG $0xf075c9c5                           // vpcmpeqw    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x237de2c4; BYTE $0xf6               // vpmovsxwd    ymm6, xmm6
	LONG $0xf65bfcc5                           // vcvtdq2ps    ymm6, ymm6
	LONG $0x4a65e3c4; WORD $0x90da             // vblendvps    ymm3, ymm3, ymm2, ymm9
	LONG $0x4a5de3c4; WORD $0xa0e2             // vblendvps    ymm4, ymm4, ymm2, ymm10
	LONG $0x4a55e3c4; WORD $0x70ea             // vblendvps    ymm5, ymm5, ymm2, ymm7
	LONG $0x4a4de3c4; WORD $0x10ca             // vblendvps    ymm1, ymm6, ymm2, ymm1
	LONG $0x117cc1c4; WORD $0xb01c             // vmovups    yword [r8 + 4*rsi], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x117cc1c4; WORD $0xb04c; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm1
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_752
	// Vector part finished; skip the scalar tail when rdx == rax.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail for signed-word input / 4-byte-float output, index rdx up to rax.
//   xmm0 = dword 168[rbp] (value for negative inputs)
//   xmm1 = dword 152[rbp] (value for positive inputs)
// Entered at LBB4_756 (select); LBB4_755 stores and advances.
LBB4_754:
	QUAD $0x000000a88510fac5 // vmovss    xmm0, dword 168[rbp] /* [rip + .LCPI4_14] */
	QUAD $0x000000988d10fac5 // vmovss    xmm1, dword 152[rbp] /* [rip + .LCPI4_5] */
	JMP  LBB4_756

// Store xmm3 for element rdx, advance, exit when rdx == rax.
LBB4_755:
	LONG $0x117ac1c4; WORD $0x901c // vmovss    dword [r8 + 4*rdx], xmm3
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

// xmm2 = xmm0 if x != 0, else 0. The single cmp feeds both this JNE and the
// JG at LBB4_758: the vmovaps/vxorps in between do not modify EFLAGS.
LBB4_756:
	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
	LONG $0xd028f8c5             // vmovaps    xmm2, xmm0
	JNE  LBB4_758
	LONG $0xd257e8c5             // vxorps    xmm2, xmm2, xmm2

// xmm3 = xmm1 if x > 0 (signed), otherwise xmm2 from above.
LBB4_758:
	LONG $0xd928f8c5 // vmovaps    xmm3, xmm1
	JG   LBB4_755
	LONG $0xda28f8c5 // vmovaps    xmm3, xmm2
	JMP  LBB4_755

// Preamble for the signed-qword-input / 4-byte-float-output three-way vector
// loop LBB4_764.
//   rdx  = eax & -16 (element count rounded down to a multiple of 16)
//   rsi  = 0, ymm0 = 0, ymm9 = all ones
//   xmm2 = broadcast dword 152[rbp] (value selected for positive inputs)
LBB4_763:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xf0             // and    edx, -16
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0x763541c4; BYTE $0xc9         // vpcmpeqd    ymm9, ymm9, ymm9
	QUAD $0x000098951879e2c4; BYTE $0x00 // vbroadcastss    xmm2, dword 152[rbp] /* [rip + .LCPI4_5] */

// Loop body: 16 qwords per iteration (four groups of 4) from [rcx + 8*rsi],
// producing 16 single-precision values at [r8 + 4*rsi].
// Per group:
//   gt  = (x > 0), signed qword compare, narrowed to four dword lanes with
//         vextracti128 + vpackssdw (xmm10, xmm11, xmm12, xmm3);
//   nz  = ~(x == 0), narrowed the same way (-1 / 0) and converted to float,
//         giving -1.0 for non-zero and 0.0 for zero;
//   out = gt ? xmm2 : nz   (vblendvps selects on the sign bit of gt).
LBB4_764:
	LONG $0x346ffec5; BYTE $0xf1               // vmovdqu    ymm6, yword [rcx + 8*rsi]
	LONG $0x7c6ffec5; WORD $0x20f1             // vmovdqu    ymm7, yword [rcx + 8*rsi + 32]
	LONG $0x446f7ec5; WORD $0x40f1             // vmovdqu    ymm8, yword [rcx + 8*rsi + 64]
	LONG $0x646ffec5; WORD $0x60f1             // vmovdqu    ymm4, yword [rcx + 8*rsi + 96]
	LONG $0x374de2c4; BYTE $0xd8               // vpcmpgtq    ymm3, ymm6, ymm0
	LONG $0x397de3c4; WORD $0x01dd             // vextracti128    xmm5, ymm3, 1
	LONG $0xd56b61c5                           // vpackssdw    xmm10, xmm3, xmm5
	LONG $0x3745e2c4; BYTE $0xe8               // vpcmpgtq    ymm5, ymm7, ymm0
	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
	LONG $0xd96b51c5                           // vpackssdw    xmm11, xmm5, xmm1
	LONG $0x373de2c4; BYTE $0xc8               // vpcmpgtq    ymm1, ymm8, ymm0
	LONG $0x397de3c4; WORD $0x01cb             // vextracti128    xmm3, ymm1, 1
	LONG $0xe36b71c5                           // vpackssdw    xmm12, xmm1, xmm3
	LONG $0x375de2c4; BYTE $0xd8               // vpcmpgtq    ymm3, ymm4, ymm0
	LONG $0x397de3c4; WORD $0x01dd             // vextracti128    xmm5, ymm3, 1
	LONG $0xdd6be1c5                           // vpackssdw    xmm3, xmm3, xmm5
	LONG $0x294de2c4; BYTE $0xe8               // vpcmpeqq    ymm5, ymm6, ymm0
	LONG $0xedefb5c5                           // vpxor    ymm5, ymm9, ymm5
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xed5bf8c5                           // vcvtdq2ps    xmm5, xmm5
	LONG $0x2945e2c4; BYTE $0xf0               // vpcmpeqq    ymm6, ymm7, ymm0
	LONG $0xf6efb5c5                           // vpxor    ymm6, ymm9, ymm6
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xf65bf8c5                           // vcvtdq2ps    xmm6, xmm6
	LONG $0x293de2c4; BYTE $0xf8               // vpcmpeqq    ymm7, ymm8, ymm0
	LONG $0xffefb5c5                           // vpxor    ymm7, ymm9, ymm7
	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
	LONG $0xc96bc1c5                           // vpackssdw    xmm1, xmm7, xmm1
	LONG $0xc95bf8c5                           // vcvtdq2ps    xmm1, xmm1
	LONG $0x295de2c4; BYTE $0xe0               // vpcmpeqq    ymm4, ymm4, ymm0
	LONG $0xe4efb5c5                           // vpxor    ymm4, ymm9, ymm4
	LONG $0x397de3c4; WORD $0x01e7             // vextracti128    xmm7, ymm4, 1
	LONG $0xe76bd9c5                           // vpackssdw    xmm4, xmm4, xmm7
	LONG $0xe45bf8c5                           // vcvtdq2ps    xmm4, xmm4
	LONG $0x4a51e3c4; WORD $0xa0ea             // vblendvps    xmm5, xmm5, xmm2, xmm10
	LONG $0x4a49e3c4; WORD $0xb0f2             // vblendvps    xmm6, xmm6, xmm2, xmm11
	LONG $0x4a71e3c4; WORD $0xc0ca             // vblendvps    xmm1, xmm1, xmm2, xmm12
	LONG $0x4a59e3c4; WORD $0x30da             // vblendvps    xmm3, xmm4, xmm2, xmm3
	LONG $0x1178c1c4; WORD $0xb02c             // vmovups    oword [r8 + 4*rsi], xmm5
	LONG $0x1178c1c4; WORD $0xb074; BYTE $0x10 // vmovups    oword [r8 + 4*rsi + 16], xmm6
	LONG $0x1178c1c4; WORD $0xb04c; BYTE $0x20 // vmovups    oword [r8 + 4*rsi + 32], xmm1
	LONG $0x1178c1c4; WORD $0xb05c; BYTE $0x30 // vmovups    oword [r8 + 4*rsi + 48], xmm3
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_764
	// Vector part finished; skip the scalar tail when rdx == rax.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// Scalar tail for signed-qword input / 4-byte-float output, index rdx up to rax.
//   xmm0 = dword 168[rbp] (value for negative inputs)
//   xmm1 = dword 152[rbp] (value for positive inputs)
// Entered at LBB4_768 (select); LBB4_767 stores and advances.
LBB4_766:
	QUAD $0x000000a88510fac5 // vmovss    xmm0, dword 168[rbp] /* [rip + .LCPI4_14] */
	QUAD $0x000000988d10fac5 // vmovss    xmm1, dword 152[rbp] /* [rip + .LCPI4_5] */
	JMP  LBB4_768

// Store xmm3 for element rdx, advance, exit when rdx == rax.
LBB4_767:
	LONG $0x117ac1c4; WORD $0x901c // vmovss    dword [r8 + 4*rdx], xmm3
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

// xmm2 = xmm0 if x != 0, else 0. The single cmp feeds both this JNE and the
// JG at LBB4_770: the vmovaps/vxorps in between do not modify EFLAGS.
LBB4_768:
	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
	LONG $0xd028f8c5             // vmovaps    xmm2, xmm0
	JNE  LBB4_770
	LONG $0xd257e8c5             // vxorps    xmm2, xmm2, xmm2

// xmm3 = xmm1 if x > 0 (signed), otherwise xmm2 from above.
LBB4_770:
	LONG $0xd928f8c5 // vmovaps    xmm3, xmm1
	JG   LBB4_767
	LONG $0xda28f8c5 // vmovaps    xmm3, xmm2
	JMP  LBB4_767

// Preamble for the 4-byte-float-input / qword-output vector loop LBB4_774.
// The element count lives in r10 here.
//   rdx = r10d & -4             (count rounded down to a multiple of 4)
//   r9  = ((rdx - 4) >> 2) + 1  (number of 4-element groups)
//   If rdx - 4 == 0 there is only one group and control goes to LBB4_1308.
//   rdi = -(r9 & -2)            (negative count of groups handled in pairs)
//   rsi = 0, xmm0 = 0, xmm1 = broadcast dword 156[rbp]
LBB4_772:
	WORD $0x8944; BYTE $0xd2             // mov    edx, r10d
	WORD $0xe283; BYTE $0xfc             // and    edx, -4
	LONG $0xfc728d48                     // lea    rsi, [rdx - 4]
	WORD $0x8949; BYTE $0xf1             // mov    r9, rsi
	LONG $0x02e9c149                     // shr    r9, 2
	LONG $0x01c18349                     // add    r9, 1
	WORD $0x8548; BYTE $0xf6             // test    rsi, rsi
	JE   LBB4_1308
	WORD $0x894c; BYTE $0xcf             // mov    rdi, r9
	LONG $0xfee78348                     // and    rdi, -2
	WORD $0xf748; BYTE $0xdf             // neg    rdi
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc057f8c5                     // vxorps    xmm0, xmm0, xmm0
	QUAD $0x00009c8d5879e2c4; BYTE $0x00 // vpbroadcastd    xmm1, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_774:
	LONG $0x1410f8c5; BYTE $0xb1               // vmovups    xmm2, oword [rcx + 4*rsi]
	LONG $0xd8c2e8c5; BYTE $0x00               // vcmpeqps    xmm3, xmm2, xmm0
	LONG $0x257de2c4; BYTE $0xdb               // vpmovsxdq    ymm3, xmm3
	LONG $0xe272e9c5; BYTE $0x1f               // vpsrad    xmm2, xmm2, 31
	LONG $0xd1ebe9c5                           // vpor    xmm2, xmm2, xmm1
	LONG $0xd25bf8c5                           // vcvtdq2ps    xmm2, xmm2
	LONG $0x0479e3c4; WORD $0xe7e2             // vpermilps    xmm4, xmm2, 231
	LONG $0x2cfae1c4; BYTE $0xc4               // vcvttss2si    rax, xmm4
	LONG $0x6ef9e1c4; BYTE $0xe0               // vmovq    xmm4, rax
	LONG $0x0579e3c4; WORD $0x01ea             // vpermilpd    xmm5, xmm2, 1
	LONG $0x2cfae1c4; BYTE $0xc5               // vcvttss2si    rax, xmm5
	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
	LONG $0xe46cd1c5                           // vpunpcklqdq    xmm4, xmm5, xmm4
	LONG $0x2cfae1c4; BYTE $0xc2               // vcvttss2si    rax, xmm2
	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
	LONG $0xd216fac5                           // vmovshdup    xmm2, xmm2
	LONG $0x2cfae1c4; BYTE $0xc2               // vcvttss2si    rax, xmm2
	LONG $0x6ef9e1c4; BYTE $0xd0               // vmovq    xmm2, rax
	LONG $0xd26cd1c5                           // vpunpcklqdq    xmm2, xmm5, xmm2
	LONG $0x386de3c4; WORD $0x01d4             // vinserti128    ymm2, ymm2, xmm4, 1
	LONG $0xd2dfe5c5                           // vpandn    ymm2, ymm3, ymm2
	LONG $0x7f7ec1c4; WORD $0xf014             // vmovdqu    yword [r8 + 8*rsi], ymm2
	LONG $0x546ffac5; WORD $0x10b1             // vmovdqu    xmm2, oword [rcx + 4*rsi + 16]
	LONG $0xe272e1c5; BYTE $0x1f               // vpsrad    xmm3, xmm2, 31
	LONG $0xd9ebe1c5                           // vpor    xmm3, xmm3, xmm1
	LONG $0xdb5bf8c5                           // vcvtdq2ps    xmm3, xmm3
	LONG $0x0479e3c4; WORD $0xe7e3             // vpermilps    xmm4, xmm3, 231
	LONG $0x2cfae1c4; BYTE $0xc4               // vcvttss2si    rax, xmm4
	LONG $0x0579e3c4; WORD $0x01e3             // vpermilpd    xmm4, xmm3, 1
	LONG $0x2cfa61c4; BYTE $0xdc               // vcvttss2si    r11, xmm4
	LONG $0x2cfae1c4; BYTE $0xdb               // vcvttss2si    rbx, xmm3
	LONG $0x6ef9e1c4; BYTE $0xe0               // vmovq    xmm4, rax
	LONG $0xdb16fac5                           // vmovshdup    xmm3, xmm3
	LONG $0x2cfae1c4; BYTE $0xc3               // vcvttss2si    rax, xmm3
	LONG $0x6ef9c1c4; BYTE $0xdb               // vmovq    xmm3, r11
	LONG $0x6ef9e1c4; BYTE $0xeb               // vmovq    xmm5, rbx
	LONG $0xd0c2e8c5; BYTE $0x00               // vcmpeqps    xmm2, xmm2, xmm0
	LONG $0x257de2c4; BYTE $0xd2               // vpmovsxdq    ymm2, xmm2
	LONG $0xdc6ce1c5                           // vpunpcklqdq    xmm3, xmm3, xmm4
	LONG $0x6ef9e1c4; BYTE $0xe0               // vmovq    xmm4, rax
	LONG $0xe46cd1c5                           // vpunpcklqdq    xmm4, xmm5, xmm4
	LONG $0x385de3c4; WORD $0x01db             // vinserti128    ymm3, ymm4, xmm3, 1
	LONG $0xd3dfedc5                           // vpandn    ymm2, ymm2, ymm3
	LONG $0x7f7ec1c4; WORD $0xf054; BYTE $0x20 // vmovdqu    yword [r8 + 8*rsi + 32], ymm2
	LONG $0x08c68348                           // add    rsi, 8
	LONG $0x02c78348                           // add    rdi, 2
	JNE  LBB4_774
	JMP  LBB4_1309

// LBB4_784 / LBB4_785: vector path that reads signed 32-bit integers from
// [rcx] and stores 64-bit results to [r8], 16 elements per iteration.
// Per lane: the qword broadcast from 40[rbp] where x > 0 (presumably the
// integer 1 -- TODO confirm .LCPI4_15), otherwise -1 where x != 0 and 0 where
// x == 0.
// Setup: rdx = r10d rounded down to a multiple of 16, rsi = 0 (element index),
// xmm0 = zero, xmm8 = all ones (used to invert the equality mask).
LBB4_784:
	WORD $0x8944; BYTE $0xd2       // mov    edx, r10d
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0   // vpcmpeqd    xmm8, xmm8, xmm8
	LONG $0x197de2c4; WORD $0x2855 // vbroadcastsd    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

LBB4_785:
	// Load four groups of four dwords.
	LONG $0x1c6ffac5; BYTE $0xb1               // vmovdqu    xmm3, oword [rcx + 4*rsi]
	LONG $0x646ffac5; WORD $0x10b1             // vmovdqu    xmm4, oword [rcx + 4*rsi + 16]
	LONG $0x6c6ffac5; WORD $0x20b1             // vmovdqu    xmm5, oword [rcx + 4*rsi + 32]
	LONG $0x746ffac5; WORD $0x30b1             // vmovdqu    xmm6, oword [rcx + 4*rsi + 48]
	// "x > 0" masks (signed compare), widened to 64-bit lanes: ymm9, ymm10, ymm7, ymm1.
	LONG $0xf866e1c5                           // vpcmpgtd    xmm7, xmm3, xmm0
	LONG $0x257d62c4; BYTE $0xcf               // vpmovsxdq    ymm9, xmm7
	LONG $0xc866d9c5                           // vpcmpgtd    xmm1, xmm4, xmm0
	LONG $0x257d62c4; BYTE $0xd1               // vpmovsxdq    ymm10, xmm1
	LONG $0xf866d1c5                           // vpcmpgtd    xmm7, xmm5, xmm0
	LONG $0x257de2c4; BYTE $0xff               // vpmovsxdq    ymm7, xmm7
	LONG $0xc866c9c5                           // vpcmpgtd    xmm1, xmm6, xmm0
	LONG $0x257de2c4; BYTE $0xc9               // vpmovsxdq    ymm1, xmm1
	// "x != 0" masks: NOT(x == 0), sign-extended so each qword is -1 or 0.
	LONG $0xd876e1c5                           // vpcmpeqd    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x257de2c4; BYTE $0xdb               // vpmovsxdq    ymm3, xmm3
	LONG $0xe076d9c5                           // vpcmpeqd    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x257de2c4; BYTE $0xe4               // vpmovsxdq    ymm4, xmm4
	LONG $0xe876d1c5                           // vpcmpeqd    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x257de2c4; BYTE $0xed               // vpmovsxdq    ymm5, xmm5
	LONG $0xf076c9c5                           // vpcmpeqd    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x257de2c4; BYTE $0xf6               // vpmovsxdq    ymm6, xmm6
	// Where the "x > 0" mask is set take ymm2, otherwise keep the -1/0 value.
	LONG $0x4b65e3c4; WORD $0x90da             // vblendvpd    ymm3, ymm3, ymm2, ymm9
	LONG $0x4b5de3c4; WORD $0xa0e2             // vblendvpd    ymm4, ymm4, ymm2, ymm10
	LONG $0x4b55e3c4; WORD $0x70ea             // vblendvpd    ymm5, ymm5, ymm2, ymm7
	LONG $0x4b4de3c4; WORD $0x10ca             // vblendvpd    ymm1, ymm6, ymm2, ymm1
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf04c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm1
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_785
	// If the rounded-down count equals r10 nothing is left for the scalar
	// tail; otherwise execution falls through to LBB4_787.
	WORD $0x394c; BYTE $0xd2                   // cmp    rdx, r10
	JE   LBB4_1351

// LBB4_787 / LBB4_788: scalar loop, one element per iteration, indices
// rdx .. r10-1. Reads a signed dword from [rcx + 4*rdx] and stores a qword to
// [r8 + 8*rdx]: 1 if x > 0, -1 if x < 0, 0 if x == 0.
LBB4_787:
	// rsi holds the constant 1 used by the cmovg below.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

LBB4_788:
	WORD $0x3c8b; BYTE $0x91 // mov    edi, dword [rcx + 4*rdx]
	// rax = -(x != 0), i.e. -1 for any non-zero x and 0 for zero.
	WORD $0xc031             // xor    eax, eax
	WORD $0xff85             // test    edi, edi
	WORD $0x950f; BYTE $0xd0 // setne    al
	WORD $0xf748; BYTE $0xd8 // neg    rax
	// neg clobbered the flags, so x is tested again before the signed cmovg.
	WORD $0xff85             // test    edi, edi
	LONG $0xc64f0f48         // cmovg    rax, rsi
	LONG $0xd0048949         // mov    qword [r8 + 8*rdx], rax
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
	JNE  LBB4_788
	JMP  LBB4_1351

// LBB4_789 / LBB4_790: vector path that reads signed 32-bit integers from
// [rcx] and stores 32-bit floats to [r8], 32 elements per iteration.
// Per lane: the float broadcast from 152[rbp] where x > 0 (presumably 1.0f --
// TODO confirm .LCPI4_5), otherwise -1.0f where x != 0 and 0.0f where x == 0.
// Setup: rdx = eax rounded down to a multiple of 32, rsi = 0 (element index),
// xmm0 = zero, ymm1 = all ones (used to invert the equality mask).
LBB4_789:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5                     // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x00009895187de2c4; BYTE $0x00 // vbroadcastss    ymm2, dword 152[rbp] /* [rip + .LCPI4_5] */

LBB4_790:
	LONG $0x1c6ffec5; BYTE $0xb1               // vmovdqu    ymm3, yword [rcx + 4*rsi]
	LONG $0x646ffec5; WORD $0x20b1             // vmovdqu    ymm4, yword [rcx + 4*rsi + 32]
	LONG $0x6c6ffec5; WORD $0x40b1             // vmovdqu    ymm5, yword [rcx + 4*rsi + 64]
	LONG $0x746ffec5; WORD $0x60b1             // vmovdqu    ymm6, yword [rcx + 4*rsi + 96]
	// "x > 0" masks (signed compare) in ymm7..ymm10.
	LONG $0xf866e5c5                           // vpcmpgtd    ymm7, ymm3, ymm0
	LONG $0xc0665dc5                           // vpcmpgtd    ymm8, ymm4, ymm0
	LONG $0xc86655c5                           // vpcmpgtd    ymm9, ymm5, ymm0
	LONG $0xd0664dc5                           // vpcmpgtd    ymm10, ymm6, ymm0
	// NOT(x == 0) gives integer -1/0 per lane; vcvtdq2ps turns that into
	// -1.0f/0.0f.
	LONG $0xd876e5c5                           // vpcmpeqd    ymm3, ymm3, ymm0
	LONG $0xd9efe5c5                           // vpxor    ymm3, ymm3, ymm1
	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
	LONG $0xe076ddc5                           // vpcmpeqd    ymm4, ymm4, ymm0
	LONG $0xe1efddc5                           // vpxor    ymm4, ymm4, ymm1
	LONG $0xe45bfcc5                           // vcvtdq2ps    ymm4, ymm4
	LONG $0xe876d5c5                           // vpcmpeqd    ymm5, ymm5, ymm0
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0xed5bfcc5                           // vcvtdq2ps    ymm5, ymm5
	LONG $0xf076cdc5                           // vpcmpeqd    ymm6, ymm6, ymm0
	LONG $0xf1efcdc5                           // vpxor    ymm6, ymm6, ymm1
	LONG $0xf65bfcc5                           // vcvtdq2ps    ymm6, ymm6
	// Where the "x > 0" mask is set take ymm2, otherwise keep -1.0f/0.0f.
	LONG $0x4a65e3c4; WORD $0x70da             // vblendvps    ymm3, ymm3, ymm2, ymm7
	LONG $0x4a5de3c4; WORD $0x80e2             // vblendvps    ymm4, ymm4, ymm2, ymm8
	LONG $0x4a55e3c4; WORD $0x90ea             // vblendvps    ymm5, ymm5, ymm2, ymm9
	LONG $0x4a4de3c4; WORD $0xa0f2             // vblendvps    ymm6, ymm6, ymm2, ymm10
	LONG $0x117cc1c4; WORD $0xb01c             // vmovups    yword [r8 + 4*rsi], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x117cc1c4; WORD $0xb074; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm6
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_790
	// If the rounded-down count equals rax nothing is left for the scalar
	// tail; otherwise execution falls through to LBB4_792.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// LBB4_792 .. LBB4_796: scalar loop, one element per iteration, indices
// rdx .. rax-1. Reads a signed dword from [rcx + 4*rdx] and stores a float to
// [r8 + 4*rdx]: xmm1 if x > 0, xmm0 if x < 0, 0.0f if x == 0.
// xmm0 is loaded from 168[rbp] and xmm1 from 152[rbp]; presumably -1.0f and
// 1.0f respectively (TODO confirm .LCPI4_14 / .LCPI4_5).
LBB4_792:
	QUAD $0x000000a88510fac5 // vmovss    xmm0, dword 168[rbp] /* [rip + .LCPI4_14] */
	QUAD $0x000000988d10fac5 // vmovss    xmm1, dword 152[rbp] /* [rip + .LCPI4_5] */
	JMP  LBB4_794

// Loop latch: store the selected value, advance the index, exit when done.
LBB4_793:
	LONG $0x117ac1c4; WORD $0x901c // vmovss    dword [r8 + 4*rdx], xmm3
	LONG $0x01c28348               // add    rdx, 1
	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
	JE   LBB4_1351

// Loop head. The flags set by this cmp are consumed by both the JNE here and
// the JG in LBB4_796; the vector moves/xor in between do not modify flags.
LBB4_794:
	LONG $0x00913c83 // cmp    dword [rcx + 4*rdx], 0
	LONG $0xd028f8c5 // vmovaps    xmm2, xmm0
	JNE  LBB4_796
	// x == 0: the non-positive candidate becomes 0.0f.
	LONG $0xd257e8c5 // vxorps    xmm2, xmm2, xmm2

LBB4_796:
	// Start with the positive value; replace it with xmm2 unless x > 0.
	LONG $0xd928f8c5 // vmovaps    xmm3, xmm1
	JG   LBB4_793
	LONG $0xda28f8c5 // vmovaps    xmm3, xmm2
	JMP  LBB4_793

// LBB4_831 / LBB4_832: vector path that reads 64-bit floats from [rcx] and
// stores 32-bit integers to [r8], 16 elements per iteration.
// Per lane: 0 where x compares equal to 0.0, otherwise
// trunc((x AND ymm1) OR ymm2). ymm1/ymm2 are broadcast from 0[rbp] and 8[rbp];
// presumably the sign-bit mask and 1.0, so the value is -1/+1 (TODO confirm
// .LCPI4_0 / .LCPI4_1).
// Setup: rsi = eax rounded down to a multiple of 16, rdi = 0 (element index),
// xmm8 = zero.
LBB4_831:
	WORD $0xc689                   // mov    esi, eax
	WORD $0xe683; BYTE $0xf0       // and    esi, -16
	WORD $0xff31                   // xor    edi, edi
	LONG $0x573941c4; BYTE $0xc0   // vxorpd    xmm8, xmm8, xmm8
	LONG $0x197de2c4; WORD $0x004d // vbroadcastsd    ymm1, qword 0[rbp] /* [rip + .LCPI4_0] */
	LONG $0x197de2c4; WORD $0x0855 // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI4_1] */

LBB4_832:
	LONG $0x1c10fdc5; BYTE $0xf9               // vmovupd    ymm3, yword [rcx + 8*rdi]
	LONG $0x6410fdc5; WORD $0x20f9             // vmovupd    ymm4, yword [rcx + 8*rdi + 32]
	LONG $0x6c10fdc5; WORD $0x40f9             // vmovupd    ymm5, yword [rcx + 8*rdi + 64]
	LONG $0x7410fdc5; WORD $0x60f9             // vmovupd    ymm6, yword [rcx + 8*rdi + 96]
	// "x == 0.0" masks. Each 64-bit mask lane is narrowed to a 32-bit lane by
	// splitting the ymm into halves and packing dwords to words with
	// saturation (all-ones stays all-ones, zero stays zero).
	// Results: xmm9, xmm10, xmm11, xmm0.
	LONG $0xfbc2bdc5; BYTE $0x00               // vcmpeqpd    ymm7, ymm8, ymm3
	LONG $0x197de3c4; WORD $0x01f8             // vextractf128    xmm0, ymm7, 1
	LONG $0xc86b41c5                           // vpackssdw    xmm9, xmm7, xmm0
	LONG $0xfcc2bdc5; BYTE $0x00               // vcmpeqpd    ymm7, ymm8, ymm4
	LONG $0x197de3c4; WORD $0x01f8             // vextractf128    xmm0, ymm7, 1
	LONG $0xd06b41c5                           // vpackssdw    xmm10, xmm7, xmm0
	LONG $0xfdc2bdc5; BYTE $0x00               // vcmpeqpd    ymm7, ymm8, ymm5
	LONG $0x197de3c4; WORD $0x01f8             // vextractf128    xmm0, ymm7, 1
	LONG $0xd86b41c5                           // vpackssdw    xmm11, xmm7, xmm0
	LONG $0xfec2bdc5; BYTE $0x00               // vcmpeqpd    ymm7, ymm8, ymm6
	LONG $0x197de3c4; WORD $0x01f8             // vextractf128    xmm0, ymm7, 1
	LONG $0xc06bc1c5                           // vpackssdw    xmm0, xmm7, xmm0
	// (x AND ymm1) OR ymm2 for each group.
	LONG $0xd954e5c5                           // vandpd    ymm3, ymm3, ymm1
	LONG $0xdb56edc5                           // vorpd    ymm3, ymm2, ymm3
	LONG $0xe154ddc5                           // vandpd    ymm4, ymm4, ymm1
	LONG $0xe456edc5                           // vorpd    ymm4, ymm2, ymm4
	LONG $0xe954d5c5                           // vandpd    ymm5, ymm5, ymm1
	LONG $0xed56edc5                           // vorpd    ymm5, ymm2, ymm5
	LONG $0xf154cdc5                           // vandpd    ymm6, ymm6, ymm1
	LONG $0xf656edc5                           // vorpd    ymm6, ymm2, ymm6
	// Truncate to int32 and clear the lanes whose input compared equal to 0.0.
	LONG $0xdbe6fdc5                           // vcvttpd2dq    xmm3, ymm3
	LONG $0xdbdfb1c5                           // vpandn    xmm3, xmm9, xmm3
	LONG $0xe4e6fdc5                           // vcvttpd2dq    xmm4, ymm4
	LONG $0xe4dfa9c5                           // vpandn    xmm4, xmm10, xmm4
	LONG $0xede6fdc5                           // vcvttpd2dq    xmm5, ymm5
	LONG $0xf6e6fdc5                           // vcvttpd2dq    xmm6, ymm6
	LONG $0xeddfa1c5                           // vpandn    xmm5, xmm11, xmm5
	LONG $0xc6dff9c5                           // vpandn    xmm0, xmm0, xmm6
	LONG $0x7f7ac1c4; WORD $0xb81c             // vmovdqu    oword [r8 + 4*rdi], xmm3
	LONG $0x7f7ac1c4; WORD $0xb864; BYTE $0x10 // vmovdqu    oword [r8 + 4*rdi + 16], xmm4
	LONG $0x7f7ac1c4; WORD $0xb86c; BYTE $0x20 // vmovdqu    oword [r8 + 4*rdi + 32], xmm5
	LONG $0x7f7ac1c4; WORD $0xb844; BYTE $0x30 // vmovdqu    oword [r8 + 4*rdi + 48], xmm0
	LONG $0x10c78348                           // add    rdi, 16
	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
	JNE  LBB4_832
	// If the rounded-down count equals rax nothing is left for the scalar
	// tail; otherwise execution falls through to LBB4_834.
	WORD $0x3948; BYTE $0xc6                   // cmp    rsi, rax
	JE   LBB4_1351

// LBB4_834 / LBB4_835: scalar loop, one element per iteration, indices
// rsi .. rax-1. Reads a 64-bit float from [rcx + 8*rsi] and stores a dword to
// [r8 + 4*rsi]: trunc((x AND xmm1) OR xmm2), replaced by edx when the compare
// against 0.0 sets ZF.
// xmm1 is loaded from 48[rbp] and xmm2 from 8[rbp]; presumably a sign-bit mask
// and 1.0 (TODO confirm .LCPI4_2 / .LCPI4_1).
// NOTE(review): edx is not written anywhere in this excerpt; presumably it is
// 0 on entry -- confirm against the code that jumps here.
LBB4_834:
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x4d28f9c5; BYTE $0x30 // vmovapd    xmm1, oword 48[rbp] /* [rip + .LCPI4_2] */
	LONG $0x5512fbc5; BYTE $0x08 // vmovddup    xmm2, qword 8[rbp] /* [rip + .LCPI4_1] */

LBB4_835:
	LONG $0x1c10fbc5; BYTE $0xf1 // vmovsd    xmm3, qword [rcx + 8*rsi]
	// The flags from this compare are consumed by the cmove below; the vector
	// logic ops and the conversion in between do not modify flags.
	// NOTE(review): vucomisd also sets ZF for an unordered (NaN) operand, so a
	// NaN input takes the cmove too -- confirm that is intended.
	LONG $0xc32ef9c5             // vucomisd    xmm0, xmm3
	LONG $0xd954e1c5             // vandpd    xmm3, xmm3, xmm1
	LONG $0xdb56e9c5             // vorpd    xmm3, xmm2, xmm3
	LONG $0xfb2cfbc5             // vcvttsd2si    edi, xmm3
	WORD $0x440f; BYTE $0xfa     // cmove    edi, edx
	LONG $0xb03c8941             // mov    dword [r8 + 4*rsi], edi
	LONG $0x01c68348             // add    rsi, 1
	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
	JNE  LBB4_835
	JMP  LBB4_1351

// LBB4_839 / LBB4_840: vector path that reads 64-bit integers from [rcx] and
// stores 32-bit integers to [r8], 16 elements per iteration.
// Per lane: the dword broadcast from 156[rbp] where x != 0 (presumably the
// integer 1 -- TODO confirm .LCPI4_8), otherwise 0. Only equality with zero is
// tested, so the sign of x is not examined.
// Setup: rdx = eax rounded down to a multiple of 16, rsi = 0 (element index),
// xmm0 = zero, ymm1 = all ones (used to invert the equality mask).
LBB4_839:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xf0             // and    edx, -16
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5                     // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x00009c955879e2c4; BYTE $0x00 // vpbroadcastd    xmm2, dword 156[rbp] /* [rip + .LCPI4_8] */

// Each of the four groups below follows the same steps: compare the qwords
// with zero, invert to get the "x != 0" mask, narrow the 64-bit mask lanes to
// 32-bit lanes (extract high half + saturating pack), then AND with xmm2.
LBB4_840:
	LONG $0x297de2c4; WORD $0xf11c             // vpcmpeqq    ymm3, ymm0, yword [rcx + 8*rsi]
	LONG $0xd9efe5c5                           // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xdadbe1c5                           // vpand    xmm3, xmm3, xmm2
	LONG $0x297de2c4; WORD $0xf164; BYTE $0x20 // vpcmpeqq    ymm4, ymm0, yword [rcx + 8*rsi + 32]
	LONG $0xe1efddc5                           // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0x297de2c4; WORD $0xf16c; BYTE $0x40 // vpcmpeqq    ymm5, ymm0, yword [rcx + 8*rsi + 64]
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xeadbd1c5                           // vpand    xmm5, xmm5, xmm2
	LONG $0x297de2c4; WORD $0xf174; BYTE $0x60 // vpcmpeqq    ymm6, ymm0, yword [rcx + 8*rsi + 96]
	LONG $0xf1efcdc5                           // vpxor    ymm6, ymm6, ymm1
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xf2dbc9c5                           // vpand    xmm6, xmm6, xmm2
	LONG $0x7f7ac1c4; WORD $0xb01c             // vmovdqu    oword [r8 + 4*rsi], xmm3
	LONG $0x7f7ac1c4; WORD $0xb064; BYTE $0x10 // vmovdqu    oword [r8 + 4*rsi + 16], xmm4
	LONG $0x7f7ac1c4; WORD $0xb06c; BYTE $0x20 // vmovdqu    oword [r8 + 4*rsi + 32], xmm5
	LONG $0x7f7ac1c4; WORD $0xb074; BYTE $0x30 // vmovdqu    oword [r8 + 4*rsi + 48], xmm6
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_840
	// If the rounded-down count equals rax nothing is left for the scalar
	// tail; otherwise execution falls through to LBB4_842.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// LBB4_842: scalar loop, one element per iteration, indices rdx .. rax-1.
// Stores the dword (qword [rcx + 8*rdx] != 0) ? 1 : 0 to [r8 + 4*rdx].
LBB4_842:
	// esi is zeroed before the compare so that setne only has to fill sil.
	WORD $0xf631                 // xor    esi, esi
	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90348941             // mov    dword [r8 + 4*rdx], esi
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_842
	JMP  LBB4_1351

// LBB4_843 / LBB4_844: vector path that reads 16-bit integers from [rcx] and
// stores 32-bit integers to [r8], 32 elements per iteration.
// Per lane: (x != 0) mask, zero-extended to 32 bits and AND-ed with the dword
// broadcast from 156[rbp] (presumably the integer 1, giving 1/0 -- TODO
// confirm .LCPI4_8). Only equality with zero is tested.
// Setup: rdx = eax rounded down to a multiple of 32, rsi = 0 (element index),
// xmm0 = zero, xmm1 = all ones (used to invert the equality mask).
LBB4_843:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5                     // vpcmpeqd    xmm1, xmm1, xmm1
	QUAD $0x00009c95587de2c4; BYTE $0x00 // vpbroadcastd    ymm2, dword 156[rbp] /* [rip + .LCPI4_8] */

// Four groups of eight words each: compare with zero, invert, widen the word
// mask to dwords, AND with ymm2.
LBB4_844:
	LONG $0x1c75f9c5; BYTE $0x71               // vpcmpeqw    xmm3, xmm0, oword [rcx + 2*rsi]
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x337de2c4; BYTE $0xdb               // vpmovzxwd    ymm3, xmm3
	LONG $0x6475f9c5; WORD $0x1071             // vpcmpeqw    xmm4, xmm0, oword [rcx + 2*rsi + 16]
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x337de2c4; BYTE $0xe4               // vpmovzxwd    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0x6c75f9c5; WORD $0x2071             // vpcmpeqw    xmm5, xmm0, oword [rcx + 2*rsi + 32]
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x337de2c4; BYTE $0xed               // vpmovzxwd    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0x7475f9c5; WORD $0x3071             // vpcmpeqw    xmm6, xmm0, oword [rcx + 2*rsi + 48]
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x337de2c4; BYTE $0xf6               // vpmovzxwd    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	LONG $0x7f7ec1c4; WORD $0xb01c             // vmovdqu    yword [r8 + 4*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0xb064; BYTE $0x20 // vmovdqu    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0xb06c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0xb074; BYTE $0x60 // vmovdqu    yword [r8 + 4*rsi + 96], ymm6
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_844
	// If the rounded-down count equals rax nothing is left for the scalar
	// tail; otherwise execution falls through to LBB4_846.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// LBB4_846: scalar loop, one element per iteration, indices rdx .. rax-1.
// Stores the dword (word [rcx + 2*rdx] != 0) ? 1 : 0 to [r8 + 4*rdx].
LBB4_846:
	// esi is zeroed before the compare so that setne only has to fill sil.
	WORD $0xf631                 // xor    esi, esi
	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
	LONG $0xd6950f40             // setne    sil
	LONG $0x90348941             // mov    dword [r8 + 4*rdx], esi
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_846
	JMP  LBB4_1351

// LBB4_847 / LBB4_848: vector path that reads signed 16-bit integers from
// [rcx] and stores 32-bit values to [r8], 32 elements per iteration.
// Per lane: the dword broadcast from 156[rbp] where x > 0 (presumably the
// integer 1 -- TODO confirm .LCPI4_8), otherwise -1 where x != 0 and 0 where
// x == 0.
// Setup: rdx = r10d rounded down to a multiple of 32, rsi = 0 (element index),
// xmm0 = zero, xmm8 = all ones (used to invert the equality mask).
LBB4_847:
	WORD $0x8944; BYTE $0xd2             // mov    edx, r10d
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0         // vpcmpeqd    xmm8, xmm8, xmm8
	QUAD $0x00009c95187de2c4; BYTE $0x00 // vbroadcastss    ymm2, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_848:
	// Load four groups of eight words.
	LONG $0x1c6ffac5; BYTE $0x71               // vmovdqu    xmm3, oword [rcx + 2*rsi]
	LONG $0x646ffac5; WORD $0x1071             // vmovdqu    xmm4, oword [rcx + 2*rsi + 16]
	LONG $0x6c6ffac5; WORD $0x2071             // vmovdqu    xmm5, oword [rcx + 2*rsi + 32]
	LONG $0x746ffac5; WORD $0x3071             // vmovdqu    xmm6, oword [rcx + 2*rsi + 48]
	// "x > 0" masks (signed compare), widened to 32-bit lanes: ymm9, ymm10, ymm7, ymm1.
	LONG $0xf865e1c5                           // vpcmpgtw    xmm7, xmm3, xmm0
	LONG $0x237d62c4; BYTE $0xcf               // vpmovsxwd    ymm9, xmm7
	LONG $0xc865d9c5                           // vpcmpgtw    xmm1, xmm4, xmm0
	LONG $0x237d62c4; BYTE $0xd1               // vpmovsxwd    ymm10, xmm1
	LONG $0xf865d1c5                           // vpcmpgtw    xmm7, xmm5, xmm0
	LONG $0x237de2c4; BYTE $0xff               // vpmovsxwd    ymm7, xmm7
	LONG $0xc865c9c5                           // vpcmpgtw    xmm1, xmm6, xmm0
	LONG $0x237de2c4; BYTE $0xc9               // vpmovsxwd    ymm1, xmm1
	// "x != 0" masks: NOT(x == 0), sign-extended so each dword is -1 or 0.
	LONG $0xd875e1c5                           // vpcmpeqw    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x237de2c4; BYTE $0xdb               // vpmovsxwd    ymm3, xmm3
	LONG $0xe075d9c5                           // vpcmpeqw    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x237de2c4; BYTE $0xe4               // vpmovsxwd    ymm4, xmm4
	LONG $0xe875d1c5                           // vpcmpeqw    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x237de2c4; BYTE $0xed               // vpmovsxwd    ymm5, xmm5
	LONG $0xf075c9c5                           // vpcmpeqw    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x237de2c4; BYTE $0xf6               // vpmovsxwd    ymm6, xmm6
	// Where the "x > 0" mask is set take ymm2, otherwise keep the -1/0 value.
	LONG $0x4a65e3c4; WORD $0x90da             // vblendvps    ymm3, ymm3, ymm2, ymm9
	LONG $0x4a5de3c4; WORD $0xa0e2             // vblendvps    ymm4, ymm4, ymm2, ymm10
	LONG $0x4a55e3c4; WORD $0x70ea             // vblendvps    ymm5, ymm5, ymm2, ymm7
	LONG $0x4a4de3c4; WORD $0x10ca             // vblendvps    ymm1, ymm6, ymm2, ymm1
	LONG $0x117cc1c4; WORD $0xb01c             // vmovups    yword [r8 + 4*rsi], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x117cc1c4; WORD $0xb04c; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm1
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_848
	// If the rounded-down count equals r10 nothing is left for the scalar
	// tail; otherwise execution falls through to LBB4_850.
	WORD $0x394c; BYTE $0xd2                   // cmp    rdx, r10
	JE   LBB4_1351

// LBB4_850 / LBB4_851: scalar loop, one element per iteration, indices
// rdx .. r10-1. Reads a word from [rcx + 2*rdx] and stores a dword to
// [r8 + 4*rdx]: 1 if x > 0 (signed 16-bit test), -1 if x < 0, 0 if x == 0.
LBB4_850:
	// esi holds the constant 1 used by the cmovg below.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

LBB4_851:
	LONG $0x513cb70f         // movzx    edi, word [rcx + 2*rdx]
	// eax = -(x != 0), i.e. -1 for any non-zero x and 0 for zero.
	WORD $0xc031             // xor    eax, eax
	WORD $0x8566; BYTE $0xff // test    di, di
	WORD $0x950f; BYTE $0xd0 // setne    al
	WORD $0xd8f7             // neg    eax
	// neg clobbered the flags, so di is tested again; the 16-bit test makes
	// cmovg see the sign of the word even though edi was zero-extended.
	WORD $0x8566; BYTE $0xff // test    di, di
	WORD $0x4f0f; BYTE $0xc6 // cmovg    eax, esi
	LONG $0x90048941         // mov    dword [r8 + 4*rdx], eax
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
	JNE  LBB4_851
	JMP  LBB4_1351

// LBB4_852 / LBB4_853: vector path that reads signed 64-bit integers from
// [rcx] and stores 32-bit values to [r8], 16 elements per iteration.
// Per lane: the dword broadcast from 156[rbp] where x > 0 (presumably the
// integer 1 -- TODO confirm .LCPI4_8), otherwise -1 where x != 0 and 0 where
// x == 0.
// Setup: rdx = r10d rounded down to a multiple of 16, rsi = 0 (element index),
// xmm0 = zero, ymm8 = all ones (used to invert the equality mask).
LBB4_852:
	WORD $0x8944; BYTE $0xd2             // mov    edx, r10d
	WORD $0xe283; BYTE $0xf0             // and    edx, -16
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0x763d41c4; BYTE $0xc0         // vpcmpeqd    ymm8, ymm8, ymm8
	QUAD $0x00009c951879e2c4; BYTE $0x00 // vbroadcastss    xmm2, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_853:
	// Load four groups of four qwords.
	LONG $0x246ffec5; BYTE $0xf1               // vmovdqu    ymm4, yword [rcx + 8*rsi]
	LONG $0x6c6ffec5; WORD $0x20f1             // vmovdqu    ymm5, yword [rcx + 8*rsi + 32]
	LONG $0x746ffec5; WORD $0x40f1             // vmovdqu    ymm6, yword [rcx + 8*rsi + 64]
	LONG $0x7c6ffec5; WORD $0x60f1             // vmovdqu    ymm7, yword [rcx + 8*rsi + 96]
	// "x > 0" masks (signed compare), each 64-bit mask lane narrowed to a
	// 32-bit lane (extract high half + saturating pack): xmm9, xmm10, xmm11, xmm1.
	LONG $0x375de2c4; BYTE $0xd8               // vpcmpgtq    ymm3, ymm4, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xc96b61c5                           // vpackssdw    xmm9, xmm3, xmm1
	LONG $0x3755e2c4; BYTE $0xc8               // vpcmpgtq    ymm1, ymm5, ymm0
	LONG $0x397de3c4; WORD $0x01cb             // vextracti128    xmm3, ymm1, 1
	LONG $0xd36b71c5                           // vpackssdw    xmm10, xmm1, xmm3
	LONG $0x374de2c4; BYTE $0xd8               // vpcmpgtq    ymm3, ymm6, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xd96b61c5                           // vpackssdw    xmm11, xmm3, xmm1
	LONG $0x3745e2c4; BYTE $0xd8               // vpcmpgtq    ymm3, ymm7, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xc96be1c5                           // vpackssdw    xmm1, xmm3, xmm1
	// "x != 0" masks: NOT(x == 0), narrowed the same way, giving -1/0 dwords
	// in xmm3..xmm6.
	LONG $0x295de2c4; BYTE $0xd8               // vpcmpeqq    ymm3, ymm4, ymm0
	LONG $0xdbefbdc5                           // vpxor    ymm3, ymm8, ymm3
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0x2955e2c4; BYTE $0xe0               // vpcmpeqq    ymm4, ymm5, ymm0
	LONG $0xe4efbdc5                           // vpxor    ymm4, ymm8, ymm4
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0x294de2c4; BYTE $0xe8               // vpcmpeqq    ymm5, ymm6, ymm0
	LONG $0xedefbdc5                           // vpxor    ymm5, ymm8, ymm5
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0x2945e2c4; BYTE $0xf0               // vpcmpeqq    ymm6, ymm7, ymm0
	LONG $0xf6efbdc5                           // vpxor    ymm6, ymm8, ymm6
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5                           // vpackssdw    xmm6, xmm6, xmm7
	// Where the "x > 0" mask is set take xmm2, otherwise keep the -1/0 value.
	LONG $0x4a61e3c4; WORD $0x90da             // vblendvps    xmm3, xmm3, xmm2, xmm9
	LONG $0x4a59e3c4; WORD $0xa0e2             // vblendvps    xmm4, xmm4, xmm2, xmm10
	LONG $0x4a51e3c4; WORD $0xb0ea             // vblendvps    xmm5, xmm5, xmm2, xmm11
	LONG $0x4a49e3c4; WORD $0x10ca             // vblendvps    xmm1, xmm6, xmm2, xmm1
	LONG $0x1178c1c4; WORD $0xb01c             // vmovups    oword [r8 + 4*rsi], xmm3
	LONG $0x1178c1c4; WORD $0xb064; BYTE $0x10 // vmovups    oword [r8 + 4*rsi + 16], xmm4
	LONG $0x1178c1c4; WORD $0xb06c; BYTE $0x20 // vmovups    oword [r8 + 4*rsi + 32], xmm5
	LONG $0x1178c1c4; WORD $0xb04c; BYTE $0x30 // vmovups    oword [r8 + 4*rsi + 48], xmm1
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_853
	// If the rounded-down count equals r10 nothing is left for the scalar
	// tail; otherwise execution falls through to LBB4_855.
	WORD $0x394c; BYTE $0xd2                   // cmp    rdx, r10
	JE   LBB4_1351

// LBB4_855 / LBB4_856: scalar loop, one element per iteration, indices
// rdx .. r10-1. Reads a signed qword from [rcx + 8*rdx] and stores a dword to
// [r8 + 4*rdx]: 1 if x > 0, -1 if x < 0, 0 if x == 0.
LBB4_855:
	// esi holds the constant 1 used by the cmovg below.
	LONG $0x000001be; BYTE $0x00 // mov    esi, 1

LBB4_856:
	LONG $0xd13c8b48         // mov    rdi, qword [rcx + 8*rdx]
	// eax = -(x != 0), i.e. -1 for any non-zero x and 0 for zero.
	WORD $0xc031             // xor    eax, eax
	WORD $0x8548; BYTE $0xff // test    rdi, rdi
	WORD $0x950f; BYTE $0xd0 // setne    al
	WORD $0xd8f7             // neg    eax
	// neg clobbered the flags, so x is tested again before the signed cmovg.
	WORD $0x8548; BYTE $0xff // test    rdi, rdi
	WORD $0x4f0f; BYTE $0xc6 // cmovg    eax, esi
	LONG $0x90048941         // mov    dword [r8 + 4*rdx], eax
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
	JNE  LBB4_856
	JMP  LBB4_1351

// LBB4_857 / LBB4_858: vector path that reads 32-bit floats from [rcx] and
// stores 32-bit integers to [r8], 32 elements per iteration.
// Per lane: 0 where x compares equal to 0.0, otherwise (x's bits >> 31
// arithmetic) OR the dword broadcast from 156[rbp] -- presumably the integer
// 1, giving -1/+1 (TODO confirm .LCPI4_8).
// Setup: rdx = eax rounded down to a multiple of 32, rsi = 0 (element index),
// xmm0 = zero.
LBB4_857:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc057f8c5                     // vxorps    xmm0, xmm0, xmm0
	QUAD $0x00009c8d587de2c4; BYTE $0x00 // vpbroadcastd    ymm1, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_858:
	// The float data is loaded with an integer move so its raw bits can be
	// shifted below.
	LONG $0x146ffec5; BYTE $0xb1               // vmovdqu    ymm2, yword [rcx + 4*rsi]
	LONG $0x5c6ffec5; WORD $0x20b1             // vmovdqu    ymm3, yword [rcx + 4*rsi + 32]
	LONG $0x646ffec5; WORD $0x40b1             // vmovdqu    ymm4, yword [rcx + 4*rsi + 64]
	LONG $0x6c6ffec5; WORD $0x60b1             // vmovdqu    ymm5, yword [rcx + 4*rsi + 96]
	// Replicate the sign bit across the lane (0 or -1) and OR with ymm1.
	LONG $0xe272cdc5; BYTE $0x1f               // vpsrad    ymm6, ymm2, 31
	LONG $0xf1ebcdc5                           // vpor    ymm6, ymm6, ymm1
	LONG $0xe372c5c5; BYTE $0x1f               // vpsrad    ymm7, ymm3, 31
	LONG $0xf9ebc5c5                           // vpor    ymm7, ymm7, ymm1
	LONG $0xe472bdc5; BYTE $0x1f               // vpsrad    ymm8, ymm4, 31
	LONG $0xc1eb3dc5                           // vpor    ymm8, ymm8, ymm1
	LONG $0xe572b5c5; BYTE $0x1f               // vpsrad    ymm9, ymm5, 31
	LONG $0xc9eb35c5                           // vpor    ymm9, ymm9, ymm1
	// Integer -> float -> integer (truncating) round trip of that value.
	LONG $0xf65bfcc5                           // vcvtdq2ps    ymm6, ymm6
	LONG $0xff5bfcc5                           // vcvtdq2ps    ymm7, ymm7
	LONG $0x5b7c41c4; BYTE $0xc0               // vcvtdq2ps    ymm8, ymm8
	LONG $0x5b7c41c4; BYTE $0xc9               // vcvtdq2ps    ymm9, ymm9
	LONG $0xf65bfec5                           // vcvttps2dq    ymm6, ymm6
	LONG $0xff5bfec5                           // vcvttps2dq    ymm7, ymm7
	LONG $0x5b7e41c4; BYTE $0xc0               // vcvttps2dq    ymm8, ymm8
	LONG $0x5b7e41c4; BYTE $0xc9               // vcvttps2dq    ymm9, ymm9
	// Keep the result only in lanes where x != 0.0 (compare predicate 4).
	LONG $0xd0c2ecc5; BYTE $0x04               // vcmpneqps    ymm2, ymm2, ymm0
	LONG $0xd654ecc5                           // vandps    ymm2, ymm2, ymm6
	LONG $0xd8c2e4c5; BYTE $0x04               // vcmpneqps    ymm3, ymm3, ymm0
	LONG $0xdf54e4c5                           // vandps    ymm3, ymm3, ymm7
	LONG $0xe0c2dcc5; BYTE $0x04               // vcmpneqps    ymm4, ymm4, ymm0
	LONG $0xe454bcc5                           // vandps    ymm4, ymm8, ymm4
	LONG $0xe8c2d4c5; BYTE $0x04               // vcmpneqps    ymm5, ymm5, ymm0
	LONG $0xed54b4c5                           // vandps    ymm5, ymm9, ymm5
	LONG $0x117cc1c4; WORD $0xb014             // vmovups    yword [r8 + 4*rsi], ymm2
	LONG $0x117cc1c4; WORD $0xb05c; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm5
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_858
	// If the rounded-down count equals rax nothing is left for the scalar
	// tail; otherwise execution falls through to LBB4_860.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351

// LBB4_860 .. LBB4_862: scalar loop, one element per iteration, indices
// rdx .. rax-1. Reads a 32-bit float from [rcx + 4*rdx] and stores a dword to
// [r8 + 4*rdx]: 0 when the compare against 0.0 takes the JE, otherwise -1 if
// the sign bit is set and 1 if it is clear.
LBB4_860:
	LONG $0xc057f8c5 // vxorps    xmm0, xmm0, xmm0
	JMP  LBB4_862

// Loop latch: store the result, advance the index, exit when done.
LBB4_861:
	LONG $0x90348941         // mov    dword [r8 + 4*rdx], esi
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JE   LBB4_1351

LBB4_862:
	LONG $0x0c10fac5; BYTE $0x91 // vmovss    xmm1, dword [rcx + 4*rdx]
	// esi = 0 is the value stored when the JE below is taken.
	// NOTE(review): vucomiss also sets ZF for an unordered (NaN) operand, so a
	// NaN input stores 0 on this path -- confirm that is intended.
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc12ef8c5             // vucomiss    xmm0, xmm1
	JE   LBB4_861
	// esi = -(sign bit of x) | 1, i.e. -1 for negative x and 1 otherwise.
	LONG $0xf150f8c5             // vmovmskps    esi, xmm1
	WORD $0xe683; BYTE $0x01     // and    esi, 1
	WORD $0xdef7                 // neg    esi
	WORD $0xce83; BYTE $0x01     // or    esi, 1
	// Integer -> float -> integer (truncating) round trip of that value.
	LONG $0xce2aaac5             // vcvtsi2ss    xmm1, xmm10, esi
	LONG $0xf12cfac5             // vcvttss2si    esi, xmm1
	JMP  LBB4_861

// LBB4_870 / LBB4_871: vector path that reads 32-bit integers from [rcx] and
// stores 32-bit integers to [r8], 32 elements per iteration.
// Per lane: the dword broadcast from 156[rbp] where x != 0 (presumably the
// integer 1 -- TODO confirm .LCPI4_8), otherwise 0. Only equality with zero is
// tested.
// Setup: rdx = eax rounded down to a multiple of 32, rsi = 0 (element index),
// xmm0 = zero.
LBB4_870:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	QUAD $0x00009c8d587de2c4; BYTE $0x00 // vpbroadcastd    ymm1, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_871:
	// vpandn computes (NOT mask) AND ymm1, so lanes equal to zero become 0 and
	// all other lanes receive the broadcast constant.
	LONG $0x1476fdc5; BYTE $0xb1               // vpcmpeqd    ymm2, ymm0, yword [rcx + 4*rsi]
	LONG $0xd1dfedc5                           // vpandn    ymm2, ymm2, ymm1
	LONG $0x5c76fdc5; WORD $0x20b1             // vpcmpeqd    ymm3, ymm0, yword [rcx + 4*rsi + 32]
	LONG $0xd9dfe5c5                           // vpandn    ymm3, ymm3, ymm1
	LONG $0x6476fdc5; WORD $0x40b1             // vpcmpeqd    ymm4, ymm0, yword [rcx + 4*rsi + 64]
	LONG $0x6c76fdc5; WORD $0x60b1             // vpcmpeqd    ymm5, ymm0, yword [rcx + 4*rsi + 96]
	LONG $0xe1dfddc5                           // vpandn    ymm4, ymm4, ymm1
	LONG $0xe9dfd5c5                           // vpandn    ymm5, ymm5, ymm1
	LONG $0x7f7ec1c4; WORD $0xb014             // vmovdqu    yword [r8 + 4*rsi], ymm2
	LONG $0x7f7ec1c4; WORD $0xb05c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rsi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xb064; BYTE $0x40 // vmovdqu    yword [r8 + 4*rsi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xb06c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rsi + 96], ymm5
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_871
	// If the rounded-down count equals rax everything has been processed;
	// otherwise continue at LBB4_873 (outside this excerpt).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_873

LBB4_877:
	// Vector path: byte inputs (compared as signed) -> 32-bit outputs,
	// 32 elements per iteration.
	//   rdx  = r11d rounded down to a multiple of 32 (vector trip count)
	//   rsi  = element index, starting at 0
	//   xmm0 = zero, xmm8 = all-ones (used to invert compare masks)
	//   ymm2 = dword at 156[rbp] (.LCPI4_8) broadcast to every lane; value is
	//          defined outside this chunk -- presumably integer 1, TODO confirm
	WORD $0x8944; BYTE $0xda             // mov    edx, r11d
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0         // vpcmpeqd    xmm8, xmm8, xmm8
	QUAD $0x00009c95187de2c4; BYTE $0x00 // vbroadcastss    ymm2, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_878:
	// Load four groups of 8 input bytes from rcx + rsi.
	LONG $0x1c7efac5; BYTE $0x31               // vmovq    xmm3, qword [rcx + rsi]
	LONG $0x647efac5; WORD $0x0831             // vmovq    xmm4, qword [rcx + rsi + 8]
	LONG $0x6c7efac5; WORD $0x1031             // vmovq    xmm5, qword [rcx + rsi + 16]
	LONG $0x747efac5; WORD $0x1831             // vmovq    xmm6, qword [rcx + rsi + 24]
	// "Input > 0" byte masks, sign-extended to dword lanes
	// (kept in ymm9, ymm10, ymm7, ymm1); they drive the blends below.
	LONG $0xf864e1c5                           // vpcmpgtb    xmm7, xmm3, xmm0
	LONG $0x217d62c4; BYTE $0xcf               // vpmovsxbd    ymm9, xmm7
	LONG $0xc864d9c5                           // vpcmpgtb    xmm1, xmm4, xmm0
	LONG $0x217d62c4; BYTE $0xd1               // vpmovsxbd    ymm10, xmm1
	LONG $0xf864d1c5                           // vpcmpgtb    xmm7, xmm5, xmm0
	LONG $0x217de2c4; BYTE $0xff               // vpmovsxbd    ymm7, xmm7
	LONG $0xc864c9c5                           // vpcmpgtb    xmm1, xmm6, xmm0
	LONG $0x217de2c4; BYTE $0xc9               // vpmovsxbd    ymm1, xmm1
	// "Input != 0" masks: (in == 0) inverted with all-ones, then sign-extended,
	// giving -1 in nonzero lanes and 0 in zero lanes.
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x217de2c4; BYTE $0xdb               // vpmovsxbd    ymm3, xmm3
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x217de2c4; BYTE $0xe4               // vpmovsxbd    ymm4, xmm4
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x217de2c4; BYTE $0xed               // vpmovsxbd    ymm5, xmm5
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x217de2c4; BYTE $0xf6               // vpmovsxbd    ymm6, xmm6
	// Select per lane: the broadcast constant where the input was > 0,
	// otherwise the -1 / 0 value computed above.
	LONG $0x4a65e3c4; WORD $0x90da             // vblendvps    ymm3, ymm3, ymm2, ymm9
	LONG $0x4a5de3c4; WORD $0xa0e2             // vblendvps    ymm4, ymm4, ymm2, ymm10
	LONG $0x4a55e3c4; WORD $0x70ea             // vblendvps    ymm5, ymm5, ymm2, ymm7
	LONG $0x4a4de3c4; WORD $0x10ca             // vblendvps    ymm1, ymm6, ymm2, ymm1
	// Store 32 dword results at r8 + 4*rsi and advance.
	LONG $0x117cc1c4; WORD $0xb01c             // vmovups    yword [r8 + 4*rsi], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x117cc1c4; WORD $0xb04c; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm1
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_878
	// Vector part finished. Done if rdx equals r11; otherwise continue at
	// LBB4_880 (defined outside this chunk; presumably the scalar remainder).
	WORD $0x394c; BYTE $0xda                   // cmp    rdx, r11
	JE   LBB4_1351
	JMP  LBB4_880

LBB4_885:
	// Vector path: byte inputs -> 32-bit outputs using only a nonzero test,
	// 32 elements per iteration.
	//   rdx  = eax rounded down to a multiple of 32 (vector trip count)
	//   rsi  = element index, starting at 0
	//   xmm0 = zero, xmm1 = all-ones (used to invert compare masks)
	//   ymm2 = dword at 156[rbp] (.LCPI4_8) broadcast to every lane; value is
	//          defined outside this chunk -- presumably integer 1, TODO confirm
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5                     // vpcmpeqd    xmm1, xmm1, xmm1
	QUAD $0x00009c95587de2c4; BYTE $0x00 // vpbroadcastd    ymm2, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_886:
	// Load four groups of 8 input bytes from rcx + rsi.
	LONG $0x1c7efac5; BYTE $0x31               // vmovq    xmm3, qword [rcx + rsi]
	LONG $0x647efac5; WORD $0x0831             // vmovq    xmm4, qword [rcx + rsi + 8]
	LONG $0x6c7efac5; WORD $0x1031             // vmovq    xmm5, qword [rcx + rsi + 16]
	LONG $0x747efac5; WORD $0x1831             // vmovq    xmm6, qword [rcx + rsi + 24]
	// Per group: build the "input != 0" byte mask ((in == 0) xor all-ones),
	// zero-extend each byte to a dword, then AND with the broadcast constant.
	// Zero inputs therefore yield 0; nonzero inputs yield (0xFF & constant).
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x317de2c4; BYTE $0xdb               // vpmovzxbd    ymm3, xmm3
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x317de2c4; BYTE $0xe4               // vpmovzxbd    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x317de2c4; BYTE $0xed               // vpmovzxbd    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x317de2c4; BYTE $0xf6               // vpmovzxbd    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	// Store 32 dword results at r8 + 4*rsi and advance.
	LONG $0x7f7ec1c4; WORD $0xb01c             // vmovdqu    yword [r8 + 4*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0xb064; BYTE $0x20 // vmovdqu    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0xb06c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0xb074; BYTE $0x60 // vmovdqu    yword [r8 + 4*rsi + 96], ymm6
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_886
	// Vector part finished. Done if rdx equals rax; otherwise continue at
	// LBB4_888 (defined outside this chunk; presumably the scalar remainder).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_888

LBB4_892:
	// Vector path: 32-bit inputs (compared as signed) -> 32-bit outputs,
	// 32 elements per iteration.
	//   rdx  = r11d rounded down to a multiple of 32 (vector trip count)
	//   rsi  = element index, starting at 0
	//   ymm0 = zero vector, ymm1 = all-ones (used to invert compare masks)
	//   ymm2 = dword at 156[rbp] (.LCPI4_8) broadcast to every lane; value is
	//          defined outside this chunk -- presumably integer 1, TODO confirm
	WORD $0x8944; BYTE $0xda             // mov    edx, r11d
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5                     // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x00009c95587de2c4; BYTE $0x00 // vpbroadcastd    ymm2, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_893:
	// Load four 8-dword vectors from rcx + 4*rsi.
	LONG $0x1c6ffec5; BYTE $0xb1               // vmovdqu    ymm3, yword [rcx + 4*rsi]
	LONG $0x646ffec5; WORD $0x20b1             // vmovdqu    ymm4, yword [rcx + 4*rsi + 32]
	LONG $0x6c6ffec5; WORD $0x40b1             // vmovdqu    ymm5, yword [rcx + 4*rsi + 64]
	LONG $0x746ffec5; WORD $0x60b1             // vmovdqu    ymm6, yword [rcx + 4*rsi + 96]
	// "Input != 0" masks in ymm7..ymm10: -1 for nonzero lanes, 0 for zero lanes.
	LONG $0xf876e5c5                           // vpcmpeqd    ymm7, ymm3, ymm0
	LONG $0xf9efc5c5                           // vpxor    ymm7, ymm7, ymm1
	LONG $0xc0765dc5                           // vpcmpeqd    ymm8, ymm4, ymm0
	LONG $0xc1ef3dc5                           // vpxor    ymm8, ymm8, ymm1
	LONG $0xc87655c5                           // vpcmpeqd    ymm9, ymm5, ymm0
	LONG $0xc9ef35c5                           // vpxor    ymm9, ymm9, ymm1
	LONG $0xd0764dc5                           // vpcmpeqd    ymm10, ymm6, ymm0
	LONG $0xd1ef2dc5                           // vpxor    ymm10, ymm10, ymm1
	// "constant > input" masks overwrite the loaded inputs in ymm3..ymm6.
	LONG $0xdb66edc5                           // vpcmpgtd    ymm3, ymm2, ymm3
	LONG $0xe466edc5                           // vpcmpgtd    ymm4, ymm2, ymm4
	LONG $0xed66edc5                           // vpcmpgtd    ymm5, ymm2, ymm5
	LONG $0xf666edc5                           // vpcmpgtd    ymm6, ymm2, ymm6
	// Select per lane: where constant > input take the -1 / 0 nonzero mask,
	// otherwise take the broadcast constant itself.
	LONG $0x4a6de3c4; WORD $0x30df             // vblendvps    ymm3, ymm2, ymm7, ymm3
	LONG $0x4a6dc3c4; WORD $0x40e0             // vblendvps    ymm4, ymm2, ymm8, ymm4
	LONG $0x4a6dc3c4; WORD $0x50e9             // vblendvps    ymm5, ymm2, ymm9, ymm5
	LONG $0x4a6dc3c4; WORD $0x60f2             // vblendvps    ymm6, ymm2, ymm10, ymm6
	// Store 32 dword results at r8 + 4*rsi and advance.
	LONG $0x117cc1c4; WORD $0xb01c             // vmovups    yword [r8 + 4*rsi], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x117cc1c4; WORD $0xb074; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm6
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_893
	// Vector part finished. Done if rdx equals r11; otherwise continue at
	// LBB4_895 (defined outside this chunk; presumably the scalar remainder).
	WORD $0x394c; BYTE $0xda                   // cmp    rdx, r11
	JE   LBB4_1351
	JMP  LBB4_895

LBB4_900:
	// Vector path: double-precision inputs -> double-precision outputs,
	// 16 elements per iteration.
	//   rdx  = eax rounded down to a multiple of 16 (vector trip count)
	//   rsi  = element index, starting at 0
	//   ymm0 = zero vector
	//   ymm1 = qword at 0[rbp] (.LCPI4_0) broadcast -- used as an AND mask
	//   ymm2 = qword at 8[rbp] (.LCPI4_1) broadcast -- OR-ed into the result
	// The constants are defined outside this chunk; presumably a sign-bit mask
	// and the bit pattern of 1.0 respectively -- TODO confirm.
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc057f9c5               // vxorpd    xmm0, xmm0, xmm0
	LONG $0x197de2c4; WORD $0x004d // vbroadcastsd    ymm1, qword 0[rbp] /* [rip + .LCPI4_0] */
	LONG $0x197de2c4; WORD $0x0855 // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI4_1] */

LBB4_901:
	// Load four 4-double vectors from rcx + 8*rsi.
	LONG $0x1c10fdc5; BYTE $0xf1               // vmovupd    ymm3, yword [rcx + 8*rsi]
	LONG $0x6410fdc5; WORD $0x20f1             // vmovupd    ymm4, yword [rcx + 8*rsi + 32]
	LONG $0x6c10fdc5; WORD $0x40f1             // vmovupd    ymm5, yword [rcx + 8*rsi + 64]
	LONG $0x7410fdc5; WORD $0x60f1             // vmovupd    ymm6, yword [rcx + 8*rsi + 96]
	// candidate = (in & ymm1) | ymm2, kept in ymm7..ymm10.
	LONG $0xf954e5c5                           // vandpd    ymm7, ymm3, ymm1
	LONG $0xff56edc5                           // vorpd    ymm7, ymm2, ymm7
	LONG $0xc1545dc5                           // vandpd    ymm8, ymm4, ymm1
	LONG $0xc2563dc5                           // vorpd    ymm8, ymm8, ymm2
	LONG $0xc95455c5                           // vandpd    ymm9, ymm5, ymm1
	LONG $0xca5635c5                           // vorpd    ymm9, ymm9, ymm2
	LONG $0xd1544dc5                           // vandpd    ymm10, ymm6, ymm1
	LONG $0xd2562dc5                           // vorpd    ymm10, ymm10, ymm2
	// out = (in != 0) mask AND candidate, so lanes equal to zero produce
	// all-zero bits and the remaining lanes keep the candidate value.
	LONG $0xd8c2e5c5; BYTE $0x04               // vcmpneqpd    ymm3, ymm3, ymm0
	LONG $0xdf54e5c5                           // vandpd    ymm3, ymm3, ymm7
	LONG $0xe0c2ddc5; BYTE $0x04               // vcmpneqpd    ymm4, ymm4, ymm0
	LONG $0xe454bdc5                           // vandpd    ymm4, ymm8, ymm4
	LONG $0xe8c2d5c5; BYTE $0x04               // vcmpneqpd    ymm5, ymm5, ymm0
	LONG $0xed54b5c5                           // vandpd    ymm5, ymm9, ymm5
	LONG $0xf0c2cdc5; BYTE $0x04               // vcmpneqpd    ymm6, ymm6, ymm0
	LONG $0xf654adc5                           // vandpd    ymm6, ymm10, ymm6
	// Store 16 double results at r8 + 8*rsi and advance.
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf074; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm6
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_901
	// Vector part finished. Done if rdx equals rax; otherwise continue at
	// LBB4_903 (defined outside this chunk; presumably the scalar remainder).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_903

LBB4_908:
	// Vector path: byte inputs (compared as signed) -> double-precision
	// outputs, 16 elements per iteration.
	//   rdx  = eax rounded down to a multiple of 16 (vector trip count)
	//   rsi  = element index, starting at 0
	//   xmm0 = zero, xmm8 = all-ones (used to invert compare masks)
	//   ymm2 = qword at 8[rbp] (.LCPI4_1) broadcast to every lane; value is
	//          defined outside this chunk -- presumably 1.0, TODO confirm
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0   // vpcmpeqd    xmm8, xmm8, xmm8
	LONG $0x197de2c4; WORD $0x0855 // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI4_1] */

LBB4_909:
	// Load four groups of 4 input bytes from rcx + rsi.
	LONG $0x1c6ef9c5; BYTE $0x31               // vmovd    xmm3, dword [rcx + rsi]
	LONG $0x646ef9c5; WORD $0x0431             // vmovd    xmm4, dword [rcx + rsi + 4]
	LONG $0x6c6ef9c5; WORD $0x0831             // vmovd    xmm5, dword [rcx + rsi + 8]
	LONG $0x746ef9c5; WORD $0x0c31             // vmovd    xmm6, dword [rcx + rsi + 12]
	// "Input > 0" byte masks, sign-extended to qword lanes (ymm9, ymm10, ymm7,
	// and ymm1 further down); they drive the blends below.
	LONG $0xf864e1c5                           // vpcmpgtb    xmm7, xmm3, xmm0
	LONG $0x227d62c4; BYTE $0xcf               // vpmovsxbq    ymm9, xmm7
	LONG $0xc864d9c5                           // vpcmpgtb    xmm1, xmm4, xmm0
	LONG $0x227d62c4; BYTE $0xd1               // vpmovsxbq    ymm10, xmm1
	LONG $0xf864d1c5                           // vpcmpgtb    xmm7, xmm5, xmm0
	LONG $0x227de2c4; BYTE $0xff               // vpmovsxbq    ymm7, xmm7
	LONG $0xc864c9c5                           // vpcmpgtb    xmm1, xmm6, xmm0
	// "Input != 0" masks: inverted equality, sign-extended to dwords (-1 or 0)
	// and converted to doubles (-1.0 or 0.0).
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x2179e2c4; BYTE $0xdb               // vpmovsxbd    xmm3, xmm3
	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x2179e2c4; BYTE $0xe4               // vpmovsxbd    xmm4, xmm4
	LONG $0xe4e6fec5                           // vcvtdq2pd    ymm4, xmm4
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x2179e2c4; BYTE $0xed               // vpmovsxbd    xmm5, xmm5
	LONG $0xede6fec5                           // vcvtdq2pd    ymm5, xmm5
	LONG $0x227de2c4; BYTE $0xc9               // vpmovsxbq    ymm1, xmm1
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x2179e2c4; BYTE $0xf6               // vpmovsxbd    xmm6, xmm6
	LONG $0xf6e6fec5                           // vcvtdq2pd    ymm6, xmm6
	// Select per lane: the broadcast constant where the input was > 0,
	// otherwise the -1.0 / 0.0 value computed above.
	LONG $0x4b65e3c4; WORD $0x90da             // vblendvpd    ymm3, ymm3, ymm2, ymm9
	LONG $0x4b5de3c4; WORD $0xa0e2             // vblendvpd    ymm4, ymm4, ymm2, ymm10
	LONG $0x4b55e3c4; WORD $0x70ea             // vblendvpd    ymm5, ymm5, ymm2, ymm7
	LONG $0x4b4de3c4; WORD $0x10ca             // vblendvpd    ymm1, ymm6, ymm2, ymm1
	// Store 16 double results at r8 + 8*rsi and advance.
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf04c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm1
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_909
	// Vector part finished. Done if rdx equals rax; otherwise continue at
	// LBB4_911 (defined outside this chunk; presumably the scalar remainder).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_911

LBB4_914:
	// Vector path: byte inputs -> double-precision outputs using only a
	// nonzero test, 16 elements per iteration.
	//   rdx  = eax rounded down to a multiple of 16 (vector trip count)
	//   rsi  = element index, starting at 0
	//   xmm0 = zero, xmm1 = all-ones (used to invert compare masks)
	//   xmm2 = dword at 156[rbp] (.LCPI4_8) broadcast to every lane; value is
	//          defined outside this chunk -- presumably integer 1, TODO confirm
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xf0             // and    edx, -16
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5                     // vpcmpeqd    xmm1, xmm1, xmm1
	QUAD $0x00009c955879e2c4; BYTE $0x00 // vpbroadcastd    xmm2, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_915:
	// Load four groups of 4 input bytes from rcx + rsi.
	LONG $0x1c6ef9c5; BYTE $0x31               // vmovd    xmm3, dword [rcx + rsi]
	LONG $0x646ef9c5; WORD $0x0431             // vmovd    xmm4, dword [rcx + rsi + 4]
	LONG $0x6c6ef9c5; WORD $0x0831             // vmovd    xmm5, dword [rcx + rsi + 8]
	LONG $0x746ef9c5; WORD $0x0c31             // vmovd    xmm6, dword [rcx + rsi + 12]
	// Per group: "input != 0" byte mask, zero-extended to dwords, AND-ed with
	// the broadcast constant, then converted from int32 to double.
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x3179e2c4; BYTE $0xdb               // vpmovzxbd    xmm3, xmm3
	LONG $0xdadbe1c5                           // vpand    xmm3, xmm3, xmm2
	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x3179e2c4; BYTE $0xe4               // vpmovzxbd    xmm4, xmm4
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0xe4e6fec5                           // vcvtdq2pd    ymm4, xmm4
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x3179e2c4; BYTE $0xed               // vpmovzxbd    xmm5, xmm5
	LONG $0xeadbd1c5                           // vpand    xmm5, xmm5, xmm2
	LONG $0xede6fec5                           // vcvtdq2pd    ymm5, xmm5
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x3179e2c4; BYTE $0xf6               // vpmovzxbd    xmm6, xmm6
	LONG $0xf2dbc9c5                           // vpand    xmm6, xmm6, xmm2
	LONG $0xf6e6fec5                           // vcvtdq2pd    ymm6, xmm6
	// Store 16 double results at r8 + 8*rsi and advance.
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf074; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm6
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_915
	// Vector part finished. Done if rdx equals rax; otherwise continue at
	// LBB4_917 (defined outside this chunk; presumably the scalar remainder).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_917

LBB4_933:
	// Vector path: 32-bit inputs -> byte outputs using only a nonzero test,
	// 32 elements per iteration.
	//   rdx  = eax rounded down to a multiple of 32 (vector trip count)
	//   rsi  = element index, starting at 0
	//   ymm0 = zero vector, ymm1 = all-ones (used to invert compare masks)
	//   xmm2 = 16 bytes at 80[rbp] (.LCPI4_12), AND-ed into each packed mask;
	//          value is defined outside this chunk -- presumably bytes of 1,
	//          TODO confirm
	// NOTE(review): vmovdqa is an aligned load, so rbp+80 must be 16-byte
	// aligned -- confirm the constant pool's alignment.
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xe0     // and    edx, -32
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5             // vpcmpeqd    ymm1, ymm1, ymm1
	LONG $0x556ff9c5; BYTE $0x50 // vmovdqa    xmm2, oword 80[rbp] /* [rip + .LCPI4_12] */

LBB4_934:
	// For each of four 8-dword groups read from rcx + 4*rsi: build the
	// "input != 0" mask, narrow it dword -> word -> byte with saturating
	// packs, and AND with xmm2. The 8 result bytes sit in the low qword.
	LONG $0x1c76fdc5; BYTE $0xb1   // vpcmpeqd    ymm3, ymm0, yword [rcx + 4*rsi]
	LONG $0xd9efe5c5               // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5               // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xdb63e1c5               // vpacksswb    xmm3, xmm3, xmm3
	LONG $0xdadbe1c5               // vpand    xmm3, xmm3, xmm2
	LONG $0x6476fdc5; WORD $0x20b1 // vpcmpeqd    ymm4, ymm0, yword [rcx + 4*rsi + 32]
	LONG $0xe1efddc5               // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5 // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5               // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe463d9c5               // vpacksswb    xmm4, xmm4, xmm4
	LONG $0x6c76fdc5; WORD $0x40b1 // vpcmpeqd    ymm5, ymm0, yword [rcx + 4*rsi + 64]
	LONG $0xe2dbd9c5               // vpand    xmm4, xmm4, xmm2
	LONG $0xe9efd5c5               // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5               // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xed63d1c5               // vpacksswb    xmm5, xmm5, xmm5
	LONG $0xeadbd1c5               // vpand    xmm5, xmm5, xmm2
	LONG $0x7476fdc5; WORD $0x60b1 // vpcmpeqd    ymm6, ymm0, yword [rcx + 4*rsi + 96]
	LONG $0xf1efcdc5               // vpxor    ymm6, ymm6, ymm1
	LONG $0x397de3c4; WORD $0x01f7 // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5               // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xf663c9c5               // vpacksswb    xmm6, xmm6, xmm6
	LONG $0xf2dbc9c5               // vpand    xmm6, xmm6, xmm2
	// Merge the four 8-byte groups into one 32-byte vector: the inserts and
	// the qword unpack leave the groups in order 0,2,1,3, and vpermq with
	// immediate 216 (qword order 0,2,1,3) restores element order.
	LONG $0x3855e3c4; WORD $0x01ee // vinserti128    ymm5, ymm5, xmm6, 1
	LONG $0x3865e3c4; WORD $0x01dc // vinserti128    ymm3, ymm3, xmm4, 1
	LONG $0xdd6ce5c5               // vpunpcklqdq    ymm3, ymm3, ymm5
	LONG $0x00fde3c4; WORD $0xd8db // vpermq    ymm3, ymm3, 216
	// Store 32 byte results at r8 + rsi and advance.
	LONG $0x7f7ec1c4; WORD $0x301c // vmovdqu    yword [r8 + rsi], ymm3
	LONG $0x20c68348               // add    rsi, 32
	WORD $0x3948; BYTE $0xf2       // cmp    rdx, rsi
	JNE  LBB4_934
	// Vector part finished. Done if rdx equals rax; otherwise continue at
	// LBB4_936 (defined outside this chunk; presumably the scalar remainder).
	WORD $0x3948; BYTE $0xc2       // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_936

LBB4_940:
	// Vector path: double-precision inputs -> byte outputs,
	// 16 elements per iteration.
	//   rdx   = eax rounded down to a multiple of 16 (vector trip count)
	//   rsi   = element index, starting at 0
	//   ymm0  = qword at 0[rbp] (.LCPI4_0) broadcast -- used as an AND mask
	//   ymm2  = qword at 8[rbp] (.LCPI4_1) broadcast -- OR-ed into the result
	//   xmm10 = zero (compare operand), xmm11 = zero (blend source)
	// The constants are defined outside this chunk; presumably a sign-bit mask
	// and the bit pattern of 1.0 respectively -- TODO confirm.
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq    ymm0, qword 0[rbp] /* [rip + .LCPI4_0] */
	LONG $0x572941c4; BYTE $0xd2   // vxorpd    xmm10, xmm10, xmm10
	LONG $0x197de2c4; WORD $0x0855 // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0xef2141c4; BYTE $0xdb   // vpxor    xmm11, xmm11, xmm11

LBB4_941:
	// Load four 4-double vectors from rcx + 8*rsi.
	LONG $0x3410fdc5; BYTE $0xf1   // vmovupd    ymm6, yword [rcx + 8*rsi]
	LONG $0x7c10fdc5; WORD $0x20f1 // vmovupd    ymm7, yword [rcx + 8*rsi + 32]
	LONG $0x44107dc5; WORD $0x40f1 // vmovupd    ymm8, yword [rcx + 8*rsi + 64]
	LONG $0x4c107dc5; WORD $0x60f1 // vmovupd    ymm9, yword [rcx + 8*rsi + 96]
	// "Input == 0" masks, narrowed from qword lanes down to bytes with
	// saturating packs; results kept in xmm12, xmm13, xmm1 and xmm3.
	LONG $0xe6c2adc5; BYTE $0x00   // vcmpeqpd    ymm4, ymm10, ymm6
	LONG $0x197de3c4; WORD $0x01e5 // vextractf128    xmm5, ymm4, 1
	LONG $0xe56bd9c5               // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe46bd9c5               // vpackssdw    xmm4, xmm4, xmm4
	LONG $0xe46359c5               // vpacksswb    xmm12, xmm4, xmm4
	LONG $0xefc2adc5; BYTE $0x00   // vcmpeqpd    ymm5, ymm10, ymm7
	LONG $0x197de3c4; WORD $0x01e9 // vextractf128    xmm1, ymm5, 1
	LONG $0xc96bd1c5               // vpackssdw    xmm1, xmm5, xmm1
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xe96371c5               // vpacksswb    xmm13, xmm1, xmm1
	LONG $0xc23dc1c4; WORD $0x00ca // vcmpeqpd    ymm1, ymm8, ymm10
	LONG $0x197de3c4; WORD $0x01cb // vextractf128    xmm3, ymm1, 1
	LONG $0xcb6bf1c5               // vpackssdw    xmm1, xmm1, xmm3
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xc963f1c5               // vpacksswb    xmm1, xmm1, xmm1
	LONG $0xc235c1c4; WORD $0x00da // vcmpeqpd    ymm3, ymm9, ymm10
	LONG $0x197de3c4; WORD $0x01dc // vextractf128    xmm4, ymm3, 1
	LONG $0xdc6be1c5               // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xdb6be1c5               // vpackssdw    xmm3, xmm3, xmm3
	LONG $0xdb63e1c5               // vpacksswb    xmm3, xmm3, xmm3
	// candidate = (in & ymm0) | ymm2 for each of the four input vectors.
	LONG $0xe054cdc5               // vandpd    ymm4, ymm6, ymm0
	LONG $0xe456edc5               // vorpd    ymm4, ymm2, ymm4
	LONG $0xf054c5c5               // vandpd    ymm6, ymm7, ymm0
	LONG $0xf656edc5               // vorpd    ymm6, ymm2, ymm6
	LONG $0xf854bdc5               // vandpd    ymm7, ymm8, ymm0
	LONG $0xff56edc5               // vorpd    ymm7, ymm2, ymm7
	LONG $0xc05435c5               // vandpd    ymm8, ymm9, ymm0
	LONG $0xc2563dc5               // vorpd    ymm8, ymm8, ymm2
	// Convert each candidate to int32 with truncation, then narrow to bytes.
	LONG $0xe4e6fdc5               // vcvttpd2dq    xmm4, ymm4
	LONG $0xe46bd9c5               // vpackssdw    xmm4, xmm4, xmm4
	LONG $0xe463d9c5               // vpacksswb    xmm4, xmm4, xmm4
	LONG $0xf6e6fdc5               // vcvttpd2dq    xmm6, ymm6
	LONG $0xf66bc9c5               // vpackssdw    xmm6, xmm6, xmm6
	LONG $0xf663c9c5               // vpacksswb    xmm6, xmm6, xmm6
	LONG $0xffe6fdc5               // vcvttpd2dq    xmm7, ymm7
	LONG $0xff6bc1c5               // vpackssdw    xmm7, xmm7, xmm7
	LONG $0xff63c1c5               // vpacksswb    xmm7, xmm7, xmm7
	LONG $0xe67dc1c4; BYTE $0xe8   // vcvttpd2dq    xmm5, ymm8
	LONG $0xed6bd1c5               // vpackssdw    xmm5, xmm5, xmm5
	LONG $0xed63d1c5               // vpacksswb    xmm5, xmm5, xmm5
	// Where the input was equal to zero, replace the converted byte with 0
	// (xmm11); then interleave the four 4-byte groups into one 16-byte vector.
	LONG $0x4c59c3c4; WORD $0xc0e3 // vpblendvb    xmm4, xmm4, xmm11, xmm12
	LONG $0x4c49c3c4; WORD $0xd0f3 // vpblendvb    xmm6, xmm6, xmm11, xmm13
	LONG $0x4c41c3c4; WORD $0x10cb // vpblendvb    xmm1, xmm7, xmm11, xmm1
	LONG $0xe662d9c5               // vpunpckldq    xmm4, xmm4, xmm6
	LONG $0x4c51c3c4; WORD $0x30db // vpblendvb    xmm3, xmm5, xmm11, xmm3
	LONG $0xcb62f1c5               // vpunpckldq    xmm1, xmm1, xmm3
	LONG $0xc96cd9c5               // vpunpcklqdq    xmm1, xmm4, xmm1
	// Store 16 byte results at r8 + rsi and advance.
	LONG $0x7f7ac1c4; WORD $0x300c // vmovdqu    oword [r8 + rsi], xmm1
	LONG $0x10c68348               // add    rsi, 16
	WORD $0x3948; BYTE $0xf2       // cmp    rdx, rsi
	JNE  LBB4_941
	// Vector part finished. Done if rdx equals rax; otherwise continue at
	// LBB4_943 (defined outside this chunk; presumably the scalar remainder).
	WORD $0x3948; BYTE $0xc2       // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_943

LBB4_948:
	// Vector path: byte inputs (compared as signed) -> byte outputs,
	// 128 elements per iteration.
	//   r11  = r10d rounded down to a multiple of 128 (vector trip count)
	//   rsi  = element index, starting at 0
	//   ymm0 = zero vector, ymm1 = all-ones (used to invert compare masks)
	//   ymm2 = 32 bytes at 224[rbp] (.LCPI4_20); value is defined outside this
	//          chunk -- presumably bytes of 1, TODO confirm
	// NOTE(review): vmovdqa is an aligned load, so rbp+224 must be 32-byte
	// aligned -- confirm the constant pool's alignment.
	WORD $0x8945; BYTE $0xd3 // mov    r11d, r10d
	LONG $0x80e38341         // and    r11d, -128
	WORD $0xf631             // xor    esi, esi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x000000e0956ffdc5 // vmovdqa    ymm2, yword 224[rbp] /* [rip + .LCPI4_20] */

LBB4_949:
	// Load four 32-byte vectors from rcx + rsi.
	LONG $0x1c6ffec5; BYTE $0x31               // vmovdqu    ymm3, yword [rcx + rsi]
	LONG $0x646ffec5; WORD $0x2031             // vmovdqu    ymm4, yword [rcx + rsi + 32]
	LONG $0x6c6ffec5; WORD $0x4031             // vmovdqu    ymm5, yword [rcx + rsi + 64]
	LONG $0x746ffec5; WORD $0x6031             // vmovdqu    ymm6, yword [rcx + rsi + 96]
	// "Input != 0" masks in ymm7..ymm10: 0xFF for nonzero bytes, 0 otherwise.
	LONG $0xf874e5c5                           // vpcmpeqb    ymm7, ymm3, ymm0
	LONG $0xf9efc5c5                           // vpxor    ymm7, ymm7, ymm1
	LONG $0xc0745dc5                           // vpcmpeqb    ymm8, ymm4, ymm0
	LONG $0xc1ef3dc5                           // vpxor    ymm8, ymm8, ymm1
	LONG $0xc87455c5                           // vpcmpeqb    ymm9, ymm5, ymm0
	LONG $0xc9ef35c5                           // vpxor    ymm9, ymm9, ymm1
	LONG $0xd0744dc5                           // vpcmpeqb    ymm10, ymm6, ymm0
	LONG $0xd1ef2dc5                           // vpxor    ymm10, ymm10, ymm1
	// "constant > input" masks overwrite the loaded inputs in ymm3..ymm6.
	LONG $0xdb64edc5                           // vpcmpgtb    ymm3, ymm2, ymm3
	LONG $0xe464edc5                           // vpcmpgtb    ymm4, ymm2, ymm4
	LONG $0xed64edc5                           // vpcmpgtb    ymm5, ymm2, ymm5
	LONG $0xf664edc5                           // vpcmpgtb    ymm6, ymm2, ymm6
	// Select per byte: where constant > input take the 0xFF / 0 nonzero mask,
	// otherwise take the constant itself.
	LONG $0x4c6de3c4; WORD $0x30df             // vpblendvb    ymm3, ymm2, ymm7, ymm3
	LONG $0x4c6dc3c4; WORD $0x40e0             // vpblendvb    ymm4, ymm2, ymm8, ymm4
	LONG $0x4c6dc3c4; WORD $0x50e9             // vpblendvb    ymm5, ymm2, ymm9, ymm5
	LONG $0x4c6dc3c4; WORD $0x60f2             // vpblendvb    ymm6, ymm2, ymm10, ymm6
	// Store 128 byte results at r8 + rsi.
	LONG $0x7f7ec1c4; WORD $0x301c             // vmovdqu    yword [r8 + rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0x306c; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0x3074; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm6
	// Subtracting -128 advances rsi by 128.
	LONG $0x80ee8348                           // sub    rsi, -128
	WORD $0x3949; BYTE $0xf3                   // cmp    r11, rsi
	JNE  LBB4_949
	// Vector part finished. Done if r11 equals r10; otherwise continue at
	// LBB4_951 (defined outside this chunk; presumably the scalar remainder).
	WORD $0x394d; BYTE $0xd3                   // cmp    r11, r10
	JE   LBB4_1351
	JMP  LBB4_951

LBB4_956:
	// Vector path: 64-bit inputs -> byte outputs using only a nonzero test,
	// 16 elements per iteration.
	//   rdx  = eax rounded down to a multiple of 16 (vector trip count)
	//   rsi  = element index, starting at 0
	//   ymm0 = zero vector, ymm1 = all-ones (used to invert compare masks)
	//   xmm2 = 16 bytes at 112[rbp] (.LCPI4_17), AND-ed into each packed mask;
	//          value is defined outside this chunk -- presumably bytes of 1,
	//          TODO confirm
	// NOTE(review): vmovdqa is an aligned load, so rbp+112 must be 16-byte
	// aligned -- confirm the constant pool's alignment.
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xf0     // and    edx, -16
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5             // vpcmpeqd    ymm1, ymm1, ymm1
	LONG $0x556ff9c5; BYTE $0x70 // vmovdqa    xmm2, oword 112[rbp] /* [rip + .LCPI4_17] */

LBB4_957:
	// For each of four 4-qword groups read from rcx + 8*rsi: build the
	// "input != 0" mask, narrow it qword -> byte with saturating packs, and
	// AND with xmm2. Each group's 4 result bytes sit in the low dword.
	LONG $0x297de2c4; WORD $0xf11c             // vpcmpeqq    ymm3, ymm0, yword [rcx + 8*rsi]
	LONG $0xd9efe5c5                           // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xdb6be1c5                           // vpackssdw    xmm3, xmm3, xmm3
	LONG $0xdb63e1c5                           // vpacksswb    xmm3, xmm3, xmm3
	LONG $0xdadbe1c5                           // vpand    xmm3, xmm3, xmm2
	LONG $0x297de2c4; WORD $0xf164; BYTE $0x20 // vpcmpeqq    ymm4, ymm0, yword [rcx + 8*rsi + 32]
	LONG $0xe1efddc5                           // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe46bd9c5                           // vpackssdw    xmm4, xmm4, xmm4
	LONG $0xe463d9c5                           // vpacksswb    xmm4, xmm4, xmm4
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0x297de2c4; WORD $0xf16c; BYTE $0x40 // vpcmpeqq    ymm5, ymm0, yword [rcx + 8*rsi + 64]
	// Groups 0 and 1 are interleaved here; groups 2 and 3 follow below.
	LONG $0xdc62e1c5                           // vpunpckldq    xmm3, xmm3, xmm4
	LONG $0xe1efd5c5                           // vpxor    ymm4, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe46bd9c5                           // vpackssdw    xmm4, xmm4, xmm4
	LONG $0xe463d9c5                           // vpacksswb    xmm4, xmm4, xmm4
	LONG $0x297de2c4; WORD $0xf16c; BYTE $0x60 // vpcmpeqq    ymm5, ymm0, yword [rcx + 8*rsi + 96]
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xed6bd1c5                           // vpackssdw    xmm5, xmm5, xmm5
	LONG $0xed63d1c5                           // vpacksswb    xmm5, xmm5, xmm5
	LONG $0xeadbd1c5                           // vpand    xmm5, xmm5, xmm2
	// Combine the four 4-byte groups into one 16-byte vector and store it
	// at r8 + rsi.
	LONG $0xe562d9c5                           // vpunpckldq    xmm4, xmm4, xmm5
	LONG $0xdc6ce1c5                           // vpunpcklqdq    xmm3, xmm3, xmm4
	LONG $0x7f7ac1c4; WORD $0x301c             // vmovdqu    oword [r8 + rsi], xmm3
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_957
	// Vector part finished. Done if rdx equals rax; otherwise continue at
	// LBB4_959 (defined outside this chunk; presumably the scalar remainder).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_959

LBB4_963:
	// Vector path: 16-bit inputs -> byte outputs using only a nonzero test,
	// 64 elements per iteration.
	//   rdx  = eax rounded down to a multiple of 64 (vector trip count)
	//   rsi  = element index, starting at 0
	//   ymm0 = zero vector, ymm1 = all-ones (used to invert compare masks)
	//   xmm2 = 16 bytes at 128[rbp] (.LCPI4_19), AND-ed into each packed mask;
	//          value is defined outside this chunk -- presumably bytes of 1,
	//          TODO confirm
	// NOTE(review): vmovdqa is an aligned load, so rbp+128 must be 16-byte
	// aligned -- confirm the constant pool's alignment.
	WORD $0xc289             // mov    edx, eax
	WORD $0xe283; BYTE $0xc0 // and    edx, -64
	WORD $0xf631             // xor    esi, esi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x00000080956ff9c5 // vmovdqa    xmm2, oword 128[rbp] /* [rip + .LCPI4_19] */

LBB4_964:
	// For each of four 16-word vectors read from rcx + 2*rsi: build the
	// "input != 0" mask, pack the two 128-bit halves word -> byte with
	// saturation into one xmm register, and AND with xmm2.
	LONG $0x1c75fdc5; BYTE $0x71               // vpcmpeqw    ymm3, ymm0, yword [rcx + 2*rsi]
	LONG $0xd9efe5c5                           // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc63e1c5                           // vpacksswb    xmm3, xmm3, xmm4
	LONG $0xdadbe1c5                           // vpand    xmm3, xmm3, xmm2
	LONG $0x6475fdc5; WORD $0x2071             // vpcmpeqw    ymm4, ymm0, yword [rcx + 2*rsi + 32]
	LONG $0xe1efddc5                           // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe563d9c5                           // vpacksswb    xmm4, xmm4, xmm5
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0x6c75fdc5; WORD $0x4071             // vpcmpeqw    ymm5, ymm0, yword [rcx + 2*rsi + 64]
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee63d1c5                           // vpacksswb    xmm5, xmm5, xmm6
	LONG $0xeadbd1c5                           // vpand    xmm5, xmm5, xmm2
	LONG $0x7475fdc5; WORD $0x6071             // vpcmpeqw    ymm6, ymm0, yword [rcx + 2*rsi + 96]
	LONG $0xf1efcdc5                           // vpxor    ymm6, ymm6, ymm1
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf763c9c5                           // vpacksswb    xmm6, xmm6, xmm7
	LONG $0xf2dbc9c5                           // vpand    xmm6, xmm6, xmm2
	// Store 4 x 16 byte results at r8 + rsi and advance.
	LONG $0x7f7ac1c4; WORD $0x301c             // vmovdqu    oword [r8 + rsi], xmm3
	LONG $0x7f7ac1c4; WORD $0x3064; BYTE $0x10 // vmovdqu    oword [r8 + rsi + 16], xmm4
	LONG $0x7f7ac1c4; WORD $0x306c; BYTE $0x20 // vmovdqu    oword [r8 + rsi + 32], xmm5
	LONG $0x7f7ac1c4; WORD $0x3074; BYTE $0x30 // vmovdqu    oword [r8 + rsi + 48], xmm6
	LONG $0x40c68348                           // add    rsi, 64
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_964
	// Vector part finished. Done if rdx equals rax; otherwise continue at
	// LBB4_966 (defined outside this chunk; presumably the scalar remainder).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_966

LBB4_970:
	// Vector-loop preheader: signed 16-bit elements in, one byte out each.
	//   r11  = r10d & -64 (element count rounded down to a multiple of 64)
	//   rsi  = 0          (element index)
	//   xmm0 = all zeros, ymm8 = all ones
	//   xmm9 = 16-byte constant at 128[rbp]; value not visible in this chunk,
	//          presumably a per-byte 1 -- TODO confirm.
	WORD $0x8945; BYTE $0xd3     // mov    r11d, r10d
	LONG $0xc0e38341             // and    r11d, -64
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x763d41c4; BYTE $0xc0 // vpcmpeqd    ymm8, ymm8, ymm8
	QUAD $0x000000808d6f79c5     // vmovdqa    xmm9, oword 128[rbp] /* [rip + .LCPI4_19] */

LBB4_971:
	// Loop body: 64 elements per iteration, loaded as four 32-byte vectors.
	LONG $0x246ffec5; BYTE $0x71               // vmovdqu    ymm4, yword [rcx + 2*rsi]
	LONG $0x6c6ffec5; WORD $0x2071             // vmovdqu    ymm5, yword [rcx + 2*rsi + 32]
	LONG $0x746ffec5; WORD $0x4071             // vmovdqu    ymm6, yword [rcx + 2*rsi + 64]
	LONG $0x7c6ffec5; WORD $0x6071             // vmovdqu    ymm7, yword [rcx + 2*rsi + 96]
	// Signed (x > 0) masks, narrowed to bytes -> xmm10, xmm11, xmm2, xmm1.
	LONG $0xd865ddc5                           // vpcmpgtw    ymm3, ymm4, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xd16361c5                           // vpacksswb    xmm10, xmm3, xmm1
	LONG $0xc865d5c5                           // vpcmpgtw    ymm1, ymm5, ymm0
	LONG $0x397de3c4; WORD $0x01ca             // vextracti128    xmm2, ymm1, 1
	LONG $0xda6371c5                           // vpacksswb    xmm11, xmm1, xmm2
	LONG $0xd065cdc5                           // vpcmpgtw    ymm2, ymm6, ymm0
	LONG $0x397de3c4; WORD $0x01d3             // vextracti128    xmm3, ymm2, 1
	LONG $0xd363e9c5                           // vpacksswb    xmm2, xmm2, xmm3
	LONG $0xd865c5c5                           // vpcmpgtw    ymm3, ymm7, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xc963e1c5                           // vpacksswb    xmm1, xmm3, xmm1
	// (x != 0) masks, narrowed to bytes -> xmm3, xmm4, xmm5, xmm6
	// (each byte is 0xFF for non-zero input, 0x00 for zero input).
	LONG $0xd875ddc5                           // vpcmpeqw    ymm3, ymm4, ymm0
	LONG $0xdbefbdc5                           // vpxor    ymm3, ymm8, ymm3
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc63e1c5                           // vpacksswb    xmm3, xmm3, xmm4
	LONG $0xe075d5c5                           // vpcmpeqw    ymm4, ymm5, ymm0
	LONG $0xe4efbdc5                           // vpxor    ymm4, ymm8, ymm4
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe563d9c5                           // vpacksswb    xmm4, xmm4, xmm5
	LONG $0xe875cdc5                           // vpcmpeqw    ymm5, ymm6, ymm0
	LONG $0xedefbdc5                           // vpxor    ymm5, ymm8, ymm5
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee63d1c5                           // vpacksswb    xmm5, xmm5, xmm6
	LONG $0xf075c5c5                           // vpcmpeqw    ymm6, ymm7, ymm0
	LONG $0xf6efbdc5                           // vpxor    ymm6, ymm8, ymm6
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf763c9c5                           // vpacksswb    xmm6, xmm6, xmm7
	// Select per byte: constant where x > 0, otherwise the non-zero mask
	// (0xFF for x < 0, 0x00 for x == 0).
	LONG $0x4c61c3c4; WORD $0xa0d9             // vpblendvb    xmm3, xmm3, xmm9, xmm10
	LONG $0x4c59c3c4; WORD $0xb0e1             // vpblendvb    xmm4, xmm4, xmm9, xmm11
	LONG $0x4c51c3c4; WORD $0x20d1             // vpblendvb    xmm2, xmm5, xmm9, xmm2
	LONG $0x4c49c3c4; WORD $0x10c9             // vpblendvb    xmm1, xmm6, xmm9, xmm1
	// Store the 64 result bytes.
	LONG $0x7f7ac1c4; WORD $0x301c             // vmovdqu    oword [r8 + rsi], xmm3
	LONG $0x7f7ac1c4; WORD $0x3064; BYTE $0x10 // vmovdqu    oword [r8 + rsi + 16], xmm4
	LONG $0x7f7ac1c4; WORD $0x3054; BYTE $0x20 // vmovdqu    oword [r8 + rsi + 32], xmm2
	LONG $0x7f7ac1c4; WORD $0x304c; BYTE $0x30 // vmovdqu    oword [r8 + rsi + 48], xmm1
	LONG $0x40c68348                           // add    rsi, 64
	WORD $0x3949; BYTE $0xf3                   // cmp    r11, rsi
	JNE  LBB4_971
	// No remainder when the rounded count equals the full count in r10;
	// LBB4_1351 / LBB4_973 are outside this chunk (presumably exit / scalar tail).
	WORD $0x394d; BYTE $0xd3                   // cmp    r11, r10
	JE   LBB4_1351
	JMP  LBB4_973

LBB4_978:
	// Vector-loop preheader: signed 64-bit elements in, one byte out each.
	//   r11   = r10d & -16 (element count rounded down to a multiple of 16)
	//   rsi   = 0          (element index)
	//   xmm0  = all zeros, ymm9 = all ones
	//   xmm11 = 16-byte constant at 112[rbp]; value not visible in this chunk,
	//           presumably a per-byte 1 -- TODO confirm.
	WORD $0x8945; BYTE $0xd3     // mov    r11d, r10d
	LONG $0xf0e38341             // and    r11d, -16
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x763541c4; BYTE $0xc9 // vpcmpeqd    ymm9, ymm9, ymm9
	LONG $0x5d6f79c5; BYTE $0x70 // vmovdqa    xmm11, oword 112[rbp] /* [rip + .LCPI4_17] */

LBB4_979:
	// Loop body: 16 elements per iteration, loaded as four vectors of 4 qwords.
	LONG $0x146f7ec5; BYTE $0xf1   // vmovdqu    ymm10, yword [rcx + 8*rsi]
	LONG $0x446f7ec5; WORD $0x20f1 // vmovdqu    ymm8, yword [rcx + 8*rsi + 32]
	LONG $0x746ffec5; WORD $0x40f1 // vmovdqu    ymm6, yword [rcx + 8*rsi + 64]
	LONG $0x646ffec5; WORD $0x60f1 // vmovdqu    ymm4, yword [rcx + 8*rsi + 96]
	// Signed (x > 0) masks. Each qword mask is narrowed qword -> dword ->
	// word -> byte by the pack chain; only the low 4 bytes of each result
	// register are meaningful (the rest are duplicates).
	// Results: xmm12, xmm13, xmm7, xmm1.
	LONG $0x372de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm10, ymm0
	LONG $0x397de3c4; WORD $0x01cb // vextracti128    xmm3, ymm1, 1
	LONG $0xcb6bf1c5               // vpackssdw    xmm1, xmm1, xmm3
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xe16371c5               // vpacksswb    xmm12, xmm1, xmm1
	LONG $0x373de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm8, ymm0
	LONG $0x397de3c4; WORD $0x01cd // vextracti128    xmm5, ymm1, 1
	LONG $0xcd6bf1c5               // vpackssdw    xmm1, xmm1, xmm5
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xe96371c5               // vpacksswb    xmm13, xmm1, xmm1
	LONG $0x374de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm6, ymm0
	LONG $0x397de3c4; WORD $0x01cf // vextracti128    xmm7, ymm1, 1
	LONG $0xcf6bf1c5               // vpackssdw    xmm1, xmm1, xmm7
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xf963f1c5               // vpacksswb    xmm7, xmm1, xmm1
	LONG $0x375de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm4, ymm0
	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
	LONG $0xca6bf1c5               // vpackssdw    xmm1, xmm1, xmm2
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xc963f1c5               // vpacksswb    xmm1, xmm1, xmm1
	// (x != 0) masks, narrowed the same way -> xmm2, xmm3, xmm5, xmm4.
	LONG $0x292de2c4; BYTE $0xd0   // vpcmpeqq    ymm2, ymm10, ymm0
	LONG $0xd2efb5c5               // vpxor    ymm2, ymm9, ymm2
	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
	LONG $0xd36be9c5               // vpackssdw    xmm2, xmm2, xmm3
	LONG $0xd26be9c5               // vpackssdw    xmm2, xmm2, xmm2
	LONG $0xd263e9c5               // vpacksswb    xmm2, xmm2, xmm2
	LONG $0x293de2c4; BYTE $0xd8   // vpcmpeqq    ymm3, ymm8, ymm0
	LONG $0xdbefb5c5               // vpxor    ymm3, ymm9, ymm3
	LONG $0x397de3c4; WORD $0x01dd // vextracti128    xmm5, ymm3, 1
	LONG $0xdd6be1c5               // vpackssdw    xmm3, xmm3, xmm5
	LONG $0xdb6be1c5               // vpackssdw    xmm3, xmm3, xmm3
	LONG $0xdb63e1c5               // vpacksswb    xmm3, xmm3, xmm3
	LONG $0x294de2c4; BYTE $0xe8   // vpcmpeqq    ymm5, ymm6, ymm0
	LONG $0xedefb5c5               // vpxor    ymm5, ymm9, ymm5
	LONG $0x397de3c4; WORD $0x01ee // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5               // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xed6bd1c5               // vpackssdw    xmm5, xmm5, xmm5
	LONG $0xed63d1c5               // vpacksswb    xmm5, xmm5, xmm5
	LONG $0x295de2c4; BYTE $0xe0   // vpcmpeqq    ymm4, ymm4, ymm0
	LONG $0xe4efb5c5               // vpxor    ymm4, ymm9, ymm4
	LONG $0x397de3c4; WORD $0x01e6 // vextracti128    xmm6, ymm4, 1
	LONG $0xe66bd9c5               // vpackssdw    xmm4, xmm4, xmm6
	LONG $0xe46bd9c5               // vpackssdw    xmm4, xmm4, xmm4
	LONG $0xe463d9c5               // vpacksswb    xmm4, xmm4, xmm4
	// Select per byte: constant where x > 0, otherwise the non-zero mask
	// (0xFF for x < 0, 0x00 for x == 0). Then interleave the four 4-byte
	// groups into one 16-byte vector in element order.
	LONG $0x4c69c3c4; WORD $0xc0d3 // vpblendvb    xmm2, xmm2, xmm11, xmm12
	LONG $0x4c61c3c4; WORD $0xd0db // vpblendvb    xmm3, xmm3, xmm11, xmm13
	LONG $0x4c51c3c4; WORD $0x70eb // vpblendvb    xmm5, xmm5, xmm11, xmm7
	LONG $0xd362e9c5               // vpunpckldq    xmm2, xmm2, xmm3
	LONG $0x4c59c3c4; WORD $0x10cb // vpblendvb    xmm1, xmm4, xmm11, xmm1
	LONG $0xc962d1c5               // vpunpckldq    xmm1, xmm5, xmm1
	LONG $0xc96ce9c5               // vpunpcklqdq    xmm1, xmm2, xmm1
	// Store the 16 result bytes.
	LONG $0x7f7ac1c4; WORD $0x300c // vmovdqu    oword [r8 + rsi], xmm1
	LONG $0x10c68348               // add    rsi, 16
	WORD $0x3949; BYTE $0xf3       // cmp    r11, rsi
	JNE  LBB4_979
	// No remainder when the rounded count equals the full count in r10;
	// LBB4_1351 / LBB4_981 are outside this chunk (presumably exit / scalar tail).
	WORD $0x394d; BYTE $0xd3       // cmp    r11, r10
	JE   LBB4_1351
	JMP  LBB4_981

LBB4_986:
	// Vector-loop preheader: 32-bit float elements in, one byte out each.
	//   rdx   = r10d & -32 (element count rounded down to a multiple of 32)
	//   rsi   = 0          (element index)
	//   xmm12 = 0.0 in every lane, ymm13 = all ones (integer -1 per dword)
	//   xmm14 = 16-byte constant at 80[rbp]; value not visible in this chunk,
	//           presumably a per-byte 1 -- TODO confirm.
	//   xmm15 = all ones (0xFF per byte)
	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d
	WORD $0xe283; BYTE $0xe0     // and    edx, -32
	WORD $0xf631                 // xor    esi, esi
	LONG $0x571841c4; BYTE $0xe4 // vxorps    xmm12, xmm12, xmm12
	LONG $0x761541c4; BYTE $0xed // vpcmpeqd    ymm13, ymm13, ymm13
	LONG $0x756f79c5; BYTE $0x50 // vmovdqa    xmm14, oword 80[rbp] /* [rip + .LCPI4_12] */
	LONG $0x760141c4; BYTE $0xff // vpcmpeqd    xmm15, xmm15, xmm15

LBB4_987:
	// Loop body: 32 floats per iteration, loaded as four vectors of 8.
	LONG $0x0c107cc5; BYTE $0xb1   // vmovups    ymm9, yword [rcx + 4*rsi]
	LONG $0x54107cc5; WORD $0x20b1 // vmovups    ymm10, yword [rcx + 4*rsi + 32]
	LONG $0x5c107cc5; WORD $0x40b1 // vmovups    ymm11, yword [rcx + 4*rsi + 64]
	LONG $0x7c10fcc5; WORD $0x60b1 // vmovups    ymm7, yword [rcx + 4*rsi + 96]
	// Floating-point (x == 0.0) masks, narrowed dword -> word -> byte; the low
	// 8 bytes of each result are meaningful. Results: xmm8, xmm6, xmm4, xmm0.
	// The compare is false for NaN, so NaN lanes are treated as non-zero.
	LONG $0xc234c1c4; WORD $0x00e4 // vcmpeqps    ymm4, ymm9, ymm12
	LONG $0x197de3c4; WORD $0x01e5 // vextractf128    xmm5, ymm4, 1
	LONG $0xe56bd9c5               // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xc46359c5               // vpacksswb    xmm8, xmm4, xmm4
	LONG $0xc22cc1c4; WORD $0x00e4 // vcmpeqps    ymm4, ymm10, ymm12
	LONG $0x197de3c4; WORD $0x01e6 // vextractf128    xmm6, ymm4, 1
	LONG $0xe66bd9c5               // vpackssdw    xmm4, xmm4, xmm6
	LONG $0xf463d9c5               // vpacksswb    xmm6, xmm4, xmm4
	LONG $0xc224c1c4; WORD $0x00e4 // vcmpeqps    ymm4, ymm11, ymm12
	LONG $0x197de3c4; WORD $0x01e0 // vextractf128    xmm0, ymm4, 1
	LONG $0xc06bd9c5               // vpackssdw    xmm0, xmm4, xmm0
	LONG $0xe063f9c5               // vpacksswb    xmm4, xmm0, xmm0
	LONG $0xc7c29cc5; BYTE $0x00   // vcmpeqps    ymm0, ymm12, ymm7
	LONG $0x197de3c4; WORD $0x01c1 // vextractf128    xmm1, ymm0, 1
	LONG $0xc16bf9c5               // vpackssdw    xmm0, xmm0, xmm1
	LONG $0xc063f9c5               // vpacksswb    xmm0, xmm0, xmm0
	// Sign-bit test: the float bits are compared as signed integers against
	// -1, so the mask is set exactly when the sign bit is clear.
	// Results (narrowed to bytes): xmm1, xmm2, xmm3, xmm5.
	LONG $0x6635c1c4; BYTE $0xcd   // vpcmpgtd    ymm1, ymm9, ymm13
	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
	LONG $0xca6bf1c5               // vpackssdw    xmm1, xmm1, xmm2
	LONG $0xc963f1c5               // vpacksswb    xmm1, xmm1, xmm1
	LONG $0x662dc1c4; BYTE $0xd5   // vpcmpgtd    ymm2, ymm10, ymm13
	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
	LONG $0xd36be9c5               // vpackssdw    xmm2, xmm2, xmm3
	LONG $0xd263e9c5               // vpacksswb    xmm2, xmm2, xmm2
	LONG $0x6625c1c4; BYTE $0xdd   // vpcmpgtd    ymm3, ymm11, ymm13
	LONG $0x397de3c4; WORD $0x01dd // vextracti128    xmm5, ymm3, 1
	LONG $0xdd6be1c5               // vpackssdw    xmm3, xmm3, xmm5
	LONG $0xdb63e1c5               // vpacksswb    xmm3, xmm3, xmm3
	LONG $0x6645c1c4; BYTE $0xed   // vpcmpgtd    ymm5, ymm7, ymm13
	LONG $0x397de3c4; WORD $0x01ef // vextracti128    xmm7, ymm5, 1
	LONG $0xef6bd1c5               // vpackssdw    xmm5, xmm5, xmm7
	// First select: constant where the sign bit is clear, 0xFF where it is
	// set. Second step forces the byte to 0 where x == 0.0 (via vpandn with
	// the equality mask, or a blend against the freshly zeroed xmm7).
	LONG $0x4c01c3c4; WORD $0x10ce // vpblendvb    xmm1, xmm15, xmm14, xmm1
	LONG $0xed63d1c5               // vpacksswb    xmm5, xmm5, xmm5
	LONG $0xc9dfb9c5               // vpandn    xmm1, xmm8, xmm1
	LONG $0x4c01c3c4; WORD $0x20d6 // vpblendvb    xmm2, xmm15, xmm14, xmm2
	LONG $0x4c01c3c4; WORD $0x30de // vpblendvb    xmm3, xmm15, xmm14, xmm3
	LONG $0x4c01c3c4; WORD $0x50ee // vpblendvb    xmm5, xmm15, xmm14, xmm5
	LONG $0xffefc1c5               // vpxor    xmm7, xmm7, xmm7
	LONG $0x4c69e3c4; WORD $0x60d7 // vpblendvb    xmm2, xmm2, xmm7, xmm6
	LONG $0x4c51e3c4; WORD $0x00c7 // vpblendvb    xmm0, xmm5, xmm7, xmm0
	LONG $0xdbdfd9c5               // vpandn    xmm3, xmm4, xmm3
	// Gather the four 8-byte groups into one 32-byte vector; vpermq 216
	// (qword order 0,2,1,3) restores element order after the per-lane unpack.
	LONG $0x3865e3c4; WORD $0x01c0 // vinserti128    ymm0, ymm3, xmm0, 1
	LONG $0x3875e3c4; WORD $0x01ca // vinserti128    ymm1, ymm1, xmm2, 1
	LONG $0xc06cf5c5               // vpunpcklqdq    ymm0, ymm1, ymm0
	LONG $0x00fde3c4; WORD $0xd8c0 // vpermq    ymm0, ymm0, 216
	// Store the 32 result bytes.
	LONG $0x7f7ec1c4; WORD $0x3004 // vmovdqu    yword [r8 + rsi], ymm0
	LONG $0x20c68348               // add    rsi, 32
	WORD $0x3948; BYTE $0xf2       // cmp    rdx, rsi
	JNE  LBB4_987
	// No remainder when the rounded count equals the full count in r10;
	// LBB4_1351 / LBB4_989 are outside this chunk (presumably exit / scalar tail).
	WORD $0x394c; BYTE $0xd2       // cmp    rdx, r10
	JE   LBB4_1351
	JMP  LBB4_989

LBB4_994:
	// Vector-loop preheader: byte elements in, one byte out each.
	//   rdx  = eax & -128 (element count rounded down to a multiple of 128)
	//   rsi  = 0          (element index)
	//   xmm0 = all zeros
	//   ymm1 = 32-byte constant at 224[rbp]; value not visible in this chunk,
	//          presumably a per-byte 1 -- TODO confirm.
	WORD $0xc289             // mov    edx, eax
	WORD $0xe283; BYTE $0x80 // and    edx, -128
	WORD $0xf631             // xor    esi, esi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	QUAD $0x000000e08d6ffdc5 // vmovdqa    ymm1, yword 224[rbp] /* [rip + .LCPI4_20] */

LBB4_995:
	// Loop body: 128 bytes per iteration. For each byte:
	//   out = ~(in == 0) & const   i.e. const when in != 0, else 0.
	LONG $0x1474fdc5; BYTE $0x31               // vpcmpeqb    ymm2, ymm0, yword [rcx + rsi]
	LONG $0xd1dfedc5                           // vpandn    ymm2, ymm2, ymm1
	LONG $0x5c74fdc5; WORD $0x2031             // vpcmpeqb    ymm3, ymm0, yword [rcx + rsi + 32]
	LONG $0xd9dfe5c5                           // vpandn    ymm3, ymm3, ymm1
	LONG $0x6474fdc5; WORD $0x4031             // vpcmpeqb    ymm4, ymm0, yword [rcx + rsi + 64]
	LONG $0x6c74fdc5; WORD $0x6031             // vpcmpeqb    ymm5, ymm0, yword [rcx + rsi + 96]
	LONG $0xe1dfddc5                           // vpandn    ymm4, ymm4, ymm1
	LONG $0xe9dfd5c5                           // vpandn    ymm5, ymm5, ymm1
	LONG $0x7f7ec1c4; WORD $0x3014             // vmovdqu    yword [r8 + rsi], ymm2
	LONG $0x7f7ec1c4; WORD $0x305c; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0x306c; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm5
	// rsi += 128, written as a subtraction of -128 because -128 fits in a
	// sign-extended 8-bit immediate while +128 does not.
	LONG $0x80ee8348                           // sub    rsi, -128
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_995
	// No remainder when the rounded count equals the full count in rax;
	// LBB4_1351 / LBB4_997 are outside this chunk (presumably exit / scalar tail).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_997

LBB4_1001:
	// Vector-loop preheader: signed 32-bit elements in, one byte out each.
	//   r11   = r10d & -32 (element count rounded down to a multiple of 32)
	//   rsi   = 0          (element index)
	//   xmm0  = all zeros, ymm9 = all ones
	//   xmm10 = 16-byte constant at 80[rbp]; value not visible in this chunk,
	//           presumably a per-byte 1 -- TODO confirm.
	WORD $0x8945; BYTE $0xd3     // mov    r11d, r10d
	LONG $0xe0e38341             // and    r11d, -32
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x763541c4; BYTE $0xc9 // vpcmpeqd    ymm9, ymm9, ymm9
	LONG $0x556f79c5; BYTE $0x50 // vmovdqa    xmm10, oword 80[rbp] /* [rip + .LCPI4_12] */

LBB4_1002:
	// Loop body: 32 elements per iteration, loaded as four vectors of 8 dwords.
	LONG $0x3c6ffec5; BYTE $0xb1   // vmovdqu    ymm7, yword [rcx + 4*rsi]
	LONG $0x446f7ec5; WORD $0x20b1 // vmovdqu    ymm8, yword [rcx + 4*rsi + 32]
	LONG $0x746ffec5; WORD $0x40b1 // vmovdqu    ymm6, yword [rcx + 4*rsi + 64]
	LONG $0x646ffec5; WORD $0x60b1 // vmovdqu    ymm4, yword [rcx + 4*rsi + 96]
	// Signed (x > 0) masks, narrowed dword -> word -> byte; the low 8 bytes
	// of each result are meaningful. Results: xmm11, xmm12, xmm1, xmm2.
	LONG $0xd866c5c5               // vpcmpgtd    ymm3, ymm7, ymm0
	LONG $0x397de3c4; WORD $0x01dd // vextracti128    xmm5, ymm3, 1
	LONG $0xdd6be1c5               // vpackssdw    xmm3, xmm3, xmm5
	LONG $0xdb6361c5               // vpacksswb    xmm11, xmm3, xmm3
	LONG $0xe866bdc5               // vpcmpgtd    ymm5, ymm8, ymm0
	LONG $0x397de3c4; WORD $0x01e9 // vextracti128    xmm1, ymm5, 1
	LONG $0xc96bd1c5               // vpackssdw    xmm1, xmm5, xmm1
	LONG $0xe16371c5               // vpacksswb    xmm12, xmm1, xmm1
	LONG $0xc866cdc5               // vpcmpgtd    ymm1, ymm6, ymm0
	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
	LONG $0xca6bf1c5               // vpackssdw    xmm1, xmm1, xmm2
	LONG $0xc963f1c5               // vpacksswb    xmm1, xmm1, xmm1
	LONG $0xd066ddc5               // vpcmpgtd    ymm2, ymm4, ymm0
	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
	LONG $0xd36be9c5               // vpackssdw    xmm2, xmm2, xmm3
	LONG $0xd263e9c5               // vpacksswb    xmm2, xmm2, xmm2
	// (x != 0) masks, narrowed the same way -> xmm3, xmm5, xmm6, xmm4.
	LONG $0xd876c5c5               // vpcmpeqd    ymm3, ymm7, ymm0
	LONG $0xdbefb5c5               // vpxor    ymm3, ymm9, ymm3
	LONG $0x397de3c4; WORD $0x01df // vextracti128    xmm7, ymm3, 1
	LONG $0xdf6be1c5               // vpackssdw    xmm3, xmm3, xmm7
	LONG $0xdb63e1c5               // vpacksswb    xmm3, xmm3, xmm3
	LONG $0xf876bdc5               // vpcmpeqd    ymm7, ymm8, ymm0
	LONG $0xffefb5c5               // vpxor    ymm7, ymm9, ymm7
	LONG $0x397de3c4; WORD $0x01fd // vextracti128    xmm5, ymm7, 1
	LONG $0xed6bc1c5               // vpackssdw    xmm5, xmm7, xmm5
	LONG $0xed63d1c5               // vpacksswb    xmm5, xmm5, xmm5
	LONG $0xf076cdc5               // vpcmpeqd    ymm6, ymm6, ymm0
	LONG $0xf6efb5c5               // vpxor    ymm6, ymm9, ymm6
	LONG $0x397de3c4; WORD $0x01f7 // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5               // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xf663c9c5               // vpacksswb    xmm6, xmm6, xmm6
	LONG $0xe076ddc5               // vpcmpeqd    ymm4, ymm4, ymm0
	LONG $0xe4efb5c5               // vpxor    ymm4, ymm9, ymm4
	LONG $0x397de3c4; WORD $0x01e7 // vextracti128    xmm7, ymm4, 1
	LONG $0xe76bd9c5               // vpackssdw    xmm4, xmm4, xmm7
	LONG $0xe463d9c5               // vpacksswb    xmm4, xmm4, xmm4
	// Select per byte: constant where x > 0, otherwise the non-zero mask
	// (0xFF for x < 0, 0x00 for x == 0).
	LONG $0x4c61c3c4; WORD $0xb0da // vpblendvb    xmm3, xmm3, xmm10, xmm11
	LONG $0x4c51c3c4; WORD $0xc0ea // vpblendvb    xmm5, xmm5, xmm10, xmm12
	LONG $0x4c49c3c4; WORD $0x10ca // vpblendvb    xmm1, xmm6, xmm10, xmm1
	LONG $0x4c59c3c4; WORD $0x20d2 // vpblendvb    xmm2, xmm4, xmm10, xmm2
	// Gather the four 8-byte groups into one 32-byte vector; vpermq 216
	// (qword order 0,2,1,3) restores element order after the per-lane unpack.
	LONG $0x3875e3c4; WORD $0x01ca // vinserti128    ymm1, ymm1, xmm2, 1
	LONG $0x3865e3c4; WORD $0x01d5 // vinserti128    ymm2, ymm3, xmm5, 1
	LONG $0xc96cedc5               // vpunpcklqdq    ymm1, ymm2, ymm1
	LONG $0x00fde3c4; WORD $0xd8c9 // vpermq    ymm1, ymm1, 216
	// Store the 32 result bytes.
	LONG $0x7f7ec1c4; WORD $0x300c // vmovdqu    yword [r8 + rsi], ymm1
	LONG $0x20c68348               // add    rsi, 32
	WORD $0x3949; BYTE $0xf3       // cmp    r11, rsi
	JNE  LBB4_1002
	// No remainder when the rounded count equals the full count in r10;
	// LBB4_1351 / LBB4_1004 are outside this chunk (presumably exit / scalar tail).
	WORD $0x394d; BYTE $0xd3       // cmp    r11, r10
	JE   LBB4_1351
	JMP  LBB4_1004

LBB4_1009:
	// Vector-loop preheader: signed byte elements in, one 64-bit value out each.
	//   rdx  = r11d & -16 (element count rounded down to a multiple of 16)
	//   rsi  = 0          (element index)
	//   xmm0 = all zeros, xmm8 = all ones
	//   ymm2 = 8-byte constant at 40[rbp] broadcast to all four qwords; value
	//          not visible in this chunk, presumably the 64-bit integer 1
	//          -- TODO confirm.
	WORD $0x8944; BYTE $0xda       // mov    edx, r11d
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0   // vpcmpeqd    xmm8, xmm8, xmm8
	LONG $0x197de2c4; WORD $0x2855 // vbroadcastsd    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

LBB4_1010:
	// Loop body: 16 bytes per iteration, loaded 4 bytes at a time.
	LONG $0x1c6ef9c5; BYTE $0x31               // vmovd    xmm3, dword [rcx + rsi]
	LONG $0x646ef9c5; WORD $0x0431             // vmovd    xmm4, dword [rcx + rsi + 4]
	LONG $0x6c6ef9c5; WORD $0x0831             // vmovd    xmm5, dword [rcx + rsi + 8]
	LONG $0x746ef9c5; WORD $0x0c31             // vmovd    xmm6, dword [rcx + rsi + 12]
	// Signed (x > 0) byte masks, sign-extended to qword masks
	// -> ymm9, ymm10, ymm7, ymm1.
	LONG $0xf864e1c5                           // vpcmpgtb    xmm7, xmm3, xmm0
	LONG $0x227d62c4; BYTE $0xcf               // vpmovsxbq    ymm9, xmm7
	LONG $0xc864d9c5                           // vpcmpgtb    xmm1, xmm4, xmm0
	LONG $0x227d62c4; BYTE $0xd1               // vpmovsxbq    ymm10, xmm1
	LONG $0xf864d1c5                           // vpcmpgtb    xmm7, xmm5, xmm0
	LONG $0x227de2c4; BYTE $0xff               // vpmovsxbq    ymm7, xmm7
	LONG $0xc864c9c5                           // vpcmpgtb    xmm1, xmm6, xmm0
	LONG $0x227de2c4; BYTE $0xc9               // vpmovsxbq    ymm1, xmm1
	// (x != 0) byte masks, sign-extended so each qword is all ones (-1) for
	// non-zero input and 0 for zero input -> ymm3, ymm4, ymm5, ymm6.
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x227de2c4; BYTE $0xdb               // vpmovsxbq    ymm3, xmm3
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x227de2c4; BYTE $0xe4               // vpmovsxbq    ymm4, xmm4
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x227de2c4; BYTE $0xed               // vpmovsxbq    ymm5, xmm5
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x227de2c4; BYTE $0xf6               // vpmovsxbq    ymm6, xmm6
	// Select per qword: constant where x > 0, otherwise the non-zero mask
	// (-1 for x < 0, 0 for x == 0). vblendvpd is used purely as a 64-bit
	// lane select keyed on the mask's top bit.
	LONG $0x4b65e3c4; WORD $0x90da             // vblendvpd    ymm3, ymm3, ymm2, ymm9
	LONG $0x4b5de3c4; WORD $0xa0e2             // vblendvpd    ymm4, ymm4, ymm2, ymm10
	LONG $0x4b55e3c4; WORD $0x70ea             // vblendvpd    ymm5, ymm5, ymm2, ymm7
	LONG $0x4b4de3c4; WORD $0x10ca             // vblendvpd    ymm1, ymm6, ymm2, ymm1
	// Store the 16 result qwords (128 bytes).
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf04c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm1
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1010
	// No remainder when the rounded count equals the full count in r11;
	// LBB4_1351 / LBB4_1012 are outside this chunk (presumably exit / scalar tail).
	WORD $0x394c; BYTE $0xda                   // cmp    rdx, r11
	JE   LBB4_1351
	JMP  LBB4_1012

LBB4_1017:
	// Vector-loop preheader: 64-bit elements in, one 64-bit value out each.
	//   rdx  = eax & -16 (element count rounded down to a multiple of 16)
	//   rsi  = 0         (element index)
	//   xmm0 = all zeros
	//   ymm1 = 8-byte constant at 40[rbp] broadcast to all four qwords; value
	//          not visible in this chunk, presumably the 64-bit integer 1
	//          -- TODO confirm.
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x597de2c4; WORD $0x284d // vpbroadcastq    ymm1, qword 40[rbp] /* [rip + .LCPI4_15] */

LBB4_1018:
	// Loop body: 16 qwords per iteration. For each element:
	//   out = ~(in == 0) & const   i.e. const when in != 0, else 0.
	LONG $0x297de2c4; WORD $0xf114             // vpcmpeqq    ymm2, ymm0, yword [rcx + 8*rsi]
	LONG $0xd1dfedc5                           // vpandn    ymm2, ymm2, ymm1
	LONG $0x297de2c4; WORD $0xf15c; BYTE $0x20 // vpcmpeqq    ymm3, ymm0, yword [rcx + 8*rsi + 32]
	LONG $0xd9dfe5c5                           // vpandn    ymm3, ymm3, ymm1
	LONG $0x297de2c4; WORD $0xf164; BYTE $0x40 // vpcmpeqq    ymm4, ymm0, yword [rcx + 8*rsi + 64]
	LONG $0x297de2c4; WORD $0xf16c; BYTE $0x60 // vpcmpeqq    ymm5, ymm0, yword [rcx + 8*rsi + 96]
	LONG $0xe1dfddc5                           // vpandn    ymm4, ymm4, ymm1
	LONG $0xe9dfd5c5                           // vpandn    ymm5, ymm5, ymm1
	LONG $0x7f7ec1c4; WORD $0xf014             // vmovdqu    yword [r8 + 8*rsi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf05c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rsi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf064; BYTE $0x40 // vmovdqu    yword [r8 + 8*rsi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf06c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rsi + 96], ymm5
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1018
	// No remainder when the rounded count equals the full count in rax;
	// LBB4_1351 / LBB4_1020 are outside this chunk (presumably exit / scalar tail).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1020

LBB4_1024:
	// Vector-loop preheader: signed 64-bit elements in, one 64-bit value out each.
	//   rdx  = r11d & -16 (element count rounded down to a multiple of 16)
	//   rsi  = 0          (element index)
	//   xmm0 = all zeros, ymm1 = all ones
	//   ymm2 = 8-byte constant at 40[rbp] broadcast to all four qwords; value
	//          not visible in this chunk, presumably the 64-bit integer 1
	//          -- TODO confirm.
	WORD $0x8944; BYTE $0xda       // mov    edx, r11d
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5               // vpcmpeqd    ymm1, ymm1, ymm1
	LONG $0x597de2c4; WORD $0x2855 // vpbroadcastq    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

LBB4_1025:
	// Loop body: 16 qwords per iteration, loaded as four vectors of 4.
	LONG $0x1c6ffec5; BYTE $0xf1               // vmovdqu    ymm3, yword [rcx + 8*rsi]
	LONG $0x646ffec5; WORD $0x20f1             // vmovdqu    ymm4, yword [rcx + 8*rsi + 32]
	LONG $0x6c6ffec5; WORD $0x40f1             // vmovdqu    ymm5, yword [rcx + 8*rsi + 64]
	LONG $0x746ffec5; WORD $0x60f1             // vmovdqu    ymm6, yword [rcx + 8*rsi + 96]
	// (x != 0) masks: -1 for non-zero input, 0 for zero input
	// -> ymm7, ymm8, ymm9, ymm10.
	LONG $0x2965e2c4; BYTE $0xf8               // vpcmpeqq    ymm7, ymm3, ymm0
	LONG $0xf9efc5c5                           // vpxor    ymm7, ymm7, ymm1
	LONG $0x295d62c4; BYTE $0xc0               // vpcmpeqq    ymm8, ymm4, ymm0
	LONG $0xc1ef3dc5                           // vpxor    ymm8, ymm8, ymm1
	LONG $0x295562c4; BYTE $0xc8               // vpcmpeqq    ymm9, ymm5, ymm0
	LONG $0xc9ef35c5                           // vpxor    ymm9, ymm9, ymm1
	LONG $0x294d62c4; BYTE $0xd0               // vpcmpeqq    ymm10, ymm6, ymm0
	LONG $0xd1ef2dc5                           // vpxor    ymm10, ymm10, ymm1
	// Signed (const > x) masks, overwriting the inputs in ymm3..ymm6.
	LONG $0x376de2c4; BYTE $0xdb               // vpcmpgtq    ymm3, ymm2, ymm3
	LONG $0x376de2c4; BYTE $0xe4               // vpcmpgtq    ymm4, ymm2, ymm4
	LONG $0x376de2c4; BYTE $0xed               // vpcmpgtq    ymm5, ymm2, ymm5
	LONG $0x376de2c4; BYTE $0xf6               // vpcmpgtq    ymm6, ymm2, ymm6
	// Select per qword: where const > x take the non-zero mask (-1 or 0),
	// otherwise take the constant itself.
	LONG $0x4b6de3c4; WORD $0x30df             // vblendvpd    ymm3, ymm2, ymm7, ymm3
	LONG $0x4b6dc3c4; WORD $0x40e0             // vblendvpd    ymm4, ymm2, ymm8, ymm4
	LONG $0x4b6dc3c4; WORD $0x50e9             // vblendvpd    ymm5, ymm2, ymm9, ymm5
	LONG $0x4b6dc3c4; WORD $0x60f2             // vblendvpd    ymm6, ymm2, ymm10, ymm6
	// Store the 16 result qwords (128 bytes).
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf074; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm6
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1025
	// No remainder when the rounded count equals the full count in r11;
	// LBB4_1351 / LBB4_1027 are outside this chunk (presumably exit / scalar tail).
	WORD $0x394c; BYTE $0xda                   // cmp    rdx, r11
	JE   LBB4_1351
	JMP  LBB4_1027

LBB4_1032:
	// Vector-loop preheader: byte elements in, one 64-bit value out each.
	//   rdx  = eax & -16 (element count rounded down to a multiple of 16)
	//   rsi  = 0         (element index)
	//   xmm0 = all zeros, xmm1 = all ones
	//   ymm2 = 8-byte constant at 40[rbp] broadcast to all four qwords; value
	//          not visible in this chunk, presumably the 64-bit integer 1
	//          -- TODO confirm.
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5               // vpcmpeqd    xmm1, xmm1, xmm1
	LONG $0x597de2c4; WORD $0x2855 // vpbroadcastq    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

LBB4_1033:
	// Loop body: 16 bytes per iteration, loaded 4 bytes at a time.
	LONG $0x1c6ef9c5; BYTE $0x31               // vmovd    xmm3, dword [rcx + rsi]
	LONG $0x646ef9c5; WORD $0x0431             // vmovd    xmm4, dword [rcx + rsi + 4]
	LONG $0x6c6ef9c5; WORD $0x0831             // vmovd    xmm5, dword [rcx + rsi + 8]
	LONG $0x746ef9c5; WORD $0x0c31             // vmovd    xmm6, dword [rcx + rsi + 12]
	// Per group of 4 bytes: (x != 0) byte mask, zero-extended to qwords
	// (0xFF or 0 in the low byte), then ANDed with the constant so each
	// output qword is (const & 0xFF) for non-zero input and 0 otherwise.
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x327de2c4; BYTE $0xdb               // vpmovzxbq    ymm3, xmm3
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x327de2c4; BYTE $0xe4               // vpmovzxbq    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x327de2c4; BYTE $0xed               // vpmovzxbq    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x327de2c4; BYTE $0xf6               // vpmovzxbq    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	// Store the 16 result qwords (128 bytes).
	LONG $0x7f7ec1c4; WORD $0xf01c             // vmovdqu    yword [r8 + 8*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0xf064; BYTE $0x20 // vmovdqu    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0xf06c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0xf074; BYTE $0x60 // vmovdqu    yword [r8 + 8*rsi + 96], ymm6
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1033
	// No remainder when the rounded count equals the full count in rax;
	// LBB4_1351 / LBB4_1035 are outside this chunk (presumably exit / scalar tail).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1035

LBB4_1039:
	// Vector-loop preheader: signed byte elements in, one 16-bit value out each.
	//   rdx  = r11d & -64 (element count rounded down to a multiple of 64)
	//   rsi  = 0          (element index)
	//   xmm0 = all zeros, xmm8 = all ones
	//   ymm9 = 32-byte constant at 192[rbp]; value not visible in this chunk,
	//          presumably a per-word 1 -- TODO confirm.
	WORD $0x8944; BYTE $0xda     // mov    edx, r11d
	WORD $0xe283; BYTE $0xc0     // and    edx, -64
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0 // vpcmpeqd    xmm8, xmm8, xmm8
	QUAD $0x000000c08d6f7dc5     // vmovdqa    ymm9, yword 192[rbp] /* [rip + .LCPI4_18] */

LBB4_1040:
	// Loop body: 64 bytes per iteration, loaded as four 16-byte vectors.
	LONG $0x1c6ffac5; BYTE $0x31               // vmovdqu    xmm3, oword [rcx + rsi]
	LONG $0x646ffac5; WORD $0x1031             // vmovdqu    xmm4, oword [rcx + rsi + 16]
	LONG $0x6c6ffac5; WORD $0x2031             // vmovdqu    xmm5, oword [rcx + rsi + 32]
	LONG $0x746ffac5; WORD $0x3031             // vmovdqu    xmm6, oword [rcx + rsi + 48]
	// Signed (x > 0) byte masks, sign-extended to word masks
	// -> ymm10, ymm1, ymm2, ymm7.
	LONG $0xf864e1c5                           // vpcmpgtb    xmm7, xmm3, xmm0
	LONG $0x207d62c4; BYTE $0xd7               // vpmovsxbw    ymm10, xmm7
	LONG $0xc864d9c5                           // vpcmpgtb    xmm1, xmm4, xmm0
	LONG $0x207de2c4; BYTE $0xc9               // vpmovsxbw    ymm1, xmm1
	LONG $0xd064d1c5                           // vpcmpgtb    xmm2, xmm5, xmm0
	LONG $0x207de2c4; BYTE $0xd2               // vpmovsxbw    ymm2, xmm2
	LONG $0xf864c9c5                           // vpcmpgtb    xmm7, xmm6, xmm0
	LONG $0x207de2c4; BYTE $0xff               // vpmovsxbw    ymm7, xmm7
	// (x != 0) byte masks, sign-extended so each word is 0xFFFF (-1) for
	// non-zero input and 0 for zero input -> ymm3, ymm4, ymm5, ymm6.
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x207de2c4; BYTE $0xdb               // vpmovsxbw    ymm3, xmm3
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x207de2c4; BYTE $0xe4               // vpmovsxbw    ymm4, xmm4
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x207de2c4; BYTE $0xed               // vpmovsxbw    ymm5, xmm5
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x207de2c4; BYTE $0xf6               // vpmovsxbw    ymm6, xmm6
	// Select: constant where x > 0, otherwise the non-zero mask
	// (-1 for x < 0, 0 for x == 0).
	LONG $0x4c65c3c4; WORD $0xa0d9             // vpblendvb    ymm3, ymm3, ymm9, ymm10
	LONG $0x4c5dc3c4; WORD $0x10c9             // vpblendvb    ymm1, ymm4, ymm9, ymm1
	LONG $0x4c55c3c4; WORD $0x20d1             // vpblendvb    ymm2, ymm5, ymm9, ymm2
	LONG $0x4c4dc3c4; WORD $0x70e1             // vpblendvb    ymm4, ymm6, ymm9, ymm7
	// Store the 64 result words (128 bytes).
	LONG $0x7f7ec1c4; WORD $0x701c             // vmovdqu    yword [r8 + 2*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0x704c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rsi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7054; BYTE $0x40 // vmovdqu    yword [r8 + 2*rsi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x7064; BYTE $0x60 // vmovdqu    yword [r8 + 2*rsi + 96], ymm4
	LONG $0x40c68348                           // add    rsi, 64
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1040
	// No remainder when the rounded count equals the full count in r11;
	// LBB4_1351 / LBB4_1042 are outside this chunk (presumably exit / scalar tail).
	WORD $0x394c; BYTE $0xda                   // cmp    rdx, r11
	JE   LBB4_1351
	JMP  LBB4_1042

LBB4_1047:
	// Vector-loop preheader: signed byte elements in, one 16-bit value out each.
	// The instruction sequence of this block and its loop is the same as
	// LBB4_1039 / LBB4_1040; only the labels and the remainder target differ.
	//   rdx  = r11d & -64 (element count rounded down to a multiple of 64)
	//   rsi  = 0          (element index)
	//   xmm0 = all zeros, xmm8 = all ones
	//   ymm9 = 32-byte constant at 192[rbp]; value not visible in this chunk,
	//          presumably a per-word 1 -- TODO confirm.
	WORD $0x8944; BYTE $0xda     // mov    edx, r11d
	WORD $0xe283; BYTE $0xc0     // and    edx, -64
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0 // vpcmpeqd    xmm8, xmm8, xmm8
	QUAD $0x000000c08d6f7dc5     // vmovdqa    ymm9, yword 192[rbp] /* [rip + .LCPI4_18] */

LBB4_1048:
	// Loop body: 64 bytes per iteration, loaded as four 16-byte vectors.
	LONG $0x1c6ffac5; BYTE $0x31               // vmovdqu    xmm3, oword [rcx + rsi]
	LONG $0x646ffac5; WORD $0x1031             // vmovdqu    xmm4, oword [rcx + rsi + 16]
	LONG $0x6c6ffac5; WORD $0x2031             // vmovdqu    xmm5, oword [rcx + rsi + 32]
	LONG $0x746ffac5; WORD $0x3031             // vmovdqu    xmm6, oword [rcx + rsi + 48]
	// Signed (x > 0) byte masks, sign-extended to word masks
	// -> ymm10, ymm1, ymm2, ymm7.
	LONG $0xf864e1c5                           // vpcmpgtb    xmm7, xmm3, xmm0
	LONG $0x207d62c4; BYTE $0xd7               // vpmovsxbw    ymm10, xmm7
	LONG $0xc864d9c5                           // vpcmpgtb    xmm1, xmm4, xmm0
	LONG $0x207de2c4; BYTE $0xc9               // vpmovsxbw    ymm1, xmm1
	LONG $0xd064d1c5                           // vpcmpgtb    xmm2, xmm5, xmm0
	LONG $0x207de2c4; BYTE $0xd2               // vpmovsxbw    ymm2, xmm2
	LONG $0xf864c9c5                           // vpcmpgtb    xmm7, xmm6, xmm0
	LONG $0x207de2c4; BYTE $0xff               // vpmovsxbw    ymm7, xmm7
	// (x != 0) byte masks, sign-extended so each word is 0xFFFF (-1) for
	// non-zero input and 0 for zero input -> ymm3, ymm4, ymm5, ymm6.
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x207de2c4; BYTE $0xdb               // vpmovsxbw    ymm3, xmm3
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x207de2c4; BYTE $0xe4               // vpmovsxbw    ymm4, xmm4
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x207de2c4; BYTE $0xed               // vpmovsxbw    ymm5, xmm5
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x207de2c4; BYTE $0xf6               // vpmovsxbw    ymm6, xmm6
	// Select: constant where x > 0, otherwise the non-zero mask
	// (-1 for x < 0, 0 for x == 0).
	LONG $0x4c65c3c4; WORD $0xa0d9             // vpblendvb    ymm3, ymm3, ymm9, ymm10
	LONG $0x4c5dc3c4; WORD $0x10c9             // vpblendvb    ymm1, ymm4, ymm9, ymm1
	LONG $0x4c55c3c4; WORD $0x20d1             // vpblendvb    ymm2, ymm5, ymm9, ymm2
	LONG $0x4c4dc3c4; WORD $0x70e1             // vpblendvb    ymm4, ymm6, ymm9, ymm7
	// Store the 64 result words (128 bytes).
	LONG $0x7f7ec1c4; WORD $0x701c             // vmovdqu    yword [r8 + 2*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0x704c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rsi + 32], ymm1
	LONG $0x7f7ec1c4; WORD $0x7054; BYTE $0x40 // vmovdqu    yword [r8 + 2*rsi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x7064; BYTE $0x60 // vmovdqu    yword [r8 + 2*rsi + 96], ymm4
	LONG $0x40c68348                           // add    rsi, 64
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1048
	// No remainder when the rounded count equals the full count in r11;
	// LBB4_1351 / LBB4_1050 are outside this chunk (presumably exit / scalar tail).
	WORD $0x394c; BYTE $0xda                   // cmp    rdx, r11
	JE   LBB4_1351
	JMP  LBB4_1050

// LBB4_1055 / LBB4_1057: setup plus 2x-unrolled vector loop over 16-bit words.
// Each word read from [rcx + 2*rdi] is compared with zero and the result is
// stored at [r8 + 2*rdi]: nonzero -> the matching word of the constant at
// rbp+192, zero -> 0 (vpcmpeqw against zero followed by vpandn).
// NOTE(review): the constant (.LCPI4_18) is not visible here; presumably each
// word is 1 — confirm.
LBB4_1055:
	// rdx = eax & -32: eax rounded down to a multiple of 32.
	WORD $0xc289             // mov    edx, eax
	WORD $0xe283; BYTE $0xe0 // and    edx, -32
	// r9 = ((rdx - 32) >> 5) + 1: number of 32-element chunks.
	LONG $0xe0728d48         // lea    rsi, [rdx - 32]
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	LONG $0x05e9c149         // shr    r9, 5
	LONG $0x01c18349         // add    r9, 1
	// rdx == 32 (a single chunk): skip the unrolled loop and go to LBB4_1317.
	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
	JE   LBB4_1317
	// rsi = -(r9 & -2): negated even chunk count; the loop adds 2 until it hits 0.
	WORD $0x894c; BYTE $0xce // mov    rsi, r9
	LONG $0xfee68348         // and    rsi, -2
	WORD $0xf748; BYTE $0xde // neg    rsi
	// rdi = 0: element index; xmm0 = zeros; ymm1 = constant from rbp+192.
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	QUAD $0x000000c08d6ffdc5 // vmovdqa    ymm1, yword 192[rbp] /* [rip + .LCPI4_18] */

LBB4_1057:
	// First chunk: words [rdi, rdi+32).
	LONG $0x1475fdc5; BYTE $0x79               // vpcmpeqw    ymm2, ymm0, yword [rcx + 2*rdi]
	LONG $0xd1dfedc5                           // vpandn    ymm2, ymm2, ymm1
	LONG $0x5c75fdc5; WORD $0x2079             // vpcmpeqw    ymm3, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0xd9dfe5c5                           // vpandn    ymm3, ymm3, ymm1
	LONG $0x7f7ec1c4; WORD $0x7814             // vmovdqu    yword [r8 + 2*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm3
	// Second chunk: words [rdi+32, rdi+64).
	LONG $0x5475fdc5; WORD $0x4079             // vpcmpeqw    ymm2, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0xd1dfedc5                           // vpandn    ymm2, ymm2, ymm1
	LONG $0x5c75fdc5; WORD $0x6079             // vpcmpeqw    ymm3, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0xd9dfe5c5                           // vpandn    ymm3, ymm3, ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	// Advance the index by 64 elements and the negative chunk counter by 2.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB4_1057
	// Continue at LBB4_1318 (outside this chunk; presumably handles an odd
	// leftover chunk using r9 — confirm).
	JMP  LBB4_1318

// LBB4_1058 / LBB4_1060: same instruction sequence as LBB4_1055 / LBB4_1057,
// differing only in its exit labels (LBB4_1325 / LBB4_1326).
// Each 16-bit word read from [rcx + 2*rdi] is compared with zero; the value
// stored at [r8 + 2*rdi] is the matching word of the constant at rbp+192 when
// the input is nonzero, and 0 when it is zero.
// NOTE(review): the constant (.LCPI4_18) is not visible here; presumably each
// word is 1 — confirm.
LBB4_1058:
	// rdx = eax & -32: eax rounded down to a multiple of 32.
	WORD $0xc289             // mov    edx, eax
	WORD $0xe283; BYTE $0xe0 // and    edx, -32
	// r9 = ((rdx - 32) >> 5) + 1: number of 32-element chunks.
	LONG $0xe0728d48         // lea    rsi, [rdx - 32]
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	LONG $0x05e9c149         // shr    r9, 5
	LONG $0x01c18349         // add    r9, 1
	// rdx == 32 (a single chunk): skip the unrolled loop and go to LBB4_1325.
	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
	JE   LBB4_1325
	// rsi = -(r9 & -2): negated even chunk count; the loop adds 2 until it hits 0.
	WORD $0x894c; BYTE $0xce // mov    rsi, r9
	LONG $0xfee68348         // and    rsi, -2
	WORD $0xf748; BYTE $0xde // neg    rsi
	// rdi = 0: element index; xmm0 = zeros; ymm1 = constant from rbp+192.
	WORD $0xff31             // xor    edi, edi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	QUAD $0x000000c08d6ffdc5 // vmovdqa    ymm1, yword 192[rbp] /* [rip + .LCPI4_18] */

LBB4_1060:
	// First chunk: words [rdi, rdi+32).
	LONG $0x1475fdc5; BYTE $0x79               // vpcmpeqw    ymm2, ymm0, yword [rcx + 2*rdi]
	LONG $0xd1dfedc5                           // vpandn    ymm2, ymm2, ymm1
	LONG $0x5c75fdc5; WORD $0x2079             // vpcmpeqw    ymm3, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0xd9dfe5c5                           // vpandn    ymm3, ymm3, ymm1
	LONG $0x7f7ec1c4; WORD $0x7814             // vmovdqu    yword [r8 + 2*rdi], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm3
	// Second chunk: words [rdi+32, rdi+64).
	LONG $0x5475fdc5; WORD $0x4079             // vpcmpeqw    ymm2, ymm0, yword [rcx + 2*rdi + 64]
	LONG $0xd1dfedc5                           // vpandn    ymm2, ymm2, ymm1
	LONG $0x5c75fdc5; WORD $0x6079             // vpcmpeqw    ymm3, ymm0, yword [rcx + 2*rdi + 96]
	LONG $0xd9dfe5c5                           // vpandn    ymm3, ymm3, ymm1
	LONG $0x7f7ec1c4; WORD $0x7854; BYTE $0x40 // vmovdqu    yword [r8 + 2*rdi + 64], ymm2
	LONG $0x7f7ec1c4; WORD $0x785c; BYTE $0x60 // vmovdqu    yword [r8 + 2*rdi + 96], ymm3
	// Advance the index by 64 elements and the negative chunk counter by 2.
	LONG $0x40c78348                           // add    rdi, 64
	LONG $0x02c68348                           // add    rsi, 2
	JNE  LBB4_1060
	// Continue at LBB4_1326 (outside this chunk; presumably handles an odd
	// leftover chunk using r9 — confirm).
	JMP  LBB4_1326

// LBB4_1061 / LBB4_1063: setup plus 2x-unrolled vector loop over signed
// 16-bit words, read from [rcx + 2*rsi] and stored to [r8 + 2*rsi].
// With C = the constant at rbp+192, per word x:
//   C > x (signed)  -> the "x != 0" mask (0xFFFF if x is nonzero, 0 if zero)
//   otherwise       -> C
// NOTE(review): C (.LCPI4_18) is not visible here; if every word is 1 this
// yields -1 / 0 / 1, i.e. an int16 sign() kernel — confirm.
LBB4_1061:
	// rdx = r11d & -32: r11d rounded down to a multiple of 32.
	WORD $0x8944; BYTE $0xda // mov    edx, r11d
	WORD $0xe283; BYTE $0xe0 // and    edx, -32
	// r9 = ((rdx - 32) >> 5) + 1: number of 32-element chunks.
	LONG $0xe0728d48         // lea    rsi, [rdx - 32]
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	LONG $0x05e9c149         // shr    r9, 5
	LONG $0x01c18349         // add    r9, 1
	// rdx == 32 (a single chunk): skip the unrolled loop and go to LBB4_1333.
	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
	JE   LBB4_1333
	// rdi = -(r9 & -2): negated even chunk count; the loop adds 2 until it hits 0.
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0xfee78348         // and    rdi, -2
	WORD $0xf748; BYTE $0xdf // neg    rdi
	// rsi = 0: element index; xmm0 = zeros; ymm1 = all ones; ymm2 = constant C.
	WORD $0xf631             // xor    esi, esi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x000000c0956ffdc5 // vmovdqa    ymm2, yword 192[rbp] /* [rip + .LCPI4_18] */

LBB4_1063:
	// First chunk: words [rsi, rsi+32).
	LONG $0x1c6ffec5; BYTE $0x71               // vmovdqu    ymm3, yword [rcx + 2*rsi]
	LONG $0x646ffec5; WORD $0x2071             // vmovdqu    ymm4, yword [rcx + 2*rsi + 32]
	// ymm5 / ymm6 = "x != 0" masks: (x == 0) XOR all-ones.
	LONG $0xe875e5c5                           // vpcmpeqw    ymm5, ymm3, ymm0
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0xf075ddc5                           // vpcmpeqw    ymm6, ymm4, ymm0
	LONG $0xf1efcdc5                           // vpxor    ymm6, ymm6, ymm1
	// ymm3 / ymm4 = "C > x" masks (signed word compare).
	LONG $0xdb65edc5                           // vpcmpgtw    ymm3, ymm2, ymm3
	LONG $0xe465edc5                           // vpcmpgtw    ymm4, ymm2, ymm4
	// Where C > x take the "x != 0" mask, elsewhere take C.
	LONG $0x4c6de3c4; WORD $0x30dd             // vpblendvb    ymm3, ymm2, ymm5, ymm3
	LONG $0x4c6de3c4; WORD $0x40e6             // vpblendvb    ymm4, ymm2, ymm6, ymm4
	LONG $0x7f7ec1c4; WORD $0x701c             // vmovdqu    yword [r8 + 2*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0x7064; BYTE $0x20 // vmovdqu    yword [r8 + 2*rsi + 32], ymm4
	// Second chunk: words [rsi+32, rsi+64), same sequence.
	LONG $0x5c6ffec5; WORD $0x4071             // vmovdqu    ymm3, yword [rcx + 2*rsi + 64]
	LONG $0x646ffec5; WORD $0x6071             // vmovdqu    ymm4, yword [rcx + 2*rsi + 96]
	LONG $0xe875e5c5                           // vpcmpeqw    ymm5, ymm3, ymm0
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0xf075ddc5                           // vpcmpeqw    ymm6, ymm4, ymm0
	LONG $0xf1efcdc5                           // vpxor    ymm6, ymm6, ymm1
	LONG $0xdb65edc5                           // vpcmpgtw    ymm3, ymm2, ymm3
	LONG $0xe465edc5                           // vpcmpgtw    ymm4, ymm2, ymm4
	LONG $0x4c6de3c4; WORD $0x30dd             // vpblendvb    ymm3, ymm2, ymm5, ymm3
	LONG $0x4c6de3c4; WORD $0x40e6             // vpblendvb    ymm4, ymm2, ymm6, ymm4
	LONG $0x7f7ec1c4; WORD $0x705c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rsi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x7064; BYTE $0x60 // vmovdqu    yword [r8 + 2*rsi + 96], ymm4
	// Advance the index by 64 elements and the negative chunk counter by 2.
	LONG $0x40c68348                           // add    rsi, 64
	LONG $0x02c78348                           // add    rdi, 2
	JNE  LBB4_1063
	// Continue at LBB4_1334 (outside this chunk; presumably handles an odd
	// leftover chunk using r9 — confirm).
	JMP  LBB4_1334

// LBB4_1064 / LBB4_1066: same instruction sequence as LBB4_1061 / LBB4_1063,
// differing only in its exit labels (LBB4_1342 / LBB4_1343).
// Signed 16-bit words are read from [rcx + 2*rsi] and results stored to
// [r8 + 2*rsi]. With C = the constant at rbp+192, per word x:
//   C > x (signed)  -> the "x != 0" mask (0xFFFF if x is nonzero, 0 if zero)
//   otherwise       -> C
// NOTE(review): C (.LCPI4_18) is not visible here; if every word is 1 this
// yields -1 / 0 / 1 — confirm.
LBB4_1064:
	// rdx = r11d & -32: r11d rounded down to a multiple of 32.
	WORD $0x8944; BYTE $0xda // mov    edx, r11d
	WORD $0xe283; BYTE $0xe0 // and    edx, -32
	// r9 = ((rdx - 32) >> 5) + 1: number of 32-element chunks.
	LONG $0xe0728d48         // lea    rsi, [rdx - 32]
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	LONG $0x05e9c149         // shr    r9, 5
	LONG $0x01c18349         // add    r9, 1
	// rdx == 32 (a single chunk): skip the unrolled loop and go to LBB4_1342.
	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
	JE   LBB4_1342
	// rdi = -(r9 & -2): negated even chunk count; the loop adds 2 until it hits 0.
	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
	LONG $0xfee78348         // and    rdi, -2
	WORD $0xf748; BYTE $0xdf // neg    rdi
	// rsi = 0: element index; xmm0 = zeros; ymm1 = all ones; ymm2 = constant C.
	WORD $0xf631             // xor    esi, esi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x000000c0956ffdc5 // vmovdqa    ymm2, yword 192[rbp] /* [rip + .LCPI4_18] */

LBB4_1066:
	// First chunk: words [rsi, rsi+32).
	LONG $0x1c6ffec5; BYTE $0x71               // vmovdqu    ymm3, yword [rcx + 2*rsi]
	LONG $0x646ffec5; WORD $0x2071             // vmovdqu    ymm4, yword [rcx + 2*rsi + 32]
	// ymm5 / ymm6 = "x != 0" masks: (x == 0) XOR all-ones.
	LONG $0xe875e5c5                           // vpcmpeqw    ymm5, ymm3, ymm0
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0xf075ddc5                           // vpcmpeqw    ymm6, ymm4, ymm0
	LONG $0xf1efcdc5                           // vpxor    ymm6, ymm6, ymm1
	// ymm3 / ymm4 = "C > x" masks (signed word compare).
	LONG $0xdb65edc5                           // vpcmpgtw    ymm3, ymm2, ymm3
	LONG $0xe465edc5                           // vpcmpgtw    ymm4, ymm2, ymm4
	// Where C > x take the "x != 0" mask, elsewhere take C.
	LONG $0x4c6de3c4; WORD $0x30dd             // vpblendvb    ymm3, ymm2, ymm5, ymm3
	LONG $0x4c6de3c4; WORD $0x40e6             // vpblendvb    ymm4, ymm2, ymm6, ymm4
	LONG $0x7f7ec1c4; WORD $0x701c             // vmovdqu    yword [r8 + 2*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0x7064; BYTE $0x20 // vmovdqu    yword [r8 + 2*rsi + 32], ymm4
	// Second chunk: words [rsi+32, rsi+64), same sequence.
	LONG $0x5c6ffec5; WORD $0x4071             // vmovdqu    ymm3, yword [rcx + 2*rsi + 64]
	LONG $0x646ffec5; WORD $0x6071             // vmovdqu    ymm4, yword [rcx + 2*rsi + 96]
	LONG $0xe875e5c5                           // vpcmpeqw    ymm5, ymm3, ymm0
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0xf075ddc5                           // vpcmpeqw    ymm6, ymm4, ymm0
	LONG $0xf1efcdc5                           // vpxor    ymm6, ymm6, ymm1
	LONG $0xdb65edc5                           // vpcmpgtw    ymm3, ymm2, ymm3
	LONG $0xe465edc5                           // vpcmpgtw    ymm4, ymm2, ymm4
	LONG $0x4c6de3c4; WORD $0x30dd             // vpblendvb    ymm3, ymm2, ymm5, ymm3
	LONG $0x4c6de3c4; WORD $0x40e6             // vpblendvb    ymm4, ymm2, ymm6, ymm4
	LONG $0x7f7ec1c4; WORD $0x705c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rsi + 64], ymm3
	LONG $0x7f7ec1c4; WORD $0x7064; BYTE $0x60 // vmovdqu    yword [r8 + 2*rsi + 96], ymm4
	// Advance the index by 64 elements and the negative chunk counter by 2.
	LONG $0x40c68348                           // add    rsi, 64
	LONG $0x02c78348                           // add    rdi, 2
	JNE  LBB4_1066
	// Continue at LBB4_1343 (outside this chunk; presumably handles an odd
	// leftover chunk using r9 — confirm).
	JMP  LBB4_1343

// LBB4_1067 / LBB4_1068: vector loop, 64 elements per iteration.
// Reads bytes at [rcx + rsi] and stores 16-bit words at [r8 + 2*rsi].
// Per byte x: the "x != 0" byte mask (0xFF / 0x00) is zero-extended to a word
// and ANDed with the matching word of the constant at rbp+192, so a zero
// input gives 0 and a nonzero input gives (0x00FF & constant word).
// NOTE(review): the constant (.LCPI4_18) is not visible here; presumably each
// word is 1, giving 0/1 output — confirm.
LBB4_1067:
	// rdx = eax & -64: loop bound (eax rounded down to a multiple of 64).
	WORD $0xc289             // mov    edx, eax
	WORD $0xe283; BYTE $0xc0 // and    edx, -64
	// rsi = 0: element index; xmm0 = zeros; xmm1 = all ones; ymm2 = constant.
	WORD $0xf631             // xor    esi, esi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5         // vpcmpeqd    xmm1, xmm1, xmm1
	QUAD $0x000000c0956ffdc5 // vmovdqa    ymm2, yword 192[rbp] /* [rip + .LCPI4_18] */

LBB4_1068:
	// Four 16-byte groups: (x == 0) mask, inverted, widened to words, masked.
	LONG $0x1c74f9c5; BYTE $0x31               // vpcmpeqb    xmm3, xmm0, oword [rcx + rsi]
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x307de2c4; BYTE $0xdb               // vpmovzxbw    ymm3, xmm3
	LONG $0x6474f9c5; WORD $0x1031             // vpcmpeqb    xmm4, xmm0, oword [rcx + rsi + 16]
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x307de2c4; BYTE $0xe4               // vpmovzxbw    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0x6c74f9c5; WORD $0x2031             // vpcmpeqb    xmm5, xmm0, oword [rcx + rsi + 32]
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x307de2c4; BYTE $0xed               // vpmovzxbw    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0x7474f9c5; WORD $0x3031             // vpcmpeqb    xmm6, xmm0, oword [rcx + rsi + 48]
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x307de2c4; BYTE $0xf6               // vpmovzxbw    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	// Store 64 result words (128 bytes).
	LONG $0x7f7ec1c4; WORD $0x701c             // vmovdqu    yword [r8 + 2*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0x7064; BYTE $0x20 // vmovdqu    yword [r8 + 2*rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0x706c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0x7074; BYTE $0x60 // vmovdqu    yword [r8 + 2*rsi + 96], ymm6
	// Advance by 64 elements; repeat until the index reaches the bound in rdx.
	LONG $0x40c68348                           // add    rsi, 64
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1068
	// No remainder (rdx == rax): go to LBB4_1351; otherwise LBB4_1070
	// (both outside this chunk; presumably exit and scalar tail — confirm).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1070

// LBB4_1074 / LBB4_1075: same instruction sequence as LBB4_1067 / LBB4_1068,
// differing only in the remainder label (LBB4_1077).
// Reads bytes at [rcx + rsi], stores 16-bit words at [r8 + 2*rsi], 64 elements
// per iteration. Per byte x: the "x != 0" byte mask is zero-extended to a word
// and ANDed with the matching word of the constant at rbp+192.
// NOTE(review): the constant (.LCPI4_18) is not visible here; presumably each
// word is 1, giving 0/1 output — confirm.
LBB4_1074:
	// rdx = eax & -64: loop bound (eax rounded down to a multiple of 64).
	WORD $0xc289             // mov    edx, eax
	WORD $0xe283; BYTE $0xc0 // and    edx, -64
	// rsi = 0: element index; xmm0 = zeros; xmm1 = all ones; ymm2 = constant.
	WORD $0xf631             // xor    esi, esi
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5         // vpcmpeqd    xmm1, xmm1, xmm1
	QUAD $0x000000c0956ffdc5 // vmovdqa    ymm2, yword 192[rbp] /* [rip + .LCPI4_18] */

LBB4_1075:
	// Four 16-byte groups: (x == 0) mask, inverted, widened to words, masked.
	LONG $0x1c74f9c5; BYTE $0x31               // vpcmpeqb    xmm3, xmm0, oword [rcx + rsi]
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x307de2c4; BYTE $0xdb               // vpmovzxbw    ymm3, xmm3
	LONG $0x6474f9c5; WORD $0x1031             // vpcmpeqb    xmm4, xmm0, oword [rcx + rsi + 16]
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x307de2c4; BYTE $0xe4               // vpmovzxbw    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0x6c74f9c5; WORD $0x2031             // vpcmpeqb    xmm5, xmm0, oword [rcx + rsi + 32]
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x307de2c4; BYTE $0xed               // vpmovzxbw    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0x7474f9c5; WORD $0x3031             // vpcmpeqb    xmm6, xmm0, oword [rcx + rsi + 48]
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x307de2c4; BYTE $0xf6               // vpmovzxbw    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	// Store 64 result words (128 bytes).
	LONG $0x7f7ec1c4; WORD $0x701c             // vmovdqu    yword [r8 + 2*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0x7064; BYTE $0x20 // vmovdqu    yword [r8 + 2*rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0x706c; BYTE $0x40 // vmovdqu    yword [r8 + 2*rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0x7074; BYTE $0x60 // vmovdqu    yword [r8 + 2*rsi + 96], ymm6
	// Advance by 64 elements; repeat until the index reaches the bound in rdx.
	LONG $0x40c68348                           // add    rsi, 64
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1075
	// No remainder (rdx == rax): go to LBB4_1351; otherwise LBB4_1077
	// (both outside this chunk; presumably exit and scalar tail — confirm).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1077

// LBB4_1081 / LBB4_1082: vector loop, 16 elements per iteration.
// Reads signed bytes at [rcx + rsi] and stores 64-bit values at [r8 + 8*rsi].
// Per element x:  x > 0  -> the qword broadcast from rbp+40
//                 x == 0 -> 0
//                 x < 0  -> all-ones qword (the sign-extended "x != 0" mask)
// NOTE(review): the qword at rbp+40 (.LCPI4_15) is not visible here; the
// all-ones result for negatives suggests integer output with constant 1
// (int8 -> int64 sign) — confirm. The pd-typed blend/store only move bits.
LBB4_1081:
	// rdx = r11d & -16: loop bound (r11d rounded down to a multiple of 16).
	WORD $0x8944; BYTE $0xda       // mov    edx, r11d
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	// rsi = 0: element index; xmm0 = zeros; xmm8 = all ones;
	// ymm2 = qword at rbp+40 replicated to all four lanes.
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0   // vpcmpeqd    xmm8, xmm8, xmm8
	LONG $0x197de2c4; WORD $0x2855 // vbroadcastsd    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

LBB4_1082:
	// Load four groups of 4 input bytes each.
	LONG $0x1c6ef9c5; BYTE $0x31               // vmovd    xmm3, dword [rcx + rsi]
	LONG $0x646ef9c5; WORD $0x0431             // vmovd    xmm4, dword [rcx + rsi + 4]
	LONG $0x6c6ef9c5; WORD $0x0831             // vmovd    xmm5, dword [rcx + rsi + 8]
	LONG $0x746ef9c5; WORD $0x0c31             // vmovd    xmm6, dword [rcx + rsi + 12]
	// "x > 0" byte masks (signed compare), sign-extended to qwords:
	// ymm9, ymm10, ymm7, ymm1 for the four groups respectively.
	LONG $0xf864e1c5                           // vpcmpgtb    xmm7, xmm3, xmm0
	LONG $0x227d62c4; BYTE $0xcf               // vpmovsxbq    ymm9, xmm7
	LONG $0xc864d9c5                           // vpcmpgtb    xmm1, xmm4, xmm0
	LONG $0x227d62c4; BYTE $0xd1               // vpmovsxbq    ymm10, xmm1
	LONG $0xf864d1c5                           // vpcmpgtb    xmm7, xmm5, xmm0
	LONG $0x227de2c4; BYTE $0xff               // vpmovsxbq    ymm7, xmm7
	LONG $0xc864c9c5                           // vpcmpgtb    xmm1, xmm6, xmm0
	LONG $0x227de2c4; BYTE $0xc9               // vpmovsxbq    ymm1, xmm1
	// "x != 0" masks: (x == 0) XOR all-ones, sign-extended to qwords (0 or -1).
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x227de2c4; BYTE $0xdb               // vpmovsxbq    ymm3, xmm3
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x227de2c4; BYTE $0xe4               // vpmovsxbq    ymm4, xmm4
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x227de2c4; BYTE $0xed               // vpmovsxbq    ymm5, xmm5
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x227de2c4; BYTE $0xf6               // vpmovsxbq    ymm6, xmm6
	// Blend: take the broadcast constant where "x > 0", else the "x != 0" mask.
	LONG $0x4b65e3c4; WORD $0x90da             // vblendvpd    ymm3, ymm3, ymm2, ymm9
	LONG $0x4b5de3c4; WORD $0xa0e2             // vblendvpd    ymm4, ymm4, ymm2, ymm10
	LONG $0x4b55e3c4; WORD $0x70ea             // vblendvpd    ymm5, ymm5, ymm2, ymm7
	LONG $0x4b4de3c4; WORD $0x10ca             // vblendvpd    ymm1, ymm6, ymm2, ymm1
	// Store 16 result qwords (128 bytes).
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf04c; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm1
	// Advance by 16 elements; repeat until the index reaches the bound in rdx.
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1082
	// No remainder (rdx == r11): go to LBB4_1351; otherwise LBB4_1084
	// (both outside this chunk; presumably exit and scalar tail — confirm).
	WORD $0x394c; BYTE $0xda                   // cmp    rdx, r11
	JE   LBB4_1351
	JMP  LBB4_1084

// LBB4_1089 / LBB4_1090: vector loop, 32 elements per iteration.
// Reads signed bytes at [rcx + rsi] and stores float32 values at [r8 + 4*rsi].
// Per element x:  x > 0  -> the float broadcast from rbp+152
//                 x == 0 -> 0.0   (int32 0 converted to float)
//                 x < 0  -> -1.0  (int32 -1 converted to float)
// NOTE(review): the dword at rbp+152 (.LCPI4_5) is not visible here;
// presumably 1.0f, making this an int8 -> float32 sign() kernel — confirm.
LBB4_1089:
	// rdx = eax & -32: loop bound (eax rounded down to a multiple of 32).
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	// rsi = 0: element index; xmm0 = zeros; xmm8 = all ones;
	// ymm2 = dword at rbp+152 replicated to all eight lanes.
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0         // vpcmpeqd    xmm8, xmm8, xmm8
	QUAD $0x00009895187de2c4; BYTE $0x00 // vbroadcastss    ymm2, dword 152[rbp] /* [rip + .LCPI4_5] */

LBB4_1090:
	// Load four groups of 8 input bytes each.
	LONG $0x1c7efac5; BYTE $0x31               // vmovq    xmm3, qword [rcx + rsi]
	LONG $0x647efac5; WORD $0x0831             // vmovq    xmm4, qword [rcx + rsi + 8]
	LONG $0x6c7efac5; WORD $0x1031             // vmovq    xmm5, qword [rcx + rsi + 16]
	LONG $0x747efac5; WORD $0x1831             // vmovq    xmm6, qword [rcx + rsi + 24]
	// "x > 0" byte masks (signed compare), sign-extended to dwords:
	// ymm9, ymm10, ymm7, ymm1 for the four groups respectively.
	LONG $0xf864e1c5                           // vpcmpgtb    xmm7, xmm3, xmm0
	LONG $0x217d62c4; BYTE $0xcf               // vpmovsxbd    ymm9, xmm7
	LONG $0xc864d9c5                           // vpcmpgtb    xmm1, xmm4, xmm0
	LONG $0x217d62c4; BYTE $0xd1               // vpmovsxbd    ymm10, xmm1
	LONG $0xf864d1c5                           // vpcmpgtb    xmm7, xmm5, xmm0
	LONG $0x217de2c4; BYTE $0xff               // vpmovsxbd    ymm7, xmm7
	LONG $0xc864c9c5                           // vpcmpgtb    xmm1, xmm6, xmm0
	LONG $0x217de2c4; BYTE $0xc9               // vpmovsxbd    ymm1, xmm1
	// "x != 0" masks as int32 (0 or -1), then converted to float (0.0 or -1.0).
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x217de2c4; BYTE $0xdb               // vpmovsxbd    ymm3, xmm3
	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x217de2c4; BYTE $0xe4               // vpmovsxbd    ymm4, xmm4
	LONG $0xe45bfcc5                           // vcvtdq2ps    ymm4, ymm4
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x217de2c4; BYTE $0xed               // vpmovsxbd    ymm5, xmm5
	LONG $0xed5bfcc5                           // vcvtdq2ps    ymm5, ymm5
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x217de2c4; BYTE $0xf6               // vpmovsxbd    ymm6, xmm6
	LONG $0xf65bfcc5                           // vcvtdq2ps    ymm6, ymm6
	// Blend: take the broadcast float where "x > 0", else the converted mask.
	LONG $0x4a65e3c4; WORD $0x90da             // vblendvps    ymm3, ymm3, ymm2, ymm9
	LONG $0x4a5de3c4; WORD $0xa0e2             // vblendvps    ymm4, ymm4, ymm2, ymm10
	LONG $0x4a55e3c4; WORD $0x70ea             // vblendvps    ymm5, ymm5, ymm2, ymm7
	LONG $0x4a4de3c4; WORD $0x10ca             // vblendvps    ymm1, ymm6, ymm2, ymm1
	// Store 32 result floats (128 bytes).
	LONG $0x117cc1c4; WORD $0xb01c             // vmovups    yword [r8 + 4*rsi], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x117cc1c4; WORD $0xb04c; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm1
	// Advance by 32 elements; repeat until the index reaches the bound in rdx.
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1090
	// No remainder (rdx == rax): go to LBB4_1351; otherwise LBB4_1092
	// (both outside this chunk; presumably exit and scalar tail — confirm).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1092

// LBB4_1095 / LBB4_1096: vector loop, 16 elements per iteration.
// Reads 64-bit values at [rcx + 8*rsi] and stores 64-bit values at
// [r8 + 8*rsi]. Per element x: nonzero -> the qword broadcast from rbp+40,
// zero -> 0 (vpcmpeqq against zero followed by vpandn).
// NOTE(review): the qword at rbp+40 (.LCPI4_15) is not visible here;
// presumably 1 — confirm.
LBB4_1095:
	// rdx = eax & -16: loop bound (eax rounded down to a multiple of 16).
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	// rsi = 0: element index; xmm0 = zeros;
	// ymm1 = qword at rbp+40 replicated to all four lanes.
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0x597de2c4; WORD $0x284d // vpbroadcastq    ymm1, qword 40[rbp] /* [rip + .LCPI4_15] */

LBB4_1096:
	// (x == 0) masks for four vectors of 4 qwords; vpandn turns each into
	// "constant where x != 0, else 0".
	LONG $0x297de2c4; WORD $0xf114             // vpcmpeqq    ymm2, ymm0, yword [rcx + 8*rsi]
	LONG $0xd1dfedc5                           // vpandn    ymm2, ymm2, ymm1
	LONG $0x297de2c4; WORD $0xf15c; BYTE $0x20 // vpcmpeqq    ymm3, ymm0, yword [rcx + 8*rsi + 32]
	LONG $0xd9dfe5c5                           // vpandn    ymm3, ymm3, ymm1
	LONG $0x297de2c4; WORD $0xf164; BYTE $0x40 // vpcmpeqq    ymm4, ymm0, yword [rcx + 8*rsi + 64]
	LONG $0x297de2c4; WORD $0xf16c; BYTE $0x60 // vpcmpeqq    ymm5, ymm0, yword [rcx + 8*rsi + 96]
	LONG $0xe1dfddc5                           // vpandn    ymm4, ymm4, ymm1
	LONG $0xe9dfd5c5                           // vpandn    ymm5, ymm5, ymm1
	// Store 16 result qwords (128 bytes).
	LONG $0x7f7ec1c4; WORD $0xf014             // vmovdqu    yword [r8 + 8*rsi], ymm2
	LONG $0x7f7ec1c4; WORD $0xf05c; BYTE $0x20 // vmovdqu    yword [r8 + 8*rsi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xf064; BYTE $0x40 // vmovdqu    yword [r8 + 8*rsi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xf06c; BYTE $0x60 // vmovdqu    yword [r8 + 8*rsi + 96], ymm5
	// Advance by 16 elements; repeat until the index reaches the bound in rdx.
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1096
	// No remainder (rdx == rax): go to LBB4_1351; otherwise LBB4_1098
	// (both outside this chunk; presumably exit and scalar tail — confirm).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1098

// LBB4_1102 / LBB4_1103: vector loop, 16 elements per iteration.
// Reads signed 64-bit values at [rcx + 8*rsi] and stores 64-bit values at
// [r8 + 8*rsi]. With C = the qword broadcast from rbp+40, per element x:
//   C > x (signed)  -> the "x != 0" mask (all ones if x is nonzero, 0 if zero)
//   otherwise       -> C
// NOTE(review): C (.LCPI4_15) is not visible here; if C is 1 this yields
// -1 / 0 / 1, i.e. an int64 sign() kernel — confirm.
LBB4_1102:
	// rdx = r11d & -16: loop bound (r11d rounded down to a multiple of 16).
	WORD $0x8944; BYTE $0xda       // mov    edx, r11d
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	// rsi = 0: element index; xmm0 = zeros; ymm1 = all ones; ymm2 = C.
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5               // vpcmpeqd    ymm1, ymm1, ymm1
	LONG $0x597de2c4; WORD $0x2855 // vpbroadcastq    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

LBB4_1103:
	// Load four vectors of 4 qwords each.
	LONG $0x1c6ffec5; BYTE $0xf1               // vmovdqu    ymm3, yword [rcx + 8*rsi]
	LONG $0x646ffec5; WORD $0x20f1             // vmovdqu    ymm4, yword [rcx + 8*rsi + 32]
	LONG $0x6c6ffec5; WORD $0x40f1             // vmovdqu    ymm5, yword [rcx + 8*rsi + 64]
	LONG $0x746ffec5; WORD $0x60f1             // vmovdqu    ymm6, yword [rcx + 8*rsi + 96]
	// ymm7..ymm10 = "x != 0" masks: (x == 0) XOR all-ones.
	LONG $0x2965e2c4; BYTE $0xf8               // vpcmpeqq    ymm7, ymm3, ymm0
	LONG $0xf9efc5c5                           // vpxor    ymm7, ymm7, ymm1
	LONG $0x295d62c4; BYTE $0xc0               // vpcmpeqq    ymm8, ymm4, ymm0
	LONG $0xc1ef3dc5                           // vpxor    ymm8, ymm8, ymm1
	LONG $0x295562c4; BYTE $0xc8               // vpcmpeqq    ymm9, ymm5, ymm0
	LONG $0xc9ef35c5                           // vpxor    ymm9, ymm9, ymm1
	LONG $0x294d62c4; BYTE $0xd0               // vpcmpeqq    ymm10, ymm6, ymm0
	LONG $0xd1ef2dc5                           // vpxor    ymm10, ymm10, ymm1
	// ymm3..ymm6 = "C > x" masks (signed qword compare).
	LONG $0x376de2c4; BYTE $0xdb               // vpcmpgtq    ymm3, ymm2, ymm3
	LONG $0x376de2c4; BYTE $0xe4               // vpcmpgtq    ymm4, ymm2, ymm4
	LONG $0x376de2c4; BYTE $0xed               // vpcmpgtq    ymm5, ymm2, ymm5
	LONG $0x376de2c4; BYTE $0xf6               // vpcmpgtq    ymm6, ymm2, ymm6
	// Where C > x take the "x != 0" mask, elsewhere take C.
	LONG $0x4b6de3c4; WORD $0x30df             // vblendvpd    ymm3, ymm2, ymm7, ymm3
	LONG $0x4b6dc3c4; WORD $0x40e0             // vblendvpd    ymm4, ymm2, ymm8, ymm4
	LONG $0x4b6dc3c4; WORD $0x50e9             // vblendvpd    ymm5, ymm2, ymm9, ymm5
	LONG $0x4b6dc3c4; WORD $0x60f2             // vblendvpd    ymm6, ymm2, ymm10, ymm6
	// Store 16 result qwords (128 bytes).
	LONG $0x117dc1c4; WORD $0xf01c             // vmovupd    yword [r8 + 8*rsi], ymm3
	LONG $0x117dc1c4; WORD $0xf064; BYTE $0x20 // vmovupd    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x117dc1c4; WORD $0xf06c; BYTE $0x40 // vmovupd    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x117dc1c4; WORD $0xf074; BYTE $0x60 // vmovupd    yword [r8 + 8*rsi + 96], ymm6
	// Advance by 16 elements; repeat until the index reaches the bound in rdx.
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1103
	// No remainder (rdx == r11): go to LBB4_1351; otherwise LBB4_1105
	// (both outside this chunk; presumably exit and scalar tail — confirm).
	WORD $0x394c; BYTE $0xda                   // cmp    rdx, r11
	JE   LBB4_1351
	JMP  LBB4_1105

// LBB4_1110 / LBB4_1111: vector loop, 32 elements per iteration.
// Reads 32-bit values at [rcx + 4*rsi] and stores float32 values at
// [r8 + 4*rsi]. With c = the dword broadcast from rbp+156, per element x:
//   s = (x arithmetically shifted right by 31) | c   (sign-bit fill OR c)
//   f = float(s)                                     (int32 -> float32)
//   out = (x != 0.0 as a float compare) ? f : 0
// NOTE(review): c (.LCPI4_8) is not visible here; if c is 1 then f is -1.0 or
// +1.0 and this is a float32 sign() kernel — confirm.
// NOTE(review): the compare uses predicate 4 (not-equal), which per the Intel
// SDM is also true for NaN operands, so a NaN input would yield +/-1.0 rather
// than NaN — confirm this is intended.
LBB4_1110:
	// rdx = eax & -32: loop bound (eax rounded down to a multiple of 32).
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	// rsi = 0: element index; xmm0 = zeros;
	// ymm1 = dword at rbp+156 replicated to all eight lanes.
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc057f8c5                     // vxorps    xmm0, xmm0, xmm0
	QUAD $0x00009c8d587de2c4; BYTE $0x00 // vpbroadcastd    ymm1, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_1111:
	// Load four vectors of 8 dwords each.
	LONG $0x146ffec5; BYTE $0xb1               // vmovdqu    ymm2, yword [rcx + 4*rsi]
	LONG $0x5c6ffec5; WORD $0x20b1             // vmovdqu    ymm3, yword [rcx + 4*rsi + 32]
	LONG $0x646ffec5; WORD $0x40b1             // vmovdqu    ymm4, yword [rcx + 4*rsi + 64]
	LONG $0x6c6ffec5; WORD $0x60b1             // vmovdqu    ymm5, yword [rcx + 4*rsi + 96]
	// Replicate each element's sign bit across the dword, then OR in c.
	LONG $0xe272cdc5; BYTE $0x1f               // vpsrad    ymm6, ymm2, 31
	LONG $0xf1ebcdc5                           // vpor    ymm6, ymm6, ymm1
	LONG $0xe372c5c5; BYTE $0x1f               // vpsrad    ymm7, ymm3, 31
	LONG $0xf9ebc5c5                           // vpor    ymm7, ymm7, ymm1
	LONG $0xe472bdc5; BYTE $0x1f               // vpsrad    ymm8, ymm4, 31
	LONG $0xc1eb3dc5                           // vpor    ymm8, ymm8, ymm1
	LONG $0xe572b5c5; BYTE $0x1f               // vpsrad    ymm9, ymm5, 31
	LONG $0xc9eb35c5                           // vpor    ymm9, ymm9, ymm1
	// Convert the int32 results to float32.
	LONG $0xf65bfcc5                           // vcvtdq2ps    ymm6, ymm6
	LONG $0xff5bfcc5                           // vcvtdq2ps    ymm7, ymm7
	LONG $0x5b7c41c4; BYTE $0xc0               // vcvtdq2ps    ymm8, ymm8
	LONG $0x5b7c41c4; BYTE $0xc9               // vcvtdq2ps    ymm9, ymm9
	// Mask with "x != 0.0" so zero inputs produce an all-zero result.
	LONG $0xd0c2ecc5; BYTE $0x04               // vcmpneqps    ymm2, ymm2, ymm0
	LONG $0xd654ecc5                           // vandps    ymm2, ymm2, ymm6
	LONG $0xd8c2e4c5; BYTE $0x04               // vcmpneqps    ymm3, ymm3, ymm0
	LONG $0xdf54e4c5                           // vandps    ymm3, ymm3, ymm7
	LONG $0xe0c2dcc5; BYTE $0x04               // vcmpneqps    ymm4, ymm4, ymm0
	LONG $0xe454bcc5                           // vandps    ymm4, ymm8, ymm4
	LONG $0xe8c2d4c5; BYTE $0x04               // vcmpneqps    ymm5, ymm5, ymm0
	LONG $0xed54b4c5                           // vandps    ymm5, ymm9, ymm5
	// Store 32 result floats (128 bytes).
	LONG $0x117cc1c4; WORD $0xb014             // vmovups    yword [r8 + 4*rsi], ymm2
	LONG $0x117cc1c4; WORD $0xb05c; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm5
	// Advance by 32 elements; repeat until the index reaches the bound in rdx.
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1111
	// No remainder (rdx == rax): go to LBB4_1351; otherwise LBB4_1113
	// (both outside this chunk; presumably exit and scalar tail — confirm).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1113

// LBB4_1118 / LBB4_1119: vector loop, 16 elements per iteration.
// Reads bytes at [rcx + rsi] and stores 64-bit values at [r8 + 8*rsi].
// Per byte x: the "x != 0" byte mask (0xFF / 0x00) is zero-extended to a qword
// and ANDed with the qword broadcast from rbp+40, so a zero input gives 0 and
// a nonzero input gives (0xFF & constant).
// NOTE(review): the qword at rbp+40 (.LCPI4_15) is not visible here;
// presumably 1, giving 0/1 output — confirm.
LBB4_1118:
	// rdx = eax & -16: loop bound (eax rounded down to a multiple of 16).
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	// rsi = 0: element index; xmm0 = zeros; xmm1 = all ones;
	// ymm2 = qword at rbp+40 replicated to all four lanes.
	WORD $0xf631                   // xor    esi, esi
	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5               // vpcmpeqd    xmm1, xmm1, xmm1
	LONG $0x597de2c4; WORD $0x2855 // vpbroadcastq    ymm2, qword 40[rbp] /* [rip + .LCPI4_15] */

LBB4_1119:
	// Load four groups of 4 input bytes each.
	LONG $0x1c6ef9c5; BYTE $0x31               // vmovd    xmm3, dword [rcx + rsi]
	LONG $0x646ef9c5; WORD $0x0431             // vmovd    xmm4, dword [rcx + rsi + 4]
	LONG $0x6c6ef9c5; WORD $0x0831             // vmovd    xmm5, dword [rcx + rsi + 8]
	LONG $0x746ef9c5; WORD $0x0c31             // vmovd    xmm6, dword [rcx + rsi + 12]
	// Per group: (x == 0) mask, inverted, zero-extended to qwords, masked.
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x327de2c4; BYTE $0xdb               // vpmovzxbq    ymm3, xmm3
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x327de2c4; BYTE $0xe4               // vpmovzxbq    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x327de2c4; BYTE $0xed               // vpmovzxbq    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x327de2c4; BYTE $0xf6               // vpmovzxbq    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	// Store 16 result qwords (128 bytes).
	LONG $0x7f7ec1c4; WORD $0xf01c             // vmovdqu    yword [r8 + 8*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0xf064; BYTE $0x20 // vmovdqu    yword [r8 + 8*rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0xf06c; BYTE $0x40 // vmovdqu    yword [r8 + 8*rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0xf074; BYTE $0x60 // vmovdqu    yword [r8 + 8*rsi + 96], ymm6
	// Advance by 16 elements; repeat until the index reaches the bound in rdx.
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1119
	// No remainder (rdx == rax): go to LBB4_1351; otherwise LBB4_1121
	// (both outside this chunk; presumably exit and scalar tail — confirm).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1121

// LBB4_1125 / LBB4_1126: vectorized loop, 32 elements per iteration.
// Reads 8-bit elements from [rcx + rsi] and stores 32-bit floats to
// [r8 + 4*rsi]. Per element:
//   out[i] = float32((in[i] != 0 ? 0xFF : 0) & K)
// K is the dword broadcast from 156[rbp] (.LCPI4_8); its value is not visible
// in this chunk -- presumably 1, TODO confirm.
// rsi = element index, rdx = eax rounded down to a multiple of 32.
// NOTE(review): rax is assumed to hold the total element count; it is set by
// code outside this chunk.
LBB4_1125:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	// xmm0 = all zeros (compare operand), xmm1 = all ones (used to invert masks).
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5                     // vpcmpeqd    xmm1, xmm1, xmm1
	QUAD $0x00009c95587de2c4; BYTE $0x00 // vpbroadcastd    ymm2, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_1126:
	// Load four groups of 8 input bytes.
	LONG $0x1c7efac5; BYTE $0x31               // vmovq    xmm3, qword [rcx + rsi]
	LONG $0x647efac5; WORD $0x0831             // vmovq    xmm4, qword [rcx + rsi + 8]
	LONG $0x6c7efac5; WORD $0x1031             // vmovq    xmm5, qword [rcx + rsi + 16]
	LONG $0x747efac5; WORD $0x1831             // vmovq    xmm6, qword [rcx + rsi + 24]
	// Per group: (byte == 0) mask, inverted to a "nonzero" mask, zero-extended
	// to 8 dwords, masked with ymm2, then converted int32 -> float32.
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x317de2c4; BYTE $0xdb               // vpmovzxbd    ymm3, xmm3
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x317de2c4; BYTE $0xe4               // vpmovzxbd    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe45bfcc5                           // vcvtdq2ps    ymm4, ymm4
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x317de2c4; BYTE $0xed               // vpmovzxbd    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xed5bfcc5                           // vcvtdq2ps    ymm5, ymm5
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x317de2c4; BYTE $0xf6               // vpmovzxbd    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	LONG $0xf65bfcc5                           // vcvtdq2ps    ymm6, ymm6
	// Store 4 x 8 floats (128 bytes of output for 32 input bytes).
	LONG $0x117cc1c4; WORD $0xb01c             // vmovups    yword [r8 + 4*rsi], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x117cc1c4; WORD $0xb074; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm6
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1126
	// Vector part done. If the rounded-down count equals rax nothing is left;
	// otherwise continue at LBB4_1128 (presumably the scalar remainder loop,
	// defined outside this chunk).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1128

// LBB4_1144 / LBB4_1145: vectorized loop, 32 elements per iteration.
// Reads 32-bit elements from [rcx + 4*rsi] and stores one byte per element to
// [r8 + rsi]. Per element:
//   out[i] = (in[i] != 0 ? 0xFF : 0) & K
// K is the 16-byte constant loaded from 80[rbp] (.LCPI4_12); its value is not
// visible in this chunk -- presumably 1 in every byte, TODO confirm.
// rsi = element index, rdx = eax rounded down to a multiple of 32.
// NOTE(review): rax is assumed to hold the total element count; it is set by
// code outside this chunk.
LBB4_1144:
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xe0     // and    edx, -32
	WORD $0xf631                 // xor    esi, esi
	// ymm0 = all zeros (VEX xmm write clears the upper half), ymm1 = all ones.
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5             // vpcmpeqd    ymm1, ymm1, ymm1
	LONG $0x556ff9c5; BYTE $0x50 // vmovdqa    xmm2, oword 80[rbp] /* [rip + .LCPI4_12] */

LBB4_1145:
	// Per group of 8 dwords: (x == 0) mask, inverted, then narrowed
	// dword -> word -> byte with saturating packs (masks are all-ones or zero,
	// so narrowing preserves them), and finally masked with xmm2.
	LONG $0x1c76fdc5; BYTE $0xb1   // vpcmpeqd    ymm3, ymm0, yword [rcx + 4*rsi]
	LONG $0xd9efe5c5               // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5               // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xdb63e1c5               // vpacksswb    xmm3, xmm3, xmm3
	LONG $0xdadbe1c5               // vpand    xmm3, xmm3, xmm2
	LONG $0x6476fdc5; WORD $0x20b1 // vpcmpeqd    ymm4, ymm0, yword [rcx + 4*rsi + 32]
	LONG $0xe1efddc5               // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5 // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5               // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe463d9c5               // vpacksswb    xmm4, xmm4, xmm4
	LONG $0x6c76fdc5; WORD $0x40b1 // vpcmpeqd    ymm5, ymm0, yword [rcx + 4*rsi + 64]
	LONG $0xe2dbd9c5               // vpand    xmm4, xmm4, xmm2
	LONG $0xe9efd5c5               // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5               // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xed63d1c5               // vpacksswb    xmm5, xmm5, xmm5
	LONG $0xeadbd1c5               // vpand    xmm5, xmm5, xmm2
	LONG $0x7476fdc5; WORD $0x60b1 // vpcmpeqd    ymm6, ymm0, yword [rcx + 4*rsi + 96]
	LONG $0xf1efcdc5               // vpxor    ymm6, ymm6, ymm1
	LONG $0x397de3c4; WORD $0x01f7 // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5               // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xf663c9c5               // vpacksswb    xmm6, xmm6, xmm6
	LONG $0xf2dbc9c5               // vpand    xmm6, xmm6, xmm2
	// Gather the four 8-byte results into one 32-byte vector in element order:
	// ymm5 = {g2, g3}, ymm3 = {g0, g1}; unpcklqdq yields {g0, g2 | g1, g3} and
	// vpermq 216 (qword order 0,2,1,3) restores {g0, g1, g2, g3}.
	LONG $0x3855e3c4; WORD $0x01ee // vinserti128    ymm5, ymm5, xmm6, 1
	LONG $0x3865e3c4; WORD $0x01dc // vinserti128    ymm3, ymm3, xmm4, 1
	LONG $0xdd6ce5c5               // vpunpcklqdq    ymm3, ymm3, ymm5
	LONG $0x00fde3c4; WORD $0xd8db // vpermq    ymm3, ymm3, 216
	LONG $0x7f7ec1c4; WORD $0x301c // vmovdqu    yword [r8 + rsi], ymm3
	LONG $0x20c68348               // add    rsi, 32
	WORD $0x3948; BYTE $0xf2       // cmp    rdx, rsi
	JNE  LBB4_1145
	// Vector part done. If the rounded-down count equals rax nothing is left;
	// otherwise continue at LBB4_1147 (presumably the scalar remainder loop,
	// defined outside this chunk).
	WORD $0x3948; BYTE $0xc2       // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1147

// LBB4_1151 / LBB4_1152: vectorized loop, 16 elements per iteration.
// Reads 64-bit floats from [rcx + 8*rsi] and stores one byte per element to
// [r8 + rsi]. Per element:
//   t      = (x AND C0) OR C1          (bitwise, on the double's bits)
//   out[i] = (x == 0.0) ? 0 : narrow_to_byte(trunc_to_int32(t))
// where the narrowing uses unsigned-saturating packs.
// C0 is the qword broadcast from 0[rbp] (.LCPI4_0) and C1 the double broadcast
// from 8[rbp] (.LCPI4_1). Their values are not visible in this chunk --
// presumably a sign-bit mask and 1.0 (a copysign(1.0, x) pattern); TODO confirm.
// rsi = element index, rdx = eax rounded down to a multiple of 16.
// NOTE(review): rax is assumed to hold the total element count; it is set by
// code outside this chunk.
LBB4_1151:
	WORD $0xc289                   // mov    edx, eax
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	WORD $0xf631                   // xor    esi, esi
	LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq    ymm0, qword 0[rbp] /* [rip + .LCPI4_0] */
	// ymm10 = 0.0 (compare operand), xmm11 = zero bytes (blend source for x == 0).
	LONG $0x572941c4; BYTE $0xd2   // vxorpd    xmm10, xmm10, xmm10
	LONG $0x197de2c4; WORD $0x0855 // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0xef2141c4; BYTE $0xdb   // vpxor    xmm11, xmm11, xmm11

LBB4_1152:
	// Load four groups of 4 doubles.
	LONG $0x3410fdc5; BYTE $0xf1   // vmovupd    ymm6, yword [rcx + 8*rsi]
	LONG $0x7c10fdc5; WORD $0x20f1 // vmovupd    ymm7, yword [rcx + 8*rsi + 32]
	LONG $0x44107dc5; WORD $0x40f1 // vmovupd    ymm8, yword [rcx + 8*rsi + 64]
	LONG $0x4c107dc5; WORD $0x60f1 // vmovupd    ymm9, yword [rcx + 8*rsi + 96]
	// (x == 0.0) masks for each group, narrowed qword -> byte with saturating
	// packs. Results: xmm12, xmm13, xmm1, xmm3.
	LONG $0xe6c2adc5; BYTE $0x00   // vcmpeqpd    ymm4, ymm10, ymm6
	LONG $0x197de3c4; WORD $0x01e5 // vextractf128    xmm5, ymm4, 1
	LONG $0xe56bd9c5               // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe46bd9c5               // vpackssdw    xmm4, xmm4, xmm4
	LONG $0xe46359c5               // vpacksswb    xmm12, xmm4, xmm4
	LONG $0xefc2adc5; BYTE $0x00   // vcmpeqpd    ymm5, ymm10, ymm7
	LONG $0x197de3c4; WORD $0x01e9 // vextractf128    xmm1, ymm5, 1
	LONG $0xc96bd1c5               // vpackssdw    xmm1, xmm5, xmm1
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xe96371c5               // vpacksswb    xmm13, xmm1, xmm1
	LONG $0xc23dc1c4; WORD $0x00ca // vcmpeqpd    ymm1, ymm8, ymm10
	LONG $0x197de3c4; WORD $0x01cb // vextractf128    xmm3, ymm1, 1
	LONG $0xcb6bf1c5               // vpackssdw    xmm1, xmm1, xmm3
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xc963f1c5               // vpacksswb    xmm1, xmm1, xmm1
	LONG $0xc235c1c4; WORD $0x00da // vcmpeqpd    ymm3, ymm9, ymm10
	LONG $0x197de3c4; WORD $0x01dc // vextractf128    xmm4, ymm3, 1
	LONG $0xdc6be1c5               // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xdb6be1c5               // vpackssdw    xmm3, xmm3, xmm3
	LONG $0xdb63e1c5               // vpacksswb    xmm3, xmm3, xmm3
	// t = (x AND ymm0) OR ymm2 for each group.
	LONG $0xe054cdc5               // vandpd    ymm4, ymm6, ymm0
	LONG $0xe456edc5               // vorpd    ymm4, ymm2, ymm4
	LONG $0xf054c5c5               // vandpd    ymm6, ymm7, ymm0
	LONG $0xf656edc5               // vorpd    ymm6, ymm2, ymm6
	LONG $0xf854bdc5               // vandpd    ymm7, ymm8, ymm0
	LONG $0xff56edc5               // vorpd    ymm7, ymm2, ymm7
	LONG $0xc05435c5               // vandpd    ymm8, ymm9, ymm0
	LONG $0xc2563dc5               // vorpd    ymm8, ymm8, ymm2
	// Truncating double -> int32 conversion, then unsigned-saturating narrowing
	// dword -> word -> byte (4 result bytes per group).
	LONG $0xe4e6fdc5               // vcvttpd2dq    xmm4, ymm4
	LONG $0x2b59e2c4; BYTE $0xe4   // vpackusdw    xmm4, xmm4, xmm4
	LONG $0xe467d9c5               // vpackuswb    xmm4, xmm4, xmm4
	LONG $0xf6e6fdc5               // vcvttpd2dq    xmm6, ymm6
	LONG $0x2b49e2c4; BYTE $0xf6   // vpackusdw    xmm6, xmm6, xmm6
	LONG $0xf667c9c5               // vpackuswb    xmm6, xmm6, xmm6
	LONG $0xffe6fdc5               // vcvttpd2dq    xmm7, ymm7
	LONG $0x2b41e2c4; BYTE $0xff   // vpackusdw    xmm7, xmm7, xmm7
	LONG $0xff67c1c5               // vpackuswb    xmm7, xmm7, xmm7
	LONG $0xe67dc1c4; BYTE $0xe8   // vcvttpd2dq    xmm5, ymm8
	LONG $0x2b51e2c4; BYTE $0xed   // vpackusdw    xmm5, xmm5, xmm5
	LONG $0xed67d1c5               // vpackuswb    xmm5, xmm5, xmm5
	// Where the input was == 0.0, replace the converted byte with 0 (xmm11).
	// The unpck instructions interleave the four 4-byte groups back into
	// element order as one 16-byte vector.
	LONG $0x4c59c3c4; WORD $0xc0e3 // vpblendvb    xmm4, xmm4, xmm11, xmm12
	LONG $0x4c49c3c4; WORD $0xd0f3 // vpblendvb    xmm6, xmm6, xmm11, xmm13
	LONG $0x4c41c3c4; WORD $0x10cb // vpblendvb    xmm1, xmm7, xmm11, xmm1
	LONG $0xe662d9c5               // vpunpckldq    xmm4, xmm4, xmm6
	LONG $0x4c51c3c4; WORD $0x30db // vpblendvb    xmm3, xmm5, xmm11, xmm3
	LONG $0xcb62f1c5               // vpunpckldq    xmm1, xmm1, xmm3
	LONG $0xc96cd9c5               // vpunpcklqdq    xmm1, xmm4, xmm1
	LONG $0x7f7ac1c4; WORD $0x300c // vmovdqu    oword [r8 + rsi], xmm1
	LONG $0x10c68348               // add    rsi, 16
	WORD $0x3948; BYTE $0xf2       // cmp    rdx, rsi
	JNE  LBB4_1152
	// Vector part done. If the rounded-down count equals rax nothing is left;
	// otherwise continue at LBB4_1154 (presumably the scalar remainder loop,
	// defined outside this chunk).
	WORD $0x3948; BYTE $0xc2       // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1154

// LBB4_1159 / LBB4_1160: vectorized loop, 128 elements per iteration.
// Reads bytes from [rcx + rsi] and stores bytes to [r8 + rsi]. Per element
// (signed byte compare):
//   out[i] = (K > in[i]) ? (in[i] != 0 ? 0xFF : 0) : K
// K is the 32-byte constant loaded from 224[rbp] (.LCPI4_20); its value is not
// visible in this chunk -- presumably 1 in every byte, which would make this
// a -1/0/+1 sign computation; TODO confirm.
// rsi = element index, r11 = r10d rounded down to a multiple of 128.
// NOTE(review): r10 is assumed to hold the total element count; it is set by
// code outside this chunk.
LBB4_1159:
	WORD $0x8945; BYTE $0xd3 // mov    r11d, r10d
	LONG $0x80e38341         // and    r11d, -128
	WORD $0xf631             // xor    esi, esi
	// ymm0 = all zeros (VEX xmm write clears the upper half), ymm1 = all ones.
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x000000e0956ffdc5 // vmovdqa    ymm2, yword 224[rbp] /* [rip + .LCPI4_20] */

LBB4_1160:
	// Load four groups of 32 bytes.
	LONG $0x1c6ffec5; BYTE $0x31               // vmovdqu    ymm3, yword [rcx + rsi]
	LONG $0x646ffec5; WORD $0x2031             // vmovdqu    ymm4, yword [rcx + rsi + 32]
	LONG $0x6c6ffec5; WORD $0x4031             // vmovdqu    ymm5, yword [rcx + rsi + 64]
	LONG $0x746ffec5; WORD $0x6031             // vmovdqu    ymm6, yword [rcx + rsi + 96]
	// "nonzero" masks (0xFF where byte != 0) in ymm7..ymm10.
	LONG $0xf874e5c5                           // vpcmpeqb    ymm7, ymm3, ymm0
	LONG $0xf9efc5c5                           // vpxor    ymm7, ymm7, ymm1
	LONG $0xc0745dc5                           // vpcmpeqb    ymm8, ymm4, ymm0
	LONG $0xc1ef3dc5                           // vpxor    ymm8, ymm8, ymm1
	LONG $0xc87455c5                           // vpcmpeqb    ymm9, ymm5, ymm0
	LONG $0xc9ef35c5                           // vpxor    ymm9, ymm9, ymm1
	LONG $0xd0744dc5                           // vpcmpeqb    ymm10, ymm6, ymm0
	LONG $0xd1ef2dc5                           // vpxor    ymm10, ymm10, ymm1
	// Selection masks: signed (ymm2 > x), overwriting the loaded inputs.
	LONG $0xdb64edc5                           // vpcmpgtb    ymm3, ymm2, ymm3
	LONG $0xe464edc5                           // vpcmpgtb    ymm4, ymm2, ymm4
	LONG $0xed64edc5                           // vpcmpgtb    ymm5, ymm2, ymm5
	LONG $0xf664edc5                           // vpcmpgtb    ymm6, ymm2, ymm6
	// Blend: take the nonzero mask where (ymm2 > x), otherwise take ymm2.
	LONG $0x4c6de3c4; WORD $0x30df             // vpblendvb    ymm3, ymm2, ymm7, ymm3
	LONG $0x4c6dc3c4; WORD $0x40e0             // vpblendvb    ymm4, ymm2, ymm8, ymm4
	LONG $0x4c6dc3c4; WORD $0x50e9             // vpblendvb    ymm5, ymm2, ymm9, ymm5
	LONG $0x4c6dc3c4; WORD $0x60f2             // vpblendvb    ymm6, ymm2, ymm10, ymm6
	LONG $0x7f7ec1c4; WORD $0x301c             // vmovdqu    yword [r8 + rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0x306c; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0x3074; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm6
	// rsi += 128, written as "sub -128" because -128 fits a signed 8-bit
	// immediate while +128 does not.
	LONG $0x80ee8348                           // sub    rsi, -128
	WORD $0x3949; BYTE $0xf3                   // cmp    r11, rsi
	JNE  LBB4_1160
	// Vector part done. If the rounded-down count equals r10 nothing is left;
	// otherwise continue at LBB4_1162 (presumably the scalar remainder loop,
	// defined outside this chunk).
	WORD $0x394d; BYTE $0xd3                   // cmp    r11, r10
	JE   LBB4_1351
	JMP  LBB4_1162

// LBB4_1167 / LBB4_1168: vectorized loop, 16 elements per iteration.
// Reads 64-bit integers from [rcx + 8*rsi] and stores one byte per element to
// [r8 + rsi]. Per element:
//   out[i] = (in[i] != 0 ? 0xFF : 0) & K
// K is the 16-byte constant loaded from 112[rbp] (.LCPI4_17); its value is not
// visible in this chunk -- presumably 1 in the used bytes, TODO confirm.
// rsi = element index, rdx = eax rounded down to a multiple of 16.
// NOTE(review): rax is assumed to hold the total element count; it is set by
// code outside this chunk.
LBB4_1167:
	WORD $0xc289                 // mov    edx, eax
	WORD $0xe283; BYTE $0xf0     // and    edx, -16
	WORD $0xf631                 // xor    esi, esi
	// ymm0 = all zeros (VEX xmm write clears the upper half), ymm1 = all ones.
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5             // vpcmpeqd    ymm1, ymm1, ymm1
	LONG $0x556ff9c5; BYTE $0x70 // vmovdqa    xmm2, oword 112[rbp] /* [rip + .LCPI4_17] */

LBB4_1168:
	// Per group of 4 qwords: (x == 0) mask, inverted, narrowed qword -> byte
	// with saturating packs (masks are all-ones or zero, so narrowing preserves
	// them), then masked with xmm2. Each group yields 4 result bytes.
	LONG $0x297de2c4; WORD $0xf11c             // vpcmpeqq    ymm3, ymm0, yword [rcx + 8*rsi]
	LONG $0xd9efe5c5                           // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc6be1c5                           // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xdb6be1c5                           // vpackssdw    xmm3, xmm3, xmm3
	LONG $0xdb63e1c5                           // vpacksswb    xmm3, xmm3, xmm3
	LONG $0xdadbe1c5                           // vpand    xmm3, xmm3, xmm2
	LONG $0x297de2c4; WORD $0xf164; BYTE $0x20 // vpcmpeqq    ymm4, ymm0, yword [rcx + 8*rsi + 32]
	LONG $0xe1efddc5                           // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe46bd9c5                           // vpackssdw    xmm4, xmm4, xmm4
	LONG $0xe463d9c5                           // vpacksswb    xmm4, xmm4, xmm4
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0x297de2c4; WORD $0xf16c; BYTE $0x40 // vpcmpeqq    ymm5, ymm0, yword [rcx + 8*rsi + 64]
	// Combine groups 0 and 1 into the low 8 bytes of xmm3; xmm4 is then reused
	// for group 2.
	LONG $0xdc62e1c5                           // vpunpckldq    xmm3, xmm3, xmm4
	LONG $0xe1efd5c5                           // vpxor    ymm4, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe56bd9c5                           // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xe46bd9c5                           // vpackssdw    xmm4, xmm4, xmm4
	LONG $0xe463d9c5                           // vpacksswb    xmm4, xmm4, xmm4
	LONG $0x297de2c4; WORD $0xf16c; BYTE $0x60 // vpcmpeqq    ymm5, ymm0, yword [rcx + 8*rsi + 96]
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5                           // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xed6bd1c5                           // vpackssdw    xmm5, xmm5, xmm5
	LONG $0xed63d1c5                           // vpacksswb    xmm5, xmm5, xmm5
	LONG $0xeadbd1c5                           // vpand    xmm5, xmm5, xmm2
	// Combine groups 2 and 3, then join with groups 0/1 into 16 ordered bytes.
	LONG $0xe562d9c5                           // vpunpckldq    xmm4, xmm4, xmm5
	LONG $0xdc6ce1c5                           // vpunpcklqdq    xmm3, xmm3, xmm4
	LONG $0x7f7ac1c4; WORD $0x301c             // vmovdqu    oword [r8 + rsi], xmm3
	LONG $0x10c68348                           // add    rsi, 16
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1168
	// Vector part done. If the rounded-down count equals rax nothing is left;
	// otherwise continue at LBB4_1170 (presumably the scalar remainder loop,
	// defined outside this chunk).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1170

// LBB4_1174 / LBB4_1175: vectorized loop, 64 elements per iteration.
// Reads 16-bit elements from [rcx + 2*rsi] and stores one byte per element to
// [r8 + rsi]. Per element:
//   out[i] = (in[i] != 0 ? 0xFF : 0) & K
// K is the 16-byte constant loaded from 128[rbp] (.LCPI4_19); its value is not
// visible in this chunk -- presumably 1 in every byte, TODO confirm.
// rsi = element index, rdx = eax rounded down to a multiple of 64.
// NOTE(review): rax is assumed to hold the total element count; it is set by
// code outside this chunk.
LBB4_1174:
	WORD $0xc289             // mov    edx, eax
	WORD $0xe283; BYTE $0xc0 // and    edx, -64
	WORD $0xf631             // xor    esi, esi
	// ymm0 = all zeros (VEX xmm write clears the upper half), ymm1 = all ones.
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x00000080956ff9c5 // vmovdqa    xmm2, oword 128[rbp] /* [rip + .LCPI4_19] */

LBB4_1175:
	// Per group of 16 words: (x == 0) mask, inverted, both 128-bit halves packed
	// word -> byte into one xmm register, then masked with xmm2.
	LONG $0x1c75fdc5; BYTE $0x71               // vpcmpeqw    ymm3, ymm0, yword [rcx + 2*rsi]
	LONG $0xd9efe5c5                           // vpxor    ymm3, ymm3, ymm1
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc63e1c5                           // vpacksswb    xmm3, xmm3, xmm4
	LONG $0xdadbe1c5                           // vpand    xmm3, xmm3, xmm2
	LONG $0x6475fdc5; WORD $0x2071             // vpcmpeqw    ymm4, ymm0, yword [rcx + 2*rsi + 32]
	LONG $0xe1efddc5                           // vpxor    ymm4, ymm4, ymm1
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe563d9c5                           // vpacksswb    xmm4, xmm4, xmm5
	LONG $0xe2dbd9c5                           // vpand    xmm4, xmm4, xmm2
	LONG $0x6c75fdc5; WORD $0x4071             // vpcmpeqw    ymm5, ymm0, yword [rcx + 2*rsi + 64]
	LONG $0xe9efd5c5                           // vpxor    ymm5, ymm5, ymm1
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee63d1c5                           // vpacksswb    xmm5, xmm5, xmm6
	LONG $0xeadbd1c5                           // vpand    xmm5, xmm5, xmm2
	LONG $0x7475fdc5; WORD $0x6071             // vpcmpeqw    ymm6, ymm0, yword [rcx + 2*rsi + 96]
	LONG $0xf1efcdc5                           // vpxor    ymm6, ymm6, ymm1
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf763c9c5                           // vpacksswb    xmm6, xmm6, xmm7
	LONG $0xf2dbc9c5                           // vpand    xmm6, xmm6, xmm2
	// Store 4 x 16 result bytes.
	LONG $0x7f7ac1c4; WORD $0x301c             // vmovdqu    oword [r8 + rsi], xmm3
	LONG $0x7f7ac1c4; WORD $0x3064; BYTE $0x10 // vmovdqu    oword [r8 + rsi + 16], xmm4
	LONG $0x7f7ac1c4; WORD $0x306c; BYTE $0x20 // vmovdqu    oword [r8 + rsi + 32], xmm5
	LONG $0x7f7ac1c4; WORD $0x3074; BYTE $0x30 // vmovdqu    oword [r8 + rsi + 48], xmm6
	LONG $0x40c68348                           // add    rsi, 64
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1175
	// Vector part done. If the rounded-down count equals rax nothing is left;
	// otherwise continue at LBB4_1177 (presumably the scalar remainder loop,
	// defined outside this chunk).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1177

// LBB4_1181 / LBB4_1182: vectorized loop, 64 elements per iteration.
// Reads 16-bit elements from [rcx + 2*rsi] and stores one byte per element to
// [r8 + rsi]. Per element (signed 16-bit compare):
//   out[i] = (in[i] > 0) ? K : (in[i] != 0 ? 0xFF : 0)
// K is the 16-byte constant loaded from 128[rbp] (.LCPI4_19); its value is not
// visible in this chunk -- presumably 1 in every byte, which would make this
// a -1/0/+1 sign computation; TODO confirm.
// rsi = element index, r11 = r10d rounded down to a multiple of 64.
// NOTE(review): r10 is assumed to hold the total element count; it is set by
// code outside this chunk.
LBB4_1181:
	WORD $0x8945; BYTE $0xd3     // mov    r11d, r10d
	LONG $0xc0e38341             // and    r11d, -64
	WORD $0xf631                 // xor    esi, esi
	// ymm0 = all zeros (VEX xmm write clears the upper half), ymm8 = all ones.
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x763d41c4; BYTE $0xc0 // vpcmpeqd    ymm8, ymm8, ymm8
	QUAD $0x000000808d6f79c5     // vmovdqa    xmm9, oword 128[rbp] /* [rip + .LCPI4_19] */

LBB4_1182:
	// Load four groups of 16 words.
	LONG $0x246ffec5; BYTE $0x71               // vmovdqu    ymm4, yword [rcx + 2*rsi]
	LONG $0x6c6ffec5; WORD $0x2071             // vmovdqu    ymm5, yword [rcx + 2*rsi + 32]
	LONG $0x746ffec5; WORD $0x4071             // vmovdqu    ymm6, yword [rcx + 2*rsi + 64]
	LONG $0x7c6ffec5; WORD $0x6071             // vmovdqu    ymm7, yword [rcx + 2*rsi + 96]
	// (x > 0) masks, packed word -> byte. Results: xmm10, xmm11, xmm2, xmm1.
	LONG $0xd865ddc5                           // vpcmpgtw    ymm3, ymm4, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xd16361c5                           // vpacksswb    xmm10, xmm3, xmm1
	LONG $0xc865d5c5                           // vpcmpgtw    ymm1, ymm5, ymm0
	LONG $0x397de3c4; WORD $0x01ca             // vextracti128    xmm2, ymm1, 1
	LONG $0xda6371c5                           // vpacksswb    xmm11, xmm1, xmm2
	LONG $0xd065cdc5                           // vpcmpgtw    ymm2, ymm6, ymm0
	LONG $0x397de3c4; WORD $0x01d3             // vextracti128    xmm3, ymm2, 1
	LONG $0xd363e9c5                           // vpacksswb    xmm2, xmm2, xmm3
	LONG $0xd865c5c5                           // vpcmpgtw    ymm3, ymm7, ymm0
	LONG $0x397de3c4; WORD $0x01d9             // vextracti128    xmm1, ymm3, 1
	LONG $0xc963e1c5                           // vpacksswb    xmm1, xmm3, xmm1
	// (x != 0) masks, packed word -> byte. Results: xmm3, xmm4, xmm5, xmm6.
	LONG $0xd875ddc5                           // vpcmpeqw    ymm3, ymm4, ymm0
	LONG $0xdbefbdc5                           // vpxor    ymm3, ymm8, ymm3
	LONG $0x397de3c4; WORD $0x01dc             // vextracti128    xmm4, ymm3, 1
	LONG $0xdc63e1c5                           // vpacksswb    xmm3, xmm3, xmm4
	LONG $0xe075d5c5                           // vpcmpeqw    ymm4, ymm5, ymm0
	LONG $0xe4efbdc5                           // vpxor    ymm4, ymm8, ymm4
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128    xmm5, ymm4, 1
	LONG $0xe563d9c5                           // vpacksswb    xmm4, xmm4, xmm5
	LONG $0xe875cdc5                           // vpcmpeqw    ymm5, ymm6, ymm0
	LONG $0xedefbdc5                           // vpxor    ymm5, ymm8, ymm5
	LONG $0x397de3c4; WORD $0x01ee             // vextracti128    xmm6, ymm5, 1
	LONG $0xee63d1c5                           // vpacksswb    xmm5, xmm5, xmm6
	LONG $0xf075c5c5                           // vpcmpeqw    ymm6, ymm7, ymm0
	LONG $0xf6efbdc5                           // vpxor    ymm6, ymm8, ymm6
	LONG $0x397de3c4; WORD $0x01f7             // vextracti128    xmm7, ymm6, 1
	LONG $0xf763c9c5                           // vpacksswb    xmm6, xmm6, xmm7
	// Blend: take xmm9 where (x > 0), otherwise keep the nonzero mask.
	LONG $0x4c61c3c4; WORD $0xa0d9             // vpblendvb    xmm3, xmm3, xmm9, xmm10
	LONG $0x4c59c3c4; WORD $0xb0e1             // vpblendvb    xmm4, xmm4, xmm9, xmm11
	LONG $0x4c51c3c4; WORD $0x20d1             // vpblendvb    xmm2, xmm5, xmm9, xmm2
	LONG $0x4c49c3c4; WORD $0x10c9             // vpblendvb    xmm1, xmm6, xmm9, xmm1
	// Store 4 x 16 result bytes.
	LONG $0x7f7ac1c4; WORD $0x301c             // vmovdqu    oword [r8 + rsi], xmm3
	LONG $0x7f7ac1c4; WORD $0x3064; BYTE $0x10 // vmovdqu    oword [r8 + rsi + 16], xmm4
	LONG $0x7f7ac1c4; WORD $0x3054; BYTE $0x20 // vmovdqu    oword [r8 + rsi + 32], xmm2
	LONG $0x7f7ac1c4; WORD $0x304c; BYTE $0x30 // vmovdqu    oword [r8 + rsi + 48], xmm1
	LONG $0x40c68348                           // add    rsi, 64
	WORD $0x3949; BYTE $0xf3                   // cmp    r11, rsi
	JNE  LBB4_1182
	// Vector part done. If the rounded-down count equals r10 nothing is left;
	// otherwise continue at LBB4_1184 (presumably the scalar remainder loop,
	// defined outside this chunk).
	WORD $0x394d; BYTE $0xd3                   // cmp    r11, r10
	JE   LBB4_1351
	JMP  LBB4_1184

// LBB4_1189 / LBB4_1190: vectorized loop, 16 elements per iteration.
// Reads 64-bit elements from [rcx + 8*rsi] and stores one byte per element to
// [r8 + rsi]. Per element (signed 64-bit compare):
//   out[i] = (in[i] > 0) ? K : (in[i] != 0 ? 0xFF : 0)
// K is the 16-byte constant loaded from 112[rbp] (.LCPI4_17); its value is not
// visible in this chunk -- presumably 1 in the used bytes, which would make
// this a -1/0/+1 sign computation; TODO confirm.
// rsi = element index, r11 = r10d rounded down to a multiple of 16.
// NOTE(review): r10 is assumed to hold the total element count; it is set by
// code outside this chunk.
LBB4_1189:
	WORD $0x8945; BYTE $0xd3     // mov    r11d, r10d
	LONG $0xf0e38341             // and    r11d, -16
	WORD $0xf631                 // xor    esi, esi
	// ymm0 = all zeros (VEX xmm write clears the upper half), ymm9 = all ones.
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x763541c4; BYTE $0xc9 // vpcmpeqd    ymm9, ymm9, ymm9
	LONG $0x5d6f79c5; BYTE $0x70 // vmovdqa    xmm11, oword 112[rbp] /* [rip + .LCPI4_17] */

LBB4_1190:
	// Load four groups of 4 qwords.
	LONG $0x146f7ec5; BYTE $0xf1   // vmovdqu    ymm10, yword [rcx + 8*rsi]
	LONG $0x446f7ec5; WORD $0x20f1 // vmovdqu    ymm8, yword [rcx + 8*rsi + 32]
	LONG $0x746ffec5; WORD $0x40f1 // vmovdqu    ymm6, yword [rcx + 8*rsi + 64]
	LONG $0x646ffec5; WORD $0x60f1 // vmovdqu    ymm4, yword [rcx + 8*rsi + 96]
	// (x > 0) masks, narrowed qword -> byte with saturating packs.
	// Results: xmm12, xmm13, xmm7, xmm1.
	LONG $0x372de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm10, ymm0
	LONG $0x397de3c4; WORD $0x01cb // vextracti128    xmm3, ymm1, 1
	LONG $0xcb6bf1c5               // vpackssdw    xmm1, xmm1, xmm3
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xe16371c5               // vpacksswb    xmm12, xmm1, xmm1
	LONG $0x373de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm8, ymm0
	LONG $0x397de3c4; WORD $0x01cd // vextracti128    xmm5, ymm1, 1
	LONG $0xcd6bf1c5               // vpackssdw    xmm1, xmm1, xmm5
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xe96371c5               // vpacksswb    xmm13, xmm1, xmm1
	LONG $0x374de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm6, ymm0
	LONG $0x397de3c4; WORD $0x01cf // vextracti128    xmm7, ymm1, 1
	LONG $0xcf6bf1c5               // vpackssdw    xmm1, xmm1, xmm7
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xf963f1c5               // vpacksswb    xmm7, xmm1, xmm1
	LONG $0x375de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm4, ymm0
	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
	LONG $0xca6bf1c5               // vpackssdw    xmm1, xmm1, xmm2
	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
	LONG $0xc963f1c5               // vpacksswb    xmm1, xmm1, xmm1
	// (x != 0) masks, narrowed the same way. Results: xmm2, xmm3, xmm5, xmm4.
	LONG $0x292de2c4; BYTE $0xd0   // vpcmpeqq    ymm2, ymm10, ymm0
	LONG $0xd2efb5c5               // vpxor    ymm2, ymm9, ymm2
	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
	LONG $0xd36be9c5               // vpackssdw    xmm2, xmm2, xmm3
	LONG $0xd26be9c5               // vpackssdw    xmm2, xmm2, xmm2
	LONG $0xd263e9c5               // vpacksswb    xmm2, xmm2, xmm2
	LONG $0x293de2c4; BYTE $0xd8   // vpcmpeqq    ymm3, ymm8, ymm0
	LONG $0xdbefb5c5               // vpxor    ymm3, ymm9, ymm3
	LONG $0x397de3c4; WORD $0x01dd // vextracti128    xmm5, ymm3, 1
	LONG $0xdd6be1c5               // vpackssdw    xmm3, xmm3, xmm5
	LONG $0xdb6be1c5               // vpackssdw    xmm3, xmm3, xmm3
	LONG $0xdb63e1c5               // vpacksswb    xmm3, xmm3, xmm3
	LONG $0x294de2c4; BYTE $0xe8   // vpcmpeqq    ymm5, ymm6, ymm0
	LONG $0xedefb5c5               // vpxor    ymm5, ymm9, ymm5
	LONG $0x397de3c4; WORD $0x01ee // vextracti128    xmm6, ymm5, 1
	LONG $0xee6bd1c5               // vpackssdw    xmm5, xmm5, xmm6
	LONG $0xed6bd1c5               // vpackssdw    xmm5, xmm5, xmm5
	LONG $0xed63d1c5               // vpacksswb    xmm5, xmm5, xmm5
	LONG $0x295de2c4; BYTE $0xe0   // vpcmpeqq    ymm4, ymm4, ymm0
	LONG $0xe4efb5c5               // vpxor    ymm4, ymm9, ymm4
	LONG $0x397de3c4; WORD $0x01e6 // vextracti128    xmm6, ymm4, 1
	LONG $0xe66bd9c5               // vpackssdw    xmm4, xmm4, xmm6
	LONG $0xe46bd9c5               // vpackssdw    xmm4, xmm4, xmm4
	LONG $0xe463d9c5               // vpacksswb    xmm4, xmm4, xmm4
	// Blend: take xmm11 where (x > 0), otherwise keep the nonzero mask. The
	// unpck instructions interleave the four 4-byte groups back into element
	// order as one 16-byte vector.
	LONG $0x4c69c3c4; WORD $0xc0d3 // vpblendvb    xmm2, xmm2, xmm11, xmm12
	LONG $0x4c61c3c4; WORD $0xd0db // vpblendvb    xmm3, xmm3, xmm11, xmm13
	LONG $0x4c51c3c4; WORD $0x70eb // vpblendvb    xmm5, xmm5, xmm11, xmm7
	LONG $0xd362e9c5               // vpunpckldq    xmm2, xmm2, xmm3
	LONG $0x4c59c3c4; WORD $0x10cb // vpblendvb    xmm1, xmm4, xmm11, xmm1
	LONG $0xc962d1c5               // vpunpckldq    xmm1, xmm5, xmm1
	LONG $0xc96ce9c5               // vpunpcklqdq    xmm1, xmm2, xmm1
	LONG $0x7f7ac1c4; WORD $0x300c // vmovdqu    oword [r8 + rsi], xmm1
	LONG $0x10c68348               // add    rsi, 16
	WORD $0x3949; BYTE $0xf3       // cmp    r11, rsi
	JNE  LBB4_1190
	// Vector part done. If the rounded-down count equals r10 nothing is left;
	// otherwise continue at LBB4_1192 (presumably the scalar remainder loop,
	// defined outside this chunk).
	WORD $0x394d; BYTE $0xd3       // cmp    r11, r10
	JE   LBB4_1351
	JMP  LBB4_1192

// LBB4_1197 / LBB4_1198: vectorized loop, 32 elements per iteration.
// Reads 32-bit floats from [rcx + 4*rsi] and stores one byte per element to
// [r8 + rsi]. Per element:
//   out[i] = (x == 0.0) ? 0 : (int32_bits(x) > -1 ? K : 0xFF)
// The integer compare against all-ones (-1) tests the float's raw bit pattern,
// i.e. whether its sign bit is clear.
// K is the 16-byte constant loaded from 80[rbp] (.LCPI4_12); its value is not
// visible in this chunk -- presumably 1 in every byte, which would make this
// a -1/0/+1 sign computation; TODO confirm.
// rsi = element index, rdx = r10d rounded down to a multiple of 32.
// NOTE(review): r10 is assumed to hold the total element count; it is set by
// code outside this chunk.
LBB4_1197:
	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d
	WORD $0xe283; BYTE $0xe0     // and    edx, -32
	WORD $0xf631                 // xor    esi, esi
	// ymm12 = 0.0 (VEX xmm write clears the upper half), ymm13 = all ones (-1),
	// xmm15 = all ones (the 0xFF result bytes).
	LONG $0x571841c4; BYTE $0xe4 // vxorps    xmm12, xmm12, xmm12
	LONG $0x761541c4; BYTE $0xed // vpcmpeqd    ymm13, ymm13, ymm13
	LONG $0x756f79c5; BYTE $0x50 // vmovdqa    xmm14, oword 80[rbp] /* [rip + .LCPI4_12] */
	LONG $0x760141c4; BYTE $0xff // vpcmpeqd    xmm15, xmm15, xmm15

LBB4_1198:
	// Load four groups of 8 floats.
	LONG $0x0c107cc5; BYTE $0xb1   // vmovups    ymm9, yword [rcx + 4*rsi]
	LONG $0x54107cc5; WORD $0x20b1 // vmovups    ymm10, yword [rcx + 4*rsi + 32]
	LONG $0x5c107cc5; WORD $0x40b1 // vmovups    ymm11, yword [rcx + 4*rsi + 64]
	LONG $0x7c10fcc5; WORD $0x60b1 // vmovups    ymm7, yword [rcx + 4*rsi + 96]
	// (x == 0.0) masks, narrowed dword -> byte with saturating packs.
	// Results: xmm8, xmm6, xmm4, xmm0.
	LONG $0xc234c1c4; WORD $0x00e4 // vcmpeqps    ymm4, ymm9, ymm12
	LONG $0x197de3c4; WORD $0x01e5 // vextractf128    xmm5, ymm4, 1
	LONG $0xe56bd9c5               // vpackssdw    xmm4, xmm4, xmm5
	LONG $0xc46359c5               // vpacksswb    xmm8, xmm4, xmm4
	LONG $0xc22cc1c4; WORD $0x00e4 // vcmpeqps    ymm4, ymm10, ymm12
	LONG $0x197de3c4; WORD $0x01e6 // vextractf128    xmm6, ymm4, 1
	LONG $0xe66bd9c5               // vpackssdw    xmm4, xmm4, xmm6
	LONG $0xf463d9c5               // vpacksswb    xmm6, xmm4, xmm4
	LONG $0xc224c1c4; WORD $0x00e4 // vcmpeqps    ymm4, ymm11, ymm12
	LONG $0x197de3c4; WORD $0x01e0 // vextractf128    xmm0, ymm4, 1
	LONG $0xc06bd9c5               // vpackssdw    xmm0, xmm4, xmm0
	LONG $0xe063f9c5               // vpacksswb    xmm4, xmm0, xmm0
	LONG $0xc7c29cc5; BYTE $0x00   // vcmpeqps    ymm0, ymm12, ymm7
	LONG $0x197de3c4; WORD $0x01c1 // vextractf128    xmm1, ymm0, 1
	LONG $0xc16bf9c5               // vpackssdw    xmm0, xmm0, xmm1
	LONG $0xc063f9c5               // vpacksswb    xmm0, xmm0, xmm0
	// "sign bit clear" masks (raw bits compared as int32 > -1), narrowed the
	// same way. Results: xmm1, xmm2, xmm3, xmm5.
	LONG $0x6635c1c4; BYTE $0xcd   // vpcmpgtd    ymm1, ymm9, ymm13
	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
	LONG $0xca6bf1c5               // vpackssdw    xmm1, xmm1, xmm2
	LONG $0xc963f1c5               // vpacksswb    xmm1, xmm1, xmm1
	LONG $0x662dc1c4; BYTE $0xd5   // vpcmpgtd    ymm2, ymm10, ymm13
	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
	LONG $0xd36be9c5               // vpackssdw    xmm2, xmm2, xmm3
	LONG $0xd263e9c5               // vpacksswb    xmm2, xmm2, xmm2
	LONG $0x6625c1c4; BYTE $0xdd   // vpcmpgtd    ymm3, ymm11, ymm13
	LONG $0x397de3c4; WORD $0x01dd // vextracti128    xmm5, ymm3, 1
	LONG $0xdd6be1c5               // vpackssdw    xmm3, xmm3, xmm5
	LONG $0xdb63e1c5               // vpacksswb    xmm3, xmm3, xmm3
	LONG $0x6645c1c4; BYTE $0xed   // vpcmpgtd    ymm5, ymm7, ymm13
	LONG $0x397de3c4; WORD $0x01ef // vextracti128    xmm7, ymm5, 1
	LONG $0xef6bd1c5               // vpackssdw    xmm5, xmm5, xmm7
	// Select xmm14 where the sign bit is clear, otherwise all-ones (xmm15);
	// then force 0 where x == 0.0. Groups 0 and 2 do the zeroing with vpandn,
	// groups 1 and 3 with a blend against the zeroed xmm7.
	LONG $0x4c01c3c4; WORD $0x10ce // vpblendvb    xmm1, xmm15, xmm14, xmm1
	LONG $0xed63d1c5               // vpacksswb    xmm5, xmm5, xmm5
	LONG $0xc9dfb9c5               // vpandn    xmm1, xmm8, xmm1
	LONG $0x4c01c3c4; WORD $0x20d6 // vpblendvb    xmm2, xmm15, xmm14, xmm2
	LONG $0x4c01c3c4; WORD $0x30de // vpblendvb    xmm3, xmm15, xmm14, xmm3
	LONG $0x4c01c3c4; WORD $0x50ee // vpblendvb    xmm5, xmm15, xmm14, xmm5
	// xmm7 was overwritten as a scratch register above; re-zero it here.
	LONG $0xffefc1c5               // vpxor    xmm7, xmm7, xmm7
	LONG $0x4c69e3c4; WORD $0x60d7 // vpblendvb    xmm2, xmm2, xmm7, xmm6
	LONG $0x4c51e3c4; WORD $0x00c7 // vpblendvb    xmm0, xmm5, xmm7, xmm0
	LONG $0xdbdfd9c5               // vpandn    xmm3, xmm4, xmm3
	// Gather the four 8-byte results into one 32-byte vector in element order:
	// ymm0 = {g2, g3}, ymm1 = {g0, g1}; unpcklqdq yields {g0, g2 | g1, g3} and
	// vpermq 216 (qword order 0,2,1,3) restores {g0, g1, g2, g3}.
	LONG $0x3865e3c4; WORD $0x01c0 // vinserti128    ymm0, ymm3, xmm0, 1
	LONG $0x3875e3c4; WORD $0x01ca // vinserti128    ymm1, ymm1, xmm2, 1
	LONG $0xc06cf5c5               // vpunpcklqdq    ymm0, ymm1, ymm0
	LONG $0x00fde3c4; WORD $0xd8c0 // vpermq    ymm0, ymm0, 216
	LONG $0x7f7ec1c4; WORD $0x3004 // vmovdqu    yword [r8 + rsi], ymm0
	LONG $0x20c68348               // add    rsi, 32
	WORD $0x3948; BYTE $0xf2       // cmp    rdx, rsi
	JNE  LBB4_1198
	// Vector part done. If the rounded-down count equals r10 nothing is left;
	// otherwise continue at LBB4_1200 (presumably the scalar remainder loop,
	// defined outside this chunk).
	WORD $0x394c; BYTE $0xd2       // cmp    rdx, r10
	JE   LBB4_1351
	JMP  LBB4_1200

// LBB4_1205 / LBB4_1206: vectorized loop, 128 elements per iteration.
// Reads bytes from [rcx + rsi] and stores bytes to [r8 + rsi]. Per element:
//   out[i] = (in[i] == 0) ? 0 : K
// K is the 32-byte constant loaded from 224[rbp] (.LCPI4_20); its value is not
// visible in this chunk -- presumably 1 in every byte, TODO confirm.
// rsi = element index, rdx = eax rounded down to a multiple of 128.
// NOTE(review): rax is assumed to hold the total element count; it is set by
// code outside this chunk.
LBB4_1205:
	WORD $0xc289             // mov    edx, eax
	WORD $0xe283; BYTE $0x80 // and    edx, -128
	WORD $0xf631             // xor    esi, esi
	// ymm0 = all zeros (VEX xmm write clears the upper half).
	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
	QUAD $0x000000e08d6ffdc5 // vmovdqa    ymm1, yword 224[rbp] /* [rip + .LCPI4_20] */

LBB4_1206:
	// Per group of 32 bytes: (x == 0) mask, then vpandn computes
	// (NOT mask) AND ymm1, i.e. ymm1's byte where x != 0 and 0 elsewhere.
	LONG $0x1474fdc5; BYTE $0x31               // vpcmpeqb    ymm2, ymm0, yword [rcx + rsi]
	LONG $0xd1dfedc5                           // vpandn    ymm2, ymm2, ymm1
	LONG $0x5c74fdc5; WORD $0x2031             // vpcmpeqb    ymm3, ymm0, yword [rcx + rsi + 32]
	LONG $0xd9dfe5c5                           // vpandn    ymm3, ymm3, ymm1
	LONG $0x6474fdc5; WORD $0x4031             // vpcmpeqb    ymm4, ymm0, yword [rcx + rsi + 64]
	LONG $0x6c74fdc5; WORD $0x6031             // vpcmpeqb    ymm5, ymm0, yword [rcx + rsi + 96]
	LONG $0xe1dfddc5                           // vpandn    ymm4, ymm4, ymm1
	LONG $0xe9dfd5c5                           // vpandn    ymm5, ymm5, ymm1
	LONG $0x7f7ec1c4; WORD $0x3014             // vmovdqu    yword [r8 + rsi], ymm2
	LONG $0x7f7ec1c4; WORD $0x305c; BYTE $0x20 // vmovdqu    yword [r8 + rsi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0x3064; BYTE $0x40 // vmovdqu    yword [r8 + rsi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0x306c; BYTE $0x60 // vmovdqu    yword [r8 + rsi + 96], ymm5
	// rsi += 128, written as "sub -128" because -128 fits a signed 8-bit
	// immediate while +128 does not.
	LONG $0x80ee8348                           // sub    rsi, -128
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1206
	// Vector part done. If the rounded-down count equals rax nothing is left;
	// otherwise continue at LBB4_1208 (presumably the scalar remainder loop,
	// defined outside this chunk).
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1208

// LBB4_1212 / LBB4_1213: AVX2 loop that reads 32-bit elements at rcx and
// writes one byte per element at r8, 32 elements per iteration.
//   r11 = r10d rounded down to a multiple of 32 (loop bound), rsi = element index.
//   out byte = K     if in >  0   (K = per-byte constant loaded from 80[rbp])
//            = 0xFF  if in <  0
//            = 0     if in == 0
// NOTE(review): K (.LCPI4_12) is defined outside this chunk; presumably 1,
// which would make this a signum kernel -- confirm.
// On exit: r11 == r10 -> LBB4_1351, otherwise LBB4_1215 (outside this chunk;
// presumably the remainder handling).
LBB4_1212:
	WORD $0x8945; BYTE $0xd3     // mov    r11d, r10d
	LONG $0xe0e38341             // and    r11d, -32
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc0eff9c5             // vpxor    xmm0, xmm0, xmm0
	LONG $0x763541c4; BYTE $0xc9 // vpcmpeqd    ymm9, ymm9, ymm9
	LONG $0x556f79c5; BYTE $0x50 // vmovdqa    xmm10, oword 80[rbp] /* [rip + .LCPI4_12] */

LBB4_1213:
	// Load four vectors of eight dwords each.
	LONG $0x3c6ffec5; BYTE $0xb1   // vmovdqu    ymm7, yword [rcx + 4*rsi]
	LONG $0x446f7ec5; WORD $0x20b1 // vmovdqu    ymm8, yword [rcx + 4*rsi + 32]
	LONG $0x746ffec5; WORD $0x40b1 // vmovdqu    ymm6, yword [rcx + 4*rsi + 64]
	LONG $0x646ffec5; WORD $0x60b1 // vmovdqu    ymm4, yword [rcx + 4*rsi + 96]
	// (in > 0) masks, narrowed dword -> word -> byte (xmm11, xmm12, xmm1, xmm2).
	LONG $0xd866c5c5               // vpcmpgtd    ymm3, ymm7, ymm0
	LONG $0x397de3c4; WORD $0x01dd // vextracti128    xmm5, ymm3, 1
	LONG $0xdd6be1c5               // vpackssdw    xmm3, xmm3, xmm5
	LONG $0xdb6361c5               // vpacksswb    xmm11, xmm3, xmm3
	LONG $0xe866bdc5               // vpcmpgtd    ymm5, ymm8, ymm0
	LONG $0x397de3c4; WORD $0x01e9 // vextracti128    xmm1, ymm5, 1
	LONG $0xc96bd1c5               // vpackssdw    xmm1, xmm5, xmm1
	LONG $0xe16371c5               // vpacksswb    xmm12, xmm1, xmm1
	LONG $0xc866cdc5               // vpcmpgtd    ymm1, ymm6, ymm0
	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
	LONG $0xca6bf1c5               // vpackssdw    xmm1, xmm1, xmm2
	LONG $0xc963f1c5               // vpacksswb    xmm1, xmm1, xmm1
	LONG $0xd066ddc5               // vpcmpgtd    ymm2, ymm4, ymm0
	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
	LONG $0xd36be9c5               // vpackssdw    xmm2, xmm2, xmm3
	LONG $0xd263e9c5               // vpacksswb    xmm2, xmm2, xmm2
	// (in != 0) masks: compare-equal to zero, invert with all-ones ymm9, narrow
	// to bytes (xmm3, xmm5, xmm6, xmm4). Each byte is 0xFF for non-zero, 0 for zero.
	LONG $0xd876c5c5               // vpcmpeqd    ymm3, ymm7, ymm0
	LONG $0xdbefb5c5               // vpxor    ymm3, ymm9, ymm3
	LONG $0x397de3c4; WORD $0x01df // vextracti128    xmm7, ymm3, 1
	LONG $0xdf6be1c5               // vpackssdw    xmm3, xmm3, xmm7
	LONG $0xdb63e1c5               // vpacksswb    xmm3, xmm3, xmm3
	LONG $0xf876bdc5               // vpcmpeqd    ymm7, ymm8, ymm0
	LONG $0xffefb5c5               // vpxor    ymm7, ymm9, ymm7
	LONG $0x397de3c4; WORD $0x01fd // vextracti128    xmm5, ymm7, 1
	LONG $0xed6bc1c5               // vpackssdw    xmm5, xmm7, xmm5
	LONG $0xed63d1c5               // vpacksswb    xmm5, xmm5, xmm5
	LONG $0xf076cdc5               // vpcmpeqd    ymm6, ymm6, ymm0
	LONG $0xf6efb5c5               // vpxor    ymm6, ymm9, ymm6
	LONG $0x397de3c4; WORD $0x01f7 // vextracti128    xmm7, ymm6, 1
	LONG $0xf76bc9c5               // vpackssdw    xmm6, xmm6, xmm7
	LONG $0xf663c9c5               // vpacksswb    xmm6, xmm6, xmm6
	LONG $0xe076ddc5               // vpcmpeqd    ymm4, ymm4, ymm0
	LONG $0xe4efb5c5               // vpxor    ymm4, ymm9, ymm4
	LONG $0x397de3c4; WORD $0x01e7 // vextracti128    xmm7, ymm4, 1
	LONG $0xe76bd9c5               // vpackssdw    xmm4, xmm4, xmm7
	LONG $0xe463d9c5               // vpacksswb    xmm4, xmm4, xmm4
	// Where the (in > 0) mask is set take K (xmm10), otherwise keep the (in != 0) byte.
	LONG $0x4c61c3c4; WORD $0xb0da // vpblendvb    xmm3, xmm3, xmm10, xmm11
	LONG $0x4c51c3c4; WORD $0xc0ea // vpblendvb    xmm5, xmm5, xmm10, xmm12
	LONG $0x4c49c3c4; WORD $0x10ca // vpblendvb    xmm1, xmm6, xmm10, xmm1
	LONG $0x4c59c3c4; WORD $0x20d2 // vpblendvb    xmm2, xmm4, xmm10, xmm2
	// Reassemble the four 8-byte results, in input order, into one 32-byte vector.
	LONG $0x3875e3c4; WORD $0x01ca // vinserti128    ymm1, ymm1, xmm2, 1
	LONG $0x3865e3c4; WORD $0x01d5 // vinserti128    ymm2, ymm3, xmm5, 1
	LONG $0xc96cedc5               // vpunpcklqdq    ymm1, ymm2, ymm1
	LONG $0x00fde3c4; WORD $0xd8c9 // vpermq    ymm1, ymm1, 216
	LONG $0x7f7ec1c4; WORD $0x300c // vmovdqu    yword [r8 + rsi], ymm1
	LONG $0x20c68348               // add    rsi, 32
	WORD $0x3949; BYTE $0xf3       // cmp    r11, rsi
	JNE  LBB4_1213
	// Vector part finished; skip the remainder path when the count was a multiple of 32.
	WORD $0x394d; BYTE $0xd3       // cmp    r11, r10
	JE   LBB4_1351
	JMP  LBB4_1215

// LBB4_1220 / LBB4_1221: AVX2 loop over 32-bit elements, 32 per iteration.
//   rdx = eax rounded down to a multiple of 32 (loop bound), rsi = element index.
//   out dword at r8 = (in dword at rcx != 0) ? C : 0
// where C is the dword broadcast from 156[rbp].
// NOTE(review): C (.LCPI4_8) is defined outside this chunk; presumably 1 -- confirm.
// On exit: rdx == rax -> LBB4_1351, otherwise LBB4_1223 (outside this chunk).
LBB4_1220:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	QUAD $0x00009c8d587de2c4; BYTE $0x00 // vpbroadcastd    ymm1, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_1221:
	// vpcmpeqd yields all-ones where in == 0; vpandn computes (~mask & C).
	LONG $0x1476fdc5; BYTE $0xb1               // vpcmpeqd    ymm2, ymm0, yword [rcx + 4*rsi]
	LONG $0xd1dfedc5                           // vpandn    ymm2, ymm2, ymm1
	LONG $0x5c76fdc5; WORD $0x20b1             // vpcmpeqd    ymm3, ymm0, yword [rcx + 4*rsi + 32]
	LONG $0xd9dfe5c5                           // vpandn    ymm3, ymm3, ymm1
	LONG $0x6476fdc5; WORD $0x40b1             // vpcmpeqd    ymm4, ymm0, yword [rcx + 4*rsi + 64]
	LONG $0x6c76fdc5; WORD $0x60b1             // vpcmpeqd    ymm5, ymm0, yword [rcx + 4*rsi + 96]
	LONG $0xe1dfddc5                           // vpandn    ymm4, ymm4, ymm1
	LONG $0xe9dfd5c5                           // vpandn    ymm5, ymm5, ymm1
	LONG $0x7f7ec1c4; WORD $0xb014             // vmovdqu    yword [r8 + 4*rsi], ymm2
	LONG $0x7f7ec1c4; WORD $0xb05c; BYTE $0x20 // vmovdqu    yword [r8 + 4*rsi + 32], ymm3
	LONG $0x7f7ec1c4; WORD $0xb064; BYTE $0x40 // vmovdqu    yword [r8 + 4*rsi + 64], ymm4
	LONG $0x7f7ec1c4; WORD $0xb06c; BYTE $0x60 // vmovdqu    yword [r8 + 4*rsi + 96], ymm5
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1221
	// Vector part finished; leave if no elements remain beyond the multiple of 32.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1223

// LBB4_1227 / LBB4_1228: AVX2 loop that reads signed bytes at rcx and writes
// one 32-bit value per element at r8, 32 elements per iteration.
//   rdx = r11d rounded down to a multiple of 32 (loop bound), rsi = element index.
//   out dword = C           if in >  0   (C = dword broadcast from 156[rbp])
//             = 0xFFFFFFFF  if in <  0
//             = 0           if in == 0
// NOTE(review): C (.LCPI4_8) is defined outside this chunk; presumably 1 -- confirm.
// On exit: rdx == r11 -> LBB4_1351, otherwise LBB4_1230 (outside this chunk).
LBB4_1227:
	WORD $0x8944; BYTE $0xda             // mov    edx, r11d
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0x763941c4; BYTE $0xc0         // vpcmpeqd    xmm8, xmm8, xmm8
	QUAD $0x00009c95187de2c4; BYTE $0x00 // vbroadcastss    ymm2, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_1228:
	// Load four groups of eight input bytes.
	LONG $0x1c7efac5; BYTE $0x31               // vmovq    xmm3, qword [rcx + rsi]
	LONG $0x647efac5; WORD $0x0831             // vmovq    xmm4, qword [rcx + rsi + 8]
	LONG $0x6c7efac5; WORD $0x1031             // vmovq    xmm5, qword [rcx + rsi + 16]
	LONG $0x747efac5; WORD $0x1831             // vmovq    xmm6, qword [rcx + rsi + 24]
	// (in > 0) byte masks, sign-extended to dword masks (ymm9, ymm10, ymm7, ymm1).
	LONG $0xf864e1c5                           // vpcmpgtb    xmm7, xmm3, xmm0
	LONG $0x217d62c4; BYTE $0xcf               // vpmovsxbd    ymm9, xmm7
	LONG $0xc864d9c5                           // vpcmpgtb    xmm1, xmm4, xmm0
	LONG $0x217d62c4; BYTE $0xd1               // vpmovsxbd    ymm10, xmm1
	LONG $0xf864d1c5                           // vpcmpgtb    xmm7, xmm5, xmm0
	LONG $0x217de2c4; BYTE $0xff               // vpmovsxbd    ymm7, xmm7
	LONG $0xc864c9c5                           // vpcmpgtb    xmm1, xmm6, xmm0
	LONG $0x217de2c4; BYTE $0xc9               // vpmovsxbd    ymm1, xmm1
	// (in != 0) masks: equal-to-zero inverted with all-ones xmm8, sign-extended to
	// dwords, giving -1 for non-zero inputs and 0 for zero inputs.
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xdbefb9c5                           // vpxor    xmm3, xmm8, xmm3
	LONG $0x217de2c4; BYTE $0xdb               // vpmovsxbd    ymm3, xmm3
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe4efb9c5                           // vpxor    xmm4, xmm8, xmm4
	LONG $0x217de2c4; BYTE $0xe4               // vpmovsxbd    ymm4, xmm4
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xedefb9c5                           // vpxor    xmm5, xmm8, xmm5
	LONG $0x217de2c4; BYTE $0xed               // vpmovsxbd    ymm5, xmm5
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf6efb9c5                           // vpxor    xmm6, xmm8, xmm6
	LONG $0x217de2c4; BYTE $0xf6               // vpmovsxbd    ymm6, xmm6
	// Where the (in > 0) mask is set take C (ymm2), otherwise keep the -1/0 value.
	LONG $0x4a65e3c4; WORD $0x90da             // vblendvps    ymm3, ymm3, ymm2, ymm9
	LONG $0x4a5de3c4; WORD $0xa0e2             // vblendvps    ymm4, ymm4, ymm2, ymm10
	LONG $0x4a55e3c4; WORD $0x70ea             // vblendvps    ymm5, ymm5, ymm2, ymm7
	LONG $0x4a4de3c4; WORD $0x10ca             // vblendvps    ymm1, ymm6, ymm2, ymm1
	LONG $0x117cc1c4; WORD $0xb01c             // vmovups    yword [r8 + 4*rsi], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x117cc1c4; WORD $0xb04c; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm1
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1228
	// Vector part finished; leave if no elements remain beyond the multiple of 32.
	WORD $0x394c; BYTE $0xda                   // cmp    rdx, r11
	JE   LBB4_1351
	JMP  LBB4_1230

// LBB4_1235 / LBB4_1236: AVX2 loop that reads bytes at rcx and writes one
// 32-bit value per element at r8, 32 elements per iteration.
//   rdx = eax rounded down to a multiple of 32 (loop bound), rsi = element index.
//   out dword = (in byte != 0) ? (C & 0xFF) : 0
// where C is the dword broadcast from 156[rbp]; the 0xFF comes from
// zero-extending the 0xFF/0x00 byte mask before the AND.
// NOTE(review): C (.LCPI4_8) is defined outside this chunk; presumably 1 -- confirm.
// On exit: rdx == rax -> LBB4_1351, otherwise LBB4_1238 (outside this chunk).
LBB4_1235:
	WORD $0xc289                         // mov    edx, eax
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f1c5                     // vpcmpeqd    xmm1, xmm1, xmm1
	QUAD $0x00009c95587de2c4; BYTE $0x00 // vpbroadcastd    ymm2, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_1236:
	// Load four groups of eight input bytes.
	LONG $0x1c7efac5; BYTE $0x31               // vmovq    xmm3, qword [rcx + rsi]
	LONG $0x647efac5; WORD $0x0831             // vmovq    xmm4, qword [rcx + rsi + 8]
	LONG $0x6c7efac5; WORD $0x1031             // vmovq    xmm5, qword [rcx + rsi + 16]
	LONG $0x747efac5; WORD $0x1831             // vmovq    xmm6, qword [rcx + rsi + 24]
	// Per group: (in == 0) mask, inverted via xor with all-ones xmm1, zero-extended
	// byte -> dword, then ANDed with C.
	LONG $0xd874e1c5                           // vpcmpeqb    xmm3, xmm3, xmm0
	LONG $0xd9efe1c5                           // vpxor    xmm3, xmm3, xmm1
	LONG $0x317de2c4; BYTE $0xdb               // vpmovzxbd    ymm3, xmm3
	LONG $0xdadbe5c5                           // vpand    ymm3, ymm3, ymm2
	LONG $0xe074d9c5                           // vpcmpeqb    xmm4, xmm4, xmm0
	LONG $0xe1efd9c5                           // vpxor    xmm4, xmm4, xmm1
	LONG $0x317de2c4; BYTE $0xe4               // vpmovzxbd    ymm4, xmm4
	LONG $0xe2dbddc5                           // vpand    ymm4, ymm4, ymm2
	LONG $0xe874d1c5                           // vpcmpeqb    xmm5, xmm5, xmm0
	LONG $0xe9efd1c5                           // vpxor    xmm5, xmm5, xmm1
	LONG $0x317de2c4; BYTE $0xed               // vpmovzxbd    ymm5, xmm5
	LONG $0xeadbd5c5                           // vpand    ymm5, ymm5, ymm2
	LONG $0xf074c9c5                           // vpcmpeqb    xmm6, xmm6, xmm0
	LONG $0xf1efc9c5                           // vpxor    xmm6, xmm6, xmm1
	LONG $0x317de2c4; BYTE $0xf6               // vpmovzxbd    ymm6, xmm6
	LONG $0xf2dbcdc5                           // vpand    ymm6, ymm6, ymm2
	LONG $0x7f7ec1c4; WORD $0xb01c             // vmovdqu    yword [r8 + 4*rsi], ymm3
	LONG $0x7f7ec1c4; WORD $0xb064; BYTE $0x20 // vmovdqu    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x7f7ec1c4; WORD $0xb06c; BYTE $0x40 // vmovdqu    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x7f7ec1c4; WORD $0xb074; BYTE $0x60 // vmovdqu    yword [r8 + 4*rsi + 96], ymm6
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1236
	// Vector part finished; leave if no elements remain beyond the multiple of 32.
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1238

// LBB4_1242 / LBB4_1243: AVX2 loop over signed 32-bit elements, 32 per iteration,
// reading at rcx and writing 32-bit results at r8.
//   rdx = r11d rounded down to a multiple of 32 (loop bound), rsi = element index.
//   out = (C > in) ? ((in != 0) ? 0xFFFFFFFF : 0) : C
// where C is the dword broadcast from 156[rbp] and the compare is signed.
// NOTE(review): C (.LCPI4_8) is defined outside this chunk; presumably 1, which
// would make this a signum kernel (-1 / 0 / 1) -- confirm.
// On exit: rdx == r11 -> LBB4_1351, otherwise LBB4_1245 (outside this chunk).
LBB4_1242:
	WORD $0x8944; BYTE $0xda             // mov    edx, r11d
	WORD $0xe283; BYTE $0xe0             // and    edx, -32
	WORD $0xf631                         // xor    esi, esi
	LONG $0xc0eff9c5                     // vpxor    xmm0, xmm0, xmm0
	LONG $0xc976f5c5                     // vpcmpeqd    ymm1, ymm1, ymm1
	QUAD $0x00009c95587de2c4; BYTE $0x00 // vpbroadcastd    ymm2, dword 156[rbp] /* [rip + .LCPI4_8] */

LBB4_1243:
	LONG $0x1c6ffec5; BYTE $0xb1               // vmovdqu    ymm3, yword [rcx + 4*rsi]
	LONG $0x646ffec5; WORD $0x20b1             // vmovdqu    ymm4, yword [rcx + 4*rsi + 32]
	LONG $0x6c6ffec5; WORD $0x40b1             // vmovdqu    ymm5, yword [rcx + 4*rsi + 64]
	LONG $0x746ffec5; WORD $0x60b1             // vmovdqu    ymm6, yword [rcx + 4*rsi + 96]
	// (in != 0) masks: equal-to-zero inverted with all-ones ymm1 (ymm7..ymm10).
	LONG $0xf876e5c5                           // vpcmpeqd    ymm7, ymm3, ymm0
	LONG $0xf9efc5c5                           // vpxor    ymm7, ymm7, ymm1
	LONG $0xc0765dc5                           // vpcmpeqd    ymm8, ymm4, ymm0
	LONG $0xc1ef3dc5                           // vpxor    ymm8, ymm8, ymm1
	LONG $0xc87655c5                           // vpcmpeqd    ymm9, ymm5, ymm0
	LONG $0xc9ef35c5                           // vpxor    ymm9, ymm9, ymm1
	LONG $0xd0764dc5                           // vpcmpeqd    ymm10, ymm6, ymm0
	LONG $0xd1ef2dc5                           // vpxor    ymm10, ymm10, ymm1
	// (C > in) selector masks, overwriting the loaded inputs.
	LONG $0xdb66edc5                           // vpcmpgtd    ymm3, ymm2, ymm3
	LONG $0xe466edc5                           // vpcmpgtd    ymm4, ymm2, ymm4
	LONG $0xed66edc5                           // vpcmpgtd    ymm5, ymm2, ymm5
	LONG $0xf666edc5                           // vpcmpgtd    ymm6, ymm2, ymm6
	// Where (C > in) take the -1/0 mask value, otherwise take C.
	LONG $0x4a6de3c4; WORD $0x30df             // vblendvps    ymm3, ymm2, ymm7, ymm3
	LONG $0x4a6dc3c4; WORD $0x40e0             // vblendvps    ymm4, ymm2, ymm8, ymm4
	LONG $0x4a6dc3c4; WORD $0x50e9             // vblendvps    ymm5, ymm2, ymm9, ymm5
	LONG $0x4a6dc3c4; WORD $0x60f2             // vblendvps    ymm6, ymm2, ymm10, ymm6
	LONG $0x117cc1c4; WORD $0xb01c             // vmovups    yword [r8 + 4*rsi], ymm3
	LONG $0x117cc1c4; WORD $0xb064; BYTE $0x20 // vmovups    yword [r8 + 4*rsi + 32], ymm4
	LONG $0x117cc1c4; WORD $0xb06c; BYTE $0x40 // vmovups    yword [r8 + 4*rsi + 64], ymm5
	LONG $0x117cc1c4; WORD $0xb074; BYTE $0x60 // vmovups    yword [r8 + 4*rsi + 96], ymm6
	LONG $0x20c68348                           // add    rsi, 32
	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
	JNE  LBB4_1243
	// Vector part finished; leave if no elements remain beyond the multiple of 32.
	WORD $0x394c; BYTE $0xda                   // cmp    rdx, r11
	JE   LBB4_1351
	JMP  LBB4_1245

// LBB4_1250 .. LBB4_1264: scalar path that reads signed bytes at rcx + rdx and
// writes one 64-bit floating-point value per element at r8 + 8*rdx.
//   out = P    if in >  0   (P = qword at 8[rbp])
//       = N    if in <  0   (N = qword at 32[rbp])
//       = 0.0  if in == 0
// NOTE(review): P (.LCPI4_1) and N (.LCPI4_13) are defined outside this chunk;
// presumably 1.0 and -1.0 -- confirm.
// LBB4_1250..LBB4_1253 store a single element; the JLE there consumes flags set
// by code that precedes this chunk. LBB4_1257 onward handles two elements per
// iteration (indices rdx and rdx+1) until rdx == rax.
// The JG/JLE branches after each `cmp byte` reuse that compare's flags; the VEX
// register moves in between do not modify flags.
LBB4_1250:
	LONG $0x457efac5; BYTE $0x20 // vmovq    xmm0, qword 32[rbp] /* [rip + .LCPI4_13] */

LBB4_1251:
	JLE  LBB4_1253
	LONG $0x457efac5; BYTE $0x08 // vmovq    xmm0, qword 8[rbp] /* [rip + .LCPI4_1] */

LBB4_1253:
	LONG $0xd679c1c4; WORD $0xd004 // vmovq    qword [r8 + 8*rdx], xmm0
	LONG $0x01ca8348               // or    rdx, 1

LBB4_1254:
	// Leave when rsi + rax == 0 (rsi is set up before this chunk).
	WORD $0x0148; BYTE $0xc6     // add    rsi, rax
	JE   LBB4_1351
	// xmm0 = N, xmm1 = P for the two-element loop below.
	LONG $0x4510fbc5; BYTE $0x20 // vmovsd    xmm0, qword 32[rbp] /* [rip + .LCPI4_13] */
	LONG $0x4d10fbc5; BYTE $0x08 // vmovsd    xmm1, qword 8[rbp] /* [rip + .LCPI4_1] */
	JMP  LBB4_1257

LBB4_1256:
	// Store the result for element rdx+1 and advance by two.
	LONG $0x117bc1c4; WORD $0xd05c; BYTE $0x08 // vmovsd    qword [r8 + 8*rdx + 8], xmm3
	LONG $0x02c28348                           // add    rdx, 2
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JE   LBB4_1351

LBB4_1257:
	// First element of the pair: xmm2 = N tentatively.
	LONG $0x00113c80 // cmp    byte [rcx + rdx], 0
	LONG $0xd028f9c5 // vmovapd    xmm2, xmm0
	JNE  LBB4_1258
	// in == 0: xmm2 = 0.0; ZF is still set, so the JLE below is always taken.
	LONG $0xd257e9c5 // vxorpd    xmm2, xmm2, xmm2
	LONG $0xd928f9c5 // vmovapd    xmm3, xmm1
	JLE  LBB4_1262

LBB4_1259:
	// First element was positive (xmm3 = P): store it, then test the second element.
	LONG $0x117bc1c4; WORD $0xd01c // vmovsd    qword [r8 + 8*rdx], xmm3
	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
	LONG $0xd028f9c5               // vmovapd    xmm2, xmm0
	JNE  LBB4_1260

LBB4_1263:
	// Second element is zero: xmm2 = 0.0; ZF is set, so JG is not taken here.
	LONG $0xd257e9c5 // vxorpd    xmm2, xmm2, xmm2
	LONG $0xd928f9c5 // vmovapd    xmm3, xmm1
	JG   LBB4_1256
	JMP  LBB4_1264

LBB4_1258:
	// First element non-zero: positive keeps xmm3 = P, negative falls through.
	LONG $0xd928f9c5 // vmovapd    xmm3, xmm1
	JG   LBB4_1259

LBB4_1262:
	// First element zero or negative: result is xmm2 (0.0 or N).
	LONG $0xda28f9c5               // vmovapd    xmm3, xmm2
	LONG $0x117bc1c4; WORD $0xd01c // vmovsd    qword [r8 + 8*rdx], xmm3
	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
	LONG $0xd028f9c5               // vmovapd    xmm2, xmm0
	JE   LBB4_1263

LBB4_1260:
	// Second element non-zero: positive keeps xmm3 = P, negative falls through.
	LONG $0xd928f9c5 // vmovapd    xmm3, xmm1
	JG   LBB4_1256

LBB4_1264:
	// Second element zero or negative: result is xmm2 (0.0 or N).
	LONG $0xda28f9c5 // vmovapd    xmm3, xmm2
	JMP  LBB4_1256

// LBB4_1265 .. LBB4_1279: scalar path that reads signed bytes at rcx + rdx and
// writes one 32-bit floating-point value per element at r8 + 4*rdx.
//   out = P     if in >  0   (P = dword at 152[rbp])
//       = N     if in <  0   (N = dword at 168[rbp])
//       = 0.0f  if in == 0
// NOTE(review): P (.LCPI4_5) and N (.LCPI4_14) are defined outside this chunk;
// presumably 1.0f and -1.0f -- confirm.
// LBB4_1265..LBB4_1268 store a single element; the JLE there consumes flags set
// by code that precedes this chunk. LBB4_1272 onward handles two elements per
// iteration (indices rdx and rdx+1) until rdx == rax.
// The JG/JLE branches after each `cmp byte` reuse that compare's flags; the VEX
// register moves in between do not modify flags.
LBB4_1265:
	QUAD $0x000000a8856ef9c5 // vmovd    xmm0, dword 168[rbp] /* [rip + .LCPI4_14] */

LBB4_1266:
	JLE  LBB4_1268
	QUAD $0x00000098856ef9c5 // vmovd    xmm0, dword 152[rbp] /* [rip + .LCPI4_5] */

LBB4_1268:
	LONG $0x7e79c1c4; WORD $0x9004 // vmovd    dword [r8 + 4*rdx], xmm0
	LONG $0x01ca8348               // or    rdx, 1

LBB4_1269:
	// Leave when rsi + rax == 0 (rsi is set up before this chunk).
	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
	JE   LBB4_1351
	// xmm0 = N, xmm1 = P for the two-element loop below.
	QUAD $0x000000a88510fac5 // vmovss    xmm0, dword 168[rbp] /* [rip + .LCPI4_14] */
	QUAD $0x000000988d10fac5 // vmovss    xmm1, dword 152[rbp] /* [rip + .LCPI4_5] */
	JMP  LBB4_1272

LBB4_1271:
	// Store the result for element rdx+1 and advance by two.
	LONG $0x117ac1c4; WORD $0x905c; BYTE $0x04 // vmovss    dword [r8 + 4*rdx + 4], xmm3
	LONG $0x02c28348                           // add    rdx, 2
	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
	JE   LBB4_1351

LBB4_1272:
	// First element of the pair: xmm2 = N tentatively.
	LONG $0x00113c80 // cmp    byte [rcx + rdx], 0
	LONG $0xd028f8c5 // vmovaps    xmm2, xmm0
	JNE  LBB4_1273
	// in == 0: xmm2 = 0.0f; ZF is still set, so the JLE below is always taken.
	LONG $0xd257e8c5 // vxorps    xmm2, xmm2, xmm2
	LONG $0xd928f8c5 // vmovaps    xmm3, xmm1
	JLE  LBB4_1277

LBB4_1274:
	// First element was positive (xmm3 = P): store it, then test the second element.
	LONG $0x117ac1c4; WORD $0x901c // vmovss    dword [r8 + 4*rdx], xmm3
	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
	LONG $0xd028f8c5               // vmovaps    xmm2, xmm0
	JNE  LBB4_1275

LBB4_1278:
	// Second element is zero: xmm2 = 0.0f; ZF is set, so JG is not taken here.
	LONG $0xd257e8c5 // vxorps    xmm2, xmm2, xmm2
	LONG $0xd928f8c5 // vmovaps    xmm3, xmm1
	JG   LBB4_1271
	JMP  LBB4_1279

LBB4_1273:
	// First element non-zero: positive keeps xmm3 = P, negative falls through.
	LONG $0xd928f8c5 // vmovaps    xmm3, xmm1
	JG   LBB4_1274

LBB4_1277:
	// First element zero or negative: result is xmm2 (0.0f or N).
	LONG $0xda28f8c5               // vmovaps    xmm3, xmm2
	LONG $0x117ac1c4; WORD $0x901c // vmovss    dword [r8 + 4*rdx], xmm3
	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
	LONG $0xd028f8c5               // vmovaps    xmm2, xmm0
	JE   LBB4_1278

LBB4_1275:
	// Second element non-zero: positive keeps xmm3 = P, negative falls through.
	LONG $0xd928f8c5 // vmovaps    xmm3, xmm1
	JG   LBB4_1271

LBB4_1279:
	// Second element zero or negative: result is xmm2 (0.0f or N).
	LONG $0xda28f8c5 // vmovaps    xmm3, xmm2
	JMP  LBB4_1271

// LBB4_1282 .. LBB4_1287: reads 64-bit floats at rcx and writes 64-bit integers
// at r8.
// Per element: v = B | (in & M), then v is converted to a 64-bit integer with the
// unsigned-conversion idiom: if v >= T use (cvttsd2si(v - T) XOR r11), otherwise
// cvttsd2si(v). The result is forced to 0 when the input compares equal to zero.
//   M = qword at 0[rbp] (vector path) / 16 bytes at 48[rbp] (scalar path)
//   B = qword at 8[rbp], T = qword at 16[rbp]
// NOTE(review): the constants and r11 are set up outside this chunk; presumably
// M = sign-bit mask, B = 1.0, T = 2^63 and r11 = 0x8000000000000000, i.e. this
// computes copysign(1.0, in) cast to uint64 -- confirm.
// LBB4_1283 optionally processes one 4-element vector at index rdi when bit 0 of
// r9 is set; LBB4_1287 is a one-element-per-iteration loop from rsi to r10.
// Zero test differs for NaN: the vector path uses vcmpneqpd predicate 4 (true for
// unordered), while the scalar path's cmove fires on ZF, which vucomisd also sets
// for unordered operands.
LBB4_1282:
	WORD $0xff31 // xor    edi, edi

LBB4_1283:
	LONG $0x01c1f641               // test    r9b, 1
	JE   LBB4_1285
	LONG $0x0410fdc5; BYTE $0xf9   // vmovupd    ymm0, yword [rcx + 8*rdi]
	LONG $0xc957f1c5               // vxorpd    xmm1, xmm1, xmm1
	LONG $0x197de2c4; WORD $0x0055 // vbroadcastsd    ymm2, qword 0[rbp] /* [rip + .LCPI4_0] */
	LONG $0x197de2c4; WORD $0x085d // vbroadcastsd    ymm3, qword 8[rbp] /* [rip + .LCPI4_1] */
	// ymm3 = B | (in & M) for all four lanes.
	LONG $0xd254fdc5               // vandpd    ymm2, ymm0, ymm2
	LONG $0xda56e5c5               // vorpd    ymm3, ymm3, ymm2
	LONG $0x197de3c4; WORD $0x01dc // vextractf128    xmm4, ymm3, 1
	LONG $0x5510fbc5; BYTE $0x10   // vmovsd    xmm2, qword 16[rbp] /* [rip + .LCPI4_6] */
	// Lane 2 (low half of the upper 128 bits).
	LONG $0xea5cdbc5               // vsubsd    xmm5, xmm4, xmm2
	LONG $0x2cfbe1c4; BYTE $0xc5   // vcvttsd2si    rax, xmm5
	WORD $0x314c; BYTE $0xd8       // xor    rax, r11
	LONG $0x2cfbe1c4; BYTE $0xd4   // vcvttsd2si    rdx, xmm4
	LONG $0xe22ef9c5               // vucomisd    xmm4, xmm2
	LONG $0xd0430f48               // cmovae    rdx, rax
	LONG $0x6ef9e1c4; BYTE $0xea   // vmovq    xmm5, rdx
	// Lane 3 (swap the two qwords of xmm4, then convert the low one).
	LONG $0x0479e3c4; WORD $0x4ee4 // vpermilps    xmm4, xmm4, 78
	LONG $0xf25cdbc5               // vsubsd    xmm6, xmm4, xmm2
	LONG $0x2cfbe1c4; BYTE $0xc6   // vcvttsd2si    rax, xmm6
	WORD $0x314c; BYTE $0xd8       // xor    rax, r11
	LONG $0x2cfbe1c4; BYTE $0xd4   // vcvttsd2si    rdx, xmm4
	LONG $0xe22ef9c5               // vucomisd    xmm4, xmm2
	LONG $0xd0430f48               // cmovae    rdx, rax
	LONG $0x6ef9e1c4; BYTE $0xe2   // vmovq    xmm4, rdx
	LONG $0xe46cd1c5               // vpunpcklqdq    xmm4, xmm5, xmm4
	// Lane 0.
	LONG $0xea5ce3c5               // vsubsd    xmm5, xmm3, xmm2
	LONG $0x2cfbe1c4; BYTE $0xc5   // vcvttsd2si    rax, xmm5
	WORD $0x314c; BYTE $0xd8       // xor    rax, r11
	LONG $0x2cfbe1c4; BYTE $0xd3   // vcvttsd2si    rdx, xmm3
	LONG $0xda2ef9c5               // vucomisd    xmm3, xmm2
	LONG $0xd0430f48               // cmovae    rdx, rax
	LONG $0x6ef9e1c4; BYTE $0xea   // vmovq    xmm5, rdx
	// Lane 1.
	LONG $0x0479e3c4; WORD $0x4edb // vpermilps    xmm3, xmm3, 78
	LONG $0xf25ce3c5               // vsubsd    xmm6, xmm3, xmm2
	LONG $0x2cfbe1c4; BYTE $0xc6   // vcvttsd2si    rax, xmm6
	WORD $0x314c; BYTE $0xd8       // xor    rax, r11
	LONG $0x2cfbe1c4; BYTE $0xd3   // vcvttsd2si    rdx, xmm3
	LONG $0xda2ef9c5               // vucomisd    xmm3, xmm2
	LONG $0xd0430f48               // cmovae    rdx, rax
	LONG $0x6ef9e1c4; BYTE $0xd2   // vmovq    xmm2, rdx
	// Rebuild the 4-lane result, zero the lanes whose input equals zero, store.
	LONG $0xd26cd1c5               // vpunpcklqdq    xmm2, xmm5, xmm2
	LONG $0x386de3c4; WORD $0x01d4 // vinserti128    ymm2, ymm2, xmm4, 1
	LONG $0xc1c2fdc5; BYTE $0x04   // vcmpneqpd    ymm0, ymm0, ymm1
	LONG $0xc254fdc5               // vandpd    ymm0, ymm0, ymm2
	LONG $0x117dc1c4; WORD $0xf804 // vmovupd    yword [r8 + 8*rdi], ymm0

LBB4_1285:
	// Nothing left for the scalar loop when rsi has already reached r10.
	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
	JE   LBB4_1351

LBB4_1286:
	// Scalar loop constants: xmm0 = B, xmm1 = M, xmm2 = T, eax = 0, xmm3 = 0.0.
	LONG $0x4512fbc5; BYTE $0x08 // vmovddup    xmm0, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0x4d28f9c5; BYTE $0x30 // vmovapd    xmm1, oword 48[rbp] /* [rip + .LCPI4_2] */
	LONG $0x5510fbc5; BYTE $0x10 // vmovsd    xmm2, qword 16[rbp] /* [rip + .LCPI4_6] */
	WORD $0xc031                 // xor    eax, eax
	LONG $0xdb57e1c5             // vxorpd    xmm3, xmm3, xmm3

LBB4_1287:
	LONG $0x2410fbc5; BYTE $0xf1 // vmovsd    xmm4, qword [rcx + 8*rsi]
	LONG $0xe954d9c5             // vandpd    xmm5, xmm4, xmm1
	LONG $0xed56f9c5             // vorpd    xmm5, xmm0, xmm5
	LONG $0xf25cd3c5             // vsubsd    xmm6, xmm5, xmm2
	LONG $0x2cfbe1c4; BYTE $0xd6 // vcvttsd2si    rdx, xmm6
	WORD $0x314c; BYTE $0xda     // xor    rdx, r11
	LONG $0x2cfbe1c4; BYTE $0xfd // vcvttsd2si    rdi, xmm5
	LONG $0xea2ef9c5             // vucomisd    xmm5, xmm2
	LONG $0xfa430f48             // cmovae    rdi, rdx
	// Replace the result with 0 (rax) when the input compares equal to 0.0.
	LONG $0xdc2ef9c5             // vucomisd    xmm3, xmm4
	LONG $0xf8440f48             // cmove    rdi, rax
	LONG $0xf03c8949             // mov    qword [r8 + 8*rsi], rdi
	LONG $0x01c68348             // add    rsi, 1
	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
	JNE  LBB4_1287
	JMP  LBB4_1351

// LBB4_1280 / LBB4_1281: single-element path. Takes the sign bit of the float in
// lane 0 of xmm0 (loaded before this chunk), turns it into the integer -1 (bit
// set) or +1 (bit clear), converts that to float32, then converts the float to a
// 64-bit integer with the unsigned-conversion idiom: if v >= T use
// (cvttss2si(v - T) XOR 0x8000000000000000), otherwise cvttss2si(v).
// T = dword at 160[rbp].
// NOTE(review): T (.LCPI4_9) is defined outside this chunk; presumably 2^63 as
// float32 -- confirm.
// LBB4_1281 stores rcx at r8 + 8*rax and leaves via LBB4_1351. Note that rcx is
// reused as a scratch register here, so it no longer holds the read pointer.
LBB4_1280:
	// ecx = -(signbit) | 1  ->  -1 or +1.
	LONG $0xc850f8c5                       // vmovmskps    ecx, xmm0
	WORD $0xe183; BYTE $0x01               // and    ecx, 1
	WORD $0xd9f7                           // neg    ecx
	WORD $0xc983; BYTE $0x01               // or    ecx, 1
	LONG $0xc12adac5                       // vcvtsi2ss    xmm0, xmm4, ecx
	LONG $0x8d10fac5; LONG $0x000000a0     // (see original encoding below)

// LBB4_1288 .. LBB4_1293: reads 64-bit floats at rcx and writes 32-bit integers
// at r8.
// Per element: v = B | (in & M), converted to a 32-bit integer; the result is
// forced to zero when the input compares equal to zero.
//   M = qword at 0[rbp] (vector path) / 16 bytes at 48[rbp] (scalar path)
//   B = qword at 8[rbp], T = qword at 24[rbp], X = dword at 148[rbp]
// NOTE(review): the constants are defined outside this chunk; presumably
// M = sign-bit mask, B = 1.0, T = 2^31 and X = 0x80000000, i.e. this computes
// copysign(1.0, in) cast to uint32 -- confirm.
// LBB4_1289 optionally processes one 4-element vector at index rdi when bit 0 of
// r9 is set, using the unsigned idiom (v < T ? cvtt(v) : cvtt(v - T) XOR X).
// LBB4_1293 is a one-element-per-iteration loop from rsi to rax that uses a
// 64-bit cvttsd2si and stores the low 32 bits.
LBB4_1288:
	WORD $0xff31 // xor    edi, edi

LBB4_1289:
	LONG $0x01c1f641                     // test    r9b, 1
	JE   LBB4_1291
	LONG $0x0410fdc5; BYTE $0xf9         // vmovupd    ymm0, yword [rcx + 8*rdi]
	LONG $0xc957f1c5                     // vxorpd    xmm1, xmm1, xmm1
	// xmm1 = (in == 0) mask narrowed from four qwords to four dwords.
	LONG $0xc9c2fdc5; BYTE $0x00         // vcmpeqpd    ymm1, ymm0, ymm1
	LONG $0x197de3c4; WORD $0x01ca       // vextractf128    xmm2, ymm1, 1
	LONG $0xca6bf1c5                     // vpackssdw    xmm1, xmm1, xmm2
	// ymm0 = B | (in & M).
	LONG $0x197de2c4; WORD $0x0055       // vbroadcastsd    ymm2, qword 0[rbp] /* [rip + .LCPI4_0] */
	LONG $0xc254fdc5                     // vandpd    ymm0, ymm0, ymm2
	LONG $0x197de2c4; WORD $0x0855       // vbroadcastsd    ymm2, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0xc056edc5                     // vorpd    ymm0, ymm2, ymm0
	// xmm3 = (v < T) selector; xmm2 = cvtt(v - T) XOR X; xmm0 = cvtt(v).
	LONG $0x197de2c4; WORD $0x1855       // vbroadcastsd    ymm2, qword 24[rbp] /* [rip + .LCPI4_7] */
	LONG $0xdac2fdc5; BYTE $0x01         // vcmpltpd    ymm3, ymm0, ymm2
	LONG $0x197de3c4; WORD $0x01dc       // vextractf128    xmm4, ymm3, 1
	LONG $0xd25cfdc5                     // vsubpd    ymm2, ymm0, ymm2
	LONG $0xd2e6fdc5                     // vcvttpd2dq    xmm2, ymm2
	QUAD $0x000094ad1879e2c4; BYTE $0x00 // vbroadcastss    xmm5, dword 148[rbp] /* [rip + .LCPI4_4] */
	LONG $0xdc6be1c5                     // vpackssdw    xmm3, xmm3, xmm4
	LONG $0xd557e9c5                     // vxorpd    xmm2, xmm2, xmm5
	LONG $0xc0e6fdc5                     // vcvttpd2dq    xmm0, ymm0
	LONG $0x4a69e3c4; WORD $0x30c0       // vblendvps    xmm0, xmm2, xmm0, xmm3
	// Clear the lanes whose input was zero and store four dwords.
	LONG $0xc0dff1c5                     // vpandn    xmm0, xmm1, xmm0
	LONG $0x7f7ac1c4; WORD $0xb804       // vmovdqu    oword [r8 + 4*rdi], xmm0

LBB4_1291:
	// Nothing left for the scalar loop when rsi has already reached rax.
	WORD $0x3948; BYTE $0xc6 // cmp    rsi, rax
	JE   LBB4_1351

LBB4_1292:
	// Scalar loop constants: xmm0 = 0.0, xmm1 = M, xmm2 = B.
	LONG $0xc057f9c5             // vxorpd    xmm0, xmm0, xmm0
	LONG $0x4d28f9c5; BYTE $0x30 // vmovapd    xmm1, oword 48[rbp] /* [rip + .LCPI4_2] */
	LONG $0x5512fbc5; BYTE $0x08 // vmovddup    xmm2, qword 8[rbp] /* [rip + .LCPI4_1] */

LBB4_1293:
	LONG $0x1c10fbc5; BYTE $0xf1 // vmovsd    xmm3, qword [rcx + 8*rsi]
	// Flags from this compare survive the VEX ops below and feed the cmove.
	LONG $0xc32ef9c5             // vucomisd    xmm0, xmm3
	LONG $0xd954e1c5             // vandpd    xmm3, xmm3, xmm1
	LONG $0xdb56e9c5             // vorpd    xmm3, xmm2, xmm3
	LONG $0x2cfbe1c4; BYTE $0xd3 // vcvttsd2si    rdx, xmm3
	// When the input compares equal to 0.0 the result is replaced by r10d.
	// NOTE(review): r10d is set before this chunk; presumably 0 here -- confirm.
	LONG $0xd2440f41             // cmove    edx, r10d
	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
	LONG $0x01c68348             // add    rsi, 1
	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
	JNE  LBB4_1293
	JMP  LBB4_1351

// LBB4_1294 .. LBB4_1300: reads 32-bit floats at rcx and writes 32-bit integers
// at r8.
// Per element: s = -1 if the sign bit is set, otherwise +1 (vector path: the
// arithmetic shift result ORed with C); s is converted to float32 and then to an
// integer; the result is 0 when the input compares equal to zero.
//   C = dword at 156[rbp], T = dword at 164[rbp], X = dword at 148[rbp]
// NOTE(review): the constants are defined outside this chunk; presumably C = 1,
// T = 2^31 as float32 and X = 0x80000000 -- confirm.
// LBB4_1295 optionally processes one 8-element vector at index rsi when bit 0 of
// r9 is set, using the unsigned idiom (v < T ? cvtt(v) : cvtt(v - T) XOR X).
// LBB4_1299/LBB4_1300 form a one-element-per-iteration loop from rdx to rax that
// uses a 64-bit cvttss2si and stores the low 32 bits.
LBB4_1294:
	WORD $0xf631 // xor    esi, esi

LBB4_1295:
	LONG $0x01c1f641                     // test    r9b, 1
	JE   LBB4_1297
	LONG $0x046ffec5; BYTE $0xb1         // vmovdqu    ymm0, yword [rcx + 4*rsi]
	// ymm1 = (in >> 31, arithmetic) | C, converted to float32.
	LONG $0xe072f5c5; BYTE $0x1f         // vpsrad    ymm1, ymm0, 31
	QUAD $0x00009c95587de2c4; BYTE $0x00 // vpbroadcastd    ymm2, dword 156[rbp] /* [rip + .LCPI4_8] */
	LONG $0xcaebf5c5                     // vpor    ymm1, ymm1, ymm2
	LONG $0xc95bfcc5                     // vcvtdq2ps    ymm1, ymm1
	// ymm3 = (v < T) selector; ymm2 = cvtt(v - T) XOR X; ymm1 = cvtt(v).
	QUAD $0x0000a495187de2c4; BYTE $0x00 // vbroadcastss    ymm2, dword 164[rbp] /* [rip + .LCPI4_10] */
	LONG $0xdac2f4c5; BYTE $0x01         // vcmpltps    ymm3, ymm1, ymm2
	LONG $0xd25cf4c5                     // vsubps    ymm2, ymm1, ymm2
	LONG $0xd25bfec5                     // vcvttps2dq    ymm2, ymm2
	QUAD $0x000094a5187de2c4; BYTE $0x00 // vbroadcastss    ymm4, dword 148[rbp] /* [rip + .LCPI4_4] */
	LONG $0xd457ecc5                     // vxorps    ymm2, ymm2, ymm4
	LONG $0xc95bfec5                     // vcvttps2dq    ymm1, ymm1
	LONG $0x4a6de3c4; WORD $0x30c9       // vblendvps    ymm1, ymm2, ymm1, ymm3
	// Keep the result only where the input (as float) is not equal to zero.
	LONG $0xd257e8c5                     // vxorps    xmm2, xmm2, xmm2
	LONG $0xc2c2fcc5; BYTE $0x04         // vcmpneqps    ymm0, ymm0, ymm2
	LONG $0xc154fcc5                     // vandps    ymm0, ymm0, ymm1
	LONG $0x117cc1c4; WORD $0xb004       // vmovups    yword [r8 + 4*rsi], ymm0

LBB4_1297:
	// Nothing left for the scalar loop when rdx has already reached rax.
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB4_1351

LBB4_1298:
	LONG $0xc0eff9c5 // vpxor    xmm0, xmm0, xmm0
	JMP  LBB4_1300

LBB4_1299:
	// Store the current result (esi) and advance to the next element.
	LONG $0x90348941         // mov    dword [r8 + 4*rdx], esi
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	JE   LBB4_1351

LBB4_1300:
	LONG $0x0c10fac5; BYTE $0x91 // vmovss    xmm1, dword [rcx + 4*rdx]
	// esi = 0 is the result for the equal-to-zero case; the JE uses the flags
	// of the vucomiss that follows.
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc12ef8c5             // vucomiss    xmm0, xmm1
	JE   LBB4_1299
	// esi = -(signbit) | 1  ->  -1 or +1, round-tripped through float32.
	LONG $0xf150f8c5             // vmovmskps    esi, xmm1
	WORD $0xe683; BYTE $0x01     // and    esi, 1
	WORD $0xdef7                 // neg    esi
	WORD $0xce83; BYTE $0x01     // or    esi, 1
	LONG $0xce2abac5             // vcvtsi2ss    xmm1, xmm8, esi
	LONG $0x2cfae1c4; BYTE $0xf1 // vcvttss2si    rsi, xmm1
	JMP  LBB4_1299

// LBB4_1302 .. LBB4_1307: reads 64-bit floats at rcx and writes 64-bit integers
// at r8, using a plain (signed) cvttsd2si.
// Per element: v = B | (in & M), converted with cvttsd2si; the result is 0 when
// the input compares equal to zero.
//   M = qword at 0[rbp] (vector path) / 16 bytes at 48[rbp] (scalar path)
//   B = qword at 8[rbp]
// NOTE(review): the constants are defined outside this chunk; presumably
// M = sign-bit mask and B = 1.0, i.e. copysign(1.0, in) cast to int64 -- confirm.
// LBB4_1303 optionally processes one 4-element vector at index rsi when bit 0 of
// r9 is set; LBB4_1307 is a one-element-per-iteration loop from rdx to rax.
// Zero test differs for NaN: the vector path uses vcmpneqpd predicate 4 (true for
// unordered), while the scalar path's cmove fires on ZF, which vucomisd also sets
// for unordered operands.
LBB4_1302:
	WORD $0xf631 // xor    esi, esi

LBB4_1303:
	LONG $0x01c1f641               // test    r9b, 1
	JE   LBB4_1305
	LONG $0x0410fdc5; BYTE $0xf1   // vmovupd    ymm0, yword [rcx + 8*rsi]
	LONG $0xc957f1c5               // vxorpd    xmm1, xmm1, xmm1
	// ymm2 = B | (in & M) for all four lanes.
	LONG $0x197de2c4; WORD $0x0055 // vbroadcastsd    ymm2, qword 0[rbp] /* [rip + .LCPI4_0] */
	LONG $0xd254fdc5               // vandpd    ymm2, ymm0, ymm2
	LONG $0x197de2c4; WORD $0x085d // vbroadcastsd    ymm3, qword 8[rbp] /* [rip + .LCPI4_1] */
	LONG $0xd256e5c5               // vorpd    ymm2, ymm3, ymm2
	// Convert lanes 2 and 3 (upper 128 bits) one at a time through rdi.
	LONG $0x197de3c4; WORD $0x01d3 // vextractf128    xmm3, ymm2, 1
	LONG $0x2cfbe1c4; BYTE $0xfb   // vcvttsd2si    rdi, xmm3
	LONG $0x6ef9e1c4; BYTE $0xe7   // vmovq    xmm4, rdi
	LONG $0x0479e3c4; WORD $0x4edb // vpermilps    xmm3, xmm3, 78
	LONG $0x2cfbe1c4; BYTE $0xfb   // vcvttsd2si    rdi, xmm3
	LONG $0x6ef9e1c4; BYTE $0xdf   // vmovq    xmm3, rdi
	LONG $0xdb6cd9c5               // vpunpcklqdq    xmm3, xmm4, xmm3
	// Convert lanes 0 and 1 (lower 128 bits).
	LONG $0x2cfbe1c4; BYTE $0xfa   // vcvttsd2si    rdi, xmm2
	LONG $0x6ef9e1c4; BYTE $0xe7   // vmovq    xmm4, rdi
	LONG $0x0479e3c4; WORD $0x4ed2 // vpermilps    xmm2, xmm2, 78
	LONG $0x2cfbe1c4; BYTE $0xfa   // vcvttsd2si    rdi, xmm2
	LONG $0x6ef9e1c4; BYTE $0xd7   // vmovq    xmm2, rdi
	LONG $0xd26cd9c5               // vpunpcklqdq    xmm2, xmm4, xmm2
	// Rebuild the 4-lane result, zero the lanes whose input equals zero, store.
	LONG $0x386de3c4; WORD $0x01d3 // vinserti128    ymm2, ymm2, xmm3, 1
	LONG $0xc1c2fdc5; BYTE $0x04   // vcmpneqpd    ymm0, ymm0, ymm1
	LONG $0xc254fdc5               // vandpd    ymm0, ymm0, ymm2
	LONG $0x117dc1c4; WORD $0xf004 // vmovupd    yword [r8 + 8*rsi], ymm0

LBB4_1305:
	// Nothing left for the scalar loop when rdx has already reached rax.
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB4_1351

LBB4_1306:
	// Scalar loop constants: rsi = 0, xmm0 = 0.0, xmm1 = M, xmm2 = B.
	WORD $0xf631                 // xor    esi, esi
	LONG $0xc057f9c5             // vxorpd    xmm0, xmm0, xmm0
	LONG $0x4d28f9c5; BYTE $0x30 // vmovapd    xmm1, oword 48[rbp] /* [rip + .LCPI4_2] */
	LONG $0x5512fbc5; BYTE $0x08 // vmovddup    xmm2, qword 8[rbp] /* [rip + .LCPI4_1] */

LBB4_1307:
	LONG $0x1c10fbc5; BYTE $0xd1 // vmovsd    xmm3, qword [rcx + 8*rdx]
	// Flags from this compare survive the VEX ops below and feed the cmove.
	LONG $0xc32ef9c5             // vucomisd    xmm0, xmm3
	LONG $0xd954e1c5             // vandpd    xmm3, xmm3, xmm1
	LONG $0xdb56e9c5             // vorpd    xmm3, xmm2, xmm3
	LONG $0x2cfbe1c4; BYTE $0xfb // vcvttsd2si    rdi, xmm3
	// Replace the result with 0 (rsi) when the input compares equal to 0.0.
	LONG $0xfe440f48             // cmove    rdi, rsi
	LONG $0xd03c8949             // mov    qword [r8 + 8*rdx], rdi
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
	JNE  LBB4_1307
	JMP  LBB4_1351

// LBB4_1308 .. LBB4_1315: reads 32-bit floats at rcx and writes 64-bit integers
// at r8, using a plain (signed) cvttss2si.
// Per element: s = -1 if the sign bit is set, otherwise +1 (vector path: the
// arithmetic shift result ORed with C); s is converted to float32 and back to a
// 64-bit integer; the result is 0 when the input compares equal to zero.
//   C = dword at 156[rbp]
// NOTE(review): C (.LCPI4_8) is defined outside this chunk; presumably 1 -- confirm.
// LBB4_1309 optionally processes one 4-element vector at index rsi when bit 0 of
// r9 is set; LBB4_1313/LBB4_1315 form a one-element-per-iteration loop from rdx
// to r10.
LBB4_1308:
	WORD $0xf631 // xor    esi, esi

LBB4_1309:
	LONG $0x01c1f641                     // test    r9b, 1
	JE   LBB4_1311
	LONG $0x0410f8c5; BYTE $0xb1         // vmovups    xmm0, oword [rcx + 4*rsi]
	// ymm1 = (in == 0) mask, sign-extended from four dwords to four qwords.
	LONG $0xc9eff1c5                     // vpxor    xmm1, xmm1, xmm1
	LONG $0xc9c2f8c5; BYTE $0x00         // vcmpeqps    xmm1, xmm0, xmm1
	LONG $0x257de2c4; BYTE $0xc9         // vpmovsxdq    ymm1, xmm1
	// xmm0 = float32((in >> 31, arithmetic) | C).
	LONG $0xe072f9c5; BYTE $0x1f         // vpsrad    xmm0, xmm0, 31
	QUAD $0x00009c955879e2c4; BYTE $0x00 // vpbroadcastd    xmm2, dword 156[rbp] /* [rip + .LCPI4_8] */
	LONG $0xc2ebf9c5                     // vpor    xmm0, xmm0, xmm2
	LONG $0xc05bf8c5                     // vcvtdq2ps    xmm0, xmm0
	// Convert lanes 3 and 2 one at a time through rax.
	LONG $0x0479e3c4; WORD $0xe7d0       // vpermilps    xmm2, xmm0, 231
	LONG $0x2cfae1c4; BYTE $0xc2         // vcvttss2si    rax, xmm2
	LONG $0x6ef9e1c4; BYTE $0xd0         // vmovq    xmm2, rax
	LONG $0x0579e3c4; WORD $0x01d8       // vpermilpd    xmm3, xmm0, 1
	LONG $0x2cfae1c4; BYTE $0xc3         // vcvttss2si    rax, xmm3
	LONG $0x6ef9e1c4; BYTE $0xd8         // vmovq    xmm3, rax
	LONG $0xd26ce1c5                     // vpunpcklqdq    xmm2, xmm3, xmm2
	// Convert lanes 0 and 1.
	LONG $0x2cfae1c4; BYTE $0xc0         // vcvttss2si    rax, xmm0
	LONG $0x6ef9e1c4; BYTE $0xd8         // vmovq    xmm3, rax
	LONG $0xc016fac5                     // vmovshdup    xmm0, xmm0
	LONG $0x2cfae1c4; BYTE $0xc0         // vcvttss2si    rax, xmm0
	LONG $0x6ef9e1c4; BYTE $0xc0         // vmovq    xmm0, rax
	LONG $0xc06ce1c5                     // vpunpcklqdq    xmm0, xmm3, xmm0
	// Rebuild the 4-lane result, clear the lanes whose input was zero, store.
	LONG $0x387de3c4; WORD $0x01c2       // vinserti128    ymm0, ymm0, xmm2, 1
	LONG $0xc0dff5c5                     // vpandn    ymm0, ymm1, ymm0
	LONG $0x7f7ec1c4; WORD $0xf004       // vmovdqu    yword [r8 + 8*rsi], ymm0

LBB4_1311:
	// Nothing left for the scalar loop when rdx has already reached r10.
	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
	JE   LBB4_1351

LBB4_1312:
	LONG $0xc057f8c5 // vxorps    xmm0, xmm0, xmm0
	JMP  LBB4_1315

LBB4_1313:
	// Input did not compare equal to zero: eax = -(signbit) | 1 -> -1 or +1,
	// round-tripped through float32 and stored as a qword.
	LONG $0xc150f8c5             // vmovmskps    eax, xmm1
	WORD $0xe083; BYTE $0x01     // and    eax, 1
	WORD $0xd8f7                 // neg    eax
	WORD $0xc883; BYTE $0x01     // or    eax, 1
	LONG $0xc82acac5             // vcvtsi2ss    xmm1, xmm6, eax
	LONG $0x2cfae1c4; BYTE $0xf1 // vcvttss2si    rsi, xmm1
	LONG $0xd0348949             // mov    qword [r8 + 8*rdx], rsi
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
	JE   LBB4_1351

LBB4_1315:
	LONG $0x0c10fac5; BYTE $0x91 // vmovss    xmm1, dword [rcx + 4*rdx]
	LONG $0xc12ef8c5             // vucomiss    xmm0, xmm1
	JNE  LBB4_1313
	// Input compared equal to zero (ZF set): store 0.
	WORD $0xf631                 // xor    esi, esi
	LONG $0xd0348949             // mov    qword [r8 + 8*rdx], rsi
	LONG $0x01c28348             // add    rdx, 1
	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
	JNE  LBB4_1315
	JMP  LBB4_1351

// LBB4_1317 .. LBB4_1320: optional single vector step over 16-bit elements.
// When bit 0 of r9 is set, 32 words starting at index rdi are processed:
//   out word at r8 = (in word at rcx != 0) ? W : 0
// where W is the matching word of the 32-byte constant at 192[rbp]. That constant
// is read with an aligned load (vmovdqa), so rbp + 192 must be 32-byte aligned.
// NOTE(review): the constant (.LCPI4_18) is defined outside this chunk;
// presumably every word is 1 -- confirm.
// Afterwards: rdx == rax -> LBB4_1351, otherwise LBB4_1321 (outside this chunk).
LBB4_1317:
	WORD $0xff31 // xor    edi, edi

LBB4_1318:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB4_1320
	LONG $0xc0eff9c5                           // vpxor    xmm0, xmm0, xmm0
	// vpcmpeqw yields all-ones where in == 0; vpandn computes (~mask & W).
	LONG $0x0c75fdc5; BYTE $0x79               // vpcmpeqw    ymm1, ymm0, yword [rcx + 2*rdi]
	QUAD $0x000000c0956ffdc5                   // vmovdqa    ymm2, yword 192[rbp] /* [rip + .LCPI4_18] */
	LONG $0x4475fdc5; WORD $0x2079             // vpcmpeqw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0xcadff5c5                           // vpandn    ymm1, ymm1, ymm2
	LONG $0xc2dffdc5                           // vpandn    ymm0, ymm0, ymm2
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

LBB4_1320:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1321

// Single 32-element pass over 16-bit words (two 32-byte vectors); the
// instruction sequence matches the one at LBB4_1318, but this copy continues
// at LBB4_1329 instead of LBB4_1321.
// LBB4_1325 starts the element index rdi at 0 and falls into LBB4_1326.
// NOTE(review): LBB4_1326 is presumably also entered from a loop outside this
// chunk with rdi already advanced -- confirm.
LBB4_1325:
	WORD $0xff31 // xor    edi, edi

// Runs the pass only when bit 0 of r9 is set (r9 is computed outside this
// chunk; presumably a count of 32-element groups -- confirm).
//
// For every word x read from [rcx + 2*rdi ...]:
//   mask = (x == 0) ? 0xFFFF : 0          (vpcmpeqw against zero)
//   out  = ~mask & K                       (vpandn)
// so a zero input yields 0 and any other input yields K, the per-word
// constant loaded from 192[rbp] (.LCPI4_18; value not visible here,
// presumably 1 in every word -- confirm). Results go to [r8 + 2*rdi ...].
LBB4_1326:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB4_1328
	LONG $0xc0eff9c5                           // vpxor    xmm0, xmm0, xmm0
	LONG $0x0c75fdc5; BYTE $0x79               // vpcmpeqw    ymm1, ymm0, yword [rcx + 2*rdi]
	QUAD $0x000000c0956ffdc5                   // vmovdqa    ymm2, yword 192[rbp] /* [rip + .LCPI4_18] */
	LONG $0x4475fdc5; WORD $0x2079             // vpcmpeqw    ymm0, ymm0, yword [rcx + 2*rdi + 32]
	LONG $0xcadff5c5                           // vpandn    ymm1, ymm1, ymm2
	LONG $0xc2dffdc5                           // vpandn    ymm0, ymm0, ymm2
	LONG $0x7f7ec1c4; WORD $0x780c             // vmovdqu    yword [r8 + 2*rdi], ymm1
	LONG $0x7f7ec1c4; WORD $0x7844; BYTE $0x20 // vmovdqu    yword [r8 + 2*rdi + 32], ymm0

// Return when rdx == rax; otherwise continue at LBB4_1329 (outside this
// chunk; presumably the element-by-element remainder -- confirm).
LBB4_1328:
	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
	JE   LBB4_1351
	JMP  LBB4_1329

// Single 32-element pass over signed 16-bit words (two 32-byte vectors).
// LBB4_1333 starts the element index rsi at 0 and falls into LBB4_1334.
// NOTE(review): LBB4_1334 is presumably also entered from a loop outside this
// chunk with rsi already advanced -- confirm.
LBB4_1333:
	WORD $0xf631 // xor    esi, esi

// Runs the pass only when bit 0 of r9 is set (r9 is computed outside this
// chunk; presumably a count of 32-element groups -- confirm).
//
// Per word x loaded from [rcx + 2*rsi ...], with K = the per-word constant at
// 192[rbp] (.LCPI4_18; value not visible here, presumably 1 -- confirm):
//   nz  = (x != 0) ? 0xFFFF : 0     vpcmpeqw with zero, inverted by xor with
//                                   all-ones (ymm4 from vpcmpeqd)
//   lt  = (K > x)                   signed compare, vpcmpgtw
//   out = lt ? nz : K               vpblendvb picks nz where lt is set
// With K == 1 this gives -1 for negative, 0 for zero and 1 for positive x.
// ymm4 is used first as the all-ones value and then reloaded with K.
// Results are stored to [r8 + 2*rsi ...].
LBB4_1334:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB4_1336
	LONG $0x046ffec5; BYTE $0x71               // vmovdqu    ymm0, yword [rcx + 2*rsi]
	LONG $0x4c6ffec5; WORD $0x2071             // vmovdqu    ymm1, yword [rcx + 2*rsi + 32]
	LONG $0xd2efe9c5                           // vpxor    xmm2, xmm2, xmm2
	LONG $0xda75fdc5                           // vpcmpeqw    ymm3, ymm0, ymm2
	LONG $0xe476ddc5                           // vpcmpeqd    ymm4, ymm4, ymm4
	LONG $0xdcefe5c5                           // vpxor    ymm3, ymm3, ymm4
	LONG $0xd275f5c5                           // vpcmpeqw    ymm2, ymm1, ymm2
	LONG $0xd4efedc5                           // vpxor    ymm2, ymm2, ymm4
	QUAD $0x000000c0a56ffdc5                   // vmovdqa    ymm4, yword 192[rbp] /* [rip + .LCPI4_18] */
	LONG $0xc065ddc5                           // vpcmpgtw    ymm0, ymm4, ymm0
	LONG $0xc965ddc5                           // vpcmpgtw    ymm1, ymm4, ymm1
	LONG $0x4c5de3c4; WORD $0x00c3             // vpblendvb    ymm0, ymm4, ymm3, ymm0
	LONG $0x4c5de3c4; WORD $0x10ca             // vpblendvb    ymm1, ymm4, ymm2, ymm1
	LONG $0x7f7ec1c4; WORD $0x7004             // vmovdqu    yword [r8 + 2*rsi], ymm0
	LONG $0x7f7ec1c4; WORD $0x704c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rsi + 32], ymm1

// Return when rdx == r11; otherwise continue at LBB4_1337 (outside this
// chunk; presumably the element-by-element remainder -- confirm).
LBB4_1336:
	WORD $0x394c; BYTE $0xda // cmp    rdx, r11
	JE   LBB4_1351
	JMP  LBB4_1337

// Single 32-element pass over signed 16-bit words (two 32-byte vectors); the
// instruction sequence matches the one at LBB4_1334, but this copy continues
// at LBB4_1346 and otherwise falls straight into the exit at LBB4_1351.
// LBB4_1342 starts the element index rsi at 0 and falls into LBB4_1343.
// NOTE(review): LBB4_1343 is presumably also entered from a loop outside this
// chunk with rsi already advanced -- confirm.
LBB4_1342:
	WORD $0xf631 // xor    esi, esi

// Runs the pass only when bit 0 of r9 is set (r9 is computed outside this
// chunk; presumably a count of 32-element groups -- confirm).
//
// Per word x loaded from [rcx + 2*rsi ...], with K = the per-word constant at
// 192[rbp] (.LCPI4_18; value not visible here, presumably 1 -- confirm):
//   nz  = (x != 0) ? 0xFFFF : 0     vpcmpeqw with zero, then xor with all-ones
//   lt  = (K > x)                   signed compare, vpcmpgtw
//   out = lt ? nz : K               vpblendvb picks nz where lt is set
// With K == 1 the stored value is -1, 0 or 1 for negative, zero or positive x.
// Results are stored to [r8 + 2*rsi ...].
LBB4_1343:
	LONG $0x01c1f641                           // test    r9b, 1
	JE   LBB4_1345
	LONG $0x046ffec5; BYTE $0x71               // vmovdqu    ymm0, yword [rcx + 2*rsi]
	LONG $0x4c6ffec5; WORD $0x2071             // vmovdqu    ymm1, yword [rcx + 2*rsi + 32]
	LONG $0xd2efe9c5                           // vpxor    xmm2, xmm2, xmm2
	LONG $0xda75fdc5                           // vpcmpeqw    ymm3, ymm0, ymm2
	LONG $0xe476ddc5                           // vpcmpeqd    ymm4, ymm4, ymm4
	LONG $0xdcefe5c5                           // vpxor    ymm3, ymm3, ymm4
	LONG $0xd275f5c5                           // vpcmpeqw    ymm2, ymm1, ymm2
	LONG $0xd4efedc5                           // vpxor    ymm2, ymm2, ymm4
	QUAD $0x000000c0a56ffdc5                   // vmovdqa    ymm4, yword 192[rbp] /* [rip + .LCPI4_18] */
	LONG $0xc065ddc5                           // vpcmpgtw    ymm0, ymm4, ymm0
	LONG $0xc965ddc5                           // vpcmpgtw    ymm1, ymm4, ymm1
	LONG $0x4c5de3c4; WORD $0x00c3             // vpblendvb    ymm0, ymm4, ymm3, ymm0
	LONG $0x4c5de3c4; WORD $0x10ca             // vpblendvb    ymm1, ymm4, ymm2, ymm1
	LONG $0x7f7ec1c4; WORD $0x7004             // vmovdqu    yword [r8 + 2*rsi], ymm0
	LONG $0x7f7ec1c4; WORD $0x704c; BYTE $0x20 // vmovdqu    yword [r8 + 2*rsi + 32], ymm1

// If rdx != r11 there is more to do at LBB4_1346 (outside this chunk);
// otherwise execution falls through to the exit label that follows.
LBB4_1345:
	WORD $0x394c; BYTE $0xda // cmp    rdx, r11
	JNE  LBB4_1346

// Shared exit for the paths above: VZEROUPPER zeroes the upper 128 bits of
// every YMM register before returning to the caller.
LBB4_1351:
	VZEROUPPER
	RET