//go:build go1.18 && !noasm && !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// _multiply_constant_int32_int32_avx2(src, dest, len, factor)
//
// Stores dest[i] = src[i] * factor (32-bit product) for i in [0, len),
// reading and writing 4-byte elements.
//
// Arguments are four 8-byte slots on the Go argument frame (32 bytes total,
// no local frame). Only the low 32 bits of len (edx) and factor (ecx) are
// used by the code below.
//
// Strategy:
//   - (int32)len <= 0: return immediately.
//   - len <= 31, or the 4*len-byte src and dest ranges overlap: scalar loop.
//   - otherwise: AVX2 vpmulld on groups of 32 elements (unrolled to 64 per
//     iteration), followed by the scalar loop for the len % 32 leftovers.
//
// Register roles: rdi = src, rsi = dest, r9 = len (zero-extended),
// ecx = factor, r11 = current scalar index / count done by the vector path,
// ymm0 = factor broadcast to 8 dwords.
TEXT ·_multiply_constant_int32_int32_avx2(SB), $0-32

	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive 32-bit length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB0_16
	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
	// Short inputs (len <= 31) go straight to the scalar path.
	WORD $0xfa83; BYTE $0x1f // cmp    edx, 31
	JBE  LBB0_2
	// Overlap check: take the vector path only if src+4*len <= dest
	// or dest+4*len <= src.
	LONG $0x8f048d4a         // lea    rax, [rdi + 4*r9]
	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
	JBE  LBB0_9
	LONG $0x8e048d4a         // lea    rax, [rsi + 4*r9]
	WORD $0x3948; BYTE $0xf8 // cmp    rax, rdi
	JBE  LBB0_9

// Scalar path starting at element 0.
LBB0_2:
	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d

// Scalar loop setup for elements [r11, len):
// r8 = len - r11 - 1, rax = len & 3 = number of single-element steps taken
// before the 4x-unrolled loop.
LBB0_3:
	WORD $0x894d; BYTE $0xd8 // mov    r8, r11
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_5

// One element per iteration, rax times.
LBB0_4:
	LONG $0x9f148b42         // mov    edx, dword [rdi + 4*r11]
	WORD $0xaf0f; BYTE $0xd1 // imul    edx, ecx
	LONG $0x9e148942         // mov    dword [rsi + 4*r11], edx
	LONG $0x01c38349         // add    r11, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_4

// If fewer than 4 elements remained on entry to LBB0_3, the loop above
// already handled all of them.
LBB0_5:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB0_16

// Four elements per iteration until r11 reaches len.
LBB0_6:
	LONG $0x9f048b42             // mov    eax, dword [rdi + 4*r11]
	WORD $0xaf0f; BYTE $0xc1     // imul    eax, ecx
	LONG $0x9e048942             // mov    dword [rsi + 4*r11], eax
	LONG $0x9f448b42; BYTE $0x04 // mov    eax, dword [rdi + 4*r11 + 4]
	WORD $0xaf0f; BYTE $0xc1     // imul    eax, ecx
	LONG $0x9e448942; BYTE $0x04 // mov    dword [rsi + 4*r11 + 4], eax
	LONG $0x9f448b42; BYTE $0x08 // mov    eax, dword [rdi + 4*r11 + 8]
	WORD $0xaf0f; BYTE $0xc1     // imul    eax, ecx
	LONG $0x9e448942; BYTE $0x08 // mov    dword [rsi + 4*r11 + 8], eax
	LONG $0x9f448b42; BYTE $0x0c // mov    eax, dword [rdi + 4*r11 + 12]
	WORD $0xaf0f; BYTE $0xc1     // imul    eax, ecx
	LONG $0x9e448942; BYTE $0x0c // mov    dword [rsi + 4*r11 + 12], eax
	LONG $0x04c38349             // add    r11, 4
	WORD $0x394d; BYTE $0xd9     // cmp    r9, r11
	JNE  LBB0_6
	JMP  LBB0_16

// Vector path setup:
// r11 = len & -32 (elements covered by the vector code),
// ymm0 = factor in all 8 dword lanes,
// r8 = (r11 - 32)/32 + 1 = number of 32-element groups,
// r10 = -(r8 & -2), counted up by 2 to zero in the unrolled loop.
// When there is exactly one group (r11 - 32 == 0) the unrolled loop is skipped.
LBB0_9:
	WORD $0x8945; BYTE $0xcb     // mov    r11d, r9d
	LONG $0xe0e38341             // and    r11d, -32
	LONG $0xc16ef9c5             // vmovd    xmm0, ecx
	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
	LONG $0xe0438d49             // lea    rax, [r11 - 32]
	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
	LONG $0x05e8c149             // shr    r8, 5
	LONG $0x01c08349             // add    r8, 1
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	JE   LBB0_10
	WORD $0x894d; BYTE $0xc2     // mov    r10, r8
	LONG $0xfee28349             // and    r10, -2
	WORD $0xf749; BYTE $0xda     // neg    r10
	WORD $0xc031                 // xor    eax, eax

// Unrolled vector loop: two 32-element groups (64 elements, 256 bytes) per
// iteration; rax is the element index.
LBB0_12:
	LONG $0x407de2c4; WORD $0x870c             // vpmulld    ymm1, ymm0, yword [rdi + 4*rax]
	LONG $0x407de2c4; WORD $0x8754; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rdi + 4*rax + 32]
	LONG $0x407de2c4; WORD $0x875c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rdi + 4*rax + 64]
	LONG $0x407de2c4; WORD $0x8764; BYTE $0x60 // vpmulld    ymm4, ymm0, yword [rdi + 4*rax + 96]
	LONG $0x0c7ffec5; BYTE $0x86               // vmovdqu    yword [rsi + 4*rax], ymm1
	LONG $0x547ffec5; WORD $0x2086             // vmovdqu    yword [rsi + 4*rax + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x4086             // vmovdqu    yword [rsi + 4*rax + 64], ymm3
	LONG $0x647ffec5; WORD $0x6086             // vmovdqu    yword [rsi + 4*rax + 96], ymm4
	QUAD $0x0080878c407de2c4; WORD $0x0000     // vpmulld    ymm1, ymm0, yword [rdi + 4*rax + 128]
	QUAD $0x00a08794407de2c4; WORD $0x0000     // vpmulld    ymm2, ymm0, yword [rdi + 4*rax + 160]
	QUAD $0x00c0879c407de2c4; WORD $0x0000     // vpmulld    ymm3, ymm0, yword [rdi + 4*rax + 192]
	QUAD $0x00e087a4407de2c4; WORD $0x0000     // vpmulld    ymm4, ymm0, yword [rdi + 4*rax + 224]
	QUAD $0x000080868c7ffec5; BYTE $0x00       // vmovdqu    yword [rsi + 4*rax + 128], ymm1
	QUAD $0x0000a086947ffec5; BYTE $0x00       // vmovdqu    yword [rsi + 4*rax + 160], ymm2
	QUAD $0x0000c0869c7ffec5; BYTE $0x00       // vmovdqu    yword [rsi + 4*rax + 192], ymm3
	QUAD $0x0000e086a47ffec5; BYTE $0x00       // vmovdqu    yword [rsi + 4*rax + 224], ymm4
	LONG $0x40c08348                           // add    rax, 64
	LONG $0x02c28349                           // add    r10, 2
	JNE  LBB0_12
	// An odd group count leaves one more 32-element group to process.
	LONG $0x01c0f641                           // test    r8b, 1
	JE   LBB0_15

// Final single 32-element group. The last product is written into ymm0,
// overwriting the broadcast factor; no vector instruction follows this block.
LBB0_14:
	LONG $0x407de2c4; WORD $0x870c             // vpmulld    ymm1, ymm0, yword [rdi + 4*rax]
	LONG $0x407de2c4; WORD $0x8754; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rdi + 4*rax + 32]
	LONG $0x407de2c4; WORD $0x875c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rdi + 4*rax + 64]
	LONG $0x407de2c4; WORD $0x8744; BYTE $0x60 // vpmulld    ymm0, ymm0, yword [rdi + 4*rax + 96]
	LONG $0x0c7ffec5; BYTE $0x86               // vmovdqu    yword [rsi + 4*rax], ymm1
	LONG $0x547ffec5; WORD $0x2086             // vmovdqu    yword [rsi + 4*rax + 32], ymm2
	LONG $0x5c7ffec5; WORD $0x4086             // vmovdqu    yword [rsi + 4*rax + 64], ymm3
	LONG $0x447ffec5; WORD $0x6086             // vmovdqu    yword [rsi + 4*rax + 96], ymm0

// If len is not a multiple of 32, finish the remaining elements in the
// scalar loop starting at index r11.
LBB0_15:
	WORD $0x394d; BYTE $0xcb // cmp    r11, r9
	JNE  LBB0_3

// Common exit (also reached from the scalar-only paths).
LBB0_16:
	VZEROUPPER
	RET

// Exactly one 32-element group: skip the unrolled loop; r8 is 1 here, so
// the branch to LBB0_14 is taken.
LBB0_10:
	WORD $0xc031     // xor    eax, eax
	LONG $0x01c0f641 // test    r8b, 1
	JNE  LBB0_14
	JMP  LBB0_15

// _divide_constant_int32_int32_avx2(src, dest, len, factor)
//
// Stores dest[i] = src[i] / factor for i in [0, len). Each 4-byte source
// element is sign-extended to 64 bits (movsxd), divided by the full 64-bit
// factor in rcx, and the low 32 bits of the quotient are stored.
//
// Despite the name, the body is entirely scalar: no vector instructions are
// used, so it returns with a plain RET.
//
// Division selection, per element x:
//   - if (x | factor) has any bit set in the upper 32 bits: cqo + idiv rcx
//     (signed 64-bit division);
//   - otherwise both values are non-negative and fit in 32 bits, so the
//     cheaper unsigned 32-bit div ecx (with edx zeroed) gives the same quotient.
//
// NOTE: factor is not tested for zero anywhere in this routine.
//
// Loop shape: two elements per iteration over the first (len & -2) elements
// (LBB1_10 .. LBB1_16), then one trailing element if len is odd (LBB1_3).
// Register roles: rdi = src, rsi = dest, rcx = factor, r9 = len,
// r10 = len & -2, r8 = element index.
TEXT ·_divide_constant_int32_int32_avx2(SB), $0-32

	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive 32-bit length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB1_8
	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
	// len == 1 skips the pair loop and handles the single element below.
	WORD $0xfa83; BYTE $0x01 // cmp    edx, 1
	JNE  LBB1_9
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d

// Trailing element: only present when len is odd; r8 is its index.
LBB1_3:
	LONG $0x01c1f641         // test    r9b, 1
	JE   LBB1_8
	LONG $0x8704634a         // movsxd    rax, dword [rdi + 4*r8]
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB1_5
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB1_7

// Pair-loop setup: r10 = len rounded down to even, r8 = 0.
LBB1_9:
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0xfee28341         // and    r10d, -2
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
	JMP  LBB1_10

// Second element of the pair, 64-bit signed division.
LBB1_15:
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx

// Store the second quotient, advance by two, and leave the pair loop once
// r8 reaches r10.
LBB1_16:
	LONG $0x86448942; BYTE $0x04 // mov    dword [rsi + 4*r8 + 4], eax
	LONG $0x02c08349             // add    r8, 2
	WORD $0x394d; BYTE $0xc2     // cmp    r10, r8
	JE   LBB1_3

// First element of the pair: choose between 64-bit idiv and 32-bit div.
LBB1_10:
	LONG $0x8704634a         // movsxd    rax, dword [rdi + 4*r8]
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB1_11
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB1_13

// First element, 32-bit unsigned fast path.
LBB1_11:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the first quotient, then load and classify the second element.
LBB1_13:
	LONG $0x86048942             // mov    dword [rsi + 4*r8], eax
	LONG $0x8744634a; BYTE $0x04 // movsxd    rax, dword [rdi + 4*r8 + 4]
	WORD $0x8948; BYTE $0xc2     // mov    rdx, rax
	WORD $0x0948; BYTE $0xca     // or    rdx, rcx
	LONG $0x20eac148             // shr    rdx, 32
	JNE  LBB1_15
	WORD $0xd231                 // xor    edx, edx
	WORD $0xf1f7                 // div    ecx
	JMP  LBB1_16

// Trailing element, 32-bit unsigned fast path.
LBB1_5:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the trailing quotient.
LBB1_7:
	LONG $0x86048942 // mov    dword [rsi + 4*r8], eax

LBB1_8:
	RET

// _multiply_constant_int32_int64_avx2(src, dest, len, factor)
//
// Stores dest[i] = (int64)src[i] * factor for i in [0, len): each 4-byte
// source element is sign-extended to 64 bits, multiplied by the 64-bit
// factor in rcx, and the low 64 bits of the product are stored as an 8-byte
// element.
//
// For len > 15 the first (len & -16) elements are processed 16 at a time
// with AVX2; the remainder (or everything, when len <= 15) is handled one
// element at a time at LBB2_6. This routine performs no src/dest overlap
// check before entering the vector loop.
//
// AVX2 has no 64x64-bit multiply, so each product is assembled from 32-bit
// pieces with vpmuludq (unsigned 32x32->64 on the low dword of each lane):
//   product = lo(f)*lo(x) + ((hi(f)*lo(x) + lo(f)*hi(x)) << 32)
// where ymm0 holds f in every qword lane and ymm1 = ymm0 >> 32 holds hi(f).
//
// Register roles: rdi = src, rsi = dest, rcx = factor, r8 = len,
// rdx = len & -16 in the vector loop / index in the scalar loop,
// rax = vector-loop element index.
TEXT ·_multiply_constant_int32_int64_avx2(SB), $0-32

	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive 32-bit length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB2_7
	WORD $0x8941; BYTE $0xd0 // mov    r8d, edx
	// Inputs of 15 or fewer elements are handled entirely by the scalar loop.
	WORD $0xfa83; BYTE $0x0f // cmp    edx, 15
	JA   LBB2_3
	WORD $0xd231             // xor    edx, edx
	JMP  LBB2_6

// Vector setup: rdx = len & -16, ymm0 = factor broadcast to 4 qwords,
// ymm1 = high dword of factor in the low half of each qword lane.
LBB2_3:
	WORD $0x8944; BYTE $0xc2     // mov    edx, r8d
	WORD $0xe283; BYTE $0xf0     // and    edx, -16
	LONG $0x6ef9e1c4; BYTE $0xc1 // vmovq    xmm0, rcx
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	WORD $0xc031                 // xor    eax, eax
	LONG $0xd073f5c5; BYTE $0x20 // vpsrlq    ymm1, ymm0, 32

// Vector loop: 16 elements per iteration. Four sign-extending loads of
// 4 x int32 -> 4 x int64 (ymm2..ymm5), then the three-multiply sequence
// described in the header for each, then four 32-byte stores.
LBB2_4:
	LONG $0x257de2c4; WORD $0x8714             // vpmovsxdq    ymm2, oword [rdi + 4*rax]
	LONG $0x257de2c4; WORD $0x875c; BYTE $0x10 // vpmovsxdq    ymm3, oword [rdi + 4*rax + 16]
	LONG $0x257de2c4; WORD $0x8764; BYTE $0x20 // vpmovsxdq    ymm4, oword [rdi + 4*rax + 32]
	LONG $0x257de2c4; WORD $0x876c; BYTE $0x30 // vpmovsxdq    ymm5, oword [rdi + 4*rax + 48]
	LONG $0xf2f4f5c5                           // vpmuludq    ymm6, ymm1, ymm2
	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
	LONG $0xfff4fdc5                           // vpmuludq    ymm7, ymm0, ymm7
	LONG $0xf6d4c5c5                           // vpaddq    ymm6, ymm7, ymm6
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xd2f4fdc5                           // vpmuludq    ymm2, ymm0, ymm2
	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf3f4f5c5                           // vpmuludq    ymm6, ymm1, ymm3
	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
	LONG $0xfff4fdc5                           // vpmuludq    ymm7, ymm0, ymm7
	LONG $0xf6d4c5c5                           // vpaddq    ymm6, ymm7, ymm6
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xdbf4fdc5                           // vpmuludq    ymm3, ymm0, ymm3
	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf4f4f5c5                           // vpmuludq    ymm6, ymm1, ymm4
	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
	LONG $0xfff4fdc5                           // vpmuludq    ymm7, ymm0, ymm7
	LONG $0xf6d4c5c5                           // vpaddq    ymm6, ymm7, ymm6
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xe4f4fdc5                           // vpmuludq    ymm4, ymm0, ymm4
	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf5f4f5c5                           // vpmuludq    ymm6, ymm1, ymm5
	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
	LONG $0xfff4fdc5                           // vpmuludq    ymm7, ymm0, ymm7
	LONG $0xf6d4c5c5                           // vpaddq    ymm6, ymm7, ymm6
	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
	LONG $0xedf4fdc5                           // vpmuludq    ymm5, ymm0, ymm5
	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
	LONG $0x147ffec5; BYTE $0xc6               // vmovdqu    yword [rsi + 8*rax], ymm2
	LONG $0x5c7ffec5; WORD $0x20c6             // vmovdqu    yword [rsi + 8*rax + 32], ymm3
	LONG $0x647ffec5; WORD $0x40c6             // vmovdqu    yword [rsi + 8*rax + 64], ymm4
	LONG $0x6c7ffec5; WORD $0x60c6             // vmovdqu    yword [rsi + 8*rax + 96], ymm5
	LONG $0x10c08348                           // add    rax, 16
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JNE  LBB2_4
	// Done if len was a multiple of 16; otherwise continue at index rdx.
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JE   LBB2_7

// Scalar loop: one element per iteration from index rdx up to len.
LBB2_6:
	LONG $0x97046348         // movsxd    rax, dword [rdi + 4*rdx]
	LONG $0xc1af0f48         // imul    rax, rcx
	LONG $0xd6048948         // mov    qword [rsi + 8*rdx], rax
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3949; BYTE $0xd0 // cmp    r8, rdx
	JNE  LBB2_6

// Common exit (also reached from the scalar-only path).
LBB2_7:
	VZEROUPPER
	RET

// _divide_constant_int32_int64_avx2(src, dest, len, factor)
//
// Stores dest[i] = src[i] / factor for i in [0, len). Each 4-byte source
// element is sign-extended to 64 bits (movsxd), divided by the 64-bit
// factor in rcx, and the full 64-bit quotient (rax) is stored as an 8-byte
// element.
//
// Despite the name, the body is entirely scalar: no vector instructions are
// used, so it returns with a plain RET.
//
// Division selection, per element x:
//   - if (x | factor) has any bit set in the upper 32 bits: cqo + idiv rcx
//     (signed 64-bit division);
//   - otherwise both values are non-negative and fit in 32 bits, so the
//     unsigned 32-bit div ecx (with edx zeroed) is used; writing eax
//     zero-extends into rax, so the qword store holds the same quotient.
//
// NOTE: factor is not tested for zero anywhere in this routine.
//
// Loop shape: two elements per iteration over the first (len & -2) elements
// (LBB3_10 .. LBB3_16), then one trailing element if len is odd (LBB3_3).
// Register roles: rdi = src, rsi = dest, rcx = factor, r9 = len,
// r10 = len & -2, r8 = element index.
TEXT ·_divide_constant_int32_int64_avx2(SB), $0-32

	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive 32-bit length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB3_8
	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
	// len == 1 skips the pair loop and handles the single element below.
	WORD $0xfa83; BYTE $0x01 // cmp    edx, 1
	JNE  LBB3_9
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d

// Trailing element: only present when len is odd; r8 is its index.
LBB3_3:
	LONG $0x01c1f641         // test    r9b, 1
	JE   LBB3_8
	LONG $0x8704634a         // movsxd    rax, dword [rdi + 4*r8]
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB3_5
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB3_7

// Pair-loop setup: r10 = len rounded down to even, r8 = 0.
LBB3_9:
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0xfee28341         // and    r10d, -2
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
	JMP  LBB3_10

// Second element of the pair, 64-bit signed division.
LBB3_15:
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx

// Store the second quotient, advance by two, and leave the pair loop once
// r8 reaches r10.
LBB3_16:
	LONG $0xc644894a; BYTE $0x08 // mov    qword [rsi + 8*r8 + 8], rax
	LONG $0x02c08349             // add    r8, 2
	WORD $0x394d; BYTE $0xc2     // cmp    r10, r8
	JE   LBB3_3

// First element of the pair: choose between 64-bit idiv and 32-bit div.
LBB3_10:
	LONG $0x8704634a         // movsxd    rax, dword [rdi + 4*r8]
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB3_11
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB3_13

// First element, 32-bit unsigned fast path.
LBB3_11:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the first quotient, then load and classify the second element.
LBB3_13:
	LONG $0xc604894a             // mov    qword [rsi + 8*r8], rax
	LONG $0x8744634a; BYTE $0x04 // movsxd    rax, dword [rdi + 4*r8 + 4]
	WORD $0x8948; BYTE $0xc2     // mov    rdx, rax
	WORD $0x0948; BYTE $0xca     // or    rdx, rcx
	LONG $0x20eac148             // shr    rdx, 32
	JNE  LBB3_15
	WORD $0xd231                 // xor    edx, edx
	WORD $0xf1f7                 // div    ecx
	JMP  LBB3_16

// Trailing element, 32-bit unsigned fast path.
LBB3_5:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the trailing quotient.
LBB3_7:
	LONG $0xc604894a // mov    qword [rsi + 8*r8], rax

LBB3_8:
	RET

// _multiply_constant_int64_int32_avx2(src, dest, len, factor)
//
// Stores dest[i] = (low 32 bits of src[i]) * (low 32 bits of factor) for
// i in [0, len): source elements are 8 bytes wide, but only their low dword
// is read for the multiply, and the 32-bit product is stored as a 4-byte
// element.
//
// For len > 15 the first (len & -16) elements are processed 16 at a time
// with 128-bit vector instructions; the remainder (or everything, when
// len <= 15) is handled one element at a time at LBB4_6. This routine
// performs no src/dest overlap check before entering the vector loop.
//
// Register roles: rdi = src, rsi = dest, ecx = factor, r8 = len,
// rdx = len & -16 in the vector loop / index in the scalar loop,
// rax = vector-loop element index.
TEXT ·_multiply_constant_int64_int32_avx2(SB), $0-32

	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive 32-bit length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB4_7
	WORD $0x8941; BYTE $0xd0 // mov    r8d, edx
	// Inputs of 15 or fewer elements are handled entirely by the scalar loop.
	WORD $0xfa83; BYTE $0x0f // cmp    edx, 15
	JA   LBB4_3
	WORD $0xd231             // xor    edx, edx
	JMP  LBB4_6

// Vector setup: rdx = len & -16, ymm0 = factor broadcast to 4 qwords,
// xmm1 = upper 128 bits of ymm0 (same contents as xmm0).
LBB4_3:
	WORD $0x8944; BYTE $0xc2       // mov    edx, r8d
	WORD $0xe283; BYTE $0xf0       // and    edx, -16
	LONG $0x6ef9e1c4; BYTE $0xc1   // vmovq    xmm0, rcx
	LONG $0x597de2c4; BYTE $0xc0   // vpbroadcastq    ymm0, xmm0
	WORD $0xc031                   // xor    eax, eax
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1

// Vector loop: 16 elements per iteration, in four groups of 4.
// For each group, a 16-byte load plus vshufps imm 0x88 (dwords 0 and 2 of
// each operand) against the next 16 bytes gathers the low dwords of four
// consecutive int64 values into one xmm register. The same shuffle applied
// to xmm0/xmm1 yields four copies of the low dword of factor in xmm6; it is
// recomputed for every group.
LBB4_4:
	LONG $0x1410f8c5; BYTE $0xc7               // vmovups    xmm2, oword [rdi + 8*rax]
	LONG $0x5c10f8c5; WORD $0x20c7             // vmovups    xmm3, oword [rdi + 8*rax + 32]
	LONG $0x6410f8c5; WORD $0x40c7             // vmovups    xmm4, oword [rdi + 8*rax + 64]
	LONG $0x6c10f8c5; WORD $0x60c7             // vmovups    xmm5, oword [rdi + 8*rax + 96]
	LONG $0x54c6e8c5; WORD $0x10c7; BYTE $0x88 // vshufps    xmm2, xmm2, oword [rdi + 8*rax + 16], 136
	LONG $0xf1c6f8c5; BYTE $0x88               // vshufps    xmm6, xmm0, xmm1, 136
	LONG $0x4069e2c4; BYTE $0xd6               // vpmulld    xmm2, xmm2, xmm6
	LONG $0x5cc6e0c5; WORD $0x30c7; BYTE $0x88 // vshufps    xmm3, xmm3, oword [rdi + 8*rax + 48], 136
	LONG $0xf1c6f8c5; BYTE $0x88               // vshufps    xmm6, xmm0, xmm1, 136
	LONG $0x4061e2c4; BYTE $0xde               // vpmulld    xmm3, xmm3, xmm6
	LONG $0x64c6d8c5; WORD $0x50c7; BYTE $0x88 // vshufps    xmm4, xmm4, oword [rdi + 8*rax + 80], 136
	LONG $0xf1c6f8c5; BYTE $0x88               // vshufps    xmm6, xmm0, xmm1, 136
	LONG $0x4059e2c4; BYTE $0xe6               // vpmulld    xmm4, xmm4, xmm6
	LONG $0x6cc6d0c5; WORD $0x70c7; BYTE $0x88 // vshufps    xmm5, xmm5, oword [rdi + 8*rax + 112], 136
	LONG $0xf1c6f8c5; BYTE $0x88               // vshufps    xmm6, xmm0, xmm1, 136
	LONG $0x4051e2c4; BYTE $0xee               // vpmulld    xmm5, xmm5, xmm6
	LONG $0x147ffac5; BYTE $0x86               // vmovdqu    oword [rsi + 4*rax], xmm2
	LONG $0x5c7ffac5; WORD $0x1086             // vmovdqu    oword [rsi + 4*rax + 16], xmm3
	LONG $0x647ffac5; WORD $0x2086             // vmovdqu    oword [rsi + 4*rax + 32], xmm4
	LONG $0x6c7ffac5; WORD $0x3086             // vmovdqu    oword [rsi + 4*rax + 48], xmm5
	LONG $0x10c08348                           // add    rax, 16
	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
	JNE  LBB4_4
	// Done if len was a multiple of 16; otherwise continue at index rdx.
	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
	JE   LBB4_7

// Scalar loop: one element per iteration from index rdx up to len; reads
// only the low dword of each 8-byte source element.
LBB4_6:
	WORD $0x048b; BYTE $0xd7 // mov    eax, dword [rdi + 8*rdx]
	WORD $0xaf0f; BYTE $0xc1 // imul    eax, ecx
	WORD $0x0489; BYTE $0x96 // mov    dword [rsi + 4*rdx], eax
	LONG $0x01c28348         // add    rdx, 1
	WORD $0x3949; BYTE $0xd0 // cmp    r8, rdx
	JNE  LBB4_6

// Common exit (also reached from the scalar-only path).
LBB4_7:
	VZEROUPPER
	RET

// _divide_constant_int64_int32_avx2(src, dest, len, factor)
//
// Stores dest[i] = src[i] / factor for i in [0, len). Each 8-byte source
// element is divided by the 64-bit factor in rcx and the low 32 bits of the
// quotient (eax) are stored as a 4-byte element.
//
// Despite the name, the body is entirely scalar: no vector instructions are
// used, so it returns with a plain RET.
//
// Division selection, per element x:
//   - if (x | factor) has any bit set in the upper 32 bits: cqo + idiv rcx
//     (signed 64-bit division);
//   - otherwise both values are non-negative and fit in 32 bits, so the
//     cheaper unsigned 32-bit div ecx (with edx zeroed) gives the same quotient.
//
// NOTE: factor is not tested for zero anywhere in this routine.
//
// Loop shape: two elements per iteration over the first (len & -2) elements
// (LBB5_10 .. LBB5_16), then one trailing element if len is odd (LBB5_3).
// Register roles: rdi = src, rsi = dest, rcx = factor, r9 = len,
// r10 = len & -2, r8 = element index.
TEXT ·_divide_constant_int64_int32_avx2(SB), $0-32

	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive 32-bit length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB5_8
	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
	// len == 1 skips the pair loop and handles the single element below.
	WORD $0xfa83; BYTE $0x01 // cmp    edx, 1
	JNE  LBB5_9
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d

// Trailing element: only present when len is odd; r8 is its index.
LBB5_3:
	LONG $0x01c1f641         // test    r9b, 1
	JE   LBB5_8
	LONG $0xc7048b4a         // mov    rax, qword [rdi + 8*r8]
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB5_5
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB5_7

// Pair-loop setup: r10 = len rounded down to even, r8 = 0.
LBB5_9:
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0xfee28341         // and    r10d, -2
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
	JMP  LBB5_10

// Second element of the pair, 64-bit signed division.
LBB5_15:
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx

// Store the second quotient, advance by two, and leave the pair loop once
// r8 reaches r10.
LBB5_16:
	LONG $0x86448942; BYTE $0x04 // mov    dword [rsi + 4*r8 + 4], eax
	LONG $0x02c08349             // add    r8, 2
	WORD $0x394d; BYTE $0xc2     // cmp    r10, r8
	JE   LBB5_3

// First element of the pair: choose between 64-bit idiv and 32-bit div.
LBB5_10:
	LONG $0xc7048b4a         // mov    rax, qword [rdi + 8*r8]
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB5_11
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB5_13

// First element, 32-bit unsigned fast path.
LBB5_11:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the first quotient, then load and classify the second element.
LBB5_13:
	LONG $0x86048942             // mov    dword [rsi + 4*r8], eax
	LONG $0xc7448b4a; BYTE $0x08 // mov    rax, qword [rdi + 8*r8 + 8]
	WORD $0x8948; BYTE $0xc2     // mov    rdx, rax
	WORD $0x0948; BYTE $0xca     // or    rdx, rcx
	LONG $0x20eac148             // shr    rdx, 32
	JNE  LBB5_15
	WORD $0xd231                 // xor    edx, edx
	WORD $0xf1f7                 // div    ecx
	JMP  LBB5_16

// Trailing element, 32-bit unsigned fast path.
LBB5_5:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the trailing quotient.
LBB5_7:
	LONG $0x86048942 // mov    dword [rsi + 4*r8], eax

LBB5_8:
	RET

// _multiply_constant_int64_int64_avx2(src, dest, len, factor)
//
// Stores dest[i] = src[i] * factor (low 64 bits of the product) for
// i in [0, len), reading and writing 8-byte elements.
//
// Strategy:
//   - (int32)len <= 0: return immediately.
//   - len <= 15, or the 8*len-byte src and dest ranges overlap: scalar loop.
//   - otherwise: AVX2 on groups of 16 elements (unrolled to 32 per
//     iteration), followed by the scalar loop for the len % 16 leftovers.
//
// AVX2 has no 64x64-bit multiply, so each product is assembled from 32-bit
// pieces with vpmuludq (unsigned 32x32->64 on the low dword of each lane):
//   product = lo(x)*lo(f) + ((lo(x)*hi(f) + hi(x)*lo(f)) << 32)
// where ymm0 holds f in every qword lane and ymm1 = ymm0 >> 32 holds hi(f).
//
// Register roles: rdi = src, rsi = dest, rcx = factor, r8 = len
// (zero-extended), r11 = current scalar index / count done by the vector
// path, rax = vector-loop element index.
TEXT ·_multiply_constant_int64_int64_avx2(SB), $0-32

	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive 32-bit length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB6_16
	WORD $0x8941; BYTE $0xd0 // mov    r8d, edx
	// Short inputs (len <= 15) go straight to the scalar path.
	WORD $0xfa83; BYTE $0x0f // cmp    edx, 15
	JBE  LBB6_2
	// Overlap check: take the vector path only if src+8*len <= dest
	// or dest+8*len <= src.
	LONG $0xc7048d4a         // lea    rax, [rdi + 8*r8]
	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
	JBE  LBB6_9
	LONG $0xc6048d4a         // lea    rax, [rsi + 8*r8]
	WORD $0x3948; BYTE $0xf8 // cmp    rax, rdi
	JBE  LBB6_9

// Scalar path starting at element 0.
LBB6_2:
	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d

// Scalar loop setup for elements [r11, len):
// r9 = len - r11 - 1, rax = len & 3 = number of single-element steps taken
// before the 4x-unrolled loop.
LBB6_3:
	WORD $0x894d; BYTE $0xd9 // mov    r9, r11
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0x014d; BYTE $0xc1 // add    r9, r8
	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
	LONG $0x03e08348         // and    rax, 3
	JE   LBB6_5

// One element per iteration, rax times.
LBB6_4:
	LONG $0xdf148b4a // mov    rdx, qword [rdi + 8*r11]
	LONG $0xd1af0f48 // imul    rdx, rcx
	LONG $0xde14894a // mov    qword [rsi + 8*r11], rdx
	LONG $0x01c38349 // add    r11, 1
	LONG $0xffc08348 // add    rax, -1
	JNE  LBB6_4

// If fewer than 4 elements remained on entry to LBB6_3, the loop above
// already handled all of them.
LBB6_5:
	LONG $0x03f98349 // cmp    r9, 3
	JB   LBB6_16

// Four elements per iteration until r11 reaches len.
LBB6_6:
	LONG $0xdf048b4a             // mov    rax, qword [rdi + 8*r11]
	LONG $0xc1af0f48             // imul    rax, rcx
	LONG $0xde04894a             // mov    qword [rsi + 8*r11], rax
	LONG $0xdf448b4a; BYTE $0x08 // mov    rax, qword [rdi + 8*r11 + 8]
	LONG $0xc1af0f48             // imul    rax, rcx
	LONG $0xde44894a; BYTE $0x08 // mov    qword [rsi + 8*r11 + 8], rax
	LONG $0xdf448b4a; BYTE $0x10 // mov    rax, qword [rdi + 8*r11 + 16]
	LONG $0xc1af0f48             // imul    rax, rcx
	LONG $0xde44894a; BYTE $0x10 // mov    qword [rsi + 8*r11 + 16], rax
	LONG $0xdf448b4a; BYTE $0x18 // mov    rax, qword [rdi + 8*r11 + 24]
	LONG $0xc1af0f48             // imul    rax, rcx
	LONG $0xde44894a; BYTE $0x18 // mov    qword [rsi + 8*r11 + 24], rax
	LONG $0x04c38349             // add    r11, 4
	WORD $0x394d; BYTE $0xd8     // cmp    r8, r11
	JNE  LBB6_6
	JMP  LBB6_16

// Vector path setup:
// r11 = len & -16 (elements covered by the vector code),
// ymm0 = factor in all 4 qword lanes, ymm1 = ymm0 >> 32,
// r9 = (r11 - 16)/16 + 1 = number of 16-element groups,
// r10 = -(r9 & -2), counted up by 2 to zero in the unrolled loop.
// When there is exactly one group (r11 - 16 == 0) the unrolled loop is skipped.
LBB6_9:
	WORD $0x8945; BYTE $0xc3     // mov    r11d, r8d
	LONG $0xf0e38341             // and    r11d, -16
	LONG $0x6ef9e1c4; BYTE $0xc1 // vmovq    xmm0, rcx
	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
	LONG $0xf0438d49             // lea    rax, [r11 - 16]
	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
	LONG $0x04e9c149             // shr    r9, 4
	LONG $0x01c18349             // add    r9, 1
	LONG $0xd073f5c5; BYTE $0x20 // vpsrlq    ymm1, ymm0, 32
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	JE   LBB6_10
	WORD $0x894d; BYTE $0xca     // mov    r10, r9
	LONG $0xfee28349             // and    r10, -2
	WORD $0xf749; BYTE $0xda     // neg    r10
	WORD $0xc031                 // xor    eax, eax

// Unrolled vector loop: two 16-element groups (32 elements, 256 bytes) per
// iteration; rax is the element index. Each group loads four ymm registers,
// applies the three-multiply sequence from the header to each, and stores.
LBB6_12:
	LONG $0x146ffec5; BYTE $0xc7         // vmovdqu    ymm2, yword [rdi + 8*rax]
	LONG $0x5c6ffec5; WORD $0x20c7       // vmovdqu    ymm3, yword [rdi + 8*rax + 32]
	LONG $0x646ffec5; WORD $0x40c7       // vmovdqu    ymm4, yword [rdi + 8*rax + 64]
	LONG $0x6c6ffec5; WORD $0x60c7       // vmovdqu    ymm5, yword [rdi + 8*rax + 96]
	LONG $0xf1f4edc5                     // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                     // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                     // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                     // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                     // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                     // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                     // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                     // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                     // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                     // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                     // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                     // vpaddq    ymm5, ymm5, ymm6
	LONG $0x147ffec5; BYTE $0xc6         // vmovdqu    yword [rsi + 8*rax], ymm2
	LONG $0x5c7ffec5; WORD $0x20c6       // vmovdqu    yword [rsi + 8*rax + 32], ymm3
	LONG $0x647ffec5; WORD $0x40c6       // vmovdqu    yword [rsi + 8*rax + 64], ymm4
	LONG $0x6c7ffec5; WORD $0x60c6       // vmovdqu    yword [rsi + 8*rax + 96], ymm5
	QUAD $0x000080c7946ffec5; BYTE $0x00 // vmovdqu    ymm2, yword [rdi + 8*rax + 128]
	QUAD $0x0000a0c79c6ffec5; BYTE $0x00 // vmovdqu    ymm3, yword [rdi + 8*rax + 160]
	QUAD $0x0000c0c7a46ffec5; BYTE $0x00 // vmovdqu    ymm4, yword [rdi + 8*rax + 192]
	QUAD $0x0000e0c7ac6ffec5; BYTE $0x00 // vmovdqu    ymm5, yword [rdi + 8*rax + 224]
	LONG $0xf1f4edc5                     // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5                     // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5                     // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5                     // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5                     // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5                     // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5                     // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5                     // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5                     // vpaddq    ymm4, ymm4, ymm6
	LONG $0xf1f4d5c5                     // vpmuludq    ymm6, ymm5, ymm1
	LONG $0xd573c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm5, 32
	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
	LONG $0xe8f4d5c5                     // vpmuludq    ymm5, ymm5, ymm0
	LONG $0xeed4d5c5                     // vpaddq    ymm5, ymm5, ymm6
	QUAD $0x000080c6947ffec5; BYTE $0x00 // vmovdqu    yword [rsi + 8*rax + 128], ymm2
	QUAD $0x0000a0c69c7ffec5; BYTE $0x00 // vmovdqu    yword [rsi + 8*rax + 160], ymm3
	QUAD $0x0000c0c6a47ffec5; BYTE $0x00 // vmovdqu    yword [rsi + 8*rax + 192], ymm4
	QUAD $0x0000e0c6ac7ffec5; BYTE $0x00 // vmovdqu    yword [rsi + 8*rax + 224], ymm5
	LONG $0x20c08348                     // add    rax, 32
	LONG $0x02c28349                     // add    r10, 2
	JNE  LBB6_12
	// An odd group count leaves one more 16-element group to process.
	LONG $0x01c1f641                     // test    r9b, 1
	JE   LBB6_15

// Final single 16-element group. The computation for the fourth vector
// (ymm5) reuses ymm1 and ymm0 as scratch/result registers, overwriting the
// factor copies; no vector instruction follows this block.
LBB6_14:
	LONG $0x146ffec5; BYTE $0xc7   // vmovdqu    ymm2, yword [rdi + 8*rax]
	LONG $0x5c6ffec5; WORD $0x20c7 // vmovdqu    ymm3, yword [rdi + 8*rax + 32]
	LONG $0x646ffec5; WORD $0x40c7 // vmovdqu    ymm4, yword [rdi + 8*rax + 64]
	LONG $0x6c6ffec5; WORD $0x60c7 // vmovdqu    ymm5, yword [rdi + 8*rax + 96]
	LONG $0xf1f4edc5               // vpmuludq    ymm6, ymm2, ymm1
	LONG $0xd273c5c5; BYTE $0x20   // vpsrlq    ymm7, ymm2, 32
	LONG $0xf8f4c5c5               // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5               // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20   // vpsllq    ymm6, ymm6, 32
	LONG $0xd0f4edc5               // vpmuludq    ymm2, ymm2, ymm0
	LONG $0xd6d4edc5               // vpaddq    ymm2, ymm2, ymm6
	LONG $0xf1f4e5c5               // vpmuludq    ymm6, ymm3, ymm1
	LONG $0xd373c5c5; BYTE $0x20   // vpsrlq    ymm7, ymm3, 32
	LONG $0xf8f4c5c5               // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5               // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20   // vpsllq    ymm6, ymm6, 32
	LONG $0xd8f4e5c5               // vpmuludq    ymm3, ymm3, ymm0
	LONG $0xded4e5c5               // vpaddq    ymm3, ymm3, ymm6
	LONG $0xf1f4ddc5               // vpmuludq    ymm6, ymm4, ymm1
	LONG $0xd473c5c5; BYTE $0x20   // vpsrlq    ymm7, ymm4, 32
	LONG $0xf8f4c5c5               // vpmuludq    ymm7, ymm7, ymm0
	LONG $0xf7d4cdc5               // vpaddq    ymm6, ymm6, ymm7
	LONG $0xf673cdc5; BYTE $0x20   // vpsllq    ymm6, ymm6, 32
	LONG $0xe0f4ddc5               // vpmuludq    ymm4, ymm4, ymm0
	LONG $0xe6d4ddc5               // vpaddq    ymm4, ymm4, ymm6
	LONG $0xc9f4d5c5               // vpmuludq    ymm1, ymm5, ymm1
	LONG $0xd573cdc5; BYTE $0x20   // vpsrlq    ymm6, ymm5, 32
	LONG $0xf0f4cdc5               // vpmuludq    ymm6, ymm6, ymm0
	LONG $0xced4f5c5               // vpaddq    ymm1, ymm1, ymm6
	LONG $0xf173f5c5; BYTE $0x20   // vpsllq    ymm1, ymm1, 32
	LONG $0xc0f4d5c5               // vpmuludq    ymm0, ymm5, ymm0
	LONG $0xc1d4fdc5               // vpaddq    ymm0, ymm0, ymm1
	LONG $0x147ffec5; BYTE $0xc6   // vmovdqu    yword [rsi + 8*rax], ymm2
	LONG $0x5c7ffec5; WORD $0x20c6 // vmovdqu    yword [rsi + 8*rax + 32], ymm3
	LONG $0x647ffec5; WORD $0x40c6 // vmovdqu    yword [rsi + 8*rax + 64], ymm4
	LONG $0x447ffec5; WORD $0x60c6 // vmovdqu    yword [rsi + 8*rax + 96], ymm0

// If len is not a multiple of 16, finish the remaining elements in the
// scalar loop starting at index r11.
LBB6_15:
	WORD $0x394d; BYTE $0xc3 // cmp    r11, r8
	JNE  LBB6_3

// Common exit (also reached from the scalar-only paths).
LBB6_16:
	VZEROUPPER
	RET

// Exactly one 16-element group: skip the unrolled loop; r9 is 1 here, so
// the branch to LBB6_14 is taken.
LBB6_10:
	WORD $0xc031     // xor    eax, eax
	LONG $0x01c1f641 // test    r9b, 1
	JNE  LBB6_14
	JMP  LBB6_15

// _divide_constant_int64_int64_avx2(src, dest, len, factor)
//
// Stores dest[i] = src[i] / factor for i in [0, len). Each 8-byte source
// element is divided by the 64-bit factor in rcx and the full 64-bit
// quotient (rax) is stored as an 8-byte element.
//
// Despite the name, the body is entirely scalar: no vector instructions are
// used, so it returns with a plain RET.
//
// Division selection, per element x:
//   - if (x | factor) has any bit set in the upper 32 bits: cqo + idiv rcx
//     (signed 64-bit division);
//   - otherwise both values are non-negative and fit in 32 bits, so the
//     unsigned 32-bit div ecx (with edx zeroed) is used; writing eax
//     zero-extends into rax, so the qword store holds the same quotient.
//
// NOTE: factor is not tested for zero anywhere in this routine.
//
// Loop shape: two elements per iteration over the first (len & -2) elements
// (LBB7_10 .. LBB7_16), then one trailing element if len is odd (LBB7_3).
// Register roles: rdi = src, rsi = dest, rcx = factor, r9 = len,
// r10 = len & -2, r8 = element index.
TEXT ·_divide_constant_int64_int64_avx2(SB), $0-32

	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive 32-bit length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB7_8
	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
	// len == 1 skips the pair loop and handles the single element below.
	WORD $0xfa83; BYTE $0x01 // cmp    edx, 1
	JNE  LBB7_9
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d

// Trailing element: only present when len is odd; r8 is its index.
LBB7_3:
	LONG $0x01c1f641         // test    r9b, 1
	JE   LBB7_8
	LONG $0xc7048b4a         // mov    rax, qword [rdi + 8*r8]
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB7_5
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB7_7

// Pair-loop setup: r10 = len rounded down to even, r8 = 0.
LBB7_9:
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0xfee28341         // and    r10d, -2
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
	JMP  LBB7_10

// Second element of the pair, 64-bit signed division.
LBB7_15:
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx

// Store the second quotient, advance by two, and leave the pair loop once
// r8 reaches r10.
LBB7_16:
	LONG $0xc644894a; BYTE $0x08 // mov    qword [rsi + 8*r8 + 8], rax
	LONG $0x02c08349             // add    r8, 2
	WORD $0x394d; BYTE $0xc2     // cmp    r10, r8
	JE   LBB7_3

// First element of the pair: choose between 64-bit idiv and 32-bit div.
LBB7_10:
	LONG $0xc7048b4a         // mov    rax, qword [rdi + 8*r8]
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB7_11
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB7_13

// First element, 32-bit unsigned fast path.
LBB7_11:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the first quotient, then load and classify the second element.
LBB7_13:
	LONG $0xc604894a             // mov    qword [rsi + 8*r8], rax
	LONG $0xc7448b4a; BYTE $0x08 // mov    rax, qword [rdi + 8*r8 + 8]
	WORD $0x8948; BYTE $0xc2     // mov    rdx, rax
	WORD $0x0948; BYTE $0xca     // or    rdx, rcx
	LONG $0x20eac148             // shr    rdx, 32
	JNE  LBB7_15
	WORD $0xd231                 // xor    edx, edx
	WORD $0xf1f7                 // div    ecx
	JMP  LBB7_16

// Trailing element, 32-bit unsigned fast path.
LBB7_5:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the trailing quotient.
LBB7_7:
	LONG $0xc604894a // mov    qword [rsi + 8*r8], rax

LBB7_8:
	RET