//go:build go1.18 && !noasm && !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// _multiply_constant_int32_int32_sse4 computes dest[i] = src[i] * factor for
// i in [0, len), where src and dest are both addressed as arrays of 4-byte
// elements and the product is truncated to 32 bits.
//
// Arguments are four 8-byte slots read from the Go frame: src+0, dest+8,
// len+16 and factor+24. Only the low 32 bits of len are examined, as a signed
// value (len <= 0 returns immediately), and only the low 32 bits of factor
// take part in the multiplication.
// NOTE(review): the Go-side declaration is not visible here; presumably
// func(src, dest unsafe.Pointer, len int, factor int64) — confirm.
//
// Strategy:
//   - len <= 7, or src/dest ranges overlap: scalar code only (LBB0_2..LBB0_6).
//   - otherwise: PMULLD (SSE4.1) over len & -8 elements (LBB0_9..LBB0_14),
//     followed by the scalar code for whatever is left.
//
// Registers written: AX, CX, DX, SI, DI, R8, R9, R10, R11, X0, X1, X2.
// The body is raw machine code; the trailing comments give the Intel-syntax
// disassembly of each encoded instruction.
TEXT ·_multiply_constant_int32_int32_sse4(SB), $0-32

	// DI = src, SI = dest, DX = len, CX = factor (same registers the encoded
	// instructions below expect).
	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive (signed 32-bit) length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB0_16
	// r9 = len, zero-extended to 64 bits; used as the element count throughout.
	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
	// Fewer than 8 elements: not worth vectorising, go scalar.
	WORD $0xfa83; BYTE $0x07 // cmp    edx, 7
	JBE  LBB0_2
	// Overlap check: the vector path is taken only if src+4*len <= dest ...
	LONG $0x8f048d4a         // lea    rax, [rdi + 4*r9]
	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
	JBE  LBB0_9
	// ... or dest+4*len <= src. Otherwise the ranges overlap and we fall
	// through to the scalar path.
	LONG $0x8e048d4a         // lea    rax, [rsi + 4*r9]
	WORD $0x3948; BYTE $0xf8 // cmp    rax, rdi
	JBE  LBB0_9

// Scalar entry from the top: start at element index 0.
LBB0_2:
	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d

// Scalar path. r11 = index of the first unprocessed element (0, or the
// multiple of 8 left by the vector path), r9 = len.
// r8 = len - r11 - 1 (remaining elements minus one); rax = len & 3, which
// equals the remaining count mod 4 because r11 is a multiple of 4.
LBB0_3:
	WORD $0x894d; BYTE $0xd8 // mov    r8, r11
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x014d; BYTE $0xc8 // add    r8, r9
	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
	LONG $0x03e08348         // and    rax, 3
	JE   LBB0_5

// Peel loop: handle (len & 3) elements one at a time so that the count left
// for LBB0_6 is a multiple of 4.
LBB0_4:
	LONG $0x9f148b42         // mov    edx, dword [rdi + 4*r11]
	WORD $0xaf0f; BYTE $0xd1 // imul    edx, ecx
	LONG $0x9e148942         // mov    dword [rsi + 4*r11], edx
	LONG $0x01c38349         // add    r11, 1
	LONG $0xffc08348         // add    rax, -1
	JNE  LBB0_4

// If fewer than 4 elements were remaining on entry to LBB0_3 (r8 < 3), the
// peel loop already handled all of them.
LBB0_5:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB0_16

// Scalar loop unrolled 4x; runs until the index reaches len.
LBB0_6:
	LONG $0x9f048b42             // mov    eax, dword [rdi + 4*r11]
	WORD $0xaf0f; BYTE $0xc1     // imul    eax, ecx
	LONG $0x9e048942             // mov    dword [rsi + 4*r11], eax
	LONG $0x9f448b42; BYTE $0x04 // mov    eax, dword [rdi + 4*r11 + 4]
	WORD $0xaf0f; BYTE $0xc1     // imul    eax, ecx
	LONG $0x9e448942; BYTE $0x04 // mov    dword [rsi + 4*r11 + 4], eax
	LONG $0x9f448b42; BYTE $0x08 // mov    eax, dword [rdi + 4*r11 + 8]
	WORD $0xaf0f; BYTE $0xc1     // imul    eax, ecx
	LONG $0x9e448942; BYTE $0x08 // mov    dword [rsi + 4*r11 + 8], eax
	LONG $0x9f448b42; BYTE $0x0c // mov    eax, dword [rdi + 4*r11 + 12]
	WORD $0xaf0f; BYTE $0xc1     // imul    eax, ecx
	LONG $0x9e448942; BYTE $0x0c // mov    dword [rsi + 4*r11 + 12], eax
	LONG $0x04c38349             // add    r11, 4
	WORD $0x394d; BYTE $0xd9     // cmp    r9, r11
	JNE  LBB0_6
	JMP  LBB0_16

// Vector path setup.
//   r11  = len & -8: number of elements handled with SIMD.
//   xmm0 = low 32 bits of factor broadcast to all four lanes.
//   r8   = ((r11 - 8) >> 3) + 1: number of 8-element groups.
//   r10  = -(r8 & -2): negative counter, stepped by +2 per loop iteration.
// When r11 == 8 (a single group) the main loop is skipped via LBB0_10.
LBB0_9:
	WORD $0x8945; BYTE $0xcb     // mov    r11d, r9d
	LONG $0xf8e38341             // and    r11d, -8
	LONG $0xc16e0f66             // movd    xmm0, ecx
	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
	LONG $0xf8438d49             // lea    rax, [r11 - 8]
	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
	LONG $0x03e8c149             // shr    r8, 3
	LONG $0x01c08349             // add    r8, 1
	WORD $0x8548; BYTE $0xc0     // test    rax, rax
	JE   LBB0_10
	WORD $0x894d; BYTE $0xc2     // mov    r10, r8
	LONG $0xfee28349             // and    r10, -2
	WORD $0xf749; BYTE $0xda     // neg    r10
	WORD $0xc031                 // xor    eax, eax

// Main vector loop: two 8-element groups (16 elements, four unaligned
// 16-byte loads/stores) per iteration. rax is the element index.
LBB0_12:
	LONG $0x0c6f0ff3; BYTE $0x87   // movdqu    xmm1, oword [rdi + 4*rax]
	LONG $0x546f0ff3; WORD $0x1087 // movdqu    xmm2, oword [rdi + 4*rax + 16]
	LONG $0x40380f66; BYTE $0xc8   // pmulld    xmm1, xmm0
	LONG $0x40380f66; BYTE $0xd0   // pmulld    xmm2, xmm0
	LONG $0x0c7f0ff3; BYTE $0x86   // movdqu    oword [rsi + 4*rax], xmm1
	LONG $0x547f0ff3; WORD $0x1086 // movdqu    oword [rsi + 4*rax + 16], xmm2
	LONG $0x4c6f0ff3; WORD $0x2087 // movdqu    xmm1, oword [rdi + 4*rax + 32]
	LONG $0x546f0ff3; WORD $0x3087 // movdqu    xmm2, oword [rdi + 4*rax + 48]
	LONG $0x40380f66; BYTE $0xc8   // pmulld    xmm1, xmm0
	LONG $0x40380f66; BYTE $0xd0   // pmulld    xmm2, xmm0
	LONG $0x4c7f0ff3; WORD $0x2086 // movdqu    oword [rsi + 4*rax + 32], xmm1
	LONG $0x547f0ff3; WORD $0x3086 // movdqu    oword [rsi + 4*rax + 48], xmm2
	LONG $0x10c08348               // add    rax, 16
	LONG $0x02c28349               // add    r10, 2
	JNE  LBB0_12
	// An odd group count leaves one more 8-element group to process.
	LONG $0x01c0f641               // test    r8b, 1
	JE   LBB0_15

// Single leftover 8-element group at element index rax.
LBB0_14:
	LONG $0x0c6f0ff3; BYTE $0x87   // movdqu    xmm1, oword [rdi + 4*rax]
	LONG $0x546f0ff3; WORD $0x1087 // movdqu    xmm2, oword [rdi + 4*rax + 16]
	LONG $0x40380f66; BYTE $0xc8   // pmulld    xmm1, xmm0
	LONG $0x40380f66; BYTE $0xd0   // pmulld    xmm2, xmm0
	LONG $0x0c7f0ff3; BYTE $0x86   // movdqu    oword [rsi + 4*rax], xmm1
	LONG $0x547f0ff3; WORD $0x1086 // movdqu    oword [rsi + 4*rax + 16], xmm2

// Vector part done. If len was not a multiple of 8, finish the remaining
// elements in the scalar path starting at index r11.
LBB0_15:
	WORD $0x394d; BYTE $0xcb // cmp    r11, r9
	JNE  LBB0_3

LBB0_16:
	RET

// Reached when exactly one 8-element group exists (r11 == 8, so r8 == 1):
// start at index 0 and process that group in LBB0_14.
LBB0_10:
	WORD $0xc031     // xor    eax, eax
	LONG $0x01c0f641 // test    r8b, 1
	JNE  LBB0_14
	JMP  LBB0_15

// _divide_constant_int32_int32_sse4 computes dest[i] = src[i] / factor for
// i in [0, len). Each 4-byte source element is sign-extended to 64 bits,
// divided by the full 64-bit factor, and the low 32 bits of the quotient are
// stored as a 4-byte element in dest.
//
// Arguments are four 8-byte slots read from the Go frame: src+0, dest+8,
// len+16 and factor+24. Only the low 32 bits of len are examined, as a signed
// value; len <= 0 returns immediately.
//
// Every division first tests whether dividend and divisor both have all-zero
// upper 32 bits. If so, the 32-bit unsigned DIV is used; otherwise CQO
// followed by the 64-bit signed IDIV.
//
// NOTE(review): factor is never compared against zero in this routine, and
// DIV/IDIV raise a divide error on a zero divisor — callers presumably
// guarantee factor != 0; confirm.
//
// The loop is unrolled 2x with a single-element tail. Despite the _sse4
// suffix, no SSE instruction is used here. Registers written: AX, CX, DX, SI,
// DI, R8, R9, R10.
TEXT ·_divide_constant_int32_int32_sse4(SB), $0-32

	// DI = src, SI = dest, DX = len, CX = factor.
	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive (signed 32-bit) length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB1_8
	// r9 = len (zero-extended). len == 1 skips the paired loop entirely.
	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
	WORD $0xfa83; BYTE $0x01 // cmp    edx, 1
	JNE  LBB1_9
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d

// Tail: if len is odd, one element at index r8 is still unprocessed.
LBB1_3:
	LONG $0x01c1f641         // test    r9b, 1
	JE   LBB1_8
	LONG $0x8704634a         // movsxd    rax, dword [rdi + 4*r8]
	// (dividend | divisor) >> 32 == 0  =>  both fit in 32 unsigned bits.
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB1_5
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB1_7

// Paired-loop setup: r10 = len & -2 (elements handled two at a time),
// r8 = element index starting at 0.
LBB1_9:
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0xfee28341         // and    r10d, -2
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
	JMP  LBB1_10

// 64-bit signed division for the second element of the pair.
LBB1_15:
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx

// Store the second quotient, advance by two, and leave for the tail once the
// index reaches r10.
LBB1_16:
	LONG $0x86448942; BYTE $0x04 // mov    dword [rsi + 4*r8 + 4], eax
	LONG $0x02c08349             // add    r8, 2
	WORD $0x394d; BYTE $0xc2     // cmp    r10, r8
	JE   LBB1_3

// Loop head: first element of the pair, at index r8.
LBB1_10:
	LONG $0x8704634a         // movsxd    rax, dword [rdi + 4*r8]
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB1_11
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB1_13

// 32-bit unsigned division for the first element of the pair.
LBB1_11:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the first quotient, then load and classify the second element
// (index r8 + 1).
LBB1_13:
	LONG $0x86048942             // mov    dword [rsi + 4*r8], eax
	LONG $0x8744634a; BYTE $0x04 // movsxd    rax, dword [rdi + 4*r8 + 4]
	WORD $0x8948; BYTE $0xc2     // mov    rdx, rax
	WORD $0x0948; BYTE $0xca     // or    rdx, rcx
	LONG $0x20eac148             // shr    rdx, 32
	JNE  LBB1_15
	WORD $0xd231                 // xor    edx, edx
	WORD $0xf1f7                 // div    ecx
	JMP  LBB1_16

// 32-bit unsigned division for the tail element.
LBB1_5:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the tail quotient.
LBB1_7:
	LONG $0x86048942 // mov    dword [rsi + 4*r8], eax

LBB1_8:
	RET

// _multiply_constant_int32_int64_sse4 computes dest[i] = src[i] * factor for
// i in [0, len). Each 4-byte source element is sign-extended to 64 bits,
// multiplied by the full 64-bit factor, and the low 64 bits of the product
// are stored as an 8-byte element in dest.
//
// Arguments are four 8-byte slots read from the Go frame: src+0, dest+8,
// len+16 and factor+24. Only the low 32 bits of len are examined, as a signed
// value; len <= 0 returns immediately.
//
// The main loop is unrolled 4x and is followed by a one-element-at-a-time
// loop for the remaining len & 3 elements. Despite the _sse4 suffix, no SSE
// instruction is used here. Registers written: AX, CX, DX, SI, DI, R8, R9
// (SI and DI are reused as scratch in the remainder loop).
TEXT ·_multiply_constant_int32_int64_sse4(SB), $0-32

	// DI = src, SI = dest, DX = len, CX = factor.
	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive (signed 32-bit) length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB2_6
	// r9 = len (zero-extended), r8 = len & 3 (remainder count).
	// len - 1 >= 3, i.e. len >= 4, selects the unrolled loop.
	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
	LONG $0xff418d49         // lea    rax, [r9 - 1]
	WORD $0x8945; BYTE $0xc8 // mov    r8d, r9d
	LONG $0x03e08341         // and    r8d, 3
	LONG $0x03f88348         // cmp    rax, 3
	JAE  LBB2_7
	// Fewer than 4 elements: start the remainder loop at index 0.
	WORD $0xc031             // xor    eax, eax
	JMP  LBB2_3

// r9 = len & -4: number of elements covered by the unrolled loop.
LBB2_7:
	LONG $0xfce18341 // and    r9d, -4
	WORD $0xc031     // xor    eax, eax

// Unrolled loop: four elements per iteration, rax = element index.
LBB2_8:
	LONG $0x87146348             // movsxd    rdx, dword [rdi + 4*rax]
	LONG $0xd1af0f48             // imul    rdx, rcx
	LONG $0xc6148948             // mov    qword [rsi + 8*rax], rdx
	LONG $0x87546348; BYTE $0x04 // movsxd    rdx, dword [rdi + 4*rax + 4]
	LONG $0xd1af0f48             // imul    rdx, rcx
	LONG $0xc6548948; BYTE $0x08 // mov    qword [rsi + 8*rax + 8], rdx
	LONG $0x87546348; BYTE $0x08 // movsxd    rdx, dword [rdi + 4*rax + 8]
	LONG $0xd1af0f48             // imul    rdx, rcx
	LONG $0xc6548948; BYTE $0x10 // mov    qword [rsi + 8*rax + 16], rdx
	LONG $0x87546348; BYTE $0x0c // movsxd    rdx, dword [rdi + 4*rax + 12]
	LONG $0xd1af0f48             // imul    rdx, rcx
	LONG $0xc6548948; BYTE $0x18 // mov    qword [rsi + 8*rax + 24], rdx
	LONG $0x04c08348             // add    rax, 4
	WORD $0x3949; BYTE $0xc1     // cmp    r9, rax
	JNE  LBB2_8

// Remainder setup: if r8 != 0, rebase the pointers at element index rax
// (rdx = &dest[rax], rax = &src[rax]) and count from 0 in rsi.
LBB2_3:
	WORD $0x854d; BYTE $0xc0 // test    r8, r8
	JE   LBB2_6
	LONG $0xc6148d48         // lea    rdx, [rsi + 8*rax]
	LONG $0x87048d48         // lea    rax, [rdi + 4*rax]
	WORD $0xf631             // xor    esi, esi

// Remainder loop: one element per iteration, r8 iterations in total.
LBB2_5:
	LONG $0xb03c6348         // movsxd    rdi, dword [rax + 4*rsi]
	LONG $0xf9af0f48         // imul    rdi, rcx
	LONG $0xf23c8948         // mov    qword [rdx + 8*rsi], rdi
	LONG $0x01c68348         // add    rsi, 1
	WORD $0x3949; BYTE $0xf0 // cmp    r8, rsi
	JNE  LBB2_5

LBB2_6:
	RET

// _divide_constant_int32_int64_sse4 computes dest[i] = src[i] / factor for
// i in [0, len). Each 4-byte source element is sign-extended to 64 bits,
// divided by the full 64-bit factor, and the 64-bit quotient is stored as an
// 8-byte element in dest.
//
// Arguments are four 8-byte slots read from the Go frame: src+0, dest+8,
// len+16 and factor+24. Only the low 32 bits of len are examined, as a signed
// value; len <= 0 returns immediately.
//
// Every division first tests whether dividend and divisor both have all-zero
// upper 32 bits. If so, the 32-bit unsigned DIV is used (its write to eax
// leaves the upper half of rax zero, so the later 64-bit store is still the
// full quotient); otherwise CQO followed by the 64-bit signed IDIV.
//
// NOTE(review): factor is never compared against zero in this routine, and
// DIV/IDIV raise a divide error on a zero divisor — callers presumably
// guarantee factor != 0; confirm.
//
// The loop is unrolled 2x with a single-element tail. Despite the _sse4
// suffix, no SSE instruction is used here. Registers written: AX, CX, DX, SI,
// DI, R8, R9, R10.
TEXT ·_divide_constant_int32_int64_sse4(SB), $0-32

	// DI = src, SI = dest, DX = len, CX = factor.
	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive (signed 32-bit) length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB3_8
	// r9 = len (zero-extended). len == 1 skips the paired loop entirely.
	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
	WORD $0xfa83; BYTE $0x01 // cmp    edx, 1
	JNE  LBB3_9
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d

// Tail: if len is odd, one element at index r8 is still unprocessed.
LBB3_3:
	LONG $0x01c1f641         // test    r9b, 1
	JE   LBB3_8
	LONG $0x8704634a         // movsxd    rax, dword [rdi + 4*r8]
	// (dividend | divisor) >> 32 == 0  =>  both fit in 32 unsigned bits.
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB3_5
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB3_7

// Paired-loop setup: r10 = len & -2 (elements handled two at a time),
// r8 = element index starting at 0.
LBB3_9:
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0xfee28341         // and    r10d, -2
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
	JMP  LBB3_10

// 64-bit signed division for the second element of the pair.
LBB3_15:
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx

// Store the second quotient, advance by two, and leave for the tail once the
// index reaches r10.
LBB3_16:
	LONG $0xc644894a; BYTE $0x08 // mov    qword [rsi + 8*r8 + 8], rax
	LONG $0x02c08349             // add    r8, 2
	WORD $0x394d; BYTE $0xc2     // cmp    r10, r8
	JE   LBB3_3

// Loop head: first element of the pair, at index r8.
LBB3_10:
	LONG $0x8704634a         // movsxd    rax, dword [rdi + 4*r8]
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB3_11
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB3_13

// 32-bit unsigned division for the first element of the pair.
LBB3_11:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the first quotient, then load and classify the second element
// (index r8 + 1).
LBB3_13:
	LONG $0xc604894a             // mov    qword [rsi + 8*r8], rax
	LONG $0x8744634a; BYTE $0x04 // movsxd    rax, dword [rdi + 4*r8 + 4]
	WORD $0x8948; BYTE $0xc2     // mov    rdx, rax
	WORD $0x0948; BYTE $0xca     // or    rdx, rcx
	LONG $0x20eac148             // shr    rdx, 32
	JNE  LBB3_15
	WORD $0xd231                 // xor    edx, edx
	WORD $0xf1f7                 // div    ecx
	JMP  LBB3_16

// 32-bit unsigned division for the tail element.
LBB3_5:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the tail quotient.
LBB3_7:
	LONG $0xc604894a // mov    qword [rsi + 8*r8], rax

LBB3_8:
	RET

// _multiply_constant_int64_int32_sse4 computes dest[i] = src[i] * factor for
// i in [0, len), where src is addressed with an 8-byte stride and dest with a
// 4-byte stride. Only the low 4 bytes of each source element are loaded, and
// they are multiplied by the low 32 bits of factor; the 32-bit product is
// stored to dest.
//
// Arguments are four 8-byte slots read from the Go frame: src+0, dest+8,
// len+16 and factor+24. Only the low 32 bits of len are examined, as a signed
// value; len <= 0 returns immediately.
//
// The main loop is unrolled 4x and is followed by a one-element-at-a-time
// loop for the remaining len & 3 elements. Despite the _sse4 suffix, no SSE
// instruction is used here. Registers written: AX, CX, DX, SI, DI, R8, R9
// (SI and DI are reused as scratch in the remainder loop).
TEXT ·_multiply_constant_int64_int32_sse4(SB), $0-32

	// DI = src, SI = dest, DX = len, CX = factor.
	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive (signed 32-bit) length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB4_6
	// r9 = len (zero-extended), r8 = len & 3 (remainder count).
	// len - 1 >= 3, i.e. len >= 4, selects the unrolled loop.
	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
	LONG $0xff418d49         // lea    rax, [r9 - 1]
	WORD $0x8945; BYTE $0xc8 // mov    r8d, r9d
	LONG $0x03e08341         // and    r8d, 3
	LONG $0x03f88348         // cmp    rax, 3
	JAE  LBB4_7
	// Fewer than 4 elements: start the remainder loop at index 0.
	WORD $0xc031             // xor    eax, eax
	JMP  LBB4_3

// r9 = len & -4: number of elements covered by the unrolled loop.
LBB4_7:
	LONG $0xfce18341 // and    r9d, -4
	WORD $0xc031     // xor    eax, eax

// Unrolled loop: four elements per iteration, rax = element index.
LBB4_8:
	WORD $0x148b; BYTE $0xc7 // mov    edx, dword [rdi + 8*rax]
	WORD $0xaf0f; BYTE $0xd1 // imul    edx, ecx
	WORD $0x1489; BYTE $0x86 // mov    dword [rsi + 4*rax], edx
	LONG $0x08c7548b         // mov    edx, dword [rdi + 8*rax + 8]
	WORD $0xaf0f; BYTE $0xd1 // imul    edx, ecx
	LONG $0x04865489         // mov    dword [rsi + 4*rax + 4], edx
	LONG $0x10c7548b         // mov    edx, dword [rdi + 8*rax + 16]
	WORD $0xaf0f; BYTE $0xd1 // imul    edx, ecx
	LONG $0x08865489         // mov    dword [rsi + 4*rax + 8], edx
	LONG $0x18c7548b         // mov    edx, dword [rdi + 8*rax + 24]
	WORD $0xaf0f; BYTE $0xd1 // imul    edx, ecx
	LONG $0x0c865489         // mov    dword [rsi + 4*rax + 12], edx
	LONG $0x04c08348         // add    rax, 4
	WORD $0x3949; BYTE $0xc1 // cmp    r9, rax
	JNE  LBB4_8

// Remainder setup: if r8 != 0, rebase the pointers at element index rax
// (rdx = &dest[rax], rax = &src[rax]) and count from 0 in rsi.
LBB4_3:
	WORD $0x854d; BYTE $0xc0 // test    r8, r8
	JE   LBB4_6
	LONG $0x86148d48         // lea    rdx, [rsi + 4*rax]
	LONG $0xc7048d48         // lea    rax, [rdi + 8*rax]
	WORD $0xf631             // xor    esi, esi

// Remainder loop: one element per iteration, r8 iterations in total.
LBB4_5:
	WORD $0x3c8b; BYTE $0xf0 // mov    edi, dword [rax + 8*rsi]
	WORD $0xaf0f; BYTE $0xf9 // imul    edi, ecx
	WORD $0x3c89; BYTE $0xb2 // mov    dword [rdx + 4*rsi], edi
	LONG $0x01c68348         // add    rsi, 1
	WORD $0x3949; BYTE $0xf0 // cmp    r8, rsi
	JNE  LBB4_5

LBB4_6:
	RET

// _divide_constant_int64_int32_sse4 computes dest[i] = src[i] / factor for
// i in [0, len). Each 8-byte source element is divided by the full 64-bit
// factor and the low 32 bits of the quotient are stored as a 4-byte element
// in dest.
//
// Arguments are four 8-byte slots read from the Go frame: src+0, dest+8,
// len+16 and factor+24. Only the low 32 bits of len are examined, as a signed
// value; len <= 0 returns immediately.
//
// Every division first tests whether dividend and divisor both have all-zero
// upper 32 bits. If so, the 32-bit unsigned DIV is used; otherwise CQO
// followed by the 64-bit signed IDIV.
//
// NOTE(review): factor is never compared against zero in this routine, and
// DIV/IDIV raise a divide error on a zero divisor — callers presumably
// guarantee factor != 0; confirm.
//
// The loop is unrolled 2x with a single-element tail. Despite the _sse4
// suffix, no SSE instruction is used here. Registers written: AX, CX, DX, SI,
// DI, R8, R9, R10.
TEXT ·_divide_constant_int64_int32_sse4(SB), $0-32

	// DI = src, SI = dest, DX = len, CX = factor.
	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive (signed 32-bit) length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB5_8
	// r9 = len (zero-extended). len == 1 skips the paired loop entirely.
	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
	WORD $0xfa83; BYTE $0x01 // cmp    edx, 1
	JNE  LBB5_9
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d

// Tail: if len is odd, one element at index r8 is still unprocessed.
LBB5_3:
	LONG $0x01c1f641         // test    r9b, 1
	JE   LBB5_8
	LONG $0xc7048b4a         // mov    rax, qword [rdi + 8*r8]
	// (dividend | divisor) >> 32 == 0  =>  both fit in 32 unsigned bits.
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB5_5
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB5_7

// Paired-loop setup: r10 = len & -2 (elements handled two at a time),
// r8 = element index starting at 0.
LBB5_9:
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0xfee28341         // and    r10d, -2
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
	JMP  LBB5_10

// 64-bit signed division for the second element of the pair.
LBB5_15:
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx

// Store the second quotient (low 32 bits), advance by two, and leave for the
// tail once the index reaches r10.
LBB5_16:
	LONG $0x86448942; BYTE $0x04 // mov    dword [rsi + 4*r8 + 4], eax
	LONG $0x02c08349             // add    r8, 2
	WORD $0x394d; BYTE $0xc2     // cmp    r10, r8
	JE   LBB5_3

// Loop head: first element of the pair, at index r8.
LBB5_10:
	LONG $0xc7048b4a         // mov    rax, qword [rdi + 8*r8]
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB5_11
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB5_13

// 32-bit unsigned division for the first element of the pair.
LBB5_11:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the first quotient (low 32 bits), then load and classify the second
// element (index r8 + 1).
LBB5_13:
	LONG $0x86048942             // mov    dword [rsi + 4*r8], eax
	LONG $0xc7448b4a; BYTE $0x08 // mov    rax, qword [rdi + 8*r8 + 8]
	WORD $0x8948; BYTE $0xc2     // mov    rdx, rax
	WORD $0x0948; BYTE $0xca     // or    rdx, rcx
	LONG $0x20eac148             // shr    rdx, 32
	JNE  LBB5_15
	WORD $0xd231                 // xor    edx, edx
	WORD $0xf1f7                 // div    ecx
	JMP  LBB5_16

// 32-bit unsigned division for the tail element.
LBB5_5:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the tail quotient (low 32 bits).
LBB5_7:
	LONG $0x86048942 // mov    dword [rsi + 4*r8], eax

LBB5_8:
	RET

// _multiply_constant_int64_int64_sse4 computes dest[i] = src[i] * factor for
// i in [0, len), where src and dest are both addressed as arrays of 8-byte
// elements. The multiplication is a 64-bit IMUL, so the low 64 bits of the
// product are stored.
//
// Arguments are four 8-byte slots read from the Go frame: src+0, dest+8,
// len+16 and factor+24. Only the low 32 bits of len are examined, as a signed
// value; len <= 0 returns immediately.
//
// The main loop is unrolled 4x and is followed by a one-element-at-a-time
// loop for the remaining len & 3 elements. Despite the _sse4 suffix, no SSE
// instruction is used here. Registers written: AX, CX, DX, SI, DI, R8, R9
// (SI and DI are reused as scratch in the remainder loop).
TEXT ·_multiply_constant_int64_int64_sse4(SB), $0-32

	// DI = src, SI = dest, DX = len, CX = factor.
	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive (signed 32-bit) length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB6_6
	// r9 = len (zero-extended), r8 = len & 3 (remainder count).
	// len - 1 >= 3, i.e. len >= 4, selects the unrolled loop.
	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
	LONG $0xff418d49         // lea    rax, [r9 - 1]
	WORD $0x8945; BYTE $0xc8 // mov    r8d, r9d
	LONG $0x03e08341         // and    r8d, 3
	LONG $0x03f88348         // cmp    rax, 3
	JAE  LBB6_7
	// Fewer than 4 elements: start the remainder loop at index 0.
	WORD $0xc031             // xor    eax, eax
	JMP  LBB6_3

// r9 = len & -4: number of elements covered by the unrolled loop.
LBB6_7:
	LONG $0xfce18341 // and    r9d, -4
	WORD $0xc031     // xor    eax, eax

// Unrolled loop: four elements per iteration, rax = element index.
LBB6_8:
	LONG $0xc7148b48             // mov    rdx, qword [rdi + 8*rax]
	LONG $0xd1af0f48             // imul    rdx, rcx
	LONG $0xc6148948             // mov    qword [rsi + 8*rax], rdx
	LONG $0xc7548b48; BYTE $0x08 // mov    rdx, qword [rdi + 8*rax + 8]
	LONG $0xd1af0f48             // imul    rdx, rcx
	LONG $0xc6548948; BYTE $0x08 // mov    qword [rsi + 8*rax + 8], rdx
	LONG $0xc7548b48; BYTE $0x10 // mov    rdx, qword [rdi + 8*rax + 16]
	LONG $0xd1af0f48             // imul    rdx, rcx
	LONG $0xc6548948; BYTE $0x10 // mov    qword [rsi + 8*rax + 16], rdx
	LONG $0xc7548b48; BYTE $0x18 // mov    rdx, qword [rdi + 8*rax + 24]
	LONG $0xd1af0f48             // imul    rdx, rcx
	LONG $0xc6548948; BYTE $0x18 // mov    qword [rsi + 8*rax + 24], rdx
	LONG $0x04c08348             // add    rax, 4
	WORD $0x3949; BYTE $0xc1     // cmp    r9, rax
	JNE  LBB6_8

// Remainder setup: if r8 != 0, rebase the pointers at element index rax
// (rdx = &dest[rax], rax = &src[rax]) and count from 0 in rsi.
LBB6_3:
	WORD $0x854d; BYTE $0xc0 // test    r8, r8
	JE   LBB6_6
	LONG $0xc6148d48         // lea    rdx, [rsi + 8*rax]
	LONG $0xc7048d48         // lea    rax, [rdi + 8*rax]
	WORD $0xf631             // xor    esi, esi

// Remainder loop: one element per iteration, r8 iterations in total.
LBB6_5:
	LONG $0xf03c8b48         // mov    rdi, qword [rax + 8*rsi]
	LONG $0xf9af0f48         // imul    rdi, rcx
	LONG $0xf23c8948         // mov    qword [rdx + 8*rsi], rdi
	LONG $0x01c68348         // add    rsi, 1
	WORD $0x3949; BYTE $0xf0 // cmp    r8, rsi
	JNE  LBB6_5

LBB6_6:
	RET

// _divide_constant_int64_int64_sse4 computes dest[i] = src[i] / factor for
// i in [0, len), where src and dest are both addressed as arrays of 8-byte
// elements and the 64-bit quotient is stored.
//
// Arguments are four 8-byte slots read from the Go frame: src+0, dest+8,
// len+16 and factor+24. Only the low 32 bits of len are examined, as a signed
// value; len <= 0 returns immediately.
//
// Every division first tests whether dividend and divisor both have all-zero
// upper 32 bits. If so, the 32-bit unsigned DIV is used (its write to eax
// leaves the upper half of rax zero, so the later 64-bit store is still the
// full quotient); otherwise CQO followed by the 64-bit signed IDIV.
//
// NOTE(review): factor is never compared against zero in this routine, and
// DIV/IDIV raise a divide error on a zero divisor — callers presumably
// guarantee factor != 0; confirm.
//
// The loop is unrolled 2x with a single-element tail. Despite the _sse4
// suffix, no SSE instruction is used here. Registers written: AX, CX, DX, SI,
// DI, R8, R9, R10.
TEXT ·_divide_constant_int64_int64_sse4(SB), $0-32

	// DI = src, SI = dest, DX = len, CX = factor.
	MOVQ src+0(FP), DI
	MOVQ dest+8(FP), SI
	MOVQ len+16(FP), DX
	MOVQ factor+24(FP), CX

	// Nothing to do for a non-positive (signed 32-bit) length.
	WORD $0xd285             // test    edx, edx
	JLE  LBB7_8
	// r9 = len (zero-extended). len == 1 skips the paired loop entirely.
	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
	WORD $0xfa83; BYTE $0x01 // cmp    edx, 1
	JNE  LBB7_9
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d

// Tail: if len is odd, one element at index r8 is still unprocessed.
LBB7_3:
	LONG $0x01c1f641         // test    r9b, 1
	JE   LBB7_8
	LONG $0xc7048b4a         // mov    rax, qword [rdi + 8*r8]
	// (dividend | divisor) >> 32 == 0  =>  both fit in 32 unsigned bits.
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB7_5
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB7_7

// Paired-loop setup: r10 = len & -2 (elements handled two at a time),
// r8 = element index starting at 0.
LBB7_9:
	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
	LONG $0xfee28341         // and    r10d, -2
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
	JMP  LBB7_10

// 64-bit signed division for the second element of the pair.
LBB7_15:
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx

// Store the second quotient, advance by two, and leave for the tail once the
// index reaches r10.
LBB7_16:
	LONG $0xc644894a; BYTE $0x08 // mov    qword [rsi + 8*r8 + 8], rax
	LONG $0x02c08349             // add    r8, 2
	WORD $0x394d; BYTE $0xc2     // cmp    r10, r8
	JE   LBB7_3

// Loop head: first element of the pair, at index r8.
LBB7_10:
	LONG $0xc7048b4a         // mov    rax, qword [rdi + 8*r8]
	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
	WORD $0x0948; BYTE $0xca // or    rdx, rcx
	LONG $0x20eac148         // shr    rdx, 32
	JE   LBB7_11
	WORD $0x9948             // cqo
	WORD $0xf748; BYTE $0xf9 // idiv    rcx
	JMP  LBB7_13

// 32-bit unsigned division for the first element of the pair.
LBB7_11:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the first quotient, then load and classify the second element
// (index r8 + 1).
LBB7_13:
	LONG $0xc604894a             // mov    qword [rsi + 8*r8], rax
	LONG $0xc7448b4a; BYTE $0x08 // mov    rax, qword [rdi + 8*r8 + 8]
	WORD $0x8948; BYTE $0xc2     // mov    rdx, rax
	WORD $0x0948; BYTE $0xca     // or    rdx, rcx
	LONG $0x20eac148             // shr    rdx, 32
	JNE  LBB7_15
	WORD $0xd231                 // xor    edx, edx
	WORD $0xf1f7                 // div    ecx
	JMP  LBB7_16

// 32-bit unsigned division for the tail element.
LBB7_5:
	WORD $0xd231 // xor    edx, edx
	WORD $0xf1f7 // div    ecx

// Store the tail quotient.
LBB7_7:
	LONG $0xc604894a // mov    qword [rsi + 8*r8], rax

LBB7_8:
	RET