//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// _bitmap_aligned_and_avx2 writes out[i] = left[i] & right[i] for every byte
// index i in [0, length).
//
// Arguments, read from the caller's frame (32 bytes of arguments, no locals):
//   left+0(FP)    -> DI (rdi)  first input buffer
//   right+8(FP)   -> SI (rsi)  second input buffer
//   out+16(FP)    -> DX (rdx)  output buffer
//   length+24(FP) -> CX (rcx)  byte count; values <= 0 (signed) are a no-op
//
// The instructions are emitted as raw opcode bytes (WORD/LONG/BYTE); the
// trailing comment on each line is the Intel-syntax disassembly.
// Scratch registers written: AX, BX, R8-R11 and Y0-Y3.
// NOTE(review): the Go-side declaration is not visible here; the arguments
// are presumably three pointers and an integer length -- confirm.
TEXT ·_bitmap_aligned_and_avx2(SB), $0-32

	MOVQ left+0(FP), DI
	MOVQ right+8(FP), SI
	MOVQ out+16(FP), DX
	MOVQ length+24(FP), CX

	// length <= 0: return immediately. length <= 127: skip the vector path
	// and let the scalar code at LBB0_3 do everything from index r10 = 0.
	WORD $0x8548; BYTE $0xc9 // test    rcx, rcx
	JLE  LBB0_12
	LONG $0x7ff98348         // cmp    rcx, 127
	JA   LBB0_7
	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
	JMP  LBB0_3

	// length >= 128: check whether out overlaps either input.
	//   r11b = (left  + length > out),  bl  = (out + length > left)
	//   r8b  = (right + length > out),  r9b = (out + length > right)
	// If out overlaps left (r11b and bl both set) or right (r8b and r9b both
	// set), fall back to the scalar code with r10 = 0.
LBB0_7:
	LONG $0x0a0c8d4c         // lea    r9, [rdx + rcx]
	LONG $0x0f048d48         // lea    rax, [rdi + rcx]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	LONG $0xd3970f41         // seta    r11b
	LONG $0x0e048d48         // lea    rax, [rsi + rcx]
	WORD $0x3949; BYTE $0xf9 // cmp    r9, rdi
	WORD $0x970f; BYTE $0xd3 // seta    bl
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	LONG $0xd0970f41         // seta    r8b
	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
	LONG $0xd1970f41         // seta    r9b
	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
	WORD $0x8441; BYTE $0xdb // test    r11b, bl
	JNE  LBB0_3
	WORD $0x2045; BYTE $0xc8 // and    r8b, r9b
	JNE  LBB0_3
	// No overlap: r10 = length rounded down to a multiple of 128 (the vector
	// loop bound), r8 = 0 (running byte offset).
	WORD $0x8949; BYTE $0xca // mov    r10, rcx
	LONG $0x80e28349         // and    r10, -128
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d

	// Vector loop: each iteration loads 4 x 32 bytes from right, ANDs them
	// with the matching 32-byte groups of left, and stores the results to out.
	// The offset r8 then advances by 128 (encoded as subtracting -128) until
	// it equals r10.
LBB0_10:
	LONG $0x107ca1c4; WORD $0x0604             // vmovups    ymm0, yword [rsi + r8]
	LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups    ymm1, yword [rsi + r8 + 32]
	LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups    ymm2, yword [rsi + r8 + 64]
	LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 // vmovups    ymm3, yword [rsi + r8 + 96]
	LONG $0x547ca1c4; WORD $0x0704             // vandps    ymm0, ymm0, yword [rdi + r8]
	LONG $0x5474a1c4; WORD $0x074c; BYTE $0x20 // vandps    ymm1, ymm1, yword [rdi + r8 + 32]
	LONG $0x546ca1c4; WORD $0x0754; BYTE $0x40 // vandps    ymm2, ymm2, yword [rdi + r8 + 64]
	LONG $0x5464a1c4; WORD $0x075c; BYTE $0x60 // vandps    ymm3, ymm3, yword [rdi + r8 + 96]
	LONG $0x117ca1c4; WORD $0x0204             // vmovups    yword [rdx + r8], ymm0
	LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups    yword [rdx + r8 + 32], ymm1
	LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups    yword [rdx + r8 + 64], ymm2
	LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups    yword [rdx + r8 + 96], ymm3
	LONG $0x80e88349                           // sub    r8, -128
	WORD $0x394d; BYTE $0xc2                   // cmp    r10, r8
	JNE  LBB0_10
	// If length was an exact multiple of 128 there is no tail left to process.
	WORD $0x3949; BYTE $0xca                   // cmp    r10, rcx
	JE   LBB0_12

	// Scalar tail, starting at index r10 (0 or a multiple of 128):
	//   r8 = length - r10 - 1   (computed as ~r10 + length)
	//   r9 = length & 3         (single bytes to handle before the 4x loop)
LBB0_3:
	WORD $0x894d; BYTE $0xd0 // mov    r8, r10
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x0149; BYTE $0xc8 // add    r8, rcx
	WORD $0x8949; BYTE $0xc9 // mov    r9, rcx
	LONG $0x03e18349         // and    r9, 3
	JE   LBB0_5

	// One byte per iteration, repeated r9 times.
LBB0_4:
	LONG $0x04b60f42; BYTE $0x16 // movzx    eax, byte [rsi + r10]
	LONG $0x17042242             // and    al, byte [rdi + r10]
	LONG $0x12048842             // mov    byte [rdx + r10], al
	LONG $0x01c28349             // add    r10, 1
	LONG $0xffc18349             // add    r9, -1
	JNE  LBB0_4

	// r8 < 3 (unsigned) means at most 3 bytes were pending at LBB0_3, all of
	// which the single-byte loop above has already handled.
LBB0_5:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB0_12

	// Four bytes per iteration until index r10 reaches length.
LBB0_6:
	LONG $0x04b60f42; BYTE $0x16   // movzx    eax, byte [rsi + r10]
	LONG $0x17042242               // and    al, byte [rdi + r10]
	LONG $0x12048842               // mov    byte [rdx + r10], al
	LONG $0x44b60f42; WORD $0x0116 // movzx    eax, byte [rsi + r10 + 1]
	LONG $0x17442242; BYTE $0x01   // and    al, byte [rdi + r10 + 1]
	LONG $0x12448842; BYTE $0x01   // mov    byte [rdx + r10 + 1], al
	LONG $0x44b60f42; WORD $0x0216 // movzx    eax, byte [rsi + r10 + 2]
	LONG $0x17442242; BYTE $0x02   // and    al, byte [rdi + r10 + 2]
	LONG $0x12448842; BYTE $0x02   // mov    byte [rdx + r10 + 2], al
	LONG $0x44b60f42; WORD $0x0316 // movzx    eax, byte [rsi + r10 + 3]
	LONG $0x17442242; BYTE $0x03   // and    al, byte [rdi + r10 + 3]
	LONG $0x12448842; BYTE $0x03   // mov    byte [rdx + r10 + 3], al
	LONG $0x04c28349               // add    r10, 4
	WORD $0x394c; BYTE $0xd1       // cmp    rcx, r10
	JNE  LBB0_6

	// Common exit: clear the upper halves of the YMM registers, then return.
LBB0_12:
	VZEROUPPER
	RET

// _bitmap_aligned_or_avx2 writes out[i] = left[i] | right[i] for every byte
// index i in [0, length).
//
// Arguments, read from the caller's frame (32 bytes of arguments, no locals):
//   left+0(FP)    -> DI (rdi)  first input buffer
//   right+8(FP)   -> SI (rsi)  second input buffer
//   out+16(FP)    -> DX (rdx)  output buffer
//   length+24(FP) -> CX (rcx)  byte count; values <= 0 (signed) are a no-op
//
// The instructions are emitted as raw opcode bytes (WORD/LONG/BYTE); the
// trailing comment on each line is the Intel-syntax disassembly.
// Scratch registers written: AX, BX, R8-R11 and Y0-Y3.
// NOTE(review): the Go-side declaration is not visible here; the arguments
// are presumably three pointers and an integer length -- confirm.
TEXT ·_bitmap_aligned_or_avx2(SB), $0-32

	MOVQ left+0(FP), DI
	MOVQ right+8(FP), SI
	MOVQ out+16(FP), DX
	MOVQ length+24(FP), CX

	// length <= 0: return immediately. length <= 127: skip the vector path
	// and let the scalar code at LBB1_3 do everything from index r10 = 0.
	WORD $0x8548; BYTE $0xc9 // test    rcx, rcx
	JLE  LBB1_12
	LONG $0x7ff98348         // cmp    rcx, 127
	JA   LBB1_7
	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
	JMP  LBB1_3

	// length >= 128: check whether out overlaps either input.
	//   r11b = (left  + length > out),  bl  = (out + length > left)
	//   r8b  = (right + length > out),  r9b = (out + length > right)
	// If out overlaps left (r11b and bl both set) or right (r8b and r9b both
	// set), fall back to the scalar code with r10 = 0.
LBB1_7:
	LONG $0x0a0c8d4c         // lea    r9, [rdx + rcx]
	LONG $0x0f048d48         // lea    rax, [rdi + rcx]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	LONG $0xd3970f41         // seta    r11b
	LONG $0x0e048d48         // lea    rax, [rsi + rcx]
	WORD $0x3949; BYTE $0xf9 // cmp    r9, rdi
	WORD $0x970f; BYTE $0xd3 // seta    bl
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	LONG $0xd0970f41         // seta    r8b
	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
	LONG $0xd1970f41         // seta    r9b
	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
	WORD $0x8441; BYTE $0xdb // test    r11b, bl
	JNE  LBB1_3
	WORD $0x2045; BYTE $0xc8 // and    r8b, r9b
	JNE  LBB1_3
	// No overlap: r10 = length rounded down to a multiple of 128 (the vector
	// loop bound), r8 = 0 (running byte offset).
	WORD $0x8949; BYTE $0xca // mov    r10, rcx
	LONG $0x80e28349         // and    r10, -128
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d

	// Vector loop: each iteration loads 4 x 32 bytes from right, ORs them
	// with the matching 32-byte groups of left, and stores the results to out.
	// The offset r8 then advances by 128 (encoded as subtracting -128) until
	// it equals r10.
LBB1_10:
	LONG $0x107ca1c4; WORD $0x0604             // vmovups    ymm0, yword [rsi + r8]
	LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups    ymm1, yword [rsi + r8 + 32]
	LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups    ymm2, yword [rsi + r8 + 64]
	LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 // vmovups    ymm3, yword [rsi + r8 + 96]
	LONG $0x567ca1c4; WORD $0x0704             // vorps    ymm0, ymm0, yword [rdi + r8]
	LONG $0x5674a1c4; WORD $0x074c; BYTE $0x20 // vorps    ymm1, ymm1, yword [rdi + r8 + 32]
	LONG $0x566ca1c4; WORD $0x0754; BYTE $0x40 // vorps    ymm2, ymm2, yword [rdi + r8 + 64]
	LONG $0x5664a1c4; WORD $0x075c; BYTE $0x60 // vorps    ymm3, ymm3, yword [rdi + r8 + 96]
	LONG $0x117ca1c4; WORD $0x0204             // vmovups    yword [rdx + r8], ymm0
	LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups    yword [rdx + r8 + 32], ymm1
	LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups    yword [rdx + r8 + 64], ymm2
	LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups    yword [rdx + r8 + 96], ymm3
	LONG $0x80e88349                           // sub    r8, -128
	WORD $0x394d; BYTE $0xc2                   // cmp    r10, r8
	JNE  LBB1_10
	// If length was an exact multiple of 128 there is no tail left to process.
	WORD $0x3949; BYTE $0xca                   // cmp    r10, rcx
	JE   LBB1_12

	// Scalar tail, starting at index r10 (0 or a multiple of 128):
	//   r8 = length - r10 - 1   (computed as ~r10 + length)
	//   r9 = length & 3         (single bytes to handle before the 4x loop)
LBB1_3:
	WORD $0x894d; BYTE $0xd0 // mov    r8, r10
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x0149; BYTE $0xc8 // add    r8, rcx
	WORD $0x8949; BYTE $0xc9 // mov    r9, rcx
	LONG $0x03e18349         // and    r9, 3
	JE   LBB1_5

	// One byte per iteration, repeated r9 times.
LBB1_4:
	LONG $0x04b60f42; BYTE $0x16 // movzx    eax, byte [rsi + r10]
	LONG $0x17040a42             // or    al, byte [rdi + r10]
	LONG $0x12048842             // mov    byte [rdx + r10], al
	LONG $0x01c28349             // add    r10, 1
	LONG $0xffc18349             // add    r9, -1
	JNE  LBB1_4

	// r8 < 3 (unsigned) means at most 3 bytes were pending at LBB1_3, all of
	// which the single-byte loop above has already handled.
LBB1_5:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB1_12

	// Four bytes per iteration until index r10 reaches length.
LBB1_6:
	LONG $0x04b60f42; BYTE $0x16   // movzx    eax, byte [rsi + r10]
	LONG $0x17040a42               // or    al, byte [rdi + r10]
	LONG $0x12048842               // mov    byte [rdx + r10], al
	LONG $0x44b60f42; WORD $0x0116 // movzx    eax, byte [rsi + r10 + 1]
	LONG $0x17440a42; BYTE $0x01   // or    al, byte [rdi + r10 + 1]
	LONG $0x12448842; BYTE $0x01   // mov    byte [rdx + r10 + 1], al
	LONG $0x44b60f42; WORD $0x0216 // movzx    eax, byte [rsi + r10 + 2]
	LONG $0x17440a42; BYTE $0x02   // or    al, byte [rdi + r10 + 2]
	LONG $0x12448842; BYTE $0x02   // mov    byte [rdx + r10 + 2], al
	LONG $0x44b60f42; WORD $0x0316 // movzx    eax, byte [rsi + r10 + 3]
	LONG $0x17440a42; BYTE $0x03   // or    al, byte [rdi + r10 + 3]
	LONG $0x12448842; BYTE $0x03   // mov    byte [rdx + r10 + 3], al
	LONG $0x04c28349               // add    r10, 4
	WORD $0x394c; BYTE $0xd1       // cmp    rcx, r10
	JNE  LBB1_6

	// Common exit: clear the upper halves of the YMM registers, then return.
LBB1_12:
	VZEROUPPER
	RET

// _bitmap_aligned_and_not_avx2 writes out[i] = left[i] & ^right[i] for every
// byte index i in [0, length): right is the operand that gets inverted.
//
// Arguments, read from the caller's frame (32 bytes of arguments, no locals):
//   left+0(FP)    -> DI (rdi)  first input buffer (used as-is)
//   right+8(FP)   -> SI (rsi)  second input buffer (bitwise-inverted)
//   out+16(FP)    -> DX (rdx)  output buffer
//   length+24(FP) -> CX (rcx)  byte count; values <= 0 (signed) are a no-op
//
// The instructions are emitted as raw opcode bytes (WORD/LONG/BYTE); the
// trailing comment on each line is the Intel-syntax disassembly.
// Scratch registers written: AX, BX, R8-R11 and Y0-Y3.
// NOTE(review): the Go-side declaration is not visible here; the arguments
// are presumably three pointers and an integer length -- confirm.
TEXT ·_bitmap_aligned_and_not_avx2(SB), $0-32

	MOVQ left+0(FP), DI
	MOVQ right+8(FP), SI
	MOVQ out+16(FP), DX
	MOVQ length+24(FP), CX

	// length <= 0: return immediately. length <= 127: skip the vector path
	// and let the scalar code at LBB2_3 do everything from index r8 = 0.
	WORD $0x8548; BYTE $0xc9 // test    rcx, rcx
	JLE  LBB2_12
	LONG $0x7ff98348         // cmp    rcx, 127
	JA   LBB2_7
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
	JMP  LBB2_3

	// length >= 128: check whether out overlaps either input.
	//   r11b = (left  + length > out),  bl  = (out + length > left)
	//   r10b = (right + length > out),  r9b = (out + length > right)
	// If out overlaps left (r11b and bl both set) or right (r10b and r9b both
	// set), fall back to the scalar code with r8 = 0.
LBB2_7:
	LONG $0x0a048d4c         // lea    r8, [rdx + rcx]
	LONG $0x0f048d48         // lea    rax, [rdi + rcx]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	LONG $0xd3970f41         // seta    r11b
	LONG $0x0e048d48         // lea    rax, [rsi + rcx]
	WORD $0x3949; BYTE $0xf8 // cmp    r8, rdi
	WORD $0x970f; BYTE $0xd3 // seta    bl
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	LONG $0xd2970f41         // seta    r10b
	WORD $0x3949; BYTE $0xf0 // cmp    r8, rsi
	LONG $0xd1970f41         // seta    r9b
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
	WORD $0x8441; BYTE $0xdb // test    r11b, bl
	JNE  LBB2_3
	WORD $0x2045; BYTE $0xca // and    r10b, r9b
	JNE  LBB2_3
	// No overlap: r8 = length rounded down to a multiple of 128 (the vector
	// loop bound), rax = 0 (running byte offset).
	WORD $0x8949; BYTE $0xc8 // mov    r8, rcx
	LONG $0x80e08349         // and    r8, -128
	WORD $0xc031             // xor    eax, eax

	// Vector loop: each iteration loads 4 x 32 bytes from right into
	// ymm0-ymm3, combines them with the matching 32-byte groups of left via
	// vandnps (the register operand holding right is the inverted one,
	// matching the not/and sequence of the scalar path), and stores to out.
	// The offset rax then advances by 128 (encoded as subtracting -128) until
	// it equals r8.
LBB2_10:
	LONG $0x0410fcc5; BYTE $0x06   // vmovups    ymm0, yword [rsi + rax]
	LONG $0x4c10fcc5; WORD $0x2006 // vmovups    ymm1, yword [rsi + rax + 32]
	LONG $0x5410fcc5; WORD $0x4006 // vmovups    ymm2, yword [rsi + rax + 64]
	LONG $0x5c10fcc5; WORD $0x6006 // vmovups    ymm3, yword [rsi + rax + 96]
	LONG $0x0455fcc5; BYTE $0x07   // vandnps    ymm0, ymm0, yword [rdi + rax]
	LONG $0x4c55f4c5; WORD $0x2007 // vandnps    ymm1, ymm1, yword [rdi + rax + 32]
	LONG $0x5455ecc5; WORD $0x4007 // vandnps    ymm2, ymm2, yword [rdi + rax + 64]
	LONG $0x5c55e4c5; WORD $0x6007 // vandnps    ymm3, ymm3, yword [rdi + rax + 96]
	LONG $0x0411fcc5; BYTE $0x02   // vmovups    yword [rdx + rax], ymm0
	LONG $0x4c11fcc5; WORD $0x2002 // vmovups    yword [rdx + rax + 32], ymm1
	LONG $0x5411fcc5; WORD $0x4002 // vmovups    yword [rdx + rax + 64], ymm2
	LONG $0x5c11fcc5; WORD $0x6002 // vmovups    yword [rdx + rax + 96], ymm3
	LONG $0x80e88348               // sub    rax, -128
	WORD $0x3949; BYTE $0xc0       // cmp    r8, rax
	JNE  LBB2_10
	// If length was an exact multiple of 128 there is no tail left to process.
	WORD $0x3949; BYTE $0xc8       // cmp    r8, rcx
	JE   LBB2_12

	// Scalar tail, starting at index r8 (0 or a multiple of 128).
	// r9 = ~r8 is kept for the remaining-count check at LBB2_5.
	// When length is odd, one byte is handled here and the index is bumped to
	// the next odd value with "or r8, 1" (r8 is even at this point).
LBB2_3:
	WORD $0x894d; BYTE $0xc1 // mov    r9, r8
	WORD $0xf749; BYTE $0xd1 // not    r9
	WORD $0xc1f6; BYTE $0x01 // test    cl, 1
	JE   LBB2_5
	LONG $0x06048a42         // mov    al, byte [rsi + r8]
	WORD $0xd0f6             // not    al
	LONG $0x07042242         // and    al, byte [rdi + r8]
	LONG $0x02048842         // mov    byte [rdx + r8], al
	LONG $0x01c88349         // or    r8, 1

	// r9 = length - start - 1, where start is the index on entry to LBB2_3.
	// Zero means exactly one byte was pending and it has just been handled.
LBB2_5:
	WORD $0x0149; BYTE $0xc9 // add    r9, rcx
	JE   LBB2_12

	// Two bytes per iteration until index r8 reaches length.
LBB2_6:
	LONG $0x04b60f42; BYTE $0x06   // movzx    eax, byte [rsi + r8]
	WORD $0xd0f6                   // not    al
	LONG $0x07042242               // and    al, byte [rdi + r8]
	LONG $0x02048842               // mov    byte [rdx + r8], al
	LONG $0x44b60f42; WORD $0x0106 // movzx    eax, byte [rsi + r8 + 1]
	WORD $0xd0f6                   // not    al
	LONG $0x07442242; BYTE $0x01   // and    al, byte [rdi + r8 + 1]
	LONG $0x02448842; BYTE $0x01   // mov    byte [rdx + r8 + 1], al
	LONG $0x02c08349               // add    r8, 2
	WORD $0x394c; BYTE $0xc1       // cmp    rcx, r8
	JNE  LBB2_6

	// Common exit: clear the upper halves of the YMM registers, then return.
LBB2_12:
	VZEROUPPER
	RET

// _bitmap_aligned_xor_avx2 writes out[i] = left[i] ^ right[i] for every byte
// index i in [0, length).
//
// Arguments, read from the caller's frame (32 bytes of arguments, no locals):
//   left+0(FP)    -> DI (rdi)  first input buffer
//   right+8(FP)   -> SI (rsi)  second input buffer
//   out+16(FP)    -> DX (rdx)  output buffer
//   length+24(FP) -> CX (rcx)  byte count; values <= 0 (signed) are a no-op
//
// The instructions are emitted as raw opcode bytes (WORD/LONG/BYTE); the
// trailing comment on each line is the Intel-syntax disassembly.
// Scratch registers written: AX, BX, R8-R11 and Y0-Y3.
// NOTE(review): the Go-side declaration is not visible here; the arguments
// are presumably three pointers and an integer length -- confirm.
TEXT ·_bitmap_aligned_xor_avx2(SB), $0-32

	MOVQ left+0(FP), DI
	MOVQ right+8(FP), SI
	MOVQ out+16(FP), DX
	MOVQ length+24(FP), CX

	// length <= 0: return immediately. length <= 127: skip the vector path
	// and let the scalar code at LBB3_3 do everything from index r10 = 0.
	WORD $0x8548; BYTE $0xc9 // test    rcx, rcx
	JLE  LBB3_12
	LONG $0x7ff98348         // cmp    rcx, 127
	JA   LBB3_7
	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
	JMP  LBB3_3

	// length >= 128: check whether out overlaps either input.
	//   r11b = (left  + length > out),  bl  = (out + length > left)
	//   r8b  = (right + length > out),  r9b = (out + length > right)
	// If out overlaps left (r11b and bl both set) or right (r8b and r9b both
	// set), fall back to the scalar code with r10 = 0.
LBB3_7:
	LONG $0x0a0c8d4c         // lea    r9, [rdx + rcx]
	LONG $0x0f048d48         // lea    rax, [rdi + rcx]
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	LONG $0xd3970f41         // seta    r11b
	LONG $0x0e048d48         // lea    rax, [rsi + rcx]
	WORD $0x3949; BYTE $0xf9 // cmp    r9, rdi
	WORD $0x970f; BYTE $0xd3 // seta    bl
	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
	LONG $0xd0970f41         // seta    r8b
	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
	LONG $0xd1970f41         // seta    r9b
	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
	WORD $0x8441; BYTE $0xdb // test    r11b, bl
	JNE  LBB3_3
	WORD $0x2045; BYTE $0xc8 // and    r8b, r9b
	JNE  LBB3_3
	// No overlap: r10 = length rounded down to a multiple of 128 (the vector
	// loop bound), r8 = 0 (running byte offset).
	WORD $0x8949; BYTE $0xca // mov    r10, rcx
	LONG $0x80e28349         // and    r10, -128
	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d

	// Vector loop: each iteration loads 4 x 32 bytes from right, XORs them
	// with the matching 32-byte groups of left, and stores the results to out.
	// The offset r8 then advances by 128 (encoded as subtracting -128) until
	// it equals r10.
LBB3_10:
	LONG $0x107ca1c4; WORD $0x0604             // vmovups    ymm0, yword [rsi + r8]
	LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups    ymm1, yword [rsi + r8 + 32]
	LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups    ymm2, yword [rsi + r8 + 64]
	LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 // vmovups    ymm3, yword [rsi + r8 + 96]
	LONG $0x577ca1c4; WORD $0x0704             // vxorps    ymm0, ymm0, yword [rdi + r8]
	LONG $0x5774a1c4; WORD $0x074c; BYTE $0x20 // vxorps    ymm1, ymm1, yword [rdi + r8 + 32]
	LONG $0x576ca1c4; WORD $0x0754; BYTE $0x40 // vxorps    ymm2, ymm2, yword [rdi + r8 + 64]
	LONG $0x5764a1c4; WORD $0x075c; BYTE $0x60 // vxorps    ymm3, ymm3, yword [rdi + r8 + 96]
	LONG $0x117ca1c4; WORD $0x0204             // vmovups    yword [rdx + r8], ymm0
	LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups    yword [rdx + r8 + 32], ymm1
	LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups    yword [rdx + r8 + 64], ymm2
	LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups    yword [rdx + r8 + 96], ymm3
	LONG $0x80e88349                           // sub    r8, -128
	WORD $0x394d; BYTE $0xc2                   // cmp    r10, r8
	JNE  LBB3_10
	// If length was an exact multiple of 128 there is no tail left to process.
	WORD $0x3949; BYTE $0xca                   // cmp    r10, rcx
	JE   LBB3_12

	// Scalar tail, starting at index r10 (0 or a multiple of 128):
	//   r8 = length - r10 - 1   (computed as ~r10 + length)
	//   r9 = length & 3         (single bytes to handle before the 4x loop)
LBB3_3:
	WORD $0x894d; BYTE $0xd0 // mov    r8, r10
	WORD $0xf749; BYTE $0xd0 // not    r8
	WORD $0x0149; BYTE $0xc8 // add    r8, rcx
	WORD $0x8949; BYTE $0xc9 // mov    r9, rcx
	LONG $0x03e18349         // and    r9, 3
	JE   LBB3_5

	// One byte per iteration, repeated r9 times.
LBB3_4:
	LONG $0x04b60f42; BYTE $0x16 // movzx    eax, byte [rsi + r10]
	LONG $0x17043242             // xor    al, byte [rdi + r10]
	LONG $0x12048842             // mov    byte [rdx + r10], al
	LONG $0x01c28349             // add    r10, 1
	LONG $0xffc18349             // add    r9, -1
	JNE  LBB3_4

	// r8 < 3 (unsigned) means at most 3 bytes were pending at LBB3_3, all of
	// which the single-byte loop above has already handled.
LBB3_5:
	LONG $0x03f88349 // cmp    r8, 3
	JB   LBB3_12

	// Four bytes per iteration until index r10 reaches length.
LBB3_6:
	LONG $0x04b60f42; BYTE $0x16   // movzx    eax, byte [rsi + r10]
	LONG $0x17043242               // xor    al, byte [rdi + r10]
	LONG $0x12048842               // mov    byte [rdx + r10], al
	LONG $0x44b60f42; WORD $0x0116 // movzx    eax, byte [rsi + r10 + 1]
	LONG $0x17443242; BYTE $0x01   // xor    al, byte [rdi + r10 + 1]
	LONG $0x12448842; BYTE $0x01   // mov    byte [rdx + r10 + 1], al
	LONG $0x44b60f42; WORD $0x0216 // movzx    eax, byte [rsi + r10 + 2]
	LONG $0x17443242; BYTE $0x02   // xor    al, byte [rdi + r10 + 2]
	LONG $0x12448842; BYTE $0x02   // mov    byte [rdx + r10 + 2], al
	LONG $0x44b60f42; WORD $0x0316 // movzx    eax, byte [rsi + r10 + 3]
	LONG $0x17443242; BYTE $0x03   // xor    al, byte [rdi + r10 + 3]
	LONG $0x12448842; BYTE $0x03   // mov    byte [rdx + r10 + 3], al
	LONG $0x04c28349               // add    r10, 4
	WORD $0x394c; BYTE $0xd1       // cmp    rcx, r10
	JNE  LBB3_6

	// Common exit: clear the upper halves of the YMM registers, then return.
LBB3_12:
	VZEROUPPER
	RET