//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// _sum_float64_avx2 adds up a run of float64 values and stores the total
// through a pointer.
//
// The symbol name must start with the middle dot (U+00B7): that is how the Go
// assembler scopes a symbol to the current package so that it binds to the
// body-less Go declaration of the same name.
//
// Frame: $0-24 (no locals, 24 bytes of stack-passed arguments).
//   buf+0(FP)  -> DI  address of the first float64
//   len+8(FP)  -> SI  element count (each element is 8 bytes)
//   res+16(FP) -> DX  address that receives the 8-byte sum
//
// Outline:
//   len == 0  : 0.0 is stored to *res.
//   len <  32 : every element is added by the scalar loop (LBB0_4).
//   otherwise : r9 = len rounded down to a multiple of 32. Eight YMM
//               accumulators (ymm0..ymm7, 4 doubles each) consume 32 elements
//               per block. LBB0_9 peels off (blocks mod 8) blocks, LBB0_12
//               then consumes 8 blocks (256 elements, 2048 bytes) per
//               iteration. LBB0_13 folds the accumulators into xmm0 and the
//               remaining len - r9 elements are added by the scalar loop.
//
// Because elements are accumulated in independent lanes, the additions are
// not performed in strict left-to-right order.
//
// Registers written: AX, CX, SI, R8, R9, Y0-Y7, flags.
//
// Instructions are emitted as raw bytes; the trailing comment on each line is
// the Intel-syntax instruction those bytes encode.
TEXT ·_sum_float64_avx2(SB), $0-24

	MOVQ buf+0(FP), DI
	MOVQ len+8(FP), SI
	MOVQ res+16(FP), DX

	// xmm0 is the running scalar sum; start at 0.0 so the empty case stores 0.
	LONG $0xc057f9c5         // vxorpd    xmm0, xmm0, xmm0
	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
	JE   LBB0_14
	// Fewer than 32 elements: not enough for one vector block, go scalar.
	LONG $0x1ffe8348         // cmp    rsi, 31
	JBE  LBB0_2
	// r9 = number of elements covered by whole 32-element blocks.
	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
	LONG $0xe0e18349         // and    r9, -32
	JE   LBB0_2
	// r8 = r9 - 32; rax = ((r8 >> 5) + 1) & 7 = block count modulo 8,
	// i.e. how many blocks the peel loop must take before the 8x unrolled loop.
	LONG $0xe0418d4d         // lea    r8, [r9 - 32]
	WORD $0x8944; BYTE $0xc0 // mov    eax, r8d
	WORD $0xe8c1; BYTE $0x05 // shr    eax, 5
	WORD $0xc0ff             // inc    eax
	LONG $0x07e08348         // and    rax, 7
	JE   LBB0_7
	// Negate so the peel loop can count up towards zero with inc/jne.
	WORD $0xf748; BYTE $0xd8 // neg    rax
	// Clear the eight accumulators; rcx = element index into buf.
	LONG $0xc057fdc5         // vxorpd    ymm0, ymm0, ymm0
	WORD $0xc931             // xor    ecx, ecx
	LONG $0xc957f5c5         // vxorpd    ymm1, ymm1, ymm1
	LONG $0xd257edc5         // vxorpd    ymm2, ymm2, ymm2
	LONG $0xdb57e5c5         // vxorpd    ymm3, ymm3, ymm3
	LONG $0xe457ddc5         // vxorpd    ymm4, ymm4, ymm4
	LONG $0xed57d5c5         // vxorpd    ymm5, ymm5, ymm5
	LONG $0xf657cdc5         // vxorpd    ymm6, ymm6, ymm6
	LONG $0xff57c5c5         // vxorpd    ymm7, ymm7, ymm7

// Peel loop: one 32-element block (8 x 4 doubles) per iteration.
LBB0_9:
	LONG $0x0458fdc5; BYTE $0xcf         // vaddpd    ymm0, ymm0, yword [rdi + 8*rcx]
	LONG $0x4c58f5c5; WORD $0x20cf       // vaddpd    ymm1, ymm1, yword [rdi + 8*rcx + 32]
	LONG $0x5458edc5; WORD $0x40cf       // vaddpd    ymm2, ymm2, yword [rdi + 8*rcx + 64]
	LONG $0x5c58e5c5; WORD $0x60cf       // vaddpd    ymm3, ymm3, yword [rdi + 8*rcx + 96]
	QUAD $0x000080cfa458ddc5; BYTE $0x00 // vaddpd    ymm4, ymm4, yword [rdi + 8*rcx + 128]
	QUAD $0x0000a0cfac58d5c5; BYTE $0x00 // vaddpd    ymm5, ymm5, yword [rdi + 8*rcx + 160]
	QUAD $0x0000c0cfb458cdc5; BYTE $0x00 // vaddpd    ymm6, ymm6, yword [rdi + 8*rcx + 192]
	QUAD $0x0000e0cfbc58c5c5; BYTE $0x00 // vaddpd    ymm7, ymm7, yword [rdi + 8*rcx + 224]
	LONG $0x20c18348                     // add    rcx, 32
	WORD $0xff48; BYTE $0xc0             // inc    rax
	JNE  LBB0_9
	JMP  LBB0_10

// Scalar-only entry: no vector work was done, start the tail at element 0.
LBB0_2:
	WORD $0x3145; BYTE $0xc9 // xor    r9d, r9d

// Scalar tail setup: rax = &buf[r9], rsi = elements still to add (len - r9).
LBB0_3:
	LONG $0xcf048d4a         // lea    rax, [rdi + 8*r9]
	WORD $0x294c; BYTE $0xce // sub    rsi, r9

// Scalar tail loop: one double per iteration; rsi > 0 on entry.
LBB0_4:
	LONG $0x0058fbc5         // vaddsd    xmm0, xmm0, qword [rax]
	LONG $0x08c08348         // add    rax, 8
	WORD $0xff48; BYTE $0xce // dec    rsi
	JNE  LBB0_4

// Common exit: store the low double of xmm0 to *res.
LBB0_14:
	LONG $0x0211fbc5 // vmovsd    qword [rdx], xmm0
	// Clear the upper YMM halves before returning to non-AVX code.
	VZEROUPPER
	RET

// Block count is a multiple of 8: skip the peel loop, just clear state.
LBB0_7:
	WORD $0xc931     // xor    ecx, ecx
	LONG $0xc057fdc5 // vxorpd    ymm0, ymm0, ymm0
	LONG $0xc957f5c5 // vxorpd    ymm1, ymm1, ymm1
	LONG $0xd257edc5 // vxorpd    ymm2, ymm2, ymm2
	LONG $0xdb57e5c5 // vxorpd    ymm3, ymm3, ymm3
	LONG $0xe457ddc5 // vxorpd    ymm4, ymm4, ymm4
	LONG $0xed57d5c5 // vxorpd    ymm5, ymm5, ymm5
	LONG $0xf657cdc5 // vxorpd    ymm6, ymm6, ymm6
	LONG $0xff57c5c5 // vxorpd    ymm7, ymm7, ymm7

// r8 < 224 means fewer than 8 blocks in total, so the peel loop (if any) has
// already consumed them all. Otherwise rax = vector elements still pending
// (a multiple of 256) and rcx becomes a byte pointer biased by +1792 so the
// unrolled loop can address a 2048-byte window as [rcx - 1792, rcx + 256).
LBB0_10:
	LONG $0xe0f88149; WORD $0x0000; BYTE $0x00 // cmp    r8, 224
	JB   LBB0_13
	WORD $0x894c; BYTE $0xc8                   // mov    rax, r9
	WORD $0x2948; BYTE $0xc8                   // sub    rax, rcx
	QUAD $0x00000700cf8c8d48                   // lea    rcx, [rdi + 8*rcx + 1792]

// Main loop: 64 vector adds = 8 blocks = 256 elements per iteration.
LBB0_12:
	QUAD $0xfffff9e0b958c5c5                   // vaddpd    ymm7, ymm7, yword [rcx - 1568]
	QUAD $0xfffff9c0b158cdc5                   // vaddpd    ymm6, ymm6, yword [rcx - 1600]
	QUAD $0xfffff9a0a958d5c5                   // vaddpd    ymm5, ymm5, yword [rcx - 1632]
	QUAD $0xfffff980a158ddc5                   // vaddpd    ymm4, ymm4, yword [rcx - 1664]
	QUAD $0xfffff9609958e5c5                   // vaddpd    ymm3, ymm3, yword [rcx - 1696]
	QUAD $0xfffff9409158edc5                   // vaddpd    ymm2, ymm2, yword [rcx - 1728]
	QUAD $0xfffff9208958f5c5                   // vaddpd    ymm1, ymm1, yword [rcx - 1760]
	QUAD $0xfffff9008158fdc5                   // vaddpd    ymm0, ymm0, yword [rcx - 1792]
	QUAD $0xfffffa008158fdc5                   // vaddpd    ymm0, ymm0, yword [rcx - 1536]
	QUAD $0xfffffa208958f5c5                   // vaddpd    ymm1, ymm1, yword [rcx - 1504]
	QUAD $0xfffffa409158edc5                   // vaddpd    ymm2, ymm2, yword [rcx - 1472]
	QUAD $0xfffffa609958e5c5                   // vaddpd    ymm3, ymm3, yword [rcx - 1440]
	QUAD $0xfffffa80a158ddc5                   // vaddpd    ymm4, ymm4, yword [rcx - 1408]
	QUAD $0xfffffaa0a958d5c5                   // vaddpd    ymm5, ymm5, yword [rcx - 1376]
	QUAD $0xfffffac0b158cdc5                   // vaddpd    ymm6, ymm6, yword [rcx - 1344]
	QUAD $0xfffffae0b958c5c5                   // vaddpd    ymm7, ymm7, yword [rcx - 1312]
	QUAD $0xfffffbe0b958c5c5                   // vaddpd    ymm7, ymm7, yword [rcx - 1056]
	QUAD $0xfffffbc0b158cdc5                   // vaddpd    ymm6, ymm6, yword [rcx - 1088]
	QUAD $0xfffffba0a958d5c5                   // vaddpd    ymm5, ymm5, yword [rcx - 1120]
	QUAD $0xfffffb80a158ddc5                   // vaddpd    ymm4, ymm4, yword [rcx - 1152]
	QUAD $0xfffffb609958e5c5                   // vaddpd    ymm3, ymm3, yword [rcx - 1184]
	QUAD $0xfffffb409158edc5                   // vaddpd    ymm2, ymm2, yword [rcx - 1216]
	QUAD $0xfffffb208958f5c5                   // vaddpd    ymm1, ymm1, yword [rcx - 1248]
	QUAD $0xfffffb008158fdc5                   // vaddpd    ymm0, ymm0, yword [rcx - 1280]
	QUAD $0xfffffc008158fdc5                   // vaddpd    ymm0, ymm0, yword [rcx - 1024]
	QUAD $0xfffffc208958f5c5                   // vaddpd    ymm1, ymm1, yword [rcx - 992]
	QUAD $0xfffffc409158edc5                   // vaddpd    ymm2, ymm2, yword [rcx - 960]
	QUAD $0xfffffc609958e5c5                   // vaddpd    ymm3, ymm3, yword [rcx - 928]
	QUAD $0xfffffc80a158ddc5                   // vaddpd    ymm4, ymm4, yword [rcx - 896]
	QUAD $0xfffffca0a958d5c5                   // vaddpd    ymm5, ymm5, yword [rcx - 864]
	QUAD $0xfffffcc0b158cdc5                   // vaddpd    ymm6, ymm6, yword [rcx - 832]
	QUAD $0xfffffce0b958c5c5                   // vaddpd    ymm7, ymm7, yword [rcx - 800]
	QUAD $0xfffffde0b958c5c5                   // vaddpd    ymm7, ymm7, yword [rcx - 544]
	QUAD $0xfffffdc0b158cdc5                   // vaddpd    ymm6, ymm6, yword [rcx - 576]
	QUAD $0xfffffda0a958d5c5                   // vaddpd    ymm5, ymm5, yword [rcx - 608]
	QUAD $0xfffffd80a158ddc5                   // vaddpd    ymm4, ymm4, yword [rcx - 640]
	QUAD $0xfffffd609958e5c5                   // vaddpd    ymm3, ymm3, yword [rcx - 672]
	QUAD $0xfffffd409158edc5                   // vaddpd    ymm2, ymm2, yword [rcx - 704]
	QUAD $0xfffffd208958f5c5                   // vaddpd    ymm1, ymm1, yword [rcx - 736]
	QUAD $0xfffffd008158fdc5                   // vaddpd    ymm0, ymm0, yword [rcx - 768]
	QUAD $0xfffffe008158fdc5                   // vaddpd    ymm0, ymm0, yword [rcx - 512]
	QUAD $0xfffffe208958f5c5                   // vaddpd    ymm1, ymm1, yword [rcx - 480]
	QUAD $0xfffffe409158edc5                   // vaddpd    ymm2, ymm2, yword [rcx - 448]
	QUAD $0xfffffe609958e5c5                   // vaddpd    ymm3, ymm3, yword [rcx - 416]
	QUAD $0xfffffe80a158ddc5                   // vaddpd    ymm4, ymm4, yword [rcx - 384]
	QUAD $0xfffffea0a958d5c5                   // vaddpd    ymm5, ymm5, yword [rcx - 352]
	QUAD $0xfffffec0b158cdc5                   // vaddpd    ymm6, ymm6, yword [rcx - 320]
	QUAD $0xfffffee0b958c5c5                   // vaddpd    ymm7, ymm7, yword [rcx - 288]
	LONG $0x7958c5c5; BYTE $0xe0               // vaddpd    ymm7, ymm7, yword [rcx - 32]
	LONG $0x7158cdc5; BYTE $0xc0               // vaddpd    ymm6, ymm6, yword [rcx - 64]
	LONG $0x6958d5c5; BYTE $0xa0               // vaddpd    ymm5, ymm5, yword [rcx - 96]
	LONG $0x6158ddc5; BYTE $0x80               // vaddpd    ymm4, ymm4, yword [rcx - 128]
	QUAD $0xffffff609958e5c5                   // vaddpd    ymm3, ymm3, yword [rcx - 160]
	QUAD $0xffffff409158edc5                   // vaddpd    ymm2, ymm2, yword [rcx - 192]
	QUAD $0xffffff208958f5c5                   // vaddpd    ymm1, ymm1, yword [rcx - 224]
	QUAD $0xffffff008158fdc5                   // vaddpd    ymm0, ymm0, yword [rcx - 256]
	LONG $0x0158fdc5                           // vaddpd    ymm0, ymm0, yword [rcx]
	LONG $0x4958f5c5; BYTE $0x20               // vaddpd    ymm1, ymm1, yword [rcx + 32]
	LONG $0x5158edc5; BYTE $0x40               // vaddpd    ymm2, ymm2, yword [rcx + 64]
	LONG $0x5958e5c5; BYTE $0x60               // vaddpd    ymm3, ymm3, yword [rcx + 96]
	QUAD $0x00000080a158ddc5                   // vaddpd    ymm4, ymm4, yword [rcx + 128]
	QUAD $0x000000a0a958d5c5                   // vaddpd    ymm5, ymm5, yword [rcx + 160]
	QUAD $0x000000c0b158cdc5                   // vaddpd    ymm6, ymm6, yword [rcx + 192]
	QUAD $0x000000e0b958c5c5                   // vaddpd    ymm7, ymm7, yword [rcx + 224]
	// Advance the window by 2048 bytes and retire 256 elements.
	LONG $0x00c18148; WORD $0x0008; BYTE $0x00 // add    rcx, 2048
	LONG $0xff000548; WORD $0xffff             // add    rax, -256
	JNE  LBB0_12

// Reduction: pairwise-fold ymm0..ymm7 into ymm0, add its upper 128-bit half
// onto the lower half, then a horizontal add leaves the total in the low
// double of xmm0, which the scalar tail and the final store both use.
LBB0_13:
	LONG $0xcd58f5c5               // vaddpd    ymm1, ymm1, ymm5
	LONG $0xdf58e5c5               // vaddpd    ymm3, ymm3, ymm7
	LONG $0xc458fdc5               // vaddpd    ymm0, ymm0, ymm4
	LONG $0xd658edc5               // vaddpd    ymm2, ymm2, ymm6
	LONG $0xc258fdc5               // vaddpd    ymm0, ymm0, ymm2
	LONG $0xcb58f5c5               // vaddpd    ymm1, ymm1, ymm3
	LONG $0xc158fdc5               // vaddpd    ymm0, ymm0, ymm1
	LONG $0x197de3c4; WORD $0x01c1 // vextractf128    xmm1, ymm0, 1
	LONG $0xc158fdc5               // vaddpd    ymm0, ymm0, ymm1
	LONG $0xc07cfdc5               // vhaddpd    ymm0, ymm0, ymm0
	// If len was not a multiple of 32, finish the leftovers with scalar adds.
	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
	JNE  LBB0_3
	JMP  LBB0_14