package amd64

import (
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)

var calleeSavedVRegs = []regalloc.VReg{
	rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
	xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
}

// CompileGoFunctionTrampoline implements backend.Machine.
func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte {
	argBegin := 1 // Skips exec context by default.
	if needModuleContextPtr {
		argBegin++
	}

	abi := &backend.FunctionABI{}
	abi.Init(sig, intArgResultRegs, floatArgResultRegs)
	m.currentABI = abi

	cur := m.allocateNop()
	m.rootInstr = cur

	// Execution context is always the first argument.
	execCtrPtr := raxVReg

	// First we update RBP and RSP just like the normal prologue.
	//
	//                   (high address)                     (high address)
	//       RBP ----> +-----------------+                +-----------------+
	//                 |     .......     |                |     .......     |
	//                 |      ret Y      |                |      ret Y      |
	//                 |     .......     |                |     .......     |
	//                 |      ret 0      |                |      ret 0      |
	//                 |      arg X      |                |      arg X      |
	//                 |     .......     |     ====>      |     .......     |
	//                 |      arg 1      |                |      arg 1      |
	//                 |      arg 0      |                |      arg 0      |
	//                 |   Return Addr   |                |   Return Addr   |
	//       RSP ----> +-----------------+                |    Caller_RBP   |
	//                    (low address)                   +-----------------+ <----- RSP, RBP
	//
	cur = m.setupRBPRSP(cur)

	goSliceSizeAligned, goSliceSizeAlignedUnaligned := backend.GoFunctionCallRequiredStackSize(sig, argBegin)
	cur = m.insertStackBoundsCheck(goSliceSizeAligned+8 /* size of the Go slice */, cur)
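	// For illustration (a hedged sketch of the sizing, assuming GoFunctionCallRequiredStackSize
	// reserves 8 bytes per scalar argument/result and 16 per v128, returning both the 16-byte
	// aligned total and the raw total): a hypothetical signature (exec_ctx, i32, f64) -> i64
	// needs max(8+8, 8) = 16 bytes for the arg/ret region, so the bounds check above covers
	// 16+8 = 24 bytes including the slice size pushed below.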

	// Save the callee saved registers.
	cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs)

	if needModuleContextPtr {
		moduleCtrPtr := rbxVReg // Module context is always the second argument.
		mem := m.newAmodeImmReg(
			wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.U32(),
			execCtrPtr)
		store := m.allocateInstr().asMovRM(moduleCtrPtr, newOperandMem(mem), 8)
		cur = linkInstr(cur, store)
	}

	// Now let's advance the RSP to the stack slot for the arguments.
	//
	//                (high address)                     (high address)
	//              +-----------------+               +-----------------+
	//              |     .......     |               |     .......     |
	//              |      ret Y      |               |      ret Y      |
	//              |     .......     |               |     .......     |
	//              |      ret 0      |               |      ret 0      |
	//              |      arg X      |               |      arg X      |
	//              |     .......     |   =======>    |     .......     |
	//              |      arg 1      |               |      arg 1      |
	//              |      arg 0      |               |      arg 0      |
	//              |   Return Addr   |               |   Return Addr   |
	//              |    Caller_RBP   |               |    Caller_RBP   |
	//  RBP,RSP --> +-----------------+               +-----------------+ <----- RBP
	//                 (low address)                  |  arg[N]/ret[M]  |
	//                                                |    ..........   |
	//                                                |  arg[1]/ret[1]  |
	//                                                |  arg[0]/ret[0]  |
	//                                                +-----------------+ <----- RSP
	//                                                   (low address)
	//
	// where the region of "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go functions,
	// therefore will be accessed as the usual []uint64. So that's where we need to pass/receive
	// the arguments/return values to/from Go function.
	cur = m.addRSP(-int32(goSliceSizeAligned), cur)
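	// For illustration only (a hypothetical sketch, not this package's actual Go-side code):
	// given the region's base address and its length in uint64 slots, the Go side could view
	// it roughly as
	//
	//	view := unsafe.Slice((*uint64)(unsafe.Pointer(base)), n) // base, n: hypothetical
	//
	// reading the arguments from and writing the results back into the same slots.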

	// Next, we need to store all the arguments to the stack in the typical Wasm stack style.
	var offsetInGoSlice int32
	for i := range abi.Args[argBegin:] {
		arg := &abi.Args[argBegin+i]
		var v regalloc.VReg
		if arg.Kind == backend.ABIArgKindReg {
			v = arg.Reg
		} else {
			// We have saved callee saved registers, so we can use them.
			if arg.Type.IsInt() {
				v = r15VReg
			} else {
				v = xmm15VReg
			}
			mem := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
			load := m.allocateInstr()
			switch arg.Type {
			case ssa.TypeI32:
				load.asMovzxRmR(extModeLQ, mem, v)
			case ssa.TypeI64:
				load.asMov64MR(mem, v)
			case ssa.TypeF32:
				load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
			case ssa.TypeF64:
				load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
			case ssa.TypeV128:
				load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
			default:
				panic("BUG")
			}
			cur = linkInstr(cur, load)
		}

		store := m.allocateInstr()
		mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
		switch arg.Type {
		case ssa.TypeI32:
			store.asMovRM(v, mem, 4)
			offsetInGoSlice += 8 // always uint64 rep.
		case ssa.TypeI64:
			store.asMovRM(v, mem, 8)
			offsetInGoSlice += 8
		case ssa.TypeF32:
			store.asXmmMovRM(sseOpcodeMovss, v, mem)
			offsetInGoSlice += 8 // always uint64 rep.
		case ssa.TypeF64:
			store.asXmmMovRM(sseOpcodeMovsd, v, mem)
			offsetInGoSlice += 8
		case ssa.TypeV128:
			store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
			offsetInGoSlice += 16
		default:
			panic("BUG")
		}
		cur = linkInstr(cur, store)
	}

	// Finally we push the size of the slice to the stack so the stack looks like:
	//
	//          (high address)
	//       +-----------------+
	//       |     .......     |
	//       |      ret Y      |
	//       |     .......     |
	//       |      ret 0      |
	//       |      arg X      |
	//       |     .......     |
	//       |      arg 1      |
	//       |      arg 0      |
	//       |   Return Addr   |
	//       |    Caller_RBP   |
	//       +-----------------+ <----- RBP
	//       |  arg[N]/ret[M]  |
	//       |    ..........   |
	//       |  arg[1]/ret[1]  |
	//       |  arg[0]/ret[0]  |
	//       |    slice size   |
	//       +-----------------+ <----- RSP
	//         (low address)
	//
	// 		push $sliceSize
	cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandImm32(uint32(goSliceSizeAlignedUnaligned))))
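	// Since the exit sequence below saves RSP into the execution context right after this push,
	// the saved stack pointer presumably points at this size value, with arg[0]/ret[0] starting
	// 8 bytes above it — enough for the Go side to build the []uint64 view sketched earlier.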

	// Load the exitCode to the register.
	exitCodeReg := r12VReg // Callee saved which is already saved.
	cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(exitCode), false))

	saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg)
	cur = linkInstr(cur, setExitCode)
	cur = linkInstr(cur, saveRsp)
	cur = linkInstr(cur, saveRbp)

	// Ready to exit the execution.
	cur = m.storeReturnAddressAndExit(cur, execCtrPtr)

	// We don't need the slice size anymore, so pop it.
	cur = m.addRSP(8, cur)

	// Ready to set up the results.
	offsetInGoSlice = 0
	// A result may be assigned to the register that still holds the execution context pointer.
	// To avoid clobbering it, we track that result's offset and defer loading it until the end
	// of this function.
	var argOverlapWithExecCtxOffset int32 = -1
	for i := range abi.Rets {
		r := &abi.Rets[i]
		var v regalloc.VReg
		isRegResult := r.Kind == backend.ABIArgKindReg
		if isRegResult {
			v = r.Reg
			if v.RealReg() == execCtrPtr.RealReg() {
				argOverlapWithExecCtxOffset = offsetInGoSlice
				offsetInGoSlice += 8 // always uint64 rep.
				continue
			}
		} else {
			if r.Type.IsInt() {
				v = r15VReg
			} else {
				v = xmm15VReg
			}
		}

		load := m.allocateInstr()
		mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
		switch r.Type {
		case ssa.TypeI32:
			load.asMovzxRmR(extModeLQ, mem, v)
			offsetInGoSlice += 8 // always uint64 rep.
		case ssa.TypeI64:
			load.asMov64MR(mem, v)
			offsetInGoSlice += 8
		case ssa.TypeF32:
			load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
			offsetInGoSlice += 8 // always uint64 rep.
		case ssa.TypeF64:
			load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
			offsetInGoSlice += 8
		case ssa.TypeV128:
			load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
			offsetInGoSlice += 16
		default:
			panic("BUG")
		}
		cur = linkInstr(cur, load)

		if !isRegResult {
			// We need to store it back to the result slot above rbp.
			store := m.allocateInstr()
			mem := newOperandMem(m.newAmodeImmReg(uint32(abi.ArgStackSize+r.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
			switch r.Type {
			case ssa.TypeI32:
				store.asMovRM(v, mem, 4)
			case ssa.TypeI64:
				store.asMovRM(v, mem, 8)
			case ssa.TypeF32:
				store.asXmmMovRM(sseOpcodeMovss, v, mem)
			case ssa.TypeF64:
				store.asXmmMovRM(sseOpcodeMovsd, v, mem)
			case ssa.TypeV128:
				store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
			default:
				panic("BUG")
			}
			cur = linkInstr(cur, store)
		}
	}

	// Before return, we need to restore the callee saved registers.
	cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs)

	if argOverlapWithExecCtxOffset >= 0 {
		// At this point the execution context pointer is no longer used, so we can finally
		// store the result into the register that overlaps with it.
		mem := newOperandMem(m.newAmodeImmReg(uint32(argOverlapWithExecCtxOffset), rspVReg))
		load := m.allocateInstr().asMov64MR(mem, execCtrPtr)
		cur = linkInstr(cur, load)
	}

	// Finally ready to return.
	cur = m.revertRBPRSP(cur)
	linkInstr(cur, m.allocateInstr().asRet())

	m.encodeWithoutSSA(m.rootInstr)
	return m.c.Buf()
}
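
// A note on the exit/re-entry protocol realized above (a summary, not additional machinery):
// the exit sequence records RSP, RBP, and a resume address in the execution context, the Go
// side services the call using the arg/ret region viewed as []uint64, and execution resumes
// at the address saved by storeReturnAddressAndExit, which points right after the exit
// sequence where the results are read back from the stack.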

func (m *machine) saveRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
	offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
	for _, v := range regs {
		store := m.allocateInstr()
		mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx))
		switch v.RegType() {
		case regalloc.RegTypeInt:
			store.asMovRM(v, mem, 8)
		case regalloc.RegTypeFloat:
			store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
		default:
			panic("BUG")
		}
		cur = linkInstr(cur, store)
		offset += 16 // See execution context struct. Each register is 16 bytes-aligned unconditionally.
	}
	return cur
}

func (m *machine) restoreRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
	offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
	for _, v := range regs {
		load := m.allocateInstr()
		mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx))
		switch v.RegType() {
		case regalloc.RegTypeInt:
			load.asMov64MR(mem, v)
		case regalloc.RegTypeFloat:
			load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
		default:
			panic("BUG")
		}
		cur = linkInstr(cur, load)
		offset += 16 // See execution context struct. Each register is 16 bytes-aligned unconditionally.
	}
	return cur
}

func (m *machine) storeReturnAddressAndExit(cur *instruction, execCtx regalloc.VReg) *instruction {
	// readRip is filled in at the end of this function: it computes, via LEA on a label placed
	// after the exit sequence, the address at which the Go side should resume this trampoline.
	readRip := m.allocateInstr()
	cur = linkInstr(cur, readRip)

	ripReg := r12VReg // Callee saved which is already saved.
	saveRip := m.allocateInstr().asMovRM(
		ripReg,
		newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetGoCallReturnAddress.U32(), execCtx)),
		8,
	)
	cur = linkInstr(cur, saveRip)

	exit := m.allocateExitSeq(execCtx)
	cur = linkInstr(cur, exit)

	nop, l := m.allocateBrTarget()
	cur = linkInstr(cur, nop)
	readRip.asLEA(newOperandLabel(l), ripReg)
	return cur
}

// stackGrowSaveVRegs is the set of registers that must be saved/restored while growing the stack
// when there's insufficient stack space left. Basically this is all the allocatable registers
// except for RSP, RBP, and RAX which contains the execution context pointer. The ExecCtx pointer
// is always the first argument so we don't need to save it.
var stackGrowSaveVRegs = []regalloc.VReg{
	rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
	rcxVReg, rbxVReg, rsiVReg, rdiVReg, r8VReg, r9VReg, r10VReg, r11VReg,
	xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
	xmm0VReg, xmm1VReg, xmm2VReg, xmm3VReg, xmm4VReg, xmm5VReg, xmm6VReg, xmm7VReg,
}

// CompileStackGrowCallSequence implements backend.Machine.
func (m *machine) CompileStackGrowCallSequence() []byte {
	cur := m.allocateNop()
	m.rootInstr = cur

	cur = m.setupRBPRSP(cur)

	// Execution context is always the first argument.
	execCtrPtr := raxVReg

	// Save the callee saved and argument registers.
	cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs)

	// Load the exitCode to the register.
	exitCodeReg := r12VReg // Already saved.
	cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(wazevoapi.ExitCodeGrowStack), false))

	saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg)
	cur = linkInstr(cur, setExitCode)
	cur = linkInstr(cur, saveRsp)
	cur = linkInstr(cur, saveRbp)

	// Ready to exit the execution.
	cur = m.storeReturnAddressAndExit(cur, execCtrPtr)

	// After the exit, restore the saved registers.
	cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs)

	// Finally ready to return.
	cur = m.revertRBPRSP(cur)
	linkInstr(cur, m.allocateInstr().asRet())

	m.encodeWithoutSSA(m.rootInstr)
	return m.c.Buf()
}

// insertStackBoundsCheck inserts instructions after `cur` to check the stack bounds; if there
// isn't sufficient stack space for the function, it exits the execution and tries growing the
// stack in the Go world.
func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction {
	//		sub $requiredStackSize, %rsp ;; Temporarily update the sp.
	// 		cmp ExecutionContextOffsetStackBottomPtr(%rax), %rsp ;; Compare the stack bottom and the sp.
	// 		ja .ok
	//		add $requiredStackSize, %rsp ;; Reverse the temporary update.
	//		pushq %r15 ;; Save the temporary.
	//		mov $requiredStackSize, %r15
	//		mov %r15, ExecutionContextOffsetStackGrowRequiredSize(%rax) ;; Set the required size in the execution context.
	//		popq %r15 ;; Restore the temporary.
	//		callq *ExecutionContextOffsetStackGrowCallTrampolineAddress(%rax) ;; Call the Go function to grow the stack.
	//		jmp .cont
	// .ok:
	//		add $requiredStackSize, %rsp ;; Reverse the temporary update.
	// .cont:
	cur = m.addRSP(-int32(requiredStackSize), cur)
	cur = linkInstr(cur, m.allocateInstr().asCmpRmiR(true,
		newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackBottomPtr.U32(), raxVReg)),
		rspVReg, true))

	ja := m.allocateInstr()
	cur = linkInstr(cur, ja)

	cur = m.addRSP(int32(requiredStackSize), cur)

	// Save the temporary.
	cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r15VReg)))
	// Load the required size to the temporary.
	cur = linkInstr(cur, m.allocateInstr().asImm(r15VReg, uint64(requiredStackSize), true))
	// Set the required size in the execution context.
	cur = linkInstr(cur, m.allocateInstr().asMovRM(r15VReg,
		newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.U32(), raxVReg)), 8))
	// Restore the temporary.
	cur = linkInstr(cur, m.allocateInstr().asPop64(r15VReg))
	// Call the Go function to grow the stack.
	cur = linkInstr(cur, m.allocateInstr().asCallIndirect(newOperandMem(m.newAmodeImmReg(
		wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.U32(), raxVReg)), nil))
	// Jump to the continuation.
	jmpToCont := m.allocateInstr()
	cur = linkInstr(cur, jmpToCont)

	// .ok:
	okInstr, ok := m.allocateBrTarget()
	cur = linkInstr(cur, okInstr)
	ja.asJmpIf(condNBE, newOperandLabel(ok))
	// On the ok path, we only need to reverse the temporary update.
	cur = m.addRSP(int32(requiredStackSize), cur)

	// .cont:
	contInstr, cont := m.allocateBrTarget()
	cur = linkInstr(cur, contInstr)
	jmpToCont.asJmp(newOperandLabel(cont))

	return cur
}