package amd64

import (
	"fmt"

	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
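
// swizzleMask is the constant used by lowerSwizzle: adding 0x70 to a lane
// index with unsigned saturation sets the most significant bit of any index
// >= 16, which PSHUFB then treats as "write zero to this byte".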
var swizzleMask = [16]byte{
	0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
	0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
}
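
// lowerSwizzle lowers the i8x16 swizzle operation: the selector vector y is
// first adjusted with PADDUSB against swizzleMask so that out-of-range
// indices zero their destination byte, then PSHUFB shuffles x by it.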
func (m *machine) lowerSwizzle(x, y ssa.Value, ret ssa.Value) {
	masklabel := m.getOrAllocateConstLabel(&m.constSwizzleMaskConstIndex, swizzleMask[:])

	maskReg := m.c.AllocateVReg(ssa.TypeV128)
	loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(masklabel)), maskReg)
	m.insert(loadMask)

	xx := m.getOperand_Reg(m.c.ValueDefinition(x))
	tmpDst := m.copyToTmp(xx.reg())
	yy := m.getOperand_Reg(m.c.ValueDefinition(y))
	tmpX := m.copyToTmp(yy.reg())

	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddusb, newOperandReg(maskReg), tmpX))
	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpX), tmpDst))

	m.copyTo(tmpDst, m.c.VRegOf(ret))
}
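
// lowerInsertLane lowers a lane replacement: x is copied into a temporary,
// and y is written into lane `index` with PINSRB/PINSRW/PINSRD/PINSRQ,
// INSERTPS (for f32x4), or MOVSD/MOVLHPS (for f64x2 lane 0/1).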
func (m *machine) lowerInsertLane(x, y ssa.Value, index byte, ret ssa.Value, lane ssa.VecLane) {
	tmpDst := m.c.AllocateVReg(ssa.TypeV128)
	m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, m.getOperand_Mem_Reg(m.c.ValueDefinition(x)), tmpDst))

	yy := m.getOperand_Reg(m.c.ValueDefinition(y))
	switch lane {
	case ssa.VecLaneI8x16:
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, index, yy, tmpDst))
	case ssa.VecLaneI16x8:
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, index, yy, tmpDst))
	case ssa.VecLaneI32x4:
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, index, yy, tmpDst))
	case ssa.VecLaneI64x2:
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, index, yy, tmpDst))
	case ssa.VecLaneF32x4:
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeInsertps, index<<4, yy, tmpDst))
	case ssa.VecLaneF64x2:
		if index == 0 {
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, yy, tmpDst))
		} else {
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMovlhps, yy, tmpDst))
		}
	default:
		panic(fmt.Sprintf("invalid lane type: %s", lane))
	}

	m.copyTo(tmpDst, m.c.VRegOf(ret))
}
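
// lowerExtractLane lowers a lane extraction via PEXTRB/PEXTRW/PEXTRD/PEXTRQ,
// sign- or zero-extending i8/i16 lanes into the destination register. For
// float lanes, lane 0 is taken with MOVSS/MOVSD and other lanes via PSHUFD.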
func (m *machine) lowerExtractLane(x ssa.Value, index byte, signed bool, ret ssa.Value, lane ssa.VecLane) {
	xx := m.getOperand_Reg(m.c.ValueDefinition(x))

	tmpDst := m.c.AllocateVReg(ret.Type())
	m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst))
	switch lane {
	case ssa.VecLaneI8x16:
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrb, index, xx, tmpDst))
		if signed {
			m.insert(m.allocateInstr().asMovsxRmR(extModeBL, newOperandReg(tmpDst), tmpDst))
		} else {
			m.insert(m.allocateInstr().asMovzxRmR(extModeBL, newOperandReg(tmpDst), tmpDst))
		}
	case ssa.VecLaneI16x8:
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrw, index, xx, tmpDst))
		if signed {
			m.insert(m.allocateInstr().asMovsxRmR(extModeWL, newOperandReg(tmpDst), tmpDst))
		} else {
			m.insert(m.allocateInstr().asMovzxRmR(extModeWL, newOperandReg(tmpDst), tmpDst))
		}
	case ssa.VecLaneI32x4:
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrd, index, xx, tmpDst))
	case ssa.VecLaneI64x2:
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, index, xx, tmpDst))
	case ssa.VecLaneF32x4:
		if index == 0 {
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovss, xx, tmpDst))
		} else {
			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, index, xx, tmpDst))
		}
	case ssa.VecLaneF64x2:
		if index == 0 {
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, xx, tmpDst))
		} else {
			m.copyTo(xx.reg(), tmpDst)
			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0b00_00_11_10, newOperandReg(tmpDst), tmpDst))
		}
	default:
		panic(fmt.Sprintf("invalid lane type: %s", lane))
	}

	m.copyTo(tmpDst, m.c.VRegOf(ret))
}
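
// sqmulRoundSat holds 0x8000 in every i16 lane; lowerSqmulRoundSat uses it to
// detect and fix the single overflowing case of PMULHRSW (0x8000 * 0x8000).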
var sqmulRoundSat = [16]byte{
	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
}
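
// lowerSqmulRoundSat lowers the saturating, rounding Q15 multiplication:
// PMULHRSW does the multiply, and lanes that produced 0x8000 (the only
// overflowing result) are flipped to 0x7fff with PCMPEQW + PXOR.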
func (m *machine) lowerSqmulRoundSat(x, y, ret ssa.Value) {
	maskLabel := m.getOrAllocateConstLabel(&m.constSqmulRoundSatIndex, sqmulRoundSat[:])

	tmp := m.c.AllocateVReg(ssa.TypeV128)
	loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp)
	m.insert(loadMask)

	xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
	tmpX := m.copyToTmp(xx.reg())

	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmulhrsw, yy, tmpX))
	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqw, newOperandReg(tmpX), tmp))
	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmpX))

	m.copyTo(tmpX, m.c.VRegOf(ret))
}
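
// lowerVUshr lowers vector logical right shifts. i8x16 needs a dedicated
// path since SSE has no 8-bit shift; the other lane types go through lowerShr.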
func (m *machine) lowerVUshr(x, y, ret ssa.Value, lane ssa.VecLane) {
	switch lane {
	case ssa.VecLaneI8x16:
		m.lowerVUshri8x16(x, y, ret)
	case ssa.VecLaneI16x8, ssa.VecLaneI32x4, ssa.VecLaneI64x2:
		m.lowerShr(x, y, ret, lane, false)
	default:
		panic(fmt.Sprintf("invalid lane type: %s", lane))
	}
}
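
// i8x16LogicalSHRMaskTable has one 16-byte row per shift amount 0..7; row s
// clears the top s bits of every byte, removing the bits that PSRLW shifted
// in from the neighboring byte in lowerVUshri8x16.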
var i8x16LogicalSHRMaskTable = [8 * 16]byte{
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
	0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
	0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
}
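
// lowerVUshri8x16 lowers the i8x16 logical right shift: the vector is shifted
// as 16-bit lanes with PSRLW, then ANDed with the row of
// i8x16LogicalSHRMaskTable selected by the masked shift amount.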
func (m *machine) lowerVUshri8x16(x, y, ret ssa.Value) {
	tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
	m.lowerIconst(tmpGpReg, 0x7, false)

	shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y))
	m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, tmpGpReg, false))

	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
	xx := m.copyToTmp(_xx.reg())

	vecTmp := m.c.AllocateVReg(ssa.TypeV128)
	m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), vecTmp, false))
	m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrlw, newOperandReg(vecTmp), xx))

	maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16LogicalSHRMaskTableIndex, i8x16LogicalSHRMaskTable[:])
	base := m.c.AllocateVReg(ssa.TypeI64)
	lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base)
	m.insert(lea)

	m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false))
	mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0)
	loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), vecTmp)
	m.insert(loadMask)

	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(vecTmp), xx))
	m.copyTo(xx, m.c.VRegOf(ret))
}
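
// lowerVSshr lowers vector arithmetic right shifts. i8x16 and i64x2 need
// dedicated paths since SSE has no 8-bit or 64-bit arithmetic shift.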
func (m *machine) lowerVSshr(x, y, ret ssa.Value, lane ssa.VecLane) {
	switch lane {
	case ssa.VecLaneI8x16:
		m.lowerVSshri8x16(x, y, ret)
	case ssa.VecLaneI16x8, ssa.VecLaneI32x4:
		m.lowerShr(x, y, ret, lane, true)
	case ssa.VecLaneI64x2:
		m.lowerVSshri64x2(x, y, ret)
	default:
		panic(fmt.Sprintf("invalid lane type: %s", lane))
	}
}
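
// lowerVSshri8x16 lowers the i8x16 arithmetic right shift: each byte is
// duplicated into a 16-bit lane with PUNPCKLBW/PUNPCKHBW, both halves are
// shifted with PSRAW by (amount + 8), and PACKSSWB repacks them into bytes.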
func (m *machine) lowerVSshri8x16(x, y, ret ssa.Value) {
	shiftAmtReg := m.c.AllocateVReg(ssa.TypeI32)
	m.lowerIconst(shiftAmtReg, 0x7, false)

	shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y))
	m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, shiftAmtReg, false))

	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
	xx := m.copyToTmp(_xx.reg())

	vecTmp := m.c.AllocateVReg(ssa.TypeV128)
	m.copyTo(xx, vecTmp)
	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpcklbw, newOperandReg(xx), xx))
	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpckhbw, newOperandReg(vecTmp), vecTmp))

	vecTmp2 := m.c.AllocateVReg(ssa.TypeV128)
	m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(8), shiftAmtReg, false))
	m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(shiftAmtReg), vecTmp2, false))

	m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), xx))
	m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), vecTmp))

	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePacksswb, newOperandReg(vecTmp), xx))
	m.copyTo(xx, m.c.VRegOf(ret))
}
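
// lowerVSshri64x2 lowers the i64x2 arithmetic right shift: since SSE has no
// 64-bit arithmetic shift, each lane is extracted to a GPR with PEXTRQ,
// shifted with SAR (count in CL), and written back with PINSRQ.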
func (m *machine) lowerVSshri64x2(x, y, ret ssa.Value) {
	shiftAmt := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
	m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, shiftAmt, rcxVReg))

	tmpGp := m.c.AllocateVReg(ssa.TypeI64)

	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
	xxReg := m.copyToTmp(_xx.reg())

	m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp))
	m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 0, newOperandReg(xxReg), tmpGp))
	m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true))
	m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), xxReg))
	m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 1, newOperandReg(xxReg), tmpGp))
	m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true))
	m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), xxReg))

	m.copyTo(xxReg, m.c.VRegOf(ret))
}
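
// lowerShr is the common path for 16/32/64-bit lane right shifts: the shift
// amount is masked to the lane width, moved into an XMM register, and applied
// with the appropriate PSRL*/PSRA* opcode.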
func (m *machine) lowerShr(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) {
	var modulo uint64
	var shiftOp sseOpcode
	switch lane {
	case ssa.VecLaneI16x8:
		modulo = 0xf
		if signed {
			shiftOp = sseOpcodePsraw
		} else {
			shiftOp = sseOpcodePsrlw
		}
	case ssa.VecLaneI32x4:
		modulo = 0x1f
		if signed {
			shiftOp = sseOpcodePsrad
		} else {
			shiftOp = sseOpcodePsrld
		}
	case ssa.VecLaneI64x2:
		modulo = 0x3f
		if signed {
			panic("BUG")
		}
		shiftOp = sseOpcodePsrlq
	default:
		panic(fmt.Sprintf("invalid lane type: %s", lane))
	}

	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
	xx := m.copyToTmp(_xx.reg())

	tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
	m.lowerIconst(tmpGpReg, modulo, false)
	m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd,
		m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false))

	tmpVec := m.c.AllocateVReg(ssa.TypeV128)
	m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false))

	m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx))
	m.copyTo(xx, m.c.VRegOf(ret))
}
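
// lowerVIshl lowers vector left shifts. There is no 8-bit shift in SSE, so
// the i8x16 case shifts 16-bit lanes with PSLLW and then masks off the bits
// that crossed byte boundaries using i8x16SHLMaskTable.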
func (m *machine) lowerVIshl(x, y, ret ssa.Value, lane ssa.VecLane) {
	var modulo uint64
	var shiftOp sseOpcode
	var isI8x16 bool
	switch lane {
	case ssa.VecLaneI8x16:
		isI8x16 = true
		modulo = 0x7
		shiftOp = sseOpcodePsllw
	case ssa.VecLaneI16x8:
		modulo = 0xf
		shiftOp = sseOpcodePsllw
	case ssa.VecLaneI32x4:
		modulo = 0x1f
		shiftOp = sseOpcodePslld
	case ssa.VecLaneI64x2:
		modulo = 0x3f
		shiftOp = sseOpcodePsllq
	default:
		panic(fmt.Sprintf("invalid lane type: %s", lane))
	}

	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
	xx := m.copyToTmp(_xx.reg())

	tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
	m.lowerIconst(tmpGpReg, modulo, false)
	m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd,
		m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false))

	tmpVec := m.c.AllocateVReg(ssa.TypeV128)
	m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false))

	m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx))

	if isI8x16 {
		maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16SHLMaskTableIndex, i8x16SHLMaskTable[:])
		base := m.c.AllocateVReg(ssa.TypeI64)
		lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base)
		m.insert(lea)

		m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false))
		mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0)
		loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), tmpVec)
		m.insert(loadMask)

		m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(tmpVec), xx))
	}

	m.copyTo(xx, m.c.VRegOf(ret))
}
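
// i8x16SHLMaskTable has one 16-byte row per shift amount 0..7; row s clears
// the low s bits of every byte, removing the bits that PSLLW shifted in from
// the neighboring byte in lowerVIshl.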
var i8x16SHLMaskTable = [8 * 16]byte{
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
	0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
	0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
	0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
	0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
	0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
}
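
// lowerVRound lowers the vector float rounding operations via
// ROUNDPS/ROUNDPD with the rounding mode given by imm.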
func (m *machine) lowerVRound(x, ret ssa.Value, imm byte, _64 bool) {
	xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
	var round sseOpcode
	if _64 {
		round = sseOpcodeRoundpd
	} else {
		round = sseOpcodeRoundps
	}
	m.insert(m.allocateInstr().asXmmUnaryRmRImm(round, imm, xx, m.c.VRegOf(ret)))
}
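
// Constants used by lowerExtIaddPairwise: vectors of 1s for
// PMADDUBSW/PMADDWD, and the bias masks for the unsigned i16x8 case.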
var (
	allOnesI8x16              = [16]byte{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}
	allOnesI16x8              = [16]byte{0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0}
	extAddPairwiseI16x8uMask1 = [16]byte{0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80}
	extAddPairwiseI16x8uMask2 = [16]byte{0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00}
)
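
// lowerExtIaddPairwise lowers the pairwise extending additions. i8x16 lanes
// use PMADDUBSW against a vector of 1s; signed i16x8 uses PMADDWD against 1s;
// unsigned i16x8 flips the sign bits with PXOR, does the signed PMADDWD, and
// adds 0x10000 per result lane to undo the bias.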
func (m *machine) lowerExtIaddPairwise(x, ret ssa.Value, srcLane ssa.VecLane, signed bool) {
	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
	xx := m.copyToTmp(_xx.reg())
	switch srcLane {
	case ssa.VecLaneI8x16:
		allOneReg := m.c.AllocateVReg(ssa.TypeV128)
		mask := m.getOrAllocateConstLabel(&m.constAllOnesI8x16Index, allOnesI8x16[:])
		m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOneReg))

		var resultReg regalloc.VReg
		if signed {
			resultReg = allOneReg
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(xx), resultReg))
		} else {
			resultReg = xx
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(allOneReg), resultReg))
		}
		m.copyTo(resultReg, m.c.VRegOf(ret))

	case ssa.VecLaneI16x8:
		if signed {
			allOnesReg := m.c.AllocateVReg(ssa.TypeV128)
			mask := m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOnesReg))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(allOnesReg), xx))
			m.copyTo(xx, m.c.VRegOf(ret))
		} else {
			maskReg := m.c.AllocateVReg(ssa.TypeV128)
			mask := m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask1Index, extAddPairwiseI16x8uMask1[:])
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))

			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(maskReg), xx))

			mask = m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))

			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(maskReg), xx))

			mask = m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask2Index, extAddPairwiseI16x8uMask2[:])
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))

			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(maskReg), xx))

			m.copyTo(xx, m.c.VRegOf(ret))
		}
	default:
		panic(fmt.Sprintf("invalid lane type: %s", srcLane))
	}
}
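
// lowerWidenLow lowers the "extend low half" conversions via the
// PMOVSX*/PMOVZX* family of instructions.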
func (m *machine) lowerWidenLow(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
	var sseOp sseOpcode
	switch lane {
	case ssa.VecLaneI8x16:
		if signed {
			sseOp = sseOpcodePmovsxbw
		} else {
			sseOp = sseOpcodePmovzxbw
		}
	case ssa.VecLaneI16x8:
		if signed {
			sseOp = sseOpcodePmovsxwd
		} else {
			sseOp = sseOpcodePmovzxwd
		}
	case ssa.VecLaneI32x4:
		if signed {
			sseOp = sseOpcodePmovsxdq
		} else {
			sseOp = sseOpcodePmovzxdq
		}
	default:
		panic(fmt.Sprintf("invalid lane type: %s", lane))
	}

	xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
	m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, xx, m.c.VRegOf(ret)))
}
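
// lowerWidenHigh lowers the "extend high half" conversions: PALIGNR by 8
// moves the upper 8 bytes down, then PMOVSX*/PMOVZX* widens them.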
func (m *machine) lowerWidenHigh(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
	tmp := m.c.AllocateVReg(ssa.TypeV128)
	xx := m.getOperand_Reg(m.c.ValueDefinition(x))
	m.copyTo(xx.reg(), tmp)
	m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePalignr, 8, newOperandReg(tmp), tmp))

	var sseOp sseOpcode
	switch lane {
	case ssa.VecLaneI8x16:
		if signed {
			sseOp = sseOpcodePmovsxbw
		} else {
			sseOp = sseOpcodePmovzxbw
		}
	case ssa.VecLaneI16x8:
		if signed {
			sseOp = sseOpcodePmovsxwd
		} else {
			sseOp = sseOpcodePmovzxwd
		}
	case ssa.VecLaneI32x4:
		if signed {
			sseOp = sseOpcodePmovsxdq
		} else {
			sseOp = sseOpcodePmovzxdq
		}
	default:
		panic(fmt.Sprintf("invalid lane type: %s", lane))
	}

	m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, newOperandReg(tmp), m.c.VRegOf(ret)))
}
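
// lowerLoadSplat loads a scalar from memory and broadcasts it to every lane,
// using PINSR* to place the value and PSHUFB/PSHUFD (or a second PINSRQ for
// i64x2) to replicate it.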
func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, ret ssa.Value, lane ssa.VecLane) {
	tmpDst, tmpGp := m.c.AllocateVReg(ssa.TypeV128), m.c.AllocateVReg(ssa.TypeI64)
	am := newOperandMem(m.lowerToAddressMode(ptr, offset))

	m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst))
	switch lane {
	case ssa.VecLaneI8x16:
		m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, am, tmpGp))
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, 0, newOperandReg(tmpGp), tmpDst))
		tmpZeroVec := m.c.AllocateVReg(ssa.TypeV128)
		m.insert(m.allocateInstr().asZeros(tmpZeroVec))
		m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpZeroVec), tmpDst))
	case ssa.VecLaneI16x8:
		m.insert(m.allocateInstr().asMovzxRmR(extModeWQ, am, tmpGp))
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 0, newOperandReg(tmpGp), tmpDst))
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 1, newOperandReg(tmpGp), tmpDst))
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst))
	case ssa.VecLaneI32x4:
		m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, am, tmpGp))
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, 0, newOperandReg(tmpGp), tmpDst))
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst))
	case ssa.VecLaneI64x2:
		m.insert(m.allocateInstr().asMov64MR(am, tmpGp))
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), tmpDst))
		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), tmpDst))
	default:
		panic(fmt.Sprintf("invalid lane type: %s", lane))
	}

	m.copyTo(tmpDst, m.c.VRegOf(ret))
}
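
// f64x2CvtFromIMask holds the 0x43300000 exponent pattern (the high word of
// 2.0^52) used by the unsigned i32 -> f64x2 conversion in lowerVFcvtFromInt.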
var f64x2CvtFromIMask = [16]byte{
	0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}
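
// lowerVFcvtFromInt lowers integer -> float conversions. Signed lanes map
// directly to CVTDQ2PS/CVTDQ2PD. The unsigned f32x4 path converts the low and
// high bit ranges of each element separately so CVTDQ2PS never sees a value
// as negative; the unsigned f64x2 path forms 2^52 + x with UNPCKLPS against
// f64x2CvtFromIMask and then subtracts 2^52 (twop52).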
func (m *machine) lowerVFcvtFromInt(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
	switch lane {
	case ssa.VecLaneF32x4:
		if signed {
			xx := m.getOperand_Reg(m.c.ValueDefinition(x))
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, xx, m.c.VRegOf(ret)))
		} else {
			xx := m.getOperand_Reg(m.c.ValueDefinition(x))
			tmp := m.copyToTmp(xx.reg())
			tmp2 := m.copyToTmp(xx.reg())
			m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePslld, newOperandImm32(0xa), tmp))
			m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0xa), tmp))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubd, newOperandReg(tmp), tmp2))
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp))
			m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(1), tmp2))
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp2), tmp2))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp2), tmp2))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp), tmp2))
			m.copyTo(tmp2, m.c.VRegOf(ret))
		}
	case ssa.VecLaneF64x2:
		if signed {
			xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2pd, xx, m.c.VRegOf(ret)))
		} else {
			maskReg := m.c.AllocateVReg(ssa.TypeV128)
			maskLabel := m.getOrAllocateConstLabel(&m.constF64x2CvtFromIMaskIndex, f64x2CvtFromIMask[:])
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))

			_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
			xx := m.copyToTmp(_xx.reg())

			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeUnpcklps, newOperandReg(maskReg), xx))

			maskLabel = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:])
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))

			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubpd, newOperandReg(maskReg), xx))

			m.copyTo(xx, m.c.VRegOf(ret))
		}
	default:
		panic(fmt.Sprintf("invalid lane type: %s", lane))
	}
}
var (
	i32sMaxOnF64x2 = [16]byte{
		0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41,
		0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41,
	}
	i32uMaxOnF64x2 = [16]byte{
		0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41,
		0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41,
	}
	twop52 = [16]byte{
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43,
	}
)
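
// lowerVFcvtToIntSat lowers the saturating float -> integer conversions,
// wrapping CVTTPS2DQ/CVTTPD2DQ with extra code that turns NaN lanes into zero
// and clamps out-of-range values instead of letting them become 0x80000000.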
func (m *machine) lowerVFcvtToIntSat(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
	xx := m.copyToTmp(_xx.reg())

	switch lane {
	case ssa.VecLaneF32x4:
		if signed {
			tmp := m.copyToTmp(xx)
			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp), xx))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeXorps, newOperandReg(xx), tmp))
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(xx), tmp))
			m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrad, newOperandImm32(0x1f), tmp))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), xx))
		} else {
			tmp := m.c.AllocateVReg(ssa.TypeV128)
			m.insert(m.allocateInstr().asZeros(tmp))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxps, newOperandReg(tmp), xx))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmp), tmp))
			m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0x1), tmp))
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp))
			tmp2 := m.copyToTmp(xx)
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubps, newOperandReg(tmp), tmp2))
			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredLE_OS), newOperandReg(tmp2), tmp))
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(tmp2), tmp2))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp2))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaxsd, newOperandReg(tmp), tmp2))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(tmp2), xx))
		}
	case ssa.VecLaneF64x2:
		tmp2 := m.c.AllocateVReg(ssa.TypeV128)
		if signed {
			tmp := m.copyToTmp(xx)
			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmppd, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp))
			maskLabel := m.getOrAllocateConstLabel(&m.constI32sMaxOnF64x2Index, i32sMaxOnF64x2[:])
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp2))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp2), tmp))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp), xx))
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttpd2dq, newOperandReg(xx), xx))
		} else {
			tmp := m.c.AllocateVReg(ssa.TypeV128)
			m.insert(m.allocateInstr().asZeros(tmp))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxpd, newOperandReg(tmp), xx))
			maskIndex := m.getOrAllocateConstLabel(&m.constI32uMaxOnF64x2Index, i32uMaxOnF64x2[:])
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp2), xx))
			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeRoundpd, 0x3, newOperandReg(xx), xx))
			maskIndex = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:])
			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2))
			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddpd, newOperandReg(tmp2), xx))
			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeShufps, 0b00_00_10_00, newOperandReg(tmp), xx))
		}
	default:
		panic(fmt.Sprintf("invalid lane type: %s", lane))
	}

	m.copyTo(xx, m.c.VRegOf(ret))
}
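
// lowerNarrow lowers the integer narrowing operations via
// PACKSSWB/PACKUSWB (i16x8) and PACKSSDW/PACKUSDW (i32x4).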
func (m *machine) lowerNarrow(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) {
	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
	xx := m.copyToTmp(_xx.reg())
	yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))

	var sseOp sseOpcode
	switch lane {
	case ssa.VecLaneI16x8:
		if signed {
			sseOp = sseOpcodePacksswb
		} else {
			sseOp = sseOpcodePackuswb
		}
	case ssa.VecLaneI32x4:
		if signed {
			sseOp = sseOpcodePackssdw
		} else {
			sseOp = sseOpcodePackusdw
		}
	default:
		panic(fmt.Sprintf("invalid lane type: %s", lane))
	}
	m.insert(m.allocateInstr().asXmmRmR(sseOp, yy, xx))
	m.copyTo(xx, m.c.VRegOf(ret))
}
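
// lowerWideningPairwiseDotProductS lowers the i16x8 widening pairwise dot
// product (producing i32x4) directly to PMADDWD.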
func (m *machine) lowerWideningPairwiseDotProductS(x, y, ret ssa.Value) {
	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
	xx := m.copyToTmp(_xx.reg())
	yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, yy, xx))
	m.copyTo(xx, m.c.VRegOf(ret))
}
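
// lowerVIabs lowers the vector integer absolute value. i8x16/i16x8/i32x4 use
// PABSB/PABSW/PABSD; i64x2 has no PABSQ, so -x is computed into xmm0 and
// BLENDVPD (whose mask is implicitly xmm0) selects the original x wherever
// -x is negative, i.e. wherever x was positive.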
func (m *machine) lowerVIabs(instr *ssa.Instruction) {
	x, lane := instr.ArgWithLane()
	rd := m.c.VRegOf(instr.Return())

	if lane == ssa.VecLaneI64x2 {
		_xx := m.getOperand_Reg(m.c.ValueDefinition(x))

		blendReg := xmm0VReg
		m.insert(m.allocateInstr().asDefineUninitializedReg(blendReg))

		tmp := m.copyToTmp(_xx.reg())
		xx := m.copyToTmp(_xx.reg())

		m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(blendReg), blendReg))
		m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubq, newOperandReg(xx), blendReg))
		m.copyTo(blendReg, xx)
		m.insert(m.allocateInstr().asBlendvpd(newOperandReg(tmp), xx))

		m.copyTo(xx, rd)
	} else {
		var vecOp sseOpcode
		switch lane {
		case ssa.VecLaneI8x16:
			vecOp = sseOpcodePabsb
		case ssa.VecLaneI16x8:
			vecOp = sseOpcodePabsw
		case ssa.VecLaneI32x4:
			vecOp = sseOpcodePabsd
		}
		rn := m.getOperand_Reg(m.c.ValueDefinition(x))

		i := m.allocateInstr()
		i.asXmmUnaryRmR(vecOp, rn, rd)
		m.insert(i)
	}
}
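
// lowerVIpopcnt lowers the per-byte population count with the nibble-lookup
// technique: the low and high nibbles of each byte are looked up in a
// 16-entry popcount table via PSHUFB and the two results are added with PADDB.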
func (m *machine) lowerVIpopcnt(instr *ssa.Instruction) {
	x := instr.Arg()
	rn := m.getOperand_Reg(m.c.ValueDefinition(x))
	rd := m.c.VRegOf(instr.Return())

	tmp1 := m.c.AllocateVReg(ssa.TypeV128)
	m.lowerVconst(tmp1, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f)

	tmp2 := m.copyToTmp(rn.reg())

	pand := m.allocateInstr()
	pand.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp2)
	m.insert(pand)

	tmp3 := m.copyToTmp(rn.reg())

	psrlw := m.allocateInstr()
	psrlw.asXmmRmiReg(sseOpcodePsrlw, newOperandImm32(4), tmp3)
	m.insert(psrlw)

	pand2 := m.allocateInstr()
	pand2.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp3)
	m.insert(pand2)

	tmp4 := m.c.AllocateVReg(ssa.TypeV128)
	m.lowerVconst(tmp4, 0x03_02_02_01_02_01_01_00, 0x04_03_03_02_03_02_02_01)

	tmp5 := m.copyToTmp(tmp4)

	pshufb := m.allocateInstr()
	pshufb.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp2), tmp4)
	m.insert(pshufb)

	pshufb2 := m.allocateInstr()
	pshufb2.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp3), tmp5)
	m.insert(pshufb2)

	paddb := m.allocateInstr()
	paddb.asXmmRmR(sseOpcodePaddb, newOperandReg(tmp4), tmp5)
	m.insert(paddb)

	m.copyTo(tmp5, rd)
}
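
// lowerVImul lowers vector integer multiplication. i16x8/i32x4 map to
// PMULLW/PMULLD; i64x2 has no single SSE instruction, so it is built from
// three PMULUDQ partial products: (xHi*yLo + xLo*yHi)<<32 + xLo*yLo.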
func (m *machine) lowerVImul(instr *ssa.Instruction) {
	x, y, lane := instr.Arg2WithLane()
	rd := m.c.VRegOf(instr.Return())
	if lane == ssa.VecLaneI64x2 {
		rn := m.getOperand_Reg(m.c.ValueDefinition(x))
		rm := m.getOperand_Reg(m.c.ValueDefinition(y))

		tmp1 := m.copyToTmp(rn.reg())
		shift := m.allocateInstr()
		shift.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp1)
		m.insert(shift)

		mul := m.allocateInstr()
		mul.asXmmRmR(sseOpcodePmuludq, rm, tmp1)
		m.insert(mul)

		tmp2 := m.copyToTmp(rm.reg())
		shift2 := m.allocateInstr()
		shift2.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp2)
		m.insert(shift2)

		mul2 := m.allocateInstr()
		mul2.asXmmRmR(sseOpcodePmuludq, rn, tmp2)
		m.insert(mul2)

		add := m.allocateInstr()
		add.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp2), tmp1)
		m.insert(add)

		shift3 := m.allocateInstr()
		shift3.asXmmRmiReg(sseOpcodePsllq, newOperandImm32(32), tmp1)
		m.insert(shift3)

		tmp3 := m.copyToTmp(rm.reg())
		mul3 := m.allocateInstr()
		mul3.asXmmRmR(sseOpcodePmuludq, rn, tmp3)
		m.insert(mul3)

		add2 := m.allocateInstr()
		add2.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp3), tmp1)
		m.insert(add2)

		m.copyTo(tmp1, rd)
	} else {
		var vecOp sseOpcode
		switch lane {
		case ssa.VecLaneI16x8:
			vecOp = sseOpcodePmullw
		case ssa.VecLaneI32x4:
			vecOp = sseOpcodePmulld
		default:
			panic("unsupported: " + lane.String())
		}
		m.lowerVbBinOp(vecOp, x, y, instr.Return())
	}
}