Commit cbed65f4 authored by LiuYu, committed by Commit Bot

[mips64] Move extended multiply into macro-assembler

Change-Id: I1024b336ac3d24c69f5a47a919b69a9ef363ec66
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2620002
Auto-Submit: Liu yu <liuyu@loongson.cn>
Reviewed-by: Zhao Jiazhong <zhaojiazhong-hf@loongson.cn>
Commit-Queue: Zhao Jiazhong <zhaojiazhong-hf@loongson.cn>
Cr-Commit-Position: refs/heads/master@{#72027}
parent 4b28dbfc
......@@ -260,8 +260,21 @@ class MSARegisters {
static const RegisterAlias aliases_[];
};
// MSA sizes, numbered 0-3 in the order B, H, W, D.
enum MSASize {
  MSA_B = 0x0,
  MSA_H = 0x1,
  MSA_W = 0x2,
  MSA_D = 0x3
};
// MSA data types. Values 0-3 are the signed 8/16/32/64-bit types; each unsigned
// type is its signed counterpart with the top bit (0x4) set.
enum MSADataType {
  // Signed types.
  MSAS8 = 0x0,
  MSAS16 = 0x1,
  MSAS32 = 0x2,
  MSAS64 = 0x3,
  // Unsigned types: signed value with bit 2 set.
  MSAU8 = 0x4 | MSAS8,
  MSAU16 = 0x4 | MSAS16,
  MSAU32 = 0x4 | MSAS32,
  MSAU64 = 0x4 | MSAS64
};
// -----------------------------------------------------------------------------
// Instructions encoding constants.
......
......@@ -2689,6 +2689,43 @@ void TurboAssembler::StoreLane(MSASize sz, MSARegister src, uint8_t laneidx,
}
}
// Emits one case of the extended-multiply switch for `data_type`:
//   1. xor_v clears kSimd128RegZero.
//   2. `interleave` combines kSimd128RegZero with src1 into kSimd128ScratchReg,
//      then with src2 into kSimd128RegZero itself.
//   3. `dot_product` combines the two temporaries into dst.
// kSimd128RegZero and kSimd128ScratchReg are both overwritten.
#define EXT_MUL_BINOP(data_type, interleave, dot_product)      \
  case data_type: {                                            \
    xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);  \
    interleave(kSimd128ScratchReg, kSimd128RegZero, src1);     \
    interleave(kSimd128RegZero, kSimd128RegZero, src2);        \
    dot_product(dst, kSimd128ScratchReg, kSimd128RegZero);     \
    break;                                                     \
  }

// Expands to the six handled cases. `ilv` is the interleave instruction prefix
// (ilvr or ilvl); the 8/16/32-bit types are paired with its _b/_h/_w variants
// and with the signed (dotp_s_*) or unsigned (dotp_u_*) dot product.
#define EXT_MUL_ALL_TYPES(ilv)              \
  EXT_MUL_BINOP(MSAS8, ilv##_b, dotp_s_h)   \
  EXT_MUL_BINOP(MSAS16, ilv##_h, dotp_s_w)  \
  EXT_MUL_BINOP(MSAS32, ilv##_w, dotp_s_d)  \
  EXT_MUL_BINOP(MSAU8, ilv##_b, dotp_u_h)   \
  EXT_MUL_BINOP(MSAU16, ilv##_h, dotp_u_w)  \
  EXT_MUL_BINOP(MSAU32, ilv##_w, dotp_u_d)

// Extended multiply built on the ilvr_* interleave instructions.
// `type` must be one of the 8/16/32-bit data types.
void TurboAssembler::ExtMulLow(MSADataType type, MSARegister dst,
                               MSARegister src1, MSARegister src2) {
  switch (type) {
    EXT_MUL_ALL_TYPES(ilvr)
    default:
      // MSAS64 / MSAU64 have no case above.
      UNREACHABLE();
  }
}

// Extended multiply built on the ilvl_* interleave instructions.
// `type` must be one of the 8/16/32-bit data types.
void TurboAssembler::ExtMulHigh(MSADataType type, MSARegister dst,
                                MSARegister src1, MSARegister src2) {
  switch (type) {
    EXT_MUL_ALL_TYPES(ilvl)
    default:
      // MSAS64 / MSAU64 have no case above.
      UNREACHABLE();
  }
}

#undef EXT_MUL_ALL_TYPES
#undef EXT_MUL_BINOP
void TurboAssembler::MSARoundW(MSARegister dst, MSARegister src,
FPURoundingMode mode) {
BlockTrampolinePoolScope block_trampoline_pool(this);
......
......@@ -795,6 +795,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void LoadLane(MSASize sz, MSARegister dst, uint8_t laneidx, MemOperand src);
void StoreLane(MSASize sz, MSARegister src, uint8_t laneidx, MemOperand dst);
// Extended multiply of src1 and src2 into dst. `type` selects the instruction
// pair that is emitted; ExtMulLow uses the ilvr_* interleave instructions and
// ExtMulHigh the ilvl_* ones. MSAS64 and MSAU64 are not handled (UNREACHABLE).
// Both functions overwrite kSimd128RegZero and kSimd128ScratchReg.
void ExtMulLow(MSADataType type, MSARegister dst, MSARegister src1,
MSARegister src2);
void ExtMulHigh(MSADataType type, MSARegister dst, MSARegister src1,
MSARegister src2);
void MSARoundW(MSARegister dst, MSARegister src, FPURoundingMode mode);
void MSARoundD(MSARegister dst, MSARegister src, FPURoundingMode mode);
......
......@@ -496,15 +496,6 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
i.InputSimd128Register(1)); \
} while (0)
// Emits an extended multiply for the current instruction under a MIPS_SIMD
// CpuFeatureScope: clears kSimd128RegZero, applies op0 to it and SIMD input 0
// (result in kSimd128ScratchReg), applies op0 to it and SIMD input 1 (result in
// kSimd128RegZero), then applies op1 to both temporaries to produce the output
// register. At the call sites op0 is an ilvr_*/ilvl_* instruction and op1 a
// dotp_s_*/dotp_u_* instruction. Both temporaries are overwritten.
#define ASSEMBLE_SIMD_EXTENDED_MULTIPLY(op0, op1) \
do { \
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); \
__ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero); \
__ op0(kSimd128ScratchReg, kSimd128RegZero, i.InputSimd128Register(0)); \
__ op0(kSimd128RegZero, kSimd128RegZero, i.InputSimd128Register(1)); \
__ op1(i.OutputSimd128Register(), kSimd128ScratchReg, kSimd128RegZero); \
} while (0)
void CodeGenerator::AssembleDeconstructFrame() {
__ mov(sp, fp);
__ Pop(ra, fp);
......@@ -2450,42 +2441,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kMips64I64x2ExtMulLowI32x4S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_w, dotp_s_d);
break;
case kMips64I64x2ExtMulHighI32x4S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_w, dotp_s_d);
break;
case kMips64I64x2ExtMulLowI32x4U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_w, dotp_u_d);
break;
case kMips64I64x2ExtMulHighI32x4U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_w, dotp_u_d);
break;
case kMips64I32x4ExtMulLowI16x8S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_h, dotp_s_w);
break;
case kMips64I32x4ExtMulHighI16x8S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_h, dotp_s_w);
break;
case kMips64I32x4ExtMulLowI16x8U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_h, dotp_u_w);
break;
case kMips64I32x4ExtMulHighI16x8U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_h, dotp_u_w);
break;
case kMips64I16x8ExtMulLowI8x16S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_b, dotp_s_h);
break;
case kMips64I16x8ExtMulHighI8x16S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_b, dotp_s_h);
break;
case kMips64I16x8ExtMulLowI8x16U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_b, dotp_u_h);
case kMips64ExtMulLow: {
auto dt = static_cast<MSADataType>(MiscField::decode(instr->opcode()));
__ ExtMulLow(dt, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
case kMips64I16x8ExtMulHighI8x16U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_b, dotp_u_h);
}
case kMips64ExtMulHigh: {
auto dt = static_cast<MSADataType>(MiscField::decode(instr->opcode()));
__ ExtMulHigh(dt, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kMips64F32x4Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ FmoveLow(kScratchReg, i.InputSingleRegister(0));
......@@ -4738,7 +4705,6 @@ void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
#undef ASSEMBLE_IEEE754_BINOP
#undef ASSEMBLE_IEEE754_UNOP
#undef ASSEMBLE_F64X2_ARITHMETIC_BINOP
#undef ASSEMBLE_SIMD_EXTENDED_MULTIPLY
#undef TRACE_MSG
#undef TRACE_UNIMPL
......
......@@ -222,10 +222,8 @@ namespace compiler {
V(Mips64I64x2ShrU) \
V(Mips64I64x2BitMask) \
V(Mips64I64x2Eq) \
V(Mips64I64x2ExtMulLowI32x4S) \
V(Mips64I64x2ExtMulHighI32x4S) \
V(Mips64I64x2ExtMulLowI32x4U) \
V(Mips64I64x2ExtMulHighI32x4U) \
V(Mips64ExtMulLow) \
V(Mips64ExtMulHigh) \
V(Mips64F32x4Abs) \
V(Mips64F32x4Neg) \
V(Mips64F32x4Sqrt) \
......@@ -258,10 +256,6 @@ namespace compiler {
V(Mips64I32x4Abs) \
V(Mips64I32x4BitMask) \
V(Mips64I32x4DotI16x8S) \
V(Mips64I32x4ExtMulLowI16x8S) \
V(Mips64I32x4ExtMulHighI16x8S) \
V(Mips64I32x4ExtMulLowI16x8U) \
V(Mips64I32x4ExtMulHighI16x8U) \
V(Mips64I16x8Splat) \
V(Mips64I16x8ExtractLaneU) \
V(Mips64I16x8ExtractLaneS) \
......@@ -291,10 +285,6 @@ namespace compiler {
V(Mips64I16x8RoundingAverageU) \
V(Mips64I16x8Abs) \
V(Mips64I16x8BitMask) \
V(Mips64I16x8ExtMulLowI8x16S) \
V(Mips64I16x8ExtMulHighI8x16S) \
V(Mips64I16x8ExtMulLowI8x16U) \
V(Mips64I16x8ExtMulHighI8x16U) \
V(Mips64I8x16Splat) \
V(Mips64I8x16ExtractLaneU) \
V(Mips64I8x16ExtractLaneS) \
......
......@@ -100,10 +100,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64I64x2ShrU:
case kMips64I64x2BitMask:
case kMips64I64x2Eq:
case kMips64I64x2ExtMulLowI32x4S:
case kMips64I64x2ExtMulHighI32x4S:
case kMips64I64x2ExtMulLowI32x4U:
case kMips64I64x2ExtMulHighI32x4U:
case kMips64ExtMulLow:
case kMips64ExtMulHigh:
case kMips64F32x4Abs:
case kMips64F32x4Add:
case kMips64F32x4AddHoriz:
......@@ -190,10 +188,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64I16x8RoundingAverageU:
case kMips64I16x8Abs:
case kMips64I16x8BitMask:
case kMips64I16x8ExtMulLowI8x16S:
case kMips64I16x8ExtMulHighI8x16S:
case kMips64I16x8ExtMulLowI8x16U:
case kMips64I16x8ExtMulHighI8x16U:
case kMips64I32x4Add:
case kMips64I32x4AddHoriz:
case kMips64I32x4Eq:
......@@ -224,10 +218,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64I32x4Abs:
case kMips64I32x4BitMask:
case kMips64I32x4DotI16x8S:
case kMips64I32x4ExtMulLowI16x8S:
case kMips64I32x4ExtMulHighI16x8S:
case kMips64I32x4ExtMulLowI16x8U:
case kMips64I32x4ExtMulHighI16x8U:
case kMips64I8x16Add:
case kMips64I8x16AddSatS:
case kMips64I8x16AddSatU:
......
......@@ -2951,10 +2951,6 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I64x2Add, kMips64I64x2Add) \
V(I64x2Sub, kMips64I64x2Sub) \
V(I64x2Mul, kMips64I64x2Mul) \
V(I64x2ExtMulLowI32x4S, kMips64I64x2ExtMulLowI32x4S) \
V(I64x2ExtMulHighI32x4S, kMips64I64x2ExtMulHighI32x4S) \
V(I64x2ExtMulLowI32x4U, kMips64I64x2ExtMulLowI32x4U) \
V(I64x2ExtMulHighI32x4U, kMips64I64x2ExtMulHighI32x4U) \
V(F32x4Add, kMips64F32x4Add) \
V(F32x4AddHoriz, kMips64F32x4AddHoriz) \
V(F32x4Sub, kMips64F32x4Sub) \
......@@ -2981,10 +2977,6 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I32x4GtU, kMips64I32x4GtU) \
V(I32x4GeU, kMips64I32x4GeU) \
V(I32x4DotI16x8S, kMips64I32x4DotI16x8S) \
V(I32x4ExtMulLowI16x8S, kMips64I32x4ExtMulLowI16x8S) \
V(I32x4ExtMulHighI16x8S, kMips64I32x4ExtMulHighI16x8S) \
V(I32x4ExtMulLowI16x8U, kMips64I32x4ExtMulLowI16x8U) \
V(I32x4ExtMulHighI16x8U, kMips64I32x4ExtMulHighI16x8U) \
V(I16x8Add, kMips64I16x8Add) \
V(I16x8AddSatS, kMips64I16x8AddSatS) \
V(I16x8AddSatU, kMips64I16x8AddSatU) \
......@@ -3006,10 +2998,6 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I16x8RoundingAverageU, kMips64I16x8RoundingAverageU) \
V(I16x8SConvertI32x4, kMips64I16x8SConvertI32x4) \
V(I16x8UConvertI32x4, kMips64I16x8UConvertI32x4) \
V(I16x8ExtMulLowI8x16S, kMips64I16x8ExtMulLowI8x16S) \
V(I16x8ExtMulHighI8x16S, kMips64I16x8ExtMulHighI8x16S) \
V(I16x8ExtMulLowI8x16U, kMips64I16x8ExtMulLowI8x16U) \
V(I16x8ExtMulHighI8x16U, kMips64I16x8ExtMulHighI8x16U) \
V(I8x16Add, kMips64I8x16Add) \
V(I8x16AddSatS, kMips64I8x16AddSatS) \
V(I8x16AddSatU, kMips64I8x16AddSatU) \
......@@ -3278,6 +3266,27 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) {
VisitUniqueRRR(this, kMips64F64x2Pmax, node);
}
#define VISIT_EXT_MUL(OPCODE1, OPCODE2, TYPE) \
void InstructionSelector::Visit##OPCODE1##ExtMulLow##OPCODE2(Node* node) { \
Mips64OperandGenerator g(this); \
Emit(kMips64ExtMulLow | MiscField::encode(TYPE), g.DefineAsRegister(node), \
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); \
} \
void InstructionSelector::Visit##OPCODE1##ExtMulHigh##OPCODE2(Node* node) { \
Mips64OperandGenerator g(this); \
Emit(kMips64ExtMulHigh | MiscField::encode(TYPE), \
g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), \
g.UseRegister(node->InputAt(1))); \
}
VISIT_EXT_MUL(I64x2, I32x4S, MSAS32)
VISIT_EXT_MUL(I64x2, I32x4U, MSAU32)
VISIT_EXT_MUL(I32x4, I16x8S, MSAS16)
VISIT_EXT_MUL(I32x4, I16x8U, MSAU16)
VISIT_EXT_MUL(I16x8, I8x16S, MSAS8)
VISIT_EXT_MUL(I16x8, I8x16U, MSAU8)
#undef VISIT_EXT_MUL
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
......
......@@ -1696,29 +1696,26 @@ void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
fill_d(dst.fp().toW(), kScratchReg);
}
#define SIMD_BINOP(name, ilv_instr, dotp_instr) \
void LiftoffAssembler::emit_##name( \
#define SIMD_BINOP(name1, name2, type) \
void LiftoffAssembler::emit_##name1##_extmul_low_##name2( \
LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2) { \
xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero); \
ilv_instr(kSimd128ScratchReg, kSimd128RegZero, src1.fp().toW()); \
ilv_instr(kSimd128RegZero, kSimd128RegZero, src2.fp().toW()); \
dotp_instr(dst.fp().toW(), kSimd128ScratchReg, kSimd128RegZero); \
}
SIMD_BINOP(i16x8_extmul_low_i8x16_s, ilvr_b, dotp_s_h)
SIMD_BINOP(i16x8_extmul_high_i8x16_s, ilvl_b, dotp_s_h)
SIMD_BINOP(i16x8_extmul_low_i8x16_u, ilvr_b, dotp_u_h)
SIMD_BINOP(i16x8_extmul_high_i8x16_u, ilvl_b, dotp_u_h)
SIMD_BINOP(i32x4_extmul_low_i16x8_s, ilvr_h, dotp_s_w)
SIMD_BINOP(i32x4_extmul_high_i16x8_s, ilvl_h, dotp_s_w)
SIMD_BINOP(i32x4_extmul_low_i16x8_u, ilvr_h, dotp_u_w)
SIMD_BINOP(i32x4_extmul_high_i16x8_u, ilvl_h, dotp_u_w)
SIMD_BINOP(i64x2_extmul_low_i32x4_s, ilvr_w, dotp_s_d)
SIMD_BINOP(i64x2_extmul_high_i32x4_s, ilvl_w, dotp_s_d)
SIMD_BINOP(i64x2_extmul_low_i32x4_u, ilvr_w, dotp_u_d)
SIMD_BINOP(i64x2_extmul_high_i32x4_u, ilvl_w, dotp_u_d)
TurboAssembler::ExtMulLow(type, dst.fp().toW(), src1.fp().toW(), \
src2.fp().toW()); \
} \
void LiftoffAssembler::emit_##name1##_extmul_high_##name2( \
LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2) { \
TurboAssembler::ExtMulHigh(type, dst.fp().toW(), src1.fp().toW(), \
src2.fp().toW()); \
}
SIMD_BINOP(i16x8, i8x16_s, MSAS8)
SIMD_BINOP(i16x8, i8x16_u, MSAU8)
SIMD_BINOP(i32x4, i16x8_s, MSAS16)
SIMD_BINOP(i32x4, i16x8_u, MSAU16)
SIMD_BINOP(i64x2, i32x4_s, MSAS32)
SIMD_BINOP(i64x2, i32x4_u, MSAU32)
#undef SIMD_BINOP
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment