Commit a90b1ce5 authored by LiuYu, committed by Commit Bot

[wasm-simd][mips] Prototype extended multiply

Port: 4ad68f1c

Bug: v8:11008

Change-Id: I0aa384612b529babf9e526fca83c8c69f58b6f3b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2592828
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Auto-Submit: Liu yu <liuyu@loongson.cn>
Cr-Commit-Position: refs/heads/master@{#71847}
parent 050f6a8d
......@@ -525,6 +525,15 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
i.InputSimd128Register(1)); \
} while (0)
// Emits the MSA instruction sequence shared by every SIMD extended-multiply
// opcode.
//   interleave_op: MSA instruction applied to each input together with a
//                  zeroed register; call sites pass ilvr_* for the "Low"
//                  opcodes and ilvl_* for the "High" opcodes.
//   dotp_op:       MSA instruction that combines the two interleaved vectors
//                  into the output register; call sites pass dotp_s_* for the
//                  signed opcodes and dotp_u_* for the unsigned ones.
// Presumably interleaving with zero makes every second lane contribute 0 to
// the dot product, leaving one widened product per output lane -- confirm
// against the MSA ILVR/ILVL/DOTP definitions.
// NOTE: kSimd128RegZero is cleared on entry but is then overwritten with the
// interleaved second input, so it does not hold zero after this sequence.
#define ASSEMBLE_SIMD_EXTENDED_MULTIPLY(interleave_op, dotp_op)      \
  do {                                                               \
    CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);                    \
    __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);     \
    __ interleave_op(kSimd128ScratchReg, kSimd128RegZero,            \
                     i.InputSimd128Register(0));                     \
    __ interleave_op(kSimd128RegZero, kSimd128RegZero,               \
                     i.InputSimd128Register(1));                     \
    __ dotp_op(i.OutputSimd128Register(), kSimd128ScratchReg,        \
               kSimd128RegZero);                                     \
  } while (0)
void CodeGenerator::AssembleDeconstructFrame() {
__ mov(sp, fp);
__ Pop(ra, fp);
......@@ -2286,6 +2295,42 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kMipsI64x2ExtMulLowI32x4S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_w, dotp_s_d);
break;
case kMipsI64x2ExtMulHighI32x4S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_w, dotp_s_d);
break;
case kMipsI64x2ExtMulLowI32x4U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_w, dotp_u_d);
break;
case kMipsI64x2ExtMulHighI32x4U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_w, dotp_u_d);
break;
case kMipsI32x4ExtMulLowI16x8S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_h, dotp_s_w);
break;
case kMipsI32x4ExtMulHighI16x8S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_h, dotp_s_w);
break;
case kMipsI32x4ExtMulLowI16x8U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_h, dotp_u_w);
break;
case kMipsI32x4ExtMulHighI16x8U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_h, dotp_u_w);
break;
case kMipsI16x8ExtMulLowI8x16S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_b, dotp_s_h);
break;
case kMipsI16x8ExtMulHighI8x16S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_b, dotp_s_h);
break;
case kMipsI16x8ExtMulLowI8x16U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_b, dotp_u_h);
break;
case kMipsI16x8ExtMulHighI8x16U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_b, dotp_u_h);
break;
case kMipsF32x4Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ FmoveLow(kScratchReg, i.InputSingleRegister(0));
......@@ -4430,6 +4475,8 @@ void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
}
#undef __
#undef ASSEMBLE_F64X2_ARITHMETIC_BINOP
#undef ASSEMBLE_SIMD_EXTENDED_MULTIPLY
} // namespace compiler
} // namespace internal
......
......@@ -169,6 +169,10 @@ namespace compiler {
V(MipsI64x2ShrU) \
V(MipsI64x2BitMask) \
V(MipsI64x2Eq) \
V(MipsI64x2ExtMulLowI32x4S) \
V(MipsI64x2ExtMulHighI32x4S) \
V(MipsI64x2ExtMulLowI32x4U) \
V(MipsI64x2ExtMulHighI32x4U) \
V(MipsF32x4Splat) \
V(MipsF32x4ExtractLane) \
V(MipsF32x4ReplaceLane) \
......@@ -219,6 +223,10 @@ namespace compiler {
V(MipsI32x4Abs) \
V(MipsI32x4BitMask) \
V(MipsI32x4DotI16x8S) \
V(MipsI32x4ExtMulLowI16x8S) \
V(MipsI32x4ExtMulHighI16x8S) \
V(MipsI32x4ExtMulLowI16x8U) \
V(MipsI32x4ExtMulHighI16x8U) \
V(MipsI16x8Splat) \
V(MipsI16x8ExtractLaneU) \
V(MipsI16x8ExtractLaneS) \
......@@ -248,6 +256,10 @@ namespace compiler {
V(MipsI16x8RoundingAverageU) \
V(MipsI16x8Abs) \
V(MipsI16x8BitMask) \
V(MipsI16x8ExtMulLowI8x16S) \
V(MipsI16x8ExtMulHighI8x16S) \
V(MipsI16x8ExtMulLowI8x16U) \
V(MipsI16x8ExtMulHighI8x16U) \
V(MipsI8x16Splat) \
V(MipsI8x16ExtractLaneU) \
V(MipsI8x16ExtractLaneS) \
......
......@@ -72,6 +72,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsI64x2ShrU:
case kMipsI64x2BitMask:
case kMipsI64x2Eq:
case kMipsI64x2ExtMulLowI32x4S:
case kMipsI64x2ExtMulHighI32x4S:
case kMipsI64x2ExtMulLowI32x4U:
case kMipsI64x2ExtMulHighI32x4U:
case kMipsF32x4Abs:
case kMipsF32x4Add:
case kMipsF32x4AddHoriz:
......@@ -153,6 +157,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsI16x8UConvertI8x16Low:
case kMipsI16x8Abs:
case kMipsI16x8BitMask:
case kMipsI16x8ExtMulLowI8x16S:
case kMipsI16x8ExtMulHighI8x16S:
case kMipsI16x8ExtMulLowI8x16U:
case kMipsI16x8ExtMulHighI8x16U:
case kMipsI32x4Add:
case kMipsI32x4AddHoriz:
case kMipsI32x4Eq:
......@@ -183,6 +191,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsI32x4Abs:
case kMipsI32x4BitMask:
case kMipsI32x4DotI16x8S:
case kMipsI32x4ExtMulLowI16x8S:
case kMipsI32x4ExtMulHighI16x8S:
case kMipsI32x4ExtMulLowI16x8U:
case kMipsI32x4ExtMulHighI16x8U:
case kMipsI8x16Add:
case kMipsI8x16AddSatS:
case kMipsI8x16AddSatU:
......
......@@ -2187,6 +2187,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I64x2Add, kMipsI64x2Add) \
V(I64x2Sub, kMipsI64x2Sub) \
V(I64x2Mul, kMipsI64x2Mul) \
V(I64x2ExtMulLowI32x4S, kMipsI64x2ExtMulLowI32x4S) \
V(I64x2ExtMulHighI32x4S, kMipsI64x2ExtMulHighI32x4S) \
V(I64x2ExtMulLowI32x4U, kMipsI64x2ExtMulLowI32x4U) \
V(I64x2ExtMulHighI32x4U, kMipsI64x2ExtMulHighI32x4U) \
V(F32x4Add, kMipsF32x4Add) \
V(F32x4AddHoriz, kMipsF32x4AddHoriz) \
V(F32x4Sub, kMipsF32x4Sub) \
......@@ -2214,6 +2218,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I32x4GeU, kMipsI32x4GeU) \
V(I32x4Abs, kMipsI32x4Abs) \
V(I32x4DotI16x8S, kMipsI32x4DotI16x8S) \
V(I32x4ExtMulLowI16x8S, kMipsI32x4ExtMulLowI16x8S) \
V(I32x4ExtMulHighI16x8S, kMipsI32x4ExtMulHighI16x8S) \
V(I32x4ExtMulLowI16x8U, kMipsI32x4ExtMulLowI16x8U) \
V(I32x4ExtMulHighI16x8U, kMipsI32x4ExtMulHighI16x8U) \
V(I16x8Add, kMipsI16x8Add) \
V(I16x8AddSatS, kMipsI16x8AddSatS) \
V(I16x8AddSatU, kMipsI16x8AddSatU) \
......@@ -2234,6 +2242,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I16x8GeU, kMipsI16x8GeU) \
V(I16x8SConvertI32x4, kMipsI16x8SConvertI32x4) \
V(I16x8UConvertI32x4, kMipsI16x8UConvertI32x4) \
V(I16x8ExtMulLowI8x16S, kMipsI16x8ExtMulLowI8x16S) \
V(I16x8ExtMulHighI8x16S, kMipsI16x8ExtMulHighI8x16S) \
V(I16x8ExtMulLowI8x16U, kMipsI16x8ExtMulLowI8x16U) \
V(I16x8ExtMulHighI8x16U, kMipsI16x8ExtMulHighI8x16U) \
V(I16x8RoundingAverageU, kMipsI16x8RoundingAverageU) \
V(I16x8Abs, kMipsI16x8Abs) \
V(I8x16Add, kMipsI8x16Add) \
......
......@@ -496,6 +496,15 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
i.InputSimd128Register(1)); \
} while (0)
// Emits the MSA instruction sequence shared by every SIMD extended-multiply
// opcode.
//   interleave_op: MSA instruction applied to each input together with a
//                  zeroed register; call sites pass ilvr_* for the "Low"
//                  opcodes and ilvl_* for the "High" opcodes.
//   dotp_op:       MSA instruction that combines the two interleaved vectors
//                  into the output register; call sites pass dotp_s_* for the
//                  signed opcodes and dotp_u_* for the unsigned ones.
// Presumably interleaving with zero makes every second lane contribute 0 to
// the dot product, leaving one widened product per output lane -- confirm
// against the MSA ILVR/ILVL/DOTP definitions.
// NOTE: kSimd128RegZero is cleared on entry but is then overwritten with the
// interleaved second input, so it does not hold zero after this sequence.
#define ASSEMBLE_SIMD_EXTENDED_MULTIPLY(interleave_op, dotp_op)      \
  do {                                                               \
    CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);                    \
    __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);     \
    __ interleave_op(kSimd128ScratchReg, kSimd128RegZero,            \
                     i.InputSimd128Register(0));                     \
    __ interleave_op(kSimd128RegZero, kSimd128RegZero,               \
                     i.InputSimd128Register(1));                     \
    __ dotp_op(i.OutputSimd128Register(), kSimd128ScratchReg,        \
               kSimd128RegZero);                                     \
  } while (0)
void CodeGenerator::AssembleDeconstructFrame() {
__ mov(sp, fp);
__ Pop(ra, fp);
......@@ -2459,6 +2468,42 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kMips64I64x2ExtMulLowI32x4S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_w, dotp_s_d);
break;
case kMips64I64x2ExtMulHighI32x4S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_w, dotp_s_d);
break;
case kMips64I64x2ExtMulLowI32x4U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_w, dotp_u_d);
break;
case kMips64I64x2ExtMulHighI32x4U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_w, dotp_u_d);
break;
case kMips64I32x4ExtMulLowI16x8S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_h, dotp_s_w);
break;
case kMips64I32x4ExtMulHighI16x8S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_h, dotp_s_w);
break;
case kMips64I32x4ExtMulLowI16x8U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_h, dotp_u_w);
break;
case kMips64I32x4ExtMulHighI16x8U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_h, dotp_u_w);
break;
case kMips64I16x8ExtMulLowI8x16S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_b, dotp_s_h);
break;
case kMips64I16x8ExtMulHighI8x16S:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_b, dotp_s_h);
break;
case kMips64I16x8ExtMulLowI8x16U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvr_b, dotp_u_h);
break;
case kMips64I16x8ExtMulHighI8x16U:
ASSEMBLE_SIMD_EXTENDED_MULTIPLY(ilvl_b, dotp_u_h);
break;
case kMips64F32x4Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ FmoveLow(kScratchReg, i.InputSingleRegister(0));
......@@ -4710,6 +4755,8 @@ void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT
#undef ASSEMBLE_IEEE754_BINOP
#undef ASSEMBLE_IEEE754_UNOP
#undef ASSEMBLE_F64X2_ARITHMETIC_BINOP
#undef ASSEMBLE_SIMD_EXTENDED_MULTIPLY
#undef TRACE_MSG
#undef TRACE_UNIMPL
......
......@@ -222,6 +222,10 @@ namespace compiler {
V(Mips64I64x2ShrU) \
V(Mips64I64x2BitMask) \
V(Mips64I64x2Eq) \
V(Mips64I64x2ExtMulLowI32x4S) \
V(Mips64I64x2ExtMulHighI32x4S) \
V(Mips64I64x2ExtMulLowI32x4U) \
V(Mips64I64x2ExtMulHighI32x4U) \
V(Mips64F32x4Abs) \
V(Mips64F32x4Neg) \
V(Mips64F32x4Sqrt) \
......@@ -254,6 +258,10 @@ namespace compiler {
V(Mips64I32x4Abs) \
V(Mips64I32x4BitMask) \
V(Mips64I32x4DotI16x8S) \
V(Mips64I32x4ExtMulLowI16x8S) \
V(Mips64I32x4ExtMulHighI16x8S) \
V(Mips64I32x4ExtMulLowI16x8U) \
V(Mips64I32x4ExtMulHighI16x8U) \
V(Mips64I16x8Splat) \
V(Mips64I16x8ExtractLaneU) \
V(Mips64I16x8ExtractLaneS) \
......@@ -283,6 +291,10 @@ namespace compiler {
V(Mips64I16x8RoundingAverageU) \
V(Mips64I16x8Abs) \
V(Mips64I16x8BitMask) \
V(Mips64I16x8ExtMulLowI8x16S) \
V(Mips64I16x8ExtMulHighI8x16S) \
V(Mips64I16x8ExtMulLowI8x16U) \
V(Mips64I16x8ExtMulHighI8x16U) \
V(Mips64I8x16Splat) \
V(Mips64I8x16ExtractLaneU) \
V(Mips64I8x16ExtractLaneS) \
......
......@@ -100,6 +100,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64I64x2ShrU:
case kMips64I64x2BitMask:
case kMips64I64x2Eq:
case kMips64I64x2ExtMulLowI32x4S:
case kMips64I64x2ExtMulHighI32x4S:
case kMips64I64x2ExtMulLowI32x4U:
case kMips64I64x2ExtMulHighI32x4U:
case kMips64F32x4Abs:
case kMips64F32x4Add:
case kMips64F32x4AddHoriz:
......@@ -186,6 +190,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64I16x8RoundingAverageU:
case kMips64I16x8Abs:
case kMips64I16x8BitMask:
case kMips64I16x8ExtMulLowI8x16S:
case kMips64I16x8ExtMulHighI8x16S:
case kMips64I16x8ExtMulLowI8x16U:
case kMips64I16x8ExtMulHighI8x16U:
case kMips64I32x4Add:
case kMips64I32x4AddHoriz:
case kMips64I32x4Eq:
......@@ -216,6 +224,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64I32x4Abs:
case kMips64I32x4BitMask:
case kMips64I32x4DotI16x8S:
case kMips64I32x4ExtMulLowI16x8S:
case kMips64I32x4ExtMulHighI16x8S:
case kMips64I32x4ExtMulLowI16x8U:
case kMips64I32x4ExtMulHighI16x8U:
case kMips64I8x16Add:
case kMips64I8x16AddSatS:
case kMips64I8x16AddSatU:
......
......@@ -2951,6 +2951,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I64x2Add, kMips64I64x2Add) \
V(I64x2Sub, kMips64I64x2Sub) \
V(I64x2Mul, kMips64I64x2Mul) \
V(I64x2ExtMulLowI32x4S, kMips64I64x2ExtMulLowI32x4S) \
V(I64x2ExtMulHighI32x4S, kMips64I64x2ExtMulHighI32x4S) \
V(I64x2ExtMulLowI32x4U, kMips64I64x2ExtMulLowI32x4U) \
V(I64x2ExtMulHighI32x4U, kMips64I64x2ExtMulHighI32x4U) \
V(F32x4Add, kMips64F32x4Add) \
V(F32x4AddHoriz, kMips64F32x4AddHoriz) \
V(F32x4Sub, kMips64F32x4Sub) \
......@@ -2977,6 +2981,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I32x4GtU, kMips64I32x4GtU) \
V(I32x4GeU, kMips64I32x4GeU) \
V(I32x4DotI16x8S, kMips64I32x4DotI16x8S) \
V(I32x4ExtMulLowI16x8S, kMips64I32x4ExtMulLowI16x8S) \
V(I32x4ExtMulHighI16x8S, kMips64I32x4ExtMulHighI16x8S) \
V(I32x4ExtMulLowI16x8U, kMips64I32x4ExtMulLowI16x8U) \
V(I32x4ExtMulHighI16x8U, kMips64I32x4ExtMulHighI16x8U) \
V(I16x8Add, kMips64I16x8Add) \
V(I16x8AddSatS, kMips64I16x8AddSatS) \
V(I16x8AddSatU, kMips64I16x8AddSatU) \
......@@ -2998,6 +3006,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I16x8RoundingAverageU, kMips64I16x8RoundingAverageU) \
V(I16x8SConvertI32x4, kMips64I16x8SConvertI32x4) \
V(I16x8UConvertI32x4, kMips64I16x8UConvertI32x4) \
V(I16x8ExtMulLowI8x16S, kMips64I16x8ExtMulLowI8x16S) \
V(I16x8ExtMulHighI8x16S, kMips64I16x8ExtMulHighI8x16S) \
V(I16x8ExtMulLowI8x16U, kMips64I16x8ExtMulLowI8x16U) \
V(I16x8ExtMulHighI8x16U, kMips64I16x8ExtMulHighI8x16U) \
V(I8x16Add, kMips64I8x16Add) \
V(I8x16AddSatS, kMips64I8x16AddSatS) \
V(I8x16AddSatU, kMips64I8x16AddSatU) \
......
......@@ -365,6 +365,13 @@
'test-run-machops/RunFloat64MulAndFloat64Sub2': [SKIP],
}], # 'mips_arch_variant == r6'
##############################################################################
['arch == mipsel and mips_arch_variant == r1', {
# For MIPS architecture release 1, conversion from double to int is
# unsupported on liftoff.
'test-gc/RunWasmLiftoff_NewDefault': [SKIP],
}], # 'arch == mipsel and mips_arch_variant == r1'
##############################################################################
['system == android', {
# Uses too much memory.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment