Commit 91f173aa authored by Shu-yu Guo, committed by Commit Bot

Revert "[wasm-simd] Remove add horiz instructions"

This reverts commit 77838343.

Reason for revert: Mysterious win32 test262 failures: https://ci.chromium.org/ui/p/v8/builders/ci/V8%20Win32%20-%20debug/30119/overview

Original change's description:
> [wasm-simd] Remove add horiz instructions
>
> Bug: v8:6020
> Change-Id: I0605798d03f2e9f9c3c07c49141289889a10a3b0
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2727204
> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#73180}

Bug: v8:6020
Change-Id: Id1a58d7689d506b17ed04a7df67fec4003bc523d
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2733662
Auto-Submit: Shu-yu Guo <syg@chromium.org>
Commit-Queue: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Cr-Commit-Position: refs/heads/master@{#73181}
parent 77838343
......@@ -479,6 +479,24 @@ void ComputePoisonedAddressForLoad(CodeGenerator* codegen,
} \
} while (0)
// Emits a NEON pairwise operation `op` with element size `size` on two
// 128-bit inputs: the low half of the output is op(src0.low, src0.high) and
// the high half is op(src1.low, src1.high). The halves are emitted in the
// order that never overwrites a source half before it has been read:
//  - dst == src0: the low half is written first (that op reads both halves of
//    src0). If src1 is dst as well, both results are identical, so the high
//    half is produced with a plain vmov from the low half.
//  - otherwise: the high half is written first, so that src1 (which may be
//    the same register as dst) is fully read before dst.low() is written.
#define ASSEMBLE_NEON_PAIRWISE_OP(op, size) \
do { \
Simd128Register dst = i.OutputSimd128Register(), \
src0 = i.InputSimd128Register(0), \
src1 = i.InputSimd128Register(1); \
if (dst == src0) { \
__ op(size, dst.low(), src0.low(), src0.high()); \
if (dst == src1) { \
__ vmov(dst.high(), dst.low()); \
} else { \
__ op(size, dst.high(), src1.low(), src1.high()); \
} \
} else { \
__ op(size, dst.high(), src1.low(), src1.high()); \
__ op(size, dst.low(), src0.low(), src0.high()); \
} \
} while (0)
#define ASSEMBLE_F64X2_ARITHMETIC_BINOP(op) \
do { \
__ op(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low(), \
......@@ -2248,6 +2266,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kArmF32x4AddHoriz: {
  // Pairwise float add: the low half of the result is vpadd over the two
  // halves of input 0, the high half is vpadd over the two halves of input 1.
  // The emission order below guarantees that no source half is overwritten
  // before it has been read.
  Simd128Register result = i.OutputSimd128Register();
  Simd128Register lhs = i.InputSimd128Register(0);
  Simd128Register rhs = i.InputSimd128Register(1);
  const bool result_aliases_lhs = (result == lhs);
  const bool result_aliases_rhs = (result == rhs);
  if (!result_aliases_lhs) {
    // The output may still be the same register as input 1, so consume
    // input 1 first (into the high half) and only then write the low half.
    __ vpadd(result.high(), rhs.low(), rhs.high());
    __ vpadd(result.low(), lhs.low(), lhs.high());
    break;
  }
  // Output is input 0: this single op reads both of its halves before the
  // low half is replaced.
  __ vpadd(result.low(), lhs.low(), lhs.high());
  if (result_aliases_rhs) {
    // All three registers coincide, so the high half equals the low half.
    __ vmov(result.high(), result.low());
  } else {
    __ vpadd(result.high(), rhs.low(), rhs.high());
  }
  break;
}
case kArmF32x4Sub: {
__ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -2384,6 +2420,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
// I32x4 horizontal add: vpadd over 32-bit lanes (Neon32), emitted through the
// shared ASSEMBLE_NEON_PAIRWISE_OP helper.
case kArmI32x4AddHoriz:
ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon32);
break;
case kArmI32x4Sub: {
__ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -2589,6 +2628,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
// I16x8 horizontal add: vpadd over 16-bit lanes (Neon16), emitted through the
// shared ASSEMBLE_NEON_PAIRWISE_OP helper.
case kArmI16x8AddHoriz:
ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon16);
break;
case kArmI16x8Sub: {
__ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -3562,6 +3604,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
#undef ASSEMBLE_IEEE754_BINOP
#undef ASSEMBLE_IEEE754_UNOP
#undef ASSEMBLE_NEON_NARROWING_OP
#undef ASSEMBLE_NEON_PAIRWISE_OP
#undef ASSEMBLE_SIMD_SHIFT_LEFT
#undef ASSEMBLE_SIMD_SHIFT_RIGHT
}
......
......@@ -168,6 +168,7 @@ namespace compiler {
V(ArmF32x4RecipApprox) \
V(ArmF32x4RecipSqrtApprox) \
V(ArmF32x4Add) \
V(ArmF32x4AddHoriz) \
V(ArmF32x4Sub) \
V(ArmF32x4Mul) \
V(ArmF32x4Div) \
......@@ -209,6 +210,7 @@ namespace compiler {
V(ArmI32x4Shl) \
V(ArmI32x4ShrS) \
V(ArmI32x4Add) \
V(ArmI32x4AddHoriz) \
V(ArmI32x4Sub) \
V(ArmI32x4Mul) \
V(ArmI32x4MinS) \
......@@ -241,6 +243,7 @@ namespace compiler {
V(ArmI16x8SConvertI32x4) \
V(ArmI16x8Add) \
V(ArmI16x8AddSatS) \
V(ArmI16x8AddHoriz) \
V(ArmI16x8Sub) \
V(ArmI16x8SubSatS) \
V(ArmI16x8Mul) \
......
......@@ -148,6 +148,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF32x4RecipApprox:
case kArmF32x4RecipSqrtApprox:
case kArmF32x4Add:
case kArmF32x4AddHoriz:
case kArmF32x4Sub:
case kArmF32x4Mul:
case kArmF32x4Div:
......@@ -189,6 +190,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmI32x4Shl:
case kArmI32x4ShrS:
case kArmI32x4Add:
case kArmI32x4AddHoriz:
case kArmI32x4Sub:
case kArmI32x4Mul:
case kArmI32x4MinS:
......@@ -221,6 +223,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmI16x8SConvertI32x4:
case kArmI16x8Add:
case kArmI16x8AddSatS:
case kArmI16x8AddHoriz:
case kArmI16x8Sub:
case kArmI16x8SubSatS:
case kArmI16x8Mul:
......
......@@ -2618,6 +2618,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(F64x2Lt, kArmF64x2Lt) \
V(F64x2Le, kArmF64x2Le) \
V(F32x4Add, kArmF32x4Add) \
V(F32x4AddHoriz, kArmF32x4AddHoriz) \
V(F32x4Sub, kArmF32x4Sub) \
V(F32x4Mul, kArmF32x4Mul) \
V(F32x4Min, kArmF32x4Min) \
......@@ -2629,6 +2630,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I64x2Add, kArmI64x2Add) \
V(I64x2Sub, kArmI64x2Sub) \
V(I32x4Add, kArmI32x4Add) \
V(I32x4AddHoriz, kArmI32x4AddHoriz) \
V(I32x4Sub, kArmI32x4Sub) \
V(I32x4Mul, kArmI32x4Mul) \
V(I32x4MinS, kArmI32x4MinS) \
......@@ -2648,6 +2650,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I16x8SConvertI32x4, kArmI16x8SConvertI32x4) \
V(I16x8Add, kArmI16x8Add) \
V(I16x8AddSatS, kArmI16x8AddSatS) \
V(I16x8AddHoriz, kArmI16x8AddHoriz) \
V(I16x8Sub, kArmI16x8Sub) \
V(I16x8SubSatS, kArmI16x8SubSatS) \
V(I16x8Mul, kArmI16x8Mul) \
......
......@@ -2168,6 +2168,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_UNOP_CASE(kArm64F32x4RecipApprox, Frecpe, 4S);
SIMD_UNOP_CASE(kArm64F32x4RecipSqrtApprox, Frsqrte, 4S);
SIMD_BINOP_CASE(kArm64F32x4Add, Fadd, 4S);
SIMD_BINOP_CASE(kArm64F32x4AddHoriz, Faddp, 4S);
SIMD_BINOP_CASE(kArm64F32x4Sub, Fsub, 4S);
SIMD_BINOP_CASE(kArm64F32x4Mul, Fmul, 4S);
SIMD_BINOP_CASE(kArm64F32x4Div, Fdiv, 4S);
......@@ -2360,6 +2361,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
SIMD_BINOP_CASE(kArm64I32x4Add, Add, 4S);
SIMD_BINOP_CASE(kArm64I32x4AddHoriz, Addp, 4S);
SIMD_BINOP_CASE(kArm64I32x4Sub, Sub, 4S);
SIMD_BINOP_CASE(kArm64I32x4Mul, Mul, 4S);
SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I32x4Mla, Mla, 4S);
......@@ -2461,6 +2463,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
SIMD_BINOP_CASE(kArm64I16x8Add, Add, 8H);
SIMD_BINOP_CASE(kArm64I16x8AddSatS, Sqadd, 8H);
SIMD_BINOP_CASE(kArm64I16x8AddHoriz, Addp, 8H);
SIMD_BINOP_CASE(kArm64I16x8Sub, Sub, 8H);
SIMD_BINOP_CASE(kArm64I16x8SubSatS, Sqsub, 8H);
SIMD_BINOP_CASE(kArm64I16x8Mul, Mul, 8H);
......
......@@ -213,6 +213,7 @@ namespace compiler {
V(Arm64F32x4RecipApprox) \
V(Arm64F32x4RecipSqrtApprox) \
V(Arm64F32x4Add) \
V(Arm64F32x4AddHoriz) \
V(Arm64F32x4Sub) \
V(Arm64F32x4Mul) \
V(Arm64F32x4MulElement) \
......@@ -252,6 +253,7 @@ namespace compiler {
V(Arm64I32x4Shl) \
V(Arm64I32x4ShrS) \
V(Arm64I32x4Add) \
V(Arm64I32x4AddHoriz) \
V(Arm64I32x4Sub) \
V(Arm64I32x4Mul) \
V(Arm64I32x4Mla) \
......@@ -283,6 +285,7 @@ namespace compiler {
V(Arm64I16x8SConvertI32x4) \
V(Arm64I16x8Add) \
V(Arm64I16x8AddSatS) \
V(Arm64I16x8AddHoriz) \
V(Arm64I16x8Sub) \
V(Arm64I16x8SubSatS) \
V(Arm64I16x8Mul) \
......
......@@ -178,6 +178,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F32x4RecipApprox:
case kArm64F32x4RecipSqrtApprox:
case kArm64F32x4Add:
case kArm64F32x4AddHoriz:
case kArm64F32x4Sub:
case kArm64F32x4Mul:
case kArm64F32x4MulElement:
......@@ -221,6 +222,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I32x4Shl:
case kArm64I32x4ShrS:
case kArm64I32x4Add:
case kArm64I32x4AddHoriz:
case kArm64I32x4Sub:
case kArm64I32x4Mul:
case kArm64I32x4Mla:
......@@ -252,6 +254,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I16x8SConvertI32x4:
case kArm64I16x8Add:
case kArm64I16x8AddSatS:
case kArm64I16x8AddHoriz:
case kArm64I16x8Sub:
case kArm64I16x8SubSatS:
case kArm64I16x8Mul:
......
......@@ -3465,6 +3465,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F64x2Lt, kArm64F64x2Lt) \
V(F64x2Le, kArm64F64x2Le) \
V(F32x4Add, kArm64F32x4Add) \
V(F32x4AddHoriz, kArm64F32x4AddHoriz) \
V(F32x4Sub, kArm64F32x4Sub) \
V(F32x4Div, kArm64F32x4Div) \
V(F32x4Min, kArm64F32x4Min) \
......@@ -3479,6 +3480,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I64x2Ne, kArm64I64x2Ne) \
V(I64x2GtS, kArm64I64x2GtS) \
V(I64x2GeS, kArm64I64x2GeS) \
V(I32x4AddHoriz, kArm64I32x4AddHoriz) \
V(I32x4Mul, kArm64I32x4Mul) \
V(I32x4MinS, kArm64I32x4MinS) \
V(I32x4MaxS, kArm64I32x4MaxS) \
......@@ -3493,6 +3495,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I32x4DotI16x8S, kArm64I32x4DotI16x8S) \
V(I16x8SConvertI32x4, kArm64I16x8SConvertI32x4) \
V(I16x8AddSatS, kArm64I16x8AddSatS) \
V(I16x8AddHoriz, kArm64I16x8AddHoriz) \
V(I16x8SubSatS, kArm64I16x8SubSatS) \
V(I16x8Mul, kArm64I16x8Mul) \
V(I16x8MinS, kArm64I16x8MinS) \
......
......@@ -2385,6 +2385,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(1));
break;
};
case kIA32F32x4AddHoriz: {
  // F32x4 horizontal add via the three-operand Haddps macro-assembler helper:
  // destination, first source register, and the second source as an operand.
  auto dst = i.OutputSimd128Register();
  auto lhs = i.InputSimd128Register(0);
  __ Haddps(dst, lhs, i.InputOperand(1));
  break;
}
case kIA32F32x4Sub: {
__ Subps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
......@@ -2599,6 +2604,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(1));
break;
}
case kSSEI32x4AddHoriz: {
  // The two-operand phaddd form accumulates into its first operand, so the
  // output register is required to already be input 0.
  auto dst = i.OutputSimd128Register();
  DCHECK_EQ(dst, i.InputSimd128Register(0));
  // phaddd is emitted under an SSSE3 feature scope.
  CpuFeatureScope ssse3_scope(tasm(), SSSE3);
  __ phaddd(dst, i.InputOperand(1));
  break;
}
case kAVXI32x4AddHoriz: {
  // AVX variant: vphaddd takes a separate destination, so no dst == input 0
  // requirement is checked here.
  CpuFeatureScope avx_enabled(tasm(), AVX);
  auto dst = i.OutputSimd128Register();
  auto lhs = i.InputSimd128Register(0);
  __ vphaddd(dst, lhs, i.InputOperand(1));
  break;
}
case kSSEI32x4Sub: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ psubd(i.OutputSimd128Register(), i.InputOperand(1));
......@@ -2920,6 +2937,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(1));
break;
}
case kSSEI16x8AddHoriz: {
  // The two-operand phaddw form accumulates into its first operand, so the
  // output register is required to already be input 0.
  auto dst = i.OutputSimd128Register();
  DCHECK_EQ(dst, i.InputSimd128Register(0));
  // phaddw is emitted under an SSSE3 feature scope.
  CpuFeatureScope ssse3_scope(tasm(), SSSE3);
  __ phaddw(dst, i.InputOperand(1));
  break;
}
case kAVXI16x8AddHoriz: {
  // AVX variant: vphaddw takes a separate destination, so no dst == input 0
  // requirement is checked here.
  CpuFeatureScope avx_enabled(tasm(), AVX);
  auto dst = i.OutputSimd128Register();
  auto lhs = i.InputSimd128Register(0);
  __ vphaddw(dst, lhs, i.InputOperand(1));
  break;
}
case kSSEI16x8Sub: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ psubw(i.OutputSimd128Register(), i.InputOperand(1));
......
......@@ -172,6 +172,7 @@ namespace compiler {
V(IA32F32x4RecipApprox) \
V(IA32F32x4RecipSqrtApprox) \
V(IA32F32x4Add) \
V(IA32F32x4AddHoriz) \
V(IA32F32x4Sub) \
V(IA32F32x4Mul) \
V(IA32F32x4Div) \
......@@ -201,6 +202,8 @@ namespace compiler {
V(IA32I32x4ShrS) \
V(SSEI32x4Add) \
V(AVXI32x4Add) \
V(SSEI32x4AddHoriz) \
V(AVXI32x4AddHoriz) \
V(SSEI32x4Sub) \
V(AVXI32x4Sub) \
V(SSEI32x4Mul) \
......@@ -254,6 +257,8 @@ namespace compiler {
V(AVXI16x8Add) \
V(SSEI16x8AddSatS) \
V(AVXI16x8AddSatS) \
V(SSEI16x8AddHoriz) \
V(AVXI16x8AddHoriz) \
V(SSEI16x8Sub) \
V(AVXI16x8Sub) \
V(SSEI16x8SubSatS) \
......
......@@ -157,6 +157,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32F32x4RecipApprox:
case kIA32F32x4RecipSqrtApprox:
case kIA32F32x4Add:
case kIA32F32x4AddHoriz:
case kIA32F32x4Sub:
case kIA32F32x4Mul:
case kIA32F32x4Div:
......@@ -186,6 +187,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I32x4ShrS:
case kSSEI32x4Add:
case kAVXI32x4Add:
case kSSEI32x4AddHoriz:
case kAVXI32x4AddHoriz:
case kSSEI32x4Sub:
case kAVXI32x4Sub:
case kSSEI32x4Mul:
......@@ -239,6 +242,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI16x8Add:
case kSSEI16x8AddSatS:
case kAVXI16x8AddSatS:
case kSSEI16x8AddHoriz:
case kAVXI16x8AddHoriz:
case kSSEI16x8Sub:
case kAVXI16x8Sub:
case kSSEI16x8SubSatS:
......
......@@ -2217,6 +2217,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(F32x4Lt) \
V(F32x4Le) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I32x4MinS) \
......@@ -2232,6 +2233,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I16x8SConvertI32x4) \
V(I16x8Add) \
V(I16x8AddSatS) \
V(I16x8AddHoriz) \
V(I16x8Sub) \
V(I16x8SubSatS) \
V(I16x8Mul) \
......@@ -2258,6 +2260,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
#define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
V(F32x4Add) \
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
......
......@@ -1968,6 +1968,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF32x4RecipSqrtApprox(node);
case IrOpcode::kF32x4Add:
return MarkAsSimd128(node), VisitF32x4Add(node);
case IrOpcode::kF32x4AddHoriz:
return MarkAsSimd128(node), VisitF32x4AddHoriz(node);
case IrOpcode::kF32x4Sub:
return MarkAsSimd128(node), VisitF32x4Sub(node);
case IrOpcode::kF32x4Mul:
......@@ -2076,6 +2078,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI32x4ShrS(node);
case IrOpcode::kI32x4Add:
return MarkAsSimd128(node), VisitI32x4Add(node);
case IrOpcode::kI32x4AddHoriz:
return MarkAsSimd128(node), VisitI32x4AddHoriz(node);
case IrOpcode::kI32x4Sub:
return MarkAsSimd128(node), VisitI32x4Sub(node);
case IrOpcode::kI32x4Mul:
......@@ -2154,6 +2158,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI16x8Add(node);
case IrOpcode::kI16x8AddSatS:
return MarkAsSimd128(node), VisitI16x8AddSatS(node);
case IrOpcode::kI16x8AddHoriz:
return MarkAsSimd128(node), VisitI16x8AddHoriz(node);
case IrOpcode::kI16x8Sub:
return MarkAsSimd128(node), VisitI16x8Sub(node);
case IrOpcode::kI16x8SubSatS:
......
......@@ -2609,6 +2609,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_SIMD_BINOP(addps);
break;
}
case kX64F32x4AddHoriz: {
  // Two-operand Haddps: the destination doubles as the first source, so the
  // output register is required to be input 0.
  auto dst = i.OutputSimd128Register();
  DCHECK_EQ(dst, i.InputSimd128Register(0));
  __ Haddps(dst, i.InputSimd128Register(1));
  break;
}
case kX64F32x4Sub: {
ASSEMBLE_SIMD_BINOP(subps);
break;
......@@ -2955,6 +2960,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_SIMD_BINOP(paddd);
break;
}
case kX64I32x4AddHoriz: {
// I32x4 horizontal add, emitted as phaddd through the generic
// ASSEMBLE_SIMD_BINOP helper.
ASSEMBLE_SIMD_BINOP(phaddd);
break;
}
case kX64I32x4Sub: {
ASSEMBLE_SIMD_BINOP(psubd);
break;
......@@ -3169,6 +3178,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_SIMD_BINOP(paddsw);
break;
}
case kX64I16x8AddHoriz: {
// I16x8 horizontal add, emitted as phaddw through the generic
// ASSEMBLE_SIMD_BINOP helper.
ASSEMBLE_SIMD_BINOP(phaddw);
break;
}
case kX64I16x8Sub: {
ASSEMBLE_SIMD_BINOP(psubw);
break;
......
......@@ -188,6 +188,7 @@ namespace compiler {
V(X64F32x4RecipApprox) \
V(X64F32x4RecipSqrtApprox) \
V(X64F32x4Add) \
V(X64F32x4AddHoriz) \
V(X64F32x4Sub) \
V(X64F32x4Mul) \
V(X64F32x4Div) \
......@@ -235,6 +236,7 @@ namespace compiler {
V(X64I32x4Shl) \
V(X64I32x4ShrS) \
V(X64I32x4Add) \
V(X64I32x4AddHoriz) \
V(X64I32x4Sub) \
V(X64I32x4Mul) \
V(X64I32x4MinS) \
......@@ -272,6 +274,7 @@ namespace compiler {
V(X64I16x8SConvertI32x4) \
V(X64I16x8Add) \
V(X64I16x8AddSatS) \
V(X64I16x8AddHoriz) \
V(X64I16x8Sub) \
V(X64I16x8SubSatS) \
V(X64I16x8Mul) \
......
......@@ -164,6 +164,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4Neg:
case kX64F32x4Sqrt:
case kX64F32x4Add:
case kX64F32x4AddHoriz:
case kX64F32x4Sub:
case kX64F32x4Mul:
case kX64F32x4Div:
......@@ -211,6 +212,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I32x4Shl:
case kX64I32x4ShrS:
case kX64I32x4Add:
case kX64I32x4AddHoriz:
case kX64I32x4Sub:
case kX64I32x4Mul:
case kX64I32x4MinS:
......@@ -248,6 +250,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I16x8SConvertI32x4:
case kX64I16x8Add:
case kX64I16x8AddSatS:
case kX64I16x8AddHoriz:
case kX64I16x8Sub:
case kX64I16x8SubSatS:
case kX64I16x8Mul:
......
......@@ -2853,6 +2853,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(I64x2ExtMulLowI32x4U) \
V(I64x2ExtMulHighI32x4U) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I32x4MinS) \
......@@ -2870,6 +2871,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(I16x8UConvertI32x4) \
V(I16x8Add) \
V(I16x8AddSatS) \
V(I16x8AddHoriz) \
V(I16x8Sub) \
V(I16x8SubSatS) \
V(I16x8Mul) \
......@@ -2909,6 +2911,7 @@ VISIT_ATOMIC_BINOP(Xor)
#define SIMD_BINOP_LIST(V) \
V(F64x2Min) \
V(F64x2Max) \
V(F32x4AddHoriz) \
V(F32x4Min) \
V(F32x4Max) \
V(I64x2Ne) \
......
......@@ -405,6 +405,7 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
V(F32x4RecipApprox, Operator::kNoProperties, 1, 0, 1) \
V(F32x4RecipSqrtApprox, Operator::kNoProperties, 1, 0, 1) \
V(F32x4Add, Operator::kCommutative, 2, 0, 1) \
V(F32x4AddHoriz, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Sub, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Mul, Operator::kCommutative, 2, 0, 1) \
V(F32x4Div, Operator::kNoProperties, 2, 0, 1) \
......@@ -454,6 +455,7 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
V(I32x4Shl, Operator::kNoProperties, 2, 0, 1) \
V(I32x4ShrS, Operator::kNoProperties, 2, 0, 1) \
V(I32x4Add, Operator::kCommutative, 2, 0, 1) \
V(I32x4AddHoriz, Operator::kNoProperties, 2, 0, 1) \
V(I32x4Sub, Operator::kNoProperties, 2, 0, 1) \
V(I32x4Mul, Operator::kCommutative, 2, 0, 1) \
V(I32x4MinS, Operator::kCommutative, 2, 0, 1) \
......@@ -490,6 +492,7 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
V(I16x8SConvertI32x4, Operator::kNoProperties, 2, 0, 1) \
V(I16x8Add, Operator::kCommutative, 2, 0, 1) \
V(I16x8AddSatS, Operator::kCommutative, 2, 0, 1) \
V(I16x8AddHoriz, Operator::kNoProperties, 2, 0, 1) \
V(I16x8Sub, Operator::kNoProperties, 2, 0, 1) \
V(I16x8SubSatS, Operator::kNoProperties, 2, 0, 1) \
V(I16x8Mul, Operator::kCommutative, 2, 0, 1) \
......
......@@ -642,6 +642,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F32x4RecipApprox();
const Operator* F32x4RecipSqrtApprox();
const Operator* F32x4Add();
const Operator* F32x4AddHoriz();
const Operator* F32x4Sub();
const Operator* F32x4Mul();
const Operator* F32x4Div();
......@@ -698,6 +699,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I32x4Shl();
const Operator* I32x4ShrS();
const Operator* I32x4Add();
const Operator* I32x4AddHoriz();
const Operator* I32x4Sub();
const Operator* I32x4Mul();
const Operator* I32x4MinS();
......@@ -739,6 +741,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I16x8SConvertI32x4();
const Operator* I16x8Add();
const Operator* I16x8AddSatS();
const Operator* I16x8AddHoriz();
const Operator* I16x8Sub();
const Operator* I16x8SubSatS();
const Operator* I16x8Mul();
......
......@@ -797,6 +797,7 @@
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(F32x4Add) \
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
......@@ -853,6 +854,7 @@
V(I32x4Shl) \
V(I32x4ShrS) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I32x4MinS) \
......@@ -896,6 +898,7 @@
V(I16x8SConvertI32x4) \
V(I16x8Add) \
V(I16x8AddSatS) \
V(I16x8AddHoriz) \
V(I16x8Sub) \
V(I16x8SubSatS) \
V(I16x8Mul) \
......
......@@ -140,6 +140,7 @@ void SimdScalarLowering::LowerGraph() {
V(I32x4Shl) \
V(I32x4ShrS) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I32x4MinS) \
......@@ -210,6 +211,7 @@ void SimdScalarLowering::LowerGraph() {
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(F32x4Add) \
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
......@@ -249,6 +251,7 @@ void SimdScalarLowering::LowerGraph() {
V(I16x8SConvertI32x4) \
V(I16x8Add) \
V(I16x8AddSatS) \
V(I16x8AddHoriz) \
V(I16x8Sub) \
V(I16x8SubSatS) \
V(I16x8Mul) \
......@@ -1629,6 +1632,14 @@ void SimdScalarLowering::LowerNode(Node* node) {
I32X4_BINOP_CASE(kS128Or, Word32Or)
I32X4_BINOP_CASE(kS128Xor, Word32Xor)
#undef I32X4_BINOP_CASE
case IrOpcode::kI32x4AddHoriz: {
// Scalar lowering of the horizontal add: lanes are combined with Int32Add.
// Unlike the element-wise lowerings, this call passes an explicit `false` as
// the last argument. NOTE(review): presumably that flag switches
// LowerBinaryOp to pairwise (horizontal) operand selection -- confirm
// against its declaration.
LowerBinaryOp(node, rep_type, machine()->Int32Add(), false);
break;
}
case IrOpcode::kI16x8AddHoriz: {
// Scalar lowering of the 16-bit horizontal add via the small-int helper,
// using Int32Add as the per-lane operator. The explicit `false` last argument
// is what differs from the plain I16x8Add lowering. NOTE(review): presumably
// it selects pairwise (horizontal) operand selection -- confirm against
// LowerBinaryOpForSmallInt's declaration.
LowerBinaryOpForSmallInt(node, rep_type, machine()->Int32Add(), false);
break;
}
case IrOpcode::kI16x8Add:
case IrOpcode::kI8x16Add: {
LowerBinaryOpForSmallInt(node, rep_type, machine()->Int32Add());
......@@ -1876,6 +1887,10 @@ void SimdScalarLowering::LowerNode(Node* node) {
LowerShiftOp(node, rep_type);
break;
}
case IrOpcode::kF32x4AddHoriz: {
// Scalar lowering of the float horizontal add: lanes are combined with
// Float32Add. The explicit `false` last argument is what differs from the
// element-wise F32x4 binops. NOTE(review): presumably it selects pairwise
// (horizontal) operand selection -- confirm against LowerBinaryOp's
// declaration.
LowerBinaryOp(node, rep_type, machine()->Float32Add(), false);
break;
}
#define F32X4_BINOP_CASE(name) \
case IrOpcode::kF32x4##name: { \
LowerBinaryOp(node, rep_type, machine()->Float32##name()); \
......
......@@ -4632,6 +4632,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprF32x4Add:
return graph()->NewNode(mcgraph()->machine()->F32x4Add(), inputs[0],
inputs[1]);
// Translate the wasm opcode into an F32x4AddHoriz machine node over the two
// operands in inputs[0] and inputs[1].
case wasm::kExprF32x4AddHoriz:
return graph()->NewNode(mcgraph()->machine()->F32x4AddHoriz(), inputs[0],
inputs[1]);
case wasm::kExprF32x4Sub:
return graph()->NewNode(mcgraph()->machine()->F32x4Sub(), inputs[0],
inputs[1]);
......@@ -4796,6 +4799,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI32x4Add:
return graph()->NewNode(mcgraph()->machine()->I32x4Add(), inputs[0],
inputs[1]);
// Translate the wasm opcode into an I32x4AddHoriz machine node over the two
// operands in inputs[0] and inputs[1].
case wasm::kExprI32x4AddHoriz:
return graph()->NewNode(mcgraph()->machine()->I32x4AddHoriz(), inputs[0],
inputs[1]);
case wasm::kExprI32x4Sub:
return graph()->NewNode(mcgraph()->machine()->I32x4Sub(), inputs[0],
inputs[1]);
......@@ -4909,6 +4915,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI16x8AddSatS:
return graph()->NewNode(mcgraph()->machine()->I16x8AddSatS(), inputs[0],
inputs[1]);
// Translate the wasm opcode into an I16x8AddHoriz machine node over the two
// operands in inputs[0] and inputs[1].
case wasm::kExprI16x8AddHoriz:
return graph()->NewNode(mcgraph()->machine()->I16x8AddHoriz(), inputs[0],
inputs[1]);
case wasm::kExprI16x8Sub:
return graph()->NewNode(mcgraph()->machine()->I16x8Sub(), inputs[0],
inputs[1]);
......
......@@ -256,6 +256,7 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIMDF_OP(Gt, "gt")
CASE_SIMDF_OP(Ge, "ge")
CASE_SIMDF_OP(Abs, "abs")
CASE_F32x4_OP(AddHoriz, "add_horizontal")
CASE_F32x4_OP(RecipApprox, "recip_approx")
CASE_F32x4_OP(RecipSqrtApprox, "recip_sqrt_approx")
CASE_SIMDF_OP(Min, "min")
......@@ -289,6 +290,8 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_CONVERT_OP(Convert, I64x2, I32x4High, "i32x4_high", "convert")
CASE_SIGN_OP(SIMDI, Shr, "shr")
CASE_SIMDI_OP(Shl, "shl")
CASE_I32x4_OP(AddHoriz, "add_horizontal")
CASE_I16x8_OP(AddHoriz, "add_horizontal")
CASE_SIGN_OP(I16x8, AddSat, "add_sat")
CASE_SIGN_OP(I8x16, AddSat, "add_sat")
CASE_SIGN_OP(I16x8, SubSat, "sub_sat")
......
......@@ -519,6 +519,9 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(F32x4Qfms, 0xfdd4, s_sss) \
V(F64x2Qfma, 0xfdfe, s_sss) \
V(F64x2Qfms, 0xfdff, s_sss) \
V(I16x8AddHoriz, 0xfdaf, s_ss) \
V(I32x4AddHoriz, 0xfdb0, s_ss) \
V(F32x4AddHoriz, 0xfdb2, s_ss) \
V(F32x4RecipApprox, 0xfdb3, s_s) \
V(F32x4RecipSqrtApprox, 0xfdbc, s_s)
......
......@@ -2927,6 +2927,27 @@ void RunBinaryLaneOpTest(
}
}
// Checks I32x4AddHoriz: adjacent lane pairs are summed, with the first half of
// the result taken from the first input and the second half from the second.
WASM_SIMD_TEST(I32x4AddHoriz) {
FLAG_SCOPE(wasm_simd_post_mvp);
// Inputs are [0 1 2 3] and [4 5 6 7], so the expected lanes are
// [0+1, 2+3, 4+5, 6+7] = [1 5 9 13].
RunBinaryLaneOpTest<int32_t>(execution_tier, lower_simd, kExprI32x4AddHoriz,
{{1, 5, 9, 13}});
}
// Checks I16x8AddHoriz: adjacent lane pairs are summed, with the first half of
// the result taken from the first input and the second half from the second.
WASM_SIMD_TEST(I16x8AddHoriz) {
FLAG_SCOPE(wasm_simd_post_mvp);
// Inputs are [0 1 2 3 4 5 6 7] and [8 9 10 11 12 13 14 15], so the expected
// lanes are [0+1, 2+3, 4+5, 6+7, 8+9, 10+11, 12+13, 14+15].
RunBinaryLaneOpTest<int16_t>(execution_tier, lower_simd, kExprI16x8AddHoriz,
{{1, 5, 9, 13, 17, 21, 25, 29}});
}
// Checks F32x4AddHoriz: adjacent lane pairs are summed, with the first half of
// the result taken from the first input and the second half from the second.
WASM_SIMD_TEST(F32x4AddHoriz) {
FLAG_SCOPE(wasm_simd_post_mvp);
// Inputs are [0.0f 1.0f 2.0f 3.0f] and [4.0f 5.0f 6.0f 7.0f], so the expected
// lanes are [0+1, 2+3, 4+5, 6+7] = [1.0f 5.0f 9.0f 13.0f].
RunBinaryLaneOpTest<float>(execution_tier, lower_simd, kExprF32x4AddHoriz,
{{1.0f, 5.0f, 9.0f, 13.0f}});
}
// Test shuffle ops.
void RunShuffleOpTest(TestExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode simd_op,
......
......@@ -2752,6 +2752,28 @@ class WasmInterpreterInternals {
Push(WasmValue(Simd128(res)));
return true;
}
// Interpreter implementation of the horizontal-add opcodes. Pops the two
// operands (the second operand is popped first), views each as `stype` via
// to_##name(), and fills `count` result lanes: for i < count / 2, result lane
// i is the sum of lanes 2i and 2i+1 of the first operand, and result lane
// i + count / 2 is the sum of lanes 2i and 2i+1 of the second operand. All
// lane indices go through the LANE() macro. Each partial sum is passed to
// has_nondeterminism() and OR-ed into possible_nondeterminism_ before the
// result is pushed as a Simd128 value.
// NOTE(review): the sums use a plain `+` on the lane values; for the integer
// instantiations, confirm that the overflow behavior of that addition is the
// intended one.
#define ADD_HORIZ_CASE(op, name, stype, count) \
case kExpr##op: { \
WasmValue v2 = Pop(); \
WasmValue v1 = Pop(); \
stype s1 = v1.to_s128().to_##name(); \
stype s2 = v2.to_s128().to_##name(); \
stype res; \
for (size_t i = 0; i < count / 2; ++i) { \
auto result1 = s1.val[LANE(i * 2, s1)] + s1.val[LANE(i * 2 + 1, s1)]; \
possible_nondeterminism_ |= has_nondeterminism(result1); \
res.val[LANE(i, res)] = result1; \
auto result2 = s2.val[LANE(i * 2, s2)] + s2.val[LANE(i * 2 + 1, s2)]; \
possible_nondeterminism_ |= has_nondeterminism(result2); \
res.val[LANE(i + count / 2, res)] = result2; \
} \
Push(WasmValue(Simd128(res))); \
return true; \
}
ADD_HORIZ_CASE(I32x4AddHoriz, i32x4, int4, 4)
ADD_HORIZ_CASE(F32x4AddHoriz, f32x4, float4, 4)
ADD_HORIZ_CASE(I16x8AddHoriz, i16x8, int8, 8)
#undef ADD_HORIZ_CASE
case kExprI32x4DotI16x8S: {
int8 v2 = Pop().to_s128().to_i16x8();
int8 v1 = Pop().to_s128().to_i16x8();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment