Commit a71c338d authored by bbudge's avatar bbudge Committed by Commit bot

[WASM SIMD] Implement horizontal add for float and integer types.

- Adds new F32x4AddHoriz, I32x4AddHoriz, etc. to WASM opcodes.
- Implements them for ARM.

LOG=N
BUG=v8:6020

Review-Url: https://codereview.chromium.org/2804883008
Cr-Commit-Position: refs/heads/master@{#44812}
parent 6c0e81bd
......@@ -4486,13 +4486,16 @@ void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1,
emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2));
}
enum NeonPairwiseOp { VPMIN, VPMAX };
enum NeonPairwiseOp { VPADD, VPMIN, VPMAX };
static Instr EncodeNeonPairwiseOp(NeonPairwiseOp op, NeonDataType dt,
DwVfpRegister dst, DwVfpRegister src1,
DwVfpRegister src2) {
int op_encoding = 0;
switch (op) {
case VPADD:
op_encoding = 0xB * B8 | B4;
break;
case VPMIN:
op_encoding = 0xA * B8 | B4;
break;
......@@ -4515,6 +4518,30 @@ static Instr EncodeNeonPairwiseOp(NeonPairwiseOp op, NeonDataType dt,
n * B7 | m * B5 | vm | op_encoding;
}
void Assembler::vpadd(DwVfpRegister dst, DwVfpRegister src1,
DwVfpRegister src2) {
DCHECK(IsEnabled(NEON));
// Dd = vpadd(Dn, Dm) SIMD floating-point pairwise ADD (vpadd.f32).
// NOTE(review): the comment previously said "integer", but this encoding is
// the floating-point form (it disassembles as vpadd.f32); the integer form
// is the NeonSize overload below.
// Instruction details available in ARM DDI 0406C.b, A8-982.
int vd, d;
dst.split_code(&vd, &d);  // Split into 4-bit register code plus D bit.
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 |
m * B5 | vm);
}
void Assembler::vpadd(NeonSize size, DwVfpRegister dst, DwVfpRegister src1,
DwVfpRegister src2) {
DCHECK(IsEnabled(NEON));
// Dd = vpadd.i<size>(Dn, Dm) SIMD integer pairwise ADD.
// Instruction details available in ARM DDI 0406C.b, A8-980.
// NeonSizeToDatatype leaves the U bit clear; the instruction is
// sign-agnostic (it disassembles as vpadd.i8/.i16/.i32).
emit(EncodeNeonPairwiseOp(VPADD, NeonSizeToDatatype(size), dst, src1, src2));
}
void Assembler::vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
DwVfpRegister src2) {
DCHECK(IsEnabled(NEON));
......
......@@ -1371,6 +1371,9 @@ class Assembler : public AssemblerBase {
void vmax(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vmax(NeonDataType dt, QwNeonRegister dst,
QwNeonRegister src1, QwNeonRegister src2);
void vpadd(DwVfpRegister dst, DwVfpRegister src1, DwVfpRegister src2);
void vpadd(NeonSize size, DwVfpRegister dst, DwVfpRegister src1,
DwVfpRegister src2);
void vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
DwVfpRegister src2);
void vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
......
......@@ -324,6 +324,8 @@ enum LFlag {
Short = 0 << 22 // Short load/store coprocessor.
};
// Neon sizes.
enum NeonSize { Neon8 = 0x0, Neon16 = 0x1, Neon32 = 0x2, Neon64 = 0x3 };
// NEON data type
enum NeonDataType {
......@@ -339,6 +341,11 @@ enum NeonDataType {
inline int NeonU(NeonDataType dt) { return static_cast<int>(dt) >> 2; }
inline int NeonSz(NeonDataType dt) { return static_cast<int>(dt) & 0x3; }
// Convert sizes to data types (U bit is clear).
// Safe because NeonSize values (0..3) occupy only the low two bits, which
// NeonSz() reads as the size field, while NeonU() reads bit 2 — zero here,
// so the resulting datatype is the signed/size-only encoding.
inline NeonDataType NeonSizeToDatatype(NeonSize size) {
return static_cast<NeonDataType>(size);
}
enum NeonListType {
nlt_1 = 0x7,
nlt_2 = 0xA,
......@@ -346,13 +353,6 @@ enum NeonListType {
nlt_4 = 0x2
};
enum NeonSize {
Neon8 = 0x0,
Neon16 = 0x1,
Neon32 = 0x2,
Neon64 = 0x3
};
// -----------------------------------------------------------------------------
// Supervisor Call (svc) specific support.
......
......@@ -1950,6 +1950,13 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
op, size, Vd, Vn, Vm);
break;
}
case 0xb: {
// vpadd.i<size> Dd, Dm, Dn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vpadd.i%d d%d, d%d, d%d",
size, Vd, Vn, Vm);
break;
}
case 0xd: {
if (instr->Bit(4) == 0) {
const char* op = (instr->Bits(21, 20) == 0) ? "vadd" : "vsub";
......@@ -2130,10 +2137,16 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
break;
}
case 0xd: {
if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
// vmul.f32 Qd, Qn, Qm
if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
instr->Bit(4) == 1) {
// vmul.f32 Qd, Qm, Qn
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmul.f32 q%d, q%d, q%d", Vd, Vn, Vm);
} else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 0 &&
instr->Bit(4) == 0) {
// vpadd.f32 Dd, Dm, Dn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vpadd.f32 d%d, d%d, d%d", Vd, Vn, Vm);
} else {
Unknown(instr);
}
......
......@@ -4278,6 +4278,20 @@ void PairwiseMinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) {
simulator->set_neon_register<T, kDoubleSize>(Vd, dst);
}
// Simulates a NEON pairwise add (vpadd): adjacent lane pairs of Dn are summed
// into the low half of Dd, and adjacent lane pairs of Dm into the high half.
template <typename T>
void PairwiseAdd(Simulator* simulator, int Vd, int Vm, int Vn) {
static const int kLanes = kDoubleSize / sizeof(T);
static const int kHalf = kLanes / 2;
T a[kLanes], b[kLanes], result[kLanes];
simulator->get_neon_register<T, kDoubleSize>(Vn, a);
simulator->get_neon_register<T, kDoubleSize>(Vm, b);
for (int j = 0; j < kHalf; ++j) {
const int k = 2 * j;
result[j] = a[k] + a[k + 1];
result[kHalf + j] = b[k] + b[k + 1];
}
simulator->set_neon_register<T, kDoubleSize>(Vd, result);
}
void Simulator::DecodeSpecialCondition(Instruction* instr) {
switch (instr->SpecialValue()) {
case 4: {
......@@ -4489,6 +4503,25 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
}
break;
}
case 0xb: {
// vpadd.i<size> Dd, Dm, Dn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8:
PairwiseAdd<int8_t>(this, Vd, Vm, Vn);
break;
case Neon16:
PairwiseAdd<int16_t>(this, Vd, Vm, Vn);
break;
case Neon32:
PairwiseAdd<int32_t>(this, Vd, Vm, Vn);
break;
default:
UNREACHABLE();
break;
}
break;
}
case 0xd: {
if (instr->Bit(4) == 0) {
float src1[4], src2[4];
......@@ -4837,7 +4870,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
break;
}
case 0xd: {
if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
instr->Bit(4) == 1) {
// vmul.f32 Qd, Qn, Qm
float src1[4], src2[4];
get_neon_register(Vn, src1);
......@@ -4846,6 +4880,10 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
src1[i] = src1[i] * src2[i];
}
set_neon_register(Vd, src1);
} else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 0 &&
instr->Bit(4) == 0) {
// vpadd.f32 Dd, Dn, Dm
PairwiseAdd<float>(this, Vd, Vm, Vn);
} else {
UNIMPLEMENTED();
}
......
......@@ -496,6 +496,41 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
DCHECK_EQ(LeaveCC, i.OutputSBit()); \
} while (0)
// Narrows two quadword sources into one quadword destination with saturating
// narrow (vqmovn) of datatype |dt|: dst.low() <- narrow(src0),
// dst.high() <- narrow(src1). Branch order avoids clobbering a source half
// before it is consumed: if dst aliases src0, src0 is narrowed first (and if
// dst aliases both sources, the narrowed low half is simply copied to the
// high half); otherwise src1 — which may alias dst — is narrowed first.
#define ASSEMBLE_NEON_NARROWING_OP(dt) \
do { \
Simd128Register dst = i.OutputSimd128Register(), \
src0 = i.InputSimd128Register(0), \
src1 = i.InputSimd128Register(1); \
if (dst.is(src0) && dst.is(src1)) { \
__ vqmovn(dt, dst.low(), src0); \
__ vmov(dst.high(), dst.low()); \
} else if (dst.is(src0)) { \
__ vqmovn(dt, dst.low(), src0); \
__ vqmovn(dt, dst.high(), src1); \
} else { \
__ vqmovn(dt, dst.high(), src1); \
__ vqmovn(dt, dst.low(), src0); \
} \
} while (0)
// Emits a pairwise |op| (e.g. vpadd) of element size |size|:
// dst.low() <- op(src0.low(), src0.high()),
// dst.high() <- op(src1.low(), src1.high()).
// Ordered so a source aliased by dst is consumed before dst is written: when
// dst aliases src0 the src0 halves are combined first (and if src1 is the
// same register the result is copied into the high half); otherwise src1,
// which may alias dst, is processed before src0.
#define ASSEMBLE_NEON_PAIRWISE_OP(op, size) \
do { \
Simd128Register dst = i.OutputSimd128Register(), \
src0 = i.InputSimd128Register(0), \
src1 = i.InputSimd128Register(1); \
if (dst.is(src0)) { \
__ op(size, dst.low(), src0.low(), src0.high()); \
if (dst.is(src1)) { \
__ vmov(dst.high(), dst.low()); \
} else { \
__ op(size, dst.high(), src1.low(), src1.high()); \
} \
} else { \
__ op(size, dst.high(), src1.low(), src1.high()); \
__ op(size, dst.low(), src0.low(), src0.high()); \
} \
} while (0)
void CodeGenerator::AssembleDeconstructFrame() {
__ LeaveFrame(StackFrame::MANUAL);
unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
......@@ -1611,6 +1646,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kArmF32x4AddHoriz: {
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
// Make sure we don't overwrite source data before it's used.
if (dst.is(src0)) {
__ vpadd(dst.low(), src0.low(), src0.high());
if (dst.is(src1)) {
__ vmov(dst.high(), dst.low());
} else {
__ vpadd(dst.high(), src1.low(), src1.high());
}
} else {
__ vpadd(dst.high(), src1.low(), src1.high());
__ vpadd(dst.low(), src0.low(), src0.high());
}
break;
}
case kArmF32x4Sub: {
__ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -1699,6 +1752,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kArmI32x4AddHoriz:
ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon32);
break;
case kArmI32x4Sub: {
__ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -1818,25 +1874,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputInt4(1));
break;
}
case kArmI16x8SConvertI32x4: {
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
// Take care not to overwrite a source register before it's used.
if (dst.is(src0) && dst.is(src1)) {
__ vqmovn(NeonS16, dst.low(), src0);
__ vmov(dst.high(), dst.low());
} else if (dst.is(src0)) {
// dst is src0, so narrow src0 first.
__ vqmovn(NeonS16, dst.low(), src0);
__ vqmovn(NeonS16, dst.high(), src1);
} else {
// dst may alias src1, so narrow src1 first.
__ vqmovn(NeonS16, dst.high(), src1);
__ vqmovn(NeonS16, dst.low(), src0);
}
case kArmI16x8SConvertI32x4:
ASSEMBLE_NEON_NARROWING_OP(NeonS16);
break;
}
case kArmI16x8Add: {
__ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -1847,6 +1887,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kArmI16x8AddHoriz:
ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon16);
break;
case kArmI16x8Sub: {
__ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -1909,25 +1952,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputInt4(1));
break;
}
case kArmI16x8UConvertI32x4: {
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
// Take care not to overwrite a source register before it's used.
if (dst.is(src0) && dst.is(src1)) {
__ vqmovn(NeonU16, dst.low(), src0);
__ vmov(dst.high(), dst.low());
} else if (dst.is(src0)) {
// dst is src0, so narrow src0 first.
__ vqmovn(NeonU16, dst.low(), src0);
__ vqmovn(NeonU16, dst.high(), src1);
} else {
// dst may alias src1, so narrow src1 first.
__ vqmovn(NeonU16, dst.high(), src1);
__ vqmovn(NeonU16, dst.low(), src0);
}
case kArmI16x8UConvertI32x4:
ASSEMBLE_NEON_NARROWING_OP(NeonU16);
break;
}
case kArmI16x8AddSaturateU: {
__ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -1986,25 +2013,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputInt3(1));
break;
}
case kArmI8x16SConvertI16x8: {
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
// Take care not to overwrite a source register before it's used.
if (dst.is(src0) && dst.is(src1)) {
__ vqmovn(NeonS8, dst.low(), src0);
__ vmov(dst.high(), dst.low());
} else if (dst.is(src0)) {
// dst is src0, so narrow src0 first.
__ vqmovn(NeonS8, dst.low(), src0);
__ vqmovn(NeonS8, dst.high(), src1);
} else {
// dst may alias src1, so narrow src1 first.
__ vqmovn(NeonS8, dst.high(), src1);
__ vqmovn(NeonS8, dst.low(), src0);
}
case kArmI8x16SConvertI16x8:
ASSEMBLE_NEON_NARROWING_OP(NeonS8);
break;
}
case kArmI8x16Add: {
__ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -2066,25 +2077,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputInt3(1));
break;
}
case kArmI8x16UConvertI16x8: {
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
// Take care not to overwrite a source register before it's used.
if (dst.is(src0) && dst.is(src1)) {
__ vqmovn(NeonU8, dst.low(), src0);
__ vmov(dst.high(), dst.low());
} else if (dst.is(src0)) {
// dst is src0, so narrow src0 first.
__ vqmovn(NeonU8, dst.low(), src0);
__ vqmovn(NeonU8, dst.high(), src1);
} else {
// dst may alias src1, so narrow src1 first.
__ vqmovn(NeonU8, dst.high(), src1);
__ vqmovn(NeonU8, dst.low(), src0);
}
case kArmI8x16UConvertI16x8:
ASSEMBLE_NEON_NARROWING_OP(NeonU8);
break;
}
case kArmI8x16AddSaturateU: {
__ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......
......@@ -134,6 +134,7 @@ namespace compiler {
V(ArmF32x4RecipApprox) \
V(ArmF32x4RecipSqrtApprox) \
V(ArmF32x4Add) \
V(ArmF32x4AddHoriz) \
V(ArmF32x4Sub) \
V(ArmF32x4Mul) \
V(ArmF32x4Min) \
......@@ -152,6 +153,7 @@ namespace compiler {
V(ArmI32x4Shl) \
V(ArmI32x4ShrS) \
V(ArmI32x4Add) \
V(ArmI32x4AddHoriz) \
V(ArmI32x4Sub) \
V(ArmI32x4Mul) \
V(ArmI32x4MinS) \
......@@ -179,6 +181,7 @@ namespace compiler {
V(ArmI16x8SConvertI32x4) \
V(ArmI16x8Add) \
V(ArmI16x8AddSaturateS) \
V(ArmI16x8AddHoriz) \
V(ArmI16x8Sub) \
V(ArmI16x8SubSaturateS) \
V(ArmI16x8Mul) \
......
......@@ -118,6 +118,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF32x4RecipApprox:
case kArmF32x4RecipSqrtApprox:
case kArmF32x4Add:
case kArmF32x4AddHoriz:
case kArmF32x4Sub:
case kArmF32x4Mul:
case kArmF32x4Min:
......@@ -136,6 +137,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmI32x4Shl:
case kArmI32x4ShrS:
case kArmI32x4Add:
case kArmI32x4AddHoriz:
case kArmI32x4Sub:
case kArmI32x4Mul:
case kArmI32x4MinS:
......@@ -163,6 +165,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmI16x8SConvertI32x4:
case kArmI16x8Add:
case kArmI16x8AddSaturateS:
case kArmI16x8AddHoriz:
case kArmI16x8Sub:
case kArmI16x8SubSaturateS:
case kArmI16x8Mul:
......
......@@ -2441,78 +2441,81 @@ VISIT_ATOMIC_BINOP(Xor)
V(I8x16ShrS) \
V(I8x16ShrU)
#define SIMD_BINOP_LIST(V) \
V(F32x4Add, kArmF32x4Add) \
V(F32x4Sub, kArmF32x4Sub) \
V(F32x4Mul, kArmF32x4Mul) \
V(F32x4Min, kArmF32x4Min) \
V(F32x4Max, kArmF32x4Max) \
V(F32x4Eq, kArmF32x4Eq) \
V(F32x4Ne, kArmF32x4Ne) \
V(F32x4Lt, kArmF32x4Lt) \
V(F32x4Le, kArmF32x4Le) \
V(I32x4Add, kArmI32x4Add) \
V(I32x4Sub, kArmI32x4Sub) \
V(I32x4Mul, kArmI32x4Mul) \
V(I32x4MinS, kArmI32x4MinS) \
V(I32x4MaxS, kArmI32x4MaxS) \
V(I32x4Eq, kArmI32x4Eq) \
V(I32x4Ne, kArmI32x4Ne) \
V(I32x4LtS, kArmI32x4LtS) \
V(I32x4LeS, kArmI32x4LeS) \
V(I32x4MinU, kArmI32x4MinU) \
V(I32x4MaxU, kArmI32x4MaxU) \
V(I32x4LtU, kArmI32x4LtU) \
V(I32x4LeU, kArmI32x4LeU) \
V(I16x8SConvertI32x4, kArmI16x8SConvertI32x4) \
V(I16x8Add, kArmI16x8Add) \
V(I16x8AddSaturateS, kArmI16x8AddSaturateS) \
V(I16x8Sub, kArmI16x8Sub) \
V(I16x8SubSaturateS, kArmI16x8SubSaturateS) \
V(I16x8Mul, kArmI16x8Mul) \
V(I16x8MinS, kArmI16x8MinS) \
V(I16x8MaxS, kArmI16x8MaxS) \
V(I16x8Eq, kArmI16x8Eq) \
V(I16x8Ne, kArmI16x8Ne) \
V(I16x8LtS, kArmI16x8LtS) \
V(I16x8LeS, kArmI16x8LeS) \
V(I16x8UConvertI32x4, kArmI16x8UConvertI32x4) \
V(I16x8AddSaturateU, kArmI16x8AddSaturateU) \
V(I16x8SubSaturateU, kArmI16x8SubSaturateU) \
V(I16x8MinU, kArmI16x8MinU) \
V(I16x8MaxU, kArmI16x8MaxU) \
V(I16x8LtU, kArmI16x8LtU) \
V(I16x8LeU, kArmI16x8LeU) \
V(I8x16SConvertI16x8, kArmI8x16SConvertI16x8) \
V(I8x16Add, kArmI8x16Add) \
V(I8x16AddSaturateS, kArmI8x16AddSaturateS) \
V(I8x16Sub, kArmI8x16Sub) \
V(I8x16SubSaturateS, kArmI8x16SubSaturateS) \
V(I8x16Mul, kArmI8x16Mul) \
V(I8x16MinS, kArmI8x16MinS) \
V(I8x16MaxS, kArmI8x16MaxS) \
V(I8x16Eq, kArmI8x16Eq) \
V(I8x16Ne, kArmI8x16Ne) \
V(I8x16LtS, kArmI8x16LtS) \
V(I8x16LeS, kArmI8x16LeS) \
V(I8x16UConvertI16x8, kArmI8x16UConvertI16x8) \
V(I8x16AddSaturateU, kArmI8x16AddSaturateU) \
V(I8x16SubSaturateU, kArmI8x16SubSaturateU) \
V(I8x16MinU, kArmI8x16MinU) \
V(I8x16MaxU, kArmI8x16MaxU) \
V(I8x16LtU, kArmI8x16LtU) \
V(I8x16LeU, kArmI8x16LeU) \
V(S128And, kArmS128And) \
V(S128Or, kArmS128Or) \
V(S128Xor, kArmS128Xor) \
V(S1x4And, kArmS128And) \
V(S1x4Or, kArmS128Or) \
V(S1x4Xor, kArmS128Xor) \
V(S1x8And, kArmS128And) \
V(S1x8Or, kArmS128Or) \
V(S1x8Xor, kArmS128Xor) \
V(S1x16And, kArmS128And) \
V(S1x16Or, kArmS128Or) \
#define SIMD_BINOP_LIST(V) \
V(F32x4Add, kArmF32x4Add) \
V(F32x4AddHoriz, kArmF32x4AddHoriz) \
V(F32x4Sub, kArmF32x4Sub) \
V(F32x4Mul, kArmF32x4Mul) \
V(F32x4Min, kArmF32x4Min) \
V(F32x4Max, kArmF32x4Max) \
V(F32x4Eq, kArmF32x4Eq) \
V(F32x4Ne, kArmF32x4Ne) \
V(F32x4Lt, kArmF32x4Lt) \
V(F32x4Le, kArmF32x4Le) \
V(I32x4Add, kArmI32x4Add) \
V(I32x4AddHoriz, kArmI32x4AddHoriz) \
V(I32x4Sub, kArmI32x4Sub) \
V(I32x4Mul, kArmI32x4Mul) \
V(I32x4MinS, kArmI32x4MinS) \
V(I32x4MaxS, kArmI32x4MaxS) \
V(I32x4Eq, kArmI32x4Eq) \
V(I32x4Ne, kArmI32x4Ne) \
V(I32x4LtS, kArmI32x4LtS) \
V(I32x4LeS, kArmI32x4LeS) \
V(I32x4MinU, kArmI32x4MinU) \
V(I32x4MaxU, kArmI32x4MaxU) \
V(I32x4LtU, kArmI32x4LtU) \
V(I32x4LeU, kArmI32x4LeU) \
V(I16x8SConvertI32x4, kArmI16x8SConvertI32x4) \
V(I16x8Add, kArmI16x8Add) \
V(I16x8AddSaturateS, kArmI16x8AddSaturateS) \
V(I16x8AddHoriz, kArmI16x8AddHoriz) \
V(I16x8Sub, kArmI16x8Sub) \
V(I16x8SubSaturateS, kArmI16x8SubSaturateS) \
V(I16x8Mul, kArmI16x8Mul) \
V(I16x8MinS, kArmI16x8MinS) \
V(I16x8MaxS, kArmI16x8MaxS) \
V(I16x8Eq, kArmI16x8Eq) \
V(I16x8Ne, kArmI16x8Ne) \
V(I16x8LtS, kArmI16x8LtS) \
V(I16x8LeS, kArmI16x8LeS) \
V(I16x8UConvertI32x4, kArmI16x8UConvertI32x4) \
V(I16x8AddSaturateU, kArmI16x8AddSaturateU) \
V(I16x8SubSaturateU, kArmI16x8SubSaturateU) \
V(I16x8MinU, kArmI16x8MinU) \
V(I16x8MaxU, kArmI16x8MaxU) \
V(I16x8LtU, kArmI16x8LtU) \
V(I16x8LeU, kArmI16x8LeU) \
V(I8x16SConvertI16x8, kArmI8x16SConvertI16x8) \
V(I8x16Add, kArmI8x16Add) \
V(I8x16AddSaturateS, kArmI8x16AddSaturateS) \
V(I8x16Sub, kArmI8x16Sub) \
V(I8x16SubSaturateS, kArmI8x16SubSaturateS) \
V(I8x16Mul, kArmI8x16Mul) \
V(I8x16MinS, kArmI8x16MinS) \
V(I8x16MaxS, kArmI8x16MaxS) \
V(I8x16Eq, kArmI8x16Eq) \
V(I8x16Ne, kArmI8x16Ne) \
V(I8x16LtS, kArmI8x16LtS) \
V(I8x16LeS, kArmI8x16LeS) \
V(I8x16UConvertI16x8, kArmI8x16UConvertI16x8) \
V(I8x16AddSaturateU, kArmI8x16AddSaturateU) \
V(I8x16SubSaturateU, kArmI8x16SubSaturateU) \
V(I8x16MinU, kArmI8x16MinU) \
V(I8x16MaxU, kArmI8x16MaxU) \
V(I8x16LtU, kArmI8x16LtU) \
V(I8x16LeU, kArmI8x16LeU) \
V(S128And, kArmS128And) \
V(S128Or, kArmS128Or) \
V(S128Xor, kArmS128Xor) \
V(S1x4And, kArmS128And) \
V(S1x4Or, kArmS128Or) \
V(S1x4Xor, kArmS128Xor) \
V(S1x8And, kArmS128And) \
V(S1x8Or, kArmS128Or) \
V(S1x8Xor, kArmS128Xor) \
V(S1x16And, kArmS128And) \
V(S1x16Or, kArmS128Or) \
V(S1x16Xor, kArmS128Xor)
#define SIMD_SHUFFLE_OP_LIST(V) \
......
......@@ -1509,6 +1509,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF32x4RecipSqrtApprox(node);
case IrOpcode::kF32x4Add:
return MarkAsSimd128(node), VisitF32x4Add(node);
case IrOpcode::kF32x4AddHoriz:
return MarkAsSimd128(node), VisitF32x4AddHoriz(node);
case IrOpcode::kF32x4Sub:
return MarkAsSimd128(node), VisitF32x4Sub(node);
case IrOpcode::kF32x4Mul:
......@@ -1545,6 +1547,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI32x4ShrS(node);
case IrOpcode::kI32x4Add:
return MarkAsSimd128(node), VisitI32x4Add(node);
case IrOpcode::kI32x4AddHoriz:
return MarkAsSimd128(node), VisitI32x4AddHoriz(node);
case IrOpcode::kI32x4Sub:
return MarkAsSimd128(node), VisitI32x4Sub(node);
case IrOpcode::kI32x4Mul:
......@@ -1599,6 +1603,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI16x8Add(node);
case IrOpcode::kI16x8AddSaturateS:
return MarkAsSimd128(node), VisitI16x8AddSaturateS(node);
case IrOpcode::kI16x8AddHoriz:
return MarkAsSimd128(node), VisitI16x8AddHoriz(node);
case IrOpcode::kI16x8Sub:
return MarkAsSimd128(node), VisitI16x8Sub(node);
case IrOpcode::kI16x8SubSaturateS:
......@@ -2149,7 +2155,13 @@ void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) {
}
void InstructionSelector::VisitF32x4Add(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitF32x4Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Mul(Node* node) { UNIMPLEMENTED(); }
......@@ -2207,6 +2219,10 @@ void InstructionSelector::VisitI32x4ShrU(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS &&
// !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
UNIMPLEMENTED();
......@@ -2261,7 +2277,13 @@ void InstructionSelector::VisitI16x8Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8AddSaturateS(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitI16x8AddHoriz(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitI16x8Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8SubSaturateS(Node* node) {
......@@ -2355,7 +2377,9 @@ void InstructionSelector::VisitI8x16Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16AddSaturateS(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitI8x16Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16SubSaturateS(Node* node) {
......
......@@ -232,6 +232,7 @@ MachineType AtomicOpRepresentationOf(Operator const* op) {
V(F32x4RecipApprox, Operator::kNoProperties, 1, 0, 1) \
V(F32x4RecipSqrtApprox, Operator::kNoProperties, 1, 0, 1) \
V(F32x4Add, Operator::kCommutative, 2, 0, 1) \
V(F32x4AddHoriz, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Sub, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Mul, Operator::kCommutative, 2, 0, 1) \
V(F32x4Min, Operator::kCommutative, 2, 0, 1) \
......@@ -246,6 +247,7 @@ MachineType AtomicOpRepresentationOf(Operator const* op) {
V(I32x4SConvertI16x8High, Operator::kNoProperties, 1, 0, 1) \
V(I32x4Neg, Operator::kNoProperties, 1, 0, 1) \
V(I32x4Add, Operator::kCommutative, 2, 0, 1) \
V(I32x4AddHoriz, Operator::kNoProperties, 2, 0, 1) \
V(I32x4Sub, Operator::kNoProperties, 2, 0, 1) \
V(I32x4Mul, Operator::kCommutative, 2, 0, 1) \
V(I32x4MinS, Operator::kCommutative, 2, 0, 1) \
......@@ -268,6 +270,7 @@ MachineType AtomicOpRepresentationOf(Operator const* op) {
V(I16x8SConvertI32x4, Operator::kNoProperties, 2, 0, 1) \
V(I16x8Add, Operator::kCommutative, 2, 0, 1) \
V(I16x8AddSaturateS, Operator::kCommutative, 2, 0, 1) \
V(I16x8AddHoriz, Operator::kNoProperties, 2, 0, 1) \
V(I16x8Sub, Operator::kNoProperties, 2, 0, 1) \
V(I16x8SubSaturateS, Operator::kNoProperties, 2, 0, 1) \
V(I16x8Mul, Operator::kCommutative, 2, 0, 1) \
......
......@@ -444,6 +444,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F32x4RecipApprox();
const Operator* F32x4RecipSqrtApprox();
const Operator* F32x4Add();
const Operator* F32x4AddHoriz();
const Operator* F32x4Sub();
const Operator* F32x4Mul();
const Operator* F32x4Div();
......@@ -464,6 +465,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I32x4Shl(int32_t);
const Operator* I32x4ShrS(int32_t);
const Operator* I32x4Add();
const Operator* I32x4AddHoriz();
const Operator* I32x4Sub();
const Operator* I32x4Mul();
const Operator* I32x4MinS();
......@@ -493,6 +495,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I16x8SConvertI32x4();
const Operator* I16x8Add();
const Operator* I16x8AddSaturateS();
const Operator* I16x8AddHoriz();
const Operator* I16x8Sub();
const Operator* I16x8SubSaturateS();
const Operator* I16x8Mul();
......
......@@ -578,6 +578,7 @@
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(F32x4Add) \
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Min) \
......@@ -598,6 +599,7 @@
V(I32x4Shl) \
V(I32x4ShrS) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I32x4MinS) \
......@@ -629,6 +631,7 @@
V(I16x8SConvertI32x4) \
V(I16x8Add) \
V(I16x8AddSaturateS) \
V(I16x8AddHoriz) \
V(I16x8Sub) \
V(I16x8SubSaturateS) \
V(I16x8Mul) \
......
......@@ -3197,6 +3197,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode,
case wasm::kExprF32x4Add:
return graph()->NewNode(jsgraph()->machine()->F32x4Add(), inputs[0],
inputs[1]);
case wasm::kExprF32x4AddHoriz:
return graph()->NewNode(jsgraph()->machine()->F32x4AddHoriz(), inputs[0],
inputs[1]);
case wasm::kExprF32x4Sub:
return graph()->NewNode(jsgraph()->machine()->F32x4Sub(), inputs[0],
inputs[1]);
......@@ -3246,6 +3249,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode,
case wasm::kExprI32x4Add:
return graph()->NewNode(jsgraph()->machine()->I32x4Add(), inputs[0],
inputs[1]);
case wasm::kExprI32x4AddHoriz:
return graph()->NewNode(jsgraph()->machine()->I32x4AddHoriz(), inputs[0],
inputs[1]);
case wasm::kExprI32x4Sub:
return graph()->NewNode(jsgraph()->machine()->I32x4Sub(), inputs[0],
inputs[1]);
......@@ -3319,6 +3325,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode,
case wasm::kExprI16x8AddSaturateS:
return graph()->NewNode(jsgraph()->machine()->I16x8AddSaturateS(),
inputs[0], inputs[1]);
case wasm::kExprI16x8AddHoriz:
return graph()->NewNode(jsgraph()->machine()->I16x8AddHoriz(), inputs[0],
inputs[1]);
case wasm::kExprI16x8Sub:
return graph()->NewNode(jsgraph()->machine()->I16x8Sub(), inputs[0],
inputs[1]);
......
......@@ -175,6 +175,7 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIMD_OP(Sub, "sub")
CASE_SIMD_OP(Mul, "mul")
CASE_F32x4_OP(Abs, "abs")
CASE_F32x4_OP(AddHoriz, "add_horizontal")
CASE_F32x4_OP(RecipApprox, "recip_approx")
CASE_F32x4_OP(RecipSqrtApprox, "recip_sqrt_approx")
CASE_F32x4_OP(Min, "min")
......@@ -203,6 +204,8 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIGN_OP(SIMDI, Ge, "ge")
CASE_SIGN_OP(SIMDI, Shr, "shr")
CASE_SIMDI_OP(Shl, "shl")
CASE_I32x4_OP(AddHoriz, "add_horizontal")
CASE_I16x8_OP(AddHoriz, "add_horizontal")
CASE_SIGN_OP(I16x8, AddSaturate, "add_saturate")
CASE_SIGN_OP(I8x16, AddSaturate, "add_saturate")
CASE_SIGN_OP(I16x8, SubSaturate, "sub_saturate")
......
......@@ -289,6 +289,7 @@ constexpr WasmCodePosition kNoCodePosition = -1;
V(F32x4RecipApprox, 0xe506, s_s) \
V(F32x4RecipSqrtApprox, 0xe507, s_s) \
V(F32x4Add, 0xe508, s_ss) \
V(F32x4AddHoriz, 0xe5b9, s_ss) \
V(F32x4Sub, 0xe509, s_ss) \
V(F32x4Mul, 0xe50a, s_ss) \
V(F32x4Min, 0xe50c, s_ss) \
......@@ -304,6 +305,7 @@ constexpr WasmCodePosition kNoCodePosition = -1;
V(I32x4Splat, 0xe51b, s_i) \
V(I32x4Neg, 0xe51e, s_s) \
V(I32x4Add, 0xe51f, s_ss) \
V(I32x4AddHoriz, 0xe5ba, s_ss) \
V(I32x4Sub, 0xe520, s_ss) \
V(I32x4Mul, 0xe521, s_ss) \
V(I32x4MinS, 0xe522, s_ss) \
......@@ -330,6 +332,7 @@ constexpr WasmCodePosition kNoCodePosition = -1;
V(I16x8Neg, 0xe53b, s_s) \
V(I16x8Add, 0xe53c, s_ss) \
V(I16x8AddSaturateS, 0xe53d, s_ss) \
V(I16x8AddHoriz, 0xe5bb, s_ss) \
V(I16x8Sub, 0xe53e, s_ss) \
V(I16x8SubSaturateS, 0xe53f, s_ss) \
V(I16x8Mul, 0xe540, s_ss) \
......
......@@ -1297,9 +1297,10 @@ TEST(15) {
uint32_t vabs_s8[4], vabs_s16[4], vabs_s32[4];
uint32_t vneg_s8[4], vneg_s16[4], vneg_s32[4];
uint32_t veor[4], vand[4], vorr[4];
float vdupf[4], vaddf[4], vsubf[4], vmulf[4];
float vdupf[4], vaddf[4], vpaddf[2], vsubf[4], vmulf[4];
uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4];
uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4];
uint32_t vpadd_i8[2], vpadd_i16[2], vpadd_i32[2];
uint32_t vpmin_s8[2], vpmin_u16[2], vpmin_s32[2];
uint32_t vpmax_s8[2], vpmax_u16[2], vpmax_s32[2];
uint32_t vadd8[4], vadd16[4], vadd32[4];
......@@ -1545,6 +1546,13 @@ TEST(15) {
__ vadd(q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vaddf))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vpadd (float).
__ vmov(s0, 1.0);
__ vmov(s1, 2.0);
__ vmov(s2, 3.0);
__ vmov(s3, 4.0);
__ vpadd(d2, d0, d1);
__ vstr(d2, r0, offsetof(T, vpaddf));
// vsub (float).
__ vmov(s4, 2.0);
__ vdup(q0, s4);
......@@ -1637,6 +1645,17 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmax_s32))));
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
// vpadd integer.
__ mov(r4, Operand(0x03));
__ vdup(Neon16, q0, r4);
__ vdup(Neon8, q1, r4);
__ vpadd(Neon8, d0, d0, d2);
__ vstr(d0, r0, offsetof(T, vpadd_i8));
__ vpadd(Neon16, d0, d0, d2);
__ vstr(d0, r0, offsetof(T, vpadd_i16));
__ vpadd(Neon32, d0, d0, d2);
__ vstr(d0, r0, offsetof(T, vpadd_i32));
// vpmin/vpmax integer.
__ mov(r4, Operand(0x03));
__ vdup(Neon16, q0, r4);
......@@ -2115,6 +2134,7 @@ TEST(15) {
CHECK_EQ_SPLAT(vand, 0x00fe00feu);
CHECK_EQ_SPLAT(vorr, 0x00ff00ffu);
CHECK_EQ_SPLAT(vaddf, 2.0);
CHECK_EQ_32X2(vpaddf, 3.0, 7.0);
CHECK_EQ_SPLAT(vminf, 1.0);
CHECK_EQ_SPLAT(vmaxf, 2.0);
CHECK_EQ_SPLAT(vsubf, -1.0);
......@@ -2137,6 +2157,9 @@ TEST(15) {
CHECK_EQ_SPLAT(vmin_s32, 0xffffffffu);
CHECK_EQ_SPLAT(vmax_s32, 0xffu);
// [0, 3, 0, 3, ...] and [3, 3, 3, 3, ...]
CHECK_EQ_32X2(vpadd_i8, 0x03030303u, 0x06060606u);
CHECK_EQ_32X2(vpadd_i16, 0x0c0c0606u, 0x06060606u);
CHECK_EQ_32X2(vpadd_i32, 0x12120c0cu, 0x06060606u);
CHECK_EQ_32X2(vpmin_s8, 0x00000000u, 0x03030303u);
CHECK_EQ_32X2(vpmax_s8, 0x03030303u, 0x03030303u);
// [0, ffff, 0, ffff] and [ffff, ffff]
......
......@@ -1044,6 +1044,14 @@ TEST(Neon) {
"f3142670 vmin.u16 q1, q2, q8");
COMPARE(vmax(NeonS32, q15, q0, q8),
"f260e660 vmax.s32 q15, q0, q8");
COMPARE(vpadd(d0, d1, d2),
"f3010d02 vpadd.f32 d0, d1, d2");
COMPARE(vpadd(Neon8, d0, d1, d2),
"f2010b12 vpadd.i8 d0, d1, d2");
COMPARE(vpadd(Neon16, d0, d1, d2),
"f2110b12 vpadd.i16 d0, d1, d2");
COMPARE(vpadd(Neon32, d0, d1, d2),
"f2210b12 vpadd.i32 d0, d1, d2");
COMPARE(vpmax(NeonS8, d0, d1, d2),
"f2010a02 vpmax.s8 d0, d1, d2");
COMPARE(vpmin(NeonU16, d1, d2, d8),
......
......@@ -1569,8 +1569,9 @@ WASM_SIMD_SELECT_TEST(8x16)
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64
#if V8_TARGET_ARCH_ARM
// Test unary ops with a lane test pattern, all lanes distinct.
template <typename T>
void RunUnaryPermuteOpTest(
void RunUnaryLaneOpTest(
WasmOpcode simd_op,
const std::array<T, kSimd128Size / sizeof(T)>& expected) {
FLAG_wasm_simd_prototype = true;
......@@ -1591,35 +1592,35 @@ void RunUnaryPermuteOpTest(
}
WASM_EXEC_COMPILED_TEST(S32x2Reverse) {
RunUnaryPermuteOpTest<int32_t>(kExprS32x2Reverse, {{1, 0, 3, 2}});
RunUnaryLaneOpTest<int32_t>(kExprS32x2Reverse, {{1, 0, 3, 2}});
}
WASM_EXEC_COMPILED_TEST(S16x4Reverse) {
RunUnaryPermuteOpTest<int16_t>(kExprS16x4Reverse, {{3, 2, 1, 0, 7, 6, 5, 4}});
RunUnaryLaneOpTest<int16_t>(kExprS16x4Reverse, {{3, 2, 1, 0, 7, 6, 5, 4}});
}
WASM_EXEC_COMPILED_TEST(S16x2Reverse) {
RunUnaryPermuteOpTest<int16_t>(kExprS16x2Reverse, {{1, 0, 3, 2, 5, 4, 7, 6}});
RunUnaryLaneOpTest<int16_t>(kExprS16x2Reverse, {{1, 0, 3, 2, 5, 4, 7, 6}});
}
WASM_EXEC_COMPILED_TEST(S8x8Reverse) {
RunUnaryPermuteOpTest<int8_t>(kExprS8x8Reverse, {{7, 6, 5, 4, 3, 2, 1, 0, 15,
14, 13, 12, 11, 10, 9, 8}});
RunUnaryLaneOpTest<int8_t>(kExprS8x8Reverse, {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
13, 12, 11, 10, 9, 8}});
}
WASM_EXEC_COMPILED_TEST(S8x4Reverse) {
RunUnaryPermuteOpTest<int8_t>(kExprS8x4Reverse, {{3, 2, 1, 0, 7, 6, 5, 4, 11,
10, 9, 8, 15, 14, 13, 12}});
RunUnaryLaneOpTest<int8_t>(kExprS8x4Reverse, {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
9, 8, 15, 14, 13, 12}});
}
WASM_EXEC_COMPILED_TEST(S8x2Reverse) {
RunUnaryPermuteOpTest<int8_t>(
kExprS8x2Reverse,
{{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}});
RunUnaryLaneOpTest<int8_t>(kExprS8x2Reverse, {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8,
11, 10, 13, 12, 15, 14}});
}
// Test binary ops with two lane test patterns, all lanes distinct.
template <typename T>
void RunBinaryPermuteOpTest(
void RunBinaryLaneOpTest(
WasmOpcode simd_op,
const std::array<T, kSimd128Size / sizeof(T)>& expected) {
FLAG_wasm_simd_prototype = true;
......@@ -1643,92 +1644,104 @@ void RunBinaryPermuteOpTest(
}
}
WASM_EXEC_COMPILED_TEST(F32x4AddHoriz) {
RunBinaryLaneOpTest<float>(kExprF32x4AddHoriz, {{1.0f, 5.0f, 9.0f, 13.0f}});
}
WASM_EXEC_COMPILED_TEST(I32x4AddHoriz) {
RunBinaryLaneOpTest<int32_t>(kExprI32x4AddHoriz, {{1, 5, 9, 13}});
}
WASM_EXEC_COMPILED_TEST(I16x8AddHoriz) {
RunBinaryLaneOpTest<int16_t>(kExprI16x8AddHoriz,
{{1, 5, 9, 13, 17, 21, 25, 29}});
}
WASM_EXEC_COMPILED_TEST(S32x4ZipLeft) {
RunBinaryPermuteOpTest<int32_t>(kExprS32x4ZipLeft, {{0, 4, 1, 5}});
RunBinaryLaneOpTest<int32_t>(kExprS32x4ZipLeft, {{0, 4, 1, 5}});
}
WASM_EXEC_COMPILED_TEST(S32x4ZipRight) {
RunBinaryPermuteOpTest<int32_t>(kExprS32x4ZipRight, {{2, 6, 3, 7}});
RunBinaryLaneOpTest<int32_t>(kExprS32x4ZipRight, {{2, 6, 3, 7}});
}
WASM_EXEC_COMPILED_TEST(S32x4UnzipLeft) {
RunBinaryPermuteOpTest<int32_t>(kExprS32x4UnzipLeft, {{0, 2, 4, 6}});
RunBinaryLaneOpTest<int32_t>(kExprS32x4UnzipLeft, {{0, 2, 4, 6}});
}
WASM_EXEC_COMPILED_TEST(S32x4UnzipRight) {
RunBinaryPermuteOpTest<int32_t>(kExprS32x4UnzipRight, {{1, 3, 5, 7}});
RunBinaryLaneOpTest<int32_t>(kExprS32x4UnzipRight, {{1, 3, 5, 7}});
}
WASM_EXEC_COMPILED_TEST(S32x4TransposeLeft) {
RunBinaryPermuteOpTest<int32_t>(kExprS32x4TransposeLeft, {{0, 4, 2, 6}});
RunBinaryLaneOpTest<int32_t>(kExprS32x4TransposeLeft, {{0, 4, 2, 6}});
}
WASM_EXEC_COMPILED_TEST(S32x4TransposeRight) {
RunBinaryPermuteOpTest<int32_t>(kExprS32x4TransposeRight, {{1, 5, 3, 7}});
RunBinaryLaneOpTest<int32_t>(kExprS32x4TransposeRight, {{1, 5, 3, 7}});
}
WASM_EXEC_COMPILED_TEST(S16x8ZipLeft) {
RunBinaryPermuteOpTest<int16_t>(kExprS16x8ZipLeft,
{{0, 8, 1, 9, 2, 10, 3, 11}});
RunBinaryLaneOpTest<int16_t>(kExprS16x8ZipLeft, {{0, 8, 1, 9, 2, 10, 3, 11}});
}
WASM_EXEC_COMPILED_TEST(S16x8ZipRight) {
RunBinaryPermuteOpTest<int16_t>(kExprS16x8ZipRight,
{{4, 12, 5, 13, 6, 14, 7, 15}});
RunBinaryLaneOpTest<int16_t>(kExprS16x8ZipRight,
{{4, 12, 5, 13, 6, 14, 7, 15}});
}
WASM_EXEC_COMPILED_TEST(S16x8UnzipLeft) {
RunBinaryPermuteOpTest<int16_t>(kExprS16x8UnzipLeft,
{{0, 2, 4, 6, 8, 10, 12, 14}});
RunBinaryLaneOpTest<int16_t>(kExprS16x8UnzipLeft,
{{0, 2, 4, 6, 8, 10, 12, 14}});
}
WASM_EXEC_COMPILED_TEST(S16x8UnzipRight) {
RunBinaryPermuteOpTest<int16_t>(kExprS16x8UnzipRight,
{{1, 3, 5, 7, 9, 11, 13, 15}});
RunBinaryLaneOpTest<int16_t>(kExprS16x8UnzipRight,
{{1, 3, 5, 7, 9, 11, 13, 15}});
}
WASM_EXEC_COMPILED_TEST(S16x8TransposeLeft) {
RunBinaryPermuteOpTest<int16_t>(kExprS16x8TransposeLeft,
{{0, 8, 2, 10, 4, 12, 6, 14}});
RunBinaryLaneOpTest<int16_t>(kExprS16x8TransposeLeft,
{{0, 8, 2, 10, 4, 12, 6, 14}});
}
WASM_EXEC_COMPILED_TEST(S16x8TransposeRight) {
RunBinaryPermuteOpTest<int16_t>(kExprS16x8TransposeRight,
{{1, 9, 3, 11, 5, 13, 7, 15}});
RunBinaryLaneOpTest<int16_t>(kExprS16x8TransposeRight,
{{1, 9, 3, 11, 5, 13, 7, 15}});
}
WASM_EXEC_COMPILED_TEST(S8x16ZipLeft) {
RunBinaryPermuteOpTest<int8_t>(
RunBinaryLaneOpTest<int8_t>(
kExprS8x16ZipLeft,
{{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}});
}
WASM_EXEC_COMPILED_TEST(S8x16ZipRight) {
RunBinaryPermuteOpTest<int8_t>(
RunBinaryLaneOpTest<int8_t>(
kExprS8x16ZipRight,
{{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}});
}
WASM_EXEC_COMPILED_TEST(S8x16UnzipLeft) {
RunBinaryPermuteOpTest<int8_t>(
RunBinaryLaneOpTest<int8_t>(
kExprS8x16UnzipLeft,
{{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}});
}
WASM_EXEC_COMPILED_TEST(S8x16UnzipRight) {
RunBinaryPermuteOpTest<int8_t>(
RunBinaryLaneOpTest<int8_t>(
kExprS8x16UnzipRight,
{{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}});
}
WASM_EXEC_COMPILED_TEST(S8x16TransposeLeft) {
RunBinaryPermuteOpTest<int8_t>(
RunBinaryLaneOpTest<int8_t>(
kExprS8x16TransposeLeft,
{{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}});
}
WASM_EXEC_COMPILED_TEST(S8x16TransposeRight) {
RunBinaryPermuteOpTest<int8_t>(
RunBinaryLaneOpTest<int8_t>(
kExprS8x16TransposeRight,
{{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}});
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment