Commit 5806d862 authored by bbudge's avatar bbudge Committed by Commit bot

[WASM SIMD] Implement primitive shuffles.

- Adds unary Reverse shuffles (swizzles): S32x2Reverse, S16x4Reverse,
  S16x2Reverse, S8x8Reverse, S8x4Reverse, S8x2Reverse. Reversals are
  done within the sub-vectors that prefix the opcode name, e.g. S8x2
  reverses the 8 consecutive pairs in an S8x16 vector.

- Adds binary Zip (interleave) left and right half-shuffles to return a
  single vector: S32x4ZipLeft, S32x4ZipRight, S16x8ZipLeft, S16x8ZipRight,
  S8x16ZipLeft, S8x16ZipRight.

- Adds binary Unzip (de-interleave) left and right half shuffles to return
  a single vector: S32x4UnzipLeft, S32x4UnzipRight, S16x8UnzipLeft,
  S16x8UnzipRight, S8x16UnzipLeft, S8x16UnzipRight.

- Adds binary Transpose left and right half shuffles to return
  a single vector: S32x4TransposeLeft, S32x4TransposeRight,
  S16x8TransposeLeft, S16x8TransposeRight, S8x16TransposeLeft,
  S8x16TransposeRight.

- Adds binary Concat (concatenate) byte shuffle: S8x16Concat #bytes to
  paste two vectors together.

LOG=N
BUG=v8:6020

Review-Url: https://codereview.chromium.org/2801183002
Cr-Commit-Position: refs/heads/master@{#44734}
parent c38e8865
......@@ -1171,64 +1171,6 @@ void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
VmovExtended(s_code, src_lane.code(), scratch);
}
// Emits code that permutes the four 32-bit lanes of |src| into |dst|.
//
// |lanes| packs one 4-bit source-lane index per destination lane: nibble i
// (counting from the least significant nibble) names the source lane that is
// copied into destination lane i (see the default case below). A handful of
// common patterns are mapped onto single NEON instructions; every other
// pattern falls back to four lane-by-lane moves through VmovExtended, which
// is handed |scratch|.
//
// Only |size| == Neon32 is handled. Any other size trips the DCHECK below,
// and nothing is emitted for it when the surrounding 'if' is not taken.
void MacroAssembler::Swizzle(QwNeonRegister dst, QwNeonRegister src,
Register scratch, NeonSize size, uint32_t lanes) {
// TODO(bbudge) Handle Int16x8, Int8x16 vectors.
DCHECK_EQ(Neon32, size);
// For 32-bit lanes only the low four nibbles of |lanes| are meaningful.
DCHECK_IMPLIES(size == Neon32, lanes < 0xFFFFu);
if (size == Neon32) {
switch (lanes) {
// TODO(bbudge) Handle more special cases.
case 0x3210: // Identity.
Move(dst, src);
return;
case 0x1032: // Swap top and bottom.
vext(dst, src, src, 8);
return;
case 0x2103: // Rotation.
vext(dst, src, src, 12);
return;
case 0x0321: // Rotation.
vext(dst, src, src, 4);
return;
case 0x0000: // Equivalent to vdup.
case 0x1111:
case 0x2222:
case 0x3333: {
// All four nibbles are equal, so the low nibble alone names the lane to
// broadcast. lane_code is a single-precision register code computed as four
// codes per quad register plus the lane index.
int lane_code = src.code() * 4 + (lanes & 0xF);
if (lane_code >= SwVfpRegister::kMaxNumRegisters) {
// The lane has no valid SwVfpRegister code; copy it into the code derived
// from the scratch double register first.
// TODO(bbudge) use vdup (vdup.32 dst, D<src>[lane]) once implemented.
int temp_code = kScratchDoubleReg.code() * 2;
VmovExtended(temp_code, lane_code, scratch);
lane_code = temp_code;
}
vdup(dst, SwVfpRegister::from_code(lane_code));
return;
}
case 0x2301: // Swap lanes 0, 1 and lanes 2, 3.
vrev64(Neon32, dst, src);
return;
default: // Handle all other cases with vmovs.
int src_code = src.code() * 4;
int dst_code = dst.code() * 4;
bool in_place = src.is(dst);
if (in_place) {
// Writing lanes of dst would overwrite lanes of src that are still needed,
// so read from a copy held in the scratch quad register instead.
vmov(kScratchQuadReg, src);
src_code = kScratchQuadReg.code() * 4;
}
for (int i = 0; i < 4; i++) {
// Nibble i of |lanes| selects the source lane for destination lane i.
int lane = (lanes >> (i * 4) & 0xF);
VmovExtended(dst_code + i, src_code + lane, scratch);
}
if (in_place) {
// Restore zero reg.
// NOTE(review): presumably kScratchQuadReg overlaps kDoubleRegZero, which
// is why it is re-zeroed after the copy above - confirm against the
// register definitions.
veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
}
return;
}
}
}
void MacroAssembler::LslPair(Register dst_low, Register dst_high,
Register src_low, Register src_high,
Register scratch, Register shift) {
......
......@@ -571,8 +571,6 @@ class MacroAssembler: public Assembler {
NeonDataType dt, int lane);
void ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
SwVfpRegister src_lane, Register scratch, int lane);
void Swizzle(QwNeonRegister dst, QwNeonRegister src, Register scratch,
NeonSize size, uint32_t lanes);
void LslPair(Register dst_low, Register dst_high, Register src_low,
Register src_high, Register scratch, Register shift);
......
......@@ -2156,6 +2156,197 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(2));
break;
}
case kArmS32x4ZipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
__ vmov(dst.high(), src1.low()); // dst = [0, 1, 4, 5]
__ vtrn(Neon32, dst.low(), dst.high()); // dst = [0, 4, 1, 5]
break;
}
case kArmS32x4ZipRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from ZipLeft).
__ vmov(dst.low(), src1.high()); // dst = [2, 3, 6, 7]
__ vtrn(Neon32, dst.low(), dst.high()); // dst = [2, 6, 3, 7]
break;
}
case kArmS32x4UnzipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
__ vmov(kScratchQuadReg, src1);
__ vuzp(Neon32, dst, kScratchQuadReg); // dst = [0, 2, 4, 6]
break;
}
case kArmS32x4UnzipRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
__ vmov(kScratchQuadReg, src1);
__ vuzp(Neon32, kScratchQuadReg, dst); // dst = [1, 3, 5, 7]
break;
}
case kArmS32x4TransposeLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
__ vmov(kScratchQuadReg, src1);
__ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6]
break;
}
case kArmS32x4TransposeRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
__ vmov(kScratchQuadReg, src1);
__ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7]
break;
}
case kArmS16x8ZipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
// src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
DCHECK(dst.is(i.InputSimd128Register(0)));
__ vmov(dst.high(), src1.low()); // dst = [0, 1, 2, 3, 8, ... 11]
__ vzip(Neon16, dst.low(), dst.high()); // dst = [0, 8, 1, 9, ... 11]
break;
}
case kArmS16x8ZipRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
__ vmov(dst.low(), src1.high());
__ vzip(Neon16, dst.low(), dst.high()); // dst = [4, 12, 5, 13, ... 15]
break;
}
case kArmS16x8UnzipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
__ vmov(kScratchQuadReg, src1);
__ vuzp(Neon16, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 14]
break;
}
case kArmS16x8UnzipRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
__ vmov(kScratchQuadReg, src1);
__ vuzp(Neon16, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 15]
break;
}
case kArmS16x8TransposeLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
__ vmov(kScratchQuadReg, src1);
__ vtrn(Neon16, dst, kScratchQuadReg); // dst = [0, 8, 2, 10, ... 14]
break;
}
case kArmS16x8TransposeRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
__ vmov(kScratchQuadReg, src1);
__ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15]
break;
}
case kArmS8x16ZipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
__ vmov(dst.high(), src1.low());
__ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23]
break;
}
case kArmS8x16ZipRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
__ vmov(dst.low(), src1.high());
__ vzip(Neon8, dst.low(), dst.high()); // dst = [8, 24, 9, 25, ... 31]
break;
}
case kArmS8x16UnzipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
__ vmov(kScratchQuadReg, src1);
__ vuzp(Neon8, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 30]
break;
}
case kArmS8x16UnzipRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
__ vmov(kScratchQuadReg, src1);
__ vuzp(Neon8, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 31]
break;
}
case kArmS8x16TransposeLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
__ vmov(kScratchQuadReg, src1);
__ vtrn(Neon8, dst, kScratchQuadReg); // dst = [0, 16, 2, 18, ... 30]
break;
}
case kArmS8x16TransposeRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst.is(i.InputSimd128Register(0)));
// src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
__ vmov(kScratchQuadReg, src1);
__ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31]
break;
}
case kArmS8x16Concat: {
__ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), i.InputInt4(2));
break;
}
case kArmS32x2Reverse: {
__ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmS16x4Reverse: {
__ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmS16x2Reverse: {
__ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmS8x8Reverse: {
__ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmS8x4Reverse: {
__ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmS8x2Reverse: {
__ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmS1x4AnyTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
__ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high());
......
......@@ -232,6 +232,31 @@ namespace compiler {
V(ArmS128Xor) \
V(ArmS128Not) \
V(ArmS128Select) \
V(ArmS32x4ZipLeft) \
V(ArmS32x4ZipRight) \
V(ArmS32x4UnzipLeft) \
V(ArmS32x4UnzipRight) \
V(ArmS32x4TransposeLeft) \
V(ArmS32x4TransposeRight) \
V(ArmS16x8ZipLeft) \
V(ArmS16x8ZipRight) \
V(ArmS16x8UnzipLeft) \
V(ArmS16x8UnzipRight) \
V(ArmS16x8TransposeLeft) \
V(ArmS16x8TransposeRight) \
V(ArmS8x16ZipLeft) \
V(ArmS8x16ZipRight) \
V(ArmS8x16UnzipLeft) \
V(ArmS8x16UnzipRight) \
V(ArmS8x16TransposeLeft) \
V(ArmS8x16TransposeRight) \
V(ArmS8x16Concat) \
V(ArmS32x2Reverse) \
V(ArmS16x4Reverse) \
V(ArmS16x2Reverse) \
V(ArmS8x8Reverse) \
V(ArmS8x4Reverse) \
V(ArmS8x2Reverse) \
V(ArmS1x4AnyTrue) \
V(ArmS1x4AllTrue) \
V(ArmS1x8AnyTrue) \
......
......@@ -216,6 +216,31 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmS128Xor:
case kArmS128Not:
case kArmS128Select:
case kArmS32x4ZipLeft:
case kArmS32x4ZipRight:
case kArmS32x4UnzipLeft:
case kArmS32x4UnzipRight:
case kArmS32x4TransposeLeft:
case kArmS32x4TransposeRight:
case kArmS16x8ZipLeft:
case kArmS16x8ZipRight:
case kArmS16x8UnzipLeft:
case kArmS16x8UnzipRight:
case kArmS16x8TransposeLeft:
case kArmS16x8TransposeRight:
case kArmS8x16ZipLeft:
case kArmS8x16ZipRight:
case kArmS8x16UnzipLeft:
case kArmS8x16UnzipRight:
case kArmS8x16TransposeLeft:
case kArmS8x16TransposeRight:
case kArmS8x16Concat:
case kArmS32x2Reverse:
case kArmS16x4Reverse:
case kArmS16x2Reverse:
case kArmS8x8Reverse:
case kArmS8x4Reverse:
case kArmS8x2Reverse:
case kArmS1x4AnyTrue:
case kArmS1x4AllTrue:
case kArmS1x8AnyTrue:
......
......@@ -91,6 +91,27 @@ void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
g.UseRegister(node->InputAt(1)));
}
// Selects a two-input SIMD shuffle whose result is defined in the same
// register as its first input.
void VisitRRRShuffle(InstructionSelector* selector, ArchOpcode opcode,
                     Node* node) {
  ArmOperandGenerator g(selector);

  // The "Right" variants are emitted with their two inputs exchanged, which
  // saves an instruction in the CodeGenerator.
  bool exchange_inputs = false;
  switch (opcode) {
    case kArmS32x4ZipRight:
    case kArmS32x4UnzipRight:
    case kArmS32x4TransposeRight:
    case kArmS16x8ZipRight:
    case kArmS16x8UnzipRight:
    case kArmS16x8TransposeRight:
    case kArmS8x16ZipRight:
    case kArmS8x16UnzipRight:
    case kArmS8x16TransposeRight:
      exchange_inputs = true;
      break;
    default:
      break;
  }

  if (exchange_inputs) {
    // The exchange is performed on the node itself, so the operands read
    // below already appear in swapped order.
    Node* const former_first = node->InputAt(0);
    Node* const former_second = node->InputAt(1);
    node->ReplaceInput(0, former_second);
    node->ReplaceInput(1, former_first);
  }

  // The NEON vzip, vuzp and vtrn instructions clobber their inputs, hence the
  // output is constrained with DefineSameAsFirst.
  selector->Emit(opcode, g.DefineSameAsFirst(node),
                 g.UseRegister(node->InputAt(0)),
                 g.UseRegister(node->InputAt(1)));
}
void VisitRRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
ArmOperandGenerator g(selector);
// Use DefineSameAsFirst for ternary ops that clobber their first input,
......@@ -2393,6 +2414,12 @@ VISIT_ATOMIC_BINOP(Xor)
V(I16x8UConvertI8x16High, kArmI16x8UConvertI8x16High) \
V(I8x16Neg, kArmI8x16Neg) \
V(S128Not, kArmS128Not) \
V(S32x2Reverse, kArmS32x2Reverse) \
V(S16x4Reverse, kArmS16x4Reverse) \
V(S16x2Reverse, kArmS16x2Reverse) \
V(S8x8Reverse, kArmS8x8Reverse) \
V(S8x4Reverse, kArmS8x4Reverse) \
V(S8x2Reverse, kArmS8x2Reverse) \
V(S1x4Not, kArmS128Not) \
V(S1x4AnyTrue, kArmS1x4AnyTrue) \
V(S1x4AllTrue, kArmS1x4AllTrue) \
......@@ -2490,6 +2517,26 @@ VISIT_ATOMIC_BINOP(Xor)
V(S1x16Or, kArmS128Or) \
V(S1x16Xor, kArmS128Xor)
// X-macro listing the shuffle operation names (zip, unzip and transpose, in
// Left and Right variants, for the 32x4, 16x8 and 8x16 lane layouts). V is
// applied once to each bare operation name.
#define SIMD_SHUFFLE_OP_LIST(V) \
V(S32x4ZipLeft) \
V(S32x4ZipRight) \
V(S32x4UnzipLeft) \
V(S32x4UnzipRight) \
V(S32x4TransposeLeft) \
V(S32x4TransposeRight) \
V(S16x8ZipLeft) \
V(S16x8ZipRight) \
V(S16x8UnzipLeft) \
V(S16x8UnzipRight) \
V(S16x8TransposeLeft) \
V(S16x8TransposeRight) \
V(S8x16ZipLeft) \
V(S8x16ZipRight) \
V(S8x16UnzipLeft) \
V(S8x16UnzipRight) \
V(S8x16TransposeLeft) \
V(S8x16TransposeRight)
#define SIMD_VISIT_SPLAT(Type) \
void InstructionSelector::Visit##Type##Splat(Node* node) { \
VisitRR(this, kArm##Type##Splat, node); \
......@@ -2547,6 +2594,21 @@ SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
SIMD_FORMAT_LIST(SIMD_VISIT_SELECT_OP)
#undef SIMD_VISIT_SELECT_OP
// Generates InstructionSelector::Visit<Name> for every entry of
// SIMD_SHUFFLE_OP_LIST. Each generated visitor forwards to VisitRRRShuffle
// with the ARM opcode kArm<Name>. The helper macro is undefined again right
// after it has been expanded.
#define SIMD_VISIT_SHUFFLE_OP(Name) \
void InstructionSelector::Visit##Name(Node* node) { \
VisitRRRShuffle(this, kArm##Name, node); \
}
SIMD_SHUFFLE_OP_LIST(SIMD_VISIT_SHUFFLE_OP)
#undef SIMD_VISIT_SHUFFLE_OP
// Selects kArmS8x16Concat for |node|: both vector inputs are used from
// registers and the operator's int32 parameter is passed as an immediate.
void InstructionSelector::VisitS8x16Concat(Node* node) {
  ArmOperandGenerator g(this);
  Node* const first_vector = node->InputAt(0);
  Node* const second_vector = node->InputAt(1);
  const int32_t byte_count = OpParameter<int32_t>(node);
  Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(first_vector),
       g.UseRegister(second_vector), g.UseImmediate(byte_count));
}
void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
UNREACHABLE();
}
......
......@@ -1703,12 +1703,62 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitS128Xor(node);
case IrOpcode::kS128Not:
return MarkAsSimd128(node), VisitS128Not(node);
case IrOpcode::kS32x4ZipLeft:
return MarkAsSimd128(node), VisitS32x4ZipLeft(node);
case IrOpcode::kS32x4ZipRight:
return MarkAsSimd128(node), VisitS32x4ZipRight(node);
case IrOpcode::kS32x4UnzipLeft:
return MarkAsSimd128(node), VisitS32x4UnzipLeft(node);
case IrOpcode::kS32x4UnzipRight:
return MarkAsSimd128(node), VisitS32x4UnzipRight(node);
case IrOpcode::kS32x4TransposeLeft:
return MarkAsSimd128(node), VisitS32x4TransposeLeft(node);
case IrOpcode::kS32x4TransposeRight:
return MarkAsSimd128(node), VisitS32x4TransposeRight(node);
case IrOpcode::kS32x4Select:
return MarkAsSimd128(node), VisitS32x4Select(node);
case IrOpcode::kS16x8ZipLeft:
return MarkAsSimd128(node), VisitS16x8ZipLeft(node);
case IrOpcode::kS16x8ZipRight:
return MarkAsSimd128(node), VisitS16x8ZipRight(node);
case IrOpcode::kS16x8UnzipLeft:
return MarkAsSimd128(node), VisitS16x8UnzipLeft(node);
case IrOpcode::kS16x8UnzipRight:
return MarkAsSimd128(node), VisitS16x8UnzipRight(node);
case IrOpcode::kS16x8TransposeLeft:
return MarkAsSimd128(node), VisitS16x8TransposeLeft(node);
case IrOpcode::kS16x8TransposeRight:
return MarkAsSimd128(node), VisitS16x8TransposeRight(node);
case IrOpcode::kS16x8Select:
return MarkAsSimd128(node), VisitS16x8Select(node);
case IrOpcode::kS8x16ZipLeft:
return MarkAsSimd128(node), VisitS8x16ZipLeft(node);
case IrOpcode::kS8x16ZipRight:
return MarkAsSimd128(node), VisitS8x16ZipRight(node);
case IrOpcode::kS8x16UnzipLeft:
return MarkAsSimd128(node), VisitS8x16UnzipLeft(node);
case IrOpcode::kS8x16UnzipRight:
return MarkAsSimd128(node), VisitS8x16UnzipRight(node);
case IrOpcode::kS8x16TransposeLeft:
return MarkAsSimd128(node), VisitS8x16TransposeLeft(node);
case IrOpcode::kS8x16TransposeRight:
return MarkAsSimd128(node), VisitS8x16TransposeRight(node);
case IrOpcode::kS8x16Select:
return MarkAsSimd128(node), VisitS8x16Select(node);
case IrOpcode::kS8x16Concat:
return MarkAsSimd128(node), VisitS8x16Concat(node);
case IrOpcode::kS32x2Reverse:
return MarkAsSimd128(node), VisitS32x2Reverse(node);
case IrOpcode::kS16x4Reverse:
return MarkAsSimd128(node), VisitS16x4Reverse(node);
case IrOpcode::kS16x2Reverse:
return MarkAsSimd128(node), VisitS16x2Reverse(node);
case IrOpcode::kS8x8Reverse:
return MarkAsSimd128(node), VisitS8x8Reverse(node);
case IrOpcode::kS8x4Reverse:
return MarkAsSimd128(node), VisitS8x4Reverse(node);
case IrOpcode::kS8x2Reverse:
return MarkAsSimd128(node), VisitS8x2Reverse(node);
case IrOpcode::kS1x4Zero:
return MarkAsSimd1x4(node), VisitS1x4Zero(node);
case IrOpcode::kS1x4And:
......@@ -2391,13 +2441,81 @@ void InstructionSelector::VisitS32x4Select(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS &&
// !V8_TARGET_ARCH_MIPS64
// Placeholder visitors for SIMD operations on targets that do not provide
// their own implementation. The preprocessor guards around each group name
// the architectures that are excluded from the stubs; every stub simply hits
// UNIMPLEMENTED().
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitS32x4ZipLeft(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS32x4ZipRight(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS32x4UnzipLeft(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS32x4UnzipRight(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS32x4TransposeLeft(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitS32x4TransposeRight(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitS16x8ZipLeft(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS16x8ZipRight(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS16x8UnzipLeft(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS16x8UnzipRight(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS16x8TransposeLeft(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitS16x8TransposeRight(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM
// The Select visitors are stubbed out only where neither x64 nor ARM is the
// target.
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitS16x8Select(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitS8x16ZipLeft(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x16ZipRight(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x16UnzipLeft(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x16UnzipRight(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x16TransposeLeft(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitS8x16TransposeRight(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitS8x16Select(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
// Concat, the Reverse swizzles and the S1x4 boolean-vector visitors below are
// stubbed out for every target except ARM.
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitS8x16Concat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS32x2Reverse(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS16x4Reverse(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS16x2Reverse(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x8Reverse(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x4Reverse(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x2Reverse(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS1x4And(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS1x4Or(Node* node) { UNIMPLEMENTED(); }
......
......@@ -320,9 +320,33 @@ MachineType AtomicOpRepresentationOf(Operator const* op) {
V(S128Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(S128Xor, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(S128Not, Operator::kNoProperties, 1, 0, 1) \
V(S32x4ZipLeft, Operator::kNoProperties, 2, 0, 1) \
V(S32x4ZipRight, Operator::kNoProperties, 2, 0, 1) \
V(S32x4UnzipLeft, Operator::kNoProperties, 2, 0, 1) \
V(S32x4UnzipRight, Operator::kNoProperties, 2, 0, 1) \
V(S32x4TransposeLeft, Operator::kNoProperties, 2, 0, 1) \
V(S32x4TransposeRight, Operator::kNoProperties, 2, 0, 1) \
V(S32x4Select, Operator::kNoProperties, 3, 0, 1) \
V(S16x8ZipLeft, Operator::kNoProperties, 2, 0, 1) \
V(S16x8ZipRight, Operator::kNoProperties, 2, 0, 1) \
V(S16x8UnzipLeft, Operator::kNoProperties, 2, 0, 1) \
V(S16x8UnzipRight, Operator::kNoProperties, 2, 0, 1) \
V(S16x8TransposeLeft, Operator::kNoProperties, 2, 0, 1) \
V(S16x8TransposeRight, Operator::kNoProperties, 2, 0, 1) \
V(S16x8Select, Operator::kNoProperties, 3, 0, 1) \
V(S8x16ZipLeft, Operator::kNoProperties, 2, 0, 1) \
V(S8x16ZipRight, Operator::kNoProperties, 2, 0, 1) \
V(S8x16UnzipLeft, Operator::kNoProperties, 2, 0, 1) \
V(S8x16UnzipRight, Operator::kNoProperties, 2, 0, 1) \
V(S8x16TransposeLeft, Operator::kNoProperties, 2, 0, 1) \
V(S8x16TransposeRight, Operator::kNoProperties, 2, 0, 1) \
V(S8x16Select, Operator::kNoProperties, 3, 0, 1) \
V(S32x2Reverse, Operator::kNoProperties, 1, 0, 1) \
V(S16x4Reverse, Operator::kNoProperties, 1, 0, 1) \
V(S16x2Reverse, Operator::kNoProperties, 1, 0, 1) \
V(S8x8Reverse, Operator::kNoProperties, 1, 0, 1) \
V(S8x4Reverse, Operator::kNoProperties, 1, 0, 1) \
V(S8x2Reverse, Operator::kNoProperties, 1, 0, 1) \
V(S1x4Zero, Operator::kNoProperties, 0, 0, 1) \
V(S1x4And, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(S1x4Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
......@@ -1007,16 +1031,11 @@ SIMD_LANE_OP_LIST(SIMD_LANE_OPS)
SIMD_FORMAT_LIST(SIMD_SHIFT_OPS)
#undef SIMD_SHIFT_OPS
// TODO(bbudge) Add Shuffle, DCHECKs based on format.
#define SIMD_PERMUTE_OPS(format, bits) \
const Operator* MachineOperatorBuilder::S##format##Swizzle( \
uint32_t swizzle) { \
return new (zone_) \
Operator1<uint32_t>(IrOpcode::kS##format##Swizzle, Operator::kPure, \
"Swizzle", 2, 0, 0, 1, 0, 0, swizzle); \
}
SIMD_FORMAT_LIST(SIMD_PERMUTE_OPS)
#undef SIMD_PERMUTE_OPS
// Creates the zone-allocated S8x16Concat operator, parameterized by |bytes|.
// |bytes| must lie in [0, kSimd128Size).
const Operator* MachineOperatorBuilder::S8x16Concat(int32_t bytes) {
  DCHECK(bytes >= 0 && bytes < kSimd128Size);
  // NOTE(review): the six integers are presumably the value/effect/control
  // input counts followed by the matching output counts - confirm against the
  // Operator constructor.
  const Operator* const concat_op = new (zone_) Operator1<int32_t>(
      IrOpcode::kS8x16Concat, Operator::kPure, "Concat", 2, 0, 0, 1, 0, 0,
      bytes);
  return concat_op;
}
} // namespace compiler
} // namespace internal
......
......@@ -556,15 +556,35 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* S128Xor();
const Operator* S128Not();
const Operator* S32x4ZipLeft();
const Operator* S32x4ZipRight();
const Operator* S32x4UnzipLeft();
const Operator* S32x4UnzipRight();
const Operator* S32x4TransposeLeft();
const Operator* S32x4TransposeRight();
const Operator* S32x4Select();
const Operator* S32x4Swizzle(uint32_t);
const Operator* S32x4Shuffle();
const Operator* S16x8ZipLeft();
const Operator* S16x8ZipRight();
const Operator* S16x8UnzipLeft();
const Operator* S16x8UnzipRight();
const Operator* S16x8TransposeLeft();
const Operator* S16x8TransposeRight();
const Operator* S16x8Select();
const Operator* S16x8Swizzle(uint32_t);
const Operator* S16x8Shuffle();
const Operator* S8x16ZipLeft();
const Operator* S8x16ZipRight();
const Operator* S8x16UnzipLeft();
const Operator* S8x16UnzipRight();
const Operator* S8x16TransposeLeft();
const Operator* S8x16TransposeRight();
const Operator* S8x16Select();
const Operator* S8x16Swizzle(uint32_t);
const Operator* S8x16Shuffle();
const Operator* S8x16Concat(int32_t);
const Operator* S32x2Reverse();
const Operator* S16x4Reverse();
const Operator* S16x2Reverse();
const Operator* S8x8Reverse();
const Operator* S8x4Reverse();
const Operator* S8x2Reverse();
const Operator* S1x4Zero();
const Operator* S1x4And();
......
......@@ -691,19 +691,38 @@
V(S128Load) \
V(S128Store) \
V(S128Zero) \
V(S128Not) \
V(S128And) \
V(S128Or) \
V(S128Xor) \
V(S128Not) \
V(S32x4ZipLeft) \
V(S32x4ZipRight) \
V(S32x4UnzipLeft) \
V(S32x4UnzipRight) \
V(S32x4TransposeLeft) \
V(S32x4TransposeRight) \
V(S32x4Select) \
V(S32x4Swizzle) \
V(S32x4Shuffle) \
V(S16x8ZipLeft) \
V(S16x8ZipRight) \
V(S16x8UnzipLeft) \
V(S16x8UnzipRight) \
V(S16x8TransposeLeft) \
V(S16x8TransposeRight) \
V(S16x8Select) \
V(S16x8Swizzle) \
V(S16x8Shuffle) \
V(S8x16ZipLeft) \
V(S8x16ZipRight) \
V(S8x16UnzipLeft) \
V(S8x16UnzipRight) \
V(S8x16TransposeLeft) \
V(S8x16TransposeRight) \
V(S8x16Select) \
V(S8x16Swizzle) \
V(S8x16Shuffle) \
V(S8x16Concat) \
V(S32x2Reverse) \
V(S16x4Reverse) \
V(S16x2Reverse) \
V(S8x8Reverse) \
V(S8x4Reverse) \
V(S8x2Reverse) \
V(S1x4Zero) \
V(S1x4And) \
V(S1x4Or) \
......
......@@ -3480,15 +3480,81 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode,
inputs[1]);
case wasm::kExprS128Not:
return graph()->NewNode(jsgraph()->machine()->S128Not(), inputs[0]);
case wasm::kExprS32x4ZipLeft:
return graph()->NewNode(jsgraph()->machine()->S32x4ZipLeft(), inputs[0],
inputs[1]);
case wasm::kExprS32x4ZipRight:
return graph()->NewNode(jsgraph()->machine()->S32x4ZipRight(), inputs[0],
inputs[1]);
case wasm::kExprS32x4UnzipLeft:
return graph()->NewNode(jsgraph()->machine()->S32x4UnzipLeft(), inputs[0],
inputs[1]);
case wasm::kExprS32x4UnzipRight:
return graph()->NewNode(jsgraph()->machine()->S32x4UnzipRight(),
inputs[0], inputs[1]);
case wasm::kExprS32x4TransposeLeft:
return graph()->NewNode(jsgraph()->machine()->S32x4TransposeLeft(),
inputs[0], inputs[1]);
case wasm::kExprS32x4TransposeRight:
return graph()->NewNode(jsgraph()->machine()->S32x4TransposeRight(),
inputs[0], inputs[1]);
case wasm::kExprS32x4Select:
return graph()->NewNode(jsgraph()->machine()->S32x4Select(), inputs[0],
inputs[1], inputs[2]);
case wasm::kExprS16x8ZipLeft:
return graph()->NewNode(jsgraph()->machine()->S16x8ZipLeft(), inputs[0],
inputs[1]);
case wasm::kExprS16x8ZipRight:
return graph()->NewNode(jsgraph()->machine()->S16x8ZipRight(), inputs[0],
inputs[1]);
case wasm::kExprS16x8UnzipLeft:
return graph()->NewNode(jsgraph()->machine()->S16x8UnzipLeft(), inputs[0],
inputs[1]);
case wasm::kExprS16x8UnzipRight:
return graph()->NewNode(jsgraph()->machine()->S16x8UnzipRight(),
inputs[0], inputs[1]);
case wasm::kExprS16x8TransposeLeft:
return graph()->NewNode(jsgraph()->machine()->S16x8TransposeLeft(),
inputs[0], inputs[1]);
case wasm::kExprS16x8TransposeRight:
return graph()->NewNode(jsgraph()->machine()->S16x8TransposeRight(),
inputs[0], inputs[1]);
case wasm::kExprS16x8Select:
return graph()->NewNode(jsgraph()->machine()->S16x8Select(), inputs[0],
inputs[1], inputs[2]);
case wasm::kExprS8x16ZipLeft:
return graph()->NewNode(jsgraph()->machine()->S8x16ZipLeft(), inputs[0],
inputs[1]);
case wasm::kExprS8x16ZipRight:
return graph()->NewNode(jsgraph()->machine()->S8x16ZipRight(), inputs[0],
inputs[1]);
case wasm::kExprS8x16UnzipLeft:
return graph()->NewNode(jsgraph()->machine()->S8x16UnzipLeft(), inputs[0],
inputs[1]);
case wasm::kExprS8x16UnzipRight:
return graph()->NewNode(jsgraph()->machine()->S8x16UnzipRight(),
inputs[0], inputs[1]);
case wasm::kExprS8x16TransposeLeft:
return graph()->NewNode(jsgraph()->machine()->S8x16TransposeLeft(),
inputs[0], inputs[1]);
case wasm::kExprS8x16TransposeRight:
return graph()->NewNode(jsgraph()->machine()->S8x16TransposeRight(),
inputs[0], inputs[1]);
case wasm::kExprS8x16Select:
return graph()->NewNode(jsgraph()->machine()->S8x16Select(), inputs[0],
inputs[1], inputs[2]);
case wasm::kExprS32x2Reverse:
return graph()->NewNode(jsgraph()->machine()->S32x2Reverse(), inputs[0]);
case wasm::kExprS16x4Reverse:
return graph()->NewNode(jsgraph()->machine()->S16x4Reverse(), inputs[0]);
case wasm::kExprS16x2Reverse:
return graph()->NewNode(jsgraph()->machine()->S16x2Reverse(), inputs[0]);
case wasm::kExprS8x8Reverse:
return graph()->NewNode(jsgraph()->machine()->S8x8Reverse(), inputs[0]);
case wasm::kExprS8x4Reverse:
return graph()->NewNode(jsgraph()->machine()->S8x4Reverse(), inputs[0]);
case wasm::kExprS8x2Reverse:
return graph()->NewNode(jsgraph()->machine()->S8x2Reverse(), inputs[0]);
case wasm::kExprS1x4And:
return graph()->NewNode(jsgraph()->machine()->S1x4And(), inputs[0],
inputs[1]);
......@@ -3605,22 +3671,10 @@ Node* WasmGraphBuilder::SimdShiftOp(wasm::WasmOpcode opcode, uint8_t shift,
}
}
Node* WasmGraphBuilder::SimdSwizzleOp(wasm::WasmOpcode opcode, uint32_t swizzle,
const NodeVector& inputs) {
Node* WasmGraphBuilder::SimdConcatOp(uint8_t bytes, const NodeVector& inputs) {
has_simd_ = true;
switch (opcode) {
case wasm::kExprS32x4Swizzle:
return graph()->NewNode(jsgraph()->machine()->S32x4Swizzle(swizzle),
inputs[0]);
case wasm::kExprS16x8Swizzle:
return graph()->NewNode(jsgraph()->machine()->S16x8Swizzle(swizzle),
inputs[0]);
case wasm::kExprS8x16Swizzle:
return graph()->NewNode(jsgraph()->machine()->S8x16Swizzle(swizzle),
inputs[0]);
default:
return graph()->NewNode(UnsupportedOpcode(opcode), nullptr);
}
return graph()->NewNode(jsgraph()->machine()->S8x16Concat(bytes), inputs[0],
inputs[1]);
}
static void RecordFunctionCompilation(CodeEventListener::LogEventsAndTags tag,
......
......@@ -252,8 +252,7 @@ class WasmGraphBuilder {
Node* SimdShiftOp(wasm::WasmOpcode opcode, uint8_t shift,
const NodeVector& inputs);
Node* SimdSwizzleOp(wasm::WasmOpcode opcode, uint32_t swizzle,
const NodeVector& inputs);
Node* SimdConcatOp(uint8_t bytes, const NodeVector& inputs);
bool has_simd() const { return has_simd_; }
......
......@@ -322,6 +322,18 @@ struct SimdShiftOperand {
}
};
// Immediate operand of a SIMD concatenation operation: a single byte amount.
template <bool checked>
struct SimdConcatOperand {
  uint8_t bytes;    // Byte amount read from the instruction stream.
  unsigned length;  // Length recorded for this operand; always 1.

  // Reads the byte amount located two bytes past |pc|.
  // NOTE(review): the offset of 2 presumably skips a prefix byte and the
  // opcode byte - confirm against the SIMD opcode encoding.
  SimdConcatOperand(Decoder* decoder, const byte* pc)
      : bytes(decoder->read_u8<checked>(pc + 2, "bytes")), length(1) {}
};
#undef CHECKED_COND
} // namespace wasm
......
......@@ -411,6 +411,17 @@ class WasmDecoder : public Decoder {
}
}
// Checks the byte amount of an S8x16Concat operand. Amounts of zero, or of
// kSimd128Size and above, are rejected with a decoder error.
// NOTE(review): the error position is derived from pc_ rather than from the
// |pc| argument.
inline bool Validate(const byte* pc, WasmOpcode opcode,
                     SimdConcatOperand<true>& operand) {
  DCHECK_EQ(wasm::kExprS8x16Concat, opcode);
  const bool amount_in_range =
      operand.bytes > 0 && operand.bytes < kSimd128Size;
  if (amount_in_range) return true;
  error(pc_ + 2, "invalid byte amount");
  return false;
}
static unsigned OpcodeLength(Decoder* decoder, const byte* pc) {
switch (static_cast<byte>(*pc)) {
#define DECLARE_OPCODE_CASE(name, opcode, sig) case kExpr##name:
......@@ -1475,6 +1486,19 @@ class WasmFullDecoder : public WasmDecoder {
return operand.length;
}
// Decodes an S8x16Concat at pc_. When the operand validates, the two SIMD
// inputs are popped, the concat node is built and the result is pushed.
// Returns the operand length whether or not validation succeeded.
unsigned SimdConcatOp(WasmOpcode opcode) {
  DCHECK_EQ(wasm::kExprS8x16Concat, opcode);
  SimdConcatOperand<true> operand(this, pc_);
  if (!Validate(pc_, opcode, operand)) return operand.length;

  compiler::NodeVector inputs(2, zone_);
  // The second input is popped first, so it fills the higher slot.
  inputs[1] = Pop(1, ValueType::kSimd128).node;
  inputs[0] = Pop(0, ValueType::kSimd128).node;
  TFNode* concat_node = BUILD(SimdConcatOp, operand.bytes, inputs);
  Push(ValueType::kSimd128, concat_node);
  return operand.length;
}
unsigned DecodeSimdOpcode(WasmOpcode opcode) {
unsigned len = 0;
switch (opcode) {
......@@ -1510,6 +1534,10 @@ class WasmFullDecoder : public WasmDecoder {
len = SimdShiftOp(opcode);
break;
}
case kExprS8x16Concat: {
len = SimdConcatOp(opcode);
break;
}
default: {
FunctionSig* sig = WasmOpcodes::Signature(opcode);
if (sig != nullptr) {
......
......@@ -217,15 +217,34 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_S128_OP(Or, "or")
CASE_S128_OP(Xor, "xor")
CASE_S128_OP(Not, "not")
CASE_S32x4_OP(ZipLeft, "zip left")
CASE_S32x4_OP(ZipRight, "zip right")
CASE_S32x4_OP(UnzipLeft, "unzip left")
CASE_S32x4_OP(UnzipRight, "unzip right")
CASE_S32x4_OP(TransposeLeft, "transpose left")
CASE_S32x4_OP(TransposeRight, "transpose right")
CASE_S32x4_OP(Select, "select")
CASE_S32x4_OP(Swizzle, "swizzle")
CASE_S32x4_OP(Shuffle, "shuffle")
CASE_S16x8_OP(ZipLeft, "zip left")
CASE_S16x8_OP(ZipRight, "zip right")
CASE_S16x8_OP(UnzipLeft, "unzip left")
CASE_S16x8_OP(UnzipRight, "unzip right")
CASE_S16x8_OP(TransposeLeft, "transpose left")
CASE_S16x8_OP(TransposeRight, "transpose right")
CASE_S16x8_OP(Select, "select")
CASE_S16x8_OP(Swizzle, "swizzle")
CASE_S16x8_OP(Shuffle, "shuffle")
CASE_S8x16_OP(ZipLeft, "zip left")
CASE_S8x16_OP(ZipRight, "zip right")
CASE_S8x16_OP(UnzipLeft, "unzip left")
CASE_S8x16_OP(UnzipRight, "unzip right")
CASE_S8x16_OP(TransposeLeft, "transpose left")
CASE_S8x16_OP(TransposeRight, "transpose right")
CASE_S8x16_OP(Select, "select")
CASE_S8x16_OP(Swizzle, "swizzle")
CASE_S8x16_OP(Shuffle, "shuffle")
CASE_S8x16_OP(Concat, "concat")
CASE_OP(S32x2Reverse, "32x2 reverse")
CASE_OP(S16x4Reverse, "16x4 reverse")
CASE_OP(S16x2Reverse, "16x2 reverse")
CASE_OP(S8x8Reverse, "8x8 reverse")
CASE_OP(S8x4Reverse, "8x4 reverse")
CASE_OP(S8x2Reverse, "8x2 reverse")
CASE_S1x4_OP(And, "and")
CASE_S1x4_OP(Or, "or")
CASE_S1x4_OP(Xor, "xor")
......
......@@ -390,15 +390,33 @@ constexpr WasmCodePosition kNoCodePosition = -1;
V(S128Or, 0xe577, s_ss) \
V(S128Xor, 0xe578, s_ss) \
V(S128Not, 0xe579, s_s) \
V(S32x4ZipLeft, 0xe5a0, s_ss) \
V(S32x4ZipRight, 0xe5a1, s_ss) \
V(S32x4UnzipLeft, 0xe5a2, s_ss) \
V(S32x4UnzipRight, 0xe5a3, s_ss) \
V(S32x4TransposeLeft, 0xe5a4, s_ss) \
V(S32x4TransposeRight, 0xe5a5, s_ss) \
V(S32x4Select, 0xe52c, s_s1x4ss) \
V(S32x4Swizzle, 0xe52d, s_s) \
V(S32x4Shuffle, 0xe52e, s_ss) \
V(S16x8ZipLeft, 0xe5a6, s_ss) \
V(S16x8ZipRight, 0xe5a7, s_ss) \
V(S16x8UnzipLeft, 0xe5a8, s_ss) \
V(S16x8UnzipRight, 0xe5a9, s_ss) \
V(S16x8TransposeLeft, 0xe5aa, s_ss) \
V(S16x8TransposeRight, 0xe5ab, s_ss) \
V(S16x8Select, 0xe54b, s_s1x8ss) \
V(S16x8Swizzle, 0xe54c, s_s) \
V(S16x8Shuffle, 0xe54d, s_ss) \
V(S8x16ZipLeft, 0xe5ac, s_ss) \
V(S8x16ZipRight, 0xe5ad, s_ss) \
V(S8x16UnzipLeft, 0xe5ae, s_ss) \
V(S8x16UnzipRight, 0xe5af, s_ss) \
V(S8x16TransposeLeft, 0xe5b0, s_ss) \
V(S8x16TransposeRight, 0xe5b1, s_ss) \
V(S8x16Select, 0xe56a, s_s1x16ss) \
V(S8x16Swizzle, 0xe56b, s_s) \
V(S8x16Shuffle, 0xe56c, s_ss) \
V(S32x2Reverse, 0xe5b2, s_s) \
V(S16x4Reverse, 0xe5b3, s_s) \
V(S16x2Reverse, 0xe5b4, s_s) \
V(S8x8Reverse, 0xe5b5, s_s) \
V(S8x4Reverse, 0xe5b6, s_s) \
V(S8x2Reverse, 0xe5b7, s_s) \
V(S1x4And, 0xe580, s1x4_s1x4s1x4) \
V(S1x4Or, 0xe581, s1x4_s1x4s1x4) \
V(S1x4Xor, 0xe582, s1x4_s1x4s1x4) \
......@@ -435,7 +453,8 @@ constexpr WasmCodePosition kNoCodePosition = -1;
V(I8x16ReplaceLane, 0xe559, _) \
V(I8x16Shl, 0xe562, _) \
V(I8x16ShrS, 0xe563, _) \
V(I8x16ShrU, 0xe571, _)
V(I8x16ShrU, 0xe571, _) \
V(S8x16Concat, 0xe5b8, _)
#define FOREACH_ATOMIC_OPCODE(V) \
V(I32AtomicAdd8S, 0xe601, i_ii) \
......
......@@ -405,115 +405,4 @@ TEST(ReplaceLane) {
}
}
// Checks the four lanes of |t.field| against v0..v3. A variable named |t| must
// be in scope where the macro is used. The body is wrapped in do/while so the
// expansion is one statement: all four checks stay guarded under an unbraced
// `if`, and an `if ... else` around the macro still compiles. Callers supply
// the trailing semicolon.
#define CHECK_EQ_32X4(field, v0, v1, v2, v3) \
  do {                                       \
    CHECK_EQ(v0, t.field[0]);                \
    CHECK_EQ(v1, t.field[1]);                \
    CHECK_EQ(v2, t.field[2]);                \
    CHECK_EQ(v3, t.field[3]);                \
  } while (false)
// Checks MacroAssembler::Swizzle on a 4x32-bit vector. The test assembles a
// stub that applies a series of lane patterns to the vector [0, 1, 2, 3] and
// stores each result into a field of |t|, then runs the stub and compares every
// field with the expected lane order.
TEST(Swizzle) {
// The stub uses NEON instructions, so do nothing when NEON is unavailable.
if (!CpuFeatures::IsSupported(NEON)) return;
// Allocate an executable page of memory.
// NOTE(review): nothing in this test releases |buffer|.
size_t actual_size;
byte* buffer = static_cast<byte*>(v8::base::OS::Allocate(
Assembler::kMinimalBufferSize, &actual_size, true));
CHECK(buffer);
Isolate* isolate = CcTest::i_isolate();
HandleScope handles(isolate);
MacroAssembler assembler(isolate, buffer, static_cast<int>(actual_size),
v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler; // Create a pointer for the __ macro.
// One 4-lane output slot per pattern. The field suffix is the |lanes| value
// passed to Swizzle. Judging by the expectations at the end of the test, hex
// digit i (counted from the least significant) names the source lane that ends
// up in destination lane i.
typedef struct {
int32_t _32x4_3210[4]; // identity
int32_t _32x4_1032[4]; // high / low swap
int32_t _32x4_0000[4]; // vdup's
int32_t _32x4_1111[4];
int32_t _32x4_2222[4];
int32_t _32x4_3333[4];
int32_t _32x4_2103[4]; // rotate left
int32_t _32x4_0321[4]; // rotate right
int32_t _32x4_1132[4]; // irregular
int32_t _32x4_1132_in_place[4]; // irregular, in-place
} T;
T t;
// Push r4-r7 and lr. The ldm at the end of the stub restores r4-r7 and loads
// the saved lr value into pc.
__ stm(db_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | lr.bit());
const Register kScratch = r5;
// Make test vector [0, 1, 2, 3]
__ veor(q1, q1, q1); // Zero
for (int i = 0; i < 4; i++) {
__ mov(r4, Operand(i));
__ ReplaceLane(q1, q1, r4, NeonS32, i);
}
// Each group below swizzles q1 into q0, computes the address of the matching
// field of T relative to r0 (the stub is called with &t as its first argument),
// and stores q0 there.
__ Swizzle(q0, q1, kScratch, Neon32, 0x3210);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, _32x4_3210))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ Swizzle(q0, q1, kScratch, Neon32, 0x1032);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, _32x4_1032))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ Swizzle(q0, q1, kScratch, Neon32, 0x0000);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, _32x4_0000))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ Swizzle(q0, q1, kScratch, Neon32, 0x1111);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, _32x4_1111))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ Swizzle(q0, q1, kScratch, Neon32, 0x2222);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, _32x4_2222))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ Swizzle(q0, q1, kScratch, Neon32, 0x3333);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, _32x4_3333))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ Swizzle(q0, q1, kScratch, Neon32, 0x2103);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, _32x4_2103))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ Swizzle(q0, q1, kScratch, Neon32, 0x0321);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, _32x4_0321))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ Swizzle(q0, q1, kScratch, Neon32, 0x1132);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, _32x4_1132))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
// Same irregular pattern, but with the destination register also used as the
// source: q0 is first loaded with the test vector and then swizzled onto itself.
__ vmov(q0, q1);
__ Swizzle(q0, q0, kScratch, Neon32, 0x1132);
__ add(r4, r0,
Operand(static_cast<int32_t>(offsetof(T, _32x4_1132_in_place))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
// Pop the registers pushed at the top of the stub; pc receives the saved lr.
__ ldm(ia_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | pc.bit());
// Turn the assembled instructions into a Code object.
CodeDesc desc;
masm->GetCode(&desc);
Handle<Code> code = isolate->factory()->NewCode(
desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
// In debug builds, print the generated code object to stdout.
#ifdef DEBUG
OFStream os(stdout);
code->Print(os);
#endif
// Run the stub with &t as its first argument; the returned value is unused.
F3 f = FUNCTION_CAST<F3>(code->entry());
Object* dummy = CALL_GENERATED_CODE(isolate, f, &t, 0, 0, 0, 0);
USE(dummy);
// Expected lane contents for each pattern, given the input [0, 1, 2, 3].
CHECK_EQ_32X4(_32x4_3210, 0, 1, 2, 3);
CHECK_EQ_32X4(_32x4_1032, 2, 3, 0, 1);
CHECK_EQ_32X4(_32x4_0000, 0, 0, 0, 0);
CHECK_EQ_32X4(_32x4_1111, 1, 1, 1, 1);
CHECK_EQ_32X4(_32x4_2222, 2, 2, 2, 2);
CHECK_EQ_32X4(_32x4_3333, 3, 3, 3, 3);
CHECK_EQ_32X4(_32x4_2103, 3, 0, 1, 2);
CHECK_EQ_32X4(_32x4_0321, 1, 2, 3, 0);
CHECK_EQ_32X4(_32x4_1132, 2, 3, 1, 1);
CHECK_EQ_32X4(_32x4_1132_in_place, 2, 3, 1, 1);
}
#undef __
......@@ -368,6 +368,8 @@ T RecipSqrtRefine(T a, T b) {
// Helpers that spell out a SIMD expression as a comma-separated sequence: the
// operand expressions first, then the opcode via WASM_SIMD_OP, then any
// immediate byte.
// Unary op: operand |x| followed by the opcode.
#define WASM_SIMD_UNOP(op, x) x, WASM_SIMD_OP(op)
// Binary op: operands |x| and |y| followed by the opcode.
#define WASM_SIMD_BINOP(op, x, y) x, y, WASM_SIMD_OP(op)
// Shift op: operand |x|, the opcode, then |shift| as an immediate byte.
#define WASM_SIMD_SHIFT_OP(op, shift, x) x, WASM_SIMD_OP(op), TO_BYTE(shift)
// Concat op: operands |x| and |y|, the opcode, then |bytes| as an immediate
// byte.
#define WASM_SIMD_CONCAT_OP(op, bytes, x, y) \
x, y, WASM_SIMD_OP(op), TO_BYTE(bytes)
// Select: three operands followed by the kExprS<format>Select opcode.
#define WASM_SIMD_SELECT(format, x, y, z) \
x, y, z, WASM_SIMD_OP(kExprS##format##Select)
// Since boolean vectors can't be checked directly, materialize them into
......@@ -1595,6 +1597,211 @@ WASM_SIMD_SELECT_TEST(8x16)
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64
#if V8_TARGET_ARCH_ARM
// Runs the unary SIMD opcode |simd_op| on a global holding the lane pattern
// [0, 1, 2, ...] (lanes of type T), writes the result back to the same global,
// and checks every lane against |expected|.
template <typename T>
void RunUnaryPermuteOpTest(
    WasmOpcode simd_op,
    const std::array<T, kSimd128Size / sizeof(T)>& expected) {
  FLAG_wasm_simd_prototype = true;
  WasmRunner<int32_t> r(kExecuteCompiled);
  // The single S128 global is both the input and the output of the operation.
  T* lanes = r.module().AddGlobal<T>(kWasmS128);
  const size_t lane_count = kSimd128Size / sizeof(T);
  // Fill the input so that lane i holds the value i.
  for (size_t lane = 0; lane < lane_count; lane++) {
    lanes[lane] = static_cast<T>(lane);
  }
  BUILD(r, WASM_SET_GLOBAL(0, WASM_SIMD_UNOP(simd_op, WASM_GET_GLOBAL(0))),
        WASM_ONE);
  CHECK_EQ(1, r.Call());
  for (size_t lane = 0; lane < lane_count; lane++) {
    CHECK_EQ(lanes[lane], expected[lane]);
  }
}
// Reverse tests. The input lanes are 0, 1, 2, ...; each opcode reverses the
// lanes inside consecutive groups whose shape is given by the opcode prefix
// (e.g. S8x4 reverses every group of four 8-bit lanes).
WASM_EXEC_COMPILED_TEST(S32x2Reverse) {
  const std::array<int32_t, kSimd128Size / sizeof(int32_t)> expected = {
      {1, 0, 3, 2}};
  RunUnaryPermuteOpTest<int32_t>(kExprS32x2Reverse, expected);
}
WASM_EXEC_COMPILED_TEST(S16x4Reverse) {
  const std::array<int16_t, kSimd128Size / sizeof(int16_t)> expected = {
      {3, 2, 1, 0, 7, 6, 5, 4}};
  RunUnaryPermuteOpTest<int16_t>(kExprS16x4Reverse, expected);
}
WASM_EXEC_COMPILED_TEST(S16x2Reverse) {
  const std::array<int16_t, kSimd128Size / sizeof(int16_t)> expected = {
      {1, 0, 3, 2, 5, 4, 7, 6}};
  RunUnaryPermuteOpTest<int16_t>(kExprS16x2Reverse, expected);
}
WASM_EXEC_COMPILED_TEST(S8x8Reverse) {
  const std::array<int8_t, kSimd128Size / sizeof(int8_t)> expected = {
      {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}};
  RunUnaryPermuteOpTest<int8_t>(kExprS8x8Reverse, expected);
}
WASM_EXEC_COMPILED_TEST(S8x4Reverse) {
  const std::array<int8_t, kSimd128Size / sizeof(int8_t)> expected = {
      {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}};
  RunUnaryPermuteOpTest<int8_t>(kExprS8x4Reverse, expected);
}
WASM_EXEC_COMPILED_TEST(S8x2Reverse) {
  const std::array<int8_t, kSimd128Size / sizeof(int8_t)> expected = {
      {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}};
  RunUnaryPermuteOpTest<int8_t>(kExprS8x2Reverse, expected);
}
// Runs the binary SIMD opcode |simd_op| on two globals. The first holds lanes
// 0..N-1 and the second holds lanes N..2N-1, where N is the lane count for T.
// The result overwrites the first global, which is then checked against
// |expected|.
template <typename T>
void RunBinaryPermuteOpTest(
    WasmOpcode simd_op,
    const std::array<T, kSimd128Size / sizeof(T)>& expected) {
  FLAG_wasm_simd_prototype = true;
  WasmRunner<int32_t> r(kExecuteCompiled);
  // Global 0 is the left input and the output; global 1 is the right input.
  T* lhs = r.module().AddGlobal<T>(kWasmS128);
  T* rhs = r.module().AddGlobal<T>(kWasmS128);
  const size_t lane_count = kSimd128Size / sizeof(T);
  for (size_t lane = 0; lane < lane_count; lane++) {
    lhs[lane] = static_cast<T>(lane);
    rhs[lane] = static_cast<T>(lane_count + lane);
  }
  BUILD(r,
        WASM_SET_GLOBAL(0, WASM_SIMD_BINOP(simd_op, WASM_GET_GLOBAL(0),
                                           WASM_GET_GLOBAL(1))),
        WASM_ONE);
  CHECK_EQ(1, r.Call());
  const size_t expected_count = expected.size();
  for (size_t lane = 0; lane < expected_count; lane++) {
    CHECK_EQ(lhs[lane], expected[lane]);
  }
}
// 32x4 binary permutes. The inputs are [0, 1, 2, 3] and [4, 5, 6, 7].
// Zip interleaves the first (Left) or second (Right) halves of the inputs.
// Unzip takes the even (Left) or odd (Right) lanes of the inputs laid end to
// end. Transpose pairs up the even (Left) or odd (Right) lanes of the inputs.
WASM_EXEC_COMPILED_TEST(S32x4ZipLeft) {
  const std::array<int32_t, kSimd128Size / sizeof(int32_t)> expected = {
      {0, 4, 1, 5}};
  RunBinaryPermuteOpTest<int32_t>(kExprS32x4ZipLeft, expected);
}
WASM_EXEC_COMPILED_TEST(S32x4ZipRight) {
  const std::array<int32_t, kSimd128Size / sizeof(int32_t)> expected = {
      {2, 6, 3, 7}};
  RunBinaryPermuteOpTest<int32_t>(kExprS32x4ZipRight, expected);
}
WASM_EXEC_COMPILED_TEST(S32x4UnzipLeft) {
  const std::array<int32_t, kSimd128Size / sizeof(int32_t)> expected = {
      {0, 2, 4, 6}};
  RunBinaryPermuteOpTest<int32_t>(kExprS32x4UnzipLeft, expected);
}
WASM_EXEC_COMPILED_TEST(S32x4UnzipRight) {
  const std::array<int32_t, kSimd128Size / sizeof(int32_t)> expected = {
      {1, 3, 5, 7}};
  RunBinaryPermuteOpTest<int32_t>(kExprS32x4UnzipRight, expected);
}
WASM_EXEC_COMPILED_TEST(S32x4TransposeLeft) {
  const std::array<int32_t, kSimd128Size / sizeof(int32_t)> expected = {
      {0, 4, 2, 6}};
  RunBinaryPermuteOpTest<int32_t>(kExprS32x4TransposeLeft, expected);
}
WASM_EXEC_COMPILED_TEST(S32x4TransposeRight) {
  const std::array<int32_t, kSimd128Size / sizeof(int32_t)> expected = {
      {1, 5, 3, 7}};
  RunBinaryPermuteOpTest<int32_t>(kExprS32x4TransposeRight, expected);
}
// 16x8 binary permutes. The inputs are lanes 0..7 and lanes 8..15.
// Zip interleaves the first (Left) or second (Right) halves of the inputs.
// Unzip takes the even (Left) or odd (Right) lanes of the inputs laid end to
// end. Transpose pairs up the even (Left) or odd (Right) lanes of the inputs.
WASM_EXEC_COMPILED_TEST(S16x8ZipLeft) {
  const std::array<int16_t, kSimd128Size / sizeof(int16_t)> expected = {
      {0, 8, 1, 9, 2, 10, 3, 11}};
  RunBinaryPermuteOpTest<int16_t>(kExprS16x8ZipLeft, expected);
}
WASM_EXEC_COMPILED_TEST(S16x8ZipRight) {
  const std::array<int16_t, kSimd128Size / sizeof(int16_t)> expected = {
      {4, 12, 5, 13, 6, 14, 7, 15}};
  RunBinaryPermuteOpTest<int16_t>(kExprS16x8ZipRight, expected);
}
WASM_EXEC_COMPILED_TEST(S16x8UnzipLeft) {
  const std::array<int16_t, kSimd128Size / sizeof(int16_t)> expected = {
      {0, 2, 4, 6, 8, 10, 12, 14}};
  RunBinaryPermuteOpTest<int16_t>(kExprS16x8UnzipLeft, expected);
}
WASM_EXEC_COMPILED_TEST(S16x8UnzipRight) {
  const std::array<int16_t, kSimd128Size / sizeof(int16_t)> expected = {
      {1, 3, 5, 7, 9, 11, 13, 15}};
  RunBinaryPermuteOpTest<int16_t>(kExprS16x8UnzipRight, expected);
}
WASM_EXEC_COMPILED_TEST(S16x8TransposeLeft) {
  const std::array<int16_t, kSimd128Size / sizeof(int16_t)> expected = {
      {0, 8, 2, 10, 4, 12, 6, 14}};
  RunBinaryPermuteOpTest<int16_t>(kExprS16x8TransposeLeft, expected);
}
WASM_EXEC_COMPILED_TEST(S16x8TransposeRight) {
  const std::array<int16_t, kSimd128Size / sizeof(int16_t)> expected = {
      {1, 9, 3, 11, 5, 13, 7, 15}};
  RunBinaryPermuteOpTest<int16_t>(kExprS16x8TransposeRight, expected);
}
// 8x16 binary permutes. The inputs are lanes 0..15 and lanes 16..31.
// Zip interleaves the first (Left) or second (Right) halves of the inputs.
// Unzip takes the even (Left) or odd (Right) lanes of the inputs laid end to
// end. Transpose pairs up the even (Left) or odd (Right) lanes of the inputs.
WASM_EXEC_COMPILED_TEST(S8x16ZipLeft) {
  const std::array<int8_t, kSimd128Size / sizeof(int8_t)> expected = {
      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}};
  RunBinaryPermuteOpTest<int8_t>(kExprS8x16ZipLeft, expected);
}
WASM_EXEC_COMPILED_TEST(S8x16ZipRight) {
  const std::array<int8_t, kSimd128Size / sizeof(int8_t)> expected = {
      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}};
  RunBinaryPermuteOpTest<int8_t>(kExprS8x16ZipRight, expected);
}
WASM_EXEC_COMPILED_TEST(S8x16UnzipLeft) {
  const std::array<int8_t, kSimd128Size / sizeof(int8_t)> expected = {
      {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}};
  RunBinaryPermuteOpTest<int8_t>(kExprS8x16UnzipLeft, expected);
}
WASM_EXEC_COMPILED_TEST(S8x16UnzipRight) {
  const std::array<int8_t, kSimd128Size / sizeof(int8_t)> expected = {
      {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}};
  RunBinaryPermuteOpTest<int8_t>(kExprS8x16UnzipRight, expected);
}
WASM_EXEC_COMPILED_TEST(S8x16TransposeLeft) {
  const std::array<int8_t, kSimd128Size / sizeof(int8_t)> expected = {
      {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}};
  RunBinaryPermuteOpTest<int8_t>(kExprS8x16TransposeLeft, expected);
}
WASM_EXEC_COMPILED_TEST(S8x16TransposeRight) {
  const std::array<int8_t, kSimd128Size / sizeof(int8_t)> expected = {
      {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}};
  RunBinaryPermuteOpTest<int8_t>(kExprS8x16TransposeRight, expected);
}
// Runs the concat opcode |simd_op| with immediate |bytes| on two globals. The
// first holds lanes 0..N-1 and the second holds lanes N..2N-1, where N is the
// lane count for T. The result overwrites the first global, which is then
// checked against |expected|.
template <typename T>
void RunConcatOpTest(WasmOpcode simd_op, int bytes,
                     const std::array<T, kSimd128Size / sizeof(T)>& expected) {
  FLAG_wasm_simd_prototype = true;
  WasmRunner<int32_t> r(kExecuteCompiled);
  // Global 0 is the first input and the output; global 1 is the second input.
  T* first = r.module().AddGlobal<T>(kWasmS128);
  T* second = r.module().AddGlobal<T>(kWasmS128);
  const size_t lane_count = kSimd128Size / sizeof(T);
  for (size_t lane = 0; lane < lane_count; lane++) {
    first[lane] = static_cast<T>(lane);
    second[lane] = static_cast<T>(lane_count + lane);
  }
  BUILD(
      r,
      WASM_SET_GLOBAL(0, WASM_SIMD_CONCAT_OP(simd_op, bytes, WASM_GET_GLOBAL(0),
                                             WASM_GET_GLOBAL(1))),
      WASM_ONE);
  CHECK_EQ(1, r.Call());
  const size_t expected_count = expected.size();
  for (size_t lane = 0; lane < expected_count; lane++) {
    CHECK_EQ(first[lane], expected[lane]);
  }
}
// Exercises S8x16Concat for every byte count k from 1 to 15.
WASM_EXEC_COMPILED_TEST(S8x16Concat) {
  std::array<int8_t, kSimd128Size> expected;
  for (int k = 1; k < 16; k++) {
    // The inputs hold bytes 0..15 and 16..31. The expected result is the last
    // 16 - k bytes of the first vector followed by the first k bytes of the
    // second, which is the run of consecutive values starting at k.
    for (int lane = 0; lane < kSimd128Size; lane++) {
      expected[lane] = static_cast<int8_t>(lane + k);
    }
    RunConcatOpTest<int8_t>(kExprS8x16Concat, k, expected);
  }
}
// Boolean unary operations are 'AllTrue' and 'AnyTrue', which return an integer
// result. Use relational ops on numeric vectors to create the boolean vector
// test inputs. Test inputs with all true, all false, one true, and one false.
......@@ -1769,7 +1976,9 @@ WASM_EXEC_COMPILED_TEST(S1x16And) { RunS1x16BinOpTest(kExprS1x16And, And); }
// Each test hands its opcode and the matching reference function (Or / Xor) to
// the shared RunS1x16BinOpTest driver; both are defined outside this excerpt.
WASM_EXEC_COMPILED_TEST(S1x16Or) { RunS1x16BinOpTest(kExprS1x16Or, Or); }
WASM_EXEC_COMPILED_TEST(S1x16Xor) { RunS1x16BinOpTest(kExprS1x16Xor, Xor); }
#endif // !V8_TARGET_ARCH_ARM
#if V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET
WASM_EXEC_COMPILED_TEST(SimdI32x4ExtractWithF32x4) {
FLAG_wasm_simd_prototype = true;
WasmRunner<int32_t> r(kExecuteCompiled);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment