Commit d0414e87 authored by Zhi An Ng, committed by Commit Bot

Reland "[wasm-simd][arm] Prototype i64x2.bitmask"

This is a reland of 21e47944

The fix is in the encoding of vmov. Bit 23 (the U bit) should be 0,
whether the NeonDataType is NeonS32 or NeonU32. Also added a DCHECK
in the simulator to assert this.
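
For reference, i64x2.bitmask collects the top (sign) bit of each 64-bit lane
into the low bits of a scalar result. A minimal scalar sketch of those
semantics (hypothetical helper, not part of this change):

    uint32_t I64x2BitMask(int64_t lane0, int64_t lane1) {
      uint32_t mask = 0;
      if (lane0 < 0) mask |= 1;       // sign bit of lane 0 -> result bit 0
      if (lane1 < 0) mask |= 1 << 1;  // sign bit of lane 1 -> result bit 1
      return mask;
    }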

Original change's description:
> [wasm-simd][arm] Prototype i64x2.bitmask
>
> Cleanup to simulator to remove repetitive logic to get instruction
> fields.
>
> Bug: v8:10997
> Change-Id: I01f0b99f85788b41e4cab505fc94362d637c396f
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2554256
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
> Reviewed-by: Bill Budge <bbudge@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#71391}

Bug: v8:10997
Change-Id: I93b5d2168a50446d4a3b487ad83d6af8ea9ba8ab
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2558262
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71467}
parent 7111248e
@@ -3776,6 +3776,7 @@ void Assembler::vqmovn(NeonDataType dst_dt, NeonDataType src_dt,
   int vm, m;
   src.split_code(&vm, &m);
   int size = NeonSz(dst_dt);
+  DCHECK_NE(3, size);
   int op = NeonU(src_dt) ? 0b11 : NeonU(dst_dt) ? 0b01 : 0b10;
   emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | 0x2 * B16 | vd * B12 |
        0x2 * B8 | op * B6 | m * B5 | vm);
@@ -3826,7 +3827,8 @@ void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src,
   int vn, n;
   src.split_code(&vn, &n);
   int opc1_opc2 = EncodeScalar(dt, index);
-  int u = NeonU(dt);
+  // NeonS32 and NeonU32 both encoded as u = 0.
+  int u = NeonDataTypeToSize(dt) == Neon32 ? 0 : NeonU(dt);
   emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 |
        n * B7 | B4 | opc1_opc2);
 }
@@ -4437,7 +4439,7 @@ void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
   emit(EncodeNeonBinOp(VMAX, dt, dst, src1, src2));
 }
 
-enum NeonShiftOp { VSHL, VSHR, VSLI, VSRI };
+enum NeonShiftOp { VSHL, VSHR, VSLI, VSRI, VSRA };
 
 static Instr EncodeNeonShiftRegisterOp(NeonShiftOp op, NeonDataType dt,
                                        NeonRegType reg_type, int dst_code,
@@ -4487,6 +4489,13 @@ static Instr EncodeNeonShiftOp(NeonShiftOp op, NeonSize size, bool is_unsigned,
       op_encoding = B24 | 0x4 * B8;
       break;
     }
+    case VSRA: {
+      DCHECK(shift > 0 && size_in_bits >= shift);
+      imm6 = 2 * size_in_bits - shift;
+      op_encoding = B8;
+      if (is_unsigned) op_encoding |= B24;
+      break;
+    }
     default:
       UNREACHABLE();
   }
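
Note: the NEON shift-right-by-immediate forms (VSHR, VSRA) pack lane size and
shift amount into one immediate field (imm6 plus the L bit) as
imm = 2 * size_in_bits - shift; the decoder recovers the size as the largest
power of two not exceeding imm, then the shift as 2 * size - imm. A round-trip
sketch of that scheme (hypothetical helper names, for illustration only):

    int EncodeShiftImm(int size_in_bits, int shift) {
      return 2 * size_in_bits - shift;  // e.g. 64-bit lanes, shift 63 -> 65
    }

    void DecodeShiftImm(int imm, int* size_in_bits, int* shift) {
      int size = 1;
      while (2 * size <= imm) size *= 2;  // largest power of two <= imm
      *size_in_bits = size;               // 65 -> 64
      *shift = 2 * size - imm;            // 65 -> 63
    }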
@@ -4521,10 +4530,19 @@ void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                                   shift.code()));
 }
 
+void Assembler::vshr(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src,
+                     int shift) {
+  DCHECK(IsEnabled(NEON));
+  // Dd = vshr(Dm, bits) SIMD shift right immediate.
+  // Instruction details available in ARM DDI 0406C.b, A8-1052.
+  emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_D,
+                         dst.code(), src.code(), shift));
+}
+
 void Assembler::vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                      int shift) {
   DCHECK(IsEnabled(NEON));
-  // Qd = vshl(Qm, bits) SIMD shift right immediate.
+  // Qd = vshr(Qm, bits) SIMD shift right immediate.
   // Instruction details available in ARM DDI 0406C.b, A8-1052.
   emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_Q,
                          dst.code(), src.code(), shift));
@@ -4548,6 +4566,15 @@ void Assembler::vsri(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                          shift));
 }
 
+void Assembler::vsra(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src,
+                     int imm) {
+  DCHECK(IsEnabled(NEON));
+  // Dd = vsra(Dm, imm) SIMD shift right and accumulate.
+  // Instruction details available in ARM DDI 0487F.b, F6-5569.
+  emit(EncodeNeonShiftOp(VSRA, NeonDataTypeToSize(dt), NeonU(dt), NEON_D,
+                         dst.code(), src.code(), imm));
+}
+
 static Instr EncodeNeonEstimateOp(bool is_rsqrt, QwNeonRegister dst,
                                   QwNeonRegister src) {
   int vd, d;
...
@@ -926,9 +926,12 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src, int shift);
   void vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
             QwNeonRegister shift);
+  void vshr(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src, int shift);
   void vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src, int shift);
   void vsli(NeonSize size, DwVfpRegister dst, DwVfpRegister src, int shift);
   void vsri(NeonSize size, DwVfpRegister dst, DwVfpRegister src, int shift);
+  void vsra(NeonDataType size, DwVfpRegister dst, DwVfpRegister src, int imm);
   // vrecpe and vrsqrte only support floating point lanes.
   void vrecpe(QwNeonRegister dst, QwNeonRegister src);
   void vrsqrte(QwNeonRegister dst, QwNeonRegister src);
...
@@ -2167,6 +2167,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonU64);
       break;
     }
+    case kArmI64x2BitMask: {
+      UseScratchRegisterScope temps(tasm());
+      Register dst = i.OutputRegister();
+      Simd128Register src = i.InputSimd128Register(0);
+      QwNeonRegister tmp1 = temps.AcquireQ();
+      Register tmp = temps.Acquire();
+      __ vshr(NeonU64, tmp1, src, 63);
+      __ vmov(NeonU32, dst, tmp1.low(), 0);
+      __ vmov(NeonU32, tmp, tmp1.high(), 0);
+      __ add(dst, dst, Operand(tmp, LSL, 1));
+      break;
+    }
     case kArmF32x4Splat: {
       int src_code = i.InputFloatRegister(0).code();
       __ vdup(Neon32, i.OutputSimd128Register(),
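
A reading of the sequence above (annotation only, not part of the diff):
vshr.u64 by 63 leaves each 64-bit lane of tmp1 holding just its original sign
bit; the two vmov.32 transfers, which depend on the U-bit fix in this reland,
move bit 0 of the low and high lane into core registers; the final add folds
lane 1 into bit 1 of the result:

    // vshr.u64 tmp1, src, #63    ; each lane of tmp1 = its sign bit (0 or 1)
    // vmov.32  dst, tmp1.low[0]  ; dst = sign bit of lane 0
    // vmov.32  tmp, tmp1.high[0] ; tmp = sign bit of lane 1
    // add      dst, dst, tmp, LSL #1  ; dst = bit0 + (bit1 << 1)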
...
@@ -182,6 +182,7 @@ namespace compiler {
   V(ArmI64x2Sub)                 \
   V(ArmI64x2Mul)                 \
   V(ArmI64x2ShrU)                \
+  V(ArmI64x2BitMask)             \
   V(ArmI32x4Splat)               \
   V(ArmI32x4ExtractLane)         \
   V(ArmI32x4ReplaceLane)         \
...
@@ -162,6 +162,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArmI64x2Sub:
     case kArmI64x2Mul:
     case kArmI64x2ShrU:
+    case kArmI64x2BitMask:
     case kArmI32x4Splat:
     case kArmI32x4ExtractLane:
     case kArmI32x4ReplaceLane:
...
@@ -2984,6 +2984,10 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) {
   VisitBitMask<kArmI32x4BitMask>(this, node);
 }
 
+void InstructionSelector::VisitI64x2BitMask(Node* node) {
+  VisitBitMask<kArmI64x2BitMask>(this, node);
+}
+
 namespace {
 void VisitF32x4PminOrPmax(InstructionSelector* selector, ArchOpcode opcode,
                           Node* node) {
...
@@ -2823,10 +2823,10 @@ void InstructionSelector::VisitI32x4SignSelect(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI64x2SignSelect(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_X64
 
-#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
+#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
 // TODO(v8:10997) Prototype i64x2.bitmask.
 void InstructionSelector::VisitI64x2BitMask(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
+#endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
 
 void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
...
@@ -2153,11 +2153,38 @@ void Decoder::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
       int imm7 = (l << 6) | instr->Bits(21, 16);
       int size = base::bits::RoundDownToPowerOfTwo32(imm7);
       int shift = 2 * size - imm7;
-      int Vd = instr->VFPDRegValue(kSimd128Precision);
-      int Vm = instr->VFPMRegValue(kSimd128Precision);
-      out_buffer_pos_ +=
-          SNPrintF(out_buffer_ + out_buffer_pos_, "vshr.%s%d q%d, q%d, #%d",
-                   u ? "u" : "s", size, Vd, Vm, shift);
+      if (q) {
+        int Vd = instr->VFPDRegValue(kSimd128Precision);
+        int Vm = instr->VFPMRegValue(kSimd128Precision);
+        out_buffer_pos_ +=
+            SNPrintF(out_buffer_ + out_buffer_pos_, "vshr.%s%d q%d, q%d, #%d",
+                     u ? "u" : "s", size, Vd, Vm, shift);
+      } else {
+        int Vd = instr->VFPDRegValue(kDoublePrecision);
+        int Vm = instr->VFPMRegValue(kDoublePrecision);
+        out_buffer_pos_ +=
+            SNPrintF(out_buffer_ + out_buffer_pos_, "vshr.%s%d d%d, d%d, #%d",
+                     u ? "u" : "s", size, Vd, Vm, shift);
+      }
+    } else if (imm3H_L != 0 && opc == 1) {
+      // vsra.<type><size> Qd, Qm, shift
+      // vsra.<type><size> Dd, Dm, shift
+      int imm7 = (l << 6) | instr->Bits(21, 16);
+      int size = base::bits::RoundDownToPowerOfTwo32(imm7);
+      int shift = 2 * size - imm7;
+      if (q) {
+        int Vd = instr->VFPDRegValue(kSimd128Precision);
+        int Vm = instr->VFPMRegValue(kSimd128Precision);
+        out_buffer_pos_ +=
+            SNPrintF(out_buffer_ + out_buffer_pos_, "vsra.%s%d q%d, q%d, #%d",
+                     u ? "u" : "s", size, Vd, Vm, shift);
+      } else {
+        int Vd = instr->VFPDRegValue(kDoublePrecision);
+        int Vm = instr->VFPMRegValue(kDoublePrecision);
+        out_buffer_pos_ +=
+            SNPrintF(out_buffer_ + out_buffer_pos_, "vsra.%s%d d%d, d%d, #%d",
+                     u ? "u" : "s", size, Vd, Vm, shift);
+      }
     } else if (imm3H_L != 0 && imm3L == 0 && opc == 0b1010 && !q) {
       // vmovl
       if ((instr->VdValue() & 1) != 0) Unknown(instr);
...
@@ -3416,6 +3416,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
         get_d_register(vn, &data);
         if ((opc1_opc2 & 0xB) == 0) {
           // NeonS32 / NeonU32
+          DCHECK_EQ(0, instr->Bit(23));
           int32_t int_data[2];
           base::Memcpy(int_data, &data, sizeof(int_data));
           set_register(rt, int_data[instr->Bit(21)]);
@@ -3883,28 +3884,28 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) {
 // Helper functions for implementing NEON ops. Unop applies a unary op to each
 // lane. Binop applies a binary operation to matching input lanes.
-template <typename T>
+template <typename T, int SIZE = kSimd128Size>
 void Unop(Simulator* simulator, int Vd, int Vm, std::function<T(T)> unop) {
-  static const int kLanes = 16 / sizeof(T);
+  static const int kLanes = SIZE / sizeof(T);
   T src[kLanes];
-  simulator->get_neon_register(Vm, src);
+  simulator->get_neon_register<T, SIZE>(Vm, src);
   for (int i = 0; i < kLanes; i++) {
     src[i] = unop(src[i]);
   }
-  simulator->set_neon_register(Vd, src);
+  simulator->set_neon_register<T, SIZE>(Vd, src);
 }
 
-template <typename T>
+template <typename T, int SIZE = kSimd128Size>
 void Binop(Simulator* simulator, int Vd, int Vm, int Vn,
            std::function<T(T, T)> binop) {
-  static const int kLanes = 16 / sizeof(T);
+  static const int kLanes = SIZE / sizeof(T);
   T src1[kLanes], src2[kLanes];
-  simulator->get_neon_register(Vn, src1);
-  simulator->get_neon_register(Vm, src2);
+  simulator->get_neon_register<T, SIZE>(Vn, src1);
+  simulator->get_neon_register<T, SIZE>(Vm, src2);
   for (int i = 0; i < kLanes; i++) {
     src1[i] = binop(src1[i], src2[i]);
   }
-  simulator->set_neon_register(Vd, src1);
+  simulator->set_neon_register<T, SIZE>(Vd, src1);
 }
 
 // Templated operations for NEON instructions.
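
With the new SIZE parameter (defaulting to kSimd128Size, 16 bytes), the same
helpers serve both Q-register and D-register ops: SIZE / sizeof(T) lanes are
loaded, transformed, and stored back. For example, a hypothetical call that
negates the eight int8_t lanes of a D register (illustration only):

    Unop<int8_t, kDoubleSize>(simulator, Vd, Vm,
                              [](int8_t x) { return static_cast<int8_t>(-x); });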
@@ -4114,15 +4115,41 @@ void ShiftLeft(Simulator* simulator, int Vd, int Vm, int shift) {
 }
 
 template <typename T, int SIZE>
-void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
-  Unop<T>(simulator, Vd, Vm, [shift](T x) { return x >> shift; });
+void LogicalShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
+  Unop<T, SIZE>(simulator, Vd, Vm, [shift](T x) { return x >> shift; });
 }
 
 template <typename T, int SIZE>
 void ArithmeticShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
   auto shift_fn =
       std::bind(ArithmeticShiftRight<T>, std::placeholders::_1, shift);
-  Unop<T>(simulator, Vd, Vm, shift_fn);
+  Unop<T, SIZE>(simulator, Vd, Vm, shift_fn);
+}
+
+template <typename T, int SIZE>
+void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift,
+                bool is_unsigned) {
+  if (is_unsigned) {
+    using unsigned_T = typename std::make_unsigned<T>::type;
+    LogicalShiftRight<unsigned_T, SIZE>(simulator, Vd, Vm, shift);
+  } else {
+    ArithmeticShiftRight<T, SIZE>(simulator, Vd, Vm, shift);
+  }
+}
+
+template <typename T, int SIZE>
+void ShiftRightAccumulate(Simulator* simulator, int Vd, int Vm, int shift) {
+  Binop<T, SIZE>(simulator, Vd, Vm, Vd,
+                 [shift](T a, T x) { return a + (x >> shift); });
+}
+
+template <typename T, int SIZE>
+void ArithmeticShiftRightAccumulate(Simulator* simulator, int Vd, int Vm,
+                                    int shift) {
+  Binop<T, SIZE>(simulator, Vd, Vm, Vd, [shift](T a, T x) {
+    T result = ArithmeticShiftRight<T>(x, shift);
+    return a + result;
+  });
 }
 
 template <typename T, int SIZE>
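
ShiftRightAccumulate implements the vsra semantics dst[i] += src[i] >> shift,
passing Vd to Binop as both an input and the destination. Worked on one
unsigned 16-bit lane (values taken from the vsra_16 test below):
0x0001 + (0xFFFF >> 2) = 0x0001 + 0x3FFF = 0x4000.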
@@ -4652,8 +4679,8 @@ void Simulator::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
           }
           break;
         }
-        default:
-          UNIMPLEMENTED();
+        case Neon64:
+          UNREACHABLE();
           break;
       }
     } else if (opc1 == 0b10 && instr->Bit(10) == 1) {
@@ -5384,45 +5411,73 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
     int l = instr->Bit(7);
     int q = instr->Bit(6);
     int imm3H_L = imm3H << 1 | l;
+    int imm7 = instr->Bits(21, 16);
+    imm7 += (l << 6);
+    int size = base::bits::RoundDownToPowerOfTwo32(imm7);
+    NeonSize ns =
+        static_cast<NeonSize>(base::bits::WhichPowerOfTwo(size >> 3));
     if (imm3H_L != 0 && opc == 0) {
-      // vshr.s<size> Qd, Qm, shift
-      int imm7 = instr->Bits(21, 16);
-      if (instr->Bit(7) != 0) imm7 += 64;
-      int size = base::bits::RoundDownToPowerOfTwo32(imm7);
+      // vshr.s/u<size> Qd, Qm, shift
       int shift = 2 * size - imm7;
-      int Vd = instr->VFPDRegValue(kSimd128Precision);
-      int Vm = instr->VFPMRegValue(kSimd128Precision);
-      NeonSize ns =
-          static_cast<NeonSize>(base::bits::WhichPowerOfTwo(size >> 3));
+      int Vd = instr->VFPDRegValue(q ? kSimd128Precision : kDoublePrecision);
+      int Vm = instr->VFPMRegValue(q ? kSimd128Precision : kDoublePrecision);
+      switch (ns) {
+        case Neon8:
+          q ? ShiftRight<int8_t, kSimd128Size>(this, Vd, Vm, shift, u)
+            : ShiftRight<int8_t, kDoubleSize>(this, Vd, Vm, shift, u);
+          break;
+        case Neon16:
+          q ? ShiftRight<int16_t, kSimd128Size>(this, Vd, Vm, shift, u)
+            : ShiftRight<int16_t, kDoubleSize>(this, Vd, Vm, shift, u);
+          break;
+        case Neon32:
+          q ? ShiftRight<int32_t, kSimd128Size>(this, Vd, Vm, shift, u)
+            : ShiftRight<int32_t, kDoubleSize>(this, Vd, Vm, shift, u);
+          break;
+        case Neon64:
+          q ? ShiftRight<int64_t, kSimd128Size>(this, Vd, Vm, shift, u)
+            : ShiftRight<int64_t, kDoubleSize>(this, Vd, Vm, shift, u);
+          break;
+      }
+    } else if (imm3H_L != 0 && opc == 1) {
+      // vsra Dd, Dm, #imm
+      DCHECK(!q);  // Unimplemented for now.
+      int shift = 2 * size - imm7;
+      int Vd = instr->VFPDRegValue(kDoublePrecision);
+      int Vm = instr->VFPMRegValue(kDoublePrecision);
       if (u) {
         switch (ns) {
           case Neon8:
-            ShiftRight<uint8_t, kSimd128Size>(this, Vd, Vm, shift);
+            ShiftRightAccumulate<uint8_t, kDoubleSize>(this, Vd, Vm, shift);
             break;
           case Neon16:
-            ShiftRight<uint16_t, kSimd128Size>(this, Vd, Vm, shift);
+            ShiftRightAccumulate<uint16_t, kDoubleSize>(this, Vd, Vm, shift);
            break;
          case Neon32:
-            ShiftRight<uint32_t, kSimd128Size>(this, Vd, Vm, shift);
+            ShiftRightAccumulate<uint32_t, kDoubleSize>(this, Vd, Vm, shift);
            break;
          case Neon64:
-            ShiftRight<uint64_t, kSimd128Size>(this, Vd, Vm, shift);
+            ShiftRightAccumulate<uint64_t, kDoubleSize>(this, Vd, Vm, shift);
            break;
        }
      } else {
        switch (ns) {
          case Neon8:
-            ArithmeticShiftRight<int8_t, kSimd128Size>(this, Vd, Vm, shift);
+            ArithmeticShiftRightAccumulate<int8_t, kDoubleSize>(this, Vd, Vm,
+                                                                shift);
            break;
          case Neon16:
-            ArithmeticShiftRight<int16_t, kSimd128Size>(this, Vd, Vm, shift);
+            ArithmeticShiftRightAccumulate<int16_t, kDoubleSize>(this, Vd, Vm,
+                                                                 shift);
            break;
          case Neon32:
-            ArithmeticShiftRight<int32_t, kSimd128Size>(this, Vd, Vm, shift);
+            ArithmeticShiftRightAccumulate<int32_t, kDoubleSize>(this, Vd, Vm,
+                                                                 shift);
            break;
          case Neon64:
-            ArithmeticShiftRight<int64_t, kSimd128Size>(this, Vd, Vm, shift);
+            ArithmeticShiftRightAccumulate<int64_t, kDoubleSize>(this, Vd, Vm,
+                                                                 shift);
            break;
        }
      }
@@ -5432,8 +5487,7 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
       if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
       int Vd = instr->VFPDRegValue(kSimd128Precision);
       int Vm = instr->VFPMRegValue(kDoublePrecision);
-      int imm3 = instr->Bits(21, 19);
-      switch (imm3) {
+      switch (imm3H) {
         case 1:
           Widen<uint8_t, uint16_t>(this, Vd, Vm);
           break;
@@ -5452,8 +5506,7 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
       if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
       int Vd = instr->VFPDRegValue(kSimd128Precision);
       int Vm = instr->VFPMRegValue(kDoublePrecision);
-      int imm3 = instr->Bits(21, 19);
-      switch (imm3) {
+      switch (imm3H) {
         case 1:
           Widen<int8_t, int16_t>(this, Vd, Vm);
           break;
@@ -5470,9 +5523,6 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
       }
     } else if (!u && imm3H_L != 0 && opc == 0b0101) {
       // vshl.i<size> Qd, Qm, shift
-      int imm7 = instr->Bits(21, 16);
-      if (instr->Bit(7) != 0) imm7 += 64;
-      int size = base::bits::RoundDownToPowerOfTwo32(imm7);
       int shift = imm7 - size;
       int Vd = instr->VFPDRegValue(kSimd128Precision);
       int Vm = instr->VFPMRegValue(kSimd128Precision);
@@ -5494,9 +5544,6 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
       }
     } else if (u && imm3H_L != 0 && opc == 0b0100) {
       // vsri.<size> Dd, Dm, shift
-      int imm7 = instr->Bits(21, 16);
-      if (instr->Bit(7) != 0) imm7 += 64;
-      int size = base::bits::RoundDownToPowerOfTwo32(imm7);
       int shift = 2 * size - imm7;
       int Vd = instr->VFPDRegValue(kDoublePrecision);
       int Vm = instr->VFPMRegValue(kDoublePrecision);
@@ -5519,9 +5566,6 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
       }
     } else if (u && imm3H_L != 0 && opc == 0b0101) {
      // vsli.<size> Dd, Dm, shift
-      int imm7 = instr->Bits(21, 16);
-      if (instr->Bit(7) != 0) imm7 += 64;
-      int size = base::bits::RoundDownToPowerOfTwo32(imm7);
       int shift = imm7 - size;
       int Vd = instr->VFPDRegValue(kDoublePrecision);
       int Vm = instr->VFPMRegValue(kDoublePrecision);
...
@@ -1289,9 +1289,11 @@ TEST(15) {
     uint32_t vsub8[4], vsub16[4], vsub32[4];
     uint32_t vqsub_u8[4], vqsub_s16[4], vqsub_u32[4];
     uint32_t vmul8[4], vmul16[4], vmul32[4];
-    uint32_t vshl8[4], vshl16[4], vshl32[5];
-    uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[5];
+    uint32_t vshl8[4], vshl16[4], vshl32[4];
+    uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[4];
+    uint32_t vshr_s8_d[2], vshr_u16_d[2], vshr_s32_d[2];
     uint32_t vsli_64[2], vsri_64[2], vsli_32[2], vsri_32[2];
+    uint32_t vsra_64[2], vsra_32[2], vsra_16[2];
     uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4];
     uint32_t vcge_s8[4], vcge_u16[4], vcge_s32[4];
     uint32_t vcgt_s8[4], vcgt_u16[4], vcgt_s32[4];
@@ -1801,6 +1803,19 @@ TEST(15) {
     __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_s32))));
     __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
 
+    // vshr.s, vshr.u with d registers.
+    __ mov(r4, Operand(0x80));
+    __ vdup(Neon8, q0, r4);
+    __ vshr(NeonS8, d1, d0, 1);
+    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_s8_d))));
+    __ vst1(Neon8, NeonListOperand(d1), NeonMemOperand(r4));
+    __ vshr(NeonU16, d2, d0, 9);
+    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_u16_d))));
+    __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
+    __ vshr(NeonS32, d2, d0, 17);
+    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_s32_d))));
+    __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
+
     // vsli, vsri.
     __ mov(r4, Operand(0xFFFFFFFF));
     __ mov(r5, Operand(0x1));
@@ -1821,6 +1836,20 @@ TEST(15) {
     __ vsri(Neon32, d1, d0, 16);
     __ vstr(d1, r0, offsetof(T, vsri_32));
 
+    // vsra.
+    __ vmov(d0, r4, r5);
+    // Check same dst and src registers.
+    __ vsra(NeonU64, d0, d0, 1);
+    __ vstr(d0, r0, offsetof(T, vsra_64));
+    __ vmov(d0, r4, r5);
+    __ vmov(d1, r5, r4);
+    __ vsra(NeonS32, d1, d0, 16);
+    __ vstr(d1, r0, offsetof(T, vsra_32));
+    __ vmov(d0, r4, r5);
+    __ vmov(d1, r5, r4);
+    __ vsra(NeonU16, d1, d0, 2);
+    __ vstr(d1, r0, offsetof(T, vsra_16));
+
     // vceq.
     __ mov(r4, Operand(0x03));
     __ vdup(Neon8, q0, r4);
@@ -2197,10 +2226,16 @@ TEST(15) {
     CHECK_EQ_SPLAT(vshr_s8, 0xC0C0C0C0u);
     CHECK_EQ_SPLAT(vshr_u16, 0x00400040u);
     CHECK_EQ_SPLAT(vshr_s32, 0xFFFFC040u);
+    CHECK_EQ_32X2(vshr_s8_d, 0xC0C0C0C0u, 0xC0C0C0C0u);
+    CHECK_EQ_32X2(vshr_u16_d, 0x00400040u, 0x00400040u);
+    CHECK_EQ_32X2(vshr_s32_d, 0xFFFFC040u, 0xFFFFC040u);
     CHECK_EQ_32X2(vsli_64, 0x01u, 0xFFFFFFFFu);
     CHECK_EQ_32X2(vsri_64, 0xFFFFFFFFu, 0x01u);
     CHECK_EQ_32X2(vsli_32, 0xFFFF0001u, 0x00010001u);
     CHECK_EQ_32X2(vsri_32, 0x00000000u, 0x0000FFFFu);
+    CHECK_EQ_32X2(vsra_64, 0xFFFFFFFEu, 0x2);
+    CHECK_EQ_32X2(vsra_32, 0x0, 0xFFFFFFFFu);
+    CHECK_EQ_32X2(vsra_16, 0x3FFF4000, 0xFFFFFFFFu);
     CHECK_EQ_SPLAT(vceq, 0x00FF00FFu);
     // [0, 3, 0, 3, ...] >= [3, 3, 3, 3, ...]
     CHECK_EQ_SPLAT(vcge_s8, 0x00FF00FFu);
...
@@ -1630,7 +1630,7 @@ WASM_SIMD_TEST(I32x4BitMask) {
 }
 
 // TODO(v8:10997) Prototyping i64x2.bitmask.
-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
 WASM_SIMD_TEST_NO_LOWERING(I64x2BitMask) {
   FLAG_SCOPE(wasm_simd_post_mvp);
   WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
@@ -1648,7 +1648,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2BitMask) {
     CHECK_EQ(actual, expected);
   }
 }
-#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
+#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
 
 WASM_SIMD_TEST(I8x16Splat) {
   WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
...