Commit 386e5a11 authored by bbudge's avatar bbudge Committed by Commit bot

Implement remaining Boolean SIMD operations on ARM.

- Implements Select instructions using a single ARM vbsl instruction.
- Renames boolean machine operators to match renamed S1xN machine types.
- Implements S1xN vector logical ops, AND, OR, XOR, NOT for ARM.
- Implements S1xN AnyTrue, AllTrue ops for ARM.
- Eliminates unused SIMD op categories in opcodes.h.

LOG=N
BUG=v8:6020

Review-Url: https://codereview.chromium.org/2711863002
Cr-Commit-Position: refs/heads/master@{#43556}
parent 91cd0707
......@@ -4123,25 +4123,43 @@ void Assembler::vcvt_u32_f32(const QwNeonRegister dst,
emit(EncodeNeonVCVT(U32, dst, F32, src));
}
// op is instr->Bits(11, 7).
static Instr EncodeNeonUnaryOp(int op, bool is_float, NeonSize size,
const QwNeonRegister dst,
const QwNeonRegister src) {
DCHECK_IMPLIES(is_float, size == Neon32);
enum UnaryOp { VABS, VABSF, VNEG, VNEGF };
static Instr EncodeNeonUnaryOp(UnaryOp op, NeonSize size, QwNeonRegister dst,
QwNeonRegister src) {
int op_encoding = 0;
switch (op) {
case VABS:
op_encoding = 0x6 * B7;
break;
case VABSF:
DCHECK_EQ(Neon32, size);
op_encoding = 0x6 * B7 | B10;
break;
case VNEG:
op_encoding = 0x7 * B7;
break;
case VNEGF:
DCHECK_EQ(Neon32, size);
op_encoding = 0x7 * B7 | B10;
break;
default:
UNREACHABLE();
break;
}
int vd, d;
dst.split_code(&vd, &d);
int vm, m;
src.split_code(&vm, &m);
int F = is_float ? 1 : 0;
return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | B16 | vd * B12 |
F * B10 | B8 | op * B7 | B6 | m * B5 | vm;
return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | B16 | vd * B12 | B6 |
m * B5 | vm | op_encoding;
}
void Assembler::vabs(const QwNeonRegister dst, const QwNeonRegister src) {
// Qd = vabs.f32(Qm) SIMD floating point absolute value.
// This overload always encodes 32-bit float lanes (Neon32); vabs is a unary
// operation, so src is its only source operand.
// Instruction details available in ARM DDI 0406C.b, A8.8.824.
DCHECK(IsEnabled(NEON));
emit(EncodeNeonUnaryOp(0x6, true, Neon32, dst, src));
emit(EncodeNeonUnaryOp(VABSF, Neon32, dst, src));
}
void Assembler::vabs(NeonSize size, const QwNeonRegister dst,
......@@ -4149,14 +4167,14 @@ void Assembler::vabs(NeonSize size, const QwNeonRegister dst,
// Qd = vabs.s<size>(Qm) SIMD integer absolute value.
// Instruction details available in ARM DDI 0406C.b, A8.8.824.
DCHECK(IsEnabled(NEON));
emit(EncodeNeonUnaryOp(0x6, false, size, dst, src));
emit(EncodeNeonUnaryOp(VABS, size, dst, src));
}
void Assembler::vneg(const QwNeonRegister dst, const QwNeonRegister src) {
// Qd = vneg.f32(Qm) SIMD floating point negate.
// This overload always encodes 32-bit float lanes (Neon32); vneg is a unary
// operation, so src is its only source operand.
// Instruction details available in ARM DDI 0406C.b, A8.8.968.
DCHECK(IsEnabled(NEON));
emit(EncodeNeonUnaryOp(0x7, true, Neon32, dst, src));
emit(EncodeNeonUnaryOp(VNEGF, Neon32, dst, src));
}
void Assembler::vneg(NeonSize size, const QwNeonRegister dst,
......@@ -4164,7 +4182,7 @@ void Assembler::vneg(NeonSize size, const QwNeonRegister dst,
// Qd = vneg.s<size>(Qm) SIMD integer negate.
// Instruction details available in ARM DDI 0406C.b, A8.8.968.
DCHECK(IsEnabled(NEON));
emit(EncodeNeonUnaryOp(0x7, false, size, dst, src));
emit(EncodeNeonUnaryOp(VNEG, size, dst, src));
}
void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
......@@ -4184,10 +4202,9 @@ void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN };
static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op,
const QwNeonRegister dst,
const QwNeonRegister src1,
const QwNeonRegister src2) {
static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, QwNeonRegister dst,
QwNeonRegister src1,
QwNeonRegister src2) {
int op_encoding = 0;
switch (op) {
case VBIC:
......@@ -4336,9 +4353,8 @@ enum IntegerBinOp {
};
static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
const QwNeonRegister dst,
const QwNeonRegister src1,
const QwNeonRegister src2) {
QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) {
int op_encoding = 0;
switch (op) {
case VADD:
......@@ -4390,10 +4406,8 @@ static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
n * B7 | B6 | m * B5 | vm | op_encoding;
}
static Instr EncodeNeonBinOp(IntegerBinOp op, NeonSize size,
const QwNeonRegister dst,
const QwNeonRegister src1,
const QwNeonRegister src2) {
static Instr EncodeNeonBinOp(IntegerBinOp op, NeonSize size, QwNeonRegister dst,
QwNeonRegister src1, QwNeonRegister src2) {
// Map NeonSize values to the signed values in NeonDataType, so the U bit
// will be 0.
return EncodeNeonBinOp(op, static_cast<NeonDataType>(size), dst, src1, src2);
......@@ -4578,6 +4592,51 @@ void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1,
emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2));
}
enum PairwiseOp { VPMIN, VPMAX };
// Builds the instruction word for an integer pairwise min/max on D registers.
//   op   - VPMIN or VPMAX; both place 0xA in bits 11..8, VPMIN also sets B4.
//   dt   - data type; NeonU(dt) supplies the U bit (B24) and NeonSz(dt) the
//          size field (starting at B20).
//   dst, src1, src2 - destination and the two source registers; each is split
//          into its register-field value and its extra bit via split_code().
static Instr EncodeNeonPairwiseOp(PairwiseOp op, NeonDataType dt,
                                  DwVfpRegister dst, DwVfpRegister src1,
                                  DwVfpRegister src2) {
  // Opcode-specific bits; stays 0 only if an unknown op slips through.
  int opcode_bits = 0;
  switch (op) {
    case VPMAX:
      opcode_bits = 0xA * B8;
      break;
    case VPMIN:
      opcode_bits = 0xA * B8 | B4;
      break;
    default:
      UNREACHABLE();
      break;
  }

  // Register operands: Vd:D, Vn:N and Vm:M.
  int dst_field, dst_bit;
  dst.split_code(&dst_field, &dst_bit);
  int lhs_field, lhs_bit;
  src1.split_code(&lhs_field, &lhs_bit);
  int rhs_field, rhs_bit;
  src2.split_code(&rhs_field, &rhs_bit);
  const int register_bits = dst_bit * B22 | lhs_field * B16 |
                            dst_field * B12 | lhs_bit * B7 | rhs_bit * B5 |
                            rhs_field;

  // Signedness and lane size come from the data type.
  const int type_bits = NeonU(dt) * B24 | NeonSz(dt) * B20;

  return 0x1E4U * B23 | type_bits | register_bits | opcode_bits;
}
// Emits Dd = vpmin(Dn, Dm), the SIMD integer pairwise MIN, for data type dt.
// Instruction details available in ARM DDI 0406C.b, A8-986.
void Assembler::vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  // NEON must be enabled before any NEON instruction is emitted.
  DCHECK(IsEnabled(NEON));
  const Instr encoded = EncodeNeonPairwiseOp(VPMIN, dt, dst, src1, src2);
  emit(encoded);
}
// Emits Dd = vpmax(Dn, Dm), the SIMD integer pairwise MAX, for data type dt.
// Instruction details available in ARM DDI 0406C.b, A8-986.
void Assembler::vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  // NEON must be enabled before any NEON instruction is emitted.
  DCHECK(IsEnabled(NEON));
  const Instr encoded = EncodeNeonPairwiseOp(VPMAX, dt, dst, src1, src2);
  emit(encoded);
}
void Assembler::vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
......
......@@ -418,7 +418,8 @@ const QwNeonRegister q15 = { 15 };
// compilation unit that includes this header doesn't use the variables.
#define kFirstCalleeSavedDoubleReg d8
#define kLastCalleeSavedDoubleReg d15
// kDoubleRegZero and kScratchDoubleReg must pair to form kScratchQuadReg.
// kDoubleRegZero and kScratchDoubleReg must pair to form kScratchQuadReg. SIMD
// code depends on kDoubleRegZero coming before kScratchDoubleReg.
#define kDoubleRegZero d14
#define kScratchDoubleReg d15
// After using kScratchQuadReg, kDoubleRegZero must be reset to 0.
......@@ -1359,6 +1360,10 @@ class Assembler : public AssemblerBase {
void vmax(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vmax(NeonDataType dt, QwNeonRegister dst,
QwNeonRegister src1, QwNeonRegister src2);
void vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
DwVfpRegister src2);
void vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
DwVfpRegister src2);
void vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src, int shift);
void vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src, int shift);
// vrecpe and vrsqrte only support floating point lanes.
......
......@@ -1867,10 +1867,10 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
Vm = instr->VFPMRegValue(kSimd128Precision);
Vn = instr->VFPNRegValue(kSimd128Precision);
}
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
switch (instr->Bits(11, 8)) {
case 0x0: {
if (instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vqadd.s<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
......@@ -1904,7 +1904,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
}
case 0x2: {
if (instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vqsub.s<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
......@@ -1915,7 +1914,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
break;
}
case 0x3: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt";
// vcge/vcgt.s<size> Qd, Qm, Qn.
out_buffer_pos_ +=
......@@ -1924,7 +1922,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
break;
}
case 0x6: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vmin/vmax.s<size> Qd, Qm, Qn.
const char* op = instr->Bit(4) == 1 ? "vmin" : "vmax";
out_buffer_pos_ +=
......@@ -1934,7 +1931,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
}
case 0x8: {
const char* op = (instr->Bit(4) == 0) ? "vadd" : "vtst";
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vadd/vtst.i<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.i%d q%d, q%d, q%d",
......@@ -1943,7 +1939,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
}
case 0x9: {
if (instr->Bit(6) == 1 && instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vmul.i<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
......@@ -1953,6 +1948,14 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
}
break;
}
case 0xa: {
// vpmin/vpmax.s<size> Dd, Dm, Dn.
const char* op = instr->Bit(4) == 1 ? "vpmin" : "vpmax";
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.s%d d%d, d%d, d%d",
op, size, Vd, Vn, Vm);
break;
}
case 0xd: {
if (instr->Bit(4) == 0) {
const char* op = (instr->Bits(21, 20) == 0) ? "vadd" : "vsub";
......@@ -2052,10 +2055,10 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
Vm = instr->VFPMRegValue(kSimd128Precision);
Vn = instr->VFPNRegValue(kSimd128Precision);
}
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
switch (instr->Bits(11, 8)) {
case 0x0: {
if (instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vqadd.u<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
......@@ -2087,7 +2090,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
}
case 0x2: {
if (instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vqsub.u<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
......@@ -2098,7 +2100,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
break;
}
case 0x3: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt";
// vcge/vcgt.u<size> Qd, Qm, Qn.
out_buffer_pos_ +=
......@@ -2107,7 +2108,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
break;
}
case 0x6: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vmin/vmax.u<size> Qd, Qm, Qn.
const char* op = instr->Bit(4) == 1 ? "vmin" : "vmax";
out_buffer_pos_ +=
......@@ -2116,7 +2116,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
break;
}
case 0x8: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
if (instr->Bit(4) == 0) {
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
......@@ -2128,6 +2127,14 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
}
break;
}
case 0xa: {
// vpmin/vpmax.u<size> Dd, Dm, Dn.
const char* op = instr->Bit(4) == 1 ? "vpmin" : "vpmax";
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.u%d d%d, d%d, d%d",
op, size, Vd, Vn, Vm);
break;
}
case 0xd: {
if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
// vmul.f32 Qd, Qn, Qm
......
......@@ -1162,6 +1162,15 @@ void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src,
vmov(dt, dst, double_source, double_lane);
}
// Moves one lane of the D register src into the core register dst.
//   dt   - lane data type; NeonSz(dt) is used as a shift amount (0, 1, 2).
//   lane - lane index; its byte offset is masked with (kDoubleSize - 1)
//          before being turned back into a lane index, so the index wraps
//          rather than reaching past the register.
void MacroAssembler::ExtractLane(Register dst, DwVfpRegister src,
                                 NeonDataType dt, int lane) {
  const int lane_shift = NeonSz(dt);  // 0, 1, 2
  const int byte_in_double = (lane << lane_shift) & (kDoubleSize - 1);
  vmov(dt, dst, src, byte_in_double >> lane_shift);
}
void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src,
Register scratch, int lane) {
int s_code = src.code() * 4 + lane;
......
......@@ -563,6 +563,7 @@ class MacroAssembler: public Assembler {
void VmovExtended(const MemOperand& dst, int src_code, Register scratch);
void ExtractLane(Register dst, QwNeonRegister src, NeonDataType dt, int lane);
void ExtractLane(Register dst, DwVfpRegister src, NeonDataType dt, int lane);
void ExtractLane(SwVfpRegister dst, QwNeonRegister src, Register scratch,
int lane);
void ReplaceLane(QwNeonRegister dst, QwNeonRegister src, Register src_lane,
......
......@@ -900,6 +900,18 @@ void Simulator::set_d_register(int dreg, const uint32_t* value) {
memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2);
}
// Copies the raw contents of D register `dreg` into the caller's buffer.
// Exactly kDoubleSize bytes are written, regardless of T, so `value` must
// point at storage of at least that size.
template <typename T>
void Simulator::get_d_register(int dreg, T* value) {
  DCHECK((dreg >= 0) && (dreg < num_d_registers));
  // Each D register occupies two consecutive vfp_registers_ entries.
  memcpy(value, &vfp_registers_[dreg * 2], kDoubleSize);
}
// Overwrites D register `dreg` with raw bytes from the caller's buffer.
// Exactly kDoubleSize bytes are read, regardless of T, so `value` must
// point at storage of at least that size.
template <typename T>
void Simulator::set_d_register(int dreg, const T* value) {
  DCHECK((dreg >= 0) && (dreg < num_d_registers));
  // Each D register occupies two consecutive vfp_registers_ entries.
  memcpy(&vfp_registers_[dreg * 2], value, kDoubleSize);
}
template <typename T>
void Simulator::get_q_register(int qreg, T* value) {
DCHECK((qreg >= 0) && (qreg < num_q_registers));
......@@ -912,7 +924,6 @@ void Simulator::set_q_register(int qreg, const T* value) {
memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size);
}
// Raw access to the PC register.
void Simulator::set_pc(int32_t value) {
pc_modified_ = true;
......@@ -4006,6 +4017,11 @@ T Clamp(int64_t value) {
return static_cast<T>(clamped);
}
// Returns the smaller of a and b when is_min is true, otherwise the larger.
// Ties and ordering follow std::min / std::max exactly.
template <typename T>
T MinMax(T a, T b, bool is_min) {
  if (is_min) {
    return std::min(a, b);
  }
  return std::max(a, b);
}
template <typename T>
void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
static const int kLanes = 16 / sizeof(T);
......@@ -4180,10 +4196,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
src1[i] = MinMax(src1[i], src2[i], min);
}
set_q_register(Vd, src1);
break;
......@@ -4193,10 +4206,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
src1[i] = MinMax(src1[i], src2[i], min);
}
set_q_register(Vd, src1);
break;
......@@ -4206,10 +4216,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
src1[i] = MinMax(src1[i], src2[i], min);
}
set_q_register(Vd, src1);
break;
......@@ -4344,6 +4351,48 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
}
break;
}
case 0xa: {
// vpmin/vpmax.s<size> Dd, Dm, Dn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
bool min = instr->Bit(4) != 0;
switch (size) {
case Neon8: {
int8_t dst[8], src1[8], src2[8];
get_d_register(Vn, src1);
get_d_register(Vm, src2);
for (int i = 0; i < 4; i++) {
dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
}
set_d_register(Vd, dst);
break;
}
case Neon16: {
int16_t dst[4], src1[4], src2[4];
get_d_register(Vn, src1);
get_d_register(Vm, src2);
for (int i = 0; i < 2; i++) {
dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
}
set_d_register(Vd, dst);
break;
}
case Neon32: {
int32_t dst[2], src1[2], src2[2];
get_d_register(Vn, src1);
get_d_register(Vm, src2);
dst[0] = MinMax(src1[0], src1[1], min);
dst[1] = MinMax(src2[0], src2[1], min);
set_d_register(Vd, dst);
break;
}
default:
UNREACHABLE();
break;
}
break;
}
case 0xd: {
if (instr->Bit(4) == 0) {
float src1[4], src2[4];
......@@ -4398,16 +4447,10 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
}
}
} else {
if (instr->Bit(21) == 1) {
// vmin.f32 Qd, Qm, Qn.
for (int i = 0; i < 4; i++) {
src1[i] = std::min(src1[i], src2[i]);
}
} else {
// vmax.f32 Qd, Qm, Qn.
for (int i = 0; i < 4; i++) {
src1[i] = std::max(src1[i], src2[i]);
}
// vmin/vmax.f32 Qd, Qm, Qn.
bool min = instr->Bit(21) == 1;
for (int i = 0; i < 4; i++) {
src1[i] = MinMax(src1[i], src2[i], min);
}
}
set_q_register(Vd, src1);
......@@ -4693,10 +4736,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
src1[i] = MinMax(src1[i], src2[i], min);
}
set_q_register(Vd, src1);
break;
......@@ -4706,10 +4746,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
src1[i] = MinMax(src1[i], src2[i], min);
}
set_q_register(Vd, src1);
break;
......@@ -4719,10 +4756,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
src1[i] = MinMax(src1[i], src2[i], min);
}
set_q_register(Vd, src1);
break;
......@@ -4813,6 +4847,48 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
}
break;
}
case 0xa: {
// vpmin/vpmax.u<size> Dd, Dm, Dn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
bool min = instr->Bit(4) != 0;
switch (size) {
case Neon8: {
uint8_t dst[8], src1[8], src2[8];
get_d_register(Vn, src1);
get_d_register(Vm, src2);
for (int i = 0; i < 4; i++) {
dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
}
set_d_register(Vd, dst);
break;
}
case Neon16: {
uint16_t dst[4], src1[4], src2[4];
get_d_register(Vn, src1);
get_d_register(Vm, src2);
for (int i = 0; i < 2; i++) {
dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
}
set_d_register(Vd, dst);
break;
}
case Neon32: {
uint32_t dst[2], src1[2], src2[2];
get_d_register(Vn, src1);
get_d_register(Vm, src2);
dst[0] = MinMax(src1[0], src1[1], min);
dst[1] = MinMax(src2[0], src2[1], min);
set_d_register(Vd, dst);
break;
}
default:
UNREACHABLE();
break;
}
break;
}
case 0xd: {
if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
// vmul.f32 Qd, Qn, Qm
......
......@@ -155,6 +155,10 @@ class Simulator {
void set_d_register(int dreg, const uint32_t* value);
// Support for NEON.
template <typename T>
void get_d_register(int dreg, T* value);
template <typename T>
void set_d_register(int dreg, const T* value);
template <typename T>
void get_q_register(int qreg, T* value);
template <typename T>
void set_q_register(int qreg, const T* value);
......
......@@ -1636,8 +1636,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmInt32x4GreaterThanOrEqual: {
Simd128Register dst = i.OutputSimd128Register();
__ vcge(NeonS32, dst, i.InputSimd128Register(0),
__ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
......@@ -1662,8 +1661,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmUint32x4GreaterThanOrEqual: {
Simd128Register dst = i.OutputSimd128Register();
__ vcge(NeonU32, dst, i.InputSimd128Register(0),
__ vcge(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
......@@ -1748,8 +1746,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmInt16x8GreaterThanOrEqual: {
Simd128Register dst = i.OutputSimd128Register();
__ vcge(NeonS16, dst, i.InputSimd128Register(0),
__ vcge(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
......@@ -1784,8 +1781,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmUint16x8GreaterThanOrEqual: {
Simd128Register dst = i.OutputSimd128Register();
__ vcge(NeonU16, dst, i.InputSimd128Register(0),
__ vcge(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
......@@ -1869,8 +1865,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmInt8x16GreaterThanOrEqual: {
Simd128Register dst = i.OutputSimd128Register();
__ vcge(NeonS8, dst, i.InputSimd128Register(0),
__ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
......@@ -1905,8 +1900,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmUint8x16GreaterThanOrEqual: {
Simd128Register dst = i.OutputSimd128Register();
__ vcge(NeonU8, dst, i.InputSimd128Register(0),
__ vcge(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
......@@ -1934,15 +1928,69 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmSimd32x4Select:
case kArmSimd16x8Select:
case kArmSimd8x16Select: {
case kArmSimd128Select: {
// vbsl clobbers the mask input so make sure it was DefineSameAsFirst.
DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0)));
__ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1),
i.InputSimd128Register(2));
break;
}
case kArmSimd1x4AnyTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
__ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high());
__ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg,
kScratchDoubleReg);
__ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0);
break;
}
case kArmSimd1x4AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
__ vpmin(NeonU32, kScratchDoubleReg, src.low(), src.high());
__ vpmin(NeonU32, kScratchDoubleReg, kScratchDoubleReg,
kScratchDoubleReg);
__ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0);
break;
}
case kArmSimd1x8AnyTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
__ vpmax(NeonU16, kScratchDoubleReg, src.low(), src.high());
__ vpmax(NeonU16, kScratchDoubleReg, kScratchDoubleReg,
kScratchDoubleReg);
__ vpmax(NeonU16, kScratchDoubleReg, kScratchDoubleReg,
kScratchDoubleReg);
__ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS16, 0);
break;
}
case kArmSimd1x8AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
__ vpmin(NeonU16, kScratchDoubleReg, src.low(), src.high());
__ vpmin(NeonU16, kScratchDoubleReg, kScratchDoubleReg,
kScratchDoubleReg);
__ vpmin(NeonU16, kScratchDoubleReg, kScratchDoubleReg,
kScratchDoubleReg);
__ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS16, 0);
break;
}
case kArmSimd1x16AnyTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
__ vpmax(NeonU8, kScratchDoubleReg, src.low(), src.high());
__ vpmax(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
// vtst to detect any bits in the bottom 32 bits of kScratchDoubleReg.
// This saves an instruction vs. the naive sequence of vpmax.
// kDoubleRegZero is not changed, since it is 0.
__ vtst(Neon32, kScratchQuadReg, kScratchQuadReg, kScratchQuadReg);
__ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0);
break;
}
case kArmSimd1x16AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
__ vpmin(NeonU8, kScratchDoubleReg, src.low(), src.high());
__ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS8, 0);
break;
}
case kCheckedLoadInt8:
ASSEMBLE_CHECKED_LOAD_INTEGER(ldrsb);
break;
......
......@@ -206,9 +206,13 @@ namespace compiler {
V(ArmSimd128Or) \
V(ArmSimd128Xor) \
V(ArmSimd128Not) \
V(ArmSimd32x4Select) \
V(ArmSimd16x8Select) \
V(ArmSimd8x16Select)
V(ArmSimd128Select) \
V(ArmSimd1x4AnyTrue) \
V(ArmSimd1x4AllTrue) \
V(ArmSimd1x8AnyTrue) \
V(ArmSimd1x8AllTrue) \
V(ArmSimd1x16AnyTrue) \
V(ArmSimd1x16AllTrue)
// Addressing modes represent the "shape" of inputs to an instruction.
// Many instructions support multiple addressing modes. Addressing modes
......
......@@ -194,9 +194,13 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmSimd128Or:
case kArmSimd128Xor:
case kArmSimd128Not:
case kArmSimd32x4Select:
case kArmSimd16x8Select:
case kArmSimd8x16Select:
case kArmSimd128Select:
case kArmSimd1x4AnyTrue:
case kArmSimd1x4AllTrue:
case kArmSimd1x8AnyTrue:
case kArmSimd1x8AllTrue:
case kArmSimd1x16AnyTrue:
case kArmSimd1x16AllTrue:
return kNoOpcodeFlags;
case kArmVldrF32:
......
......@@ -2198,73 +2198,91 @@ void InstructionSelector::VisitAtomicStore(Node* node) {
V(Simd1x8Zero) \
V(Simd1x16Zero)
#define SIMD_UNOP_LIST(V) \
V(Float32x4FromInt32x4) \
V(Float32x4FromUint32x4) \
V(Float32x4Abs) \
V(Float32x4Neg) \
V(Int32x4FromFloat32x4) \
V(Uint32x4FromFloat32x4) \
V(Int32x4Neg) \
V(Int16x8Neg) \
V(Int8x16Neg) \
V(Simd128Not)
#define SIMD_BINOP_LIST(V) \
V(Float32x4Add) \
V(Float32x4Sub) \
V(Float32x4Equal) \
V(Float32x4NotEqual) \
V(Int32x4Add) \
V(Int32x4Sub) \
V(Int32x4Mul) \
V(Int32x4Min) \
V(Int32x4Max) \
V(Int32x4Equal) \
V(Int32x4NotEqual) \
V(Int32x4GreaterThan) \
V(Int32x4GreaterThanOrEqual) \
V(Uint32x4Min) \
V(Uint32x4Max) \
V(Uint32x4GreaterThan) \
V(Uint32x4GreaterThanOrEqual) \
V(Int16x8Add) \
V(Int16x8AddSaturate) \
V(Int16x8Sub) \
V(Int16x8SubSaturate) \
V(Int16x8Mul) \
V(Int16x8Min) \
V(Int16x8Max) \
V(Int16x8Equal) \
V(Int16x8NotEqual) \
V(Int16x8GreaterThan) \
V(Int16x8GreaterThanOrEqual) \
V(Uint16x8AddSaturate) \
V(Uint16x8SubSaturate) \
V(Uint16x8Min) \
V(Uint16x8Max) \
V(Uint16x8GreaterThan) \
V(Uint16x8GreaterThanOrEqual) \
V(Int8x16Add) \
V(Int8x16AddSaturate) \
V(Int8x16Sub) \
V(Int8x16SubSaturate) \
V(Int8x16Mul) \
V(Int8x16Min) \
V(Int8x16Max) \
V(Int8x16Equal) \
V(Int8x16NotEqual) \
V(Int8x16GreaterThan) \
V(Int8x16GreaterThanOrEqual) \
V(Uint8x16AddSaturate) \
V(Uint8x16SubSaturate) \
V(Uint8x16Min) \
V(Uint8x16Max) \
V(Uint8x16GreaterThan) \
V(Uint8x16GreaterThanOrEqual) \
V(Simd128And) \
V(Simd128Or) \
V(Simd128Xor)
#define SIMD_UNOP_LIST(V) \
V(Float32x4FromInt32x4, kArmFloat32x4FromInt32x4) \
V(Float32x4FromUint32x4, kArmFloat32x4FromUint32x4) \
V(Float32x4Abs, kArmFloat32x4Abs) \
V(Float32x4Neg, kArmFloat32x4Neg) \
V(Int32x4FromFloat32x4, kArmInt32x4FromFloat32x4) \
V(Uint32x4FromFloat32x4, kArmUint32x4FromFloat32x4) \
V(Int32x4Neg, kArmInt32x4Neg) \
V(Int16x8Neg, kArmInt16x8Neg) \
V(Int8x16Neg, kArmInt8x16Neg) \
V(Simd128Not, kArmSimd128Not) \
V(Simd1x4Not, kArmSimd128Not) \
V(Simd1x4AnyTrue, kArmSimd1x4AnyTrue) \
V(Simd1x4AllTrue, kArmSimd1x4AllTrue) \
V(Simd1x8Not, kArmSimd128Not) \
V(Simd1x8AnyTrue, kArmSimd1x8AnyTrue) \
V(Simd1x8AllTrue, kArmSimd1x8AllTrue) \
V(Simd1x16Not, kArmSimd128Not) \
V(Simd1x16AnyTrue, kArmSimd1x16AnyTrue) \
V(Simd1x16AllTrue, kArmSimd1x16AllTrue)
#define SIMD_BINOP_LIST(V) \
V(Float32x4Add, kArmFloat32x4Add) \
V(Float32x4Sub, kArmFloat32x4Sub) \
V(Float32x4Equal, kArmFloat32x4Equal) \
V(Float32x4NotEqual, kArmFloat32x4NotEqual) \
V(Int32x4Add, kArmInt32x4Add) \
V(Int32x4Sub, kArmInt32x4Sub) \
V(Int32x4Mul, kArmInt32x4Mul) \
V(Int32x4Min, kArmInt32x4Min) \
V(Int32x4Max, kArmInt32x4Max) \
V(Int32x4Equal, kArmInt32x4Equal) \
V(Int32x4NotEqual, kArmInt32x4NotEqual) \
V(Int32x4GreaterThan, kArmInt32x4GreaterThan) \
V(Int32x4GreaterThanOrEqual, kArmInt32x4GreaterThanOrEqual) \
V(Uint32x4Min, kArmUint32x4Min) \
V(Uint32x4Max, kArmUint32x4Max) \
V(Uint32x4GreaterThan, kArmUint32x4GreaterThan) \
V(Uint32x4GreaterThanOrEqual, kArmUint32x4GreaterThanOrEqual) \
V(Int16x8Add, kArmInt16x8Add) \
V(Int16x8AddSaturate, kArmInt16x8AddSaturate) \
V(Int16x8Sub, kArmInt16x8Sub) \
V(Int16x8SubSaturate, kArmInt16x8SubSaturate) \
V(Int16x8Mul, kArmInt16x8Mul) \
V(Int16x8Min, kArmInt16x8Min) \
V(Int16x8Max, kArmInt16x8Max) \
V(Int16x8Equal, kArmInt16x8Equal) \
V(Int16x8NotEqual, kArmInt16x8NotEqual) \
V(Int16x8GreaterThan, kArmInt16x8GreaterThan) \
V(Int16x8GreaterThanOrEqual, kArmInt16x8GreaterThanOrEqual) \
V(Uint16x8AddSaturate, kArmUint16x8AddSaturate) \
V(Uint16x8SubSaturate, kArmUint16x8SubSaturate) \
V(Uint16x8Min, kArmUint16x8Min) \
V(Uint16x8Max, kArmUint16x8Max) \
V(Uint16x8GreaterThan, kArmUint16x8GreaterThan) \
V(Uint16x8GreaterThanOrEqual, kArmUint16x8GreaterThanOrEqual) \
V(Int8x16Add, kArmInt8x16Add) \
V(Int8x16AddSaturate, kArmInt8x16AddSaturate) \
V(Int8x16Sub, kArmInt8x16Sub) \
V(Int8x16SubSaturate, kArmInt8x16SubSaturate) \
V(Int8x16Mul, kArmInt8x16Mul) \
V(Int8x16Min, kArmInt8x16Min) \
V(Int8x16Max, kArmInt8x16Max) \
V(Int8x16Equal, kArmInt8x16Equal) \
V(Int8x16NotEqual, kArmInt8x16NotEqual) \
V(Int8x16GreaterThan, kArmInt8x16GreaterThan) \
V(Int8x16GreaterThanOrEqual, kArmInt8x16GreaterThanOrEqual) \
V(Uint8x16AddSaturate, kArmUint8x16AddSaturate) \
V(Uint8x16SubSaturate, kArmUint8x16SubSaturate) \
V(Uint8x16Min, kArmUint8x16Min) \
V(Uint8x16Max, kArmUint8x16Max) \
V(Uint8x16GreaterThan, kArmUint8x16GreaterThan) \
V(Uint8x16GreaterThanOrEqual, kArmUint8x16GreaterThanOrEqual) \
V(Simd128And, kArmSimd128And) \
V(Simd128Or, kArmSimd128Or) \
V(Simd128Xor, kArmSimd128Xor) \
V(Simd1x4And, kArmSimd128And) \
V(Simd1x4Or, kArmSimd128Or) \
V(Simd1x4Xor, kArmSimd128Xor) \
V(Simd1x8And, kArmSimd128And) \
V(Simd1x8Or, kArmSimd128Or) \
V(Simd1x8Xor, kArmSimd128Xor) \
V(Simd1x16And, kArmSimd128And) \
V(Simd1x16Or, kArmSimd128Or) \
V(Simd1x16Xor, kArmSimd128Xor)
#define SIMD_SHIFT_OP_LIST(V) \
V(Int32x4ShiftLeftByScalar) \
......@@ -2306,16 +2324,16 @@ SIMD_TYPE_LIST(SIMD_VISIT_REPLACE_LANE)
SIMD_ZERO_OP_LIST(SIMD_VISIT_ZERO_OP)
#undef SIMD_VISIT_ZERO_OP
#define SIMD_VISIT_UNOP(Name) \
#define SIMD_VISIT_UNOP(Name, instruction) \
void InstructionSelector::Visit##Name(Node* node) { \
VisitRR(this, kArm##Name, node); \
VisitRR(this, instruction, node); \
}
SIMD_UNOP_LIST(SIMD_VISIT_UNOP)
#undef SIMD_VISIT_UNOP
#define SIMD_VISIT_BINOP(Name) \
#define SIMD_VISIT_BINOP(Name, instruction) \
void InstructionSelector::Visit##Name(Node* node) { \
VisitRRR(this, kArm##Name, node); \
VisitRRR(this, instruction, node); \
}
SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
#undef SIMD_VISIT_BINOP
......@@ -2329,7 +2347,7 @@ SIMD_SHIFT_OP_LIST(SIMD_VISIT_SHIFT_OP)
#define SIMD_VISIT_SELECT_OP(format) \
void InstructionSelector::VisitSimd##format##Select(Node* node) { \
VisitRRRR(this, kArmSimd##format##Select, node); \
VisitRRRR(this, kArmSimd128Select, node); \
}
SIMD_FORMAT_LIST(SIMD_VISIT_SELECT_OP)
#undef SIMD_VISIT_SELECT_OP
......
......@@ -1637,10 +1637,46 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitSimd8x16Select(node);
case IrOpcode::kSimd1x4Zero:
return MarkAsSimd1x4(node), VisitSimd1x4Zero(node);
case IrOpcode::kSimd1x4And:
return MarkAsSimd1x4(node), VisitSimd1x4And(node);
case IrOpcode::kSimd1x4Or:
return MarkAsSimd1x4(node), VisitSimd1x4Or(node);
case IrOpcode::kSimd1x4Xor:
return MarkAsSimd1x4(node), VisitSimd1x4Xor(node);
case IrOpcode::kSimd1x4Not:
return MarkAsSimd1x4(node), VisitSimd1x4Not(node);
case IrOpcode::kSimd1x4AnyTrue:
return MarkAsWord32(node), VisitSimd1x4AnyTrue(node);
case IrOpcode::kSimd1x4AllTrue:
return MarkAsWord32(node), VisitSimd1x4AllTrue(node);
case IrOpcode::kSimd1x8Zero:
return MarkAsSimd1x8(node), VisitSimd1x8Zero(node);
case IrOpcode::kSimd1x8And:
return MarkAsSimd1x8(node), VisitSimd1x8And(node);
case IrOpcode::kSimd1x8Or:
return MarkAsSimd1x8(node), VisitSimd1x8Or(node);
case IrOpcode::kSimd1x8Xor:
return MarkAsSimd1x8(node), VisitSimd1x8Xor(node);
case IrOpcode::kSimd1x8Not:
return MarkAsSimd1x8(node), VisitSimd1x8Not(node);
case IrOpcode::kSimd1x8AnyTrue:
return MarkAsWord32(node), VisitSimd1x8AnyTrue(node);
case IrOpcode::kSimd1x8AllTrue:
return MarkAsWord32(node), VisitSimd1x8AllTrue(node);
case IrOpcode::kSimd1x16Zero:
return MarkAsSimd1x16(node), VisitSimd1x16Zero(node);
case IrOpcode::kSimd1x16And:
return MarkAsSimd1x16(node), VisitSimd1x16And(node);
case IrOpcode::kSimd1x16Or:
return MarkAsSimd1x16(node), VisitSimd1x16Or(node);
case IrOpcode::kSimd1x16Xor:
return MarkAsSimd1x16(node), VisitSimd1x16Xor(node);
case IrOpcode::kSimd1x16Not:
return MarkAsSimd1x16(node), VisitSimd1x16Not(node);
case IrOpcode::kSimd1x16AnyTrue:
return MarkAsWord32(node), VisitSimd1x16AnyTrue(node);
case IrOpcode::kSimd1x16AllTrue:
return MarkAsWord32(node), VisitSimd1x16AllTrue(node);
default:
V8_Fatal(__FILE__, __LINE__, "Unexpected operator #%d:%s @ node #%d",
node->opcode(), node->op()->mnemonic(), node->id());
......@@ -2254,6 +2290,42 @@ void InstructionSelector::VisitSimd32x4Select(Node* node) { UNIMPLEMENTED(); }
// Placeholder visitors for the SIMD select and S1xN boolean-vector operators
// (And/Or/Xor/Not/AnyTrue/AllTrue for the 1x4, 1x8 and 1x16 shapes). Each one
// simply hits UNIMPLEMENTED(). The list is closed by the !V8_TARGET_ARCH_ARM
// guard that follows it, so these definitions are only compiled when the
// target is not ARM.
// NOTE(review): presumably the ARM backend supplies the real visitors --
// confirm against the ARM instruction selector.
void InstructionSelector::VisitSimd16x8Select(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd8x16Select(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x4And(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x4Or(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x4Xor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x4Not(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x4AnyTrue(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x4AllTrue(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x8And(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x8Or(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x8Xor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x8Not(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x8AnyTrue(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x8AllTrue(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x16And(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x16Or(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x16Xor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x16Not(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x16AnyTrue(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x16AllTrue(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
// Handles a FinishRegion node by delegating to EmitIdentity().
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
......@@ -263,12 +263,6 @@ MachineRepresentation AtomicStoreRepresentationOf(Operator const* op) {
V(Uint32x4GreaterThan, Operator::kNoProperties, 2, 0, 1) \
V(Uint32x4GreaterThanOrEqual, Operator::kNoProperties, 2, 0, 1) \
V(Uint32x4FromFloat32x4, Operator::kNoProperties, 1, 0, 1) \
V(Bool32x4And, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Bool32x4Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Bool32x4Xor, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Bool32x4Not, Operator::kNoProperties, 1, 0, 1) \
V(Bool32x4AnyTrue, Operator::kNoProperties, 1, 0, 1) \
V(Bool32x4AllTrue, Operator::kNoProperties, 1, 0, 1) \
V(Int16x8Splat, Operator::kNoProperties, 1, 0, 1) \
V(Int16x8Neg, Operator::kNoProperties, 1, 0, 1) \
V(Int16x8Add, Operator::kCommutative, 2, 0, 1) \
......@@ -292,12 +286,6 @@ MachineRepresentation AtomicStoreRepresentationOf(Operator const* op) {
V(Uint16x8LessThanOrEqual, Operator::kNoProperties, 2, 0, 1) \
V(Uint16x8GreaterThan, Operator::kNoProperties, 2, 0, 1) \
V(Uint16x8GreaterThanOrEqual, Operator::kNoProperties, 2, 0, 1) \
V(Bool16x8And, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Bool16x8Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Bool16x8Xor, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Bool16x8Not, Operator::kNoProperties, 1, 0, 1) \
V(Bool16x8AnyTrue, Operator::kNoProperties, 1, 0, 1) \
V(Bool16x8AllTrue, Operator::kNoProperties, 1, 0, 1) \
V(Int8x16Splat, Operator::kNoProperties, 1, 0, 1) \
V(Int8x16Neg, Operator::kNoProperties, 1, 0, 1) \
V(Int8x16Add, Operator::kCommutative, 2, 0, 1) \
......@@ -321,12 +309,6 @@ MachineRepresentation AtomicStoreRepresentationOf(Operator const* op) {
V(Uint8x16LessThanOrEqual, Operator::kNoProperties, 2, 0, 1) \
V(Uint8x16GreaterThan, Operator::kNoProperties, 2, 0, 1) \
V(Uint8x16GreaterThanOrEqual, Operator::kNoProperties, 2, 0, 1) \
V(Bool8x16And, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Bool8x16Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Bool8x16Xor, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Bool8x16Not, Operator::kNoProperties, 1, 0, 1) \
V(Bool8x16AnyTrue, Operator::kNoProperties, 1, 0, 1) \
V(Bool8x16AllTrue, Operator::kNoProperties, 1, 0, 1) \
V(Simd128Load, Operator::kNoProperties, 2, 0, 1) \
V(Simd128Load1, Operator::kNoProperties, 2, 0, 1) \
V(Simd128Load2, Operator::kNoProperties, 2, 0, 1) \
......@@ -344,8 +326,26 @@ MachineRepresentation AtomicStoreRepresentationOf(Operator const* op) {
V(Simd16x8Select, Operator::kNoProperties, 3, 0, 1) \
V(Simd8x16Select, Operator::kNoProperties, 3, 0, 1) \
V(Simd1x4Zero, Operator::kNoProperties, 0, 0, 1) \
V(Simd1x4And, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Simd1x4Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Simd1x4Xor, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Simd1x4Not, Operator::kNoProperties, 1, 0, 1) \
V(Simd1x4AnyTrue, Operator::kNoProperties, 1, 0, 1) \
V(Simd1x4AllTrue, Operator::kNoProperties, 1, 0, 1) \
V(Simd1x8Zero, Operator::kNoProperties, 0, 0, 1) \
V(Simd1x16Zero, Operator::kNoProperties, 0, 0, 1)
V(Simd1x8And, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Simd1x8Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Simd1x8Xor, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Simd1x8Not, Operator::kNoProperties, 1, 0, 1) \
V(Simd1x8AnyTrue, Operator::kNoProperties, 1, 0, 1) \
V(Simd1x8AllTrue, Operator::kNoProperties, 1, 0, 1) \
V(Simd1x16Zero, Operator::kNoProperties, 0, 0, 1) \
V(Simd1x16And, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Simd1x16Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Simd1x16Xor, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Simd1x16Not, Operator::kNoProperties, 1, 0, 1) \
V(Simd1x16AnyTrue, Operator::kNoProperties, 1, 0, 1) \
V(Simd1x16AllTrue, Operator::kNoProperties, 1, 0, 1)
#define PURE_OPTIONAL_OP_LIST(V) \
V(Word32Ctz, Operator::kNoProperties, 1, 0, 1) \
......
......@@ -478,13 +478,6 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* Uint32x4GreaterThanOrEqual();
const Operator* Uint32x4FromFloat32x4();
const Operator* Bool32x4And();
const Operator* Bool32x4Or();
const Operator* Bool32x4Xor();
const Operator* Bool32x4Not();
const Operator* Bool32x4AnyTrue();
const Operator* Bool32x4AllTrue();
const Operator* Int16x8Splat();
const Operator* Int16x8ExtractLane(int32_t);
const Operator* Int16x8ReplaceLane(int32_t);
......@@ -515,13 +508,6 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* Uint16x8GreaterThan();
const Operator* Uint16x8GreaterThanOrEqual();
const Operator* Bool16x8And();
const Operator* Bool16x8Or();
const Operator* Bool16x8Xor();
const Operator* Bool16x8Not();
const Operator* Bool16x8AnyTrue();
const Operator* Bool16x8AllTrue();
const Operator* Int8x16Splat();
const Operator* Int8x16ExtractLane(int32_t);
const Operator* Int8x16ReplaceLane(int32_t);
......@@ -552,13 +538,6 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* Uint8x16GreaterThan();
const Operator* Uint8x16GreaterThanOrEqual();
const Operator* Bool8x16And();
const Operator* Bool8x16Or();
const Operator* Bool8x16Xor();
const Operator* Bool8x16Not();
const Operator* Bool8x16AnyTrue();
const Operator* Bool8x16AllTrue();
const Operator* Simd128Load();
const Operator* Simd128Load1();
const Operator* Simd128Load2();
......@@ -585,8 +564,28 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* Simd8x16Shuffle();
const Operator* Simd1x4Zero();
const Operator* Simd1x4And();
const Operator* Simd1x4Or();
const Operator* Simd1x4Xor();
const Operator* Simd1x4Not();
const Operator* Simd1x4AnyTrue();
const Operator* Simd1x4AllTrue();
const Operator* Simd1x8Zero();
const Operator* Simd1x8And();
const Operator* Simd1x8Or();
const Operator* Simd1x8Xor();
const Operator* Simd1x8Not();
const Operator* Simd1x8AnyTrue();
const Operator* Simd1x8AllTrue();
const Operator* Simd1x16Zero();
const Operator* Simd1x16And();
const Operator* Simd1x16Or();
const Operator* Simd1x16Xor();
const Operator* Simd1x16Not();
const Operator* Simd1x16AnyTrue();
const Operator* Simd1x16AllTrue();
// load [base + index]
const Operator* Load(LoadRepresentation rep);
......
This diff is collapsed.
......@@ -3611,6 +3611,17 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode,
return graph()->NewNode(
jsgraph()->machine()->Uint8x16GreaterThanOrEqual(), inputs[0],
inputs[1]);
case wasm::kExprS128And:
return graph()->NewNode(jsgraph()->machine()->Simd128And(), inputs[0],
inputs[1]);
case wasm::kExprS128Or:
return graph()->NewNode(jsgraph()->machine()->Simd128Or(), inputs[0],
inputs[1]);
case wasm::kExprS128Xor:
return graph()->NewNode(jsgraph()->machine()->Simd128Xor(), inputs[0],
inputs[1]);
case wasm::kExprS128Not:
return graph()->NewNode(jsgraph()->machine()->Simd128Not(), inputs[0]);
case wasm::kExprS32x4Select:
return graph()->NewNode(jsgraph()->machine()->Simd32x4Select(), inputs[0],
inputs[1], inputs[2]);
......@@ -3620,17 +3631,57 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode,
case wasm::kExprS8x16Select:
return graph()->NewNode(jsgraph()->machine()->Simd8x16Select(), inputs[0],
inputs[1], inputs[2]);
case wasm::kExprS128And:
return graph()->NewNode(jsgraph()->machine()->Simd128And(), inputs[0],
case wasm::kExprS1x4And:
return graph()->NewNode(jsgraph()->machine()->Simd1x4And(), inputs[0],
inputs[1]);
case wasm::kExprS128Or:
return graph()->NewNode(jsgraph()->machine()->Simd128Or(), inputs[0],
case wasm::kExprS1x4Or:
return graph()->NewNode(jsgraph()->machine()->Simd1x4Or(), inputs[0],
inputs[1]);
case wasm::kExprS128Xor:
return graph()->NewNode(jsgraph()->machine()->Simd128Xor(), inputs[0],
case wasm::kExprS1x4Xor:
return graph()->NewNode(jsgraph()->machine()->Simd1x4Xor(), inputs[0],
inputs[1]);
case wasm::kExprS128Not:
return graph()->NewNode(jsgraph()->machine()->Simd128Not(), inputs[0]);
case wasm::kExprS1x4Not:
return graph()->NewNode(jsgraph()->machine()->Simd1x4Not(), inputs[0]);
case wasm::kExprS1x4AnyTrue:
return graph()->NewNode(jsgraph()->machine()->Simd1x4AnyTrue(),
inputs[0]);
case wasm::kExprS1x4AllTrue:
return graph()->NewNode(jsgraph()->machine()->Simd1x4AllTrue(),
inputs[0]);
case wasm::kExprS1x8And:
return graph()->NewNode(jsgraph()->machine()->Simd1x8And(), inputs[0],
inputs[1]);
case wasm::kExprS1x8Or:
return graph()->NewNode(jsgraph()->machine()->Simd1x8Or(), inputs[0],
inputs[1]);
case wasm::kExprS1x8Xor:
return graph()->NewNode(jsgraph()->machine()->Simd1x8Xor(), inputs[0],
inputs[1]);
case wasm::kExprS1x8Not:
return graph()->NewNode(jsgraph()->machine()->Simd1x8Not(), inputs[0]);
case wasm::kExprS1x8AnyTrue:
return graph()->NewNode(jsgraph()->machine()->Simd1x8AnyTrue(),
inputs[0]);
case wasm::kExprS1x8AllTrue:
return graph()->NewNode(jsgraph()->machine()->Simd1x8AllTrue(),
inputs[0]);
case wasm::kExprS1x16And:
return graph()->NewNode(jsgraph()->machine()->Simd1x16And(), inputs[0],
inputs[1]);
case wasm::kExprS1x16Or:
return graph()->NewNode(jsgraph()->machine()->Simd1x16Or(), inputs[0],
inputs[1]);
case wasm::kExprS1x16Xor:
return graph()->NewNode(jsgraph()->machine()->Simd1x16Xor(), inputs[0],
inputs[1]);
case wasm::kExprS1x16Not:
return graph()->NewNode(jsgraph()->machine()->Simd1x16Not(), inputs[0]);
case wasm::kExprS1x16AnyTrue:
return graph()->NewNode(jsgraph()->machine()->Simd1x16AnyTrue(),
inputs[0]);
case wasm::kExprS1x16AllTrue:
return graph()->NewNode(jsgraph()->machine()->Simd1x16AllTrue(),
inputs[0]);
default:
return graph()->NewNode(UnsupportedOpcode(opcode), nullptr);
}
......
......@@ -20,14 +20,17 @@ typedef Signature<ValueType> FunctionSig;
#define CASE_I64_OP(name, str) CASE_OP(I64##name, "i64." str)
#define CASE_F32_OP(name, str) CASE_OP(F32##name, "f32." str)
#define CASE_F64_OP(name, str) CASE_OP(F64##name, "f64." str)
#define CASE_S128_OP(name, str) CASE_OP(S128##name, "s128." str)
#define CASE_F32x4_OP(name, str) CASE_OP(F32x4##name, "f32x4." str)
#define CASE_I32x4_OP(name, str) CASE_OP(I32x4##name, "i32x4." str)
#define CASE_I16x8_OP(name, str) CASE_OP(I16x8##name, "i16x8." str)
#define CASE_I8x16_OP(name, str) CASE_OP(I8x16##name, "i8x16." str)
#define CASE_S128_OP(name, str) CASE_OP(S128##name, "s128." str)
#define CASE_S32x4_OP(name, str) CASE_OP(S32x4##name, "s32x4." str)
#define CASE_S16x8_OP(name, str) CASE_OP(S16x8##name, "s16x8." str)
#define CASE_S8x16_OP(name, str) CASE_OP(S8x16##name, "s8x16." str)
// Opcode-name helpers for the boolean vector shapes: each forwards to CASE_OP
// with the S1x4 / S1x8 / S1x16 opcode-name prefix pasted onto `name` and the
// matching "s1x4." / "s1x8." / "s1x16." prefix prepended to `str`.
#define CASE_S1x4_OP(name, str) CASE_OP(S1x4##name, "s1x4." str)
#define CASE_S1x8_OP(name, str) CASE_OP(S1x8##name, "s1x8." str)
#define CASE_S1x16_OP(name, str) CASE_OP(S1x16##name, "s1x16." str)
#define CASE_INT_OP(name, str) CASE_I32_OP(name, str) CASE_I64_OP(name, str)
#define CASE_FLOAT_OP(name, str) CASE_F32_OP(name, str) CASE_F64_OP(name, str)
#define CASE_ALL_OP(name, str) CASE_FLOAT_OP(name, str) CASE_INT_OP(name, str)
......@@ -202,9 +205,9 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIGN_OP(I8x16, AddSaturate, "add_saturate")
CASE_SIGN_OP(I16x8, SubSaturate, "sub_saturate")
CASE_SIGN_OP(I8x16, SubSaturate, "sub_saturate")
CASE_S128_OP(And, "and")
CASE_S128_OP(Or, "or")
CASE_S128_OP(Xor, "xor")
CASE_S128_OP(And, "and")
CASE_S128_OP(Not, "not")
CASE_S32x4_OP(Select, "select")
CASE_S32x4_OP(Swizzle, "swizzle")
......@@ -215,6 +218,24 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_S8x16_OP(Select, "select")
CASE_S8x16_OP(Swizzle, "swizzle")
CASE_S8x16_OP(Shuffle, "shuffle")
CASE_S1x4_OP(And, "and")
CASE_S1x4_OP(Or, "or")
CASE_S1x4_OP(Xor, "xor")
CASE_S1x4_OP(Not, "not")
CASE_S1x4_OP(AnyTrue, "any_true")
CASE_S1x4_OP(AllTrue, "all_true")
CASE_S1x8_OP(And, "and")
CASE_S1x8_OP(Or, "or")
CASE_S1x8_OP(Xor, "xor")
CASE_S1x8_OP(Not, "not")
CASE_S1x8_OP(AnyTrue, "any_true")
CASE_S1x8_OP(AllTrue, "all_true")
CASE_S1x16_OP(And, "and")
CASE_S1x16_OP(Or, "or")
CASE_S1x16_OP(Xor, "xor")
CASE_S1x16_OP(Not, "not")
CASE_S1x16_OP(AnyTrue, "any_true")
CASE_S1x16_OP(AllTrue, "all_true")
// Atomic operations.
CASE_L32_OP(AtomicAdd, "atomic_add")
......
......@@ -381,7 +381,25 @@ constexpr WasmCodePosition kNoCodePosition = -1;
V(S16x8Shuffle, 0xe54d, s_ss) \
V(S8x16Select, 0xe56a, s_s1x16ss) \
V(S8x16Swizzle, 0xe56b, s_s) \
V(S8x16Shuffle, 0xe56c, s_ss)
V(S8x16Shuffle, 0xe56c, s_ss) \
V(S1x4And, 0xe580, s1x4_s1x4s1x4) \
V(S1x4Or, 0xe581, s1x4_s1x4s1x4) \
V(S1x4Xor, 0xe582, s1x4_s1x4s1x4) \
V(S1x4Not, 0xe583, s1x4_s1x4) \
V(S1x4AnyTrue, 0xe584, i_s1x4) \
V(S1x4AllTrue, 0xe585, i_s1x4) \
V(S1x8And, 0xe586, s1x8_s1x8s1x8) \
V(S1x8Or, 0xe587, s1x8_s1x8s1x8) \
V(S1x8Xor, 0xe588, s1x8_s1x8s1x8) \
V(S1x8Not, 0xe589, s1x8_s1x8) \
V(S1x8AnyTrue, 0xe58a, i_s1x8) \
V(S1x8AllTrue, 0xe58b, i_s1x8) \
V(S1x16And, 0xe58c, s1x16_s1x16s1x16) \
V(S1x16Or, 0xe58d, s1x16_s1x16s1x16) \
V(S1x16Xor, 0xe58e, s1x16_s1x16s1x16) \
V(S1x16Not, 0xe58f, s1x16_s1x16) \
V(S1x16AnyTrue, 0xe590, i_s1x16) \
V(S1x16AllTrue, 0xe591, i_s1x16)
#define FOREACH_SIMD_1_OPERAND_OPCODE(V) \
V(F32x4ExtractLane, 0xe501, _) \
......@@ -483,19 +501,28 @@ constexpr WasmCodePosition kNoCodePosition = -1;
V(f_if, kWasmF32, kWasmI32, kWasmF32) \
V(l_il, kWasmI64, kWasmI32, kWasmI64)
#define FOREACH_SIMD_SIGNATURE(V) \
V(s_s, kWasmS128, kWasmS128) \
V(s_f, kWasmS128, kWasmF32) \
V(s_ss, kWasmS128, kWasmS128, kWasmS128) \
V(s1x4_ss, kWasmS1x4, kWasmS128, kWasmS128) \
V(s1x8_ss, kWasmS1x8, kWasmS128, kWasmS128) \
V(s1x16_ss, kWasmS1x16, kWasmS128, kWasmS128) \
V(s_i, kWasmS128, kWasmI32) \
V(s_si, kWasmS128, kWasmS128, kWasmI32) \
V(i_s, kWasmI32, kWasmS128) \
V(s_s1x4ss, kWasmS128, kWasmS1x4, kWasmS128, kWasmS128) \
V(s_s1x8ss, kWasmS128, kWasmS1x8, kWasmS128, kWasmS128) \
V(s_s1x16ss, kWasmS128, kWasmS1x16, kWasmS128, kWasmS128)
// Signature table referenced by the SIMD opcode lists. Each entry is
// V(name, type, type...).
// NOTE(review): the first type looks like the result type and the remaining
// ones the parameter types -- e.g. i_s1x4 (kWasmI32, kWasmS1x4) is the
// signature given to S1x4AnyTrue/S1x4AllTrue, and s_s1x4ss is the one given to
// the three-input select. Confirm against the macro's consumers.
#define FOREACH_SIMD_SIGNATURE(V) \
V(s_s, kWasmS128, kWasmS128) \
V(s_f, kWasmS128, kWasmF32) \
V(s_ss, kWasmS128, kWasmS128, kWasmS128) \
V(s1x4_ss, kWasmS1x4, kWasmS128, kWasmS128) \
V(s1x8_ss, kWasmS1x8, kWasmS128, kWasmS128) \
V(s1x16_ss, kWasmS1x16, kWasmS128, kWasmS128) \
V(s_i, kWasmS128, kWasmI32) \
V(s_si, kWasmS128, kWasmS128, kWasmI32) \
V(i_s, kWasmI32, kWasmS128) \
V(i_s1x4, kWasmI32, kWasmS1x4) \
V(i_s1x8, kWasmI32, kWasmS1x8) \
V(i_s1x16, kWasmI32, kWasmS1x16) \
V(s_s1x4ss, kWasmS128, kWasmS1x4, kWasmS128, kWasmS128) \
V(s_s1x8ss, kWasmS128, kWasmS1x8, kWasmS128, kWasmS128) \
V(s_s1x16ss, kWasmS128, kWasmS1x16, kWasmS128, kWasmS128) \
V(s1x4_s1x4, kWasmS1x4, kWasmS1x4) \
V(s1x4_s1x4s1x4, kWasmS1x4, kWasmS1x4, kWasmS1x4) \
V(s1x8_s1x8, kWasmS1x8, kWasmS1x8) \
V(s1x8_s1x8s1x8, kWasmS1x8, kWasmS1x8, kWasmS1x8) \
V(s1x16_s1x16, kWasmS1x16, kWasmS1x16) \
V(s1x16_s1x16s1x16, kWasmS1x16, kWasmS1x16, kWasmS1x16)
#define FOREACH_PREFIX(V) \
V(Simd, 0xe5) \
......
......@@ -1227,6 +1227,10 @@ TEST(14) {
CHECK_EQ(ex, t.field[2]); \
CHECK_EQ(ex, t.field[3]);
// Checks a two-element result field: t.field[0] must equal ex0 and t.field[1]
// must equal ex1. Expands to two CHECK_EQ statements and relies on a variable
// named `t` being in scope at the point of use.
#define CHECK_EQ_32X2(field, ex0, ex1) \
CHECK_EQ(ex0, t.field[0]); \
CHECK_EQ(ex1, t.field[1]);
#define CHECK_EQ_32X4(field, ex0, ex1, ex2, ex3) \
CHECK_EQ(ex0, t.field[0]); \
CHECK_EQ(ex1, t.field[1]); \
......@@ -1298,6 +1302,8 @@ TEST(15) {
float vdupf[4], vaddf[4], vsubf[4], vmulf[4];
uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4];
uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4];
uint32_t vpmin_s8[2], vpmin_u16[2], vpmin_s32[2];
uint32_t vpmax_s8[2], vpmax_u16[2], vpmax_s32[2];
uint32_t vadd8[4], vadd16[4], vadd32[4];
uint32_t vqadd_s8[4], vqadd_u16[4], vqadd_s32[4];
uint32_t vsub8[4], vsub16[4], vsub32[4];
......@@ -1612,6 +1618,30 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmax_s32))));
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
// vpmin/vpmax integer.
// Each case fills q0 and q1 with different lane patterns, applies the pairwise
// op to d0 and d2 (the low halves of q0 and q1), and stores the d4 result into
// the matching two-word field of T for the checks at the end of the test.
// Case 1 (signed 8-bit): 0x03 splatted as 16-bit lanes in q0 and as 8-bit
// lanes in q1.
__ mov(r4, Operand(0x03));
__ vdup(Neon16, q0, r4);
__ vdup(Neon8, q1, r4);
__ vpmin(NeonS8, d4, d0, d2);
__ vstr(d4, r0, offsetof(T, vpmin_s8));
__ vpmax(NeonS8, d4, d0, d2);
__ vstr(d4, r0, offsetof(T, vpmax_s8));
// Case 2 (unsigned 16-bit): 0xffff splatted as 32-bit lanes in q0 and as
// 16-bit lanes in q1.
__ mov(r4, Operand(0xffff));
__ vdup(Neon32, q0, r4);
__ vdup(Neon16, q1, r4);
__ vpmin(NeonU16, d4, d0, d2);
__ vstr(d4, r0, offsetof(T, vpmin_u16));
__ vpmax(NeonU16, d4, d0, d2);
__ vstr(d4, r0, offsetof(T, vpmax_u16));
// Case 3 (signed 32-bit): q0 is cleared and only s0 is set to 0xff; q1 is
// 0xff splatted as 8-bit lanes.
__ mov(r4, Operand(0xff));
__ veor(q0, q0, q0);
__ vmov(s0, r4);
__ vdup(Neon8, q1, r4);
__ vpmin(NeonS32, d4, d0, d2);
__ vstr(d4, r0, offsetof(T, vpmin_s32));
__ vpmax(NeonS32, d4, d0, d2);
__ vstr(d4, r0, offsetof(T, vpmax_s32));
// vadd (integer).
__ mov(r4, Operand(0x81));
__ vdup(Neon8, q0, r4);
......@@ -1992,6 +2022,15 @@ TEST(15) {
// [0x000000ff, 0x000000ff, ...] and [0xffffffff, 0xffffffff, ...]
CHECK_EQ_SPLAT(vmin_s32, 0xffffffffu);
CHECK_EQ_SPLAT(vmax_s32, 0xffu);
// [0, 3, 0, 3, ...] and [3, 3, 3, 3, ...]
CHECK_EQ_32X2(vpmin_s8, 0x00000000u, 0x03030303u);
CHECK_EQ_32X2(vpmax_s8, 0x03030303u, 0x03030303u);
// [0, ffff, 0, ffff] and [ffff, ffff]
CHECK_EQ_32X2(vpmin_u16, 0x00000000u, 0xffffffffu);
CHECK_EQ_32X2(vpmax_u16, 0xffffffffu, 0xffffffffu);
// [0x000000ff, 0x00000000] and [0xffffffff, 0xffffffff]
CHECK_EQ_32X2(vpmin_s32, 0x00u, 0xffffffffu);
CHECK_EQ_32X2(vpmax_s32, 0xffu, 0xffffffffu);
CHECK_EQ_SPLAT(vadd8, 0x03030303u);
CHECK_EQ_SPLAT(vadd16, 0x00030003u);
CHECK_EQ_SPLAT(vadd32, 0x00000003u);
......
......@@ -1040,6 +1040,12 @@ TEST(Neon) {
"f3142670 vmin.u16 q1, q2, q8");
COMPARE(vmax(NeonS32, q15, q0, q8),
"f260e660 vmax.s32 q15, q0, q8");
COMPARE(vpmax(NeonS8, d0, d1, d2),
"f2010a02 vpmax.s8 d0, d1, d2");
COMPARE(vpmin(NeonU16, d1, d2, d8),
"f3121a18 vpmin.u16 d1, d2, d8");
COMPARE(vpmax(NeonS32, d15, d0, d8),
"f220fa08 vpmax.s32 d15, d0, d8");
COMPARE(vadd(q15, q0, q8),
"f240ed60 vadd.f32 q15, q0, q8");
COMPARE(vadd(Neon8, q0, q1, q2),
......
......@@ -156,6 +156,8 @@ TEST(ExtractLane) {
int32_t i8x16_high[16];
int32_t f32x4_low[4];
int32_t f32x4_high[4];
int32_t i8x16_low_d[16];
int32_t i8x16_high_d[16];
} T;
T t;
......@@ -185,6 +187,15 @@ TEST(ExtractLane) {
__ str(r5, MemOperand(r0, offsetof(T, i8x16_low) + 4 * i));
}
// D-register variant: splat i into every byte of q1, then extract byte lane i
// from each of its two D halves. The d2 result goes to i8x16_low_d[i] and the
// d3 result to i8x16_low_d[i + 8].
for (int i = 0; i < 8; i++) {
__ mov(r4, Operand(i));
__ vdup(Neon8, q1, r4); // q1 = d2,d3
__ ExtractLane(r5, d2, NeonS8, i);
__ str(r5, MemOperand(r0, offsetof(T, i8x16_low_d) + 4 * i));
__ ExtractLane(r5, d3, NeonS8, i);
__ str(r5, MemOperand(r0, offsetof(T, i8x16_low_d) + 4 * (i + 8)));
}
if (CpuFeatures::IsSupported(VFP32DREGS)) {
for (int i = 0; i < 4; i++) {
__ mov(r4, Operand(-i));
......@@ -209,6 +220,15 @@ TEST(ExtractLane) {
__ ExtractLane(r5, q15, NeonS8, i);
__ str(r5, MemOperand(r0, offsetof(T, i8x16_high) + 4 * i));
}
// D-register variant for the high registers: splat -i into every byte of q15,
// then extract byte lane i from each of its two D halves. The d30 result goes
// to i8x16_high_d[i] and the d31 result to i8x16_high_d[i + 8].
for (int i = 0; i < 8; i++) {
__ mov(r4, Operand(-i));
__ vdup(Neon8, q15, r4); // q15 = d30,d31
__ ExtractLane(r5, d30, NeonS8, i);
__ str(r5, MemOperand(r0, offsetof(T, i8x16_high_d) + 4 * i));
__ ExtractLane(r5, d31, NeonS8, i);
__ str(r5, MemOperand(r0, offsetof(T, i8x16_high_d) + 4 * (i + 8)));
}
}
__ ldm(ia_w, sp, r4.bit() | r5.bit() | pc.bit());
......@@ -234,6 +254,10 @@ TEST(ExtractLane) {
for (int i = 0; i < 16; i++) {
CHECK_EQ(i, t.i8x16_low[i]);
}
// The generated code stored the value extracted from each D half at slots i
// and i + 8; both must read back as the splatted value i.
for (int i = 0; i < 8; i++) {
CHECK_EQ(i, t.i8x16_low_d[i]);
CHECK_EQ(i, t.i8x16_low_d[i + 8]);
}
if (CpuFeatures::IsSupported(VFP32DREGS)) {
for (int i = 0; i < 4; i++) {
CHECK_EQ(-i, t.i32x4_high[i]);
......@@ -245,6 +269,10 @@ TEST(ExtractLane) {
for (int i = 0; i < 16; i++) {
CHECK_EQ(-i, t.i8x16_high[i]);
}
// High-register counterpart: the splatted value was -i, extracted as a signed
// byte (NeonS8), so both slots must read back as -i.
for (int i = 0; i < 8; i++) {
CHECK_EQ(-i, t.i8x16_high_d[i]);
CHECK_EQ(-i, t.i8x16_high_d[i + 8]);
}
}
}
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment