Commit b7df78f3, authored by bbudge, committed by Commit bot

[ARM] Add Neon saturating add and subtract instructions.

- Adds vqadd.s/u, vqsub.s/u for all integer lane sizes.
- Refactors disassembler and simulator, using switches instead
of long if-else chains.

LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2649323012
Cr-Commit-Position: refs/heads/master@{#42865}
parent 5a02d3e8
...@@ -4272,7 +4272,19 @@ static Instr EncodeNeonBinOp(FPBinOp op, QwNeonRegister dst, ...@@ -4272,7 +4272,19 @@ static Instr EncodeNeonBinOp(FPBinOp op, QwNeonRegister dst,
vm | op_encoding; vm | op_encoding;
} }
enum IntegerBinOp { VADD, VSUB, VMUL, VMIN, VMAX, VTST, VCEQ, VCGE, VCGT }; enum IntegerBinOp {
VADD,
VQADD,
VSUB,
VQSUB,
VMUL,
VMIN,
VMAX,
VTST,
VCEQ,
VCGE,
VCGT
};
static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt, static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
const QwNeonRegister dst, const QwNeonRegister dst,
...@@ -4283,9 +4295,15 @@ static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt, ...@@ -4283,9 +4295,15 @@ static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
case VADD: case VADD:
op_encoding = 0x8 * B8; op_encoding = 0x8 * B8;
break; break;
case VQADD:
op_encoding = B4;
break;
case VSUB: case VSUB:
op_encoding = B24 | 0x8 * B8; op_encoding = B24 | 0x8 * B8;
break; break;
case VQSUB:
op_encoding = 0x2 * B8 | B4;
break;
case VMUL: case VMUL:
op_encoding = 0x9 * B8 | B4; op_encoding = 0x9 * B8 | B4;
break; break;
...@@ -4348,6 +4366,14 @@ void Assembler::vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1, ...@@ -4348,6 +4366,14 @@ void Assembler::vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
emit(EncodeNeonBinOp(VADD, size, dst, src1, src2)); emit(EncodeNeonBinOp(VADD, size, dst, src1, src2));
} }
void Assembler::vqadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                      QwNeonRegister src2) {
  // SIMD integer saturating addition on quad registers: Qd = vqadd(Qn, Qm).
  // Encoding reference: ARM DDI 0406C.b, A8-996.
  DCHECK(IsEnabled(NEON));
  const Instr encoding = EncodeNeonBinOp(VQADD, dt, dst, src1, src2);
  emit(encoding);
}
void Assembler::vsub(QwNeonRegister dst, QwNeonRegister src1, void Assembler::vsub(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) { QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
...@@ -4364,6 +4390,14 @@ void Assembler::vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1, ...@@ -4364,6 +4390,14 @@ void Assembler::vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
emit(EncodeNeonBinOp(VSUB, size, dst, src1, src2)); emit(EncodeNeonBinOp(VSUB, size, dst, src1, src2));
} }
void Assembler::vqsub(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                      QwNeonRegister src2) {
  // SIMD integer saturating subtraction on quad registers:
  // Qd = vqsub(Qn, Qm).
  // Encoding reference: ARM DDI 0406C.b, A8-1020.
  DCHECK(IsEnabled(NEON));
  const Instr encoding = EncodeNeonBinOp(VQSUB, dt, dst, src1, src2);
  emit(encoding);
}
void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1, void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) { QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
......
...@@ -1374,9 +1374,13 @@ class Assembler : public AssemblerBase { ...@@ -1374,9 +1374,13 @@ class Assembler : public AssemblerBase {
void vadd(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2); void vadd(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1, void vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2); QwNeonRegister src2);
// Qd = vqadd(Qn, Qm): SIMD integer saturating addition. |dt| is the NEON data
// type forwarded to the instruction encoder.
void vqadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vsub(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2); void vsub(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1, void vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2); QwNeonRegister src2);
// Qd = vqsub(Qn, Qm): SIMD integer saturating subtraction. |dt| is the NEON
// data type forwarded to the instruction encoder.
void vqsub(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vmul(QwNeonRegister dst, QwNeonRegister src1, void vmul(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2); QwNeonRegister src2);
void vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1, void vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
......
...@@ -1856,104 +1856,150 @@ static const char* const barrier_option_names[] = { ...@@ -1856,104 +1856,150 @@ static const char* const barrier_option_names[] = {
void Decoder::DecodeSpecialCondition(Instruction* instr) { void Decoder::DecodeSpecialCondition(Instruction* instr) {
switch (instr->SpecialValue()) { switch (instr->SpecialValue()) {
case 4: case 4: {
if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 2 && int Vd, Vm, Vn;
instr->Bit(6) == 1 && instr->Bit(4) == 1) { if (instr->Bit(6) == 0) {
int Vd = instr->VFPDRegValue(kSimd128Precision); Vd = instr->VFPDRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kSimd128Precision); Vm = instr->VFPMRegValue(kDoublePrecision);
int Vn = instr->VFPNRegValue(kSimd128Precision); Vn = instr->VFPNRegValue(kDoublePrecision);
} else {
Vd = instr->VFPDRegValue(kSimd128Precision);
Vm = instr->VFPMRegValue(kSimd128Precision);
Vn = instr->VFPNRegValue(kSimd128Precision);
}
switch (instr->Bits(11, 8)) {
case 0x0: {
if (instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vqadd.s<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"vqadd.s%d q%d, q%d, q%d", size, Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0x1: {
if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 &&
instr->Bit(4) == 1) {
if (Vm == Vn) { if (Vm == Vn) {
// vmov Qd, Qm // vmov Qd, Qm
out_buffer_pos_ += out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
SNPrintF(out_buffer_ + out_buffer_pos_, "vmov q%d, q%d", Vd, Vm); "vmov q%d, q%d", Vd, Vm);
} else { } else {
// vorr Qd, Qm, Qn. // vorr Qd, Qm, Qn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vorr q%d, q%d, q%d", Vd, Vn, Vm); "vorr q%d, q%d, q%d", Vd, Vn, Vm);
} }
} else if (instr->Bits(11, 8) == 8) { } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
instr->Bit(4) == 1) {
// vand Qd, Qm, Qn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vand q%d, q%d, q%d", Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0x2: {
if (instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vqsub.s<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"vqsub.s%d q%d, q%d, q%d", size, Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0x3: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt";
// vcge/vcgt.s<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.s%d q%d, q%d, q%d",
op, size, Vd, Vn, Vm);
break;
}
case 0x6: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vmin/vmax.s<size> Qd, Qm, Qn.
const char* op = instr->Bit(4) == 1 ? "vmin" : "vmax";
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.s%d q%d, q%d, q%d",
op, size, Vd, Vn, Vm);
break;
}
case 0x8: {
const char* op = (instr->Bit(4) == 0) ? "vadd" : "vtst"; const char* op = (instr->Bit(4) == 0) ? "vadd" : "vtst";
int size = kBitsPerByte * (1 << instr->Bits(21, 20)); int size = kBitsPerByte * (1 << instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
// vadd/vtst.i<size> Qd, Qm, Qn. // vadd/vtst.i<size> Qd, Qm, Qn.
out_buffer_pos_ += out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.i%d q%d, q%d, q%d", op, SNPrintF(out_buffer_ + out_buffer_pos_, "%s.i%d q%d, q%d, q%d",
size, Vd, Vn, Vm); op, size, Vd, Vn, Vm);
} else if (instr->Bits(11, 8) == 0xd && instr->Bit(4) == 0) { break;
}
case 0x9: {
if (instr->Bit(6) == 1 && instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vmul.i<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"vmul.i%d q%d, q%d, q%d", size, Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0xd: {
if (instr->Bit(4) == 0) {
const char* op = (instr->Bits(21, 20) == 0) ? "vadd" : "vsub"; const char* op = (instr->Bits(21, 20) == 0) ? "vadd" : "vsub";
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
// vadd/vsub.f32 Qd, Qm, Qn. // vadd/vsub.f32 Qd, Qm, Qn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm); "%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm);
} else if (instr->Bits(11, 8) == 0x9 && instr->Bit(6) == 1 && } else {
instr->Bit(4) == 1) { Unknown(instr);
int size = kBitsPerByte * (1 << instr->Bits(21, 20)); }
int Vd = instr->VFPDRegValue(kSimd128Precision); break;
int Vm = instr->VFPMRegValue(kSimd128Precision); }
int Vn = instr->VFPNRegValue(kSimd128Precision); case 0xe: {
// vmul.i<size> Qd, Qm, Qn. if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmul.i%d q%d, q%d, q%d", size, Vd, Vn, Vm);
} else if (instr->Bits(11, 8) == 0xe && instr->Bits(21, 20) == 0 &&
instr->Bit(4) == 0) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
// vceq.f32 Qd, Qm, Qn. // vceq.f32 Qd, Qm, Qn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vceq.f32 q%d, q%d, q%d", Vd, Vn, Vm); "vceq.f32 q%d, q%d, q%d", Vd, Vn, Vm);
} else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 && } else {
instr->Bit(6) == 1 && instr->Bit(4) == 1) { Unknown(instr);
int Vd = instr->VFPDRegValue(kSimd128Precision); }
int Vm = instr->VFPMRegValue(kSimd128Precision); break;
int Vn = instr->VFPNRegValue(kSimd128Precision); }
// vand Qd, Qm, Qn. case 0xf: {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, if (instr->Bit(20) == 0 && instr->Bit(6) == 1) {
"vand q%d, q%d, q%d", Vd, Vn, Vm);
} else if (instr->Bits(11, 8) == 0x3) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt";
// vcge/vcgt.s<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.s%d q%d, q%d, q%d", op,
size, Vd, Vn, Vm);
} else if (instr->Bits(11, 8) == 0xf && instr->Bit(20) == 0 &&
instr->Bit(6) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
if (instr->Bit(4) == 1) { if (instr->Bit(4) == 1) {
// vrecps/vrsqrts.f32 Qd, Qm, Qn. // vrecps/vrsqrts.f32 Qd, Qm, Qn.
const char* op = instr->Bit(21) == 0 ? "vrecps" : "vrsqrts"; const char* op = instr->Bit(21) == 0 ? "vrecps" : "vrsqrts";
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm); "%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm);
} else { } else {
// vmin/max.f32 Qd, Qm, Qn. // vmin/max.f32 Qd, Qm, Qn.
const char* op = instr->Bit(21) == 1 ? "vmin" : "vmax"; const char* op = instr->Bit(21) == 1 ? "vmin" : "vmax";
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm); "%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm);
} }
} else if (instr->Bits(11, 8) == 0x6) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
// vmin/vmax.s<size> Qd, Qm, Qn.
const char* op = instr->Bit(4) == 1 ? "vmin" : "vmax";
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.s%d q%d, q%d, q%d", op,
size, Vd, Vn, Vm);
} else { } else {
Unknown(instr); Unknown(instr);
} }
break; break;
}
default:
Unknown(instr);
break;
}
break;
}
case 5: case 5:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) { (instr->Bit(4) == 1)) {
...@@ -1963,7 +2009,7 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { ...@@ -1963,7 +2009,7 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int Vm = (instr->Bit(5) << 4) | instr->VmValue(); int Vm = (instr->Bit(5) << 4) | instr->VmValue();
int imm3 = instr->Bits(21, 19); int imm3 = instr->Bits(21, 19);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmovl.s%d q%d, d%d", imm3*8, Vd, Vm); "vmovl.s%d q%d, d%d", imm3 * 8, Vd, Vm);
} else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) {
// vext.8 Qd, Qm, Qn, imm4 // vext.8 Qd, Qm, Qn, imm4
int imm4 = instr->Bits(11, 8); int imm4 = instr->Bits(11, 8);
...@@ -1995,87 +2041,120 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { ...@@ -1995,87 +2041,120 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
Unknown(instr); Unknown(instr);
} }
break; break;
case 6: case 6: {
if (instr->Bits(11, 8) == 8) { int Vd, Vm, Vn;
if (instr->Bit(6) == 0) {
Vd = instr->VFPDRegValue(kDoublePrecision);
Vm = instr->VFPMRegValue(kDoublePrecision);
Vn = instr->VFPNRegValue(kDoublePrecision);
} else {
Vd = instr->VFPDRegValue(kSimd128Precision);
Vm = instr->VFPMRegValue(kSimd128Precision);
Vn = instr->VFPNRegValue(kSimd128Precision);
}
switch (instr->Bits(11, 8)) {
case 0x0: {
if (instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20)); int size = kBitsPerByte * (1 << instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision); // vqadd.u<size> Qd, Qm, Qn.
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
if (instr->Bit(4) == 0) {
out_buffer_pos_ += out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vsub.i%d q%d, q%d, q%d", SNPrintF(out_buffer_ + out_buffer_pos_,
size, Vd, Vn, Vm); "vqadd.u%d q%d, q%d, q%d", size, Vd, Vn, Vm);
} else { } else {
out_buffer_pos_ += Unknown(instr);
SNPrintF(out_buffer_ + out_buffer_pos_, "vceq.i%d q%d, q%d, q%d",
size, Vd, Vn, Vm);
} }
} else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 1 && break;
instr->Bit(4) == 1) { }
int Vd = instr->VFPDRegValue(kSimd128Precision); case 0x1: {
int Vm = instr->VFPMRegValue(kSimd128Precision); if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) {
int Vn = instr->VFPNRegValue(kSimd128Precision);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vbsl q%d, q%d, q%d", Vd, Vn, Vm); "vbsl q%d, q%d, q%d", Vd, Vn, Vm);
} else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 && } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) {
instr->Bit(4) == 1) {
if (instr->Bit(6) == 0) { if (instr->Bit(6) == 0) {
// veor Dd, Dn, Dm // veor Dd, Dn, Dm
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vn = instr->VFPNRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"veor d%d, d%d, d%d", Vd, Vn, Vm); "veor d%d, d%d, d%d", Vd, Vn, Vm);
} else { } else {
// veor Qd, Qn, Qm // veor Qd, Qn, Qm
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"veor q%d, q%d, q%d", Vd, Vn, Vm); "veor q%d, q%d, q%d", Vd, Vn, Vm);
} }
} else if (instr->Bits(11, 8) == 0xd && instr->Bit(21) == 0 && } else {
instr->Bit(6) == 1 && instr->Bit(4) == 1) { Unknown(instr);
// vmul.f32 Qd, Qn, Qm }
int Vd = instr->VFPDRegValue(kSimd128Precision); break;
int Vn = instr->VFPNRegValue(kSimd128Precision); }
int Vm = instr->VFPMRegValue(kSimd128Precision); case 0x2: {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, if (instr->Bit(4) == 1) {
"vmul.f32 q%d, q%d, q%d", Vd, Vn, Vm); int size = kBitsPerByte * (1 << instr->Bits(21, 20));
} else if (instr->Bits(11, 8) == 0xe && instr->Bit(20) == 0 && // vqsub.u<size> Qd, Qm, Qn.
instr->Bit(4) == 0) { out_buffer_pos_ +=
int Vd = instr->VFPDRegValue(kSimd128Precision); SNPrintF(out_buffer_ + out_buffer_pos_,
int Vm = instr->VFPMRegValue(kSimd128Precision); "vqsub.u%d q%d, q%d, q%d", size, Vd, Vn, Vm);
int Vn = instr->VFPNRegValue(kSimd128Precision); } else {
const char* op = (instr->Bit(21) == 0) ? "vcge" : "vcgt"; Unknown(instr);
// vcge/vcgt.f32 Qd, Qm, Qn. }
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, break;
"%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm); }
} else if (instr->Bits(11, 8) == 0x3) { case 0x3: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20)); int size = kBitsPerByte * (1 << instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt"; const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt";
// vcge/vcgt.u<size> Qd, Qm, Qn. // vcge/vcgt.u<size> Qd, Qm, Qn.
out_buffer_pos_ += out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.u%d q%d, q%d, q%d", op, SNPrintF(out_buffer_ + out_buffer_pos_, "%s.u%d q%d, q%d, q%d",
size, Vd, Vn, Vm); op, size, Vd, Vn, Vm);
} else if (instr->Bits(11, 8) == 0x6) { break;
}
case 0x6: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20)); int size = kBitsPerByte * (1 << instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
// vmin/vmax.u<size> Qd, Qm, Qn. // vmin/vmax.u<size> Qd, Qm, Qn.
const char* op = instr->Bit(4) == 1 ? "vmin" : "vmax"; const char* op = instr->Bit(4) == 1 ? "vmin" : "vmax";
out_buffer_pos_ += out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.u%d q%d, q%d, q%d", op, SNPrintF(out_buffer_ + out_buffer_pos_, "%s.u%d q%d, q%d, q%d",
size, Vd, Vn, Vm); op, size, Vd, Vn, Vm);
break;
}
case 0x8: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
if (instr->Bit(4) == 0) {
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"vsub.i%d q%d, q%d, q%d", size, Vd, Vn, Vm);
} else {
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"vceq.i%d q%d, q%d, q%d", size, Vd, Vn, Vm);
}
break;
}
case 0xd: {
if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
// vmul.f32 Qd, Qn, Qm
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmul.f32 q%d, q%d, q%d", Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0xe: {
if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {
const char* op = (instr->Bit(21) == 0) ? "vcge" : "vcgt";
// vcge/vcgt.f32 Qd, Qm, Qn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm);
} else { } else {
Unknown(instr); Unknown(instr);
} }
break; break;
}
default:
Unknown(instr);
break;
}
break;
}
case 7: case 7:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) { (instr->Bit(4) == 1)) {
...@@ -2085,7 +2164,7 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { ...@@ -2085,7 +2164,7 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int Vm = (instr->Bit(5) << 4) | instr->VmValue(); int Vm = (instr->Bit(5) << 4) | instr->VmValue();
int imm3 = instr->Bits(21, 19); int imm3 = instr->Bits(21, 19);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmovl.u%d q%d, d%d", imm3*8, Vd, Vm); "vmovl.u%d q%d, d%d", imm3 * 8, Vd, Vm);
} else if (instr->Opc1Value() == 7 && instr->Bits(21, 20) == 0x3 && } else if (instr->Opc1Value() == 7 && instr->Bits(21, 20) == 0x3 &&
instr->Bit(4) == 0) { instr->Bit(4) == 0) {
if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) { if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) {
...@@ -2211,8 +2290,8 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { ...@@ -2211,8 +2290,8 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int size = instr->Bits(7, 6); int size = instr->Bits(7, 6);
int align = instr->Bits(5, 4); int align = instr->Bits(5, 4);
int Rm = instr->VmValue(); int Rm = instr->VmValue();
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "vst1.%d ",
"vst1.%d ", (1 << size) << 3); (1 << size) << 3);
FormatNeonList(Vd, type); FormatNeonList(Vd, type);
Print(", "); Print(", ");
FormatNeonMemory(Rn, align, Rm); FormatNeonMemory(Rn, align, Rm);
...@@ -2224,8 +2303,8 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { ...@@ -2224,8 +2303,8 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int size = instr->Bits(7, 6); int size = instr->Bits(7, 6);
int align = instr->Bits(5, 4); int align = instr->Bits(5, 4);
int Rm = instr->VmValue(); int Rm = instr->VmValue();
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "vld1.%d ",
"vld1.%d ", (1 << size) << 3); (1 << size) << 3);
FormatNeonList(Vd, type); FormatNeonList(Vd, type);
Print(", "); Print(", ");
FormatNeonMemory(Rn, align, Rm); FormatNeonMemory(Rn, align, Rm);
...@@ -2239,8 +2318,8 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { ...@@ -2239,8 +2318,8 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int Rn = instr->Bits(19, 16); int Rn = instr->Bits(19, 16);
int offset = instr->Bits(11, 0); int offset = instr->Bits(11, 0);
if (offset == 0) { if (offset == 0) {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ +=
"pld [r%d]", Rn); SNPrintF(out_buffer_ + out_buffer_pos_, "pld [r%d]", Rn);
} else if (instr->Bit(23) == 0) { } else if (instr->Bit(23) == 0) {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"pld [r%d, #-%d]", Rn, offset); "pld [r%d, #-%d]", Rn, offset);
...@@ -2252,16 +2331,16 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { ...@@ -2252,16 +2331,16 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int option = instr->Bits(3, 0); int option = instr->Bits(3, 0);
switch (instr->Bits(7, 4)) { switch (instr->Bits(7, 4)) {
case 4: case 4:
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "dsb %s",
"dsb %s", barrier_option_names[option]); barrier_option_names[option]);
break; break;
case 5: case 5:
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "dmb %s",
"dmb %s", barrier_option_names[option]); barrier_option_names[option]);
break; break;
case 6: case 6:
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "isb %s",
"isb %s", barrier_option_names[option]); barrier_option_names[option]);
break; break;
default: default:
Unknown(instr); Unknown(instr);
......
...@@ -3988,16 +3988,89 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) { ...@@ -3988,16 +3988,89 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) {
} }
} }
// Templated operations for NEON instructions.
// TODO(bbudge) Add more templates for use in DecodeSpecialCondition.
// Converts |value| to int64_t. Only types strictly narrower than int64_t are
// accepted, so every representable value of T is preserved exactly.
template <typename T>
int64_t Widen(T value) {
  static_assert(sizeof(T) < sizeof(int64_t), "T must be int32_t or smaller");
  const int64_t widened = value;
  return widened;
}
// Saturates |value| to the representable range of T: values above T's maximum
// yield the maximum, values below T's minimum yield the minimum, and anything
// in between is returned unchanged (converted to T).
template <typename T>
T Clamp(int64_t value) {
  static_assert(sizeof(T) < sizeof(int64_t), "T must be int32_t or smaller");
  const T lowest = std::numeric_limits<T>::min();
  const T highest = std::numeric_limits<T>::max();
  if (value > static_cast<int64_t>(highest)) return highest;
  if (value < static_cast<int64_t>(lowest)) return lowest;
  return static_cast<T>(value);
}
// Lane-wise saturating addition over a 16-byte Q register viewed as lanes of
// type T: reads Qn and Qm from |simulator|, computes Clamp<T>(Qn[i] + Qm[i])
// in 64-bit arithmetic for every lane, and writes the result to Qd.
template <typename T>
void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
  static const int kLanes = 16 / sizeof(T);
  T lhs[kLanes], rhs[kLanes];
  simulator->get_q_register(Vn, lhs);
  simulator->get_q_register(Vm, rhs);
  for (int lane = 0; lane < kLanes; lane++) {
    // Widen first so the intermediate sum cannot wrap before clamping.
    const int64_t sum = Widen(lhs[lane]) + Widen(rhs[lane]);
    lhs[lane] = Clamp<T>(sum);
  }
  simulator->set_q_register(Vd, lhs);
}
// Lane-wise saturating subtraction over a 16-byte Q register viewed as lanes
// of type T: reads Qn and Qm from |simulator|, computes
// Clamp<T>(Qn[i] - Qm[i]) in 64-bit arithmetic for every lane, and writes the
// result to Qd.
template <typename T>
void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
  static const int kLanes = 16 / sizeof(T);
  T lhs[kLanes], rhs[kLanes];
  simulator->get_q_register(Vn, lhs);
  simulator->get_q_register(Vm, rhs);
  for (int lane = 0; lane < kLanes; lane++) {
    // Widen first so the intermediate difference cannot wrap before clamping.
    const int64_t difference = Widen(lhs[lane]) - Widen(rhs[lane]);
    lhs[lane] = Clamp<T>(difference);
  }
  simulator->set_q_register(Vd, lhs);
}
void Simulator::DecodeSpecialCondition(Instruction* instr) { void Simulator::DecodeSpecialCondition(Instruction* instr) {
switch (instr->SpecialValue()) { switch (instr->SpecialValue()) {
case 4: case 4: {
if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 2 && int Vd, Vm, Vn;
if (instr->Bit(6) == 0) {
Vd = instr->VFPDRegValue(kDoublePrecision);
Vm = instr->VFPMRegValue(kDoublePrecision);
Vn = instr->VFPNRegValue(kDoublePrecision);
} else {
Vd = instr->VFPDRegValue(kSimd128Precision);
Vm = instr->VFPMRegValue(kSimd128Precision);
Vn = instr->VFPNRegValue(kSimd128Precision);
}
switch (instr->Bits(11, 8)) {
case 0x0: {
if (instr->Bit(4) == 1) {
// vqadd.s<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8:
AddSaturate<int8_t>(this, Vd, Vm, Vn);
break;
case Neon16:
AddSaturate<int16_t>(this, Vd, Vm, Vn);
break;
case Neon32:
AddSaturate<int32_t>(this, Vd, Vm, Vn);
break;
default:
UNREACHABLE();
break;
}
} else {
UNIMPLEMENTED();
}
break;
}
case 0x1: {
if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 &&
instr->Bit(4) == 1) { instr->Bit(4) == 1) {
// vmov Qd, Qm. // vmov Qd, Qm.
// vorr, Qd, Qm, Qn. // vorr, Qd, Qm, Qn.
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
uint32_t src1[4]; uint32_t src1[4];
get_q_register(Vm, src1); get_q_register(Vm, src1);
if (Vm != Vn) { if (Vm != Vn) {
...@@ -4008,12 +4081,147 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { ...@@ -4008,12 +4081,147 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
} }
} }
set_q_register(Vd, src1); set_q_register(Vd, src1);
} else if (instr->Bits(11, 8) == 8) { } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
instr->Bit(4) == 1) {
// vand Qd, Qm, Qn.
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = src1[i] & src2[i];
}
set_q_register(Vd, src1);
} else {
UNIMPLEMENTED();
}
break;
}
case 0x2: {
if (instr->Bit(4) == 1) {
// vqsub.s<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8:
SubSaturate<int8_t>(this, Vd, Vm, Vn);
break;
case Neon16:
SubSaturate<int16_t>(this, Vd, Vm, Vn);
break;
case Neon32:
SubSaturate<int32_t>(this, Vd, Vm, Vn);
break;
default:
UNREACHABLE();
break;
}
} else {
UNIMPLEMENTED();
}
break;
}
case 0x3: {
// vcge/vcgt.s<size> Qd, Qm, Qn.
bool ge = instr->Bit(4) == 1;
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8: {
int8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFF : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
int16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFF : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
int32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0;
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
break;
}
case 0x6: {
// vmin/vmax.s<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
bool min = instr->Bit(4) != 0;
switch (size) {
case Neon8: {
int8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
int16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
int32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
break;
}
case 0x8: {
// vadd/vtst // vadd/vtst
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
if (instr->Bit(4) == 0) { if (instr->Bit(4) == 0) {
// vadd.i<size> Qd, Qm, Qn. // vadd.i<size> Qd, Qm, Qn.
switch (size) { switch (size) {
...@@ -4089,31 +4297,12 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { ...@@ -4089,31 +4297,12 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
break; break;
} }
} }
} else if (instr->Bits(11, 8) == 0xd && instr->Bit(20) == 0 && break;
instr->Bit(4) == 0) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (instr->Bit(21) == 0) {
// vadd.f32 Qd, Qm, Qn.
src1[i] = src1[i] + src2[i];
} else {
// vsub.f32 Qd, Qm, Qn.
src1[i] = src1[i] - src2[i];
}
} }
set_q_register(Vd, src1); case 0x9: {
} else if (instr->Bits(11, 8) == 0x9 && instr->Bit(6) == 1 && if (instr->Bit(6) == 1 && instr->Bit(4) == 1) {
instr->Bit(4) == 1) {
// vmul.i<size> Qd, Qm, Qn. // vmul.i<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
switch (size) { switch (size) {
case Neon8: { case Neon8: {
uint8_t src1[16], src2[16]; uint8_t src1[16], src2[16];
...@@ -4146,92 +4335,52 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { ...@@ -4146,92 +4335,52 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
break; break;
} }
default: default:
UNREACHABLE();
break;
}
} else {
UNIMPLEMENTED(); UNIMPLEMENTED();
}
break; break;
} }
} else if (instr->Bits(11, 8) == 0xe && instr->Bits(21, 20) == 0 && case 0xd: {
instr->Bit(4) == 0) { if (instr->Bit(4) == 0) {
// vceq.f32.
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
float src1[4], src2[4]; float src1[4], src2[4];
get_q_register(Vn, src1); get_q_register(Vn, src1);
get_q_register(Vm, src2); get_q_register(Vm, src2);
uint32_t dst[4];
for (int i = 0; i < 4; i++) {
dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
}
set_q_register(Vd, dst);
} else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 &&
instr->Bit(6) == 1 && instr->Bit(4) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
// vand Qd, Qm, Qn.
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
src1[i] = src1[i] & src2[i]; if (instr->Bit(21) == 0) {
// vadd.f32 Qd, Qm, Qn.
src1[i] = src1[i] + src2[i];
} else {
// vsub.f32 Qd, Qm, Qn.
src1[i] = src1[i] - src2[i];
} }
set_q_register(Vd, src1);
} else if (instr->Bits(11, 8) == 0x3) {
// vcge/vcgt.s<size> Qd, Qm, Qn.
bool ge = instr->Bit(4) == 1;
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
switch (size) {
case Neon8: {
int8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFF : 0;
} }
set_q_register(Vd, src1); set_q_register(Vd, src1);
break; } else {
} UNIMPLEMENTED();
case Neon16: {
int16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFF : 0;
} }
set_q_register(Vd, src1);
break; break;
} }
case Neon32: { case 0xe: {
int32_t src1[4], src2[4]; if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) {
// vceq.f32.
float src1[4], src2[4];
get_q_register(Vn, src1); get_q_register(Vn, src1);
get_q_register(Vm, src2); get_q_register(Vm, src2);
uint32_t dst[4];
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
if (ge) dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0;
} }
set_q_register(Vd, src1); set_q_register(Vd, dst);
break; } else {
UNIMPLEMENTED();
} }
default:
UNREACHABLE();
break; break;
} }
} else if (instr->Bits(11, 8) == 0xf && instr->Bit(20) == 0 && case 0xf: {
instr->Bit(6) == 1) { if (instr->Bit(20) == 0 && instr->Bit(6) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
float src1[4], src2[4]; float src1[4], src2[4];
get_q_register(Vn, src1); get_q_register(Vn, src1);
get_q_register(Vm, src2); get_q_register(Vm, src2);
...@@ -4261,61 +4410,17 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { ...@@ -4261,61 +4410,17 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
} }
} }
set_q_register(Vd, src1); set_q_register(Vd, src1);
} else if (instr->Bits(11, 8) == 0x6) { } else {
// vmin/vmax.s<size> Qd, Qm, Qn. UNIMPLEMENTED();
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
bool min = instr->Bit(4) != 0;
switch (size) {
case Neon8: {
int8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
int16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
int32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
} }
set_q_register(Vd, src1);
break; break;
} }
default: default:
UNREACHABLE();
break;
}
} else {
UNIMPLEMENTED(); UNIMPLEMENTED();
break;
} }
break; break;
}
case 5: case 5:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) { (instr->Bit(4) == 1)) {
...@@ -4436,95 +4541,44 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { ...@@ -4436,95 +4541,44 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
break; break;
case 6: case 6: {
if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) { int Vd, Vm, Vn;
// vsub.size Qd, Qm, Qn. if (instr->Bit(6) == 0) {
Vd = instr->VFPDRegValue(kDoublePrecision);
Vm = instr->VFPMRegValue(kDoublePrecision);
Vn = instr->VFPNRegValue(kDoublePrecision);
} else {
Vd = instr->VFPDRegValue(kSimd128Precision);
Vm = instr->VFPMRegValue(kSimd128Precision);
Vn = instr->VFPNRegValue(kSimd128Precision);
}
switch (instr->Bits(11, 8)) {
case 0x0: {
if (instr->Bit(4) == 1) {
// vqadd.u<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
switch (size) { switch (size) {
case Neon8: { case Neon8:
uint8_t src1[16], src2[16]; AddSaturate<uint8_t>(this, Vd, Vm, Vn);
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] -= src2[i];
}
set_q_register(Vd, src1);
break; break;
} case Neon16:
case Neon16: { AddSaturate<uint16_t>(this, Vd, Vm, Vn);
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] -= src2[i];
}
set_q_register(Vd, src1);
break; break;
} case Neon32:
case Neon32: { AddSaturate<uint32_t>(this, Vd, Vm, Vn);
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] -= src2[i];
}
set_q_register(Vd, src1);
break; break;
}
default: default:
UNREACHABLE(); UNREACHABLE();
break; break;
} }
} else if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 1) { } else {
// vceq.size Qd, Qm, Qn. UNIMPLEMENTED();
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0;
}
set_q_register(Vd, src1);
break;
} }
default:
UNREACHABLE();
break; break;
} }
} else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 1 && case 0x1: {
instr->Bit(4) == 1) { if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) {
// vbsl.size Qd, Qm, Qn. // vbsl.size Qd, Qm, Qn.
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
uint32_t dst[4], src1[4], src2[4]; uint32_t dst[4], src1[4], src2[4];
get_q_register(Vd, dst); get_q_register(Vd, dst);
get_q_register(Vn, src1); get_q_register(Vn, src1);
...@@ -4533,13 +4587,9 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { ...@@ -4533,13 +4587,9 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
} }
set_q_register(Vd, dst); set_q_register(Vd, dst);
} else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 && } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) {
instr->Bit(4) == 1) {
if (instr->Bit(6) == 0) { if (instr->Bit(6) == 0) {
// veor Dd, Dn, Dm // veor Dd, Dn, Dm
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vn = instr->VFPNRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
uint64_t src1, src2; uint64_t src1, src2;
get_d_register(Vn, &src1); get_d_register(Vn, &src1);
get_d_register(Vm, &src2); get_d_register(Vm, &src2);
...@@ -4548,54 +4598,44 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { ...@@ -4548,54 +4598,44 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
} else { } else {
// veor Qd, Qn, Qm // veor Qd, Qn, Qm
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
uint32_t src1[4], src2[4]; uint32_t src1[4], src2[4];
get_q_register(Vn, src1); get_q_register(Vn, src1);
get_q_register(Vm, src2); get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; for (int i = 0; i < 4; i++) src1[i] ^= src2[i];
set_q_register(Vd, src1); set_q_register(Vd, src1);
} }
} else if (instr->Bits(11, 8) == 0xd && instr->Bit(21) == 0 && } else {
instr->Bit(6) == 1 && instr->Bit(4) == 1) { UNIMPLEMENTED();
// vmul.f32 Qd, Qn, Qm }
int Vd = instr->VFPDRegValue(kSimd128Precision); break;
int Vn = instr->VFPNRegValue(kSimd128Precision); }
int Vm = instr->VFPMRegValue(kSimd128Precision); case 0x2: {
float src1[4], src2[4]; if (instr->Bit(4) == 1) {
get_q_register(Vn, src1); // vqsub.u<size> Qd, Qm, Qn.
get_q_register(Vm, src2); NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
for (int i = 0; i < 4; i++) { switch (size) {
src1[i] = src1[i] * src2[i]; case Neon8:
SubSaturate<uint8_t>(this, Vd, Vm, Vn);
break;
case Neon16:
SubSaturate<uint16_t>(this, Vd, Vm, Vn);
break;
case Neon32:
SubSaturate<uint32_t>(this, Vd, Vm, Vn);
break;
default:
UNREACHABLE();
break;
} }
set_q_register(Vd, src1);
} else if (instr->Bits(11, 8) == 0xe && instr->Bit(20) == 0 &&
instr->Bit(4) == 0) {
// vcge/vcgt.f32 Qd, Qm, Qn
bool ge = instr->Bit(21) == 0;
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
uint32_t dst[4];
for (int i = 0; i < 4; i++) {
if (ge) {
dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
} else { } else {
dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; UNIMPLEMENTED();
} }
break;
} }
set_q_register(Vd, dst); case 0x3: {
} else if (instr->Bits(11, 8) == 0x3) {
// vcge/vcgt.u<size> Qd, Qm, Qn. // vcge/vcgt.u<size> Qd, Qm, Qn.
bool ge = instr->Bit(4) == 1; bool ge = instr->Bit(4) == 1;
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
switch (size) { switch (size) {
case Neon8: { case Neon8: {
uint8_t src1[16], src2[16]; uint8_t src1[16], src2[16];
...@@ -4640,12 +4680,11 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { ...@@ -4640,12 +4680,11 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
UNREACHABLE(); UNREACHABLE();
break; break;
} }
} else if (instr->Bits(11, 8) == 0x6) { break;
}
case 0x6: {
// vmin/vmax.u<size> Qd, Qm, Qn. // vmin/vmax.u<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
bool min = instr->Bit(4) != 0; bool min = instr->Bit(4) != 0;
switch (size) { switch (size) {
case Neon8: { case Neon8: {
...@@ -4691,10 +4730,130 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { ...@@ -4691,10 +4730,130 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
UNREACHABLE(); UNREACHABLE();
break; break;
} }
break;
}
case 0x8: {
if (instr->Bit(4) == 0) {
// vsub.size Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] -= src2[i];
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] -= src2[i];
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] -= src2[i];
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
} else {
// vceq.size Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0;
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
}
break;
}
case 0xd: {
if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
// vmul.f32 Qd, Qn, Qm
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = src1[i] * src2[i];
}
set_q_register(Vd, src1);
} else {
UNIMPLEMENTED();
}
break;
}
case 0xe: {
if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {
// vcge/vcgt.f32 Qd, Qm, Qn
bool ge = instr->Bit(21) == 0;
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
uint32_t dst[4];
for (int i = 0; i < 4; i++) {
if (ge) {
dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
} else {
dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
}
}
set_q_register(Vd, dst);
} else { } else {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
break; break;
}
default:
UNREACHABLE();
break;
}
break;
}
case 7: case 7:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) { (instr->Bit(4) == 1)) {
......
...@@ -1299,7 +1299,9 @@ TEST(15) { ...@@ -1299,7 +1299,9 @@ TEST(15) {
uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4]; uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4];
uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4]; uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4];
uint32_t vadd8[4], vadd16[4], vadd32[4]; uint32_t vadd8[4], vadd16[4], vadd32[4];
uint32_t vqadd_s8[4], vqadd_u16[4], vqadd_s32[4];
uint32_t vsub8[4], vsub16[4], vsub32[4]; uint32_t vsub8[4], vsub16[4], vsub32[4];
uint32_t vqsub_u8[4], vqsub_s16[4], vqsub_u32[4];
uint32_t vmul8[4], vmul16[4], vmul32[4]; uint32_t vmul8[4], vmul16[4], vmul32[4];
uint32_t vshl8[4], vshl16[4], vshl32[5]; uint32_t vshl8[4], vshl16[4], vshl32[5];
uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[5]; uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[5];
...@@ -1633,6 +1635,28 @@ TEST(15) { ...@@ -1633,6 +1635,28 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vadd32)))); __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vadd32))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vqadd.
__ mov(r4, Operand(0x81));
__ vdup(Neon8, q0, r4);
__ mov(r4, Operand(0x82));
__ vdup(Neon8, q1, r4);
__ vqadd(NeonS8, q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vqadd_s8))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
__ mov(r4, Operand(0x8000));
__ vdup(Neon16, q0, r4);
__ vdup(Neon16, q1, r4);
__ vqadd(NeonU16, q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vqadd_u16))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
__ mov(r4, Operand(0x80000001));
__ vdup(Neon32, q0, r4);
__ mov(r4, Operand(0x80000002));
__ vdup(Neon32, q1, r4);
__ vqadd(NeonS32, q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vqadd_s32))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vsub (integer). // vsub (integer).
__ mov(r4, Operand(0x01)); __ mov(r4, Operand(0x01));
__ vdup(Neon8, q0, r4); __ vdup(Neon8, q0, r4);
...@@ -1656,6 +1680,29 @@ TEST(15) { ...@@ -1656,6 +1680,29 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsub32)))); __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsub32))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vqsub.
__ mov(r4, Operand(0x7f));
__ vdup(Neon8, q0, r4);
__ mov(r4, Operand(0x3f));
__ vdup(Neon8, q1, r4);
__ vqsub(NeonU8, q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vqsub_u8))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
__ mov(r4, Operand(0x8000));
__ vdup(Neon16, q0, r4);
__ mov(r4, Operand(0x7fff));
__ vdup(Neon16, q1, r4);
__ vqsub(NeonS16, q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vqsub_s16))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
__ mov(r4, Operand(0x80000001));
__ vdup(Neon32, q0, r4);
__ mov(r4, Operand(0x80000000));
__ vdup(Neon32, q1, r4);
__ vqsub(NeonU32, q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vqsub_u32))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vmul (integer). // vmul (integer).
__ mov(r4, Operand(0x02)); __ mov(r4, Operand(0x02));
__ vdup(Neon8, q0, r4); __ vdup(Neon8, q0, r4);
...@@ -1948,6 +1995,12 @@ TEST(15) { ...@@ -1948,6 +1995,12 @@ TEST(15) {
CHECK_EQ_SPLAT(vadd8, 0x03030303u); CHECK_EQ_SPLAT(vadd8, 0x03030303u);
CHECK_EQ_SPLAT(vadd16, 0x00030003u); CHECK_EQ_SPLAT(vadd16, 0x00030003u);
CHECK_EQ_SPLAT(vadd32, 0x00000003u); CHECK_EQ_SPLAT(vadd32, 0x00000003u);
CHECK_EQ_SPLAT(vqadd_s8, 0x80808080u);
CHECK_EQ_SPLAT(vqadd_u16, 0xffffffffu);
CHECK_EQ_SPLAT(vqadd_s32, 0x80000000u);
CHECK_EQ_SPLAT(vqsub_u8, 0x00000000u);
CHECK_EQ_SPLAT(vqsub_s16, 0x7fff7fffu);
CHECK_EQ_SPLAT(vqsub_u32, 0x00000000u);
CHECK_EQ_SPLAT(vsub8, 0xfefefefeu); CHECK_EQ_SPLAT(vsub8, 0xfefefefeu);
CHECK_EQ_SPLAT(vsub16, 0xfffefffeu); CHECK_EQ_SPLAT(vsub16, 0xfffefffeu);
CHECK_EQ_SPLAT(vsub32, 0xfffffffeu); CHECK_EQ_SPLAT(vsub32, 0xfffffffeu);
......
...@@ -1047,6 +1047,12 @@ TEST(Neon) { ...@@ -1047,6 +1047,12 @@ TEST(Neon) {
"f2142860 vadd.i16 q1, q2, q8"); "f2142860 vadd.i16 q1, q2, q8");
COMPARE(vadd(Neon32, q15, q0, q8), COMPARE(vadd(Neon32, q15, q0, q8),
"f260e860 vadd.i32 q15, q0, q8"); "f260e860 vadd.i32 q15, q0, q8");
COMPARE(vqadd(NeonU8, q0, q1, q2),
"f3020054 vqadd.u8 q0, q1, q2");
COMPARE(vqadd(NeonS16, q1, q2, q8),
"f2142070 vqadd.s16 q1, q2, q8");
COMPARE(vqadd(NeonU32, q15, q0, q8),
"f360e070 vqadd.u32 q15, q0, q8");
COMPARE(vsub(q15, q0, q8), COMPARE(vsub(q15, q0, q8),
"f260ed60 vsub.f32 q15, q0, q8"); "f260ed60 vsub.f32 q15, q0, q8");
COMPARE(vsub(Neon8, q0, q1, q2), COMPARE(vsub(Neon8, q0, q1, q2),
...@@ -1055,6 +1061,12 @@ TEST(Neon) { ...@@ -1055,6 +1061,12 @@ TEST(Neon) {
"f3142860 vsub.i16 q1, q2, q8"); "f3142860 vsub.i16 q1, q2, q8");
COMPARE(vsub(Neon32, q15, q0, q8), COMPARE(vsub(Neon32, q15, q0, q8),
"f360e860 vsub.i32 q15, q0, q8"); "f360e860 vsub.i32 q15, q0, q8");
COMPARE(vqsub(NeonU8, q0, q1, q2),
"f3020254 vqsub.u8 q0, q1, q2");
COMPARE(vqsub(NeonS16, q1, q2, q8),
"f2142270 vqsub.s16 q1, q2, q8");
COMPARE(vqsub(NeonU32, q15, q0, q8),
"f360e270 vqsub.u32 q15, q0, q8");
COMPARE(vmul(q0, q1, q2), COMPARE(vmul(q0, q1, q2),
"f3020d54 vmul.f32 q0, q1, q2"); "f3020d54 vmul.f32 q0, q1, q2");
COMPARE(vmul(Neon8, q0, q1, q2), COMPARE(vmul(Neon8, q0, q1, q2),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment