Commit b7df78f3 authored by bbudge, committed by Commit bot

[ARM] Add Neon saturating add and subtract instructions.

- Adds vqadd.s/u, vqsub.s/u for all integer lane sizes.
- Refactors disassembler and simulator, using switches instead
of long if-else chains.

LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2649323012
Cr-Commit-Position: refs/heads/master@{#42865}
parent 5a02d3e8
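Editor's note: the simulator changes in this CL model saturating lanes by widening each lane to int64_t, doing the arithmetic, and clamping back to the lane type (see the Widen/Clamp/AddSaturate/SubSaturate templates added to simulator-arm.cc below). A minimal standalone sketch of that scheme, using illustrative names that are not part of this CL:

// Hedged sketch of the widen-then-clamp saturating arithmetic used by the
// simulator templates in this CL. Names here are illustrative only.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>

template <typename T>
T SaturatingAdd(T a, T b) {
  // Widen to int64_t so the sum cannot overflow, then clamp back to T.
  int64_t sum = static_cast<int64_t>(a) + static_cast<int64_t>(b);
  int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min());
  int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max());
  return static_cast<T>(std::max(min, std::min(max, sum)));
}

int main() {
  // vqadd.s8 style: -127 + -126 = -253 saturates to INT8_MIN (-128, 0x80).
  printf("%d\n", SaturatingAdd<int8_t>(-127, -126));
  // vqadd.u16 style: 0x8000 + 0x8000 = 0x10000 saturates to 0xFFFF.
  printf("%d\n", SaturatingAdd<uint16_t>(0x8000, 0x8000));
  return 0;
}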
......@@ -4272,7 +4272,19 @@ static Instr EncodeNeonBinOp(FPBinOp op, QwNeonRegister dst,
vm | op_encoding;
}
enum IntegerBinOp { VADD, VSUB, VMUL, VMIN, VMAX, VTST, VCEQ, VCGE, VCGT };
enum IntegerBinOp {
VADD,
VQADD,
VSUB,
VQSUB,
VMUL,
VMIN,
VMAX,
VTST,
VCEQ,
VCGE,
VCGT
};
static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
const QwNeonRegister dst,
......@@ -4283,9 +4295,15 @@ static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
case VADD:
op_encoding = 0x8 * B8;
break;
case VQADD:
op_encoding = B4;
break;
case VSUB:
op_encoding = B24 | 0x8 * B8;
break;
case VQSUB:
op_encoding = 0x2 * B8 | B4;
break;
case VMUL:
op_encoding = 0x9 * B8 | B4;
break;
......@@ -4348,6 +4366,14 @@ void Assembler::vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
emit(EncodeNeonBinOp(VADD, size, dst, src1, src2));
}
void Assembler::vqadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vqadd(Qn, Qm) SIMD integer saturating addition.
// Instruction details available in ARM DDI 0406C.b, A8-996.
emit(EncodeNeonBinOp(VQADD, dt, dst, src1, src2));
}
void Assembler::vsub(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
......@@ -4364,6 +4390,14 @@ void Assembler::vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
emit(EncodeNeonBinOp(VSUB, size, dst, src1, src2));
}
void Assembler::vqsub(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vqsub(Qn, Qm) SIMD integer saturating subtraction.
// Instruction details available in ARM DDI 0406C.b, A8-1020.
emit(EncodeNeonBinOp(VQSUB, dt, dst, src1, src2));
}
void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
......
......@@ -1374,9 +1374,13 @@ class Assembler : public AssemblerBase {
void vadd(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vqadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vsub(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vqsub(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vmul(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
......
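Editor's note: a hedged usage sketch of the new Assembler entry points declared above (this is not code from the CL; it assumes an Assembler `assm` with the NEON feature enabled, e.g. via a CpuFeatureScope). The NeonDataType operand selects both signedness and lane size, and the destination is the first register operand:

// Illustrative only; mirrors how the cctest below drives the new API.
assm.vqadd(NeonS8, q0, q1, q2);   // q0 = saturate_s8(q1 + q2), per byte lane
assm.vqadd(NeonU16, q3, q4, q5);  // q3 = saturate_u16(q4 + q5), per halfword lane
assm.vqsub(NeonS32, q6, q7, q8);  // q6 = saturate_s32(q7 - q8), per word lane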
......@@ -1856,104 +1856,150 @@ static const char* const barrier_option_names[] = {
void Decoder::DecodeSpecialCondition(Instruction* instr) {
switch (instr->SpecialValue()) {
case 4: {
int Vd, Vm, Vn;
if (instr->Bit(6) == 0) {
Vd = instr->VFPDRegValue(kDoublePrecision);
Vm = instr->VFPMRegValue(kDoublePrecision);
Vn = instr->VFPNRegValue(kDoublePrecision);
} else {
Vd = instr->VFPDRegValue(kSimd128Precision);
Vm = instr->VFPMRegValue(kSimd128Precision);
Vn = instr->VFPNRegValue(kSimd128Precision);
}
switch (instr->Bits(11, 8)) {
case 0x0: {
if (instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vqadd.s<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"vqadd.s%d q%d, q%d, q%d", size, Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0x1: {
if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 &&
instr->Bit(4) == 1) {
if (Vm == Vn) {
// vmov Qd, Qm
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmov q%d, q%d", Vd, Vm);
} else {
// vorr Qd, Qm, Qn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vorr q%d, q%d, q%d", Vd, Vn, Vm);
}
} else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
instr->Bit(4) == 1) {
// vand Qd, Qm, Qn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vand q%d, q%d, q%d", Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0x2: {
if (instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vqsub.s<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"vqsub.s%d q%d, q%d, q%d", size, Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0x3: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt";
// vcge/vcgt.s<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.s%d q%d, q%d, q%d",
op, size, Vd, Vn, Vm);
break;
}
case 0x6: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vmin/vmax.s<size> Qd, Qm, Qn.
const char* op = instr->Bit(4) == 1 ? "vmin" : "vmax";
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.s%d q%d, q%d, q%d",
op, size, Vd, Vn, Vm);
break;
}
case 0x8: {
const char* op = (instr->Bit(4) == 0) ? "vadd" : "vtst";
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vadd/vtst.i<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.i%d q%d, q%d, q%d",
op, size, Vd, Vn, Vm);
break;
}
case 0x9: {
if (instr->Bit(6) == 1 && instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vmul.i<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"vmul.i%d q%d, q%d, q%d", size, Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0xd: {
if (instr->Bit(4) == 0) {
const char* op = (instr->Bits(21, 20) == 0) ? "vadd" : "vsub";
// vadd/vsub.f32 Qd, Qm, Qn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0xe: {
if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) {
// vceq.f32 Qd, Qm, Qn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vceq.f32 q%d, q%d, q%d", Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0xf: {
if (instr->Bit(20) == 0 && instr->Bit(6) == 1) {
if (instr->Bit(4) == 1) {
// vrecps/vrsqrts.f32 Qd, Qm, Qn.
const char* op = instr->Bit(21) == 0 ? "vrecps" : "vrsqrts";
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm);
} else {
// vmin/max.f32 Qd, Qm, Qn.
const char* op = instr->Bit(21) == 1 ? "vmin" : "vmax";
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm);
}
} else {
Unknown(instr);
}
break;
}
default:
Unknown(instr);
break;
}
break;
}
case 5:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
......@@ -1963,7 +2009,7 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int Vm = (instr->Bit(5) << 4) | instr->VmValue();
int imm3 = instr->Bits(21, 19);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmovl.s%d q%d, d%d", imm3*8, Vd, Vm);
"vmovl.s%d q%d, d%d", imm3 * 8, Vd, Vm);
} else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) {
// vext.8 Qd, Qm, Qn, imm4
int imm4 = instr->Bits(11, 8);
......@@ -1995,87 +2041,120 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
Unknown(instr);
}
break;
case 6: {
int Vd, Vm, Vn;
if (instr->Bit(6) == 0) {
Vd = instr->VFPDRegValue(kDoublePrecision);
Vm = instr->VFPMRegValue(kDoublePrecision);
Vn = instr->VFPNRegValue(kDoublePrecision);
} else {
Vd = instr->VFPDRegValue(kSimd128Precision);
Vm = instr->VFPMRegValue(kSimd128Precision);
Vn = instr->VFPNRegValue(kSimd128Precision);
}
switch (instr->Bits(11, 8)) {
case 0x0: {
if (instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vqadd.u<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"vqadd.u%d q%d, q%d, q%d", size, Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0x1: {
if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vbsl q%d, q%d, q%d", Vd, Vn, Vm);
} else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) {
if (instr->Bit(6) == 0) {
// veor Dd, Dn, Dm
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"veor d%d, d%d, d%d", Vd, Vn, Vm);
} else {
// veor Qd, Qn, Qm
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"veor q%d, q%d, q%d", Vd, Vn, Vm);
}
} else {
Unknown(instr);
}
break;
}
case 0x2: {
if (instr->Bit(4) == 1) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vqsub.u<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"vqsub.u%d q%d, q%d, q%d", size, Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0x3: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt";
// vcge/vcgt.u<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.u%d q%d, q%d, q%d",
op, size, Vd, Vn, Vm);
break;
}
case 0x6: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
// vmin/vmax.u<size> Qd, Qm, Qn.
const char* op = instr->Bit(4) == 1 ? "vmin" : "vmax";
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.u%d q%d, q%d, q%d",
op, size, Vd, Vn, Vm);
break;
}
case 0x8: {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
if (instr->Bit(4) == 0) {
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"vsub.i%d q%d, q%d, q%d", size, Vd, Vn, Vm);
} else {
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_,
"vceq.i%d q%d, q%d, q%d", size, Vd, Vn, Vm);
}
break;
}
case 0xd: {
if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
// vmul.f32 Qd, Qn, Qm
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmul.f32 q%d, q%d, q%d", Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
case 0xe: {
if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {
const char* op = (instr->Bit(21) == 0) ? "vcge" : "vcgt";
// vcge/vcgt.f32 Qd, Qm, Qn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm);
} else {
Unknown(instr);
}
break;
}
default:
Unknown(instr);
break;
}
break;
}
case 7:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
......@@ -2085,7 +2164,7 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int Vm = (instr->Bit(5) << 4) | instr->VmValue();
int imm3 = instr->Bits(21, 19);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmovl.u%d q%d, d%d", imm3*8, Vd, Vm);
"vmovl.u%d q%d, d%d", imm3 * 8, Vd, Vm);
} else if (instr->Opc1Value() == 7 && instr->Bits(21, 20) == 0x3 &&
instr->Bit(4) == 0) {
if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) {
......@@ -2211,8 +2290,8 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int size = instr->Bits(7, 6);
int align = instr->Bits(5, 4);
int Rm = instr->VmValue();
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vst1.%d ", (1 << size) << 3);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "vst1.%d ",
(1 << size) << 3);
FormatNeonList(Vd, type);
Print(", ");
FormatNeonMemory(Rn, align, Rm);
......@@ -2224,8 +2303,8 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int size = instr->Bits(7, 6);
int align = instr->Bits(5, 4);
int Rm = instr->VmValue();
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vld1.%d ", (1 << size) << 3);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "vld1.%d ",
(1 << size) << 3);
FormatNeonList(Vd, type);
Print(", ");
FormatNeonMemory(Rn, align, Rm);
......@@ -2239,8 +2318,8 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int Rn = instr->Bits(19, 16);
int offset = instr->Bits(11, 0);
if (offset == 0) {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"pld [r%d]", Rn);
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "pld [r%d]", Rn);
} else if (instr->Bit(23) == 0) {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"pld [r%d, #-%d]", Rn, offset);
......@@ -2252,16 +2331,16 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int option = instr->Bits(3, 0);
switch (instr->Bits(7, 4)) {
case 4:
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"dsb %s", barrier_option_names[option]);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "dsb %s",
barrier_option_names[option]);
break;
case 5:
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"dmb %s", barrier_option_names[option]);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "dmb %s",
barrier_option_names[option]);
break;
case 6:
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"isb %s", barrier_option_names[option]);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "isb %s",
barrier_option_names[option]);
break;
default:
Unknown(instr);
......
......@@ -3988,61 +3988,177 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) {
}
}
// Templated operations for NEON instructions.
// TODO(bbudge) Add more templates for use in DecodeSpecialCondition.
template <typename T>
int64_t Widen(T value) {
static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
return static_cast<int64_t>(value);
}
template <typename T>
T Clamp(int64_t value) {
static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min());
int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max());
int64_t clamped = std::max(min, std::min(max, value));
return static_cast<T>(clamped);
}
template <typename T>
void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
static const int kLanes = 16 / sizeof(T);
T src1[kLanes], src2[kLanes];
simulator->get_q_register(Vn, src1);
simulator->get_q_register(Vm, src2);
for (int i = 0; i < kLanes; i++) {
src1[i] = Clamp<T>(Widen(src1[i]) + Widen(src2[i]));
}
simulator->set_q_register(Vd, src1);
}
template <typename T>
void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
static const int kLanes = 16 / sizeof(T);
T src1[kLanes], src2[kLanes];
simulator->get_q_register(Vn, src1);
simulator->get_q_register(Vm, src2);
for (int i = 0; i < kLanes; i++) {
src1[i] = Clamp<T>(Widen(src1[i]) - Widen(src2[i]));
}
simulator->set_q_register(Vd, src1);
}
void Simulator::DecodeSpecialCondition(Instruction* instr) {
switch (instr->SpecialValue()) {
case 4: {
int Vd, Vm, Vn;
if (instr->Bit(6) == 0) {
Vd = instr->VFPDRegValue(kDoublePrecision);
Vm = instr->VFPMRegValue(kDoublePrecision);
Vn = instr->VFPNRegValue(kDoublePrecision);
} else {
Vd = instr->VFPDRegValue(kSimd128Precision);
Vm = instr->VFPMRegValue(kSimd128Precision);
Vn = instr->VFPNRegValue(kSimd128Precision);
}
switch (instr->Bits(11, 8)) {
case 0x0: {
if (instr->Bit(4) == 1) {
// vqadd.s<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8:
AddSaturate<int8_t>(this, Vd, Vm, Vn);
break;
case Neon16:
AddSaturate<int16_t>(this, Vd, Vm, Vn);
break;
case Neon32:
AddSaturate<int32_t>(this, Vd, Vm, Vn);
break;
default:
UNREACHABLE();
break;
}
} else {
UNIMPLEMENTED();
}
break;
}
case 0x1: {
if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 &&
instr->Bit(4) == 1) {
// vmov Qd, Qm.
// vorr, Qd, Qm, Qn.
uint32_t src1[4];
get_q_register(Vm, src1);
if (Vm != Vn) {
uint32_t src2[4];
get_q_register(Vn, src2);
for (int i = 0; i < 4; i++) {
src1[i] = src1[i] | src2[i];
}
}
set_q_register(Vd, src1);
} else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
instr->Bit(4) == 1) {
// vand Qd, Qm, Qn.
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = src1[i] & src2[i];
}
set_q_register(Vd, src1);
} else {
UNIMPLEMENTED();
}
break;
}
case 0x2: {
if (instr->Bit(4) == 1) {
// vqsub.s<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8:
SubSaturate<int8_t>(this, Vd, Vm, Vn);
break;
case Neon16:
SubSaturate<int16_t>(this, Vd, Vm, Vn);
break;
case Neon32:
SubSaturate<int32_t>(this, Vd, Vm, Vn);
break;
default:
UNREACHABLE();
break;
}
} else {
UNIMPLEMENTED();
}
break;
}
case 0x3: {
// vcge/vcgt.s<size> Qd, Qm, Qn.
bool ge = instr->Bit(4) == 1;
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8: {
int8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFF : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
int16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFF : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
int32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0;
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
break;
}
case 0x6: {
// vmin/vmax.s<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
bool min = instr->Bit(4) != 0;
switch (size) {
case Neon8: {
int8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
int16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
int32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
break;
}
case 0x8: {
// vadd/vtst
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
if (instr->Bit(4) == 0) {
// vadd.i<size> Qd, Qm, Qn.
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] += src2[i];
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] += src2[i];
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] += src2[i];
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
} else {
// vtst.i<size> Qd, Qm, Qn.
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0;
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
}
break;
}
case 0x9: {
if (instr->Bit(6) == 1 && instr->Bit(4) == 1) {
// vmul.i<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] *= src2[i];
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] *= src2[i];
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] *= src2[i];
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
} else {
UNIMPLEMENTED();
}
break;
}
case 0xd: {
if (instr->Bit(4) == 0) {
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (instr->Bit(21) == 0) {
// vadd.f32 Qd, Qm, Qn.
src1[i] = src1[i] + src2[i];
} else {
// vsub.f32 Qd, Qm, Qn.
src1[i] = src1[i] - src2[i];
}
}
set_q_register(Vd, src1);
} else {
UNIMPLEMENTED();
}
break;
}
case 0xe: {
if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) {
// vceq.f32.
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
uint32_t dst[4];
for (int i = 0; i < 4; i++) {
dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
}
set_q_register(Vd, dst);
} else {
UNIMPLEMENTED();
}
break;
}
case 0xf: {
if (instr->Bit(20) == 0 && instr->Bit(6) == 1) {
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
if (instr->Bit(4) == 1) {
if (instr->Bit(21) == 0) {
// vrecps.f32 Qd, Qm, Qn.
for (int i = 0; i < 4; i++) {
src1[i] = 2.0f - src1[i] * src2[i];
}
} else {
// vrsqrts.f32 Qd, Qm, Qn.
for (int i = 0; i < 4; i++) {
src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f;
}
}
} else {
if (instr->Bit(21) == 1) {
// vmin.f32 Qd, Qm, Qn.
for (int i = 0; i < 4; i++) {
src1[i] = std::min(src1[i], src2[i]);
}
} else {
// vmax.f32 Qd, Qm, Qn.
for (int i = 0; i < 4; i++) {
src1[i] = std::max(src1[i], src2[i]);
}
}
}
set_q_register(Vd, src1);
} else {
UNIMPLEMENTED();
}
break;
}
default:
UNIMPLEMENTED();
break;
}
break;
}
case 5:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
......@@ -4436,265 +4541,319 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
UNIMPLEMENTED();
}
break;
case 6: {
int Vd, Vm, Vn;
if (instr->Bit(6) == 0) {
Vd = instr->VFPDRegValue(kDoublePrecision);
Vm = instr->VFPMRegValue(kDoublePrecision);
Vn = instr->VFPNRegValue(kDoublePrecision);
} else {
Vd = instr->VFPDRegValue(kSimd128Precision);
Vm = instr->VFPMRegValue(kSimd128Precision);
Vn = instr->VFPNRegValue(kSimd128Precision);
}
switch (instr->Bits(11, 8)) {
case 0x0: {
if (instr->Bit(4) == 1) {
// vqadd.u<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8:
AddSaturate<uint8_t>(this, Vd, Vm, Vn);
break;
case Neon16:
AddSaturate<uint16_t>(this, Vd, Vm, Vn);
break;
case Neon32:
AddSaturate<uint32_t>(this, Vd, Vm, Vn);
break;
default:
UNREACHABLE();
break;
}
} else {
UNIMPLEMENTED();
}
break;
}
case 0x1: {
if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) {
// vbsl.size Qd, Qm, Qn.
uint32_t dst[4], src1[4], src2[4];
get_q_register(Vd, dst);
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
}
set_q_register(Vd, dst);
} else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) {
if (instr->Bit(6) == 0) {
// veor Dd, Dn, Dm
uint64_t src1, src2;
get_d_register(Vn, &src1);
get_d_register(Vm, &src2);
src1 ^= src2;
set_d_register(Vd, &src1);
} else {
// veor Qd, Qn, Qm
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) src1[i] ^= src2[i];
set_q_register(Vd, src1);
}
} else {
UNIMPLEMENTED();
}
break;
}
case 0x2: {
if (instr->Bit(4) == 1) {
// vqsub.u<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8:
SubSaturate<uint8_t>(this, Vd, Vm, Vn);
break;
case Neon16:
SubSaturate<uint16_t>(this, Vd, Vm, Vn);
break;
case Neon32:
SubSaturate<uint32_t>(this, Vd, Vm, Vn);
break;
default:
UNREACHABLE();
break;
}
} else {
UNIMPLEMENTED();
}
break;
}
case 0x3: {
// vcge/vcgt.u<size> Qd, Qm, Qn.
bool ge = instr->Bit(4) == 1;
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFu : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFu : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
break;
}
case 0x6: {
// vmin/vmax.u<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
bool min = instr->Bit(4) != 0;
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (min)
src1[i] = std::min(src1[i], src2[i]);
else
src1[i] = std::max(src1[i], src2[i]);
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
break;
}
case 0x8: {
if (instr->Bit(4) == 0) {
// vsub.size Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] -= src2[i];
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] -= src2[i];
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] -= src2[i];
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
} else {
// vceq.size Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0;
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
}
break;
}
case 0xd: {
if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
// vmul.f32 Qd, Qn, Qm
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = src1[i] * src2[i];
}
set_q_register(Vd, src1);
} else {
UNIMPLEMENTED();
}
break;
}
case 0xe: {
if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {
// vcge/vcgt.f32 Qd, Qm, Qn
bool ge = instr->Bit(21) == 0;
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
uint32_t dst[4];
for (int i = 0; i < 4; i++) {
if (ge) {
dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
} else {
dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
}
}
set_q_register(Vd, dst);
} else {
UNIMPLEMENTED();
}
break;
}
default:
UNREACHABLE();
break;
}
break;
}
case 7:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
......
......@@ -1299,7 +1299,9 @@ TEST(15) {
uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4];
uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4];
uint32_t vadd8[4], vadd16[4], vadd32[4];
uint32_t vqadd_s8[4], vqadd_u16[4], vqadd_s32[4];
uint32_t vsub8[4], vsub16[4], vsub32[4];
uint32_t vqsub_u8[4], vqsub_s16[4], vqsub_u32[4];
uint32_t vmul8[4], vmul16[4], vmul32[4];
uint32_t vshl8[4], vshl16[4], vshl32[5];
uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[5];
......@@ -1633,6 +1635,28 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vadd32))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vqadd.
__ mov(r4, Operand(0x81));
__ vdup(Neon8, q0, r4);
__ mov(r4, Operand(0x82));
__ vdup(Neon8, q1, r4);
__ vqadd(NeonS8, q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vqadd_s8))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
__ mov(r4, Operand(0x8000));
__ vdup(Neon16, q0, r4);
__ vdup(Neon16, q1, r4);
__ vqadd(NeonU16, q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vqadd_u16))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
__ mov(r4, Operand(0x80000001));
__ vdup(Neon32, q0, r4);
__ mov(r4, Operand(0x80000002));
__ vdup(Neon32, q1, r4);
__ vqadd(NeonS32, q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vqadd_s32))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vsub (integer).
__ mov(r4, Operand(0x01));
__ vdup(Neon8, q0, r4);
......@@ -1656,6 +1680,29 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsub32))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vqsub.
__ mov(r4, Operand(0x7f));
__ vdup(Neon8, q0, r4);
__ mov(r4, Operand(0x3f));
__ vdup(Neon8, q1, r4);
__ vqsub(NeonU8, q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vqsub_u8))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
__ mov(r4, Operand(0x8000));
__ vdup(Neon16, q0, r4);
__ mov(r4, Operand(0x7fff));
__ vdup(Neon16, q1, r4);
__ vqsub(NeonS16, q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vqsub_s16))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
__ mov(r4, Operand(0x80000001));
__ vdup(Neon32, q0, r4);
__ mov(r4, Operand(0x80000000));
__ vdup(Neon32, q1, r4);
__ vqsub(NeonU32, q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vqsub_u32))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vmul (integer).
__ mov(r4, Operand(0x02));
__ vdup(Neon8, q0, r4);
......@@ -1948,6 +1995,12 @@ TEST(15) {
CHECK_EQ_SPLAT(vadd8, 0x03030303u);
CHECK_EQ_SPLAT(vadd16, 0x00030003u);
CHECK_EQ_SPLAT(vadd32, 0x00000003u);
CHECK_EQ_SPLAT(vqadd_s8, 0x80808080u);
CHECK_EQ_SPLAT(vqadd_u16, 0xffffffffu);
CHECK_EQ_SPLAT(vqadd_s32, 0x80000000u);
CHECK_EQ_SPLAT(vqsub_u8, 0x00000000u);
CHECK_EQ_SPLAT(vqsub_s16, 0x7fff7fffu);
CHECK_EQ_SPLAT(vqsub_u32, 0x00000000u);
CHECK_EQ_SPLAT(vsub8, 0xfefefefeu);
CHECK_EQ_SPLAT(vsub16, 0xfffefffeu);
CHECK_EQ_SPLAT(vsub32, 0xfffffffeu);
......
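Editor's note on the saturating expectations above (a reasoning sketch, not additional test code): each CHECK_EQ_SPLAT value follows from the widen-then-clamp lane arithmetic.
- vqadd_s8: 0x81 + 0x82 is (-127) + (-126) = -253, which clamps to INT8_MIN = 0x80 in every byte lane.
- vqadd_u16: 0x8000 + 0x8000 = 0x10000, which clamps to UINT16_MAX = 0xffff.
- vqadd_s32: 0x80000002 + 0x80000001 overflows downward and clamps to INT32_MIN = 0x80000000.
- vqsub_u8: 0x3f - 0x7f underflows and clamps to 0.
- vqsub_s16: 0x7fff - 0x8000 is 32767 - (-32768) = 65535, which clamps to INT16_MAX = 0x7fff.
- vqsub_u32: 0x80000000 - 0x80000001 underflows and clamps to 0.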
......@@ -1047,6 +1047,12 @@ TEST(Neon) {
"f2142860 vadd.i16 q1, q2, q8");
COMPARE(vadd(Neon32, q15, q0, q8),
"f260e860 vadd.i32 q15, q0, q8");
COMPARE(vqadd(NeonU8, q0, q1, q2),
"f3020054 vqadd.u8 q0, q1, q2");
COMPARE(vqadd(NeonS16, q1, q2, q8),
"f2142070 vqadd.s16 q1, q2, q8");
COMPARE(vqadd(NeonU32, q15, q0, q8),
"f360e070 vqadd.u32 q15, q0, q8");
COMPARE(vsub(q15, q0, q8),
"f260ed60 vsub.f32 q15, q0, q8");
COMPARE(vsub(Neon8, q0, q1, q2),
......@@ -1055,6 +1061,12 @@ TEST(Neon) {
"f3142860 vsub.i16 q1, q2, q8");
COMPARE(vsub(Neon32, q15, q0, q8),
"f360e860 vsub.i32 q15, q0, q8");
COMPARE(vqsub(NeonU8, q0, q1, q2),
"f3020254 vqsub.u8 q0, q1, q2");
COMPARE(vqsub(NeonS16, q1, q2, q8),
"f2142270 vqsub.s16 q1, q2, q8");
COMPARE(vqsub(NeonU32, q15, q0, q8),
"f360e270 vqsub.u32 q15, q0, q8");
COMPARE(vmul(q0, q1, q2),
"f3020d54 vmul.f32 q0, q1, q2");
COMPARE(vmul(Neon8, q0, q1, q2),
......