Commit 5f7e6331 authored by bbudge, committed by Commit bot

[ARM] Implement D-register versions of vzip, vuzp, and vtrn.

LOG=N
BUG=v8:6020

Review-Url: https://codereview.chromium.org/2797923006
Cr-Original-Commit-Position: refs/heads/master@{#44536}
Committed: https://chromium.googlesource.com/v8/v8/+/6588187ae3acaa5b40762c539ee9fe355551bea3
Review-Url: https://codereview.chromium.org/2797923006
Cr-Commit-Position: refs/heads/master@{#44540}
parent 41b22805
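For context, the three permute instructions gaining D-register forms here operate on a register viewed as lanes: vzip interleaves two registers, vuzp de-interleaves them, and vtrn transposes corresponding 2x2 element pairs; both operands are rewritten in place. Below is an illustrative, self-contained C++ sketch of those semantics on eight byte lanes (not V8 code; the names vzip8/vuzp8/vtrn8 are made up for this note):

#include <algorithm>
#include <array>
#include <cstdint>
#include <cstdio>

using D = std::array<uint8_t, 8>;  // one 64-bit D register viewed as 8 byte lanes

// vzip.8 d0, d1 -- interleave: d0 receives the zipped low halves, d1 the zipped high halves.
void vzip8(D& d0, D& d1) {
  D a = d0, b = d1;
  for (int i = 0; i < 4; i++) {
    d0[2 * i] = a[i];
    d0[2 * i + 1] = b[i];
    d1[2 * i] = a[i + 4];
    d1[2 * i + 1] = b[i + 4];
  }
}

// vuzp.8 d0, d1 -- de-interleave: d0 collects the even lanes, d1 the odd lanes.
void vuzp8(D& d0, D& d1) {
  D a = d0, b = d1;
  for (int i = 0; i < 4; i++) {
    d0[i] = a[2 * i];
    d0[i + 4] = b[2 * i];
    d1[i] = a[2 * i + 1];
    d1[i + 4] = b[2 * i + 1];
  }
}

// vtrn.8 d0, d1 -- transpose 2x2 blocks: swap d0's odd lanes with d1's even lanes.
void vtrn8(D& d0, D& d1) {
  for (int i = 0; i < 4; i++) std::swap(d0[2 * i + 1], d1[2 * i]);
}

int main() {
  D a = {0, 1, 2, 3, 4, 5, 6, 7}, b = a;
  vzip8(a, b);  // a: 0 0 1 1 2 2 3 3   b: 4 4 5 5 6 6 7 7
  for (uint8_t x : a) std::printf("%d ", x);
  std::printf("\n");
  return 0;
}

Running this with both inputs set to bytes 0..7 reproduces the vzipd8a/vzipd8b expectations added to test-assembler-arm below.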
......@@ -4084,6 +4084,16 @@ void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) {
enum NeonRegType { NEON_D, NEON_Q };
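// Splits a D- or Q-register code into its register field and top bit; for
// Q registers the B6 (quad) bit is also set in the instruction encoding.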
void NeonSplitCode(NeonRegType type, int code, int* vm, int* m, int* encoding) {
if (type == NEON_D) {
DwVfpRegister::split_code(code, vm, m);
} else {
DCHECK_EQ(type, NEON_Q);
QwNeonRegister::split_code(code, vm, m);
*encoding |= B6;
}
}
enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF };
static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
......@@ -4116,16 +4126,11 @@ static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
UNREACHABLE();
break;
}
int vd, d, vm, m;
if (reg_type == NEON_Q) {
op_encoding |= B6;
QwNeonRegister::split_code(dst_code, &vd, &d);
QwNeonRegister::split_code(src_code, &vm, &m);
} else {
DCHECK_EQ(reg_type, NEON_D);
DwVfpRegister::split_code(dst_code, &vd, &d);
DwVfpRegister::split_code(src_code, &vm, &m);
}
int vd, d;
NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
int vm, m;
NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);
return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | vd * B12 | m * B5 |
vm | op_encoding;
}
......@@ -4215,18 +4220,13 @@ static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, NeonRegType reg_type,
UNREACHABLE();
break;
}
int vd, d, vn, n, vm, m;
if (reg_type == NEON_Q) {
op_encoding |= B6;
QwNeonRegister::split_code(dst_code, &vd, &d);
QwNeonRegister::split_code(src_code1, &vn, &n);
QwNeonRegister::split_code(src_code2, &vm, &m);
} else {
DCHECK_EQ(reg_type, NEON_D);
DwVfpRegister::split_code(dst_code, &vd, &d);
DwVfpRegister::split_code(src_code1, &vn, &n);
DwVfpRegister::split_code(src_code2, &vm, &m);
}
int vd, d;
NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
int vn, n;
NeonSplitCode(reg_type, src_code1, &vn, &n, &op_encoding);
int vm, m;
NeonSplitCode(reg_type, src_code2, &vm, &m, &op_encoding);
return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 |
n * B7 | m * B5 | B4 | vm;
}
......@@ -4710,8 +4710,8 @@ void Assembler::vext(QwNeonRegister dst, QwNeonRegister src1,
enum NeonSizedOp { VZIP, VUZP, VREV16, VREV32, VREV64, VTRN };
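// Encodes the sized permute/reverse ops above for either D or Q registers;
// callers pass raw register codes together with a NeonRegType.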
static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonSize size,
QwNeonRegister dst, QwNeonRegister src) {
static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonRegType reg_type,
NeonSize size, int dst_code, int src_code) {
int op_encoding = 0;
switch (op) {
case VZIP:
......@@ -4737,54 +4737,76 @@ static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonSize size,
break;
}
int vd, d;
dst.split_code(&vd, &d);
NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
int vm, m;
src.split_code(&vm, &m);
NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);
int sz = static_cast<int>(size);
return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | B6 |
m * B5 | vm | op_encoding;
return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | m * B5 |
vm | op_encoding;
}
void Assembler::vzip(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
DCHECK(IsEnabled(NEON));
// vzip.<size>(Dn, Dm) SIMD zip (interleave).
// Instruction details available in ARM DDI 0406C.b, A8-1102.
emit(EncodeNeonSizedOp(VZIP, NEON_D, size, src1.code(), src2.code()));
}
void Assembler::vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vzip.<size>(Qn, Qm) SIMD zip (interleave).
// vzip.<size>(Qn, Qm) SIMD zip (interleave).
// Instruction details available in ARM DDI 0406C.b, A8-1102.
emit(EncodeNeonSizedOp(VZIP, size, src1, src2));
emit(EncodeNeonSizedOp(VZIP, NEON_Q, size, src1.code(), src2.code()));
}
void Assembler::vuzp(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
DCHECK(IsEnabled(NEON));
// vuzp.<size>(Dn, Dm) SIMD un-zip (de-interleave).
// Instruction details available in ARM DDI 0406C.b, A8-1100.
emit(EncodeNeonSizedOp(VUZP, NEON_D, size, src1.code(), src2.code()));
}
void Assembler::vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave).
// vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave).
// Instruction details available in ARM DDI 0406C.b, A8-1100.
emit(EncodeNeonSizedOp(VUZP, size, src1, src2));
emit(EncodeNeonSizedOp(VUZP, NEON_Q, size, src1.code(), src2.code()));
}
void Assembler::vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
DCHECK(IsEnabled(NEON));
// Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse.
// Qd = vrev16.<size>(Qm) SIMD element reverse.
// Instruction details available in ARM DDI 0406C.b, A8-1028.
emit(EncodeNeonSizedOp(VREV16, size, dst, src));
emit(EncodeNeonSizedOp(VREV16, NEON_Q, size, dst.code(), src.code()));
}
void Assembler::vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
DCHECK(IsEnabled(NEON));
// Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse.
// Qd = vrev32.<size>(Qm) SIMD element reverse.
// Instruction details available in ARM DDI 0406C.b, A8-1028.
emit(EncodeNeonSizedOp(VREV32, size, dst, src));
emit(EncodeNeonSizedOp(VREV32, NEON_Q, size, dst.code(), src.code()));
}
void Assembler::vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
DCHECK(IsEnabled(NEON));
// Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse.
// Qd = vrev64.<size>(Qm) SIMD element reverse.
// Instruction details available in ARM DDI 0406C.b, A8-1028.
emit(EncodeNeonSizedOp(VREV64, size, dst, src));
emit(EncodeNeonSizedOp(VREV64, NEON_Q, size, dst.code(), src.code()));
}
void Assembler::vtrn(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
DCHECK(IsEnabled(NEON));
// vtrn.<size>(Dn, Dm) SIMD element transpose.
// Instruction details available in ARM DDI 0406C.b, A8-1096.
emit(EncodeNeonSizedOp(VTRN, NEON_D, size, src1.code(), src2.code()));
}
void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse.
// vtrn.<size>(Qn, Qm) SIMD element transpose.
// Instruction details available in ARM DDI 0406C.b, A8-1096.
emit(EncodeNeonSizedOp(VTRN, size, src1, src2));
emit(EncodeNeonSizedOp(VTRN, NEON_Q, size, src1.code(), src2.code()));
}
// Encode NEON vtbl / vtbx instruction.
......
......@@ -1396,11 +1396,14 @@ class Assembler : public AssemblerBase {
QwNeonRegister src2);
void vext(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2,
int bytes);
void vzip(NeonSize size, DwVfpRegister src1, DwVfpRegister src2);
void vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2);
void vuzp(NeonSize size, DwVfpRegister src1, DwVfpRegister src2);
void vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2);
void vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src);
void vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src);
void vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src);
void vtrn(NeonSize size, DwVfpRegister src1, DwVfpRegister src2);
void vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2);
void vtbl(DwVfpRegister dst, const NeonListOperand& list,
DwVfpRegister index);
......
......@@ -2167,33 +2167,56 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmovl.u%d q%d, d%d", imm3 * 8, Vd, Vm);
} else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) {
if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) {
if (instr->Bit(6) == 0) {
if (instr->Bits(11, 7) == 0x18) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
int index = instr->Bit(19);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vdup q%d, d%d[%d]", Vd, Vm, index);
} else if (instr->Bits(11, 10) == 0x2) {
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vn = instr->VFPNRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
int len = instr->Bits(9, 8);
NeonListOperand list(DwVfpRegister::from_code(Vn), len + 1);
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s d%d, ",
instr->Bit(6) == 0 ? "vtbl.8" : "vtbx.8", Vd);
FormatNeonList(Vn, list.type());
Print(", ");
PrintDRegister(Vm);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 &&
instr->Bits(7, 6) != 0) {
// vqmovn.<type><size> Dd, Qm.
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
char type = instr->Bit(6) != 0 ? 'u' : 's';
int size = 2 * kBitsPerByte * (1 << instr->Bits(19, 18));
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vqmovn.%c%i d%d, q%d",
type, size, Vd, Vm);
} else {
int Vd, Vm;
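// Bit 6 selects between the D-register (0) and Q-register (1) forms
// of the ops decoded below.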
if (instr->Bit(6) == 0) {
Vd = instr->VFPDRegValue(kDoublePrecision);
Vm = instr->VFPMRegValue(kDoublePrecision);
} else {
Vd = instr->VFPDRegValue(kSimd128Precision);
Vm = instr->VFPMRegValue(kSimd128Precision);
}
if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) {
if (instr->Bit(6) == 0) {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vswp d%d, d%d", Vd, Vm);
} else {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vswp q%d, q%d", Vd, Vm);
}
} else if (instr->Bits(11, 7) == 0x18) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
int index = instr->Bit(19);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vdup q%d, d%d[%d]", Vd, Vm, index);
} else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vmvn q%d, q%d", Vd, Vm);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmvn q%d, q%d", Vd, Vm);
} else if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 &&
instr->Bit(6) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
const char* suffix = nullptr;
int op = instr->Bits(8, 7);
switch (op) {
......@@ -2212,48 +2235,39 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
}
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vcvt.%s q%d, q%d", suffix, Vd, Vm);
} else if (instr->Bits(11, 10) == 0x2) {
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vn = instr->VFPNRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
int len = instr->Bits(9, 8);
NeonListOperand list(DwVfpRegister::from_code(Vn), len + 1);
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s d%d, ",
instr->Bit(6) == 0 ? "vtbl.8" : "vtbx.8", Vd);
FormatNeonList(Vn, list.type());
Print(", ");
PrintDRegister(Vm);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 &&
instr->Bit(6) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) {
int size = kBitsPerByte * (1 << instr->Bits(19, 18));
const char* op = instr->Bit(7) != 0 ? "vzip" : "vuzp";
if (instr->Bit(6) == 0) {
// vzip/vuzp.<size> Dd, Dm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.%d d%d, d%d", op, size, Vd, Vm);
} else {
// vzip/vuzp.<size> Qd, Qm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.%d q%d, q%d", op, size, Vd, Vm);
}
} else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0 &&
instr->Bit(6) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int size = kBitsPerByte * (1 << instr->Bits(19, 18));
int op = kBitsPerByte
<< (static_cast<int>(Neon64) - instr->Bits(8, 7));
// vrev<op>.<size> Qd, Qm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vrev%d.%d q%d, q%d", op, size, Vd, Vm);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) {
int size = kBitsPerByte * (1 << instr->Bits(19, 18));
if (instr->Bit(6) == 0) {
// vtrn.<size> Dd, Dm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vtrn.%d d%d, d%d", size, Vd, Vm);
} else {
// vtrn.<size> Qd, Qm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vtrn.%d q%d, q%d", size, Vd, Vm);
}
} else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0 &&
instr->Bit(6) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int size = kBitsPerByte * (1 << instr->Bits(19, 18));
char type = instr->Bit(10) != 0 ? 'f' : 's';
if (instr->Bits(9, 6) == 0xd) {
......@@ -2272,24 +2286,13 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
} else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5 &&
instr->Bit(6) == 1) {
// vrecpe/vrsqrte.f32 Qd, Qm.
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
const char* op = instr->Bit(7) == 0 ? "vrecpe" : "vrsqrte";
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.f32 q%d, q%d", op, Vd, Vm);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 &&
instr->Bits(7, 6) != 0) {
// vqmovn.<type><size> Dd, Qm.
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
char type = instr->Bit(6) != 0 ? 'u' : 's';
int size = 2 * kBitsPerByte * (1 << instr->Bits(19, 18));
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vqmovn.%c%i d%d, q%d",
type, size, Vd, Vm);
} else {
Unknown(instr);
}
}
} else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1 &&
instr->Bit(6) == 1) {
// vshr.u<size> Qd, Qm, shift
......
......@@ -896,28 +896,20 @@ void Simulator::set_d_register(int dreg, const uint32_t* value) {
memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2);
}
template <typename T>
void Simulator::get_d_register(int dreg, T* value) {
DCHECK((dreg >= 0) && (dreg < num_d_registers));
memcpy(value, vfp_registers_ + dreg * 2, kDoubleSize);
}
template <typename T>
void Simulator::set_d_register(int dreg, const T* value) {
DCHECK((dreg >= 0) && (dreg < num_d_registers));
memcpy(vfp_registers_ + dreg * 2, value, kDoubleSize);
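// Unified accessors for D- and Q-sized NEON registers: SIZE is kDoubleSize or
// kSimd128Size, and the array-reference parameter keeps the element count
// consistent with the register width.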
template <typename T, int SIZE>
void Simulator::get_neon_register(int reg, T (&value)[SIZE / sizeof(T)]) {
DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize);
DCHECK_LE(0, reg);
DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg);
memcpy(value, vfp_registers_ + reg * (SIZE / 4), SIZE);
}
template <typename T>
void Simulator::get_q_register(int qreg, T* value) {
DCHECK((qreg >= 0) && (qreg < num_q_registers));
memcpy(value, vfp_registers_ + qreg * 4, kSimd128Size);
}
template <typename T>
void Simulator::set_q_register(int qreg, const T* value) {
DCHECK((qreg >= 0) && (qreg < num_q_registers));
memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size);
template <typename T, int SIZE>
void Simulator::set_neon_register(int reg, const T (&value)[SIZE / sizeof(T)]) {
DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize);
DCHECK_LE(0, reg);
DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg);
memcpy(vfp_registers_ + reg * (SIZE / 4), value, SIZE);
}
// Raw access to the PC register.
......@@ -3508,7 +3500,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
UNREACHABLE();
break;
}
set_q_register(vd, q_data);
set_neon_register(vd, q_data);
}
} else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) {
// vmov (scalar to ARM core register)
......@@ -3995,7 +3987,6 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) {
}
// Templated operations for NEON instructions.
// TODO(bbudge) Add more templates for use in DecodeSpecialCondition.
template <typename T, typename U>
U Widen(T value) {
static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
......@@ -4025,21 +4016,38 @@ T Clamp(int64_t value) {
return static_cast<T>(clamped);
}
template <typename T>
T MinMax(T a, T b, bool is_min) {
return is_min ? std::min(a, b) : std::max(a, b);
}
template <typename T, typename U>
void Widen(Simulator* simulator, int Vd, int Vm) {
static const int kLanes = 8 / sizeof(T);
T src[kLanes];
U dst[kLanes];
simulator->get_d_register(Vm, src);
simulator->get_neon_register<T, kDoubleSize>(Vm, src);
for (int i = 0; i < kLanes; i++) {
dst[i] = Widen<T, U>(src[i]);
}
simulator->set_q_register(Vd, dst);
simulator->set_neon_register(Vd, dst);
}
template <typename T, int SIZE>
void Abs(Simulator* simulator, int Vd, int Vm) {
static const int kElems = SIZE / sizeof(T);
T src[kElems];
simulator->get_neon_register<T, SIZE>(Vm, src);
for (int i = 0; i < kElems; i++) {
src[i] = std::abs(src[i]);
}
simulator->set_neon_register<T, SIZE>(Vd, src);
}
template <typename T, int SIZE>
void Neg(Simulator* simulator, int Vd, int Vm) {
static const int kElems = SIZE / sizeof(T);
T src[kElems];
simulator->get_neon_register<T, SIZE>(Vm, src);
for (int i = 0; i < kElems; i++) {
src[i] = -src[i];
}
simulator->set_neon_register<T, SIZE>(Vd, src);
}
template <typename T, typename U>
......@@ -4047,35 +4055,222 @@ void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) {
static const int kLanes = 16 / sizeof(T);
T src[kLanes];
U dst[kLanes];
simulator->get_q_register(Vm, src);
simulator->get_neon_register(Vm, src);
for (int i = 0; i < kLanes; i++) {
dst[i] = Narrow<T, U>(Clamp<U>(src[i]));
}
simulator->set_d_register(Vd, dst);
simulator->set_neon_register<U, kDoubleSize>(Vd, dst);
}
template <typename T>
void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
static const int kLanes = 16 / sizeof(T);
T src1[kLanes], src2[kLanes];
simulator->get_q_register(Vn, src1);
simulator->get_q_register(Vm, src2);
simulator->get_neon_register(Vn, src1);
simulator->get_neon_register(Vm, src2);
for (int i = 0; i < kLanes; i++) {
src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i]));
}
simulator->set_q_register(Vd, src1);
simulator->set_neon_register(Vd, src1);
}
template <typename T>
void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
static const int kLanes = 16 / sizeof(T);
T src1[kLanes], src2[kLanes];
simulator->get_q_register(Vn, src1);
simulator->get_q_register(Vm, src2);
simulator->get_neon_register(Vn, src1);
simulator->get_neon_register(Vm, src2);
for (int i = 0; i < kLanes; i++) {
src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i]));
}
simulator->set_q_register(Vd, src1);
simulator->set_neon_register(Vd, src1);
}
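// vzip: interleaves the elements of Vd and Vm; the interleaved low halves are
// written back to Vd and the interleaved high halves to Vm.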
template <typename T, int SIZE>
void Zip(Simulator* simulator, int Vd, int Vm) {
static const int kElems = SIZE / sizeof(T);
static const int kPairs = kElems / 2;
T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems];
simulator->get_neon_register<T, SIZE>(Vd, src1);
simulator->get_neon_register<T, SIZE>(Vm, src2);
for (int i = 0; i < kPairs; i++) {
dst1[i * 2] = src1[i];
dst1[i * 2 + 1] = src2[i];
dst2[i * 2] = src1[i + kPairs];
dst2[i * 2 + 1] = src2[i + kPairs];
}
simulator->set_neon_register<T, SIZE>(Vd, dst1);
simulator->set_neon_register<T, SIZE>(Vm, dst2);
}
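// vuzp: de-interleaves the Vd:Vm pair; even-indexed elements end up in Vd,
// odd-indexed elements in Vm.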
template <typename T, int SIZE>
void Unzip(Simulator* simulator, int Vd, int Vm) {
static const int kElems = SIZE / sizeof(T);
static const int kPairs = kElems / 2;
T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems];
simulator->get_neon_register<T, SIZE>(Vd, src1);
simulator->get_neon_register<T, SIZE>(Vm, src2);
for (int i = 0; i < kPairs; i++) {
dst1[i] = src1[i * 2];
dst1[i + kPairs] = src2[i * 2];
dst2[i] = src1[i * 2 + 1];
dst2[i + kPairs] = src2[i * 2 + 1];
}
simulator->set_neon_register<T, SIZE>(Vd, dst1);
simulator->set_neon_register<T, SIZE>(Vm, dst2);
}
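// vtrn: treats corresponding element pairs of Vd and Vm as 2x2 matrices and
// transposes them by swapping Vd's odd elements with Vm's even elements.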
template <typename T, int SIZE>
void Transpose(Simulator* simulator, int Vd, int Vm) {
static const int kElems = SIZE / sizeof(T);
static const int kPairs = kElems / 2;
T src1[kElems], src2[kElems];
simulator->get_neon_register<T, SIZE>(Vd, src1);
simulator->get_neon_register<T, SIZE>(Vm, src2);
for (int i = 0; i < kPairs; i++) {
std::swap(src1[2 * i + 1], src2[2 * i]);
}
simulator->set_neon_register<T, SIZE>(Vd, src1);
simulator->set_neon_register<T, SIZE>(Vm, src2);
}
template <typename T, int SIZE>
void Test(Simulator* simulator, int Vd, int Vm, int Vn) {
static const int kElems = SIZE / sizeof(T);
T src1[kElems], src2[kElems];
simulator->get_neon_register<T, SIZE>(Vn, src1);
simulator->get_neon_register<T, SIZE>(Vm, src2);
for (int i = 0; i < kElems; i++) {
src1[i] = (src1[i] & src2[i]) != 0 ? -1 : 0;
}
simulator->set_neon_register<T, SIZE>(Vd, src1);
}
template <typename T, int SIZE>
void Add(Simulator* simulator, int Vd, int Vm, int Vn) {
static const int kElems = SIZE / sizeof(T);
T src1[kElems], src2[kElems];
simulator->get_neon_register<T, SIZE>(Vn, src1);
simulator->get_neon_register<T, SIZE>(Vm, src2);
for (int i = 0; i < kElems; i++) {
src1[i] += src2[i];
}
simulator->set_neon_register<T, SIZE>(Vd, src1);
}
template <typename T, int SIZE>
void Sub(Simulator* simulator, int Vd, int Vm, int Vn) {
static const int kElems = SIZE / sizeof(T);
T src1[kElems], src2[kElems];
simulator->get_neon_register<T, SIZE>(Vn, src1);
simulator->get_neon_register<T, SIZE>(Vm, src2);
for (int i = 0; i < kElems; i++) {
src1[i] -= src2[i];
}
simulator->set_neon_register<T, SIZE>(Vd, src1);
}
template <typename T, int SIZE>
void Mul(Simulator* simulator, int Vd, int Vm, int Vn) {
static const int kElems = SIZE / sizeof(T);
T src1[kElems], src2[kElems];
simulator->get_neon_register<T, SIZE>(Vn, src1);
simulator->get_neon_register<T, SIZE>(Vm, src2);
for (int i = 0; i < kElems; i++) {
src1[i] *= src2[i];
}
simulator->set_neon_register<T, SIZE>(Vd, src1);
}
template <typename T, int SIZE>
void ShiftLeft(Simulator* simulator, int Vd, int Vm, int shift) {
static const int kElems = SIZE / sizeof(T);
T src[kElems];
simulator->get_neon_register<T, SIZE>(Vm, src);
for (int i = 0; i < kElems; i++) {
src[i] <<= shift;
}
simulator->set_neon_register<T, SIZE>(Vd, src);
}
template <typename T, int SIZE>
void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
static const int kElems = SIZE / sizeof(T);
T src[kElems];
simulator->get_neon_register<T, SIZE>(Vm, src);
for (int i = 0; i < kElems; i++) {
src[i] >>= shift;
}
simulator->set_neon_register<T, SIZE>(Vd, src);
}
template <typename T, int SIZE>
void ArithmeticShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
static const int kElems = SIZE / sizeof(T);
T src[kElems];
simulator->get_neon_register<T, SIZE>(Vm, src);
for (int i = 0; i < kElems; i++) {
src[i] = ArithmeticShiftRight(src[i], shift);
}
simulator->set_neon_register<T, SIZE>(Vd, src);
}
template <typename T, int SIZE>
void CompareEqual(Simulator* simulator, int Vd, int Vm, int Vn) {
static const int kElems = SIZE / sizeof(T);
T src1[kElems], src2[kElems];
simulator->get_neon_register<T, SIZE>(Vn, src1);
simulator->get_neon_register<T, SIZE>(Vm, src2);
for (int i = 0; i < kElems; i++) {
src1[i] = src1[i] == src2[i] ? -1 : 0;
}
simulator->set_neon_register<T, SIZE>(Vd, src1);
}
template <typename T, int SIZE>
void CompareGreater(Simulator* simulator, int Vd, int Vm, int Vn, bool ge) {
static const int kElems = SIZE / sizeof(T);
T src1[kElems], src2[kElems];
simulator->get_neon_register<T, SIZE>(Vn, src1);
simulator->get_neon_register<T, SIZE>(Vm, src2);
for (int i = 0; i < kElems; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? -1 : 0;
else
src1[i] = src1[i] > src2[i] ? -1 : 0;
}
simulator->set_neon_register<T, SIZE>(Vd, src1);
}
template <typename T>
T MinMax(T a, T b, bool is_min) {
return is_min ? std::min(a, b) : std::max(a, b);
}
template <typename T, int SIZE>
void MinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) {
static const int kElems = SIZE / sizeof(T);
T src1[kElems], src2[kElems];
simulator->get_neon_register<T, SIZE>(Vn, src1);
simulator->get_neon_register<T, SIZE>(Vm, src2);
for (int i = 0; i < kElems; i++) {
src1[i] = MinMax(src1[i], src2[i], min);
}
simulator->set_neon_register<T, SIZE>(Vd, src1);
}
template <typename T>
void PairwiseMinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) {
static const int kElems = kDoubleSize / sizeof(T);
static const int kPairs = kElems / 2;
T dst[kElems], src1[kElems], src2[kElems];
simulator->get_neon_register<T, kDoubleSize>(Vn, src1);
simulator->get_neon_register<T, kDoubleSize>(Vm, src2);
for (int i = 0; i < kPairs; i++) {
dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
}
simulator->set_neon_register<T, kDoubleSize>(Vd, dst);
}
void Simulator::DecodeSpecialCondition(Instruction* instr) {
......@@ -4121,25 +4316,25 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
// vmov Qd, Qm.
// vorr, Qd, Qm, Qn.
uint32_t src1[4];
get_q_register(Vm, src1);
get_neon_register(Vm, src1);
if (Vm != Vn) {
uint32_t src2[4];
get_q_register(Vn, src2);
get_neon_register(Vn, src2);
for (int i = 0; i < 4; i++) {
src1[i] = src1[i] | src2[i];
}
}
set_q_register(Vd, src1);
set_neon_register(Vd, src1);
} else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
instr->Bit(4) == 1) {
// vand Qd, Qm, Qn.
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
get_neon_register(Vn, src1);
get_neon_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = src1[i] & src2[i];
}
set_q_register(Vd, src1);
set_neon_register(Vd, src1);
} else {
UNIMPLEMENTED();
}
......@@ -4173,45 +4368,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
bool ge = instr->Bit(4) == 1;
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8: {
int8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFF : 0;
}
set_q_register(Vd, src1);
case Neon8:
CompareGreater<int8_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
break;
}
case Neon16: {
int16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFF : 0;
}
set_q_register(Vd, src1);
case Neon16:
CompareGreater<int16_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
break;
}
case Neon32: {
int32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0;
}
set_q_register(Vd, src1);
case Neon32:
CompareGreater<int32_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
break;
}
default:
UNREACHABLE();
break;
......@@ -4223,36 +4388,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
bool min = instr->Bit(4) != 0;
switch (size) {
case Neon8: {
int8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] = MinMax(src1[i], src2[i], min);
}
set_q_register(Vd, src1);
case Neon8:
MinMax<int8_t, kSimd128Size>(this, Vd, Vm, Vn, min);
break;
}
case Neon16: {
int16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] = MinMax(src1[i], src2[i], min);
}
set_q_register(Vd, src1);
case Neon16:
MinMax<int16_t, kSimd128Size>(this, Vd, Vm, Vn, min);
break;
}
case Neon32: {
int32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = MinMax(src1[i], src2[i], min);
}
set_q_register(Vd, src1);
case Neon32:
MinMax<int32_t, kSimd128Size>(this, Vd, Vm, Vn, min);
break;
}
default:
UNREACHABLE();
break;
......@@ -4265,36 +4409,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
if (instr->Bit(4) == 0) {
// vadd.i<size> Qd, Qm, Qn.
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] += src2[i];
}
set_q_register(Vd, src1);
case Neon8:
Add<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] += src2[i];
}
set_q_register(Vd, src1);
case Neon16:
Add<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] += src2[i];
}
set_q_register(Vd, src1);
case Neon32:
Add<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
default:
UNREACHABLE();
break;
......@@ -4302,36 +4425,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
} else {
// vtst.i<size> Qd, Qm, Qn.
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0;
}
set_q_register(Vd, src1);
case Neon8:
Test<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0;
}
set_q_register(Vd, src1);
case Neon16:
Test<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0;
}
set_q_register(Vd, src1);
case Neon32:
Test<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
default:
UNREACHABLE();
break;
......@@ -4344,36 +4446,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
// vmul.i<size> Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] *= src2[i];
}
set_q_register(Vd, src1);
case Neon8:
Mul<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] *= src2[i];
}
set_q_register(Vd, src1);
case Neon16:
Mul<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] *= src2[i];
}
set_q_register(Vd, src1);
case Neon32:
Mul<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
default:
UNREACHABLE();
break;
......@@ -4388,37 +4469,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
bool min = instr->Bit(4) != 0;
switch (size) {
case Neon8: {
int8_t dst[8], src1[8], src2[8];
get_d_register(Vn, src1);
get_d_register(Vm, src2);
for (int i = 0; i < 4; i++) {
dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
}
set_d_register(Vd, dst);
case Neon8:
PairwiseMinMax<int8_t>(this, Vd, Vm, Vn, min);
break;
}
case Neon16: {
int16_t dst[4], src1[4], src2[4];
get_d_register(Vn, src1);
get_d_register(Vm, src2);
for (int i = 0; i < 2; i++) {
dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
}
set_d_register(Vd, dst);
case Neon16:
PairwiseMinMax<int16_t>(this, Vd, Vm, Vn, min);
break;
}
case Neon32: {
int32_t dst[2], src1[2], src2[2];
get_d_register(Vn, src1);
get_d_register(Vm, src2);
dst[0] = MinMax(src1[0], src1[1], min);
dst[1] = MinMax(src2[0], src2[1], min);
set_d_register(Vd, dst);
case Neon32:
PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min);
break;
}
default:
UNREACHABLE();
break;
......@@ -4428,8 +4487,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
case 0xd: {
if (instr->Bit(4) == 0) {
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
get_neon_register(Vn, src1);
get_neon_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (instr->Bit(21) == 0) {
// vadd.f32 Qd, Qm, Qn.
......@@ -4439,7 +4498,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
src1[i] = src1[i] - src2[i];
}
}
set_q_register(Vd, src1);
set_neon_register(Vd, src1);
} else {
UNIMPLEMENTED();
}
......@@ -4449,13 +4508,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) {
// vceq.f32.
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
get_neon_register(Vn, src1);
get_neon_register(Vm, src2);
uint32_t dst[4];
for (int i = 0; i < 4; i++) {
dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
}
set_q_register(Vd, dst);
set_neon_register(Vd, dst);
} else {
UNIMPLEMENTED();
}
......@@ -4464,8 +4523,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
case 0xf: {
if (instr->Bit(20) == 0 && instr->Bit(6) == 1) {
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
get_neon_register(Vn, src1);
get_neon_register(Vm, src2);
if (instr->Bit(4) == 1) {
if (instr->Bit(21) == 0) {
// vrecps.f32 Qd, Qm, Qn.
......@@ -4485,7 +4544,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
src1[i] = MinMax(src1[i], src2[i], min);
}
}
set_q_register(Vd, src1);
set_neon_register(Vd, src1);
} else {
UNIMPLEMENTED();
}
......@@ -4526,8 +4585,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
uint8_t src1[16], src2[16], dst[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
get_neon_register(Vn, src1);
get_neon_register(Vm, src2);
int boundary = kSimd128Size - imm4;
int i = 0;
for (; i < boundary; i++) {
......@@ -4536,7 +4595,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
for (; i < 16; i++) {
dst[i] = src2[i - boundary];
}
set_q_register(Vd, dst);
set_neon_register(Vd, dst);
} else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) {
// vshl.i<size> Qd, Qm, shift
int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
......@@ -4545,33 +4604,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
int Vm = instr->VFPMRegValue(kSimd128Precision);
NeonSize ns = static_cast<NeonSize>(size / 16);
switch (ns) {
case Neon8: {
uint8_t src[16];
get_q_register(Vm, src);
for (int i = 0; i < 16; i++) {
src[i] <<= shift;
}
set_q_register(Vd, src);
case Neon8:
ShiftLeft<uint8_t, kSimd128Size>(this, Vd, Vm, shift);
break;
}
case Neon16: {
uint16_t src[8];
get_q_register(Vm, src);
for (int i = 0; i < 8; i++) {
src[i] <<= shift;
}
set_q_register(Vd, src);
case Neon16:
ShiftLeft<uint16_t, kSimd128Size>(this, Vd, Vm, shift);
break;
}
case Neon32: {
uint32_t src[4];
get_q_register(Vm, src);
for (int i = 0; i < 4; i++) {
src[i] <<= shift;
}
set_q_register(Vd, src);
case Neon32:
ShiftLeft<uint32_t, kSimd128Size>(this, Vd, Vm, shift);
break;
}
default:
UNREACHABLE();
break;
......@@ -4584,33 +4625,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
int Vm = instr->VFPMRegValue(kSimd128Precision);
NeonSize ns = static_cast<NeonSize>(size / 16);
switch (ns) {
case Neon8: {
int8_t src[16];
get_q_register(Vm, src);
for (int i = 0; i < 16; i++) {
src[i] = ArithmeticShiftRight(src[i], shift);
}
set_q_register(Vd, src);
case Neon8:
ArithmeticShiftRight<int8_t, kSimd128Size>(this, Vd, Vm, shift);
break;
}
case Neon16: {
int16_t src[8];
get_q_register(Vm, src);
for (int i = 0; i < 8; i++) {
src[i] = ArithmeticShiftRight(src[i], shift);
}
set_q_register(Vd, src);
case Neon16:
ArithmeticShiftRight<int16_t, kSimd128Size>(this, Vd, Vm, shift);
break;
}
case Neon32: {
int32_t src[4];
get_q_register(Vm, src);
for (int i = 0; i < 4; i++) {
src[i] = ArithmeticShiftRight(src[i], shift);
}
set_q_register(Vd, src);
case Neon32:
ArithmeticShiftRight<int32_t, kSimd128Size>(this, Vd, Vm, shift);
break;
}
default:
UNREACHABLE();
break;
......@@ -4658,13 +4681,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) {
// vbsl.size Qd, Qm, Qn.
uint32_t dst[4], src1[4], src2[4];
get_q_register(Vd, dst);
get_q_register(Vn, src1);
get_q_register(Vm, src2);
get_neon_register(Vd, dst);
get_neon_register(Vn, src1);
get_neon_register(Vm, src2);
for (int i = 0; i < 4; i++) {
dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
}
set_q_register(Vd, dst);
set_neon_register(Vd, dst);
} else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) {
if (instr->Bit(6) == 0) {
// veor Dd, Dn, Dm
......@@ -4677,10 +4700,10 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
} else {
// veor Qd, Qn, Qm
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
get_neon_register(Vn, src1);
get_neon_register(Vm, src2);
for (int i = 0; i < 4; i++) src1[i] ^= src2[i];
set_q_register(Vd, src1);
set_neon_register(Vd, src1);
}
} else {
UNIMPLEMENTED();
......@@ -4715,45 +4738,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
bool ge = instr->Bit(4) == 1;
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFu : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFu : 0;
}
set_q_register(Vd, src1);
case Neon8:
CompareGreater<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0;
}
set_q_register(Vd, src1);
case Neon16:
CompareGreater<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
}
set_q_register(Vd, src1);
case Neon32:
CompareGreater<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
break;
}
default:
UNREACHABLE();
break;
......@@ -4765,36 +4758,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
bool min = instr->Bit(4) != 0;
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] = MinMax(src1[i], src2[i], min);
}
set_q_register(Vd, src1);
case Neon8:
MinMax<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, min);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] = MinMax(src1[i], src2[i], min);
}
set_q_register(Vd, src1);
case Neon16:
MinMax<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, min);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = MinMax(src1[i], src2[i], min);
}
set_q_register(Vd, src1);
case Neon32:
MinMax<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, min);
break;
}
default:
UNREACHABLE();
break;
......@@ -4806,36 +4778,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
// vsub.size Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] -= src2[i];
}
set_q_register(Vd, src1);
case Neon8:
Sub<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] -= src2[i];
}
set_q_register(Vd, src1);
case Neon16:
Sub<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] -= src2[i];
}
set_q_register(Vd, src1);
case Neon32:
Sub<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
default:
UNREACHABLE();
break;
......@@ -4844,36 +4795,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
// vceq.size Qd, Qm, Qn.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0;
}
set_q_register(Vd, src1);
case Neon8:
CompareEqual<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0;
}
set_q_register(Vd, src1);
case Neon16:
CompareEqual<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0;
}
set_q_register(Vd, src1);
case Neon32:
CompareEqual<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
break;
}
default:
UNREACHABLE();
break;
......@@ -4886,37 +4816,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
bool min = instr->Bit(4) != 0;
switch (size) {
case Neon8: {
uint8_t dst[8], src1[8], src2[8];
get_d_register(Vn, src1);
get_d_register(Vm, src2);
for (int i = 0; i < 4; i++) {
dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
}
set_d_register(Vd, dst);
case Neon8:
PairwiseMinMax<uint8_t>(this, Vd, Vm, Vn, min);
break;
}
case Neon16: {
uint16_t dst[4], src1[4], src2[4];
get_d_register(Vn, src1);
get_d_register(Vm, src2);
for (int i = 0; i < 2; i++) {
dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
}
set_d_register(Vd, dst);
case Neon16:
PairwiseMinMax<uint16_t>(this, Vd, Vm, Vn, min);
break;
}
case Neon32: {
uint32_t dst[2], src1[2], src2[2];
get_d_register(Vn, src1);
get_d_register(Vm, src2);
dst[0] = MinMax(src1[0], src1[1], min);
dst[1] = MinMax(src2[0], src2[1], min);
set_d_register(Vd, dst);
case Neon32:
PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min);
break;
}
default:
UNREACHABLE();
break;
......@@ -4927,12 +4835,12 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
// vmul.f32 Qd, Qn, Qm
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
get_neon_register(Vn, src1);
get_neon_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = src1[i] * src2[i];
}
set_q_register(Vd, src1);
set_neon_register(Vd, src1);
} else {
UNIMPLEMENTED();
}
......@@ -4943,8 +4851,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
// vcge/vcgt.f32 Qd, Qm, Qn
bool ge = instr->Bit(21) == 0;
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
get_neon_register(Vn, src1);
get_neon_register(Vm, src2);
uint32_t dst[4];
for (int i = 0; i < 4; i++) {
if (ge) {
......@@ -4953,7 +4861,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
}
}
set_q_register(Vd, dst);
set_neon_register(Vd, dst);
} else {
UNIMPLEMENTED();
}
......@@ -4994,7 +4902,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
uint32_t q_data[4];
get_q_register(Vm, q_data);
get_neon_register(Vm, q_data);
int op = instr->Bits(8, 7);
for (int i = 0; i < 4; i++) {
switch (op) {
......@@ -5020,7 +4928,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
break;
}
}
set_q_register(Vd, q_data);
set_neon_register(Vd, q_data);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) {
if (instr->Bit(6) == 0) {
// vswp Dd, Dm.
......@@ -5036,10 +4944,10 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
uint32_t dval[4], mval[4];
int vd = instr->VFPDRegValue(kSimd128Precision);
int vm = instr->VFPMRegValue(kSimd128Precision);
get_q_register(vd, dval);
get_q_register(vm, mval);
set_q_register(vm, dval);
set_q_register(vd, mval);
get_neon_register(vd, dval);
get_neon_register(vm, mval);
set_neon_register(vm, dval);
set_neon_register(vd, mval);
}
} else if (instr->Bits(11, 7) == 0x18) {
// vdup.32 Qd, Sm.
......@@ -5049,15 +4957,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
uint32_t s_data = get_s_register(vm * 2 + index);
uint32_t q_data[4];
for (int i = 0; i < 4; i++) q_data[i] = s_data;
set_q_register(vd, q_data);
set_neon_register(vd, q_data);
} else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) {
// vmvn Qd, Qm.
int vd = instr->VFPDRegValue(kSimd128Precision);
int vm = instr->VFPMRegValue(kSimd128Precision);
uint32_t q_data[4];
get_q_register(vm, q_data);
get_neon_register(vm, q_data);
for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
set_q_register(vd, q_data);
set_neon_register(vd, q_data);
} else if (instr->Bits(11, 10) == 0x2) {
// vtb[l,x] Dd, <list>, Dm.
int vd = instr->VFPDRegValue(kDoublePrecision);
......@@ -5082,56 +4990,59 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
}
}
set_d_register(vd, &result);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 &&
instr->Bit(6) == 1) {
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) {
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
if (instr->Bit(6) == 0) {
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
if (instr->Bit(7) == 1) {
// vzip.<size> Qd, Qm.
// vzip.<size> Dd, Dm.
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16], dst1[16], dst2[16];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
dst1[i * 2] = src1[i];
dst1[i * 2 + 1] = src2[i];
dst2[i * 2] = src1[i + 8];
dst2[i * 2 + 1] = src2[i + 8];
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
case Neon8:
Zip<uint8_t, kDoubleSize>(this, Vd, Vm);
break;
case Neon16:
Zip<uint16_t, kDoubleSize>(this, Vd, Vm);
break;
case Neon32:
UNIMPLEMENTED();
break;
default:
UNREACHABLE();
break;
}
case Neon16: {
uint16_t src1[8], src2[8], dst1[8], dst2[8];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
dst1[i * 2] = src1[i];
dst1[i * 2 + 1] = src2[i];
dst2[i * 2] = src1[i + 4];
dst2[i * 2 + 1] = src2[i + 4];
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
} else {
// vuzp.<size> Dd, Dm.
switch (size) {
case Neon8:
Unzip<uint8_t, kDoubleSize>(this, Vd, Vm);
break;
case Neon16:
Unzip<uint16_t, kDoubleSize>(this, Vd, Vm);
break;
case Neon32:
UNIMPLEMENTED();
break;
default:
UNREACHABLE();
break;
}
case Neon32: {
uint32_t src1[4], src2[4], dst1[4], dst2[4];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 2; i++) {
dst1[i * 2] = src1[i];
dst1[i * 2 + 1] = src2[i];
dst2[i * 2] = src1[i + 2];
dst2[i * 2 + 1] = src2[i + 2];
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
} else {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
if (instr->Bit(7) == 1) {
// vzip.<size> Qd, Qm.
switch (size) {
case Neon8:
Zip<uint8_t, kSimd128Size>(this, Vd, Vm);
break;
case Neon16:
Zip<uint16_t, kSimd128Size>(this, Vd, Vm);
break;
case Neon32:
Zip<uint32_t, kSimd128Size>(this, Vd, Vm);
break;
}
default:
UNREACHABLE();
break;
......@@ -5139,53 +5050,21 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
} else {
// vuzp.<size> Qd, Qm.
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16], dst1[16], dst2[16];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
dst1[i] = src1[i * 2];
dst1[i + 8] = src2[i * 2];
dst2[i] = src1[i * 2 + 1];
dst2[i + 8] = src2[i * 2 + 1];
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
case Neon8:
Unzip<uint8_t, kSimd128Size>(this, Vd, Vm);
break;
}
case Neon16: {
uint16_t src1[8], src2[8], dst1[8], dst2[8];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
dst1[i] = src1[i * 2];
dst1[i + 4] = src2[i * 2];
dst2[i] = src1[i * 2 + 1];
dst2[i + 4] = src2[i * 2 + 1];
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
case Neon16:
Unzip<uint16_t, kSimd128Size>(this, Vd, Vm);
break;
}
case Neon32: {
uint32_t src1[4], src2[4], dst1[4], dst2[4];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 2; i++) {
dst1[i] = src1[i * 2];
dst1[i + 2] = src2[i * 2];
dst2[i] = src1[i * 2 + 1];
dst2[i + 2] = src2[i * 2 + 1];
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
case Neon32:
Unzip<uint32_t, kSimd128Size>(this, Vd, Vm);
break;
}
default:
UNREACHABLE();
break;
}
}
}
} else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) {
// vrev<op>.size Qd, Qm
int Vd = instr->VFPDRegValue(kSimd128Precision);
......@@ -5197,32 +5076,32 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
case Neon16: {
DCHECK_EQ(Neon8, size);
uint8_t src[16];
get_q_register(Vm, src);
get_neon_register(Vm, src);
for (int i = 0; i < 16; i += 2) {
std::swap(src[i], src[i + 1]);
}
set_q_register(Vd, src);
set_neon_register(Vd, src);
break;
}
case Neon32: {
switch (size) {
case Neon16: {
uint16_t src[8];
get_q_register(Vm, src);
get_neon_register(Vm, src);
for (int i = 0; i < 8; i += 2) {
std::swap(src[i], src[i + 1]);
}
set_q_register(Vd, src);
set_neon_register(Vd, src);
break;
}
case Neon8: {
uint8_t src[16];
get_q_register(Vm, src);
get_neon_register(Vm, src);
for (int i = 0; i < 4; i++) {
std::swap(src[i * 4], src[i * 4 + 3]);
std::swap(src[i * 4 + 1], src[i * 4 + 2]);
}
set_q_register(Vd, src);
set_neon_register(Vd, src);
break;
}
default:
......@@ -5235,30 +5114,30 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
switch (size) {
case Neon32: {
uint32_t src[4];
get_q_register(Vm, src);
get_neon_register(Vm, src);
std::swap(src[0], src[1]);
std::swap(src[2], src[3]);
set_q_register(Vd, src);
set_neon_register(Vd, src);
break;
}
case Neon16: {
uint16_t src[8];
get_q_register(Vm, src);
get_neon_register(Vm, src);
for (int i = 0; i < 4; i++) {
std::swap(src[i * 4], src[i * 4 + 3]);
std::swap(src[i * 4 + 1], src[i * 4 + 2]);
}
set_q_register(Vd, src);
set_neon_register(Vd, src);
break;
}
case Neon8: {
uint8_t src[16];
get_q_register(Vm, src);
get_neon_register(Vm, src);
for (int i = 0; i < 4; i++) {
std::swap(src[i], src[7 - i]);
std::swap(src[i + 8], src[15 - i]);
}
set_q_register(Vd, src);
set_neon_register(Vd, src);
break;
}
default:
......@@ -5271,49 +5150,45 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
UNREACHABLE();
break;
}
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) {
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) {
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
if (instr->Bit(6) == 0) {
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
// vtrn.<size> Dd, Dm.
switch (size) {
case Neon8:
Transpose<uint8_t, kDoubleSize>(this, Vd, Vm);
break;
case Neon16:
Transpose<uint16_t, kDoubleSize>(this, Vd, Vm);
break;
case Neon32:
Transpose<uint32_t, kDoubleSize>(this, Vd, Vm);
break;
default:
UNREACHABLE();
break;
}
} else {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
// vtrn.<size> Qd, Qm.
switch (size) {
case Neon8: {
uint8_t src[16], dst[16];
get_q_register(Vd, dst);
get_q_register(Vm, src);
for (int i = 0; i < 8; i++) {
std::swap(dst[2 * i + 1], src[2 * i]);
}
set_q_register(Vd, dst);
set_q_register(Vm, src);
case Neon8:
Transpose<uint8_t, kSimd128Size>(this, Vd, Vm);
break;
}
case Neon16: {
uint16_t src[8], dst[8];
get_q_register(Vd, dst);
get_q_register(Vm, src);
for (int i = 0; i < 4; i++) {
std::swap(dst[2 * i + 1], src[2 * i]);
}
set_q_register(Vd, dst);
set_q_register(Vm, src);
case Neon16:
Transpose<uint16_t, kSimd128Size>(this, Vd, Vm);
break;
}
case Neon32: {
uint32_t src[4], dst[4];
get_q_register(Vd, dst);
get_q_register(Vm, src);
for (int i = 0; i < 2; i++) {
std::swap(dst[2 * i + 1], src[2 * i]);
}
set_q_register(Vd, dst);
set_q_register(Vm, src);
case Neon32:
Transpose<uint32_t, kSimd128Size>(this, Vd, Vm);
break;
}
default:
UNREACHABLE();
break;
}
}
} else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
......@@ -5323,41 +5198,23 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
if (instr->Bit(10) != 0) {
// floating point (clear sign bits)
uint32_t src[4];
get_q_register(Vm, src);
get_neon_register(Vm, src);
for (int i = 0; i < 4; i++) {
src[i] &= ~0x80000000;
}
set_q_register(Vd, src);
set_neon_register(Vd, src);
} else {
// signed integer
switch (size) {
case Neon8: {
int8_t src[16];
get_q_register(Vm, src);
for (int i = 0; i < 16; i++) {
src[i] = std::abs(src[i]);
}
set_q_register(Vd, src);
case Neon8:
Abs<int8_t, kSimd128Size>(this, Vd, Vm);
break;
}
case Neon16: {
int16_t src[8];
get_q_register(Vm, src);
for (int i = 0; i < 8; i++) {
src[i] = std::abs(src[i]);
}
set_q_register(Vd, src);
case Neon16:
Abs<int16_t, kSimd128Size>(this, Vd, Vm);
break;
}
case Neon32: {
int32_t src[4];
get_q_register(Vm, src);
for (int i = 0; i < 4; i++) {
src[i] = std::abs(src[i]);
}
set_q_register(Vd, src);
case Neon32:
Abs<int32_t, kSimd128Size>(this, Vd, Vm);
break;
}
default:
UNIMPLEMENTED();
break;
......@@ -5368,40 +5225,23 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
if (instr->Bit(10) != 0) {
// floating point (toggle sign bits)
uint32_t src[4];
get_q_register(Vm, src);
get_neon_register(Vm, src);
for (int i = 0; i < 4; i++) {
src[i] ^= 0x80000000;
}
set_q_register(Vd, src);
set_neon_register(Vd, src);
} else {
// signed integer
switch (size) {
case Neon8: {
int8_t src[16];
get_q_register(Vm, src);
for (int i = 0; i < 16; i++) {
src[i] = -src[i];
}
set_q_register(Vd, src);
case Neon8:
Neg<int8_t, kSimd128Size>(this, Vd, Vm);
break;
}
case Neon16:
int16_t src[8];
get_q_register(Vm, src);
for (int i = 0; i < 8; i++) {
src[i] = -src[i];
}
set_q_register(Vd, src);
Neg<int16_t, kSimd128Size>(this, Vd, Vm);
break;
case Neon32: {
int32_t src[4];
get_q_register(Vm, src);
for (int i = 0; i < 4; i++) {
src[i] = -src[i];
}
set_q_register(Vd, src);
case Neon32:
Neg<int32_t, kSimd128Size>(this, Vd, Vm);
break;
}
default:
UNIMPLEMENTED();
break;
......@@ -5415,7 +5255,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
uint32_t src[4];
get_q_register(Vm, src);
get_neon_register(Vm, src);
if (instr->Bit(7) == 0) {
for (int i = 0; i < 4; i++) {
float denom = bit_cast<float>(src[i]);
......@@ -5433,7 +5273,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
src[i] = bit_cast<uint32_t>(result);
}
}
set_q_register(Vd, src);
set_neon_register(Vd, src);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 &&
instr->Bits(7, 6) != 0) {
// vqmovn.<type><size> Dd, Qm.
......@@ -5481,33 +5321,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
int Vm = instr->VFPMRegValue(kSimd128Precision);
NeonSize ns = static_cast<NeonSize>(size / 16);
switch (ns) {
case Neon8: {
uint8_t src[16];
get_q_register(Vm, src);
for (int i = 0; i < 16; i++) {
src[i] >>= shift;
}
set_q_register(Vd, src);
case Neon8:
ShiftRight<uint8_t, kSimd128Size>(this, Vd, Vm, shift);
break;
}
case Neon16: {
uint16_t src[8];
get_q_register(Vm, src);
for (int i = 0; i < 8; i++) {
src[i] >>= shift;
}
set_q_register(Vd, src);
case Neon16:
ShiftRight<uint16_t, kSimd128Size>(this, Vd, Vm, shift);
break;
}
case Neon32: {
uint32_t src[4];
get_q_register(Vm, src);
for (int i = 0; i < 4; i++) {
src[i] >>= shift;
}
set_q_register(Vd, src);
case Neon32:
ShiftRight<uint32_t, kSimd128Size>(this, Vd, Vm, shift);
break;
}
default:
UNREACHABLE();
break;
......
......@@ -154,14 +154,10 @@ class Simulator {
void get_d_register(int dreg, uint32_t* value);
void set_d_register(int dreg, const uint32_t* value);
// Support for NEON.
template <typename T>
void get_d_register(int dreg, T* value);
template <typename T>
void set_d_register(int dreg, const T* value);
template <typename T>
void get_q_register(int qreg, T* value);
template <typename T>
void set_q_register(int qreg, const T* value);
template <typename T, int SIZE = kSimd128Size>
void get_neon_register(int reg, T (&value)[SIZE / sizeof(T)]);
template <typename T, int SIZE = kSimd128Size>
void set_neon_register(int reg, const T (&value)[SIZE / sizeof(T)]);
void set_s_register(int reg, unsigned int value);
unsigned int get_s_register(int reg) const;
......
......@@ -1318,12 +1318,16 @@ TEST(15) {
uint32_t vext[4];
uint32_t vzip8a[4], vzip8b[4], vzip16a[4], vzip16b[4], vzip32a[4],
vzip32b[4];
uint32_t vzipd8a[2], vzipd8b[2], vzipd16a[2], vzipd16b[2];
uint32_t vuzp8a[4], vuzp8b[4], vuzp16a[4], vuzp16b[4], vuzp32a[4],
vuzp32b[4];
uint32_t vuzpd8a[2], vuzpd8b[2], vuzpd16a[2], vuzpd16b[2];
uint32_t vrev64_32[4], vrev64_16[4], vrev64_8[4];
uint32_t vrev32_16[4], vrev32_8[4], vrev16_8[4];
uint32_t vtrn8a[4], vtrn8b[4], vtrn16a[4], vtrn16b[4], vtrn32a[4],
vtrn32b[4];
uint32_t vtrnd8a[2], vtrnd8b[2], vtrnd16a[2], vtrnd16b[2], vtrnd32a[2],
vtrnd32b[2];
uint32_t vtbl[2], vtbx[2];
} T;
T t;
......@@ -1856,7 +1860,7 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vext))));
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
// vzip.
// vzip (q-register).
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ vmov(q1, q0);
......@@ -1882,7 +1886,20 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vzip32b))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vuzp.
// vzip (d-register).
__ vldr(d2, r0, offsetof(T, lane_test));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vzip(Neon8, d0, d1);
__ vstr(d0, r0, offsetof(T, vzipd8a));
__ vstr(d1, r0, offsetof(T, vzipd8b));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vzip(Neon16, d0, d1);
__ vstr(d0, r0, offsetof(T, vzipd16a));
__ vstr(d1, r0, offsetof(T, vzipd16b));
// vuzp (q-register).
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ vmov(q1, q0);
......@@ -1908,7 +1925,20 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp32b))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vtrn.
// vuzp (d-register).
__ vldr(d2, r0, offsetof(T, lane_test));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vuzp(Neon8, d0, d1);
__ vstr(d0, r0, offsetof(T, vuzpd8a));
__ vstr(d1, r0, offsetof(T, vuzpd8b));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vuzp(Neon16, d0, d1);
__ vstr(d0, r0, offsetof(T, vuzpd16a));
__ vstr(d1, r0, offsetof(T, vuzpd16b));
// vtrn (q-register).
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ vmov(q1, q0);
......@@ -1934,6 +1964,24 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn32b))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vtrn (d-register).
__ vldr(d2, r0, offsetof(T, lane_test));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vtrn(Neon8, d0, d1);
__ vstr(d0, r0, offsetof(T, vtrnd8a));
__ vstr(d1, r0, offsetof(T, vtrnd8b));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vtrn(Neon16, d0, d1);
__ vstr(d0, r0, offsetof(T, vtrnd16a));
__ vstr(d1, r0, offsetof(T, vtrnd16b));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vtrn(Neon32, d0, d1);
__ vstr(d0, r0, offsetof(T, vtrnd32a));
__ vstr(d1, r0, offsetof(T, vtrnd32b));
// vrev64/32/16
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
......@@ -2140,6 +2188,11 @@ TEST(15) {
CHECK_EQ_32X4(vzip32a, 0x03020100u, 0x03020100u, 0x07060504u, 0x07060504u);
CHECK_EQ_32X4(vzip32b, 0x0b0a0908u, 0x0b0a0908u, 0x0f0e0d0cu, 0x0f0e0d0cu);
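// The D-register checks below follow from lane_test holding bytes 00..0f:
// d0 = d1 = 00..07, so vzip.8 leaves d0 = 00 00 01 01 02 02 03 03
// (0x01010000, 0x03030202) and d1 = 04 04 05 05 06 06 07 07.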
CHECK_EQ_32X2(vzipd8a, 0x01010000u, 0x03030202u);
CHECK_EQ_32X2(vzipd8b, 0x05050404u, 0x07070606u);
CHECK_EQ_32X2(vzipd16a, 0x01000100u, 0x03020302u);
CHECK_EQ_32X2(vzipd16b, 0x05040504u, 0x07060706u);
CHECK_EQ_32X4(vuzp8a, 0x06040200u, 0x0e0c0a08u, 0x06040200u, 0x0e0c0a08u);
CHECK_EQ_32X4(vuzp8b, 0x07050301u, 0x0f0d0b09u, 0x07050301u, 0x0f0d0b09u);
CHECK_EQ_32X4(vuzp16a, 0x05040100u, 0x0d0c0908u, 0x05040100u, 0x0d0c0908u);
......@@ -2147,6 +2200,11 @@ TEST(15) {
CHECK_EQ_32X4(vuzp32a, 0x03020100u, 0x0b0a0908u, 0x03020100u, 0x0b0a0908u);
CHECK_EQ_32X4(vuzp32b, 0x07060504u, 0x0f0e0d0cu, 0x07060504u, 0x0f0e0d0cu);
CHECK_EQ_32X2(vuzpd8a, 0x06040200u, 0x06040200u);
CHECK_EQ_32X2(vuzpd8b, 0x07050301u, 0x07050301u);
CHECK_EQ_32X2(vuzpd16a, 0x05040100u, 0x05040100u);
CHECK_EQ_32X2(vuzpd16b, 0x07060302u, 0x07060302u);
CHECK_EQ_32X4(vtrn8a, 0x02020000u, 0x06060404u, 0x0a0a0808u, 0x0e0e0c0cu);
CHECK_EQ_32X4(vtrn8b, 0x03030101u, 0x07070505u, 0x0b0b0909u, 0x0f0f0d0du);
CHECK_EQ_32X4(vtrn16a, 0x01000100u, 0x05040504u, 0x09080908u, 0x0d0c0d0cu);
......@@ -2154,6 +2212,13 @@ TEST(15) {
CHECK_EQ_32X4(vtrn32a, 0x03020100u, 0x03020100u, 0x0b0a0908u, 0x0b0a0908u);
CHECK_EQ_32X4(vtrn32b, 0x07060504u, 0x07060504u, 0x0f0e0d0cu, 0x0f0e0d0cu);
CHECK_EQ_32X2(vtrnd8a, 0x02020000u, 0x06060404u);
CHECK_EQ_32X2(vtrnd8b, 0x03030101u, 0x07070505u);
CHECK_EQ_32X2(vtrnd16a, 0x01000100u, 0x05040504u);
CHECK_EQ_32X2(vtrnd16b, 0x03020302u, 0x07060706u);
CHECK_EQ_32X2(vtrnd32a, 0x03020100u, 0x03020100u);
CHECK_EQ_32X2(vtrnd32b, 0x07060504u, 0x07060504u);
// src: 0 1 2 3 4 5 6 7 8 9 a b c d e f (little endian)
CHECK_EQ_32X4(vrev64_32, 0x07060504u, 0x03020100u, 0x0f0e0d0cu,
0x0b0a0908u);
......
......@@ -1142,12 +1142,18 @@ TEST(Neon) {
"f350e170 vbsl q15, q0, q8");
COMPARE(vext(q15, q0, q8, 3),
"f2f0e360 vext.8 q15, q0, q8, #3");
COMPARE(vzip(Neon16, d15, d0),
"f3b6f180 vzip.16 d15, d0");
COMPARE(vzip(Neon16, q15, q0),
"f3f6e1c0 vzip.16 q15, q0");
COMPARE(vuzp(Neon16, d15, d0),
"f3b6f100 vuzp.16 d15, d0");
COMPARE(vuzp(Neon16, q15, q0),
"f3f6e140 vuzp.16 q15, q0");
COMPARE(vrev64(Neon8, q15, q0),
"f3f0e040 vrev64.8 q15, q0");
COMPARE(vtrn(Neon16, d15, d0),
"f3b6f080 vtrn.16 d15, d0");
COMPARE(vtrn(Neon16, q15, q0),
"f3f6e0c0 vtrn.16 q15, q0");
COMPARE(vtbl(d0, NeonListOperand(d1, 1), d2),
......