Commit 5f7e6331 authored by bbudge, committed by Commit bot

[ARM] Implement D-register versions of vzip, vuzp, and vtrn.

LOG=N
BUG=v8:6020

Review-Url: https://codereview.chromium.org/2797923006
Cr-Original-Commit-Position: refs/heads/master@{#44536}
Committed: https://chromium.googlesource.com/v8/v8/+/6588187ae3acaa5b40762c539ee9fe355551bea3
Review-Url: https://codereview.chromium.org/2797923006
Cr-Commit-Position: refs/heads/master@{#44540}
parent 41b22805
......@@ -4084,6 +4084,16 @@ void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) {
enum NeonRegType { NEON_D, NEON_Q };
// Splits the register number `code` into its encoding fields, writing the
// results through `vm` and `m` using the register class selected by `type`.
// For NEON_Q registers the B6 bit is additionally OR-ed into `*encoding`;
// for NEON_D registers `*encoding` is left untouched.
void NeonSplitCode(NeonRegType type, int code, int* vm, int* m, int* encoding) {
  if (type == NEON_D) {
    DwVfpRegister::split_code(code, vm, m);
    return;
  }
  // Anything that is not a D register must be a Q register.
  DCHECK_EQ(type, NEON_Q);
  QwNeonRegister::split_code(code, vm, m);
  *encoding |= B6;
}
enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF };
static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
......@@ -4116,16 +4126,11 @@ static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
UNREACHABLE();
break;
}
int vd, d, vm, m;
if (reg_type == NEON_Q) {
op_encoding |= B6;
QwNeonRegister::split_code(dst_code, &vd, &d);
QwNeonRegister::split_code(src_code, &vm, &m);
} else {
DCHECK_EQ(reg_type, NEON_D);
DwVfpRegister::split_code(dst_code, &vd, &d);
DwVfpRegister::split_code(src_code, &vm, &m);
}
int vd, d;
NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
int vm, m;
NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);
return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | vd * B12 | m * B5 |
vm | op_encoding;
}
......@@ -4215,18 +4220,13 @@ static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, NeonRegType reg_type,
UNREACHABLE();
break;
}
int vd, d, vn, n, vm, m;
if (reg_type == NEON_Q) {
op_encoding |= B6;
QwNeonRegister::split_code(dst_code, &vd, &d);
QwNeonRegister::split_code(src_code1, &vn, &n);
QwNeonRegister::split_code(src_code2, &vm, &m);
} else {
DCHECK_EQ(reg_type, NEON_D);
DwVfpRegister::split_code(dst_code, &vd, &d);
DwVfpRegister::split_code(src_code1, &vn, &n);
DwVfpRegister::split_code(src_code2, &vm, &m);
}
int vd, d;
NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
int vn, n;
NeonSplitCode(reg_type, src_code1, &vn, &n, &op_encoding);
int vm, m;
NeonSplitCode(reg_type, src_code2, &vm, &m, &op_encoding);
return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 |
n * B7 | m * B5 | B4 | vm;
}
......@@ -4710,8 +4710,8 @@ void Assembler::vext(QwNeonRegister dst, QwNeonRegister src1,
enum NeonSizedOp { VZIP, VUZP, VREV16, VREV32, VREV64, VTRN };
static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonSize size,
QwNeonRegister dst, QwNeonRegister src) {
static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonRegType reg_type,
NeonSize size, int dst_code, int src_code) {
int op_encoding = 0;
switch (op) {
case VZIP:
......@@ -4737,54 +4737,76 @@ static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonSize size,
break;
}
int vd, d;
dst.split_code(&vd, &d);
NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
int vm, m;
src.split_code(&vm, &m);
NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);
int sz = static_cast<int>(size);
return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | B6 |
m * B5 | vm | op_encoding;
return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | m * B5 |
vm | op_encoding;
}
// Emits the D-register form of vzip: SIMD zip (interleave) of src1 and src2.
// The 32-bit element size is forwarded to the D-register vtrn overload.
void Assembler::vzip(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  if (size == Neon32) {
    // The D-register encoding of vzip with 32-bit elements is UNDEFINED;
    // vzip.32 Dd, Dm is a pseudo-instruction for vtrn.32 Dd, Dm, so emit that
    // instead of an invalid instruction word.
    vtrn(size, src1, src2);
    return;
  }
  DCHECK(IsEnabled(NEON));
  // vzip.<size>(Dn, Dm) SIMD zip (interleave).
  // Instruction details available in ARM DDI 0406C.b, A8-1102.
  emit(EncodeNeonSizedOp(VZIP, NEON_D, size, src1.code(), src2.code()));
}
void Assembler::vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vzip.<size>(Qn, Qm) SIMD zip (interleave).
// vzip.<size>(Qn, Qm) SIMD zip (interleave).
// Instruction details available in ARM DDI 0406C.b, A8-1102.
emit(EncodeNeonSizedOp(VZIP, size, src1, src2));
emit(EncodeNeonSizedOp(VZIP, NEON_Q, size, src1.code(), src2.code()));
}
// Emits the D-register form of vuzp: SIMD un-zip (de-interleave) of src1 and
// src2. The 32-bit element size is forwarded to the D-register vtrn overload.
void Assembler::vuzp(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  if (size == Neon32) {
    // The D-register encoding of vuzp with 32-bit elements is UNDEFINED;
    // vuzp.32 Dd, Dm is a pseudo-instruction for vtrn.32 Dd, Dm, so emit that
    // instead of an invalid instruction word.
    vtrn(size, src1, src2);
    return;
  }
  DCHECK(IsEnabled(NEON));
  // vuzp.<size>(Dn, Dm) SIMD un-zip (de-interleave).
  // Instruction details available in ARM DDI 0406C.b, A8-1100.
  emit(EncodeNeonSizedOp(VUZP, NEON_D, size, src1.code(), src2.code()));
}
void Assembler::vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave).
// vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave).
// Instruction details available in ARM DDI 0406C.b, A8-1100.
emit(EncodeNeonSizedOp(VUZP, size, src1, src2));
emit(EncodeNeonSizedOp(VUZP, NEON_Q, size, src1.code(), src2.code()));
}
void Assembler::vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
DCHECK(IsEnabled(NEON));
// Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse.
// Qd = vrev16.<size>(Qm) SIMD element reverse.
// Instruction details available in ARM DDI 0406C.b, A8-1028.
emit(EncodeNeonSizedOp(VREV16, size, dst, src));
emit(EncodeNeonSizedOp(VREV16, NEON_Q, size, dst.code(), src.code()));
}
void Assembler::vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
DCHECK(IsEnabled(NEON));
// Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse.
// Qd = vrev32.<size>(Qm) SIMD element reverse.
// Instruction details available in ARM DDI 0406C.b, A8-1028.
emit(EncodeNeonSizedOp(VREV32, size, dst, src));
emit(EncodeNeonSizedOp(VREV32, NEON_Q, size, dst.code(), src.code()));
}
void Assembler::vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
DCHECK(IsEnabled(NEON));
// Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse.
// Qd = vrev64.<size>(Qm) SIMD element reverse.
// Instruction details available in ARM DDI 0406C.b, A8-1028.
emit(EncodeNeonSizedOp(VREV64, size, dst, src));
emit(EncodeNeonSizedOp(VREV64, NEON_Q, size, dst.code(), src.code()));
}
// Emits the D-register form of vtrn: SIMD element transpose of src1 and src2.
void Assembler::vtrn(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  // vtrn.<size>(Dn, Dm); instruction details are in ARM DDI 0406C.b, A8-1096.
  DCHECK(IsEnabled(NEON));
  const Instr encoded =
      EncodeNeonSizedOp(VTRN, NEON_D, size, src1.code(), src2.code());
  emit(encoded);
}
void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse.
// vtrn.<size>(Qn, Qm) SIMD element transpose.
// Instruction details available in ARM DDI 0406C.b, A8-1096.
emit(EncodeNeonSizedOp(VTRN, size, src1, src2));
emit(EncodeNeonSizedOp(VTRN, NEON_Q, size, src1.code(), src2.code()));
}
// Encode NEON vtbl / vtbx instruction.
......
......@@ -1396,11 +1396,14 @@ class Assembler : public AssemblerBase {
QwNeonRegister src2);
void vext(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2,
int bytes);
// vzip: SIMD zip (interleave) of the two operands; D- and Q-register forms.
void vzip(NeonSize size, DwVfpRegister src1, DwVfpRegister src2);
void vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2);
// vuzp: SIMD un-zip (de-interleave) of the two operands; D- and Q-register
// forms.
void vuzp(NeonSize size, DwVfpRegister src1, DwVfpRegister src2);
void vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2);
// vrev16/32/64: SIMD element reverse from src into dst; Q-register only.
void vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src);
void vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src);
void vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src);
// vtrn: SIMD element transpose of the two operands; D- and Q-register forms.
void vtrn(NeonSize size, DwVfpRegister src1, DwVfpRegister src2);
void vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2);
void vtbl(DwVfpRegister dst, const NeonListOperand& list,
DwVfpRegister index);
......
......@@ -2167,33 +2167,56 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmovl.u%d q%d, d%d", imm3 * 8, Vd, Vm);
} else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) {
if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) {
if (instr->Bit(6) == 0) {
if (instr->Bits(11, 7) == 0x18) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
int index = instr->Bit(19);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vdup q%d, d%d[%d]", Vd, Vm, index);
} else if (instr->Bits(11, 10) == 0x2) {
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vn = instr->VFPNRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
int len = instr->Bits(9, 8);
NeonListOperand list(DwVfpRegister::from_code(Vn), len + 1);
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s d%d, ",
instr->Bit(6) == 0 ? "vtbl.8" : "vtbx.8", Vd);
FormatNeonList(Vn, list.type());
Print(", ");
PrintDRegister(Vm);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 &&
instr->Bits(7, 6) != 0) {
// vqmovn.<type><size> Dd, Qm.
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
char type = instr->Bit(6) != 0 ? 'u' : 's';
int size = 2 * kBitsPerByte * (1 << instr->Bits(19, 18));
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vqmovn.%c%i d%d, q%d",
type, size, Vd, Vm);
} else {
int Vd, Vm;
if (instr->Bit(6) == 0) {
Vd = instr->VFPDRegValue(kDoublePrecision);
Vm = instr->VFPMRegValue(kDoublePrecision);
} else {
Vd = instr->VFPDRegValue(kSimd128Precision);
Vm = instr->VFPMRegValue(kSimd128Precision);
}
if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) {
if (instr->Bit(6) == 0) {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vswp d%d, d%d", Vd, Vm);
} else {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vswp q%d, q%d", Vd, Vm);
}
} else if (instr->Bits(11, 7) == 0x18) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
int index = instr->Bit(19);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vdup q%d, d%d[%d]", Vd, Vm, index);
} else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vmvn q%d, q%d", Vd, Vm);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmvn q%d, q%d", Vd, Vm);
} else if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 &&
instr->Bit(6) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
const char* suffix = nullptr;
int op = instr->Bits(8, 7);
switch (op) {
......@@ -2212,48 +2235,39 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
}
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vcvt.%s q%d, q%d", suffix, Vd, Vm);
} else if (instr->Bits(11, 10) == 0x2) {
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vn = instr->VFPNRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
int len = instr->Bits(9, 8);
NeonListOperand list(DwVfpRegister::from_code(Vn), len + 1);
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s d%d, ",
instr->Bit(6) == 0 ? "vtbl.8" : "vtbx.8", Vd);
FormatNeonList(Vn, list.type());
Print(", ");
PrintDRegister(Vm);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 &&
instr->Bit(6) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) {
int size = kBitsPerByte * (1 << instr->Bits(19, 18));
const char* op = instr->Bit(7) != 0 ? "vzip" : "vuzp";
if (instr->Bit(6) == 0) {
// vzip/vuzp.<size> Dd, Dm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.%d d%d, d%d", op, size, Vd, Vm);
} else {
// vzip/vuzp.<size> Qd, Qm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.%d q%d, q%d", op, size, Vd, Vm);
}
} else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0 &&
instr->Bit(6) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int size = kBitsPerByte * (1 << instr->Bits(19, 18));
int op = kBitsPerByte
<< (static_cast<int>(Neon64) - instr->Bits(8, 7));
// vrev<op>.<size> Qd, Qm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vrev%d.%d q%d, q%d", op, size, Vd, Vm);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) {
int size = kBitsPerByte * (1 << instr->Bits(19, 18));
if (instr->Bit(6) == 0) {
// vtrn.<size> Dd, Dm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vtrn.%d d%d, d%d", size, Vd, Vm);
} else {
// vtrn.<size> Qd, Qm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vtrn.%d q%d, q%d", size, Vd, Vm);
}
} else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0 &&
instr->Bit(6) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int size = kBitsPerByte * (1 << instr->Bits(19, 18));
char type = instr->Bit(10) != 0 ? 'f' : 's';
if (instr->Bits(9, 6) == 0xd) {
......@@ -2272,24 +2286,13 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
} else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5 &&
instr->Bit(6) == 1) {
// vrecpe/vrsqrte.f32 Qd, Qm.
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
const char* op = instr->Bit(7) == 0 ? "vrecpe" : "vrsqrte";
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.f32 q%d, q%d", op, Vd, Vm);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 &&
instr->Bits(7, 6) != 0) {
// vqmovn.<type><size> Dd, Qm.
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
char type = instr->Bit(6) != 0 ? 'u' : 's';
int size = 2 * kBitsPerByte * (1 << instr->Bits(19, 18));
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vqmovn.%c%i d%d, q%d",
type, size, Vd, Vm);
} else {
Unknown(instr);
}
}
} else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1 &&
instr->Bit(6) == 1) {
// vshr.u<size> Qd, Qm, shift
......
This diff is collapsed.
......@@ -154,14 +154,10 @@ class Simulator {
void get_d_register(int dreg, uint32_t* value);
void set_d_register(int dreg, const uint32_t* value);
// Support for NEON.
template <typename T>
void get_d_register(int dreg, T* value);
template <typename T>
void set_d_register(int dreg, const T* value);
template <typename T>
void get_q_register(int qreg, T* value);
template <typename T>
void set_q_register(int qreg, const T* value);
template <typename T, int SIZE = kSimd128Size>
void get_neon_register(int reg, T (&value)[SIZE / sizeof(T)]);
template <typename T, int SIZE = kSimd128Size>
void set_neon_register(int reg, const T (&value)[SIZE / sizeof(T)]);
void set_s_register(int reg, unsigned int value);
unsigned int get_s_register(int reg) const;
......
......@@ -1318,12 +1318,16 @@ TEST(15) {
uint32_t vext[4];
uint32_t vzip8a[4], vzip8b[4], vzip16a[4], vzip16b[4], vzip32a[4],
vzip32b[4];
uint32_t vzipd8a[2], vzipd8b[2], vzipd16a[2], vzipd16b[2];
uint32_t vuzp8a[4], vuzp8b[4], vuzp16a[4], vuzp16b[4], vuzp32a[4],
vuzp32b[4];
uint32_t vuzpd8a[2], vuzpd8b[2], vuzpd16a[2], vuzpd16b[2];
uint32_t vrev64_32[4], vrev64_16[4], vrev64_8[4];
uint32_t vrev32_16[4], vrev32_8[4], vrev16_8[4];
uint32_t vtrn8a[4], vtrn8b[4], vtrn16a[4], vtrn16b[4], vtrn32a[4],
vtrn32b[4];
uint32_t vtrnd8a[2], vtrnd8b[2], vtrnd16a[2], vtrnd16b[2], vtrnd32a[2],
vtrnd32b[2];
uint32_t vtbl[2], vtbx[2];
} T;
T t;
......@@ -1856,7 +1860,7 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vext))));
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
// vzip.
// vzip (q-register).
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ vmov(q1, q0);
......@@ -1882,7 +1886,20 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vzip32b))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vuzp.
// vzip (d-register).
__ vldr(d2, r0, offsetof(T, lane_test));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vzip(Neon8, d0, d1);
__ vstr(d0, r0, offsetof(T, vzipd8a));
__ vstr(d1, r0, offsetof(T, vzipd8b));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vzip(Neon16, d0, d1);
__ vstr(d0, r0, offsetof(T, vzipd16a));
__ vstr(d1, r0, offsetof(T, vzipd16b));
// vuzp (q-register).
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ vmov(q1, q0);
......@@ -1908,7 +1925,20 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp32b))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vtrn.
// vuzp (d-register).
__ vldr(d2, r0, offsetof(T, lane_test));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vuzp(Neon8, d0, d1);
__ vstr(d0, r0, offsetof(T, vuzpd8a));
__ vstr(d1, r0, offsetof(T, vuzpd8b));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vuzp(Neon16, d0, d1);
__ vstr(d0, r0, offsetof(T, vuzpd16a));
__ vstr(d1, r0, offsetof(T, vuzpd16b));
// vtrn (q-register).
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ vmov(q1, q0);
......@@ -1934,6 +1964,24 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn32b))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vtrn (d-register).
__ vldr(d2, r0, offsetof(T, lane_test));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vtrn(Neon8, d0, d1);
__ vstr(d0, r0, offsetof(T, vtrnd8a));
__ vstr(d1, r0, offsetof(T, vtrnd8b));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vtrn(Neon16, d0, d1);
__ vstr(d0, r0, offsetof(T, vtrnd16a));
__ vstr(d1, r0, offsetof(T, vtrnd16b));
__ vmov(d0, d2);
__ vmov(d1, d2);
__ vtrn(Neon32, d0, d1);
__ vstr(d0, r0, offsetof(T, vtrnd32a));
__ vstr(d1, r0, offsetof(T, vtrnd32b));
// vrev64/32/16
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
......@@ -2140,6 +2188,11 @@ TEST(15) {
CHECK_EQ_32X4(vzip32a, 0x03020100u, 0x03020100u, 0x07060504u, 0x07060504u);
CHECK_EQ_32X4(vzip32b, 0x0b0a0908u, 0x0b0a0908u, 0x0f0e0d0cu, 0x0f0e0d0cu);
CHECK_EQ_32X2(vzipd8a, 0x01010000u, 0x03030202u);
CHECK_EQ_32X2(vzipd8b, 0x05050404u, 0x07070606u);
CHECK_EQ_32X2(vzipd16a, 0x01000100u, 0x03020302u);
CHECK_EQ_32X2(vzipd16b, 0x05040504u, 0x07060706u);
CHECK_EQ_32X4(vuzp8a, 0x06040200u, 0x0e0c0a08u, 0x06040200u, 0x0e0c0a08u);
CHECK_EQ_32X4(vuzp8b, 0x07050301u, 0x0f0d0b09u, 0x07050301u, 0x0f0d0b09u);
CHECK_EQ_32X4(vuzp16a, 0x05040100u, 0x0d0c0908u, 0x05040100u, 0x0d0c0908u);
......@@ -2147,6 +2200,11 @@ TEST(15) {
CHECK_EQ_32X4(vuzp32a, 0x03020100u, 0x0b0a0908u, 0x03020100u, 0x0b0a0908u);
CHECK_EQ_32X4(vuzp32b, 0x07060504u, 0x0f0e0d0cu, 0x07060504u, 0x0f0e0d0cu);
CHECK_EQ_32X2(vuzpd8a, 0x06040200u, 0x06040200u);
CHECK_EQ_32X2(vuzpd8b, 0x07050301u, 0x07050301u);
CHECK_EQ_32X2(vuzpd16a, 0x05040100u, 0x05040100u);
CHECK_EQ_32X2(vuzpd16b, 0x07060302u, 0x07060302u);
CHECK_EQ_32X4(vtrn8a, 0x02020000u, 0x06060404u, 0x0a0a0808u, 0x0e0e0c0cu);
CHECK_EQ_32X4(vtrn8b, 0x03030101u, 0x07070505u, 0x0b0b0909u, 0x0f0f0d0du);
CHECK_EQ_32X4(vtrn16a, 0x01000100u, 0x05040504u, 0x09080908u, 0x0d0c0d0cu);
......@@ -2154,6 +2212,13 @@ TEST(15) {
CHECK_EQ_32X4(vtrn32a, 0x03020100u, 0x03020100u, 0x0b0a0908u, 0x0b0a0908u);
CHECK_EQ_32X4(vtrn32b, 0x07060504u, 0x07060504u, 0x0f0e0d0cu, 0x0f0e0d0cu);
CHECK_EQ_32X2(vtrnd8a, 0x02020000u, 0x06060404u);
CHECK_EQ_32X2(vtrnd8b, 0x03030101u, 0x07070505u);
CHECK_EQ_32X2(vtrnd16a, 0x01000100u, 0x05040504u);
CHECK_EQ_32X2(vtrnd16b, 0x03020302u, 0x07060706u);
CHECK_EQ_32X2(vtrnd32a, 0x03020100u, 0x03020100u);
CHECK_EQ_32X2(vtrnd32b, 0x07060504u, 0x07060504u);
// src: 0 1 2 3 4 5 6 7 8 9 a b c d e f (little endian)
CHECK_EQ_32X4(vrev64_32, 0x07060504u, 0x03020100u, 0x0f0e0d0cu,
0x0b0a0908u);
......
......@@ -1142,12 +1142,18 @@ TEST(Neon) {
"f350e170 vbsl q15, q0, q8");
COMPARE(vext(q15, q0, q8, 3),
"f2f0e360 vext.8 q15, q0, q8, #3");
COMPARE(vzip(Neon16, d15, d0),
"f3b6f180 vzip.16 d15, d0");
COMPARE(vzip(Neon16, q15, q0),
"f3f6e1c0 vzip.16 q15, q0");
COMPARE(vuzp(Neon16, d15, d0),
"f3b6f100 vuzp.16 d15, d0");
COMPARE(vuzp(Neon16, q15, q0),
"f3f6e140 vuzp.16 q15, q0");
COMPARE(vrev64(Neon8, q15, q0),
"f3f0e040 vrev64.8 q15, q0");
COMPARE(vtrn(Neon16, d15, d0),
"f3b6f080 vtrn.16 d15, d0");
COMPARE(vtrn(Neon16, q15, q0),
"f3f6e0c0 vtrn.16 q15, q0");
COMPARE(vtbl(d0, NeonListOperand(d1, 1), d2),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment