Commit a75f7cd3 authored by bbudge's avatar bbudge Committed by Commit bot

[ARM] Implement more NEON permutation instructions.

- Implements vuzp, vtrn instructions for q-registers.
- Refactors vmvn, vswp to use common unary op helper fn.

LOG=N
BUG=v8:6020

Review-Url: https://codereview.chromium.org/2739033002
Cr-Commit-Position: refs/heads/master@{#43795}
parent af63679d
This diff is collapsed.
......@@ -180,11 +180,14 @@ struct SwVfpRegister {
SwVfpRegister r = {code};
return r;
}
void split_code(int* vm, int* m) const {
DCHECK(is_valid());
static void split_code(int reg_code, int* vm, int* m) {
DCHECK(from_code(reg_code).is_valid());
*m = reg_code & 0x1;
*vm = reg_code >> 1;
}
void split_code(int* vm, int* m) const {
split_code(reg_code, vm, m);
}
int reg_code;
};
......@@ -226,11 +229,14 @@ struct DwVfpRegister {
DwVfpRegister r = {code};
return r;
}
void split_code(int* vm, int* m) const {
DCHECK(is_valid());
static void split_code(int reg_code, int* vm, int* m) {
DCHECK(from_code(reg_code).is_valid());
*m = (reg_code & 0x10) >> 4;
*vm = reg_code & 0x0F;
}
void split_code(int* vm, int* m) const {
split_code(reg_code, vm, m);
}
int reg_code;
};
......@@ -296,12 +302,15 @@ struct QwNeonRegister {
DCHECK(is_valid());
return reg_code;
}
void split_code(int* vm, int* m) const {
DCHECK(is_valid());
static void split_code(int reg_code, int* vm, int* m) {
DCHECK(from_code(reg_code).is_valid());
int encoded_code = reg_code << 1;
*m = (encoded_code & 0x10) >> 4;
*vm = encoded_code & 0x0F;
}
void split_code(int* vm, int* m) const {
split_code(reg_code, vm, m);
}
DwVfpRegister low() const {
DwVfpRegister reg;
reg.reg_code = reg_code * 2;
......@@ -1315,27 +1324,27 @@ class Assembler : public AssemblerBase {
void vmov(NeonDataType dt, DwVfpRegister dst, int index, Register src);
void vmov(NeonDataType dt, Register dst, DwVfpRegister src, int index);
void vmov(const QwNeonRegister dst, const QwNeonRegister src);
void vmvn(const QwNeonRegister dst, const QwNeonRegister src);
void vmov(QwNeonRegister dst, QwNeonRegister src);
void vdup(NeonSize size, QwNeonRegister dst, Register src);
void vdup(QwNeonRegister dst, SwVfpRegister src);
void vcvt_f32_s32(QwNeonRegister dst, QwNeonRegister src);
void vcvt_f32_u32(QwNeonRegister dst, QwNeonRegister src);
void vcvt_s32_f32(QwNeonRegister dst, QwNeonRegister src);
void vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src);
void vmvn(QwNeonRegister dst, QwNeonRegister src);
void vswp(DwVfpRegister dst, DwVfpRegister src);
void vswp(QwNeonRegister dst, QwNeonRegister src);
// vdup conditional execution isn't supported.
void vdup(NeonSize size, const QwNeonRegister dst, const Register src);
void vdup(const QwNeonRegister dst, const SwVfpRegister src);
void vcvt_f32_s32(const QwNeonRegister dst, const QwNeonRegister src);
void vcvt_f32_u32(const QwNeonRegister dst, const QwNeonRegister src);
void vcvt_s32_f32(const QwNeonRegister dst, const QwNeonRegister src);
void vcvt_u32_f32(const QwNeonRegister dst, const QwNeonRegister src);
void vabs(const QwNeonRegister dst, const QwNeonRegister src);
void vabs(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src);
void vneg(const QwNeonRegister dst, const QwNeonRegister src);
void vneg(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src);
void veor(DwVfpRegister dst, DwVfpRegister src1, DwVfpRegister src2);
void vabs(QwNeonRegister dst, QwNeonRegister src);
void vabs(NeonSize size, QwNeonRegister dst, QwNeonRegister src);
void vneg(QwNeonRegister dst, QwNeonRegister src);
void vneg(NeonSize size, QwNeonRegister dst, QwNeonRegister src);
void vand(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vbsl(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void veor(DwVfpRegister dst, DwVfpRegister src1, DwVfpRegister src2);
void veor(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vbsl(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vorr(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vadd(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
......@@ -1374,24 +1383,23 @@ class Assembler : public AssemblerBase {
void vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vcge(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vcge(NeonDataType dt, QwNeonRegister dst,
QwNeonRegister src1, QwNeonRegister src2);
void vcge(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vcgt(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vcgt(NeonDataType dt, QwNeonRegister dst,
QwNeonRegister src1, QwNeonRegister src2);
void vext(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2, int bytes);
void vzip(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src);
void vrev16(NeonSize size, const QwNeonRegister dst,
const QwNeonRegister src);
void vrev32(NeonSize size, const QwNeonRegister dst,
const QwNeonRegister src);
void vrev64(NeonSize size, const QwNeonRegister dst,
const QwNeonRegister src);
void vtbl(const DwVfpRegister dst, const NeonListOperand& list,
const DwVfpRegister index);
void vtbx(const DwVfpRegister dst, const NeonListOperand& list,
const DwVfpRegister index);
void vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vext(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2,
int bytes);
void vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2);
void vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2);
void vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src);
void vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src);
void vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src);
void vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2);
void vtbl(DwVfpRegister dst, const NeonListOperand& list,
DwVfpRegister index);
void vtbx(DwVfpRegister dst, const NeonListOperand& list,
DwVfpRegister index);
// Pseudo instructions
......
......@@ -2225,14 +2225,17 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
FormatNeonList(Vn, list.type());
Print(", ");
PrintDRegister(Vm);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x7) {
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 &&
instr->Bit(6) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int size = kBitsPerByte * (1 << instr->Bits(19, 18));
// vzip.<size> Qd, Qm.
const char* op = instr->Bit(7) != 0 ? "vzip" : "vuzp";
// vzip/vuzp.<size> Qd, Qm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vzip.%d q%d, q%d", size, Vd, Vm);
} else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) {
"%s.%d q%d, q%d", op, size, Vd, Vm);
} else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0 &&
instr->Bit(6) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int size = kBitsPerByte * (1 << instr->Bits(19, 18));
......@@ -2241,7 +2244,15 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
// vrev<op>.<size> Qd, Qm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vrev%d.%d q%d, q%d", op, size, Vd, Vm);
} else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int size = kBitsPerByte * (1 << instr->Bits(19, 18));
// vtrn.<size> Qd, Qm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vtrn.%d q%d, q%d", size, Vd, Vm);
} else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0 &&
instr->Bit(6) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int size = kBitsPerByte * (1 << instr->Bits(19, 18));
......@@ -2259,7 +2270,8 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
} else {
Unknown(instr);
}
} else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5) {
} else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5 &&
instr->Bit(6) == 1) {
// vrecpe/vrsqrte.f32 Qd, Qm.
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
......@@ -2269,7 +2281,8 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
} else {
Unknown(instr);
}
} else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) {
} else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1 &&
instr->Bit(6) == 1) {
// vshr.u<size> Qd, Qm, shift
int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
int shift = 2 * size - instr->Bits(21, 16);
......
......@@ -5043,57 +5043,109 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
}
}
set_d_register(vd, &result);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x7) {
// vzip.<size> Qd, Qm.
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 &&
instr->Bit(6) == 1) {
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16], dst1[16], dst2[16];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
dst1[i * 2] = src1[i];
dst1[i * 2 + 1] = src2[i];
dst2[i * 2] = src1[i + 8];
dst2[i * 2 + 1] = src2[i + 8];
if (instr->Bit(7) == 1) {
// vzip.<size> Qd, Qm.
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16], dst1[16], dst2[16];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
dst1[i * 2] = src1[i];
dst1[i * 2 + 1] = src2[i];
dst2[i * 2] = src1[i + 8];
dst2[i * 2 + 1] = src2[i + 8];
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
break;
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
break;
}
case Neon16: {
uint16_t src1[8], src2[8], dst1[8], dst2[8];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i += 2) {
dst1[i] = src1[i / 2];
dst1[i + 1] = src2[i / 2];
dst2[i] = src1[i / 2 + 4];
dst2[i + 1] = src2[i / 2 + 4];
case Neon16: {
uint16_t src1[8], src2[8], dst1[8], dst2[8];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
dst1[i * 2] = src1[i];
dst1[i * 2 + 1] = src2[i];
dst2[i * 2] = src1[i + 4];
dst2[i * 2 + 1] = src2[i + 4];
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
break;
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
break;
case Neon32: {
uint32_t src1[4], src2[4], dst1[4], dst2[4];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 2; i++) {
dst1[i * 2] = src1[i];
dst1[i * 2 + 1] = src2[i];
dst2[i * 2] = src1[i + 2];
dst2[i * 2 + 1] = src2[i + 2];
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
break;
}
default:
UNREACHABLE();
break;
}
case Neon32: {
uint32_t src1[4], src2[4], dst1[4], dst2[4];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 2; i++) {
dst1[i * 2] = src1[i];
dst1[i * 2 + 1] = src2[i];
dst2[i * 2] = src1[i + 2];
dst2[i * 2 + 1] = src2[i + 2];
} else {
// vuzp.<size> Qd, Qm.
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16], dst1[16], dst2[16];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
dst1[i] = src1[i * 2];
dst1[i + 8] = src2[i * 2];
dst2[i] = src1[i * 2 + 1];
dst2[i + 8] = src2[i * 2 + 1];
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
break;
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
break;
case Neon16: {
uint16_t src1[8], src2[8], dst1[8], dst2[8];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
dst1[i] = src1[i * 2];
dst1[i + 4] = src2[i * 2];
dst2[i] = src1[i * 2 + 1];
dst2[i + 4] = src2[i * 2 + 1];
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
break;
}
case Neon32: {
uint32_t src1[4], src2[4], dst1[4], dst2[4];
get_q_register(Vd, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 2; i++) {
dst1[i] = src1[i * 2];
dst1[i + 2] = src2[i * 2];
dst2[i] = src1[i * 2 + 1];
dst2[i + 2] = src2[i * 2 + 1];
}
set_q_register(Vd, dst1);
set_q_register(Vm, dst2);
break;
}
default:
UNREACHABLE();
break;
}
default:
UNREACHABLE();
break;
}
} else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) {
// vrev<op>.size Qd, Qm
......@@ -5180,6 +5232,49 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
UNREACHABLE();
break;
}
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
// vtrn.<size> Qd, Qm.
switch (size) {
case Neon8: {
uint8_t src[16], dst[16];
get_q_register(Vd, dst);
get_q_register(Vm, src);
for (int i = 0; i < 8; i++) {
std::swap(dst[2 * i + 1], src[2 * i]);
}
set_q_register(Vd, dst);
set_q_register(Vm, src);
break;
}
case Neon16: {
uint16_t src[8], dst[8];
get_q_register(Vd, dst);
get_q_register(Vm, src);
for (int i = 0; i < 4; i++) {
std::swap(dst[2 * i + 1], src[2 * i]);
}
set_q_register(Vd, dst);
set_q_register(Vm, src);
break;
}
case Neon32: {
uint32_t src[4], dst[4];
get_q_register(Vd, dst);
get_q_register(Vm, src);
for (int i = 0; i < 2; i++) {
std::swap(dst[2 * i + 1], src[2 * i]);
}
set_q_register(Vd, dst);
set_q_register(Vm, src);
break;
}
default:
UNREACHABLE();
break;
}
} else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
......
......@@ -1320,9 +1320,12 @@ TEST(15) {
uint32_t vext[4];
uint32_t vzip8a[4], vzip8b[4], vzip16a[4], vzip16b[4], vzip32a[4],
vzip32b[4];
uint32_t vuzp8a[4], vuzp8b[4], vuzp16a[4], vuzp16b[4], vuzp32a[4],
vuzp32b[4];
uint32_t vrev64_32[4], vrev64_16[4], vrev64_8[4];
uint32_t vrev32_16[4], vrev32_8[4];
uint32_t vrev16_8[4];
uint32_t vrev32_16[4], vrev32_8[4], vrev16_8[4];
uint32_t vtrn8a[4], vtrn8b[4], vtrn16a[4], vtrn16b[4], vtrn32a[4],
vtrn32b[4];
uint32_t vtbl[2], vtbx[2];
} T;
T t;
......@@ -1867,6 +1870,58 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vzip32b))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vuzp.
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ vmov(q1, q0);
__ vuzp(Neon8, q0, q1);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp8a))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp8b))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ vmov(q1, q0);
__ vuzp(Neon16, q0, q1);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp16a))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp16b))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ vmov(q1, q0);
__ vuzp(Neon32, q0, q1);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp32a))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp32b))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vtrn.
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ vmov(q1, q0);
__ vtrn(Neon8, q0, q1);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn8a))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn8b))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ vmov(q1, q0);
__ vtrn(Neon16, q0, q1);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn16a))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn16b))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ vmov(q1, q0);
__ vtrn(Neon32, q0, q1);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn32a))));
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn32b))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vrev64/32/16
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
......@@ -2074,6 +2129,20 @@ TEST(15) {
CHECK_EQ_32X4(vzip32a, 0x03020100u, 0x03020100u, 0x07060504u, 0x07060504u);
CHECK_EQ_32X4(vzip32b, 0x0b0a0908u, 0x0b0a0908u, 0x0f0e0d0cu, 0x0f0e0d0cu);
CHECK_EQ_32X4(vuzp8a, 0x06040200u, 0x0e0c0a08u, 0x06040200u, 0x0e0c0a08u);
CHECK_EQ_32X4(vuzp8b, 0x07050301u, 0x0f0d0b09u, 0x07050301u, 0x0f0d0b09u);
CHECK_EQ_32X4(vuzp16a, 0x05040100u, 0x0d0c0908u, 0x05040100u, 0x0d0c0908u);
CHECK_EQ_32X4(vuzp16b, 0x07060302u, 0x0f0e0b0au, 0x07060302u, 0x0f0e0b0au);
CHECK_EQ_32X4(vuzp32a, 0x03020100u, 0x0b0a0908u, 0x03020100u, 0x0b0a0908u);
CHECK_EQ_32X4(vuzp32b, 0x07060504u, 0x0f0e0d0cu, 0x07060504u, 0x0f0e0d0cu);
CHECK_EQ_32X4(vtrn8a, 0x02020000u, 0x06060404u, 0x0a0a0808u, 0x0e0e0c0cu);
CHECK_EQ_32X4(vtrn8b, 0x03030101u, 0x07070505u, 0x0b0b0909u, 0x0f0f0d0du);
CHECK_EQ_32X4(vtrn16a, 0x01000100u, 0x05040504u, 0x09080908u, 0x0d0c0d0cu);
CHECK_EQ_32X4(vtrn16b, 0x03020302u, 0x07060706u, 0x0b0a0b0au, 0x0f0e0f0eu);
CHECK_EQ_32X4(vtrn32a, 0x03020100u, 0x03020100u, 0x0b0a0908u, 0x0b0a0908u);
CHECK_EQ_32X4(vtrn32b, 0x07060504u, 0x07060504u, 0x0f0e0d0cu, 0x0f0e0d0cu);
// src: 0 1 2 3 4 5 6 7 8 9 a b c d e f (little endian)
CHECK_EQ_32X4(vrev64_32, 0x07060504u, 0x03020100u, 0x0f0e0d0cu,
0x0b0a0908u);
......
......@@ -1140,8 +1140,12 @@ TEST(Neon) {
"f2f0e360 vext.8 q15, q0, q8, #3");
COMPARE(vzip(Neon16, q15, q0),
"f3f6e1c0 vzip.16 q15, q0");
COMPARE(vuzp(Neon16, q15, q0),
"f3f6e140 vuzp.16 q15, q0");
COMPARE(vrev64(Neon8, q15, q0),
"f3f0e040 vrev64.8 q15, q0");
COMPARE(vtrn(Neon16, q15, q0),
"f3f6e0c0 vtrn.16 q15, q0");
COMPARE(vtbl(d0, NeonListOperand(d1, 1), d2),
"f3b10802 vtbl.8 d0, {d1}, d2");
COMPARE(vtbl(d31, NeonListOperand(d0, 2), d4),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment