Commit 148a903d authored by bbudge, committed by Commit bot

[ARM] Add vand, vorr NEON instructions.

LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2620343002
Cr-Commit-Position: refs/heads/master@{#42273}
parent ee2f07b6
......@@ -3934,16 +3934,9 @@ void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src,
}
void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) {
  // Qd = Qm: NEON quad-register move.
  // Instruction details available in ARM DDI 0406C.b, A8-938.
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  // vmov Qd, Qm is encoded as vorr Qd, Qm, Qm. Delegate to vorr() so that
  // exactly one instruction is emitted; the previous hand-written encoding
  // produced the same bits and would otherwise duplicate the move.
  vorr(dst, src, src);
}
void Assembler::vmvn(const QwNeonRegister dst, const QwNeonRegister src) {
......@@ -4138,19 +4131,82 @@ void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
B4 | vm);
}
void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) {
// Qd = veor(Qn, Qm) SIMD integer exclusive OR.
// Instruction details available in ARM DDI 0406C.b, A8.8.888.
DCHECK(IsEnabled(NEON));
// Selects which NEON three-register bitwise instruction is encoded by
// EncodeNeonBinaryBitwiseOp.
enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN };

// Builds (but does not emit) the instruction word for a quad-register NEON
// bitwise operation `op` with destination `dst` and sources `src1` (Qn) and
// `src2` (Qm). The caller is responsible for passing the result to emit().
//
// The operations share one base encoding and differ only in bit 24 and in
// bits 21:20, which are collected in `op_encoding` below.
static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op,
                                       const QwNeonRegister dst,
                                       const QwNeonRegister src1,
                                       const QwNeonRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VBIC:
      op_encoding = 0x1 * B20;
      break;
    case VBIF:
      op_encoding = B24 | 0x3 * B20;
      break;
    case VBIT:
      op_encoding = B24 | 0x2 * B20;
      break;
    case VBSL:
      op_encoding = B24 | 0x1 * B20;
      break;
    case VEOR:
      op_encoding = B24;
      break;
    case VORR:
      op_encoding = 0x2 * B20;
      break;
    case VORN:
      op_encoding = 0x3 * B20;
      break;
    case VAND:
      // op_encoding is 0.
      break;
    default:
      UNREACHABLE();
      break;
  }
  // Each register code is split into the value placed in the register field
  // (vd/vn/vm) and a separate single-bit part (d/n/m) placed elsewhere in the
  // instruction word.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  // B6 is set unconditionally, matching the quad-register forms this helper
  // is used for.
  return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 |
         n * B7 | B6 | m * B5 | B4 | vm;
}
void Assembler::vand(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // SIMD bitwise AND: Qd = vand(Qn, Qm).
  // See ARM DDI 0406C.b, A8.8.836 for the instruction details.
  DCHECK(IsEnabled(NEON));
  const Instr encoded_vand = EncodeNeonBinaryBitwiseOp(VAND, dst, src1, src2);
  emit(encoded_vand);
}
void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1,
                     const QwNeonRegister src2) {
  // SIMD bitwise select: Qd = vbsl(Qn, Qm).
  // See ARM DDI 0406C.b, A8-844 for the instruction details.
  DCHECK(IsEnabled(NEON));
  const Instr encoded_vbsl = EncodeNeonBinaryBitwiseOp(VBSL, dst, src1, src2);
  emit(encoded_vbsl);
}
void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // SIMD bitwise exclusive OR: Qd = veor(Qn, Qm).
  // See ARM DDI 0406C.b, A8.8.888 for the instruction details.
  DCHECK(IsEnabled(NEON));
  const Instr encoded_veor = EncodeNeonBinaryBitwiseOp(VEOR, dst, src1, src2);
  emit(encoded_veor);
}
void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // SIMD bitwise OR: Qd = vorr(Qn, Qm).
  // See ARM DDI 0406C.b, A8.8.976 for the instruction details.
  DCHECK(IsEnabled(NEON));
  const Instr encoded_vorr = EncodeNeonBinaryBitwiseOp(VORR, dst, src1, src2);
  emit(encoded_vorr);
}
void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1,
......@@ -4411,22 +4467,6 @@ void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst,
emit(EncodeNeonCompareOp(dt, dst, src1, src2, gt));
}
// Emits a NEON vbsl (bitwise select) on quad registers using a hand-written
// encoding.
// NOTE(review): this looks like the pre-refactor definition; another
// Assembler::vbsl with the same signature appears earlier in this text and
// builds the same bits through EncodeNeonBinaryBitwiseOp(VBSL, ...) --
// confirm only one of the two definitions is kept.
void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vbsl(Qn, Qm) SIMD bitwise select.
// Instruction details available in ARM DDI 0406C.b, A8-844.
// Split each register code into its register-field value (vd/vn/vm) and the
// separate single-bit part (d/n/m) that is placed elsewhere in the word.
int vd, d;
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
int op = 1;  // vbsl
// 0x1E6U * B23 equals 0x1E4U * B23 | B24, so this matches the VBSL case of
// EncodeNeonBinaryBitwiseOp (B24 | 0x1 * B20 on the shared base encoding).
emit(0x1E6U * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 |
n * B7 | B6 | m * B5 | B4 | vm);
}
void Assembler::vext(QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2, int bytes) {
DCHECK(IsEnabled(NEON));
......
......@@ -1367,7 +1367,10 @@ class Assembler : public AssemblerBase {
void vneg(const QwNeonRegister dst, const QwNeonRegister src);
void vneg(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src);
void veor(DwVfpRegister dst, DwVfpRegister src1, DwVfpRegister src2);
void vand(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vbsl(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void veor(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vorr(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vadd(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vadd(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1,
......@@ -1401,8 +1404,6 @@ class Assembler : public AssemblerBase {
const QwNeonRegister src2);
void vcgt(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src1, const QwNeonRegister src2);
void vbsl(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vext(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2, int bytes);
void vzip(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src);
......
......@@ -1858,12 +1858,19 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
switch (instr->SpecialValue()) {
case 4:
if (instr->Bits(21, 20) == 2 && instr->Bits(11, 8) == 1 &&
instr->Bit(4) == 1) {
// vmov Qd, Qm
instr->Bit(6) == 1 && instr->Bit(4) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vmov q%d, q%d", Vd, Vm);
int Vn = instr->VFPNRegValue(kSimd128Precision);
if (Vm == Vn) {
// vmov Qd, Qm
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vmov q%d, q%d", Vd, Vm);
} else {
// vorr Qd, Qn, Qm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vorr q%d, q%d, q%d", Vd, Vn, Vm);
}
} else if (instr->Bits(11, 8) == 8) {
const char* op = (instr->Bit(4) == 0) ? "vadd" : "vtst";
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
......@@ -1899,6 +1906,14 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
// vceq.f32 Qd, Qm, Qn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vceq.f32 q%d, q%d, q%d", Vd, Vn, Vm);
} else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 &&
instr->Bit(6) == 1 && instr->Bit(4) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
// vand Qd, Qn, Qm.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vand q%d, q%d, q%d", Vd, Vn, Vm);
} else if (instr->Bits(11, 8) == 0x3) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
......
......@@ -3832,12 +3832,21 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
case 4:
if (instr->Bits(21, 20) == 2 && instr->Bits(11, 8) == 1 &&
instr->Bit(4) == 1) {
// vmov Qd, Qm
// vmov Qd, Qm.
// vorr Qd, Qn, Qm.
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
uint32_t data[4];
get_q_register(Vm, data);
set_q_register(Vd, data);
int Vn = instr->VFPNRegValue(kSimd128Precision);
uint32_t src1[4];
get_q_register(Vm, src1);
if (Vm != Vn) {
uint32_t src2[4];
get_q_register(Vn, src2);
for (int i = 0; i < 4; i++) {
src1[i] = src1[i] | src2[i];
}
}
set_q_register(Vd, src1);
} else if (instr->Bits(11, 8) == 8) {
// vadd/vtst
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
......@@ -3993,6 +4002,19 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
}
set_q_register(Vd, dst);
} else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 &&
instr->Bit(6) == 1 && instr->Bit(4) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
// vand Qd, Qn, Qm.
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
src1[i] = src1[i] & src2[i];
}
set_q_register(Vd, src1);
} else if (instr->Bits(11, 8) == 0x3) {
// vcge/vcgt.s<size> Qd, Qm, Qn.
bool ge = instr->Bit(4) == 1;
......
......@@ -1284,7 +1284,7 @@ TEST(15) {
float vabsf[4], vnegf[4];
uint32_t vabs_s8[4], vabs_s16[4], vabs_s32[4];
uint32_t vneg_s8[4], vneg_s16[4], vneg_s32[4];
uint32_t veor[4];
uint32_t veor[4], vand[4], vorr[4];
float vdupf[4], vaddf[4], vsubf[4], vmulf[4];
uint32_t vadd8[4], vadd16[4], vadd32[4];
uint32_t vsub8[4], vsub16[4], vsub32[4];
......@@ -1456,13 +1456,29 @@ TEST(15) {
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// veor.
__ mov(r4, Operand(0x00aa));
__ mov(r4, Operand(0xaa));
__ vdup(Neon16, q0, r4);
__ mov(r4, Operand(0x0055));
__ mov(r4, Operand(0x55));
__ vdup(Neon16, q1, r4);
__ veor(q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, veor))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vand.
__ mov(r4, Operand(0xff));
__ vdup(Neon16, q0, r4);
__ mov(r4, Operand(0xfe));
__ vdup(Neon16, q1, r4);
__ vand(q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vand))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vorr.
__ mov(r4, Operand(0xaa));
__ vdup(Neon16, q0, r4);
__ mov(r4, Operand(0x55));
__ vdup(Neon16, q1, r4);
__ vorr(q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vorr))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vadd (float).
__ vmov(s4, 1.0);
......@@ -1819,6 +1835,8 @@ TEST(15) {
CHECK_EQ_32X4(vneg_s32, 0x80808081u, 0xfefefeffu, 0x00000001u, 0x7f7f7f80u);
CHECK_EQ_SPLAT(veor, 0x00ff00ffu);
CHECK_EQ_SPLAT(vand, 0x00fe00feu);
CHECK_EQ_SPLAT(vorr, 0x00ff00ffu);
CHECK_EQ_SPLAT(vaddf, 2.0);
CHECK_EQ_SPLAT(vsubf, -1.0);
CHECK_EQ_SPLAT(vmulf, 4.0);
......
......@@ -1025,6 +1025,10 @@ TEST(Neon) {
"f3020154 veor q0, q1, q2");
COMPARE(veor(q15, q0, q8),
"f340e170 veor q15, q0, q8");
COMPARE(vand(q15, q0, q8),
"f240e170 vand q15, q0, q8");
COMPARE(vorr(q15, q0, q8),
"f260e170 vorr q15, q0, q8");
COMPARE(vadd(q15, q0, q8),
"f240ed60 vadd.f32 q15, q0, q8");
COMPARE(vadd(Neon8, q0, q1, q2),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.