Commit ac620154, authored by bbudge, committed by Commit bot

[ARM] Refactor NEON binary ops in assembler.

- Refactors many FP, integer, and signed integer instructions where
possible.

LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2639443002
Cr-Commit-Position: refs/heads/master@{#42463}
parent 761f373b
...@@ -3878,8 +3878,10 @@ void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) { ...@@ -3878,8 +3878,10 @@ void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
dst.split_code(&vd, &d); dst.split_code(&vd, &d);
int vm, m; int vm, m;
src.split_code(&vm, &m); src.split_code(&vm, &m);
emit(0xFU*B28 | B25 | (dt & NeonDataTypeUMask) | B23 | d*B22 | int U = NeonU(dt);
(dt & NeonDataTypeSizeMask)*B19 | vd*B12 | 0xA*B8 | m*B5 | B4 | vm); int imm3 = 1 << NeonSz(dt);
emit(0xFU * B28 | B25 | U * B24 | B23 | d * B22 | imm3 * B19 | vd * B12 |
0xA * B8 | m * B5 | B4 | vm);
} }
static int EncodeScalar(NeonDataType dt, int index) { static int EncodeScalar(NeonDataType dt, int index) {
...@@ -3928,7 +3930,7 @@ void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src, ...@@ -3928,7 +3930,7 @@ void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src,
int vn, n; int vn, n;
src.split_code(&vn, &n); src.split_code(&vn, &n);
int opc1_opc2 = EncodeScalar(dt, index); int opc1_opc2 = EncodeScalar(dt, index);
int u = (dt & NeonDataTypeUMask) != 0 ? 1 : 0; int u = NeonU(dt);
emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 | emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 |
n * B7 | B4 | opc1_opc2); n * B7 | B4 | opc1_opc2);
} }
...@@ -4209,81 +4211,165 @@ void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1, ...@@ -4209,81 +4211,165 @@ void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1,
emit(EncodeNeonBinaryBitwiseOp(VORR, dst, src1, src2)); emit(EncodeNeonBinaryBitwiseOp(VORR, dst, src1, src2));
} }
void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1, enum FPBinOp {
const QwNeonRegister src2) { VADDF,
DCHECK(IsEnabled(NEON)); VSUBF,
// Qd = vadd(Qn, Qm) SIMD floating point addition. VMULF,
// Instruction details available in ARM DDI 0406C.b, A8-830. VMINF,
VMAXF,
VRECPS,
VRSQRTS,
VCEQF,
VCGEF,
VCGTF
};
static Instr EncodeNeonBinOp(FPBinOp op, QwNeonRegister dst,
QwNeonRegister src1, QwNeonRegister src2) {
int op_encoding = 0;
switch (op) {
case VADDF:
op_encoding = 0xD * B8;
break;
case VSUBF:
op_encoding = B21 | 0xD * B8;
break;
case VMULF:
op_encoding = B24 | 0xD * B8 | B4;
break;
case VMINF:
op_encoding = B21 | 0xF * B8;
break;
case VMAXF:
op_encoding = 0xF * B8;
break;
case VRECPS:
op_encoding = 0xF * B8 | B4;
break;
case VRSQRTS:
op_encoding = B21 | 0xF * B8 | B4;
break;
case VCEQF:
op_encoding = 0xE * B8;
break;
case VCGEF:
op_encoding = B24 | 0xE * B8;
break;
case VCGTF:
op_encoding = B24 | B21 | 0xE * B8;
break;
default:
UNREACHABLE();
break;
}
int vd, d; int vd, d;
dst.split_code(&vd, &d); dst.split_code(&vd, &d);
int vn, n; int vn, n;
src1.split_code(&vn, &n); src1.split_code(&vn, &n);
int vm, m; int vm, m;
src2.split_code(&vm, &m); src2.split_code(&vm, &m);
emit(0x1E4U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | B6 | return 0x1E4U * B23 | d * B22 | vn * B16 | vd * B12 | n * B7 | B6 | m * B5 |
m * B5 | vm); vm | op_encoding;
} }
void Assembler::vadd(NeonSize size, QwNeonRegister dst, enum IntegerBinOp { VADD, VSUB, VMUL, VMIN, VMAX, VTST, VCEQ, VCGE, VCGT };
const QwNeonRegister src1, const QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
// Qd = vadd(Qn, Qm) SIMD integer addition. const QwNeonRegister dst,
// Instruction details available in ARM DDI 0406C.b, A8-828. const QwNeonRegister src1,
const QwNeonRegister src2) {
int op_encoding = 0;
switch (op) {
case VADD:
op_encoding = 0x8 * B8;
break;
case VSUB:
op_encoding = B24 | 0x8 * B8;
break;
case VMUL:
op_encoding = 0x9 * B8 | B4;
break;
case VMIN:
op_encoding = 0x6 * B8 | B4;
break;
case VMAX:
op_encoding = 0x6 * B8;
break;
case VTST:
op_encoding = 0x8 * B8 | B4;
break;
case VCEQ:
op_encoding = B24 | 0x8 * B8 | B4;
break;
case VCGE:
op_encoding = 0x3 * B8 | B4;
break;
case VCGT:
op_encoding = 0x3 * B8;
break;
default:
UNREACHABLE();
break;
}
int vd, d; int vd, d;
dst.split_code(&vd, &d); dst.split_code(&vd, &d);
int vn, n; int vn, n;
src1.split_code(&vn, &n); src1.split_code(&vn, &n);
int vm, m; int vm, m;
src2.split_code(&vm, &m); src2.split_code(&vm, &m);
int sz = static_cast<int>(size); int size = NeonSz(dt);
emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | int u = NeonU(dt);
n * B7 | B6 | m * B5 | vm); return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
n * B7 | B6 | m * B5 | vm | op_encoding;
} }
void Assembler::vsub(QwNeonRegister dst, const QwNeonRegister src1, static Instr EncodeNeonBinOp(IntegerBinOp op, NeonSize size,
const QwNeonRegister dst,
const QwNeonRegister src1,
const QwNeonRegister src2) { const QwNeonRegister src2) {
// Map NeonSize values to the signed values in NeonDataType, so the U bit
// will be 0.
return EncodeNeonBinOp(op, static_cast<NeonDataType>(size), dst, src1, src2);
}
// Emits the quad-register NEON floating point vector add:
//   Qd = vadd(Qn, Qm)
// Debug-checks that NEON is enabled before emitting.
// Encoding reference: ARM DDI 0406C.b, A8-830.
void Assembler::vadd(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Build the instruction word first, then append it to the code buffer.
  const Instr instruction = EncodeNeonBinOp(VADDF, dst, src1, src2);
  emit(instruction);
}
// Emits the quad-register NEON integer vector add for the given lane size:
//   Qd = vadd(Qn, Qm)
// Debug-checks that NEON is enabled before emitting.
// Encoding reference: ARM DDI 0406C.b, A8-828.
void Assembler::vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Build the instruction word first, then append it to the code buffer.
  const Instr instruction = EncodeNeonBinOp(VADD, size, dst, src1, src2);
  emit(instruction);
}
void Assembler::vsub(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vsub(Qn, Qm) SIMD floating point subtraction. // Qd = vsub(Qn, Qm) SIMD floating point subtraction.
// Instruction details available in ARM DDI 0406C.b, A8-1086. // Instruction details available in ARM DDI 0406C.b, A8-1086.
int vd, d; emit(EncodeNeonBinOp(VSUBF, dst, src1, src2));
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
emit(0x1E4U * B23 | d * B22 | B21 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 |
B6 | m * B5 | vm);
} }
void Assembler::vsub(NeonSize size, QwNeonRegister dst, void Assembler::vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src1, const QwNeonRegister src2) { QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vsub(Qn, Qm) SIMD integer subtraction. // Qd = vsub(Qn, Qm) SIMD integer subtraction.
// Instruction details available in ARM DDI 0406C.b, A8-1084. // Instruction details available in ARM DDI 0406C.b, A8-1084.
int vd, d; emit(EncodeNeonBinOp(VSUB, size, dst, src1, src2));
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
int sz = static_cast<int>(size);
emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
n * B7 | B6 | m * B5 | vm);
} }
void Assembler::vmul(QwNeonRegister dst, const QwNeonRegister src1, void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src2) { QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vmul(Qn, Qm) SIMD floating point multiply. // Qd = vmul(Qn, Qm) SIMD floating point multiply.
// Instruction details available in ARM DDI 0406C.b, A8-958. // Instruction details available in ARM DDI 0406C.b, A8-958.
int vd, d; emit(EncodeNeonBinOp(VMULF, dst, src1, src2));
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | B6 |
m * B5 | B4 | vm);
} }
void Assembler::vmul(NeonSize size, QwNeonRegister dst, void Assembler::vmul(NeonSize size, QwNeonRegister dst,
...@@ -4291,43 +4377,7 @@ void Assembler::vmul(NeonSize size, QwNeonRegister dst, ...@@ -4291,43 +4377,7 @@ void Assembler::vmul(NeonSize size, QwNeonRegister dst,
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vmul(Qn, Qm) SIMD integer multiply. // Qd = vmul(Qn, Qm) SIMD integer multiply.
// Instruction details available in ARM DDI 0406C.b, A8-960. // Instruction details available in ARM DDI 0406C.b, A8-960.
int vd, d; emit(EncodeNeonBinOp(VMUL, size, dst, src1, src2));
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
int sz = static_cast<int>(size);
emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x9 * B8 |
n * B7 | B6 | m * B5 | B4 | vm);
}
static Instr EncodeNeonMinMax(bool is_min, QwNeonRegister dst,
QwNeonRegister src1, QwNeonRegister src2) {
int vd, d;
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
int min = is_min ? 1 : 0;
return 0x1E4U * B23 | d * B22 | min * B21 | vn * B16 | vd * B12 | 0xF * B8 |
n * B7 | B6 | m * B5 | vm;
}
static Instr EncodeNeonMinMax(bool is_min, NeonDataType dt, QwNeonRegister dst,
QwNeonRegister src1, QwNeonRegister src2) {
int vd, d;
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
int min = is_min ? 1 : 0;
int size = (dt & NeonDataTypeSizeMask) / 2;
int U = dt & NeonDataTypeUMask;
return 0x1E4U * B23 | U | d * B22 | size * B20 | vn * B16 | vd * B12 |
0x6 * B8 | B6 | m * B5 | min * B4 | vm;
} }
void Assembler::vmin(const QwNeonRegister dst, const QwNeonRegister src1, void Assembler::vmin(const QwNeonRegister dst, const QwNeonRegister src1,
...@@ -4335,7 +4385,7 @@ void Assembler::vmin(const QwNeonRegister dst, const QwNeonRegister src1, ...@@ -4335,7 +4385,7 @@ void Assembler::vmin(const QwNeonRegister dst, const QwNeonRegister src1,
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vmin(Qn, Qm) SIMD floating point MIN. // Qd = vmin(Qn, Qm) SIMD floating point MIN.
// Instruction details available in ARM DDI 0406C.b, A8-928. // Instruction details available in ARM DDI 0406C.b, A8-928.
emit(EncodeNeonMinMax(true, dst, src1, src2)); emit(EncodeNeonBinOp(VMINF, dst, src1, src2));
} }
void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
...@@ -4343,7 +4393,7 @@ void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, ...@@ -4343,7 +4393,7 @@ void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vmin(Qn, Qm) SIMD integer MIN. // Qd = vmin(Qn, Qm) SIMD integer MIN.
// Instruction details available in ARM DDI 0406C.b, A8-926. // Instruction details available in ARM DDI 0406C.b, A8-926.
emit(EncodeNeonMinMax(true, dt, dst, src1, src2)); emit(EncodeNeonBinOp(VMIN, dt, dst, src1, src2));
} }
void Assembler::vmax(QwNeonRegister dst, QwNeonRegister src1, void Assembler::vmax(QwNeonRegister dst, QwNeonRegister src1,
...@@ -4351,7 +4401,7 @@ void Assembler::vmax(QwNeonRegister dst, QwNeonRegister src1, ...@@ -4351,7 +4401,7 @@ void Assembler::vmax(QwNeonRegister dst, QwNeonRegister src1,
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vmax(Qn, Qm) SIMD floating point MAX. // Qd = vmax(Qn, Qm) SIMD floating point MAX.
// Instruction details available in ARM DDI 0406C.b, A8-928. // Instruction details available in ARM DDI 0406C.b, A8-928.
emit(EncodeNeonMinMax(false, dst, src1, src2)); emit(EncodeNeonBinOp(VMAXF, dst, src1, src2));
} }
void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
...@@ -4359,7 +4409,7 @@ void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, ...@@ -4359,7 +4409,7 @@ void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vmax(Qn, Qm) SIMD integer MAX. // Qd = vmax(Qn, Qm) SIMD integer MAX.
// Instruction details available in ARM DDI 0406C.b, A8-926. // Instruction details available in ARM DDI 0406C.b, A8-926.
emit(EncodeNeonMinMax(false, dt, dst, src1, src2)); emit(EncodeNeonBinOp(VMAX, dt, dst, src1, src2));
} }
static Instr EncodeNeonEstimateOp(bool is_rsqrt, QwNeonRegister dst, static Instr EncodeNeonEstimateOp(bool is_rsqrt, QwNeonRegister dst,
...@@ -4373,158 +4423,90 @@ static Instr EncodeNeonEstimateOp(bool is_rsqrt, QwNeonRegister dst, ...@@ -4373,158 +4423,90 @@ static Instr EncodeNeonEstimateOp(bool is_rsqrt, QwNeonRegister dst,
rsqrt * B7 | B6 | m * B5 | vm; rsqrt * B7 | B6 | m * B5 | vm;
} }
void Assembler::vrecpe(const QwNeonRegister dst, const QwNeonRegister src) { void Assembler::vrecpe(QwNeonRegister dst, QwNeonRegister src) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vrecpe(Qm) SIMD reciprocal estimate. // Qd = vrecpe(Qm) SIMD reciprocal estimate.
// Instruction details available in ARM DDI 0406C.b, A8-1024. // Instruction details available in ARM DDI 0406C.b, A8-1024.
emit(EncodeNeonEstimateOp(false, dst, src)); emit(EncodeNeonEstimateOp(false, dst, src));
} }
void Assembler::vrsqrte(const QwNeonRegister dst, const QwNeonRegister src) { void Assembler::vrsqrte(QwNeonRegister dst, QwNeonRegister src) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vrsqrte(Qm) SIMD reciprocal square root estimate. // Qd = vrsqrte(Qm) SIMD reciprocal square root estimate.
// Instruction details available in ARM DDI 0406C.b, A8-1038. // Instruction details available in ARM DDI 0406C.b, A8-1038.
emit(EncodeNeonEstimateOp(true, dst, src)); emit(EncodeNeonEstimateOp(true, dst, src));
} }
static Instr EncodeNeonRefinementOp(bool is_rsqrt, QwNeonRegister dst, void Assembler::vrecps(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src1, QwNeonRegister src2) { QwNeonRegister src2) {
int vd, d;
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
int rsqrt = is_rsqrt ? 1 : 0;
return 0x1E4U * B23 | d * B22 | rsqrt * B21 | vn * B16 | vd * B12 | 0xF * B8 |
n * B7 | B6 | m * B5 | B4 | vm;
}
void Assembler::vrecps(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vrecps(Qn, Qm) SIMD reciprocal refinement step. // Qd = vrecps(Qn, Qm) SIMD reciprocal refinement step.
// Instruction details available in ARM DDI 0406C.b, A8-1026. // Instruction details available in ARM DDI 0406C.b, A8-1026.
emit(EncodeNeonRefinementOp(false, dst, src1, src2)); emit(EncodeNeonBinOp(VRECPS, dst, src1, src2));
} }
void Assembler::vrsqrts(const QwNeonRegister dst, const QwNeonRegister src1, void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src2) { QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step. // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step.
// Instruction details available in ARM DDI 0406C.b, A8-1040. // Instruction details available in ARM DDI 0406C.b, A8-1040.
emit(EncodeNeonRefinementOp(true, dst, src1, src2)); emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2));
} }
void Assembler::vtst(NeonSize size, QwNeonRegister dst, void Assembler::vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src1, const QwNeonRegister src2) { QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vtst(Qn, Qm) SIMD test integer operands. // Qd = vtst(Qn, Qm) SIMD test integer operands.
// Instruction details available in ARM DDI 0406C.b, A8-1098. // Instruction details available in ARM DDI 0406C.b, A8-1098.
int vd, d; emit(EncodeNeonBinOp(VTST, size, dst, src1, src2));
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
int sz = static_cast<int>(size);
emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
n * B7 | B6 | m * B5 | B4 | vm);
} }
void Assembler::vceq(const QwNeonRegister dst, const QwNeonRegister src1, void Assembler::vceq(QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src2) { QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vceq(Qn, Qm) SIMD floating point compare equal. // Qd = vceq(Qn, Qm) SIMD floating point compare equal.
// Instruction details available in ARM DDI 0406C.b, A8-844. // Instruction details available in ARM DDI 0406C.b, A8-844.
int vd, d; emit(EncodeNeonBinOp(VCEQF, dst, src1, src2));
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
emit(0x1E4U * B23 | d * B22 | vn * B16 | vd * B12 | 0xe * B8 | n * B7 | B6 |
m * B5 | vm);
} }
void Assembler::vceq(NeonSize size, QwNeonRegister dst, void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src1, const QwNeonRegister src2) { QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vceq(Qn, Qm) SIMD integer compare equal. // Qd = vceq(Qn, Qm) SIMD integer compare equal.
// Instruction details available in ARM DDI 0406C.b, A8-844. // Instruction details available in ARM DDI 0406C.b, A8-844.
int vd, d; emit(EncodeNeonBinOp(VCEQ, size, dst, src1, src2));
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
int sz = static_cast<int>(size);
emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
n * B7 | B6 | m * B5 | B4 | vm);
} }
static Instr EncodeNeonCompareOp(const QwNeonRegister dst, void Assembler::vcge(QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src1, QwNeonRegister src2) {
const QwNeonRegister src2, Condition cond) {
DCHECK(cond == ge || cond == gt);
int vd, d;
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
int is_gt = (cond == gt) ? 1 : 0;
return 0x1E6U * B23 | d * B22 | is_gt * B21 | vn * B16 | vd * B12 | 0xe * B8 |
n * B7 | B6 | m * B5 | vm;
}
static Instr EncodeNeonCompareOp(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src1,
const QwNeonRegister src2, Condition cond) {
DCHECK(cond == ge || cond == gt);
int vd, d;
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
int size = (dt & NeonDataTypeSizeMask) / 2;
int U = dt & NeonDataTypeUMask;
int is_ge = (cond == ge) ? 1 : 0;
return 0x1E4U * B23 | U | d * B22 | size * B20 | vn * B16 | vd * B12 |
0x3 * B8 | n * B7 | B6 | m * B5 | is_ge * B4 | vm;
}
void Assembler::vcge(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vcge(Qn, Qm) SIMD floating point compare greater or equal. // Qd = vcge(Qn, Qm) SIMD floating point compare greater or equal.
// Instruction details available in ARM DDI 0406C.b, A8-848. // Instruction details available in ARM DDI 0406C.b, A8-848.
emit(EncodeNeonCompareOp(dst, src1, src2, ge)); emit(EncodeNeonBinOp(VCGEF, dst, src1, src2));
} }
void Assembler::vcge(NeonDataType dt, QwNeonRegister dst, void Assembler::vcge(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src1, const QwNeonRegister src2) { QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vcge(Qn, Qm) SIMD integer compare greater or equal. // Qd = vcge(Qn, Qm) SIMD integer compare greater or equal.
// Instruction details available in ARM DDI 0406C.b, A8-848. // Instruction details available in ARM DDI 0406C.b, A8-848.
emit(EncodeNeonCompareOp(dt, dst, src1, src2, ge)); emit(EncodeNeonBinOp(VCGE, dt, dst, src1, src2));
} }
void Assembler::vcgt(const QwNeonRegister dst, const QwNeonRegister src1, void Assembler::vcgt(QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src2) { QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vcgt(Qn, Qm) SIMD floating point compare greater than. // Qd = vcgt(Qn, Qm) SIMD floating point compare greater than.
// Instruction details available in ARM DDI 0406C.b, A8-852. // Instruction details available in ARM DDI 0406C.b, A8-852.
emit(EncodeNeonCompareOp(dst, src1, src2, gt)); emit(EncodeNeonBinOp(VCGTF, dst, src1, src2));
} }
void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src1, const QwNeonRegister src2) { QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
// Qd = vcgt(Qn, Qm) SIMD integer compare greater than. // Qd = vcgt(Qn, Qm) SIMD integer compare greater than.
// Instruction details available in ARM DDI 0406C.b, A8-852. // Instruction details available in ARM DDI 0406C.b, A8-852.
emit(EncodeNeonCompareOp(dt, dst, src1, src2, gt)); emit(EncodeNeonBinOp(VCGT, dt, dst, src1, src2));
} }
void Assembler::vext(QwNeonRegister dst, const QwNeonRegister src1, void Assembler::vext(QwNeonRegister dst, const QwNeonRegister src1,
......
...@@ -1371,47 +1371,38 @@ class Assembler : public AssemblerBase { ...@@ -1371,47 +1371,38 @@ class Assembler : public AssemblerBase {
void vbsl(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2); void vbsl(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void veor(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2); void veor(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vorr(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2); void vorr(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vadd(const QwNeonRegister dst, const QwNeonRegister src1, void vadd(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
const QwNeonRegister src2); void vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
void vadd(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1, QwNeonRegister src2);
const QwNeonRegister src2); void vsub(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vsub(const QwNeonRegister dst, const QwNeonRegister src1, void vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src2); QwNeonRegister src2);
void vsub(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1, void vmul(QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src2); QwNeonRegister src2);
void vmul(const QwNeonRegister dst, const QwNeonRegister src1, void vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src2); QwNeonRegister src2);
void vmul(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1, void vmin(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
const QwNeonRegister src2); void vmin(NeonDataType dt, QwNeonRegister dst,
void vmin(const QwNeonRegister dst, const QwNeonRegister src1, QwNeonRegister src1, QwNeonRegister src2);
const QwNeonRegister src2); void vmax(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vmin(NeonDataType dt, const QwNeonRegister dst, void vmax(NeonDataType dt, QwNeonRegister dst,
const QwNeonRegister src1, const QwNeonRegister src2); QwNeonRegister src1, QwNeonRegister src2);
void vmax(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vmax(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src1, const QwNeonRegister src2);
// vrecpe and vrsqrte only support floating point lanes. // vrecpe and vrsqrte only support floating point lanes.
void vrecpe(const QwNeonRegister dst, const QwNeonRegister src); void vrecpe(QwNeonRegister dst, QwNeonRegister src);
void vrsqrte(const QwNeonRegister dst, const QwNeonRegister src); void vrsqrte(QwNeonRegister dst, QwNeonRegister src);
void vrecps(const QwNeonRegister dst, const QwNeonRegister src1, void vrecps(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
const QwNeonRegister src2); void vrsqrts(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vrsqrts(const QwNeonRegister dst, const QwNeonRegister src1, void vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
const QwNeonRegister src2); QwNeonRegister src2);
void vtst(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1, void vceq(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
const QwNeonRegister src2); void vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
void vceq(const QwNeonRegister dst, const QwNeonRegister src1, QwNeonRegister src2);
const QwNeonRegister src2); void vcge(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vceq(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1, void vcge(NeonDataType dt, QwNeonRegister dst,
const QwNeonRegister src2); QwNeonRegister src1, QwNeonRegister src2);
void vcge(const QwNeonRegister dst, const QwNeonRegister src1, void vcgt(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
const QwNeonRegister src2); void vcgt(NeonDataType dt, QwNeonRegister dst,
void vcge(NeonDataType dt, const QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
const QwNeonRegister src1, const QwNeonRegister src2);
void vcgt(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vcgt(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src1, const QwNeonRegister src2);
void vext(const QwNeonRegister dst, const QwNeonRegister src1, void vext(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2, int bytes); const QwNeonRegister src2, int bytes);
void vzip(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src); void vzip(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src);
......
...@@ -327,16 +327,18 @@ enum LFlag { ...@@ -327,16 +327,18 @@ enum LFlag {
// NEON data type // NEON data type
enum NeonDataType { enum NeonDataType {
NeonS8 = 0x1, // U = 0, imm3 = 0b001 NeonS8 = 0,
NeonS16 = 0x2, // U = 0, imm3 = 0b010 NeonS16 = 1,
NeonS32 = 0x4, // U = 0, imm3 = 0b100 NeonS32 = 2,
NeonU8 = 1 << 24 | 0x1, // U = 1, imm3 = 0b001 // Gap to make it easier to extract U and size.
NeonU16 = 1 << 24 | 0x2, // U = 1, imm3 = 0b010 NeonU8 = 4,
NeonU32 = 1 << 24 | 0x4, // U = 1, imm3 = 0b100 NeonU16 = 5,
NeonDataTypeSizeMask = 0x7, NeonU32 = 6
NeonDataTypeUMask = 1 << 24
}; };
// Returns the U (unsigned) bit of a NEON data type by shifting out the two
// low size bits: 0 for NeonS8/S16/S32 (values 0-2), 1 for NeonU8/U16/U32
// (values 4-6).
inline int NeonU(NeonDataType dt) {
  const int encoded = static_cast<int>(dt);
  return encoded >> 2;
}
// Returns the size field of a NEON data type, i.e. its two low bits:
// 0, 1 or 2 for the 8-, 16- and 32-bit types respectively, regardless of
// signedness.
inline int NeonSz(NeonDataType dt) {
  const int size_mask = 0x3;
  return static_cast<int>(dt) & size_mask;
}
enum NeonListType { enum NeonListType {
nlt_1 = 0x7, nlt_1 = 0x7,
nlt_2 = 0xA, nlt_2 = 0xA,
......
...@@ -1146,12 +1146,11 @@ void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code, ...@@ -1146,12 +1146,11 @@ void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code,
void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src, void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src,
NeonDataType dt, int lane) { NeonDataType dt, int lane) {
int bytes_per_lane = dt & NeonDataTypeSizeMask; // 1, 2, 4 int size = NeonSz(dt); // 0, 1, 2
int log2_bytes_per_lane = bytes_per_lane / 2; // 0, 1, 2 int byte = lane << size;
int byte = lane << log2_bytes_per_lane;
int double_word = byte >> kDoubleSizeLog2; int double_word = byte >> kDoubleSizeLog2;
int double_byte = byte & (kDoubleSize - 1); int double_byte = byte & (kDoubleSize - 1);
int double_lane = double_byte >> log2_bytes_per_lane; int double_lane = double_byte >> size;
DwVfpRegister double_source = DwVfpRegister double_source =
DwVfpRegister::from_code(src.code() * 2 + double_word); DwVfpRegister::from_code(src.code() * 2 + double_word);
vmov(dt, dst, double_source, double_lane); vmov(dt, dst, double_source, double_lane);
...@@ -1166,12 +1165,11 @@ void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src, ...@@ -1166,12 +1165,11 @@ void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src,
void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
Register src_lane, NeonDataType dt, int lane) { Register src_lane, NeonDataType dt, int lane) {
Move(dst, src); Move(dst, src);
int bytes_per_lane = dt & NeonDataTypeSizeMask; // 1, 2, 4 int size = NeonSz(dt); // 0, 1, 2
int log2_bytes_per_lane = bytes_per_lane / 2; // 0, 1, 2 int byte = lane << size;
int byte = lane << log2_bytes_per_lane;
int double_word = byte >> kDoubleSizeLog2; int double_word = byte >> kDoubleSizeLog2;
int double_byte = byte & (kDoubleSize - 1); int double_byte = byte & (kDoubleSize - 1);
int double_lane = double_byte >> log2_bytes_per_lane; int double_lane = double_byte >> size;
DwVfpRegister double_dst = DwVfpRegister double_dst =
DwVfpRegister::from_code(dst.code() * 2 + double_word); DwVfpRegister::from_code(dst.code() * 2 + double_word);
vmov(dt, double_dst, double_lane, src_lane); vmov(dt, double_dst, double_lane, src_lane);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment