Commit ac620154 authored by bbudge's avatar bbudge Committed by Commit bot

[ARM] Refactor NEON binary ops in assembler.

- Refactors many FP, integer, and signed integer instructions where
possible.

LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2639443002
Cr-Commit-Position: refs/heads/master@{#42463}
parent 761f373b
This diff is collapsed.
......@@ -1371,47 +1371,38 @@ class Assembler : public AssemblerBase {
void vbsl(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void veor(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vorr(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vadd(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vadd(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vsub(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vsub(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vmul(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vmul(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vmin(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vmin(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src1, const QwNeonRegister src2);
void vmax(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vmax(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src1, const QwNeonRegister src2);
void vadd(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vsub(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vmul(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vmin(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vmin(NeonDataType dt, QwNeonRegister dst,
QwNeonRegister src1, QwNeonRegister src2);
void vmax(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vmax(NeonDataType dt, QwNeonRegister dst,
QwNeonRegister src1, QwNeonRegister src2);
// vrecpe and vrsqrte only support floating point lanes.
void vrecpe(const QwNeonRegister dst, const QwNeonRegister src);
void vrsqrte(const QwNeonRegister dst, const QwNeonRegister src);
void vrecps(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vrsqrts(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vtst(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vceq(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vceq(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vcge(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vcge(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src1, const QwNeonRegister src2);
void vcgt(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vcgt(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src1, const QwNeonRegister src2);
void vrecpe(QwNeonRegister dst, QwNeonRegister src);
void vrsqrte(QwNeonRegister dst, QwNeonRegister src);
void vrecps(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vrsqrts(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vceq(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vcge(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vcge(NeonDataType dt, QwNeonRegister dst,
QwNeonRegister src1, QwNeonRegister src2);
void vcgt(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vcgt(NeonDataType dt, QwNeonRegister dst,
QwNeonRegister src1, QwNeonRegister src2);
void vext(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2, int bytes);
void vzip(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src);
......
......@@ -327,16 +327,18 @@ enum LFlag {
// NEON data type
enum NeonDataType {
NeonS8 = 0x1, // U = 0, imm3 = 0b001
NeonS16 = 0x2, // U = 0, imm3 = 0b010
NeonS32 = 0x4, // U = 0, imm3 = 0b100
NeonU8 = 1 << 24 | 0x1, // U = 1, imm3 = 0b001
NeonU16 = 1 << 24 | 0x2, // U = 1, imm3 = 0b010
NeonU32 = 1 << 24 | 0x4, // U = 1, imm3 = 0b100
NeonDataTypeSizeMask = 0x7,
NeonDataTypeUMask = 1 << 24
NeonS8 = 0,
NeonS16 = 1,
NeonS32 = 2,
// Gap to make it easier to extract U and size.
NeonU8 = 4,
NeonU16 = 5,
NeonU32 = 6
};
inline int NeonU(NeonDataType dt) { return static_cast<int>(dt) >> 2; }
inline int NeonSz(NeonDataType dt) { return static_cast<int>(dt) & 0x3; }
enum NeonListType {
nlt_1 = 0x7,
nlt_2 = 0xA,
......
......@@ -1146,12 +1146,11 @@ void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code,
void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src,
NeonDataType dt, int lane) {
int bytes_per_lane = dt & NeonDataTypeSizeMask; // 1, 2, 4
int log2_bytes_per_lane = bytes_per_lane / 2; // 0, 1, 2
int byte = lane << log2_bytes_per_lane;
int size = NeonSz(dt); // 0, 1, 2
int byte = lane << size;
int double_word = byte >> kDoubleSizeLog2;
int double_byte = byte & (kDoubleSize - 1);
int double_lane = double_byte >> log2_bytes_per_lane;
int double_lane = double_byte >> size;
DwVfpRegister double_source =
DwVfpRegister::from_code(src.code() * 2 + double_word);
vmov(dt, dst, double_source, double_lane);
......@@ -1166,12 +1165,11 @@ void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src,
void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
Register src_lane, NeonDataType dt, int lane) {
Move(dst, src);
int bytes_per_lane = dt & NeonDataTypeSizeMask; // 1, 2, 4
int log2_bytes_per_lane = bytes_per_lane / 2; // 0, 1, 2
int byte = lane << log2_bytes_per_lane;
int size = NeonSz(dt); // 0, 1, 2
int byte = lane << size;
int double_word = byte >> kDoubleSizeLog2;
int double_byte = byte & (kDoubleSize - 1);
int double_lane = double_byte >> log2_bytes_per_lane;
int double_lane = double_byte >> size;
DwVfpRegister double_dst =
DwVfpRegister::from_code(dst.code() * 2 + double_word);
vmov(dt, double_dst, double_lane, src_lane);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment