Commit ef3f125d authored by bbudge, committed by Commit bot

[Turbofan] Add ARM support for simd128 moves and swaps.

- Adds vmov, vswp instructions for QwNeonRegisters.
- Refactors the existing vswp implementation, moving the non-NEON
adaptation to MacroAssembler.
- Adds simd128 support to CodeGenerator AssembleMove, AssembleSwap.

LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2523933002
Cr-Commit-Position: refs/heads/master@{#41291}
parent 2d3d66ab
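
For orientation, a rough sketch of the surface this CL adds, using the masm __ shorthand from the V8 sources (illustrative only, not part of the diff):

  __ vmov(q1, q0);  // Q-register move: emits vmov q1, q0
  __ Swap(q4, q5);  // Q-register swap: emits vswp q4, q5
  __ Swap(d0, d1);  // D-register swap: vswp when NEON is available,
                    // otherwise three vmovs through kScratchDoubleReg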
......@@ -2873,7 +2873,6 @@ void Assembler::vmov(const DwVfpRegister dst,
vm);
}
void Assembler::vmov(const DwVfpRegister dst,
const VmovIndex index,
const Register src,
......@@ -3903,28 +3902,70 @@ void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
(dt & NeonDataTypeSizeMask)*B19 | vd*B12 | 0xA*B8 | m*B5 | B4 | vm);
}
void Assembler::vswp(DwVfpRegister srcdst0, DwVfpRegister srcdst1) {
  DCHECK(VfpRegisterIsAvailable(srcdst0));
  DCHECK(VfpRegisterIsAvailable(srcdst1));
  DCHECK(!srcdst0.is(kScratchDoubleReg));
  DCHECK(!srcdst1.is(kScratchDoubleReg));
  if (srcdst0.is(srcdst1)) return;  // Swapping aliased registers emits nothing.
  if (CpuFeatures::IsSupported(NEON)) {
    // Instruction details available in ARM DDI 0406C.b, A8.8.418.
    // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) |
    // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0)
    int vd, d;
    srcdst0.split_code(&vd, &d);
    int vm, m;
    srcdst1.split_code(&vm, &m);
    emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | m * B5 | vm);
  } else {
    vmov(kScratchDoubleReg, srcdst0);
    vmov(srcdst0, srcdst1);
    vmov(srcdst1, kScratchDoubleReg);
  }
}

void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Instruction details available in ARM DDI 0406C.b, A8-938.
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(0x1E4 * B23 | d * B22 | 2 * B20 | vm * B16 | vd * B12 | B8 | m * B7 |
       B6 | m * B5 | B4 | vm);
}

void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.418.
  // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) |
  // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | m * B5 | vm);
}

void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.418.
  // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) |
  // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | B6 | m * B5 |
       vm);
}
void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
DwVfpRegister src2) {
// Instruction details available in ARM DDI 0406C.b, A8.8.888.
DCHECK(IsEnabled(NEON));
int vd, d;
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
emit(0x1E6 * B23 | d * B22 | vn * B16 | vd * B12 | B8 | n * B7 | m * B5 | B4 |
vm);
}
void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) {
// Instruction details available in ARM DDI 0406C.b, A8.8.888.
DCHECK(IsEnabled(NEON));
int vd, d;
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
emit(0x1E6 * B23 | d * B22 | vn * B16 | vd * B12 | B8 | n * B7 | B6 | m * B5 |
B4 | vm);
}
// Pseudo instructions.
......
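
As a sanity check, the two vswp emit expressions above can be evaluated against the encodings the disassembler test at the end of this CL expects (f3b2002f and f3b2006e). A standalone sketch, assuming only that B<n> denotes 1 << n as in the assembler headers:

  #include <cassert>
  #include <cstdint>

  uint32_t EncodeVswp(uint32_t vd, uint32_t d, uint32_t vm, uint32_t m,
                      bool quad) {
    const uint32_t B5 = 1u << 5, B6 = 1u << 6, B12 = 1u << 12,
                   B16 = 1u << 16, B22 = 1u << 22, B23 = 1u << 23,
                   B28 = 1u << 28;
    return 0xFu * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 |
           (quad ? B6 : 0u) | m * B5 | vm;
  }

  int main() {
    // vswp d0, d31: d0 -> (vd=0, d=0), d31 -> (vm=15, m=1).
    assert(EncodeVswp(0, 0, 15, 1, false) == 0xf3b2002fu);
    // vswp q0, q15: Q registers encode via their low D register, so
    // q15 -> d30 -> (vm=14, m=1); bit 6 selects the Q form.
    assert(EncodeVswp(0, 0, 14, 1, true) == 0xf3b2006eu);
    return 0;
  }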
......@@ -302,6 +302,20 @@ struct QwNeonRegister {
*m = (encoded_code & 0x10) >> 4;
*vm = encoded_code & 0x0F;
}
DwVfpRegister low() const {
DwVfpRegister reg;
reg.reg_code = reg_code * 2;
DCHECK(reg.is_valid());
return reg;
}
DwVfpRegister high() const {
DwVfpRegister reg;
reg.reg_code = reg_code * 2 + 1;
DCHECK(reg.is_valid());
return reg;
}
int reg_code;
};
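
The new low()/high() accessors expose the architectural aliasing qN = {d2N, d2N+1}. For example, q7 maps to d14 and d15, which is what lets kDoubleRegZero (d14) and kScratchDoubleReg (d15) pair up as kScratchQuadReg (q7) in the defines below:

  DCHECK(q7.low().is(d14));
  DCHECK(q7.high().is(d15));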
......@@ -403,9 +417,11 @@ const QwNeonRegister q15 = { 15 };
// compilation unit that includes this header doesn't use the variables.
#define kFirstCalleeSavedDoubleReg d8
#define kLastCalleeSavedDoubleReg d15
// kDoubleRegZero and kScratchDoubleReg must pair to form kScratchQuadReg.
#define kDoubleRegZero d14
#define kScratchDoubleReg d15
// After using kScratchQuadReg, kDoubleRegZero must be reset to 0.
#define kScratchQuadReg q7
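
Because kScratchQuadReg overlaps kDoubleRegZero, any code that clobbers q7 must re-establish the zero value that generated code elsewhere relies on. The pattern used by the code generator later in this CL is:

  // ... clobber q7, e.g. vld1/vst1 through kScratchQuadReg.low() ...
  __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);  // re-zero d14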
// Coprocessor register
struct CRegister {
......@@ -1313,8 +1329,11 @@ class Assembler : public AssemblerBase {
const NeonMemOperand& dst);
void vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src);
// Currently, vswp supports only D0 to D31.
void vswp(DwVfpRegister srcdst0, DwVfpRegister srcdst1);
void vmov(const QwNeonRegister dst, const QwNeonRegister src);
void vswp(DwVfpRegister dst, DwVfpRegister src);
void vswp(QwNeonRegister dst, QwNeonRegister src);
void veor(DwVfpRegister dst, DwVfpRegister src1, DwVfpRegister src2);
void veor(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
// Pseudo instructions
......@@ -1608,6 +1627,12 @@ class Assembler : public AssemblerBase {
(reg.reg_code < LowDwVfpRegister::kMaxNumLowRegisters);
}
bool VfpRegisterIsAvailable(QwNeonRegister reg) {
DCHECK(reg.is_valid());
return IsEnabled(VFP32DREGS) ||
(reg.reg_code < LowDwVfpRegister::kMaxNumLowRegisters / 2);
}
private:
int next_buffer_check_; // pc offset of next buffer check
......
......@@ -374,10 +374,10 @@ const int32_t kDefaultStopCode = -1;
// Type of VFP register. Determines register encoding.
enum VFPRegPrecision {
kSinglePrecision = 0,
kDoublePrecision = 1
kDoublePrecision = 1,
kSimd128Precision = 2
};
// VFP FPSCR constants.
enum VFPConversionMode {
kFPSCRRounding = 0,
......@@ -674,8 +674,15 @@ class Instruction {
inline int VFPGlueRegValue(VFPRegPrecision pre, int four_bit, int one_bit) {
  if (pre == kSinglePrecision) {
    return (Bits(four_bit + 3, four_bit) << 1) | Bit(one_bit);
  }
  return (Bit(one_bit) << 4) | Bits(four_bit + 3, four_bit);
}

inline int VFPGlueRegValue(VFPRegPrecision pre, int four_bit, int one_bit) {
  if (pre == kSinglePrecision) {
    return (Bits(four_bit + 3, four_bit) << 1) | Bit(one_bit);
  } else {
    int reg_num = (Bit(one_bit) << 4) | Bits(four_bit + 3, four_bit);
    if (pre == kDoublePrecision) {
      return reg_num;
    }
    DCHECK_EQ(kSimd128Precision, pre);
    DCHECK_EQ(reg_num & 1, 0);
    return reg_num / 2;
  }
}
// We need to prevent the creation of instances of class Instruction.
......
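
A worked example of the new kSimd128Precision branch, using the vswp q8, q9 encoding (f3f20062) checked by the disassembler test below; the shifts restate the Bit/Bits accessors for that instruction:

  uint32_t instr = 0xf3f20062;   // vswp q8, q9
  int d = (instr >> 22) & 1;     // D = 1
  int vd = (instr >> 12) & 0xF;  // Vd = 0
  int m = (instr >> 5) & 1;      // M = 1
  int vm = instr & 0xF;          // Vm = 2
  int qd = ((d << 4) | vd) / 2;  // reg_num 16 -> q8
  int qm = ((m << 4) | vm) / 2;  // reg_num 18 -> q9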
......@@ -1801,6 +1801,18 @@ static const char* const barrier_option_names[] = {
void Decoder::DecodeSpecialCondition(Instruction* instr) {
switch (instr->SpecialValue()) {
case 4:
if (instr->Bits(21, 20) == 2 && instr->Bits(11, 8) == 1 &&
instr->Bit(4) == 1) {
// vmov Qd, Qm
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vmov q%d, q%d", Vd, Vm);
} else {
Unknown(instr);
}
break;
case 5:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
......@@ -1815,6 +1827,29 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
Unknown(instr);
}
break;
case 6:
if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 &&
instr->Bit(4) == 1) {
if (instr->Bit(6) == 0) {
// veor Dd, Dn, Dm
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vn = instr->VFPNRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"veor d%d, d%d, d%d", Vd, Vn, Vm);
} else {
// veor Qd, Qn, Qm
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"veor q%d, q%d, q%d", Vd, Vn, Vm);
}
} else {
Unknown(instr);
}
break;
case 7:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
......@@ -1827,11 +1862,17 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
"vmovl.u%d q%d, d%d", imm3*8, Vd, Vm);
} else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) &&
(instr->Bit(4) == 0)) {
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
char rtype = (instr->Bit(6) == 0) ? 'd' : 'q';
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vswp %c%d, %c%d", rtype, Vd, rtype, Vm);
if (instr->Bit(6) == 0) {
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vswp d%d, d%d", Vd, Vm);
} else {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vswp q%d, q%d", Vd, Vm);
}
} else {
Unknown(instr);
}
......
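
To trace how an instruction reaches the new case 4, here is the field breakdown of vmov q0, q15 (encoding f22e01fe, from the disassembler test below):

  uint32_t instr = 0xf22e01fe;          // vmov q0, q15
  assert(((instr >> 23) & 0x1F) == 4);  // Bits(27, 23): SpecialValue 4
  assert(((instr >> 20) & 0x3) == 2);   // Bits(21, 20) == 2
  assert(((instr >> 8) & 0xF) == 1);    // Bits(11, 8) == 1
  assert(((instr >> 4) & 0x1) == 1);    // Bit(4) == 1
  // VFPDRegValue/VFPMRegValue(kSimd128Precision) then halve the 5-bit
  // register numbers: Vd -> 0 / 2 = q0, (M:Vm) -> 30 / 2 = q15.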
......@@ -264,6 +264,35 @@ void MacroAssembler::Move(DwVfpRegister dst, DwVfpRegister src,
}
}
void MacroAssembler::Move(QwNeonRegister dst, QwNeonRegister src) {
if (!dst.is(src)) {
vmov(dst, src);
}
}
void MacroAssembler::Swap(DwVfpRegister srcdst0, DwVfpRegister srcdst1) {
if (srcdst0.is(srcdst1)) return; // Swapping aliased registers emits nothing.
DCHECK(VfpRegisterIsAvailable(srcdst0));
DCHECK(VfpRegisterIsAvailable(srcdst1));
if (CpuFeatures::IsSupported(NEON)) {
vswp(srcdst0, srcdst1);
} else {
DCHECK(!srcdst0.is(kScratchDoubleReg));
DCHECK(!srcdst1.is(kScratchDoubleReg));
vmov(kScratchDoubleReg, srcdst0);
vmov(srcdst0, srcdst1);
vmov(srcdst1, kScratchDoubleReg);
}
}
void MacroAssembler::Swap(QwNeonRegister srcdst0, QwNeonRegister srcdst1) {
if (!srcdst0.is(srcdst1)) {
vswp(srcdst0, srcdst1);
}
}
void MacroAssembler::Mls(Register dst, Register src1, Register src2,
Register srcA, Condition cond) {
if (CpuFeatures::IsSupported(ARMv7)) {
......
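
What the Move/Swap wrappers add over the raw instructions is the handling of aliased operands and the non-NEON fallback (a sketch):

  __ Move(q1, q1);  // emits nothing
  __ Swap(d3, d3);  // emits nothing
  __ Swap(d0, d1);  // vswp d0, d1 with NEON, else three vmovs via scratch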
......@@ -184,6 +184,10 @@ class MacroAssembler: public Assembler {
}
void Move(SwVfpRegister dst, SwVfpRegister src, Condition cond = al);
void Move(DwVfpRegister dst, DwVfpRegister src, Condition cond = al);
void Move(QwNeonRegister dst, QwNeonRegister src);
// Register swap.
void Swap(DwVfpRegister srcdst0, DwVfpRegister srcdst1);
void Swap(QwNeonRegister srcdst0, QwNeonRegister srcdst1);
void Load(Register dst, const MemOperand& src, Representation r);
void Store(Register src, const MemOperand& dst, Representation r);
......
......@@ -3743,6 +3743,19 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) {
void Simulator::DecodeSpecialCondition(Instruction* instr) {
switch (instr->SpecialValue()) {
case 4:
if (instr->Bits(21, 20) == 2 && instr->Bits(11, 8) == 1 &&
instr->Bit(4) == 1) {
// vmov Qd, Qm
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
uint32_t data[4];
get_q_register(Vm, data);
set_q_register(Vd, data);
} else {
UNIMPLEMENTED();
}
break;
case 5:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
......@@ -3767,6 +3780,35 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
UNIMPLEMENTED();
}
break;
case 6:
if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 &&
instr->Bit(4) == 1) {
if (instr->Bit(6) == 0) {
// veor Dd, Dn, Dm
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vn = instr->VFPNRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
uint64_t n_data, m_data;
get_d_register(Vn, &n_data);
get_d_register(Vm, &m_data);
n_data ^= m_data;
set_d_register(Vd, &n_data);
} else {
// veor Qd, Qn, Qm
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
uint32_t n_data[4], m_data[4];
get_q_register(Vn, n_data);
get_q_register(Vm, m_data);
for (int i = 0; i < 4; i++) n_data[i] ^= m_data[i];
set_q_register(Vd, n_data);
}
} else {
UNIMPLEMENTED();
}
break;
case 7:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
......@@ -3789,18 +3831,24 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
} else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) &&
(instr->Bit(4) == 0)) {
int vd = instr->VFPDRegValue(kDoublePrecision);
int vm = instr->VFPMRegValue(kDoublePrecision);
if (instr->Bit(6) == 0) {
// vswp Dd, Dm.
uint64_t dval, mval;
int vd = instr->VFPDRegValue(kDoublePrecision);
int vm = instr->VFPMRegValue(kDoublePrecision);
get_d_register(vd, &dval);
get_d_register(vm, &mval);
set_d_register(vm, &dval);
set_d_register(vd, &mval);
} else {
// Q register vswp unimplemented.
UNIMPLEMENTED();
// vswp Qd, Qm.
uint32_t dval[4], mval[4];
int vd = instr->VFPDRegValue(kSimd128Precision);
int vm = instr->VFPMRegValue(kSimd128Precision);
get_q_register(vd, dval);
get_q_register(vm, mval);
set_q_register(vm, dval);
set_q_register(vd, mval);
}
} else {
UNIMPLEMENTED();
......
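
The simulated veor is a plain lane-wise XOR over four 32-bit lanes, which is exactly what TEST(15) below exercises; a standalone restatement of the arithmetic:

  #include <cassert>
  #include <cstdint>

  int main() {
    // Values from TEST(15): every lane computes 0xAA ^ 0x55 == 0xFF.
    uint32_t n_data[4] = {0xAA, 0xAA, 0xAA, 0xAA};
    uint32_t m_data[4] = {0x55, 0x55, 0x55, 0x55};
    for (int i = 0; i < 4; i++) n_data[i] ^= m_data[i];
    for (int i = 0; i < 4; i++) assert(n_data[i] == 0xFFu);
    return 0;
  }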
......@@ -1891,8 +1891,7 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
DCHECK(destination->IsDoubleStackSlot());
__ vstr(src, g.ToMemOperand(destination));
}
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
} else if (rep == MachineRepresentation::kFloat32) {
// GapResolver may give us reg codes that don't map to actual s-registers.
// Generate code to work around those cases.
int src_code = LocationOperand::cast(source)->register_code();
......@@ -1903,6 +1902,19 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
DCHECK(destination->IsFloatStackSlot());
__ VmovExtended(g.ToMemOperand(destination), src_code, kScratchReg);
}
} else {
DCHECK_EQ(MachineRepresentation::kSimd128, rep);
QwNeonRegister src = g.ToSimd128Register(source);
if (destination->IsSimd128Register()) {
QwNeonRegister dst = g.ToSimd128Register(destination);
__ Move(dst, src);
} else {
DCHECK(destination->IsSimd128StackSlot());
MemOperand dst = g.ToMemOperand(destination);
__ add(kScratchReg, dst.rn(), Operand(dst.offset()));
__ vst1(Neon8, NeonListOperand(src.low(), 2),
NeonMemOperand(kScratchReg));
}
}
} else if (source->IsFPStackSlot()) {
MemOperand src = g.ToMemOperand(source);
......@@ -1911,24 +1923,38 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
if (destination->IsFPRegister()) {
if (rep == MachineRepresentation::kFloat64) {
__ vldr(g.ToDoubleRegister(destination), src);
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
} else if (rep == MachineRepresentation::kFloat32) {
// GapResolver may give us reg codes that don't map to actual
// s-registers. Generate code to work around those cases.
int dst_code = LocationOperand::cast(destination)->register_code();
__ VmovExtended(dst_code, src, kScratchReg);
} else {
DCHECK_EQ(MachineRepresentation::kSimd128, rep);
QwNeonRegister dst = g.ToSimd128Register(destination);
__ add(kScratchReg, src.rn(), Operand(src.offset()));
__ vld1(Neon8, NeonListOperand(dst.low(), 2),
NeonMemOperand(kScratchReg));
}
} else {
} else if (rep == MachineRepresentation::kFloat64) {
DCHECK(destination->IsFPStackSlot());
if (rep == MachineRepresentation::kFloat64) {
DwVfpRegister temp = kScratchDoubleReg;
__ vldr(temp, src);
__ vstr(temp, g.ToMemOperand(destination));
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
} else if (rep == MachineRepresentation::kFloat32) {
SwVfpRegister temp = kScratchDoubleReg.low();
__ vldr(temp, src);
__ vstr(temp, g.ToMemOperand(destination));
} else {
DCHECK_EQ(MachineRepresentation::kSimd128, rep);
MemOperand dst = g.ToMemOperand(destination);
__ add(kScratchReg, src.rn(), Operand(src.offset()));
__ vld1(Neon8, NeonListOperand(kScratchQuadReg.low(), 2),
NeonMemOperand(kScratchReg));
__ add(kScratchReg, dst.rn(), Operand(dst.offset()));
__ vst1(Neon8, NeonListOperand(kScratchQuadReg.low(), 2),
NeonMemOperand(kScratchReg));
__ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
}
}
} else {
......@@ -1936,7 +1962,6 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
}
}
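
In the vld1/vst1 sequences above, NeonListOperand(reg.low(), 2) names the two consecutive D registers that make up a Q register, and Neon8 moves all 16 bytes; the explicit add is needed because vld1/vst1 take only a base register, not a base plus offset. Condensed, the register-to-slot store is:

  __ add(kScratchReg, dst.rn(), Operand(dst.offset()));  // slot address
  __ vst1(Neon8, NeonListOperand(src.low(), 2),          // {d2N, d2N+1} = qN
          NeonMemOperand(kScratchReg));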
void CodeGenerator::AssembleSwap(InstructionOperand* source,
InstructionOperand* destination) {
ArmOperandConverter g(this, nullptr);
......@@ -1975,7 +2000,7 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
DwVfpRegister src = g.ToDoubleRegister(source);
if (destination->IsFPRegister()) {
DwVfpRegister dst = g.ToDoubleRegister(destination);
__ vswp(src, dst);
__ Swap(src, dst);
} else {
DCHECK(destination->IsFPStackSlot());
MemOperand dst = g.ToMemOperand(destination);
......@@ -1983,8 +2008,7 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
__ vldr(src, dst);
__ vstr(temp, dst);
}
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
} else if (rep == MachineRepresentation::kFloat32) {
int src_code = LocationOperand::cast(source)->register_code();
if (destination->IsFPRegister()) {
int dst_code = LocationOperand::cast(destination)->register_code();
......@@ -1998,29 +2022,55 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
__ VmovExtended(src_code, dst, kScratchReg);
__ vstr(temp.low(), dst);
}
} else {
DCHECK_EQ(MachineRepresentation::kSimd128, rep);
QwNeonRegister src = g.ToSimd128Register(source);
if (destination->IsFPRegister()) {
QwNeonRegister dst = g.ToSimd128Register(destination);
__ Swap(src, dst);
} else {
DCHECK(destination->IsFPStackSlot());
MemOperand dst = g.ToMemOperand(destination);
__ Move(kScratchQuadReg, src);
__ add(kScratchReg, dst.rn(), Operand(dst.offset()));
__ vld1(Neon8, NeonListOperand(src.low(), 2),
NeonMemOperand(kScratchReg));
__ vst1(Neon8, NeonListOperand(kScratchQuadReg.low(), 2),
NeonMemOperand(kScratchReg));
__ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
}
}
} else if (source->IsFPStackSlot()) {
DCHECK(destination->IsFPStackSlot());
    Register temp_0 = kScratchReg;
    LowDwVfpRegister temp_1 = kScratchDoubleReg;
    MemOperand src0 = g.ToMemOperand(source);
    MemOperand dst0 = g.ToMemOperand(destination);
    MachineRepresentation rep = LocationOperand::cast(source)->representation();
    if (rep == MachineRepresentation::kFloat64) {
      MemOperand src1(src0.rn(), src0.offset() + kPointerSize);
      MemOperand dst1(dst0.rn(), dst0.offset() + kPointerSize);
      __ vldr(temp_1, dst0);  // Save destination in temp_1.
      __ ldr(temp_0, src0);   // Then use temp_0 to copy source to destination.
      __ str(temp_0, dst0);
      __ ldr(temp_0, src1);
      __ str(temp_0, dst1);
      __ vstr(temp_1, src0);
    } else {
      DCHECK_EQ(MachineRepresentation::kFloat32, rep);
      __ vldr(temp_1.low(), dst0);  // Save destination in temp_1.
      __ ldr(temp_0, src0);  // Then use temp_0 to copy source to destination.
      __ str(temp_0, dst0);
      __ vstr(temp_1.low(), src0);
    }

    MemOperand src = g.ToMemOperand(source);
    MemOperand dst = g.ToMemOperand(destination);
    MachineRepresentation rep = LocationOperand::cast(source)->representation();
    if (rep == MachineRepresentation::kFloat64) {
      __ vldr(kScratchDoubleReg, dst);
      __ vldr(kDoubleRegZero, src);
      __ vstr(kScratchDoubleReg, src);
      __ vstr(kDoubleRegZero, dst);
      // Restore the 0 register.
      __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
    } else if (rep == MachineRepresentation::kFloat32) {
      __ vldr(kScratchDoubleReg.low(), dst);
      __ vldr(kScratchDoubleReg.high(), src);
      __ vstr(kScratchDoubleReg.low(), src);
      __ vstr(kScratchDoubleReg.high(), dst);
    } else {
      DCHECK_EQ(MachineRepresentation::kSimd128, rep);
      __ vldr(kScratchDoubleReg, dst);
      __ vldr(kDoubleRegZero, src);
      __ vstr(kScratchDoubleReg, src);
      __ vstr(kDoubleRegZero, dst);
      src.set_offset(src.offset() + kDoubleSize);
      dst.set_offset(dst.offset() + kDoubleSize);
      __ vldr(kScratchDoubleReg, dst);
      __ vldr(kDoubleRegZero, src);
      __ vstr(kScratchDoubleReg, src);
      __ vstr(kDoubleRegZero, dst);
      // Restore the 0 register.
      __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
    }
} else {
// No other combinations are possible.
......
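
The stack-slot swaps above use the two scratch D registers as temporaries instead of bouncing words through a core register: kFloat32 swaps through the two halves of kScratchDoubleReg, kFloat64 borrows kDoubleRegZero as a second temporary, and kSimd128 runs the kFloat64 sequence twice, kDoubleSize apart. The kFloat64 sequence with the register roles spelled out:

  __ vldr(kScratchDoubleReg, dst);  // d15 <- [dst]
  __ vldr(kDoubleRegZero, src);     // d14 <- [src] (clobbers the zero reg)
  __ vstr(kScratchDoubleReg, src);  // [src] <- old [dst]
  __ vstr(kDoubleRegZero, dst);     // [dst] <- old [src]
  __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);  // re-zero d14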
......@@ -1255,6 +1255,8 @@ TEST(15) {
uint32_t dstA5;
uint32_t dstA6;
uint32_t dstA7;
uint32_t vmov_src[4], vmov_dst[4];
uint32_t veor_src[4], veor_dst[4];
} T;
T t;
......@@ -1287,6 +1289,22 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, dstA4))));
__ vst1(Neon8, NeonListOperand(d2, 2), NeonMemOperand(r4));
// Test vmov for q-registers.
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmov_src))));
__ vld1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(r4));
__ vmov(q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmov_dst))));
__ vst1(Neon8, NeonListOperand(d2, 2), NeonMemOperand(r4));
// Test veor for q-registers.
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, veor_src))));
__ vld1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(r4));
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, veor_dst))));
__ vld1(Neon8, NeonListOperand(d2, 2), NeonMemOperand(r4));
__ veor(q1, q1, q0);
__ vst1(Neon8, NeonListOperand(d2, 2), NeonMemOperand(r4));
// Restore and return.
__ ldm(ia_w, sp, r4.bit() | pc.bit());
CodeDesc desc;
......@@ -1324,6 +1342,10 @@ TEST(15) {
t.dstA5 = 0;
t.dstA6 = 0;
t.dstA7 = 0;
t.vmov_src[0] = t.vmov_src[1] = t.vmov_src[2] = t.vmov_src[3] = 1;
t.vmov_dst[0] = t.vmov_dst[1] = t.vmov_dst[2] = t.vmov_dst[3] = 0;
t.veor_src[0] = t.veor_src[1] = t.veor_src[2] = t.veor_src[3] = 0xAA;
t.veor_dst[0] = t.veor_dst[1] = t.veor_dst[2] = t.veor_dst[3] = 0x55;
Object* dummy = CALL_GENERATED_CODE(isolate, f, &t, 0, 0, 0, 0);
USE(dummy);
CHECK_EQ(0x01020304u, t.dst0);
......@@ -1342,6 +1364,14 @@ TEST(15) {
CHECK_EQ(0x00410042u, t.dstA5);
CHECK_EQ(0x00830084u, t.dstA6);
CHECK_EQ(0x00810082u, t.dstA7);
CHECK_EQ(1u, t.vmov_dst[0]);
CHECK_EQ(1u, t.vmov_dst[1]);
CHECK_EQ(1u, t.vmov_dst[2]);
CHECK_EQ(1u, t.vmov_dst[3]);
CHECK_EQ(0xFFu, t.veor_dst[0]);
CHECK_EQ(0xFFu, t.veor_dst[1]);
CHECK_EQ(0xFFu, t.veor_dst[2]);
CHECK_EQ(0xFFu, t.veor_dst[3]);
}
}
......@@ -2881,6 +2911,8 @@ TEST(unaligned_stores) {
}
TEST(vswp) {
if (!CpuFeatures::IsSupported(NEON)) return;
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
......@@ -2891,6 +2923,10 @@ TEST(vswp) {
double result1;
double result2;
double result3;
double result4;
double result5;
double result6;
double result7;
} T;
T t;
......@@ -2908,6 +2944,17 @@ TEST(vswp) {
__ vstr(d31, r0, offsetof(T, result3));
}
// q-register swap.
__ vmov(d8, 1.0);
__ vmov(d9, 2.0);
__ vmov(d10, 3.0);
__ vmov(d11, 4.0);
__ vswp(q4, q5);
__ vstr(d8, r0, offsetof(T, result4));
__ vstr(d9, r0, offsetof(T, result5));
__ vstr(d10, r0, offsetof(T, result6));
__ vstr(d11, r0, offsetof(T, result7));
__ bx(lr);
CodeDesc desc;
......@@ -2927,6 +2974,10 @@ TEST(vswp) {
CHECK_EQ(-1.0, t.result2);
CHECK_EQ(1.0, t.result3);
}
CHECK_EQ(3.0, t.result4);
CHECK_EQ(4.0, t.result5);
CHECK_EQ(1.0, t.result6);
CHECK_EQ(2.0, t.result7);
}
TEST(regress4292_b) {
......
......@@ -936,10 +936,26 @@ TEST(Neon) {
"f3886a11 vmovl.u8 q3, d1");
COMPARE(vmovl(NeonU8, q4, d2),
"f3888a12 vmovl.u8 q4, d2");
COMPARE(vmov(q0, q15),
"f22e01fe vmov q0, q15");
COMPARE(vmov(q8, q9),
"f26201f2 vmov q8, q9");
COMPARE(vswp(d0, d31),
"f3b2002f vswp d0, d31");
COMPARE(vswp(d16, d14),
"f3f2000e vswp d16, d14");
COMPARE(vswp(q0, q15),
"f3b2006e vswp q0, q15");
COMPARE(vswp(q8, q9),
"f3f20062 vswp q8, q9");
COMPARE(veor(d0, d1, d2),
"f3010112 veor d0, d1, d2");
COMPARE(veor(d0, d30, d31),
"f30e01bf veor d0, d30, d31");
COMPARE(veor(q0, q1, q2),
"f3020154 veor q0, q1, q2");
COMPARE(veor(q15, q0, q8),
"f340e170 veor q15, q0, q8");
}
VERIFY_RUN();
......