Commit 2837c2e6 authored by martyn.capewell's avatar martyn.capewell Committed by Commit bot

[turbofan] ARM: Implement vswp and use in gap resolver

Use vswp to swap double-precision registers in the gap resolver, falling back
to temp-register-based code if NEON is not available.

BUG=

Review-Url: https://codereview.chromium.org/2313803003
Cr-Commit-Position: refs/heads/master@{#39209}
parent 26720293
...@@ -3905,6 +3905,27 @@ void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) { ...@@ -3905,6 +3905,27 @@ void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
(dt & NeonDataTypeSizeMask)*B19 | vd*B12 | 0xA*B8 | m*B5 | B4 | vm); (dt & NeonDataTypeSizeMask)*B19 | vd*B12 | 0xA*B8 | m*B5 | B4 | vm);
} }
// Swaps the contents of two double-precision (D) registers.
//
// When NEON is supported a single vswp instruction is emitted; otherwise the
// swap is performed with three vmov instructions through kScratchDoubleReg.
// Neither operand may be kScratchDoubleReg. If both operands name the same
// register, no code is emitted.
void Assembler::vswp(DwVfpRegister srcdst0, DwVfpRegister srcdst1) {
  DCHECK(!srcdst0.is(kScratchDoubleReg));
  DCHECK(!srcdst1.is(kScratchDoubleReg));

  if (srcdst0.is(srcdst1)) return;  // Swapping aliased registers emits nothing.

  if (CpuFeatures::IsSupported(NEON)) {
    // Instruction details available in ARM DDI 0406C.b, A8.8.418.
    // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) |
    //  Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0)
    int vd, d;
    srcdst0.split_code(&vd, &d);
    int vm, m;
    srcdst1.split_code(&vm, &m);
    // The top nibble is written as an unsigned literal: 0xF * B28 evaluated in
    // signed int would exceed INT_MAX (signed overflow is undefined behavior).
    emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | m * B5 | vm);
  } else {
    // No NEON: rotate the two values through the scratch double register.
    vmov(kScratchDoubleReg, srcdst0);
    vmov(srcdst0, srcdst1);
    vmov(srcdst1, kScratchDoubleReg);
  }
}
// Pseudo instructions. // Pseudo instructions.
void Assembler::nop(int type) { void Assembler::nop(int type) {
......
...@@ -1303,8 +1303,8 @@ class Assembler : public AssemblerBase { ...@@ -1303,8 +1303,8 @@ class Assembler : public AssemblerBase {
const Condition cond = al); const Condition cond = al);
// Support for NEON. // Support for NEON.
// All these APIs support D0 to D31 and Q0 to Q15.
// All these APIs support D0 to D31 and Q0 to Q15.
void vld1(NeonSize size, void vld1(NeonSize size,
const NeonListOperand& dst, const NeonListOperand& dst,
const NeonMemOperand& src); const NeonMemOperand& src);
...@@ -1313,6 +1313,9 @@ class Assembler : public AssemblerBase { ...@@ -1313,6 +1313,9 @@ class Assembler : public AssemblerBase {
const NeonMemOperand& dst); const NeonMemOperand& dst);
void vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src); void vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src);
// Currently, vswp supports only D0 to D31.
void vswp(DwVfpRegister srcdst0, DwVfpRegister srcdst1);
// Pseudo instructions // Pseudo instructions
// Different nop operations are used by the code generator to detect certain // Different nop operations are used by the code generator to detect certain
......
...@@ -1825,6 +1825,13 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { ...@@ -1825,6 +1825,13 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int imm3 = instr->Bits(21, 19); int imm3 = instr->Bits(21, 19);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmovl.u%d q%d, d%d", imm3*8, Vd, Vm); "vmovl.u%d q%d, d%d", imm3*8, Vd, Vm);
} else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) &&
(instr->Bit(4) == 0)) {
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
char rtype = (instr->Bit(6) == 0) ? 'd' : 'q';
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vswp %c%d, %c%d", rtype, Vd, rtype, Vm);
} else { } else {
Unknown(instr); Unknown(instr);
} }
......
...@@ -1054,6 +1054,7 @@ void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) { ...@@ -1054,6 +1054,7 @@ void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) {
vmov(dst, VmovIndexLo, src); vmov(dst, VmovIndexLo, src);
} }
} }
void MacroAssembler::LslPair(Register dst_low, Register dst_high, void MacroAssembler::LslPair(Register dst_low, Register dst_high,
Register src_low, Register src_high, Register src_low, Register src_high,
Register scratch, Register shift) { Register scratch, Register shift) {
......
...@@ -3783,6 +3783,21 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { ...@@ -3783,6 +3783,21 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
e++; e++;
} }
set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
} else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) &&
(instr->Bit(4) == 0)) {
int vd = instr->VFPDRegValue(kDoublePrecision);
int vm = instr->VFPMRegValue(kDoublePrecision);
if (instr->Bit(6) == 0) {
// vswp Dd, Dm.
uint64_t dval, mval;
get_d_register(vd, &dval);
get_d_register(vm, &mval);
set_d_register(vm, &dval);
set_d_register(vd, &mval);
} else {
// Q register vswp unimplemented.
UNIMPLEMENTED();
}
} else { } else {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
......
...@@ -1912,33 +1912,31 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source, ...@@ -1912,33 +1912,31 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
__ vstr(temp_1, src); __ vstr(temp_1, src);
} else if (source->IsFPRegister()) { } else if (source->IsFPRegister()) {
LowDwVfpRegister temp = kScratchDoubleReg; LowDwVfpRegister temp = kScratchDoubleReg;
DwVfpRegister src = g.ToDoubleRegister(source); DwVfpRegister src = g.ToDoubleRegister(source);
if (destination->IsFPRegister()) { if (destination->IsFPRegister()) {
DwVfpRegister dst = g.ToDoubleRegister(destination); DwVfpRegister dst = g.ToDoubleRegister(destination);
__ Move(temp, src); __ vswp(src, dst);
__ Move(src, dst); } else {
__ Move(dst, temp); DCHECK(destination->IsFPStackSlot());
} else { MemOperand dst = g.ToMemOperand(destination);
DCHECK(destination->IsFPStackSlot()); __ Move(temp, src);
MemOperand dst = g.ToMemOperand(destination); __ vldr(src, dst);
__ Move(temp, src); __ vstr(temp, dst);
__ vldr(src, dst); }
__ vstr(temp, dst);
}
} else if (source->IsFPStackSlot()) { } else if (source->IsFPStackSlot()) {
DCHECK(destination->IsFPStackSlot()); DCHECK(destination->IsFPStackSlot());
Register temp_0 = kScratchReg; Register temp_0 = kScratchReg;
LowDwVfpRegister temp_1 = kScratchDoubleReg; LowDwVfpRegister temp_1 = kScratchDoubleReg;
MemOperand src0 = g.ToMemOperand(source); MemOperand src0 = g.ToMemOperand(source);
MemOperand dst0 = g.ToMemOperand(destination); MemOperand dst0 = g.ToMemOperand(destination);
MemOperand src1(src0.rn(), src0.offset() + kPointerSize); MemOperand src1(src0.rn(), src0.offset() + kPointerSize);
MemOperand dst1(dst0.rn(), dst0.offset() + kPointerSize); MemOperand dst1(dst0.rn(), dst0.offset() + kPointerSize);
__ vldr(temp_1, dst0); // Save destination in temp_1. __ vldr(temp_1, dst0); // Save destination in temp_1.
__ ldr(temp_0, src0); // Then use temp_0 to copy source to destination. __ ldr(temp_0, src0); // Then use temp_0 to copy source to destination.
__ str(temp_0, dst0); __ str(temp_0, dst0);
__ ldr(temp_0, src1); __ ldr(temp_0, src1);
__ str(temp_0, dst1); __ str(temp_0, dst1);
__ vstr(temp_1, src0); __ vstr(temp_1, src0);
} else { } else {
// No other combinations are possible. // No other combinations are possible.
UNREACHABLE(); UNREACHABLE();
......
...@@ -2889,6 +2889,55 @@ TEST(unaligned_stores) { ...@@ -2889,6 +2889,55 @@ TEST(unaligned_stores) {
} }
} }
// Assembles a small routine that loads 1.0 / -1.0 into a pair of D registers,
// swaps them with vswp and stores both back, then checks that the stored
// values were exchanged. The d30/d31 pair is only exercised when VFP32DREGS
// is supported.
TEST(vswp) {
  CcTest::InitializeVM();
  Isolate* isolate = CcTest::i_isolate();
  HandleScope scope(isolate);
  Assembler assm(isolate, NULL, 0);

  // Output slots written by the generated code through r0; &t is passed as
  // the first argument of the generated function below.
  struct T {
    double result0;
    double result1;
    double result2;
    double result3;
  };
  T t;

  const bool has_32_dregs = CpuFeatures::IsSupported(VFP32DREGS);

  // Low register pair.
  __ vmov(d0, 1.0);
  __ vmov(d1, -1.0);
  __ vswp(d0, d1);
  __ vstr(d0, r0, offsetof(T, result0));
  __ vstr(d1, r0, offsetof(T, result1));

  // High register pair, only present with 32 D registers.
  if (has_32_dregs) {
    __ vmov(d30, 1.0);
    __ vmov(d31, -1.0);
    __ vswp(d30, d31);
    __ vstr(d30, r0, offsetof(T, result2));
    __ vstr(d31, r0, offsetof(T, result3));
  }

  __ bx(lr);

  CodeDesc desc;
  assm.GetCode(&desc);
  Handle<Code> code = isolate->factory()->NewCode(
      desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef DEBUG
  OFStream os(stdout);
  code->Print(os);
#endif

  F3 entry = FUNCTION_CAST<F3>(code->entry());
  Object* ignored = CALL_GENERATED_CODE(isolate, entry, &t, 0, 0, 0, 0);
  USE(ignored);

  // After the swap the first register holds -1.0 and the second 1.0.
  CHECK_EQ(-1.0, t.result0);
  CHECK_EQ(1.0, t.result1);
  if (has_32_dregs) {
    CHECK_EQ(-1.0, t.result2);
    CHECK_EQ(1.0, t.result3);
  }
}
TEST(regress4292_b) { TEST(regress4292_b) {
CcTest::InitializeVM(); CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate(); Isolate* isolate = CcTest::i_isolate();
......
...@@ -932,6 +932,10 @@ TEST(Neon) { ...@@ -932,6 +932,10 @@ TEST(Neon) {
"f3886a11 vmovl.u8 q3, d1"); "f3886a11 vmovl.u8 q3, d1");
COMPARE(vmovl(NeonU8, q4, d2), COMPARE(vmovl(NeonU8, q4, d2),
"f3888a12 vmovl.u8 q4, d2"); "f3888a12 vmovl.u8 q4, d2");
COMPARE(vswp(d0, d31),
"f3b2002f vswp d0, d31");
COMPARE(vswp(d16, d14),
"f3f2000e vswp d16, d14");
} }
VERIFY_RUN(); VERIFY_RUN();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment