Commit c0637c1f authored by martyn.capewell's avatar martyn.capewell Committed by Commit bot

Reland of [turbofan] ARM: Implement vswp and use in gap resolver

Reason for revert:
Breaks g++ build.

Original issue's description:
> [turbofan] ARM: Implement vswp and use in gap resolver
>
> Use vswp to swap double-precision registers in the gap resolver, falling
> back to temp register-based code if NEON is not available.
>
> BUG=
>
> Committed: https://crrev.com/2837c2e65a2ee5b9fc610f30ce1215f52323ecbd
> Cr-Commit-Position: refs/heads/master@{#39209}

BUG=

Review-Url: https://codereview.chromium.org/2314043002
Cr-Commit-Position: refs/heads/master@{#39264}
parent 26c5b8fa
......@@ -3905,6 +3905,27 @@ void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
(dt & NeonDataTypeSizeMask)*B19 | vd*B12 | 0xA*B8 | m*B5 | B4 | vm);
}
void Assembler::vswp(DwVfpRegister srcdst0, DwVfpRegister srcdst1) {
  // The scratch register is reserved for the non-NEON fallback below, so
  // neither operand may alias it.
  DCHECK(!srcdst0.is(kScratchDoubleReg));
  DCHECK(!srcdst1.is(kScratchDoubleReg));

  // Swapping a register with itself is a no-op; emit nothing.
  if (srcdst0.is(srcdst1)) return;

  if (!CpuFeatures::IsSupported(NEON)) {
    // No NEON available: perform the swap as three moves through the
    // scratch double register.
    vmov(kScratchDoubleReg, srcdst0);
    vmov(srcdst0, srcdst1);
    vmov(srcdst1, kScratchDoubleReg);
    return;
  }

  // NEON VSWP. Instruction details available in ARM DDI 0406C.b, A8.8.418.
  // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) |
  // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0)
  int vd_code, d_bit;
  srcdst0.split_code(&vd_code, &d_bit);
  int vm_code, m_bit;
  srcdst1.split_code(&vm_code, &m_bit);
  emit(0xFU * B28 | 7 * B23 | d_bit * B22 | 0x32 * B16 | vd_code * B12 |
       m_bit * B5 | vm_code);
}
// Pseudo instructions.
void Assembler::nop(int type) {
......
......@@ -1303,8 +1303,8 @@ class Assembler : public AssemblerBase {
const Condition cond = al);
// Support for NEON.
// All these APIs support D0 to D31 and Q0 to Q15.
void vld1(NeonSize size,
const NeonListOperand& dst,
const NeonMemOperand& src);
......@@ -1313,6 +1313,9 @@ class Assembler : public AssemblerBase {
const NeonMemOperand& dst);
void vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src);
// Currently, vswp supports only D0 to D31.
void vswp(DwVfpRegister srcdst0, DwVfpRegister srcdst1);
// Pseudo instructions
// Different nop operations are used by the code generator to detect certain
......
......@@ -1825,6 +1825,13 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int imm3 = instr->Bits(21, 19);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmovl.u%d q%d, d%d", imm3*8, Vd, Vm);
} else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) &&
(instr->Bit(4) == 0)) {
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
char rtype = (instr->Bit(6) == 0) ? 'd' : 'q';
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vswp %c%d, %c%d", rtype, Vd, rtype, Vm);
} else {
Unknown(instr);
}
......
......@@ -1054,6 +1054,7 @@ void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) {
vmov(dst, VmovIndexLo, src);
}
}
void MacroAssembler::LslPair(Register dst_low, Register dst_high,
Register src_low, Register src_high,
Register scratch, Register shift) {
......
......@@ -3783,6 +3783,21 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
e++;
}
set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
} else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) &&
(instr->Bit(4) == 0)) {
int vd = instr->VFPDRegValue(kDoublePrecision);
int vm = instr->VFPMRegValue(kDoublePrecision);
if (instr->Bit(6) == 0) {
// vswp Dd, Dm.
uint64_t dval, mval;
get_d_register(vd, &dval);
get_d_register(vm, &mval);
set_d_register(vm, &dval);
set_d_register(vd, &mval);
} else {
// Q register vswp unimplemented.
UNIMPLEMENTED();
}
} else {
UNIMPLEMENTED();
}
......
......@@ -1912,33 +1912,31 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
__ vstr(temp_1, src);
} else if (source->IsFPRegister()) {
LowDwVfpRegister temp = kScratchDoubleReg;
DwVfpRegister src = g.ToDoubleRegister(source);
if (destination->IsFPRegister()) {
DwVfpRegister dst = g.ToDoubleRegister(destination);
__ Move(temp, src);
__ Move(src, dst);
__ Move(dst, temp);
} else {
DCHECK(destination->IsFPStackSlot());
MemOperand dst = g.ToMemOperand(destination);
__ Move(temp, src);
__ vldr(src, dst);
__ vstr(temp, dst);
}
DwVfpRegister src = g.ToDoubleRegister(source);
if (destination->IsFPRegister()) {
DwVfpRegister dst = g.ToDoubleRegister(destination);
__ vswp(src, dst);
} else {
DCHECK(destination->IsFPStackSlot());
MemOperand dst = g.ToMemOperand(destination);
__ Move(temp, src);
__ vldr(src, dst);
__ vstr(temp, dst);
}
} else if (source->IsFPStackSlot()) {
DCHECK(destination->IsFPStackSlot());
Register temp_0 = kScratchReg;
LowDwVfpRegister temp_1 = kScratchDoubleReg;
MemOperand src0 = g.ToMemOperand(source);
MemOperand dst0 = g.ToMemOperand(destination);
MemOperand src1(src0.rn(), src0.offset() + kPointerSize);
MemOperand dst1(dst0.rn(), dst0.offset() + kPointerSize);
__ vldr(temp_1, dst0); // Save destination in temp_1.
__ ldr(temp_0, src0); // Then use temp_0 to copy source to destination.
__ str(temp_0, dst0);
__ ldr(temp_0, src1);
__ str(temp_0, dst1);
__ vstr(temp_1, src0);
MemOperand src1(src0.rn(), src0.offset() + kPointerSize);
MemOperand dst1(dst0.rn(), dst0.offset() + kPointerSize);
__ vldr(temp_1, dst0); // Save destination in temp_1.
__ ldr(temp_0, src0); // Then use temp_0 to copy source to destination.
__ str(temp_0, dst0);
__ ldr(temp_0, src1);
__ str(temp_0, dst1);
__ vstr(temp_1, src0);
} else {
// No other combinations are possible.
UNREACHABLE();
......
......@@ -2889,6 +2889,55 @@ TEST(unaligned_stores) {
}
}
// Checks that vswp exchanges the contents of two D-registers: the low pair
// d0/d1 always, and the high pair d30/d31 when VFP32DREGS is available. The
// generated code stores the post-swap register values into a result struct
// which is then checked on the host side.
TEST(vswp) {
  CcTest::InitializeVM();
  Isolate* isolate = CcTest::i_isolate();
  HandleScope scope(isolate);
  Assembler assm(isolate, NULL, 0);

  // Result slots written by the generated code. r0 holds the address of
  // this struct (it is passed as the first argument via CALL_GENERATED_CODE).
  typedef struct {
    double result0;
    double result1;
    double result2;
    double result3;
  } T;
  T t;

  // Load distinguishable values, swap, then store both registers so the
  // effect of the swap is observable in memory.
  __ vmov(d0, 1.0);
  __ vmov(d1, -1.0);
  __ vswp(d0, d1);
  __ vstr(d0, r0, offsetof(T, result0));
  __ vstr(d1, r0, offsetof(T, result1));

  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    // Also exercise the upper D-registers (d16-d31), which use the extra
    // D/M encoding bits in the assembler.
    __ vmov(d30, 1.0);
    __ vmov(d31, -1.0);
    __ vswp(d30, d31);
    __ vstr(d30, r0, offsetof(T, result2));
    __ vstr(d31, r0, offsetof(T, result3));
  }
  __ bx(lr);

  // Assemble, wrap in a Code object, and run the snippet.
  CodeDesc desc;
  assm.GetCode(&desc);
  Handle<Code> code = isolate->factory()->NewCode(
      desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef DEBUG
  OFStream os(stdout);
  code->Print(os);
#endif
  F3 f = FUNCTION_CAST<F3>(code->entry());
  Object* dummy = CALL_GENERATED_CODE(isolate, f, &t, 0, 0, 0, 0);
  USE(dummy);

  // d0/d1 started as (1.0, -1.0); after the swap they must read (-1.0, 1.0).
  CHECK_EQ(-1.0, t.result0);
  CHECK_EQ(1.0, t.result1);
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    CHECK_EQ(-1.0, t.result2);
    CHECK_EQ(1.0, t.result3);
  }
}
TEST(regress4292_b) {
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
......
......@@ -932,6 +932,10 @@ TEST(Neon) {
"f3886a11 vmovl.u8 q3, d1");
COMPARE(vmovl(NeonU8, q4, d2),
"f3888a12 vmovl.u8 q4, d2");
COMPARE(vswp(d0, d31),
"f3b2002f vswp d0, d31");
COMPARE(vswp(d16, d14),
"f3f2000e vswp d16, d14");
}
VERIFY_RUN();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment