Commit c710e658 authored by Andreas Haas, committed by Commit Bot

[arm64][turbofan] Implement on-stack returns.

This is the implementation of crrev.com/c/766371 for arm64.

Original description:

Add the ability to return (multiple) return values on the stack:

- Extend stack frames with a new buffer region for return slots.
  This region is located at the end of a caller's frame such that
  its slots can be indexed as caller frame slots in a callee
  (located beyond its parameters) and assigned return values.
- Adjust stack frame construction and deconstruction accordingly.
- Extend linkage computation to support register plus stack returns.
- Reserve return slots in caller frame when respective calls occur.
- Introduce and generate architecture instructions ('peek') for
  reading back results from return slots in the caller.
- Aggressive tests.
- Some minor clean-up.

R=v8-arm-ports@googlegroups.com

Change-Id: I6e344a23f359861c9a1ff5a6511651c2176ce9a8
Reviewed-on: https://chromium-review.googlesource.com/842545
Reviewed-by: Ben Titzer <titzer@chromium.org>
Commit-Queue: Andreas Haas <ahaas@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50585}
parent 997d5a6a
......@@ -1224,6 +1224,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kArm64Peek: {
  // Reads a value back from a return slot of this frame. The immediate input
  // is a slot index counted backwards from the total frame slot count, so it
  // is first converted into an fp-relative byte offset.
  const int slot_from_end = i.InputInt32(0);
  const int fp_offset = FrameSlotToFPOffset(
      frame()->GetTotalFrameSlotCount() - slot_from_end);
  const MemOperand return_slot(fp, fp_offset);
  InstructionOperand* const output = instr->OutputAt(0);
  if (!output->IsFPRegister()) {
    // Non-floating-point results are loaded into the general output register.
    __ Ldr(i.OutputRegister(), return_slot);
  } else if (LocationOperand::cast(output)->representation() ==
             MachineRepresentation::kFloat64) {
    __ Ldr(i.OutputDoubleRegister(), return_slot);
  } else {
    // The only other FP representation handled here is float32.
    DCHECK_EQ(MachineRepresentation::kFloat32,
              LocationOperand::cast(output)->representation());
    __ Ldr(i.OutputFloatRegister(), return_slot);
  }
  break;
}
case kArm64Clz:
__ Clz(i.OutputRegister64(), i.InputRegister64(0));
break;
......@@ -2287,6 +2304,9 @@ void CodeGenerator::AssembleConstructFrame() {
descriptor->CalleeSavedRegisters());
CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
descriptor->CalleeSavedFPRegisters());
// The number of slots for returns has to be even to ensure the correct stack
// alignment.
const int returns = RoundUp(frame()->GetReturnSlotCount(), 2);
if (frame_access_state()->has_frame()) {
// Link the frame
......@@ -2358,6 +2378,7 @@ void CodeGenerator::AssembleConstructFrame() {
// Skip callee-saved slots, which are pushed below.
shrink_slots -= saves.Count();
shrink_slots -= saves_fp.Count();
shrink_slots -= returns;
// Build remainder of frame, including accounting for and filling-in
// frame-specific header information, i.e. claiming the extra slot that
......@@ -2400,11 +2421,21 @@ void CodeGenerator::AssembleConstructFrame() {
// CPURegList::GetCalleeSaved(): x30 is missing.
// DCHECK(saves.list() == CPURegList::GetCalleeSaved().list());
__ PushCPURegList(saves);
if (returns != 0) {
__ Claim(returns);
}
}
void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
CallDescriptor* descriptor = linkage()->GetIncomingDescriptor();
const int returns = RoundUp(frame()->GetReturnSlotCount(), 2);
if (returns != 0) {
__ Drop(returns);
}
// Restore registers.
CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
descriptor->CalleeSavedRegisters());
......
......@@ -82,6 +82,7 @@ namespace compiler {
V(Arm64Claim) \
V(Arm64Poke) \
V(Arm64PokePair) \
V(Arm64Peek) \
V(Arm64Float32Cmp) \
V(Arm64Float32Add) \
V(Arm64Float32Sub) \
......
......@@ -292,6 +292,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64Ldrsw:
case kArm64LdrW:
case kArm64Ldr:
case kArm64Peek:
return kIsLoadOperation;
case kArm64Float64Mod: // This opcode will call a C Function which can
......
......@@ -1721,7 +1721,25 @@ void InstructionSelector::EmitPrepareArguments(
// Emits one kArm64Peek instruction for every call result that lives in a
// caller frame slot, so the value can be read back after the call.
//
// |results| lists the result locations (with their nodes) of the call.
// |descriptor| is only consulted by a DCHECK: stack results are not expected
// for C function calls. |node| is not used by this implementation.
void InstructionSelector::EmitPrepareResults(ZoneVector<PushParameter>* results,
                                             const CallDescriptor* descriptor,
                                             Node* node) {
  Arm64OperandGenerator g(this);

  // Running slot index handed to kArm64Peek; it advances by the size of every
  // caller-frame-slot result, including ones that have no node.
  int reverse_slot = 0;
  for (PushParameter output : *results) {
    if (output.location.IsCallerFrameSlot()) {
      reverse_slot += output.location.GetSizeInPointers();
      // Entries without a node are alignment holes: they take up space but
      // nothing has to be loaded for them.
      if (output.node != nullptr) {
        DCHECK(!descriptor->IsCFunctionCall());
        const MachineType result_type = output.location.GetType();
        if (result_type == MachineType::Float32()) {
          MarkAsFloat32(output.node);
        } else if (result_type == MachineType::Float64()) {
          MarkAsFloat64(output.node);
        }
        Emit(kArm64Peek, g.DefineAsRegister(output.node),
             g.UseImmediate(reverse_slot));
      }
    }
  }
}
bool InstructionSelector::IsTailCallAddressImmediate() { return false; }
......
......@@ -22,6 +22,13 @@ Frame::Frame(int fixed_frame_size_in_slots)
int Frame::AlignFrame(int alignment) {
int alignment_slots = alignment / kPointerSize;
// We have to align return slots separately, because they are claimed
// separately on the stack.
int return_delta =
alignment_slots - (return_slot_count_ & (alignment_slots - 1));
if (return_delta != alignment_slots) {
frame_slot_count_ += return_delta;
}
int delta = alignment_slots - (frame_slot_count_ & (alignment_slots - 1));
if (delta != alignment_slots) {
frame_slot_count_ += delta;
......
......@@ -169,7 +169,7 @@
##############################################################################
# TODO(ahaas): Port multiple return values to ARM, MIPS, S390 and PPC
['arch == arm64 or arch == s390 or arch == s390x or arch == ppc or arch == ppc64', {
['arch == s390 or arch == s390x or arch == ppc or arch == ppc64', {
'test-multiple-return/*': [SKIP],
}],
......
......@@ -271,12 +271,15 @@ TEST_MULTI(Float64, MachineType::Float64())
#undef TEST_MULTI
void ReturnLastValue(MachineType type) {
for (int unused_stack_slots = 0; unused_stack_slots <= 2;
++unused_stack_slots) {
v8::internal::AccountingAllocator allocator;
Zone zone(&allocator, ZONE_NAME);
// Let 2 returns be on the stack.
const int return_count = num_registers(type) + 2;
// Let {unused_stack_slots + 1} returns be on the stack.
const int return_count = num_registers(type) + unused_stack_slots + 1;
CallDescriptor* desc = CreateMonoCallDescriptor(&zone, return_count, 0, type);
CallDescriptor* desc =
CreateMonoCallDescriptor(&zone, return_count, 0, type);
HandleAndZoneScope handles;
RawMachineAssembler m(handles.main_isolate(),
......@@ -292,7 +295,8 @@ void ReturnLastValue(MachineType type) {
m.Return(return_count, returns.get());
CompilationInfo info(ArrayVector("testing"), handles.main_zone(), Code::STUB);
CompilationInfo info(ArrayVector("testing"), handles.main_zone(),
Code::STUB);
Handle<Code> code = Pipeline::GenerateCodeForTesting(
&info, handles.main_isolate(), desc, m.graph(), m.Export());
......@@ -307,6 +311,7 @@ void ReturnLastValue(MachineType type) {
mt, type, mt.AddNode(mt.common()->Projection(return_count - 1), call)));
CHECK_EQ(expect, mt.Call());
}
}
TEST(ReturnLastValueInt32) { ReturnLastValue(MachineType::Int32()); }
......@@ -316,6 +321,65 @@ TEST(ReturnLastValueInt64) { ReturnLastValue(MachineType::Int64()); }
TEST(ReturnLastValueFloat32) { ReturnLastValue(MachineType::Float32()); }
TEST(ReturnLastValueFloat64) { ReturnLastValue(MachineType::Float64()); }
// Builds a callee that returns the constants 0..return_count-1 of |type|, then
// a caller that adds up every projection of the call (converted to int32) and
// checks the sum. Runs with one, two and three returns beyond
// num_registers(type).
void ReturnSumOfReturns(MachineType type) {
  for (int stack_returns = 1; stack_returns <= 3; ++stack_returns) {
    v8::internal::AccountingAllocator allocator;
    Zone zone(&allocator, ZONE_NAME);
    // Let {stack_returns} returns be on the stack.
    const int return_count = num_registers(type) + stack_returns;

    CallDescriptor* desc =
        CreateMonoCallDescriptor(&zone, return_count, 0, type);

    // Generate callee: return the constants 0, 1, ..., return_count - 1.
    HandleAndZoneScope handles;
    RawMachineAssembler m(handles.main_isolate(),
                          new (handles.main_zone()) Graph(handles.main_zone()),
                          desc, MachineType::PointerRepresentation(),
                          InstructionSelector::SupportedMachineOperatorFlags());

    std::unique_ptr<Node*[]> callee_values(new Node*[return_count]);
    for (int index = 0; index < return_count; ++index) {
      callee_values[index] = Constant(m, type, index);
    }
    m.Return(return_count, callee_values.get());

    CompilationInfo info(ArrayVector("testing"), handles.main_zone(),
                         Code::STUB);
    Handle<Code> code = Pipeline::GenerateCodeForTesting(
        &info, handles.main_isolate(), desc, m.graph(), m.Export());

    // Generate caller: call the callee and accumulate all of its results.
    RawMachineAssemblerTester<int32_t> mt;
    Node* code_node = mt.HeapConstant(code);
    Node* call = mt.AddNode(mt.common()->Call(desc), 1, &code_node);

    uint32_t expect = 0;
    Node* sum = mt.Int32Constant(0);
    for (int index = 0; index < return_count; ++index) {
      expect += index;
      Node* projection = mt.AddNode(mt.common()->Projection(index), call);
      sum = mt.Int32Add(sum, ToInt32(mt, type, projection));
    }
    mt.Return(sum);

    CHECK_EQ(expect, mt.Call());
  }
}
// Register ReturnSumOfReturns once per machine type under test.
TEST(ReturnSumOfReturnsInt32) { ReturnSumOfReturns(MachineType::Int32()); }
// The Int64 variant is only compiled when the target is not a 32-bit
// architecture.
#if (!V8_TARGET_ARCH_32_BIT)
TEST(ReturnSumOfReturnsInt64) { ReturnSumOfReturns(MachineType::Int64()); }
#endif
TEST(ReturnSumOfReturnsFloat32) { ReturnSumOfReturns(MachineType::Float32()); }
TEST(ReturnSumOfReturnsFloat64) { ReturnSumOfReturns(MachineType::Float64()); }
} // namespace compiler
} // namespace internal
} // namespace v8
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment