Commit bd732f7d authored by Andreas Haas's avatar Andreas Haas Committed by Commit Bot

Reland [turbofan] Implement on-stack returns (Intel)

The original CL introduced a test which uses a random number generator.
I disable the test for now, which is okay because this CL adds to a
work-in-progress feature anyways, and I will fix the problem in another
CL.

Original description:
Add the ability to return (multiple) return values on the stack:

- Extend stack frames with a new buffer region for return slots.
  This region is located at the end of a caller's frame such that
  its slots can be indexed as caller frame slots in a callee
  (located beyond its parameters) and assigned return values.
- Adjust stack frame constructon and deconstruction accordingly.
- Extend linkage computation to support register plus stack returns.
- Reserve return slots in caller frame when respective calls occur.
- Introduce and generate architecture instructions ('peek') for
  reading back results from return slots in the caller.
- Aggressive tests.
- Some minor clean-up.

So far, only ia32 and x64 are implemented.

Change-Id: I8b03fc4e53946daaa0e14a34603f4824a04fad7e
Reviewed-on: https://chromium-review.googlesource.com/819557Reviewed-by: 's avatarBen Titzer <titzer@chromium.org>
Commit-Queue: Andreas Haas <ahaas@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50031}
parent f5a12d86
......@@ -1596,22 +1596,27 @@ void InstructionSelector::EmitPrepareArguments(
// Poke any stack arguments.
for (size_t n = 0; n < arguments->size(); ++n) {
PushParameter input = (*arguments)[n];
if (input.node()) {
if (input.node) {
int slot = static_cast<int>(n);
Emit(kArmPoke | MiscField::encode(slot), g.NoOutput(),
g.UseRegister(input.node()));
g.UseRegister(input.node));
}
}
} else {
// Push any stack arguments.
for (PushParameter input : base::Reversed(*arguments)) {
// Skip any alignment holes in pushed nodes.
if (input.node() == nullptr) continue;
Emit(kArmPush, g.NoOutput(), g.UseRegister(input.node()));
if (input.node == nullptr) continue;
Emit(kArmPush, g.NoOutput(), g.UseRegister(input.node));
}
}
}
void InstructionSelector::EmitPrepareResults(ZoneVector<PushParameter>* results,
const CallDescriptor* descriptor,
Node* node) {
// TODO(ahaas): Port.
}
bool InstructionSelector::IsTailCallAddressImmediate() { return false; }
......
......@@ -1805,7 +1805,7 @@ void InstructionSelector::EmitPrepareArguments(
// Poke the arguments into the stack.
ArchOpcode poke = to_native_stack ? kArm64PokeCSP : kArm64PokeJSSP;
while (slot >= 0) {
Emit(poke, g.NoOutput(), g.UseRegister((*arguments)[slot].node()),
Emit(poke, g.NoOutput(), g.UseRegister((*arguments)[slot].node),
g.TempImmediate(slot));
slot--;
// TODO(ahaas): Poke arguments in pairs if two subsequent arguments have the
......@@ -1816,6 +1816,11 @@ void InstructionSelector::EmitPrepareArguments(
}
}
void InstructionSelector::EmitPrepareResults(ZoneVector<PushParameter>* results,
const CallDescriptor* descriptor,
Node* node) {
// TODO(ahaas): Port.
}
bool InstructionSelector::IsTailCallAddressImmediate() { return false; }
......
......@@ -13,8 +13,10 @@ namespace internal {
namespace compiler {
Frame::Frame(int fixed_frame_size_in_slots)
: frame_slot_count_(fixed_frame_size_in_slots),
: fixed_slot_count_(fixed_frame_size_in_slots),
frame_slot_count_(fixed_frame_size_in_slots),
spill_slot_count_(0),
return_slot_count_(0),
allocated_registers_(nullptr),
allocated_double_registers_(nullptr) {}
......
......@@ -22,7 +22,7 @@ class CallDescriptor;
// into them. Mutable state associated with the frame is stored separately in
// FrameAccessState.
//
// Frames are divided up into three regions.
// Frames are divided up into four regions.
// - The first is the fixed header, which always has a constant size and can be
// predicted before code generation begins depending on the type of code being
// generated.
......@@ -33,11 +33,15 @@ class CallDescriptor;
// reserved after register allocation, since its size can only be precisely
// determined after register allocation once the number of used callee-saved
// register is certain.
// - The fourth region is a scratch area for return values from other functions
// called, if multiple returns cannot all be passed in registers. This region
// Must be last in a stack frame, so that it is positioned immediately below
// the stack frame of a callee to store to.
//
// The frame region immediately below the fixed header contains spill slots
// starting at slot 4 for JSFunctions. The callee-saved frame region below that
// starts at 4+spill_slot_count_. Callee stack slots corresponding to
// parameters are accessible through negative slot ids.
// starts at 4+spill_slot_count_. Callee stack slots correspond to
// parameters that are accessible through negative slot ids.
//
// Every slot of a caller or callee frame is accessible by the register
// allocator and gap resolver with a SpillSlotOperand containing its
......@@ -73,7 +77,13 @@ class CallDescriptor;
// |- - - - - - - - -| | |
// | ... | Callee-saved |
// |- - - - - - - - -| | |
// m+r+3 | callee-saved r | v v
// m+r+3 | callee-saved r | v |
// +-----------------+---- |
// m+r+4 | return 0 | ^ |
// |- - - - - - - - -| | |
// | ... | Return |
// |- - - - - - - - -| | |
// | return q-1 | v v
// -----+-----------------+----- <-- stack ptr -------------
//
class Frame : public ZoneObject {
......@@ -81,8 +91,9 @@ class Frame : public ZoneObject {
explicit Frame(int fixed_frame_size_in_slots);
inline int GetTotalFrameSlotCount() const { return frame_slot_count_; }
inline int GetFixedSlotCount() const { return fixed_slot_count_; }
inline int GetSpillSlotCount() const { return spill_slot_count_; }
inline int GetReturnSlotCount() const { return return_slot_count_; }
void SetAllocatedRegisters(BitVector* regs) {
DCHECK_NULL(allocated_registers_);
......@@ -112,19 +123,25 @@ class Frame : public ZoneObject {
}
int AllocateSpillSlot(int width, int alignment = 0) {
DCHECK_EQ(frame_slot_count_,
fixed_slot_count_ + spill_slot_count_ + return_slot_count_);
int frame_slot_count_before = frame_slot_count_;
if (alignment <= kPointerSize) {
AllocateAlignedFrameSlots(width);
} else {
// We need to allocate more place for spill slot
// in case we need an aligned spill slot to be
// able to properly align start of spill slot
// and still have enough place to hold all the
// data
AllocateAlignedFrameSlots(width + alignment - kPointerSize);
if (alignment > kPointerSize) {
// Slots are pointer sized, so alignment greater than a pointer size
// requires allocating additional slots.
width += alignment - kPointerSize;
}
AllocateAlignedFrameSlots(width);
spill_slot_count_ += frame_slot_count_ - frame_slot_count_before;
return frame_slot_count_ - 1;
return frame_slot_count_ - return_slot_count_ - 1;
}
void EnsureReturnSlots(int count) {
if (count > return_slot_count_) {
count -= return_slot_count_;
frame_slot_count_ += count;
return_slot_count_ += count;
}
}
int AlignFrame(int alignment = kDoubleSize);
......@@ -152,8 +169,10 @@ class Frame : public ZoneObject {
}
private:
int fixed_slot_count_;
int frame_slot_count_;
int spill_slot_count_;
int return_slot_count_;
BitVector* allocated_registers_;
BitVector* allocated_double_registers_;
......
......@@ -53,7 +53,7 @@ MoveOperands* Split(MoveOperands* move, MachineRepresentation smaller_rep,
src_index = src_loc.register_code() * aliases;
} else {
src_index = src_loc.index();
// For operands that occuply multiple slots, the index refers to the last
// For operands that occupy multiple slots, the index refers to the last
// slot. On little-endian architectures, we start at the high slot and use a
// negative step so that register-to-slot moves are in the correct order.
src_step = -slot_size;
......
......@@ -2006,7 +2006,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
case kIA32Poke: {
int const slot = MiscField::decode(instr->opcode());
int slot = MiscField::decode(instr->opcode());
if (HasImmediateInput(instr, 0)) {
__ mov(Operand(esp, slot * kPointerSize), i.InputImmediate(0));
} else {
......@@ -2014,6 +2014,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kIA32PeekFloat32: {
int reverse_slot = MiscField::decode(instr->opcode());
int offset =
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
__ movss(i.OutputFloatRegister(), Operand(ebp, offset));
break;
}
case kIA32PeekFloat64: {
int reverse_slot = MiscField::decode(instr->opcode());
int offset =
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
__ movsd(i.OutputDoubleRegister(), Operand(ebp, offset));
break;
}
case kIA32Peek: {
int reverse_slot = MiscField::decode(instr->opcode());
int offset =
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
__ mov(i.OutputRegister(), Operand(ebp, offset));
break;
}
case kSSEF32x4Splat: {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
XMMRegister dst = i.OutputSimd128Register();
......@@ -3493,8 +3514,9 @@ void CodeGenerator::AssembleConstructFrame() {
__ bind(&done);
}
// Skip callee-saved slots, which are pushed below.
// Skip callee-saved and return slots, which are created below.
shrink_slots -= base::bits::CountPopulation(saves);
shrink_slots -= frame()->GetReturnSlotCount();
if (shrink_slots > 0) {
__ sub(esp, Immediate(shrink_slots * kPointerSize));
}
......@@ -3506,6 +3528,11 @@ void CodeGenerator::AssembleConstructFrame() {
if (((1 << i) & saves)) __ push(Register::from_code(i));
}
}
// Allocate return slots (located after callee-saved).
if (frame()->GetReturnSlotCount() > 0) {
__ sub(esp, Immediate(frame()->GetReturnSlotCount() * kPointerSize));
}
}
void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
......@@ -3514,6 +3541,10 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
const RegList saves = descriptor->CalleeSavedRegisters();
// Restore registers.
if (saves != 0) {
const int returns = frame()->GetReturnSlotCount();
if (returns != 0) {
__ add(esp, Immediate(returns * kPointerSize));
}
for (int i = 0; i < Register::kNumRegisters; i++) {
if (!((1 << i) & saves)) continue;
__ pop(Register::from_code(i));
......
......@@ -111,6 +111,9 @@ namespace compiler {
V(IA32PushFloat32) \
V(IA32PushFloat64) \
V(IA32Poke) \
V(IA32Peek) \
V(IA32PeekFloat32) \
V(IA32PeekFloat64) \
V(IA32StackCheck) \
V(SSEF32x4Splat) \
V(AVXF32x4Splat) \
......
......@@ -263,6 +263,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
case kIA32StackCheck:
case kIA32Peek:
case kIA32PeekFloat32:
case kIA32PeekFloat64:
return kIsLoadOperation;
case kIA32Push:
......
......@@ -1114,11 +1114,11 @@ void InstructionSelector::EmitPrepareArguments(
// Poke any stack arguments.
for (size_t n = 0; n < arguments->size(); ++n) {
PushParameter input = (*arguments)[n];
if (input.node()) {
if (input.node) {
int const slot = static_cast<int>(n);
InstructionOperand value = g.CanBeImmediate(node)
? g.UseImmediate(input.node())
: g.UseRegister(input.node());
? g.UseImmediate(input.node)
: g.UseRegister(input.node);
Emit(kIA32Poke | MiscField::encode(slot), g.NoOutput(), value);
}
}
......@@ -1127,28 +1127,27 @@ void InstructionSelector::EmitPrepareArguments(
int effect_level = GetEffectLevel(node);
for (PushParameter input : base::Reversed(*arguments)) {
// Skip any alignment holes in pushed nodes.
Node* input_node = input.node();
if (input.node() == nullptr) continue;
if (g.CanBeMemoryOperand(kIA32Push, node, input_node, effect_level)) {
if (input.node == nullptr) continue;
if (g.CanBeMemoryOperand(kIA32Push, node, input.node, effect_level)) {
InstructionOperand outputs[1];
InstructionOperand inputs[4];
size_t input_count = 0;
InstructionCode opcode = kIA32Push;
AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
input_node, inputs, &input_count);
input.node, inputs, &input_count);
opcode |= AddressingModeField::encode(mode);
Emit(opcode, 0, outputs, input_count, inputs);
} else {
InstructionOperand value =
g.CanBeImmediate(input.node())
? g.UseImmediate(input.node())
g.CanBeImmediate(input.node)
? g.UseImmediate(input.node)
: IsSupported(ATOM) ||
sequence()->IsFP(GetVirtualRegister(input.node()))
? g.UseRegister(input.node())
: g.Use(input.node());
if (input.type() == MachineType::Float32()) {
sequence()->IsFP(GetVirtualRegister(input.node))
? g.UseRegister(input.node)
: g.Use(input.node);
if (input.location.GetType() == MachineType::Float32()) {
Emit(kIA32PushFloat32, g.NoOutput(), value);
} else if (input.type() == MachineType::Float64()) {
} else if (input.location.GetType() == MachineType::Float64()) {
Emit(kIA32PushFloat64, g.NoOutput(), value);
} else {
Emit(kIA32Push, g.NoOutput(), value);
......@@ -1158,6 +1157,33 @@ void InstructionSelector::EmitPrepareArguments(
}
}
void InstructionSelector::EmitPrepareResults(ZoneVector<PushParameter>* results,
const CallDescriptor* descriptor,
Node* node) {
IA32OperandGenerator g(this);
int reverse_slot = 0;
for (PushParameter output : *results) {
if (!output.location.IsCallerFrameSlot()) continue;
reverse_slot += output.location.GetSizeInPointers();
// Skip any alignment holes in nodes.
if (output.node == nullptr) continue;
DCHECK(!descriptor->IsCFunctionCall());
if (output.location.GetType() == MachineType::Float32()) {
MarkAsFloat32(output.node);
InstructionOperand result = g.DefineAsRegister(output.node);
Emit(kIA32PeekFloat32 | MiscField::encode(reverse_slot), result);
} else if (output.location.GetType() == MachineType::Float64()) {
MarkAsFloat64(output.node);
InstructionOperand result = g.DefineAsRegister(output.node);
Emit(kIA32PeekFloat64 | MiscField::encode(reverse_slot - 1), result);
} else {
InstructionOperand result = g.DefineAsRegister(output.node);
Emit(kIA32Peek | MiscField::encode(reverse_slot), result);
}
}
}
bool InstructionSelector::IsTailCallAddressImmediate() { return true; }
......
......@@ -668,7 +668,7 @@ struct CallBuffer {
const CallDescriptor* descriptor;
FrameStateDescriptor* frame_state_descriptor;
NodeVector output_nodes;
ZoneVector<PushParameter> output_nodes;
InstructionOperandVector outputs;
InstructionOperandVector instruction_args;
ZoneVector<PushParameter> pushed_nodes;
......@@ -702,17 +702,28 @@ void InstructionSelector::InitializeCallBuffer(Node* call, CallBuffer* buffer,
if (buffer->descriptor->ReturnCount() > 0) {
// Collect the projections that represent multiple outputs from this call.
if (buffer->descriptor->ReturnCount() == 1) {
buffer->output_nodes.push_back(call);
PushParameter result = {call, buffer->descriptor->GetReturnLocation(0)};
buffer->output_nodes.push_back(result);
} else {
buffer->output_nodes.resize(buffer->descriptor->ReturnCount(), nullptr);
buffer->output_nodes.resize(buffer->descriptor->ReturnCount());
int stack_count = 0;
for (Edge const edge : call->use_edges()) {
if (!NodeProperties::IsValueEdge(edge)) continue;
DCHECK_EQ(IrOpcode::kProjection, edge.from()->opcode());
size_t const index = ProjectionIndexOf(edge.from()->op());
Node* node = edge.from();
DCHECK_EQ(IrOpcode::kProjection, node->opcode());
size_t const index = ProjectionIndexOf(node->op());
DCHECK_LT(index, buffer->output_nodes.size());
DCHECK(!buffer->output_nodes[index]);
buffer->output_nodes[index] = edge.from();
DCHECK(!buffer->output_nodes[index].node);
PushParameter result = {node,
buffer->descriptor->GetReturnLocation(index)};
buffer->output_nodes[index] = result;
if (result.location.IsCallerFrameSlot()) {
stack_count += result.location.GetSizeInPointers();
}
}
frame_->EnsureReturnSlots(stack_count);
}
// Filter out the outputs that aren't live because no projection uses them.
......@@ -722,22 +733,22 @@ void InstructionSelector::InitializeCallBuffer(Node* call, CallBuffer* buffer,
: buffer->frame_state_descriptor->state_combine()
.ConsumedOutputCount();
for (size_t i = 0; i < buffer->output_nodes.size(); i++) {
bool output_is_live = buffer->output_nodes[i] != nullptr ||
bool output_is_live = buffer->output_nodes[i].node != nullptr ||
i < outputs_needed_by_framestate;
if (output_is_live) {
MachineRepresentation rep =
buffer->descriptor->GetReturnType(static_cast<int>(i))
.representation();
LinkageLocation location =
buffer->descriptor->GetReturnLocation(static_cast<int>(i));
LinkageLocation location = buffer->output_nodes[i].location;
MachineRepresentation rep = location.GetType().representation();
Node* output = buffer->output_nodes[i];
Node* output = buffer->output_nodes[i].node;
InstructionOperand op = output == nullptr
? g.TempLocation(location)
: g.DefineAsLocation(output, location);
MarkAsRepresentation(rep, op);
buffer->outputs.push_back(op);
if (!UnallocatedOperand::cast(op).HasFixedSlotPolicy()) {
buffer->outputs.push_back(op);
buffer->output_nodes[i].node = nullptr;
}
}
}
}
......@@ -842,8 +853,8 @@ void InstructionSelector::InitializeCallBuffer(Node* call, CallBuffer* buffer,
if (static_cast<size_t>(stack_index) >= buffer->pushed_nodes.size()) {
buffer->pushed_nodes.resize(stack_index + 1);
}
PushParameter parameter(*iter, buffer->descriptor->GetInputType(index));
buffer->pushed_nodes[stack_index] = parameter;
PushParameter param = {*iter, location};
buffer->pushed_nodes[stack_index] = param;
pushed_count++;
} else {
buffer->instruction_args.push_back(op);
......@@ -2423,6 +2434,8 @@ void InstructionSelector::VisitCall(Node* node, BasicBlock* handler) {
&buffer.instruction_args.front());
if (instruction_selection_failed()) return;
call_instr->MarkAsCall();
EmitPrepareResults(&(buffer.output_nodes), descriptor, node);
}
void InstructionSelector::VisitCallWithCallerSavedRegisters(
......
......@@ -10,6 +10,7 @@
#include "src/compiler/common-operator.h"
#include "src/compiler/instruction-scheduler.h"
#include "src/compiler/instruction.h"
#include "src/compiler/linkage.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node.h"
#include "src/globals.h"
......@@ -30,17 +31,13 @@ class StateObjectDeduplicator;
// This struct connects nodes of parameters which are going to be pushed on the
// call stack with their parameter index in the call descriptor of the callee.
class PushParameter {
public:
PushParameter() : node_(nullptr), type_(MachineType::None()) {}
PushParameter(Node* node, MachineType type) : node_(node), type_(type) {}
Node* node() const { return node_; }
MachineType type() const { return type_; }
struct PushParameter {
PushParameter(Node* n = nullptr,
LinkageLocation l = LinkageLocation::ForAnyRegister())
: node(n), location(l) {}
private:
Node* node_;
MachineType type_;
Node* node;
LinkageLocation location;
};
enum class FrameStateInputKind { kAny, kStackSlot };
......@@ -353,6 +350,8 @@ class V8_EXPORT_PRIVATE InstructionSelector final {
void EmitPrepareArguments(ZoneVector<compiler::PushParameter>* arguments,
const CallDescriptor* descriptor, Node* node);
void EmitPrepareResults(ZoneVector<compiler::PushParameter>* results,
const CallDescriptor* descriptor, Node* node);
void EmitIdentity(Node* node);
bool CanProduceSignalingNaN(Node* node);
......
......@@ -316,9 +316,10 @@ void Int64Lowering::LowerNode(Node* node) {
case IrOpcode::kTailCall: {
CallDescriptor* descriptor =
const_cast<CallDescriptor*>(CallDescriptorOf(node->op()));
if (DefaultLowering(node) ||
(descriptor->ReturnCount() == 1 &&
descriptor->GetReturnType(0) == MachineType::Int64())) {
bool returns_require_lowering =
GetReturnCountAfterLowering(descriptor) !=
static_cast<int>(descriptor->ReturnCount());
if (DefaultLowering(node) || returns_require_lowering) {
// Tail calls do not have return values, so adjusting the call
// descriptor is enough.
auto new_descriptor = GetI32WasmCallDescriptor(zone(), descriptor);
......
......@@ -197,12 +197,14 @@ class V8_EXPORT_PRIVATE CallDescriptor final
RegList callee_saved_registers,
RegList callee_saved_fp_registers, Flags flags,
const char* debug_name = "",
const RegList allocatable_registers = 0)
const RegList allocatable_registers = 0,
size_t stack_return_count = 0)
: kind_(kind),
target_type_(target_type),
target_loc_(target_loc),
location_sig_(location_sig),
stack_param_count_(stack_param_count),
stack_return_count_(stack_return_count),
properties_(properties),
callee_saved_registers_(callee_saved_registers),
callee_saved_fp_registers_(callee_saved_fp_registers),
......@@ -232,6 +234,9 @@ class V8_EXPORT_PRIVATE CallDescriptor final
// The number of stack parameters to the call.
size_t StackParameterCount() const { return stack_param_count_; }
// The number of stack return values from the call.
size_t StackReturnCount() const { return stack_return_count_; }
// The number of parameters to the JS function call.
size_t JSParameterCount() const {
DCHECK(IsJSFunctionCall());
......@@ -318,6 +323,7 @@ class V8_EXPORT_PRIVATE CallDescriptor final
const LinkageLocation target_loc_;
const LocationSignature* const location_sig_;
const size_t stack_param_count_;
const size_t stack_return_count_;
const Operator::Properties properties_;
const RegList callee_saved_registers_;
const RegList callee_saved_fp_registers_;
......
......@@ -1181,8 +1181,8 @@ void InstructionSelector::EmitPrepareArguments(
// Poke any stack arguments.
int slot = kCArgSlotCount;
for (PushParameter input : (*arguments)) {
if (input.node()) {
Emit(kMipsStoreToStackSlot, g.NoOutput(), g.UseRegister(input.node()),
if (input.node) {
Emit(kMipsStoreToStackSlot, g.NoOutput(), g.UseRegister(input.node),
g.TempImmediate(slot << kPointerSizeLog2));
++slot;
}
......@@ -1196,14 +1196,19 @@ void InstructionSelector::EmitPrepareArguments(
}
for (size_t n = 0; n < arguments->size(); ++n) {
PushParameter input = (*arguments)[n];
if (input.node()) {
Emit(kMipsStoreToStackSlot, g.NoOutput(), g.UseRegister(input.node()),
if (input.node) {
Emit(kMipsStoreToStackSlot, g.NoOutput(), g.UseRegister(input.node),
g.TempImmediate(n << kPointerSizeLog2));
}
}
}
}
void InstructionSelector::EmitPrepareResults(ZoneVector<PushParameter>* results,
const CallDescriptor* descriptor,
Node* node) {
// TODO(ahaas): Port.
}
bool InstructionSelector::IsTailCallAddressImmediate() { return false; }
......
......@@ -1676,7 +1676,7 @@ void InstructionSelector::EmitPrepareArguments(
// Poke any stack arguments.
int slot = kCArgSlotCount;
for (PushParameter input : (*arguments)) {
Emit(kMips64StoreToStackSlot, g.NoOutput(), g.UseRegister(input.node()),
Emit(kMips64StoreToStackSlot, g.NoOutput(), g.UseRegister(input.node),
g.TempImmediate(slot << kPointerSizeLog2));
++slot;
}
......@@ -1688,14 +1688,19 @@ void InstructionSelector::EmitPrepareArguments(
}
for (size_t n = 0; n < arguments->size(); ++n) {
PushParameter input = (*arguments)[n];
if (input.node()) {
Emit(kMips64StoreToStackSlot, g.NoOutput(), g.UseRegister(input.node()),
if (input.node) {
Emit(kMips64StoreToStackSlot, g.NoOutput(), g.UseRegister(input.node),
g.TempImmediate(static_cast<int>(n << kPointerSizeLog2)));
}
}
}
}
void InstructionSelector::EmitPrepareResults(ZoneVector<PushParameter>* results,
const CallDescriptor* descriptor,
Node* node) {
// TODO(ahaas): Port.
}
bool InstructionSelector::IsTailCallAddressImmediate() { return false; }
......
......@@ -134,7 +134,6 @@ void RawMachineAssembler::Return(Node* value) {
current_block_ = nullptr;
}
void RawMachineAssembler::Return(Node* v1, Node* v2) {
Node* values[] = {Int32Constant(0), v1, v2};
Node* ret = MakeNode(common()->Return(2), 3, values);
......@@ -142,7 +141,6 @@ void RawMachineAssembler::Return(Node* v1, Node* v2) {
current_block_ = nullptr;
}
void RawMachineAssembler::Return(Node* v1, Node* v2, Node* v3) {
Node* values[] = {Int32Constant(0), v1, v2, v3};
Node* ret = MakeNode(common()->Return(3), 4, values);
......@@ -150,6 +148,24 @@ void RawMachineAssembler::Return(Node* v1, Node* v2, Node* v3) {
current_block_ = nullptr;
}
void RawMachineAssembler::Return(Node* v1, Node* v2, Node* v3, Node* v4) {
Node* values[] = {Int32Constant(0), v1, v2, v3, v4};
Node* ret = MakeNode(common()->Return(4), 5, values);
schedule()->AddReturn(CurrentBlock(), ret);
current_block_ = nullptr;
}
void RawMachineAssembler::Return(int count, Node* vs[]) {
typedef Node* Node_ptr;
Node** values = new Node_ptr[count + 1];
values[0] = Int32Constant(0);
for (int i = 0; i < count; ++i) values[i + 1] = vs[i];
Node* ret = MakeNode(common()->Return(count), count + 1, values);
schedule()->AddReturn(CurrentBlock(), ret);
current_block_ = nullptr;
delete[] values;
}
void RawMachineAssembler::PopAndReturn(Node* pop, Node* value) {
Node* values[] = {pop, value};
Node* ret = MakeNode(common()->Return(1), 2, values);
......@@ -172,6 +188,14 @@ void RawMachineAssembler::PopAndReturn(Node* pop, Node* v1, Node* v2,
current_block_ = nullptr;
}
void RawMachineAssembler::PopAndReturn(Node* pop, Node* v1, Node* v2, Node* v3,
Node* v4) {
Node* values[] = {pop, v1, v2, v3, v4};
Node* ret = MakeNode(common()->Return(4), 5, values);
schedule()->AddReturn(CurrentBlock(), ret);
current_block_ = nullptr;
}
void RawMachineAssembler::DebugAbort(Node* message) {
AddNode(machine()->DebugAbort(), message);
}
......
......@@ -828,9 +828,12 @@ class V8_EXPORT_PRIVATE RawMachineAssembler {
void Return(Node* value);
void Return(Node* v1, Node* v2);
void Return(Node* v1, Node* v2, Node* v3);
void Return(Node* v1, Node* v2, Node* v3, Node* v4);
void Return(int count, Node* v[]);
void PopAndReturn(Node* pop, Node* value);
void PopAndReturn(Node* pop, Node* v1, Node* v2);
void PopAndReturn(Node* pop, Node* v1, Node* v2, Node* v3);
void PopAndReturn(Node* pop, Node* v1, Node* v2, Node* v3, Node* v4);
void Bind(RawMachineLabel* label);
void Deoptimize(Node* state);
void DebugAbort(Node* message);
......
This diff is collapsed.
......@@ -2231,7 +2231,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
case kX64Poke: {
int const slot = MiscField::decode(instr->opcode());
int slot = MiscField::decode(instr->opcode());
if (HasImmediateInput(instr, 0)) {
__ movq(Operand(rsp, slot * kPointerSize), i.InputImmediate(0));
} else {
......@@ -2239,6 +2239,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kX64PeekFloat32: {
int reverse_slot = MiscField::decode(instr->opcode());
int offset =
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
__ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
break;
}
case kX64PeekFloat64: {
int reverse_slot = MiscField::decode(instr->opcode());
int offset =
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
__ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
break;
}
case kX64Peek: {
int reverse_slot = MiscField::decode(instr->opcode());
int offset =
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
__ movq(i.OutputRegister(), Operand(rbp, offset));
break;
}
// TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
case kX64F32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
......@@ -3206,9 +3227,10 @@ void CodeGenerator::AssembleConstructFrame() {
__ bind(&done);
}
// Skip callee-saved slots, which are pushed below.
// Skip callee-saved and return slots, which are created below.
shrink_slots -= base::bits::CountPopulation(saves);
shrink_slots -= base::bits::CountPopulation(saves_fp);
shrink_slots -= frame()->GetReturnSlotCount();
if (shrink_slots > 0) {
__ subq(rsp, Immediate(shrink_slots * kPointerSize));
}
......@@ -3235,6 +3257,11 @@ void CodeGenerator::AssembleConstructFrame() {
__ pushq(Register::from_code(i));
}
}
// Allocate return slots (located after callee-saved).
if (frame()->GetReturnSlotCount() > 0) {
__ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kPointerSize));
}
}
void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
......@@ -3243,6 +3270,10 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
// Restore registers.
const RegList saves = descriptor->CalleeSavedRegisters();
if (saves != 0) {
const int returns = frame()->GetReturnSlotCount();
if (returns != 0) {
__ addq(rsp, Immediate(returns * kPointerSize));
}
for (int i = 0; i < Register::kNumRegisters; i++) {
if (!((1 << i) & saves)) continue;
__ popq(Register::from_code(i));
......
......@@ -143,6 +143,9 @@ namespace compiler {
V(X64Inc32) \
V(X64Push) \
V(X64Poke) \
V(X64Peek) \
V(X64PeekFloat32) \
V(X64PeekFloat64) \
V(X64StackCheck) \
V(X64F32x4Splat) \
V(X64F32x4ExtractLane) \
......
......@@ -254,6 +254,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
case kX64StackCheck:
case kX64Peek:
case kX64PeekFloat32:
case kX64PeekFloat64:
return kIsLoadOperation;
case kX64Push:
......
......@@ -1540,11 +1540,11 @@ void InstructionSelector::EmitPrepareArguments(
// Poke any stack arguments.
for (size_t n = 0; n < arguments->size(); ++n) {
PushParameter input = (*arguments)[n];
if (input.node()) {
if (input.node) {
int slot = static_cast<int>(n);
InstructionOperand value = g.CanBeImmediate(input.node())
? g.UseImmediate(input.node())
: g.UseRegister(input.node());
InstructionOperand value = g.CanBeImmediate(input.node)
? g.UseImmediate(input.node)
: g.UseRegister(input.node);
Emit(kX64Poke | MiscField::encode(slot), g.NoOutput(), value);
}
}
......@@ -1552,31 +1552,56 @@ void InstructionSelector::EmitPrepareArguments(
// Push any stack arguments.
int effect_level = GetEffectLevel(node);
for (PushParameter input : base::Reversed(*arguments)) {
Node* input_node = input.node();
if (g.CanBeImmediate(input_node)) {
Emit(kX64Push, g.NoOutput(), g.UseImmediate(input_node));
if (g.CanBeImmediate(input.node)) {
Emit(kX64Push, g.NoOutput(), g.UseImmediate(input.node));
} else if (IsSupported(ATOM) ||
sequence()->IsFP(GetVirtualRegister(input_node))) {
sequence()->IsFP(GetVirtualRegister(input.node))) {
// TODO(titzer): X64Push cannot handle stack->stack double moves
// because there is no way to encode fixed double slots.
Emit(kX64Push, g.NoOutput(), g.UseRegister(input_node));
} else if (g.CanBeMemoryOperand(kX64Push, node, input_node,
Emit(kX64Push, g.NoOutput(), g.UseRegister(input.node));
} else if (g.CanBeMemoryOperand(kX64Push, node, input.node,
effect_level)) {
InstructionOperand outputs[1];
InstructionOperand inputs[4];
size_t input_count = 0;
InstructionCode opcode = kX64Push;
AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
input_node, inputs, &input_count);
input.node, inputs, &input_count);
opcode |= AddressingModeField::encode(mode);
Emit(opcode, 0, outputs, input_count, inputs);
} else {
Emit(kX64Push, g.NoOutput(), g.Use(input_node));
Emit(kX64Push, g.NoOutput(), g.Use(input.node));
}
}
}
}
void InstructionSelector::EmitPrepareResults(ZoneVector<PushParameter>* results,
const CallDescriptor* descriptor,
Node* node) {
X64OperandGenerator g(this);
int reverse_slot = 0;
for (PushParameter output : *results) {
if (!output.location.IsCallerFrameSlot()) continue;
reverse_slot += output.location.GetSizeInPointers();
// Skip any alignment holes in nodes.
if (output.node == nullptr) continue;
DCHECK(!descriptor->IsCFunctionCall());
if (output.location.GetType() == MachineType::Float32()) {
MarkAsFloat32(output.node);
InstructionOperand result = g.DefineAsRegister(output.node);
Emit(kX64PeekFloat32 | MiscField::encode(reverse_slot), result);
} else if (output.location.GetType() == MachineType::Float64()) {
MarkAsFloat64(output.node);
InstructionOperand result = g.DefineAsRegister(output.node);
Emit(kX64PeekFloat64 | MiscField::encode(reverse_slot), result);
} else {
InstructionOperand result = g.DefineAsRegister(output.node);
Emit(kX64Peek | MiscField::encode(reverse_slot), result);
}
}
}
bool InstructionSelector::IsTailCallAddressImmediate() { return true; }
......
......@@ -113,6 +113,10 @@
'test-strings/StringOOM*': [PASS, ['mode == debug', SKIP]],
'test-serialize/CustomSnapshotDataBlobImmortalImmovableRoots': [PASS, ['mode == debug', SKIP]],
'test-parsing/ObjectRestNegativeTestSlow': [PASS, ['mode == debug', SKIP]],
# todo(ahaas): Flaky test. I want to remove this test eventually but keep it
# for now for debugging.
'test-multiple-return/ReturnMultipleRandom': [SKIP],
}], # ALWAYS
##############################################################################
......@@ -157,6 +161,18 @@
'test-api/Float64Array': [SKIP],
}], # 'arch == arm64 and mode == debug and simulator_run'
##############################################################################
# TODO(ahaas): Port multiple return values to ARM and MIPS
['arch == arm or arch == arm64 or arch == mips or arch == mips64 or arch == mipsel or arch == mips64el', {
'test-multiple-return/*': [SKIP],
}],
['system == windows and arch == x64', {
'test-multiple-return/ReturnMultipleInt32': [SKIP],
'test-multiple-return/ReturnMultipleInt64': [SKIP],
'test-multiple-return/ReturnMultipleFloat32': [SKIP],
'test-multiple-return/ReturnMultipleFloat64': [SKIP],
}],
##############################################################################
['asan == True', {
# Skip tests not suitable for ASAN.
......
......@@ -198,6 +198,12 @@
'asm/embenchen/lua_binarytrees': [SKIP],
}], # novfp3 == True
##############################################################################
# TODO(ahaas): Port multiple return values to ARM and MIPS
['arch == arm or arch == arm64 or arch == mips or arch == mips64 or arch == mipsel or arch == mips64el', {
'wasm/multi-value': [SKIP],
}],
##############################################################################
['gc_stress == True', {
# Skip tests not suitable for GC stress.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment