Commit 36a7cba2 authored by Bill Budge's avatar Bill Budge Committed by Commit Bot

Reland "[wasm][liftoff] Respect CallDescriptor linkage"

This is a reland of 94283811

Patchset #2 fixes the Arm bug. A vpush is used to push the slot,
so subtract kSimd128Size from the stack decrement to get padding.

Original change's description:
> [wasm][liftoff] Respect CallDescriptor linkage
>
> - Adds the actual stack slot location to LiftoffStackSlots::Slot.
> - Adds SortInPushedOrder method for architectures that push
>   parameters.
> - Changes the LiftoffStackSlots::Construct signature to take the
>   number of parameter slots in total, and changes implementations
>   to insert padding when slots aren't contiguous.
> - Changes Arm MacroAssembler::AllocateStackSpace to check the
>   immediate value, and to be a nop when it's zero.
>
> Bug: v8:9198
> Change-Id: Ibd5775dbed3a40051fa9e345556231a1c07cf4e9
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2717120
> Reviewed-by: Andreas Haas <ahaas@chromium.org>
> Reviewed-by: Clemens Backes <clemensb@chromium.org>
> Commit-Queue: Bill Budge <bbudge@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#73191}

Bug: v8:9198
Change-Id: Iae4930e28dd7fc634e3709a5726379c6b37e5195
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2735984Reviewed-by: 's avatarBill Budge <bbudge@chromium.org>
Reviewed-by: 's avatarClemens Backes <clemensb@chromium.org>
Reviewed-by: 's avatarAndreas Haas <ahaas@chromium.org>
Commit-Queue: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73195}
parent 76a302f9
......@@ -1389,6 +1389,7 @@ void TurboAssembler::AllocateStackSpace(Register bytes_scratch) {
}
void TurboAssembler::AllocateStackSpace(int bytes) {
DCHECK_GE(bytes, 0);
UseScratchRegisterScope temps(this);
DwVfpRegister scratch = no_dreg;
while (bytes > kStackPageSize) {
......@@ -1399,6 +1400,7 @@ void TurboAssembler::AllocateStackSpace(int bytes) {
vldr(scratch, MemOperand(sp));
bytes -= kStackPageSize;
}
if (bytes == 0) return;
sub(sp, sp, Operand(bytes));
}
#endif
......
......@@ -64,7 +64,11 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void AllocateStackSpace(int bytes);
#else
void AllocateStackSpace(Register bytes) { sub(sp, sp, bytes); }
void AllocateStackSpace(int bytes) { sub(sp, sp, Operand(bytes)); }
void AllocateStackSpace(int bytes) {
DCHECK_GE(bytes, 0);
if (bytes == 0) return;
sub(sp, sp, Operand(bytes));
}
#endif
// Push a fixed frame, consisting of lr, fp
......
......@@ -1789,9 +1789,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Slot-sized arguments are never padded but there may be a gap if
// the slot allocator reclaimed other padding slots. Adjust the stack
// here to skip any gap.
if (slots > pushed_slots) {
__ AllocateStackSpace((slots - pushed_slots) * kSystemPointerSize);
}
switch (rep) {
case MachineRepresentation::kFloat32:
__ vpush(i.InputFloatRegister(1));
......@@ -3909,10 +3907,8 @@ void CodeGenerator::AssembleConstructFrame() {
}
const int returns = frame()->GetReturnSlotCount();
if (returns != 0) {
// Create space for returns.
__ AllocateStackSpace(returns * kSystemPointerSize);
}
}
void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
......
......@@ -4232,8 +4232,15 @@ void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
add(sp, sp, Operand(size));
}
void LiftoffStackSlots::Construct() {
void LiftoffStackSlots::Construct(int param_slots) {
DCHECK_LT(0, slots_.size());
SortInPushOrder();
int last_stack_slot = param_slots;
for (auto& slot : slots_) {
const int stack_slot = slot.dst_slot_;
int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
DCHECK_LT(0, stack_decrement);
last_stack_slot = stack_slot;
const LiftoffAssembler::VarState& src = slot.src_;
switch (src.loc()) {
case LiftoffAssembler::VarState::kStack: {
......@@ -4245,6 +4252,7 @@ void LiftoffStackSlots::Construct() {
case kF32:
case kRef:
case kOptRef: {
asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
UseScratchRegisterScope temps(asm_);
Register scratch = temps.Acquire();
asm_->ldr(scratch,
......@@ -4252,12 +4260,14 @@ void LiftoffStackSlots::Construct() {
asm_->Push(scratch);
} break;
case kF64: {
asm_->AllocateStackSpace(stack_decrement - kDoubleSize);
UseScratchRegisterScope temps(asm_);
DwVfpRegister scratch = temps.AcquireD();
asm_->vldr(scratch, liftoff::GetStackSlot(slot.src_offset_));
asm_->vpush(scratch);
} break;
case kS128: {
asm_->AllocateStackSpace(stack_decrement - kSimd128Size);
MemOperand mem_op = liftoff::GetStackSlot(slot.src_offset_);
UseScratchRegisterScope temps(asm_);
Register addr = liftoff::CalculateActualAddress(
......@@ -4272,7 +4282,9 @@ void LiftoffStackSlots::Construct() {
}
break;
}
case LiftoffAssembler::VarState::kRegister:
case LiftoffAssembler::VarState::kRegister: {
int pushed_bytes = SlotSizeInBytes(slot);
asm_->AllocateStackSpace(stack_decrement - pushed_bytes);
switch (src.kind()) {
case kI64: {
LiftoffRegister reg =
......@@ -4297,7 +4309,9 @@ void LiftoffStackSlots::Construct() {
UNREACHABLE();
}
break;
}
case LiftoffAssembler::VarState::kIntConst: {
asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
DCHECK(src.kind() == kI32 || src.kind() == kI64);
UseScratchRegisterScope temps(asm_);
Register scratch = temps.Acquire();
......
......@@ -3209,16 +3209,12 @@ void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
Drop(size, 1);
}
void LiftoffStackSlots::Construct() {
size_t num_slots = 0;
for (auto& slot : slots_) {
num_slots += slot.src_.kind() == kS128 ? 2 : 1;
}
void LiftoffStackSlots::Construct(int param_slots) {
DCHECK_LT(0, slots_.size());
// The stack pointer is required to be quadword aligned.
asm_->Claim(RoundUp(num_slots, 2));
size_t poke_offset = num_slots * kXRegSize;
asm_->Claim(RoundUp(param_slots, 2));
for (auto& slot : slots_) {
poke_offset -= slot.src_.kind() == kS128 ? kXRegSize * 2 : kXRegSize;
int poke_offset = slot.dst_slot_ * kSystemPointerSize;
switch (slot.src_.loc()) {
case LiftoffAssembler::VarState::kStack: {
UseScratchRegisterScope temps(asm_);
......
......@@ -118,27 +118,30 @@ inline void Store(LiftoffAssembler* assm, Register base, int32_t offset,
}
}
inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueKind kind) {
inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueKind kind,
int padding = 0) {
switch (kind) {
case kI32:
case kRef:
case kOptRef:
assm->AllocateStackSpace(padding);
assm->push(reg.gp());
break;
case kI64:
assm->AllocateStackSpace(padding);
assm->push(reg.high_gp());
assm->push(reg.low_gp());
break;
case kF32:
assm->AllocateStackSpace(sizeof(float));
assm->AllocateStackSpace(sizeof(float) + padding);
assm->movss(Operand(esp, 0), reg.fp());
break;
case kF64:
assm->AllocateStackSpace(sizeof(double));
assm->AllocateStackSpace(sizeof(double) + padding);
assm->movsd(Operand(esp, 0), reg.fp());
break;
case kS128:
assm->AllocateStackSpace(sizeof(double) * 2);
assm->AllocateStackSpace(sizeof(double) * 2 + padding);
assm->movdqu(Operand(esp, 0), reg.fp());
break;
default:
......@@ -4905,36 +4908,49 @@ void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
add(esp, Immediate(size));
}
void LiftoffStackSlots::Construct() {
void LiftoffStackSlots::Construct(int param_slots) {
DCHECK_LT(0, slots_.size());
SortInPushOrder();
int last_stack_slot = param_slots;
for (auto& slot : slots_) {
const int stack_slot = slot.dst_slot_;
int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
DCHECK_LT(0, stack_decrement);
last_stack_slot = stack_slot;
const LiftoffAssembler::VarState& src = slot.src_;
switch (src.loc()) {
case LiftoffAssembler::VarState::kStack:
// The combination of AllocateStackSpace and 2 movdqu is usually smaller
// in code size than doing 4 pushes.
if (src.kind() == kS128) {
asm_->AllocateStackSpace(sizeof(double) * 2);
asm_->AllocateStackSpace(stack_decrement);
asm_->movdqu(liftoff::kScratchDoubleReg,
liftoff::GetStackSlot(slot.src_offset_));
asm_->movdqu(Operand(esp, 0), liftoff::kScratchDoubleReg);
break;
}
if (src.kind() == kF64) {
asm_->AllocateStackSpace(stack_decrement - kDoubleSize);
DCHECK_EQ(kLowWord, slot.half_);
asm_->push(liftoff::GetHalfStackSlot(slot.src_offset_, kHighWord));
stack_decrement = kSystemPointerSize;
}
asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
asm_->push(liftoff::GetHalfStackSlot(slot.src_offset_, slot.half_));
break;
case LiftoffAssembler::VarState::kRegister:
if (src.kind() == kI64) {
liftoff::push(
asm_, slot.half_ == kLowWord ? src.reg().low() : src.reg().high(),
kI32);
kI32, stack_decrement - kSystemPointerSize);
} else {
liftoff::push(asm_, src.reg(), src.kind());
int pushed_bytes = SlotSizeInBytes(slot);
liftoff::push(asm_, src.reg(), src.kind(),
stack_decrement - pushed_bytes);
}
break;
case LiftoffAssembler::VarState::kIntConst:
asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
// The high word is the sign extension of the low word.
asm_->push(Immediate(slot.half_ == kLowWord ? src.i32_const()
: src.i32_const() >> 31));
......
......@@ -799,8 +799,9 @@ void PrepareStackTransfers(const ValueKindSig* sig,
LiftoffStackSlots* stack_slots,
StackTransferRecipe* stack_transfers,
LiftoffRegList* param_regs) {
// Process parameters backwards, such that pushes of caller frame slots are
// in the correct order.
// Process parameters backwards, to reduce the amount of Slot sorting for
// the most common case - a normal Wasm Call. Slots will be mostly unsorted
// in the Builtin call case.
uint32_t call_desc_input_idx =
static_cast<uint32_t>(call_descriptor->InputCount());
uint32_t num_params = static_cast<uint32_t>(sig->parameter_count());
......@@ -834,7 +835,8 @@ void PrepareStackTransfers(const ValueKindSig* sig,
}
} else {
DCHECK(loc.IsCallerFrameSlot());
stack_slots->Add(slot, stack_offset, half);
int param_offset = -loc.GetLocation() - 1;
stack_slots->Add(slot, stack_offset, half, param_offset);
}
}
}
......@@ -851,10 +853,10 @@ void LiftoffAssembler::PrepareBuiltinCall(
PrepareStackTransfers(sig, call_descriptor, params.begin(), &stack_slots,
&stack_transfers, &param_regs);
SpillAllRegisters();
// Create all the slots.
// Builtin stack parameters are pushed in reversed order.
stack_slots.Reverse();
stack_slots.Construct();
int param_slots = static_cast<int>(call_descriptor->StackParameterCount());
if (param_slots > 0) {
stack_slots.Construct(param_slots);
}
// Execute the stack transfers before filling the instance register.
stack_transfers.Execute();
......@@ -897,6 +899,7 @@ void LiftoffAssembler::PrepareCall(const ValueKindSig* sig,
LiftoffRegister(*target_instance), kIntPtr);
}
int param_slots = static_cast<int>(call_descriptor->StackParameterCount());
if (num_params) {
uint32_t param_base = cache_state_.stack_height() - num_params;
PrepareStackTransfers(sig, call_descriptor,
......@@ -916,13 +919,16 @@ void LiftoffAssembler::PrepareCall(const ValueKindSig* sig,
*target = new_target.gp();
} else {
stack_slots.Add(LiftoffAssembler::VarState(LiftoffAssembler::kIntPtr,
LiftoffRegister(*target), 0));
LiftoffRegister(*target), 0),
param_slots);
param_slots++;
*target = no_reg;
}
}
// Create all the slots.
stack_slots.Construct();
if (param_slots > 0) {
stack_slots.Construct(param_slots);
}
// Execute the stack transfers before filling the instance register.
stack_transfers.Execute();
// Pop parameters from the value stack.
......
......@@ -1563,28 +1563,50 @@ class LiftoffStackSlots {
LiftoffStackSlots& operator=(const LiftoffStackSlots&) = delete;
void Add(const LiftoffAssembler::VarState& src, uint32_t src_offset,
RegPairHalf half) {
slots_.emplace_back(src, src_offset, half);
RegPairHalf half, int dst_slot) {
DCHECK_LE(0, dst_slot);
slots_.emplace_back(src, src_offset, half, dst_slot);
}
void Add(const LiftoffAssembler::VarState& src) { slots_.emplace_back(src); }
void Reverse() { std::reverse(slots_.begin(), slots_.end()); }
void Add(const LiftoffAssembler::VarState& src, int dst_slot) {
DCHECK_LE(0, dst_slot);
slots_.emplace_back(src, dst_slot);
}
void SortInPushOrder() {
std::sort(slots_.begin(), slots_.end(), [](const Slot& a, const Slot& b) {
return a.dst_slot_ > b.dst_slot_;
});
}
inline void Construct();
inline void Construct(int param_slots);
private:
// A logical slot, which may occupy multiple stack slots.
struct Slot {
Slot(const LiftoffAssembler::VarState& src, uint32_t src_offset,
RegPairHalf half)
: src_(src), src_offset_(src_offset), half_(half) {}
explicit Slot(const LiftoffAssembler::VarState& src)
: src_(src), half_(kLowWord) {}
RegPairHalf half, int dst_slot)
: src_(src),
src_offset_(src_offset),
half_(half),
dst_slot_(dst_slot) {}
Slot(const LiftoffAssembler::VarState& src, int dst_slot)
: src_(src), half_(kLowWord), dst_slot_(dst_slot) {}
LiftoffAssembler::VarState src_;
uint32_t src_offset_ = 0;
RegPairHalf half_;
int dst_slot_ = 0;
};
// Returns the size in bytes of the given logical slot.
static int SlotSizeInBytes(const Slot& slot) {
const ValueKind kind = slot.src_.kind();
if (kind == kS128) return kSimd128Size;
if (kind == kF64) return kDoubleSize;
return kSystemPointerSize;
}
base::SmallVector<Slot, 8> slots_;
LiftoffAssembler* const asm_;
};
......
......@@ -133,24 +133,26 @@ inline void Store(LiftoffAssembler* assm, Operand dst, LiftoffRegister src,
}
}
inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueKind kind) {
inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueKind kind,
int padding = 0) {
switch (kind) {
case kI32:
case kI64:
case kRef:
case kOptRef:
assm->AllocateStackSpace(padding);
assm->pushq(reg.gp());
break;
case kF32:
assm->AllocateStackSpace(kSystemPointerSize);
assm->AllocateStackSpace(kSystemPointerSize + padding);
assm->Movss(Operand(rsp, 0), reg.fp());
break;
case kF64:
assm->AllocateStackSpace(kSystemPointerSize);
assm->AllocateStackSpace(kSystemPointerSize + padding);
assm->Movsd(Operand(rsp, 0), reg.fp());
break;
case kS128:
assm->AllocateStackSpace(kSystemPointerSize * 2);
assm->AllocateStackSpace(kSystemPointerSize * 2 + padding);
assm->Movdqu(Operand(rsp, 0), reg.fp());
break;
default:
......@@ -4413,22 +4415,32 @@ void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
addq(rsp, Immediate(size));
}
void LiftoffStackSlots::Construct() {
void LiftoffStackSlots::Construct(int param_slots) {
DCHECK_LT(0, slots_.size());
SortInPushOrder();
int last_stack_slot = param_slots;
for (auto& slot : slots_) {
const int stack_slot = slot.dst_slot_;
int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
last_stack_slot = stack_slot;
const LiftoffAssembler::VarState& src = slot.src_;
DCHECK_LT(0, stack_decrement);
switch (src.loc()) {
case LiftoffAssembler::VarState::kStack:
if (src.kind() == kI32) {
asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
// Load i32 values to a register first to ensure they are zero
// extended.
asm_->movl(kScratchRegister, liftoff::GetStackSlot(slot.src_offset_));
asm_->pushq(kScratchRegister);
} else if (src.kind() == kS128) {
asm_->AllocateStackSpace(stack_decrement - kSimd128Size);
// Since offsets are subtracted from sp, we need a smaller offset to
// push the top of a s128 value.
asm_->pushq(liftoff::GetStackSlot(slot.src_offset_ - 8));
asm_->pushq(liftoff::GetStackSlot(slot.src_offset_));
} else {
asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
// For all other types, just push the whole (8-byte) stack slot.
// This is also ok for f32 values (even though we copy 4 uninitialized
// bytes), because f32 and f64 values are clearly distinguished in
......@@ -4436,10 +4448,13 @@ void LiftoffStackSlots::Construct() {
asm_->pushq(liftoff::GetStackSlot(slot.src_offset_));
}
break;
case LiftoffAssembler::VarState::kRegister:
liftoff::push(asm_, src.reg(), src.kind());
case LiftoffAssembler::VarState::kRegister: {
int pushed = src.kind() == kS128 ? kSimd128Size : kSystemPointerSize;
liftoff::push(asm_, src.reg(), src.kind(), stack_decrement - pushed);
break;
}
case LiftoffAssembler::VarState::kIntConst:
asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
asm_->pushq(Immediate(src.i32_const()));
break;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment