Commit b931af5d authored by Thibaud Michaud, committed by Commit Bot

[liftoff][mv] Support multi-value returns

R=clemensb@chromium.org,ahaas@chromium.org

Bug: v8:10408
Change-Id: I436416e32d814b08543aa2dffbcf8464ec75923f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2190423
Commit-Queue: Thibaud Michaud <thibaudm@chromium.org>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67734}
parent 4d53833f
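For context: with multi-value, a wasm function may return several values, and on 32-bit targets every i64 return is additionally lowered to a pair of i32 halves, so a single wasm return can occupy more than one return location in the call descriptor. A minimal sketch of that counting, with hypothetical names (Kind, LoweredReturnCount) standing in for the real ValueType and linkage machinery:

#include <cstdio>
#include <vector>

enum class Kind { kI32, kI64, kF32, kF64 };

// One lowered location per return, plus one extra for the high word of an
// i64 on a 32-bit target (mirrors the "1 + needs_gp_pair" logic in the CL).
int LoweredReturnCount(const std::vector<Kind>& returns) {
  int count = 0;
  for (Kind k : returns) count += (k == Kind::kI64) ? 2 : 1;
  return count;
}

int main() {
  // A signature returning (i32, i64) needs three lowered return locations.
  std::printf("%d\n", LoweredReturnCount({Kind::kI32, Kind::kI64}));
  return 0;
}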
src/wasm/baseline/arm/liftoff-assembler-arm.h
@@ -43,6 +43,7 @@ constexpr int kInstanceOffset = 2 * kSystemPointerSize;
// PatchPrepareStackFrame will use in order to increase the stack appropriately.
// Three instructions are required to sub a large constant, movw + movt + sub.
constexpr int32_t kPatchInstructionsRequired = 3;
constexpr int kHalfStackSlotSize = LiftoffAssembler::kStackSlotSize >> 1;
inline MemOperand GetStackSlot(int offset) {
return MemOperand(offset > 0 ? fp : sp, -offset);
@@ -258,6 +259,74 @@ inline void F64x2Compare(LiftoffAssembler* assm, LiftoffRegister dst,
assm->vmov(dest.high(), scratch, scratch);
}
inline void Store(LiftoffAssembler* assm, LiftoffRegister src, MemOperand dst,
ValueType type) {
#ifdef DEBUG
// The {str} instruction needs a temp register when the immediate in the
// provided MemOperand does not fit into 12 bits. This happens for large stack
// frames. This DCHECK checks that the temp register is available when needed.
DCHECK(UseScratchRegisterScope{assm}.CanAcquire());
#endif
switch (type.kind()) {
case ValueType::kI32:
assm->str(src.gp(), dst);
break;
case ValueType::kI64:
// Positive offsets should be lowered to kI32.
assm->str(src.low_gp(), MemOperand(dst.rn(), dst.offset()));
assm->str(
src.high_gp(),
MemOperand(dst.rn(), dst.offset() + liftoff::kHalfStackSlotSize));
break;
case ValueType::kF32:
assm->vstr(liftoff::GetFloatRegister(src.fp()), dst);
break;
case ValueType::kF64:
assm->vstr(src.fp(), dst);
break;
case ValueType::kS128: {
UseScratchRegisterScope temps(assm);
Register addr = liftoff::CalculateActualAddress(assm, &temps, dst.rn(),
no_reg, dst.offset());
assm->vst1(Neon8, NeonListOperand(src.low_fp(), 2), NeonMemOperand(addr));
break;
}
default:
UNREACHABLE();
}
}
inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src,
ValueType type) {
switch (type.kind()) {
case ValueType::kI32:
assm->ldr(dst.gp(), src);
break;
case ValueType::kI64:
assm->ldr(dst.low_gp(), MemOperand(src.rn(), src.offset()));
assm->ldr(
dst.high_gp(),
MemOperand(src.rn(), src.offset() + liftoff::kHalfStackSlotSize));
break;
case ValueType::kF32:
assm->vldr(liftoff::GetFloatRegister(dst.fp()), src);
break;
case ValueType::kF64:
assm->vldr(dst.fp(), src);
break;
case ValueType::kS128: {
// Get memory address of slot to fill from.
UseScratchRegisterScope temps(assm);
Register addr = liftoff::CalculateActualAddress(assm, &temps, src.rn(),
no_reg, src.offset());
assm->vld1(Neon8, NeonListOperand(dst.low_fp(), 2), NeonMemOperand(addr));
break;
}
default:
UNREACHABLE();
}
}
} // namespace liftoff
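// Illustrative sketch, not part of this CL: the kI64 cases above split a
// 64-bit value into two 32-bit words, the low word at {offset} and the high
// word at {offset + kHalfStackSlotSize}. Assuming an 8-byte stack slot (so a
// 4-byte half slot) and a little-endian target, this is equivalent to a
// single 64-bit store, as this standalone check demonstrates.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t slot[8] = {0};
  uint64_t value = 0x1122334455667788ULL;
  uint32_t low = static_cast<uint32_t>(value);         // src.low_gp()
  uint32_t high = static_cast<uint32_t>(value >> 32);  // src.high_gp()
  std::memcpy(slot, &low, 4);       // str ..., [base, #offset]
  std::memcpy(slot + 4, &high, 4);  // str ..., [base, #offset + 4]
  uint64_t readback;
  std::memcpy(&readback, slot, 8);
  assert(readback == value);  // same bytes as one little-endian 64-bit store
  return 0;
}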
int LiftoffAssembler::PrepareStackFrame() {
@@ -1083,32 +1152,15 @@ void LiftoffAssembler::AtomicFence() { dmb(ISH); }
void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
uint32_t caller_slot_idx,
ValueType type) {
-int32_t offset = (caller_slot_idx + 1) * kSystemPointerSize;
-MemOperand src(fp, offset);
-switch (type.kind()) {
-case ValueType::kI32:
-ldr(dst.gp(), src);
-break;
-case ValueType::kI64:
-ldr(dst.low_gp(), src);
-ldr(dst.high_gp(), MemOperand(fp, offset + kSystemPointerSize));
-break;
-case ValueType::kF32:
-vldr(liftoff::GetFloatRegister(dst.fp()), src);
-break;
-case ValueType::kF64:
-vldr(dst.fp(), src);
-break;
-case ValueType::kS128: {
-UseScratchRegisterScope temps(this);
-Register addr = liftoff::CalculateActualAddress(this, &temps, src.rn(),
-no_reg, src.offset());
-vld1(Neon8, NeonListOperand(dst.low_fp(), 2), NeonMemOperand(addr));
-break;
-}
-default:
-UNREACHABLE();
-}
MemOperand src(fp, (caller_slot_idx + 1) * kSystemPointerSize);
liftoff::Load(this, dst, src, type);
}
void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
uint32_t caller_slot_idx,
ValueType type) {
MemOperand dst(fp, (caller_slot_idx + 1) * kSystemPointerSize);
liftoff::Store(this, src, dst, type);
}
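// Illustrative sketch, not part of this CL: caller frame slots live above
// the frame pointer, so slot i is addressed at fp + (i + 1) *
// kSystemPointerSize (4-byte pointers assumed, as on arm).
#include <cstdint>
#include <cstdio>

constexpr int kSystemPointerSize = 4;  // assumption: 32-bit target

int32_t CallerFrameSlotOffset(uint32_t caller_slot_idx) {
  return (caller_slot_idx + 1) * kSystemPointerSize;
}

int main() {
  for (uint32_t i = 0; i < 3; ++i) {
    std::printf("caller slot %u -> [fp + %d]\n", i, CallerFrameSlotOffset(i));
  }
  return 0;
}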
void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
@@ -1145,32 +1197,10 @@ void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueType type) {
// frames. This DCHECK checks that the temp register is available when needed.
DCHECK(UseScratchRegisterScope{this}.CanAcquire());
#endif
DCHECK_LT(0, offset);
RecordUsedSpillOffset(offset);
-MemOperand dst = liftoff::GetStackSlot(offset);
-switch (type.kind()) {
-case ValueType::kI32:
-str(reg.gp(), dst);
-break;
-case ValueType::kI64:
-str(reg.low_gp(), liftoff::GetHalfStackSlot(offset, kLowWord));
-str(reg.high_gp(), liftoff::GetHalfStackSlot(offset, kHighWord));
-break;
-case ValueType::kF32:
-vstr(liftoff::GetFloatRegister(reg.fp()), dst);
-break;
-case ValueType::kF64:
-vstr(reg.fp(), dst);
-break;
-case ValueType::kS128: {
-UseScratchRegisterScope temps(this);
-Register addr = liftoff::CalculateActualAddress(this, &temps, dst.rn(),
-no_reg, dst.offset());
-vst1(Neon8, NeonListOperand(reg.low_fp(), 2), NeonMemOperand(addr));
-break;
-}
-default:
-UNREACHABLE();
-}
MemOperand dst(fp, -offset);
liftoff::Store(this, reg, dst, type);
}
void LiftoffAssembler::Spill(int offset, WasmValue value) {
@@ -1206,32 +1236,7 @@ void LiftoffAssembler::Spill(int offset, WasmValue value) {
}
void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueType type) {
-switch (type.kind()) {
-case ValueType::kI32:
-ldr(reg.gp(), liftoff::GetStackSlot(offset));
-break;
-case ValueType::kI64:
-ldr(reg.low_gp(), liftoff::GetHalfStackSlot(offset, kLowWord));
-ldr(reg.high_gp(), liftoff::GetHalfStackSlot(offset, kHighWord));
-break;
-case ValueType::kF32:
-vldr(liftoff::GetFloatRegister(reg.fp()), liftoff::GetStackSlot(offset));
-break;
-case ValueType::kF64:
-vldr(reg.fp(), liftoff::GetStackSlot(offset));
-break;
-case ValueType::kS128: {
-// Get memory address of slot to fill from.
-MemOperand slot = liftoff::GetStackSlot(offset);
-UseScratchRegisterScope temps(this);
-Register addr = liftoff::CalculateActualAddress(this, &temps, slot.rn(),
-no_reg, slot.offset());
-vld1(Neon8, NeonListOperand(reg.low_fp(), 2), NeonMemOperand(addr));
-break;
-}
-default:
-UNREACHABLE();
-}
liftoff::Load(this, reg, liftoff::GetStackSlot(offset), type);
}
void LiftoffAssembler::FillI64Half(Register reg, int offset, RegPairHalf half) {
......
src/wasm/baseline/arm64/liftoff-assembler-arm64.h
@@ -407,6 +407,13 @@ void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
Ldr(liftoff::GetRegFromType(dst, type), MemOperand(fp, offset));
}
void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
uint32_t caller_slot_idx,
ValueType type) {
int32_t offset = (caller_slot_idx + 1) * LiftoffAssembler::kStackSlotSize;
Str(liftoff::GetRegFromType(src, type), MemOperand(fp, offset));
}
void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
ValueType type) {
UseScratchRegisterScope temps(this);
......
src/wasm/baseline/ia32/liftoff-assembler-ia32.h
@@ -544,6 +544,13 @@ void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
type);
}
void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
uint32_t caller_slot_idx,
ValueType type) {
liftoff::Store(this, ebp, kSystemPointerSize * (caller_slot_idx + 1), src,
type);
}
void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
ValueType type) {
if (needs_gp_reg_pair(type)) {
......
src/wasm/baseline/liftoff-assembler.cc
@@ -518,7 +518,6 @@ LiftoffRegister LiftoffAssembler::LoadToRegister(VarState slot,
return reg;
}
case VarState::kRegister:
-cache_state_.dec_used(slot.reg());
return slot.reg();
case VarState::kIntConst: {
RegClass rc =
@@ -531,9 +530,28 @@
UNREACHABLE();
}
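// Load one 32-bit half of an i64 {slot} into a register: reuse the pair
// half if the value already lives in registers, otherwise fill it from the
// stack slot or materialize the requested half of the i64 constant.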
LiftoffRegister LiftoffAssembler::LoadI64HalfIntoRegister(VarState slot,
RegPairHalf half) {
if (slot.is_reg()) {
return half == kLowWord ? slot.reg().low() : slot.reg().high();
}
LiftoffRegister dst = GetUnusedRegister(kGpReg);
if (slot.is_stack()) {
FillI64Half(dst.gp(), slot.offset(), half);
return dst;
}
DCHECK(slot.is_const());
int32_t half_word =
static_cast<int32_t>(half == kLowWord ? slot.constant().to_i64()
: slot.constant().to_i64() >> 32);
LoadConstant(dst, WasmValue(half_word));
return dst;
}
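// Illustrative sketch, not part of this CL: how the constant path above
// extracts the two i32 halves of an i64.
#include <cassert>
#include <cstdint>

int main() {
  int64_t c = 0x0000000A0000000BLL;
  int32_t low = static_cast<int32_t>(c);         // kLowWord
  int32_t high = static_cast<int32_t>(c >> 32);  // kHighWord
  assert(low == 0xB);
  assert(high == 0xA);
  return 0;
}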
LiftoffRegister LiftoffAssembler::PopToRegister(LiftoffRegList pinned) {
DCHECK(!cache_state_.stack_state.empty());
VarState slot = cache_state_.stack_state.back();
if (slot.is_reg()) cache_state_.dec_used(slot.reg());
cache_state_.stack_state.pop_back();
return LoadToRegister(slot, pinned);
}
@@ -542,6 +560,7 @@ LiftoffRegister LiftoffAssembler::PeekToRegister(int index,
LiftoffRegList pinned) {
DCHECK_LT(index, cache_state_.stack_state.size());
VarState& slot = cache_state_.stack_state.end()[-1 - index];
if (slot.is_reg()) cache_state_.dec_used(slot.reg());
LiftoffRegister reg = LoadToRegister(slot, pinned);
if (!slot.is_reg()) {
slot.MakeRegister(reg);
@@ -856,26 +875,59 @@ void LiftoffAssembler::ParallelRegisterMove(
}
}
-void LiftoffAssembler::MoveToReturnRegisters(const FunctionSig* sig) {
-// We do not support multi-value yet.
-DCHECK_EQ(1, sig->return_count());
-ValueType return_type = sig->GetReturn(0);
void LiftoffAssembler::MoveToReturnLocations(
const FunctionSig* sig, compiler::CallDescriptor* descriptor) {
int call_desc_return_idx = 0;
DCHECK_LE(sig->return_count(), cache_state_.stack_height());
VarState* slots = cache_state_.stack_state.end() - sig->return_count();
// Fill return frame slots first to ensure that all potential spills happen
// before we prepare the stack transfers.
for (size_t i = 0; i < sig->return_count(); ++i) {
ValueType return_type = sig->GetReturn(i);
bool needs_gp_pair = needs_gp_reg_pair(return_type);
int num_lowered_params = 1 + needs_gp_pair;
for (int pair_idx = 0; pair_idx < num_lowered_params; ++pair_idx) {
compiler::LinkageLocation loc =
descriptor->GetReturnLocation(call_desc_return_idx++);
if (loc.IsCallerFrameSlot()) {
RegPairHalf half = pair_idx == 0 ? kLowWord : kHighWord;
VarState& slot = slots[i];
LiftoffRegister reg = needs_gp_pair
? LoadI64HalfIntoRegister(slot, half)
: LoadToRegister(slot, {});
ValueType lowered_type = needs_gp_pair ? kWasmI32 : return_type;
StoreCallerFrameSlot(reg, -loc.AsCallerFrameSlot(), lowered_type);
}
}
}
// Prepare and execute stack transfers.
call_desc_return_idx = 0;
StackTransferRecipe stack_transfers(this);
-// Defaults to a gp reg, will be set below if return type is not gp.
-LiftoffRegister return_reg = LiftoffRegister(kGpReturnRegisters[0]);
-if (needs_gp_reg_pair(return_type)) {
-return_reg =
-LiftoffRegister::ForPair(kGpReturnRegisters[0], kGpReturnRegisters[1]);
-} else if (needs_fp_reg_pair(return_type)) {
-return_reg = LiftoffRegister::ForFpPair(kFpReturnRegisters[0]);
-} else if (reg_class_for(return_type) == kFpReg) {
-return_reg = LiftoffRegister(kFpReturnRegisters[0]);
-} else {
-DCHECK_EQ(kGpReg, reg_class_for(return_type));
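// Note: a return register may itself still hold the source value of another
// return, so the loop below only records the register moves; the
// StackTransferRecipe executes them as one parallel move at scope exit.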
for (size_t i = 0; i < sig->return_count(); ++i) {
ValueType return_type = sig->GetReturn(i);
bool needs_gp_pair = needs_gp_reg_pair(return_type);
int num_lowered_params = 1 + needs_gp_pair;
for (int pair_idx = 0; pair_idx < num_lowered_params; ++pair_idx) {
RegPairHalf half = pair_idx == 0 ? kLowWord : kHighWord;
compiler::LinkageLocation loc =
descriptor->GetReturnLocation(call_desc_return_idx++);
if (loc.IsRegister()) {
DCHECK(!loc.IsAnyRegister());
int reg_code = loc.AsRegister();
ValueType lowered_type = needs_gp_pair ? kWasmI32 : return_type;
RegClass rc = reg_class_for(lowered_type);
LiftoffRegister reg =
LiftoffRegister::from_external_code(rc, return_type, reg_code);
VarState& slot = slots[i];
if (needs_gp_pair) {
stack_transfers.LoadI64HalfIntoRegister(reg, slot, slot.offset(),
half);
} else {
stack_transfers.LoadIntoRegister(reg, slot, slot.offset());
}
}
}
}
-stack_transfers.LoadIntoRegister(return_reg, cache_state_.stack_state.back(),
-cache_state_.stack_state.back().offset());
}
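// Illustrative sketch, not part of this CL: a simplified model of how a call
// descriptor might hand out lowered return locations, the first ones going
// to return registers and the overflow to caller frame slots. All names here
// (LinkageLocation, AssignReturnLocations, the register count) are
// hypothetical; the real logic lives in the compiler's linkage code.
#include <cstdio>
#include <vector>

struct LinkageLocation {
  bool is_register;
  int index;  // register code, or caller frame slot index
};

std::vector<LinkageLocation> AssignReturnLocations(int lowered_count) {
  constexpr int kNumGpReturnRegisters = 2;  // assumption for illustration
  std::vector<LinkageLocation> locations;
  for (int i = 0; i < lowered_count; ++i) {
    if (i < kNumGpReturnRegisters) {
      locations.push_back({true, i});
    } else {
      locations.push_back({false, i - kNumGpReturnRegisters});
    }
  }
  return locations;
}

int main() {
  // Three lowered returns, e.g. (i32, i64) lowered on a 32-bit target.
  for (const LinkageLocation& loc : AssignReturnLocations(3)) {
    if (loc.is_register) {
      std::printf("register %d\n", loc.index);
    } else {
      std::printf("caller frame slot %d\n", loc.index);
    }
  }
  return 0;
}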
#ifdef ENABLE_SLOW_DCHECKS
......
src/wasm/baseline/liftoff-assembler.h
@@ -446,7 +446,8 @@ class LiftoffAssembler : public TurboAssembler {
};
void ParallelRegisterMove(Vector<ParallelRegisterMoveTuple>);
-void MoveToReturnRegisters(const FunctionSig*);
void MoveToReturnLocations(const FunctionSig*,
compiler::CallDescriptor* descriptor);
#ifdef ENABLE_SLOW_DCHECKS
// Validate that the register use counts reflect the state of the cache.
@@ -527,6 +528,8 @@
inline void LoadCallerFrameSlot(LiftoffRegister, uint32_t caller_slot_idx,
ValueType);
inline void StoreCallerFrameSlot(LiftoffRegister, uint32_t caller_slot_idx,
ValueType);
inline void MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
ValueType);
@@ -1065,6 +1068,7 @@
private:
LiftoffRegister LoadToRegister(VarState slot, LiftoffRegList pinned);
LiftoffRegister LoadI64HalfIntoRegister(VarState slot, RegPairHalf half);
uint32_t num_locals_ = 0;
static constexpr uint32_t kInlineLocalTypes = 8;
......
src/wasm/baseline/liftoff-compiler.cc
@@ -1526,10 +1526,7 @@
void ReturnImpl(FullDecoder* decoder) {
size_t num_returns = decoder->sig_->return_count();
-if (num_returns > 1) {
-return unsupported(decoder, kMultiValue, "multi-return");
-}
-if (num_returns > 0) __ MoveToReturnRegisters(decoder->sig_);
if (num_returns > 0) __ MoveToReturnLocations(decoder->sig_, descriptor_);
DEBUG_CODE_COMMENT("leave frame");
__ LeaveFrame(StackFrame::WASM);
__ DropStackSlotsAndRet(
......
src/wasm/baseline/x64/liftoff-assembler-x64.h
@@ -697,6 +697,13 @@ void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
liftoff::Load(this, dst, src, type);
}
void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
uint32_t caller_slot_idx,
ValueType type) {
Operand dst(rbp, kSystemPointerSize * (caller_slot_idx + 1));
liftoff::Store(this, dst, src, type);
}
void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
ValueType type) {
DCHECK_NE(dst_offset, src_offset);
......