Commit adf035fb authored by Clemens Backes, committed by Commit Bot

[liftoff] Cache the instance in a register

This CL avoids redundant loads of the instance from the frame by caching
it in a register if possible. This register will be the first one to be
cleared once we run out of registers (hence it's called a "volatile
register"). On local tests, this seems to reduce most redundant loads
within a function, and it also reduces the load for the stack check in
the function prologue.
After the stack check, we need to discard the cached instance though,
since the potential runtime call for the stack check might clobber it.
This will be addressed in a follow-up CL by re-loading the cached
instance after the stack check. This is expected to remove another good
chunk of instance loads, because the instance would initially be
available in a register when starting the function code.

R=thibaudm@chromium.org

Bug: v8:11336
Change-Id: Ie65ab81263fb9d972f4b7a6daaef86cf704874ef
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2695401
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Thibaud Michaud <thibaudm@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72779}
parent 7c743e37
......@@ -574,10 +574,14 @@ void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
}
}
void LiftoffAssembler::LoadFromInstance(Register dst, int offset, int size) {
DCHECK_LE(0, offset);
void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
ldr(dst, liftoff::GetInstanceOperand());
MemOperand src{dst, offset};
}
void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
int offset, int size) {
DCHECK_LE(0, offset);
MemOperand src{instance, offset};
switch (size) {
case 1:
ldrb(dst, src);
......@@ -590,8 +594,11 @@ void LiftoffAssembler::LoadFromInstance(Register dst, int offset, int size) {
}
}
void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst, int offset) {
LoadFromInstance(dst, offset, kTaggedSize);
void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
Register instance,
int offset) {
STATIC_ASSERT(kTaggedSize == kSystemPointerSize);
ldr(dst, MemOperand{instance, offset});
}
void LiftoffAssembler::SpillInstance(Register instance) {
......
......@@ -392,10 +392,14 @@ void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
}
}
void LiftoffAssembler::LoadFromInstance(Register dst, int offset, int size) {
DCHECK_LE(0, offset);
void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
Ldr(dst, liftoff::GetInstanceOperand());
MemOperand src{dst, offset};
}
void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
int offset, int size) {
DCHECK_LE(0, offset);
MemOperand src{instance, offset};
switch (size) {
case 1:
Ldrb(dst.W(), src);
......@@ -411,10 +415,11 @@ void LiftoffAssembler::LoadFromInstance(Register dst, int offset, int size) {
}
}
void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst, int offset) {
void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
Register instance,
int offset) {
DCHECK_LE(0, offset);
Ldr(dst, liftoff::GetInstanceOperand());
LoadTaggedPointerField(dst, MemOperand(dst, offset));
LoadTaggedPointerField(dst, MemOperand{instance, offset});
}
void LiftoffAssembler::SpillInstance(Register instance) {
......
......@@ -295,10 +295,14 @@ void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
}
}
void LiftoffAssembler::LoadFromInstance(Register dst, int offset, int size) {
DCHECK_LE(0, offset);
void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
mov(dst, liftoff::GetInstanceOperand());
Operand src{dst, offset};
}
void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
int offset, int size) {
DCHECK_LE(0, offset);
Operand src{instance, offset};
switch (size) {
case 1:
movzx_b(dst, src);
......@@ -311,8 +315,11 @@ void LiftoffAssembler::LoadFromInstance(Register dst, int offset, int size) {
}
}
void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst, int offset) {
LoadFromInstance(dst, offset, kTaggedSize);
void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
Register instance,
int offset) {
STATIC_ASSERT(kTaggedSize == kSystemPointerSize);
mov(dst, Operand{instance, offset});
}
void LiftoffAssembler::SpillInstance(Register instance) {
......
......@@ -456,6 +456,10 @@ void LiftoffAssembler::CacheState::InitMerge(const CacheState& source,
// |------locals------|---(in between)----|--(discarded)--|----merge----|
// <-- num_locals --> <-- stack_depth -->^stack_base <-- arity -->
if (source.cached_instance != no_reg) {
SetInstanceCacheRegister(source.cached_instance);
}
uint32_t stack_base = stack_depth + num_locals;
uint32_t target_height = stack_base + arity;
uint32_t discarded = source.stack_height() - target_height;
......@@ -687,7 +691,7 @@ void LiftoffAssembler::MaterializeMergedConstants(uint32_t arity) {
}
}
void LiftoffAssembler::MergeFullStackWith(const CacheState& target,
void LiftoffAssembler::MergeFullStackWith(CacheState& target,
const CacheState& source) {
DCHECK_EQ(source.stack_height(), target.stack_height());
// TODO(clemensb): Reuse the same StackTransferRecipe object to save some
......@@ -696,10 +700,16 @@ void LiftoffAssembler::MergeFullStackWith(const CacheState& target,
for (uint32_t i = 0, e = source.stack_height(); i < e; ++i) {
transfers.TransferStackSlot(target.stack_state[i], source.stack_state[i]);
}
if (cache_state_.cached_instance != target.cached_instance) {
    // Backward jumps (to loop headers) do not have a cached instance anyway, so
    // ignore this. On forward jumps, just reset the cached instance in the
    // target state.
target.ClearCachedInstanceRegister();
}
}
void LiftoffAssembler::MergeStackWith(const CacheState& target,
uint32_t arity) {
void LiftoffAssembler::MergeStackWith(CacheState& target, uint32_t arity) {
// Before: ----------------|----- (discarded) ----|--- arity ---|
// ^target_stack_height ^stack_base ^stack_height
// After: ----|-- arity --|
......@@ -720,6 +730,13 @@ void LiftoffAssembler::MergeStackWith(const CacheState& target,
transfers.TransferStackSlot(target.stack_state[target_stack_base + i],
cache_state_.stack_state[stack_base + i]);
}
if (cache_state_.cached_instance != target.cached_instance) {
    // Backward jumps (to loop headers) do not have a cached instance anyway, so
    // ignore this. On forward jumps, just reset the cached instance in the
    // target state.
target.ClearCachedInstanceRegister();
}
}
void LiftoffAssembler::Spill(VarState* slot) {
......@@ -750,12 +767,17 @@ void LiftoffAssembler::SpillAllRegisters() {
Spill(slot.offset(), slot.reg(), slot.type());
slot.MakeStack();
}
cache_state_.ClearCachedInstanceRegister();
cache_state_.reset_used_registers();
}
void LiftoffAssembler::ClearRegister(
Register reg, std::initializer_list<Register*> possible_uses,
LiftoffRegList pinned) {
if (reg == cache_state()->cached_instance) {
cache_state()->ClearCachedInstanceRegister();
return;
}
if (cache_state()->is_used(LiftoffRegister(reg))) {
SpillRegister(LiftoffRegister(reg));
}
......@@ -850,6 +872,7 @@ void LiftoffAssembler::PrepareCall(const FunctionSig* sig,
constexpr size_t kInputShift = 1;
// Spill all cache slots which are not being used as parameters.
cache_state_.ClearCachedInstanceRegister();
for (VarState* it = cache_state_.stack_state.end() - 1 - num_params;
it >= cache_state_.stack_state.begin() &&
!cache_state_.used_registers.is_empty();
......@@ -1080,6 +1103,14 @@ bool LiftoffAssembler::ValidateCacheState() const {
}
used_regs.set(reg);
}
if (cache_state_.cached_instance != no_reg) {
DCHECK(!used_regs.has(cache_state_.cached_instance));
int liftoff_code =
LiftoffRegister{cache_state_.cached_instance}.liftoff_code();
used_regs.set(cache_state_.cached_instance);
DCHECK_EQ(0, register_use_count[liftoff_code]);
register_use_count[liftoff_code] = 1;
}
bool valid = memcmp(register_use_count, cache_state_.register_use_count,
sizeof(register_use_count)) == 0 &&
used_regs == cache_state_.used_registers;
......
......@@ -192,6 +192,7 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegList used_registers;
uint32_t register_use_count[kAfterMaxLiftoffRegCode] = {0};
LiftoffRegList last_spilled_regs;
Register cached_instance = no_reg;
bool has_unused_register(RegClass rc, LiftoffRegList pinned = {}) const {
if (kNeedI64RegPair && rc == kGpRegPair) {
......@@ -241,6 +242,51 @@ class LiftoffAssembler : public TurboAssembler {
return available_regs.GetFirstRegSet();
}
// Volatile registers are registers which are used for caching values that
// can easily be reloaded. Those are returned first if we run out of free
// registers.
// Note: This interface is a bit more generic than currently needed, in
// anticipation of more "volatile registers" being added later.
bool has_volatile_register(LiftoffRegList candidates) {
return cached_instance != no_reg && candidates.has(cached_instance);
}
LiftoffRegister take_volatile_register(LiftoffRegList candidates) {
DCHECK(candidates.has(cached_instance));
LiftoffRegister ret{cached_instance};
DCHECK_EQ(1, register_use_count[ret.liftoff_code()]);
register_use_count[ret.liftoff_code()] = 0;
used_registers.clear(ret);
cached_instance = no_reg;
return ret;
}
void SetInstanceCacheRegister(Register reg) {
DCHECK_EQ(no_reg, cached_instance);
cached_instance = reg;
int liftoff_code = LiftoffRegister{reg}.liftoff_code();
DCHECK_EQ(0, register_use_count[liftoff_code]);
register_use_count[liftoff_code] = 1;
used_registers.set(reg);
}
Register TrySetCachedInstanceRegister(LiftoffRegList pinned) {
DCHECK_EQ(no_reg, cached_instance);
if (!has_unused_register(kGpCacheRegList, pinned)) return no_reg;
SetInstanceCacheRegister(unused_register(kGpCacheRegList, pinned).gp());
DCHECK_NE(no_reg, cached_instance);
return cached_instance;
}
void ClearCachedInstanceRegister() {
if (cached_instance == no_reg) return;
int liftoff_code = LiftoffRegister{cached_instance}.liftoff_code();
DCHECK_EQ(1, register_use_count[liftoff_code]);
register_use_count[liftoff_code] = 0;
used_registers.clear(cached_instance);
cached_instance = no_reg;
}
void inc_used(LiftoffRegister reg) {
if (reg.is_pair()) {
inc_used(reg.low());
......@@ -445,13 +491,18 @@ class LiftoffAssembler : public TurboAssembler {
if (cache_state_.has_unused_register(candidates, pinned)) {
return cache_state_.unused_register(candidates, pinned);
}
if (cache_state_.has_volatile_register(candidates)) {
LiftoffRegister reg = cache_state_.take_volatile_register(candidates);
DCHECK(!pinned.has(reg));
return reg;
}
return SpillOneRegister(candidates, pinned);
}
void MaterializeMergedConstants(uint32_t arity);
void MergeFullStackWith(const CacheState& target, const CacheState& source);
void MergeStackWith(const CacheState& target, uint32_t arity);
void MergeFullStackWith(CacheState& target, const CacheState& source);
void MergeStackWith(CacheState& target, uint32_t arity);
void Spill(VarState* slot);
void SpillLocals();
......@@ -469,7 +520,12 @@ class LiftoffAssembler : public TurboAssembler {
template <typename... Regs>
void SpillRegisters(Regs... regs) {
for (LiftoffRegister r : {LiftoffRegister(regs)...}) {
if (cache_state()->is_used(r)) SpillRegister(r);
if (cache_state_.is_free(r)) continue;
if (r.is_gp() && cache_state_.cached_instance == r.gp()) {
cache_state_.ClearCachedInstanceRegister();
} else {
SpillRegister(r);
}
}
}
......@@ -548,8 +604,11 @@ class LiftoffAssembler : public TurboAssembler {
inline void LoadConstant(LiftoffRegister, WasmValue,
RelocInfo::Mode rmode = RelocInfo::NONE);
inline void LoadFromInstance(Register dst, int offset, int size);
inline void LoadTaggedPointerFromInstance(Register dst, int offset);
inline void LoadInstanceFromFrame(Register dst);
inline void LoadFromInstance(Register dst, Register instance, int offset,
int size);
inline void LoadTaggedPointerFromInstance(Register dst, Register instance,
int offset);
inline void SpillInstance(Register instance);
inline void FillInstanceInto(Register dst);
inline void LoadTaggedPointer(Register dst, Register src_addr,
......
This diff is collapsed.
......@@ -377,6 +377,10 @@ class LiftoffRegList {
}
return reg;
}
Register clear(Register reg) { return clear(LiftoffRegister{reg}).gp(); }
DoubleRegister clear(DoubleRegister reg) {
return clear(LiftoffRegister{reg}).fp();
}
bool has(LiftoffRegister reg) const {
if (reg.is_pair()) {
......@@ -385,8 +389,8 @@ class LiftoffRegList {
}
return (regs_ & (storage_t{1} << reg.liftoff_code())) != 0;
}
bool has(Register reg) const { return has(LiftoffRegister(reg)); }
bool has(DoubleRegister reg) const { return has(LiftoffRegister(reg)); }
bool has(Register reg) const { return has(LiftoffRegister{reg}); }
bool has(DoubleRegister reg) const { return has(LiftoffRegister{reg}); }
constexpr bool is_empty() const { return regs_ == 0; }
......
......@@ -280,10 +280,14 @@ void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
}
}
void LiftoffAssembler::LoadFromInstance(Register dst, int offset, int size) {
DCHECK_LE(0, offset);
void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
movq(dst, liftoff::GetInstanceOperand());
Operand src{dst, offset};
}
void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
int offset, int size) {
DCHECK_LE(0, offset);
Operand src{instance, offset};
switch (size) {
case 1:
movzxbl(dst, src);
......@@ -299,10 +303,11 @@ void LiftoffAssembler::LoadFromInstance(Register dst, int offset, int size) {
}
}
void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst, int offset) {
void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
Register instance,
int offset) {
DCHECK_LE(0, offset);
movq(dst, liftoff::GetInstanceOperand());
LoadTaggedPointerField(dst, Operand(dst, offset));
LoadTaggedPointerField(dst, Operand(instance, offset));
}
void LiftoffAssembler::SpillInstance(Register instance) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment