Commit 0ef8ce7d authored by Jakob Kummerow, committed by V8 LUCI CQ

[wasm][liftoff] Spill multi-used registers before branches

This extends the idea already used by "MaterializeMergedConstants":
certain values have to be processed by every br*, so to protect against
cascades of conditional jumps causing lots of repeated work, it makes
sense to do such processing just once.
For the module in the linked bug, this reduces Liftoff-generated code
size from 69MB to 181KB.
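
As a toy model of why hoisting matters (all numbers below are invented
for illustration, not measurements from the bug): if each of k branches
in a cascade re-does the same work on c values at s code bytes per
value, emitted code grows as k*c*s, while doing the work once up front
costs only c*s.

  // Toy model only; k, c and s are made-up numbers.
  #include <cstdio>

  int main() {
    long k = 10000;  // conditional branches in a cascade
    long c = 20;     // values each branch would have to process
    long s = 7;      // code bytes per value processed
    printf("per-branch: %ld bytes\n", k * c * s);  // grows with the cascade
    printf("up-front:   %ld bytes\n", c * s);      // paid once
    return 0;
  }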

Fixed: v8:13072
Change-Id: Ie9f98240e93751988067d4774d4a09b2b39bdad6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3760444
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81722}
parent e66547b7
@@ -717,18 +717,36 @@ void LiftoffAssembler::PrepareLoopArgs(int num) {
   }
 }
 
-void LiftoffAssembler::MaterializeMergedConstants(uint32_t arity) {
-  // Materialize constants on top of the stack ({arity} many), and locals.
+void LiftoffAssembler::PrepareForBranch(uint32_t arity, LiftoffRegList pinned) {
   VarState* stack_base = cache_state_.stack_state.data();
   for (auto slots :
        {base::VectorOf(stack_base + cache_state_.stack_state.size() - arity,
                        arity),
         base::VectorOf(stack_base, num_locals())}) {
     for (VarState& slot : slots) {
+      if (slot.is_reg()) {
+        // Registers used more than once can't be used for merges.
+        if (cache_state_.get_use_count(slot.reg()) > 1) {
+          RegClass rc = reg_class_for(slot.kind());
+          if (cache_state_.has_unused_register(rc, pinned)) {
+            LiftoffRegister dst_reg = cache_state_.unused_register(rc, pinned);
+            Move(dst_reg, slot.reg(), slot.kind());
+            cache_state_.inc_used(dst_reg);
+            cache_state_.dec_used(slot.reg());
+            slot.MakeRegister(dst_reg);
+          } else {
+            Spill(slot.offset(), slot.reg(), slot.kind());
+            cache_state_.dec_used(slot.reg());
+            slot.MakeStack();
+          }
+        }
+        continue;
+      }
+      // Materialize constants.
       if (!slot.is_const()) continue;
       RegClass rc = reg_class_for(slot.kind());
-      if (cache_state_.has_unused_register(rc)) {
-        LiftoffRegister reg = cache_state_.unused_register(rc);
+      if (cache_state_.has_unused_register(rc, pinned)) {
+        LiftoffRegister reg = cache_state_.unused_register(rc, pinned);
         LoadConstant(reg, slot.constant());
         cache_state_.inc_used(reg);
         slot.MakeRegister(reg);
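
As a gloss on the new register handling in PrepareForBranch above: any
slot whose backing register is shared with other slots is either moved
into an unused register or spilled to its stack offset, so that
afterwards every live register backs exactly one slot. A standalone
sketch of that decision, with invented stand-in types (not V8's API):

  #include <cstdio>
  #include <map>
  #include <vector>

  struct Slot {
    int reg;     // backing register, or -1 once spilled
    int offset;  // stack offset to spill to
  };

  int main() {
    std::map<int, int> use_count = {{0, 2}, {1, 1}};  // register -> #slots
    std::vector<int> free_regs = {2};                 // currently unused regs
    std::vector<Slot> slots = {{0, 8}, {0, 16}, {1, 24}};

    for (Slot& slot : slots) {
      if (slot.reg < 0 || use_count[slot.reg] <= 1) continue;  // mergeable
      if (!free_regs.empty()) {
        // Like Move(dst_reg, ...): keep the value in a register.
        int dst = free_regs.back();
        free_regs.pop_back();
        use_count[dst]++;
        use_count[slot.reg]--;
        slot.reg = dst;
        printf("slot@%d moved to r%d\n", slot.offset, slot.reg);
      } else {
        // Like Spill(slot.offset(), ...): fall back to the stack slot.
        use_count[slot.reg]--;
        slot.reg = -1;
        printf("slot@%d spilled\n", slot.offset);
      }
    }
    return 0;
  }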
@@ -636,7 +636,10 @@ class LiftoffAssembler : public TurboAssembler {
     return SpillOneRegister(candidates);
   }
 
-  void MaterializeMergedConstants(uint32_t arity);
+  // Performs operations on locals and the top {arity} value stack entries
+  // that would (very likely) have to be done by branches. Doing this up front
+  // avoids making each subsequent (conditional) branch repeat this work.
+  void PrepareForBranch(uint32_t arity, LiftoffRegList pinned);
 
   enum JumpDirection { kForwardJump, kBackwardJump };
   void MergeFullStackWith(CacheState& target, const CacheState& source);
@@ -2703,12 +2703,9 @@ class LiftoffCompiler {
   }
 
   void BrIf(FullDecoder* decoder, const Value& /* cond */, uint32_t depth) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
-    // TODO(clemensb): Do the same for br_table.
+    // Avoid having sequences of branches do duplicate work.
     if (depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
     }
     Label cont_false;
@@ -2791,12 +2788,21 @@
       }
     }
+    LiftoffRegList pinned;
     if (need_temps) {
+      tmp1 = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
+      tmp2 = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
     }
 
+    {
+      // All targets must have the same arity (checked by validation), so
+      // we can just sample any of them to find that arity.
+      uint32_t ignored_length;
+      uint32_t sample_depth = decoder->read_u32v<Decoder::kNoValidation>(
+          imm.table, &ignored_length, "first depth");
+      __ PrepareForBranch(decoder->control_at(sample_depth)->br_merge()->arity,
+                          pinned);
+    }
     BranchTableIterator<validate> table_iterator(decoder, imm);
     std::map<uint32_t, MovableLabel> br_targets;
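
The sampling block above reads the first depth in the br_table payload
with read_u32v, i.e. one unsigned-LEB128 decode. For reference, a
generic LEB128 decoder along those lines (a sketch, not V8's Decoder
implementation):

  #include <cstdint>
  #include <cstdio>

  // Generic unsigned LEB128 decode, the encoding br_table depths use.
  uint32_t read_u32v(const uint8_t* pc, uint32_t* length) {
    uint32_t result = 0;
    int shift = 0;
    *length = 0;
    uint8_t byte;
    do {
      byte = pc[(*length)++];
      result |= static_cast<uint32_t>(byte & 0x7f) << shift;
      shift += 7;
    } while (byte & 0x80);
    return result;
  }

  int main() {
    const uint8_t table[] = {0x83, 0x01};  // encodes depth 131 in two bytes
    uint32_t length;
    uint32_t depth = read_u32v(table, &length);
    printf("depth=%u, consumed %u bytes\n", depth, length);  // 131, 2
    return 0;
  }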
@@ -3487,11 +3493,9 @@
   void BrOnNull(FullDecoder* decoder, const Value& ref_object, uint32_t depth,
                 bool pass_null_along_branch,
                 Value* /* result_on_fallthrough */) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
+    // Avoid having sequences of branches do duplicate work.
     if (depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
     }
     Label cont_false;
@@ -3520,11 +3524,9 @@
   void BrOnNonNull(FullDecoder* decoder, const Value& ref_object,
                    Value* /* result */, uint32_t depth,
                    bool drop_null_on_fallthrough) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
+    // Avoid having sequences of branches do duplicate work.
     if (depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
     }
     Label cont_false;
@@ -5899,11 +5901,9 @@
   void BrOnCast(FullDecoder* decoder, const Value& obj, const Value& rtt,
                 Value* /* result_on_branch */, uint32_t depth) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
+    // Avoid having sequences of branches do duplicate work.
     if (depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
     }
     Label cont_false;
@@ -5927,11 +5927,9 @@
   void BrOnCastFail(FullDecoder* decoder, const Value& obj, const Value& rtt,
                     Value* /* result_on_fallthrough */, uint32_t depth) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
+    // Avoid having sequences of branches do duplicate work.
     if (depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
     }
     Label cont_branch, fallthrough;
@@ -6105,11 +6103,9 @@
   template <TypeChecker type_checker>
   void BrOnAbstractType(const Value& object, FullDecoder* decoder,
                         uint32_t br_depth) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
+    // Avoid having sequences of branches do duplicate work.
     if (br_depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(br_depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(br_depth)->br_merge()->arity, {});
     }
     Label no_match;
@@ -6126,11 +6122,9 @@
   template <TypeChecker type_checker>
   void BrOnNonAbstractType(const Value& object, FullDecoder* decoder,
                            uint32_t br_depth) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
+    // Avoid having sequences of branches do duplicate work.
     if (br_depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(br_depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(br_depth)->br_merge()->arity, {});
     }
     Label no_match, end;