Commit 0ef8ce7d authored by Jakob Kummerow, committed by V8 LUCI CQ

[wasm][liftoff] Spill multi-used registers before branches

This extends the idea already used by "MaterializeMergedConstants":
certain values have to be processed by every br*, so to protect against
cascades of conditional jumps causing lots of repeated work, it makes
sense to do such processing just once.
For the module in the linked bug, this reduces Liftoff-generated code
size from 69MB to 181KB.

Fixed: v8:13072
Change-Id: Ie9f98240e93751988067d4774d4a09b2b39bdad6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3760444
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81722}
parent e66547b7
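
The gist: before emitting a conditional branch, the new PrepareForBranch rewrites the cache state so that every register backs at most one merge slot, moving the value to a free register when one exists and spilling it otherwise. Below is a minimal, self-contained sketch of that use-count rule in plain C++ (not V8 code; the register model, slot layout, and all names are illustrative only):

#include <cstdio>
#include <vector>

struct Slot {
  bool in_reg;  // true: value lives in a register; false: spilled to stack
  int reg;      // register id (meaningful only if in_reg)
  int offset;   // stack offset to spill to
};

int main() {
  // Register 7 backs two slots; every branch merge would have to untangle
  // that sharing again and again, so we resolve it once, up front.
  std::vector<Slot> slots = {{true, 7, 0}, {true, 7, 8}, {true, 3, 16}};
  int use_count[16] = {0};
  for (const Slot& s : slots) {
    if (s.in_reg) ++use_count[s.reg];
  }

  int next_free = 8;  // pretend registers 8..15 are currently unused
  for (Slot& s : slots) {
    if (!s.in_reg || use_count[s.reg] <= 1) continue;
    --use_count[s.reg];  // this slot stops referencing the shared register
    if (next_free < 16) {
      // A free register exists: a register-to-register move is cheapest.
      std::printf("move r%d -> r%d\n", s.reg, next_free);
      s.reg = next_free;
      ++use_count[next_free++];
    } else {
      // No free register left: spill the slot to its stack offset.
      std::printf("spill r%d -> [sp+%d]\n", s.reg, s.offset);
      s.in_reg = false;
    }
  }
  // The invariant now matches what the patch establishes before the first
  // branch: each register is referenced by at most one slot.
  return 0;
}

Note that the last slot referencing a shared register simply keeps it, mirroring the inc_used/dec_used bookkeeping in the real patch.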
src/wasm/liftoff-assembler.cc:
@@ -717,18 +717,36 @@ void LiftoffAssembler::PrepareLoopArgs(int num) {
   }
 }
 
-void LiftoffAssembler::MaterializeMergedConstants(uint32_t arity) {
-  // Materialize constants on top of the stack ({arity} many), and locals.
+void LiftoffAssembler::PrepareForBranch(uint32_t arity, LiftoffRegList pinned) {
   VarState* stack_base = cache_state_.stack_state.data();
   for (auto slots :
        {base::VectorOf(stack_base + cache_state_.stack_state.size() - arity,
                        arity),
         base::VectorOf(stack_base, num_locals())}) {
     for (VarState& slot : slots) {
+      if (slot.is_reg()) {
+        // Registers used more than once can't be used for merges.
+        if (cache_state_.get_use_count(slot.reg()) > 1) {
+          RegClass rc = reg_class_for(slot.kind());
+          if (cache_state_.has_unused_register(rc, pinned)) {
+            LiftoffRegister dst_reg = cache_state_.unused_register(rc, pinned);
+            Move(dst_reg, slot.reg(), slot.kind());
+            cache_state_.inc_used(dst_reg);
+            cache_state_.dec_used(slot.reg());
+            slot.MakeRegister(dst_reg);
+          } else {
+            Spill(slot.offset(), slot.reg(), slot.kind());
+            cache_state_.dec_used(slot.reg());
+            slot.MakeStack();
+          }
+        }
+        continue;
+      }
+      // Materialize constants.
       if (!slot.is_const()) continue;
       RegClass rc = reg_class_for(slot.kind());
-      if (cache_state_.has_unused_register(rc)) {
-        LiftoffRegister reg = cache_state_.unused_register(rc);
+      if (cache_state_.has_unused_register(rc, pinned)) {
+        LiftoffRegister reg = cache_state_.unused_register(rc, pinned);
         LoadConstant(reg, slot.constant());
         cache_state_.inc_used(reg);
         slot.MakeRegister(reg);

src/wasm/liftoff-assembler.h:
@@ -636,7 +636,10 @@ class LiftoffAssembler : public TurboAssembler {
     return SpillOneRegister(candidates);
   }
 
-  void MaterializeMergedConstants(uint32_t arity);
+  // Performs operations on locals and the top {arity} value stack entries
+  // that would (very likely) have to be done by branches. Doing this up front
+  // avoids making each subsequent (conditional) branch repeat this work.
+  void PrepareForBranch(uint32_t arity, LiftoffRegList pinned);
 
   enum JumpDirection { kForwardJump, kBackwardJump };
   void MergeFullStackWith(CacheState& target, const CacheState& source);

src/wasm/liftoff-compiler.cc:
@@ -2703,12 +2703,9 @@ class LiftoffCompiler {
   }
 
   void BrIf(FullDecoder* decoder, const Value& /* cond */, uint32_t depth) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
-    // TODO(clemensb): Do the same for br_table.
+    // Avoid having sequences of branches do duplicate work.
     if (depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
     }
 
     Label cont_false;
@@ -2791,12 +2788,21 @@
         }
       }
 
       if (need_temps) {
-        LiftoffRegList pinned;
         tmp1 = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
         tmp2 = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
       }
     }
 
+    {
+      // All targets must have the same arity (checked by validation), so
+      // we can just sample any of them to find that arity.
+      uint32_t ignored_length;
+      uint32_t sample_depth = decoder->read_u32v<Decoder::kNoValidation>(
+          imm.table, &ignored_length, "first depth");
+      __ PrepareForBranch(decoder->control_at(sample_depth)->br_merge()->arity,
+                          pinned);
+    }
+
     BranchTableIterator<validate> table_iterator(decoder, imm);
     std::map<uint32_t, MovableLabel> br_targets;
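
For context, read_u32v here decodes the first branch-table target as a LEB128-encoded u32 and reports how many bytes it consumed; the length is deliberately ignored above, since only the depth matters, and kNoValidation is safe because the table was already validated during decoding. A rough sketch of that decoding, under assumed semantics rather than V8's actual implementation:

#include <cstdint>

// Each LEB128 byte carries 7 payload bits, low bits first; a set high bit
// means another byte follows (at most 5 bytes for a u32). *length receives
// the byte count, which the BrTable code above deliberately ignores.
uint32_t DecodeU32LEB(const uint8_t* pc, uint32_t* length) {
  uint32_t result = 0;
  int shift = 0;
  uint32_t consumed = 0;
  uint8_t byte;
  do {
    byte = pc[consumed++];
    result |= static_cast<uint32_t>(byte & 0x7f) << shift;
    shift += 7;
  } while ((byte & 0x80) != 0 && shift < 35);
  *length = consumed;
  return result;
}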
@@ -3487,11 +3493,9 @@
   void BrOnNull(FullDecoder* decoder, const Value& ref_object, uint32_t depth,
                 bool pass_null_along_branch,
                 Value* /* result_on_fallthrough */) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
+    // Avoid having sequences of branches do duplicate work.
     if (depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
     }
 
     Label cont_false;
@@ -3520,11 +3524,9 @@
   void BrOnNonNull(FullDecoder* decoder, const Value& ref_object,
                    Value* /* result */, uint32_t depth,
                    bool drop_null_on_fallthrough) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
+    // Avoid having sequences of branches do duplicate work.
     if (depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
     }
 
     Label cont_false;
@@ -5899,11 +5901,9 @@
 
   void BrOnCast(FullDecoder* decoder, const Value& obj, const Value& rtt,
                 Value* /* result_on_branch */, uint32_t depth) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
+    // Avoid having sequences of branches do duplicate work.
     if (depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
     }
 
     Label cont_false;
@@ -5927,11 +5927,9 @@
 
   void BrOnCastFail(FullDecoder* decoder, const Value& obj, const Value& rtt,
                     Value* /* result_on_fallthrough */, uint32_t depth) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
+    // Avoid having sequences of branches do duplicate work.
     if (depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(depth)->br_merge()->arity, {});
     }
 
     Label cont_branch, fallthrough;
@@ -6105,11 +6103,9 @@
   template <TypeChecker type_checker>
   void BrOnAbstractType(const Value& object, FullDecoder* decoder,
                         uint32_t br_depth) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
+    // Avoid having sequences of branches do duplicate work.
     if (br_depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(br_depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(br_depth)->br_merge()->arity, {});
     }
 
     Label no_match;
@@ -6126,11 +6122,9 @@
   template <TypeChecker type_checker>
   void BrOnNonAbstractType(const Value& object, FullDecoder* decoder,
                            uint32_t br_depth) {
-    // Before branching, materialize all constants. This avoids repeatedly
-    // materializing them for each conditional branch.
+    // Avoid having sequences of branches do duplicate work.
     if (br_depth != decoder->control_depth() - 1) {
-      __ MaterializeMergedConstants(
-          decoder->control_at(br_depth)->br_merge()->arity);
+      __ PrepareForBranch(decoder->control_at(br_depth)->br_merge()->arity, {});
     }
 
     Label no_match, end;