[liftoff] Fix illegal state updates in conditional tierup check

The tier up check in br_if is only executed conditionally, so it is not allowed to update any cache state. Later code would work with that updated state, even though the corresponding code would not have executed. There was a partial implementation for this by passing in a scratch register for {TierupCheck}, but {TierupCheckOnExit} has the same problem, and needs up to three scratch registers. Until we come up with a better solution, just snapshot the cache state before doing the tier up check, and restore it later. This has some performance cost, but it's an effective fix. R=jkummerow@chromium.org Bug: chromium:1314184 Change-Id: I1272010cc247b755e2f4d40615284a03ff8dadb6 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3579363Reviewed-by: Jakob Kummerow <jkummerow@chromium.org> Commit-Queue: Clemens Backes <clemensb@chromium.org> Cr-Commit-Position: refs/heads/main@{#79896}

[liftoff] Fix illegal state updates in conditional tierup check
The tier up check in br_if is only executed conditionally, so it is not allowed to update any cache state. Later code would work with that updated state, even though the corresponding code would not have executed. There was a partial implementation for this by passing in a scratch register for {TierupCheck}, but {TierupCheckOnExit} has the same problem, and needs up to three scratch registers. Until we come up with a better solution, just snapshot the cache state before doing the tier up check, and restore it later. This has some performance cost, but it's an effective fix. R=jkummerow@chromium.org Bug: chromium:1314184 Change-Id: I1272010cc247b755e2f4d40615284a03ff8dadb6 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3579363Reviewed-by: Jakob Kummerow <jkummerow@chromium.org> Commit-Queue: Clemens Backes <clemensb@chromium.org> Cr-Commit-Position: refs/heads/main@{#79896}
8072d31a · Clemens Backes · V8 LUCI CQ · ed8e8b94 · 8072d31a
Commit 8072d31a authored Apr 08, 2022 by Clemens Backes Committed by V8 LUCI CQ Apr 08, 2022
Hide whitespace changes
Inline Side-by-side

Showing with 24 additions and 15 deletions

liftoff-compiler.cc src/wasm/baseline/liftoff-compiler.cc +24 -15

No files found.
--- a/src/wasm/baseline/liftoff-compiler.cc
+++ b/src/wasm/baseline/liftoff-compiler.cc
@@ -726,7 +726,7 @@ class LiftoffCompiler {
  }

  void TierupCheck(FullDecoder* decoder, WasmCodePosition position,
-                   int budget_used, Register scratch_reg) {
+                   int budget_used) {
    // We should always decrement the budget, and we don't expect integer
    // overflows in the budget calculation.
    DCHECK_LE(1, budget_used);
@@ -737,9 +737,7 @@ class LiftoffCompiler {
    const int kMax = FLAG_wasm_tiering_budget / 4;
    if (budget_used > kMax) budget_used = kMax;

-    LiftoffRegister budget_reg = scratch_reg == no_reg
-                                     ? __ GetUnusedRegister(kGpReg, {})
-                                     : LiftoffRegister(scratch_reg);
+    LiftoffRegister budget_reg = __ GetUnusedRegister(kGpReg, {});
    __ Fill(budget_reg, liftoff::kTierupBudgetOffset, ValueKind::kI32);
    LiftoffRegList regs_to_save = __ cache_state()->used_registers;
    // The cached instance will be reloaded separately.
@@ -2243,7 +2241,8 @@ class LiftoffCompiler {

  void TierupCheckOnExit(FullDecoder* decoder) {
    if (!dynamic_tiering()) return;
-    TierupCheck(decoder, decoder->position(), __ pc_offset(), no_reg);
+    TierupCheck(decoder, decoder->position(), __ pc_offset());
+    CODE_COMMENT("update tiering budget");
    LiftoffRegList pinned;
    LiftoffRegister budget = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
    LiftoffRegister array = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
@@ -2588,7 +2587,7 @@ class LiftoffCompiler {
    __ PushRegister(kind, dst);
  }

-  void BrImpl(FullDecoder* decoder, Control* target, Register scratch_reg) {
+  void BrImpl(FullDecoder* decoder, Control* target) {
    if (dynamic_tiering()) {
      if (target->is_loop()) {
        DCHECK(target->label.get()->is_bound());
@@ -2597,7 +2596,7 @@ class LiftoffCompiler {
        // want to revisit this when tuning tiering budgets later.
        const int kTierUpCheckCost = 1;
        TierupCheck(decoder, decoder->position(),
-                    jump_distance + kTierUpCheckCost, scratch_reg);
+                    jump_distance + kTierUpCheckCost);
      } else {
        // To estimate time spent in this function more accurately, we could
        // increment the tiering budget on forward jumps. However, we don't
@@ -2618,14 +2617,14 @@ class LiftoffCompiler {

  void BrOrRet(FullDecoder* decoder, uint32_t depth,
               uint32_t /* drop_values */) {
-    BrOrRetImpl(decoder, depth, no_reg);
+    BrOrRetImpl(decoder, depth);
  }

-  void BrOrRetImpl(FullDecoder* decoder, uint32_t depth, Register scratch_reg) {
+  void BrOrRetImpl(FullDecoder* decoder, uint32_t depth) {
    if (depth == decoder->control_depth() - 1) {
      DoReturn(decoder, 0);
    } else {
-      BrImpl(decoder, decoder->control_at(depth), scratch_reg);
+      BrImpl(decoder, decoder->control_at(depth));
    }
  }

@@ -2638,16 +2637,26 @@ class LiftoffCompiler {
          decoder->control_at(depth)->br_merge()->arity);
    }

-    Register scratch_reg = no_reg;
-    if (dynamic_tiering()) {
-      scratch_reg = __ GetUnusedRegister(kGpReg, {}).gp();
-    }
    Label cont_false;

    // Test the condition on the value stack, jump to {cont_false} if zero.
    JumpIfFalse(decoder, &cont_false);

-    BrOrRetImpl(decoder, depth, scratch_reg);
+    // As a quickfix for https://crbug.com/1314184 we store the cache state
+    // before calling {BrOrRetImpl} under dynamic tiering, because the tier up
+    // check modifies the cache state (GetUnusedRegister,
+    // LoadInstanceIntoRegister).
+    // TODO(wasm): This causes significant overhead during compilation; try to
+    // avoid this, maybe by passing in scratch registers.
+    if (dynamic_tiering()) {
+      LiftoffAssembler::CacheState old_cache_state;
+      old_cache_state.Split(*__ cache_state());
+      BrOrRetImpl(decoder, depth);
+      __ cache_state()->Steal(old_cache_state);
+    } else {
+      BrOrRetImpl(decoder, depth);
+    }
+
    __ bind(&cont_false);
  }