Commit 0bc4b452 authored by Jakob Linke, committed by V8 LUCI CQ

[maglev] Implement Maglev-to-Turbofan tiering

ML-to-TF tiering works very similarly to Ignition-to-TF tiering:

- When the interrupt budget is exhausted, enter the TieringManager
  which potentially decides to tier up and sets the appropriate
  TieringState on the FeedbackVector.
- The prologue emitted at ML function entry checks the TieringState (and
  any available cached TF code) and starts a TF compilation and/or jumps
  into optimized code (sketched below).
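
A minimal standalone model of this handshake (all names and the budget value
are illustrative; in V8 the tiering state lives on the FeedbackVector and the
entry check is emitted as machine code, see the Maglev code-generator hunk
further down):

#include <cstdio>

// Toy model only: mirrors the interrupt-budget -> TieringManager ->
// entry-prologue flow described above, not V8's actual data structures.
enum class TieringState { kNone, kRequestTurbofan, kInProgress };

struct Function {
  TieringState tiering_state = TieringState::kNone;
  bool has_cached_turbofan_code = false;
  int interrupt_budget = 3;  // Illustrative; real budgets are much larger.
};

// Role of the TieringManager: called when the interrupt budget is exhausted,
// it may decide to tier up by setting the tiering state.
void OnInterruptBudgetExhausted(Function& f) {
  if (f.tiering_state == TieringState::kNone && !f.has_cached_turbofan_code) {
    f.tiering_state = TieringState::kRequestTurbofan;
  }
}

// Role of the ML function-entry prologue: react to the tiering state and to
// already-available TF code.
void MaglevEntry(Function& f) {
  if (f.has_cached_turbofan_code) {
    std::puts("tail-call into cached TF code");
    return;
  }
  if (f.tiering_state == TieringState::kRequestTurbofan) {
    std::puts("start a TF compilation job");
    f.tiering_state = TieringState::kInProgress;
  }
  std::puts("run ML code");
  if (--f.interrupt_budget <= 0) {
    f.interrupt_budget = 3;
    OnInterruptBudgetExhausted(f);
  }
}

int main() {
  Function f;
  for (int i = 0; i < 5; ++i) MaglevEntry(f);
  // Once the TF job finishes, has_cached_turbofan_code would flip to true and
  // the next MaglevEntry would tail-call the optimized code.
}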

TODOs:

- ML-to-TF OSR is not yet supported.
- ML code is no longer cached on the FeedbackVector.
- Tracing is rudimentary.
- The generated function-entry prologue is fairly large and must be
  either minimized or extracted into a builtin.
- Tiering involving Sparkplug is not entirely robust yet (Sparkplug
  code may be installed with unexpected timing).

Bug: v8:7700
Change-Id: I86b0692477f51b9967f318a4093bc874344120b3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3629149
Reviewed-by: Victor Gomes <victorgomes@chromium.org>
Commit-Queue: Jakob Linke <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#82351}
parent 012fa899
@@ -1184,7 +1184,7 @@ MaybeHandle<CodeT> CompileMaglev(Isolate* isolate, Handle<JSFunction> function,
   CHECK_EQ(status, CompilationJob::SUCCEEDED);  // TODO(v8:7700): Use status.
   if (IsSynchronous(mode)) {
-    function->reset_tiering_state();
+    ResetTieringState(*job->function(), osr_offset);
     {
       // Park the main thread Isolate here, to be in the same state as
       // background threads.
@@ -1201,9 +1201,16 @@ MaybeHandle<CodeT> CompileMaglev(Isolate* isolate, Handle<JSFunction> function,
   }
   RecordMaglevFunctionCompilation(isolate, function);
-  const bool kIsContextSpecializing = false;
-  OptimizedCodeCache::Insert(isolate, *function, osr_offset, function->code(),
-                             kIsContextSpecializing);
+  // TODO(v8:7700): Re-enable caching in a separate feedback vector slot. We
+  // probably shouldn't reuse the same slot as TF since that makes tiering
+  // logic from ML to TF more involved (it'd have to check the cached code
+  // kind).
+  // const bool kIsContextSpecializing = false;
+  // OptimizedCodeCache::Insert(isolate, *function, osr_offset,
+  //                            function->code(),
+  //                            kIsContextSpecializing);
   return handle(function->code(), isolate);
 }
@@ -3974,12 +3981,37 @@ bool Compiler::FinalizeMaglevCompilationJob(maglev::MaglevCompilationJob* job,
                                             Isolate* isolate) {
 #ifdef V8_ENABLE_MAGLEV
   VMState<COMPILER> state(isolate);
-  const bool kIsContextSpecializing = false;
-  OptimizedCodeCache::Insert(isolate, *job->function(), BytecodeOffset::None(),
-                             job->function()->code(), kIsContextSpecializing);
-  RecordMaglevFunctionCompilation(isolate, job->function());
-#endif
+  const CompilationJob::Status status = job->FinalizeJob(isolate);
+
+  // TODO(v8:7700): Use the result and check if job succeed
+  // when all the bytecodes are implemented.
+  USE(status);
+
+  // TODO(v8:7700): Re-enable caching in a separate feedback vector slot. We
+  // probably shouldn't reuse the same slot as TF since that makes tiering
+  // logic from ML to TF more involved (it'd have to check the cached code
+  // kind).
+  // const bool kIsContextSpecializing = false;
+  // OptimizedCodeCache::Insert(isolate, *job->function(),
+  //                            BytecodeOffset::None(),
+  //                            job->function()->code(),
+  //                            kIsContextSpecializing);
+
+  static constexpr BytecodeOffset osr_offset = BytecodeOffset::None();
+  ResetTieringState(*job->function(), osr_offset);
+
+  if (status == CompilationJob::SUCCEEDED) {
+    // Note the finalized Code object has already been installed on the
+    // function by MaglevCompilationJob::FinalizeJobImpl.
+    RecordMaglevFunctionCompilation(isolate, job->function());
+  }
+
+  return status;
+#else
   return CompilationJob::SUCCEEDED;
+#endif
 }

 // static
......
@@ -263,7 +263,8 @@ void TieringManager::MaybeOptimizeFrame(JSFunction function,
   const bool is_marked_for_any_optimization =
       (static_cast<uint32_t>(tiering_state) & kNoneOrInProgressMask) != 0;
-  if (is_marked_for_any_optimization || function.HasAvailableOptimizedCode()) {
+  if (is_marked_for_any_optimization ||
+      function.HasAvailableHigherTierCodeThan(code_kind)) {
     // OSR kicks in only once we've previously decided to tier up, but we are
     // still in the unoptimized frame (this implies a long-running loop).
     if (SmallEnoughForOSR(isolate_, function)) {
@@ -276,7 +277,7 @@ void TieringManager::MaybeOptimizeFrame(JSFunction function,
   }
   DCHECK(!is_marked_for_any_optimization &&
-         !function.HasAvailableOptimizedCode());
+         !function.HasAvailableHigherTierCodeThan(code_kind));
   OptimizationDecision d = ShouldOptimize(function, code_kind);
   if (d.should_optimize()) Optimize(function, d);
 }
......
@@ -441,6 +441,38 @@ class MaglevCodeGeneratingNodeProcessor {
     __ BailoutIfDeoptimized(rbx);
+
+    // Tiering support.
+    {
+      // Scratch registers. Don't clobber regs related to the calling
+      // convention (e.g. kJavaScriptCallArgCountRegister).
+      Register optimization_state = rcx;
+      Register feedback_vector = r9;
+
+      // Load the feedback vector.
+      __ LoadTaggedPointerField(
+          feedback_vector,
+          FieldOperand(kJSFunctionRegister, JSFunction::kFeedbackCellOffset));
+      __ LoadTaggedPointerField(
+          feedback_vector, FieldOperand(feedback_vector, Cell::kValueOffset));
+      __ AssertFeedbackVector(feedback_vector);
+
+      Label has_optimized_code_or_state, next;
+      __ LoadTieringStateAndJumpIfNeedsProcessing(
+          optimization_state, feedback_vector, &has_optimized_code_or_state);
+      __ jmp(&next);
+
+      __ bind(&has_optimized_code_or_state);
+      {
+        ASM_CODE_COMMENT_STRING(masm(), "Optimized marker check");
+        __ MaybeOptimizeCodeOrTailCallOptimizedCodeSlot(
+            optimization_state, feedback_vector, kJSFunctionRegister,
+            JumpMode::kJump);
+        __ Trap();
+      }
+
+      __ bind(&next);
+    }
+
     __ EnterFrame(StackFrame::MAGLEV);
     // Save arguments in frame.
@@ -450,10 +482,6 @@ class MaglevCodeGeneratingNodeProcessor {
     __ Push(kJSFunctionRegister);              // Callee's JS function.
     __ Push(kJavaScriptCallArgCountRegister);  // Actual argument count.
-    // TODO(v8:7700): Handle TieringState and cached optimized code. See also:
-    // LoadTieringStateAndJumpIfNeedsProcessing and
-    // MaybeOptimizeCodeOrTailCallOptimizedCodeSlot.
     code_gen_state_->set_untagged_slots(graph->untagged_stack_slots());
     code_gen_state_->set_tagged_slots(graph->tagged_stack_slots());
......
@@ -188,12 +188,7 @@ void MaglevConcurrentDispatcher::FinalizeFinishedJobs() {
   while (!outgoing_queue_.IsEmpty()) {
     std::unique_ptr<MaglevCompilationJob> job;
     outgoing_queue_.Dequeue(&job);
-    CompilationJob::Status status = job->FinalizeJob(isolate_);
-    // TODO(v8:7700): Use the result and check if job succeed
-    // when all the bytecodes are implemented.
-    if (status == CompilationJob::SUCCEEDED) {
-      Compiler::FinalizeMaglevCompilationJob(job.get(), isolate_);
-    }
+    Compiler::FinalizeMaglevCompilationJob(job.get(), isolate_);
   }
 }
......
@@ -90,7 +90,7 @@ inline constexpr bool CodeKindCanOSR(CodeKind kind) {
 }

 inline constexpr bool CodeKindCanTierUp(CodeKind kind) {
-  return CodeKindIsUnoptimizedJSFunction(kind);
+  return CodeKindIsUnoptimizedJSFunction(kind) || kind == CodeKind::MAGLEV;
 }

 // TODO(jgruber): Rename or remove this predicate. Currently it means 'is this
......
@@ -65,6 +65,14 @@ bool JSFunction::HasAttachedOptimizedCode() const {
   return (result & kOptimizedJSFunctionCodeKindsMask) != 0;
 }

+bool JSFunction::HasAvailableHigherTierCodeThan(CodeKind kind) const {
+  const int kind_as_int_flag = static_cast<int>(CodeKindToCodeKindFlag(kind));
+  DCHECK(base::bits::IsPowerOfTwo(kind_as_int_flag));
+  // Smear right - any higher present bit means we have a higher tier available.
+  const int mask = kind_as_int_flag | (kind_as_int_flag - 1);
+  return (GetAvailableCodeKinds() & static_cast<CodeKinds>(~mask)) != 0;
+}
+
 bool JSFunction::HasAvailableOptimizedCode() const {
   CodeKinds result = GetAvailableCodeKinds();
   return (result & kOptimizedJSFunctionCodeKindsMask) != 0;
......
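
The new JSFunction::HasAvailableHigherTierCodeThan predicate above relies on
higher tiers occupying higher bit positions in the CodeKinds bit set: smearing
the requested kind's flag bit to the right produces a mask covering that tier
and everything below it, so any bit left over denotes a strictly higher tier.
A minimal standalone sketch of the trick (the Tier enum and helper names are
illustrative, not V8's actual CodeKind values):

#include <cassert>
#include <cstdint>

// Illustrative tier ordering; the only assumption shared with V8's CodeKind is
// that higher tiers map to higher bit positions.
enum class Tier : int { kIgnition = 0, kSparkplug = 1, kMaglev = 2, kTurbofan = 3 };

constexpr uint32_t TierFlag(Tier t) { return 1u << static_cast<int>(t); }

// True if `available` (a bit set of TierFlag values) contains any tier
// strictly higher than `kind`.
bool HasHigherTierThan(uint32_t available, Tier kind) {
  const uint32_t flag = TierFlag(kind);
  // "Smear right": mask covers `kind` itself plus every lower tier.
  const uint32_t mask = flag | (flag - 1);
  return (available & ~mask) != 0;
}

int main() {
  const uint32_t available = TierFlag(Tier::kIgnition) | TierFlag(Tier::kTurbofan);
  assert(HasHigherTierThan(available, Tier::kMaglev));     // TF is above ML.
  assert(!HasHigherTierThan(available, Tier::kTurbofan));  // Nothing above TF.
  return 0;
}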
@@ -152,6 +152,8 @@ class JSFunction : public TorqueGeneratedJSFunction<
   // been already deoptimized but its code() still needs to be unlinked, which
   // will happen on its next activation.

+  bool HasAvailableHigherTierCodeThan(CodeKind kind) const;
+
   // True, iff any generated code kind is attached/available to this function.
   V8_EXPORT_PRIVATE bool HasAttachedOptimizedCode() const;
   bool HasAvailableOptimizedCode() const;
......
@@ -448,6 +448,20 @@ RUNTIME_FUNCTION(Runtime_BenchMaglev) {
 }
 #endif  // V8_ENABLE_MAGLEV

+RUNTIME_FUNCTION(Runtime_ActiveTierIsIgnition) {
+  HandleScope scope(isolate);
+  DCHECK_EQ(args.length(), 1);
+  Handle<JSFunction> function = args.at<JSFunction>(0);
+  return isolate->heap()->ToBoolean(function->ActiveTierIsIgnition());
+}
+
+RUNTIME_FUNCTION(Runtime_ActiveTierIsSparkplug) {
+  HandleScope scope(isolate);
+  DCHECK_EQ(args.length(), 1);
+  Handle<JSFunction> function = args.at<JSFunction>(0);
+  return isolate->heap()->ToBoolean(function->ActiveTierIsBaseline());
+}
+
 RUNTIME_FUNCTION(Runtime_ActiveTierIsMaglev) {
   HandleScope scope(isolate);
   DCHECK_EQ(args.length(), 1);
@@ -455,6 +469,28 @@ RUNTIME_FUNCTION(Runtime_ActiveTierIsMaglev) {
   return isolate->heap()->ToBoolean(function->ActiveTierIsMaglev());
 }

+RUNTIME_FUNCTION(Runtime_ActiveTierIsTurbofan) {
+  HandleScope scope(isolate);
+  DCHECK_EQ(args.length(), 1);
+  Handle<JSFunction> function = args.at<JSFunction>(0);
+  return isolate->heap()->ToBoolean(function->ActiveTierIsTurbofan());
+}
+
+RUNTIME_FUNCTION(Runtime_IsSparkplugEnabled) {
+  DCHECK_EQ(args.length(), 0);
+  return isolate->heap()->ToBoolean(FLAG_sparkplug);
+}
+
+RUNTIME_FUNCTION(Runtime_IsMaglevEnabled) {
+  DCHECK_EQ(args.length(), 0);
+  return isolate->heap()->ToBoolean(FLAG_maglev);
+}
+
+RUNTIME_FUNCTION(Runtime_IsTurbofanEnabled) {
+  DCHECK_EQ(args.length(), 0);
+  return isolate->heap()->ToBoolean(FLAG_turbofan);
+}
+
 #ifdef V8_ENABLE_MAGLEV
 RUNTIME_FUNCTION(Runtime_OptimizeMaglevOnNextCall) {
   HandleScope scope(isolate);
......
@@ -477,7 +477,10 @@ namespace internal {
   F(Abort, 1, 1)                              \
   F(AbortCSADcheck, 1, 1)                     \
   F(AbortJS, 1, 1)                            \
+  F(ActiveTierIsIgnition, 1, 1)               \
+  F(ActiveTierIsSparkplug, 1, 1)              \
   F(ActiveTierIsMaglev, 1, 1)                 \
+  F(ActiveTierIsTurbofan, 1, 1)               \
   F(ArrayIteratorProtector, 0, 1)             \
   F(ArraySpeciesProtector, 0, 1)               \
   F(BaselineOsr, -1, 1)                       \
@@ -541,8 +544,11 @@ namespace internal {
   F(IsConcurrentRecompilationSupported, 0, 1) \
   F(IsDictPropertyConstTrackingEnabled, 0, 1) \
   F(IsInternalizedString, 1, 1)               \
+  F(IsMaglevEnabled, 0, 1)                    \
   F(IsSameHeapObject, 2, 1)                   \
   F(IsSharedString, 1, 1)                     \
+  F(IsSparkplugEnabled, 0, 1)                 \
+  F(IsTurbofanEnabled, 0, 1)                  \
   F(MapIteratorProtector, 0, 1)               \
   F(NeverOptimizeFunction, 1, 1)              \
   F(NewRegExpWithBacktrackLimit, 3, 1)        \
......
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Flags: --allow-natives-syntax --maglev --no-stress-opt
function f(x) {
var y = 0;
for (var i = 0; i < x; i++) {
y = 1;
}
return y;
}
let keep_going = 100000; // A counter to avoid test hangs on failure.
function g() {
// Test that normal tiering (without OptimizeFooOnNextCall) works.
// We test the entire pipeline, i.e. Ignition-SP-ML-TF.
f(10);
// TODO(v8:7700): Enable.
/*
if (%IsSparkplugEnabled()) {
while (!%ActiveTierIsSparkplug(f) && --keep_going) f(10);
assertTrue(%ActiveTierIsSparkplug(f));
}
if (%IsMaglevEnabled()) {
while (!%ActiveTierIsMaglev(f) && --keep_going) f(10);
assertTrue(%ActiveTierIsMaglev(f));
}
*/
if (%IsTurbofanEnabled()) {
while (!%ActiveTierIsTurbofan(f) && --keep_going) f(10);
assertTrue(%ActiveTierIsTurbofan(f));
f(10);
assertTrue(%ActiveTierIsTurbofan(f));
}
}
%NeverOptimizeFunction(g);
g();
@@ -607,6 +607,9 @@
   # Tests that need to run sequentially (e.g. due to memory consumption).
   'wasm/asm-wasm': [PASS, HEAVY],

+  # TODO(v8:7700): Fix leaks involving std containers in Zone objects.
+  'maglev/tier-to-ml-to-tf': [SKIP],
+
 }],  # 'asan == True'

 ##############################################################################
......