Commit 3ce690ee authored by Jakob Gruber, committed by V8 LUCI CQ

[osr] Basic support for concurrent OSR

This CL adds basic support behind --concurrent-osr,
disabled by default.

When enabled:
1) the first OSR request starts a concurrent OSR compile job.
2) on completion, the code object is inserted into the OSR cache.
3) the next OSR request picks up the cached code (assuming the request
   came from the same JumpLoop bytecode).

We add a new osr optimization marker on the feedback vector to
track whether an OSR compile is currently in progress.

One fundamental issue remains: step 3) above is not guaranteed to
hit the same JumpLoop, and a mismatch means the OSR'd code cannot
be installed. This will be addressed in a followup by targeting
specific bytecode offsets for the install request.

This change is based on fanchen.kong@intel.com's earlier
change crrev.com/c/3369361, thank you!

Bug: v8:12161
Change-Id: Ib162906dd4b6ba056f62870aea2990f1369df235
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3548820
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Commit-Queue: Jakob Linke <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#79685}
parent dbff2a67
This diff is collapsed.
...@@ -30,26 +30,20 @@ namespace internal { ...@@ -30,26 +30,20 @@ namespace internal {
// Forward declarations. // Forward declarations.
class AlignedCachedData; class AlignedCachedData;
class AstRawString;
class BackgroundCompileTask; class BackgroundCompileTask;
class IsCompiledScope; class IsCompiledScope;
class JavaScriptFrame;
class OptimizedCompilationInfo; class OptimizedCompilationInfo;
class OptimizedCompilationJob;
class ParseInfo; class ParseInfo;
class Parser;
class RuntimeCallStats; class RuntimeCallStats;
class TimedHistogram; class TimedHistogram;
class TurbofanCompilationJob; class TurbofanCompilationJob;
class UnoptimizedCompilationInfo; class UnoptimizedCompilationInfo;
class UnoptimizedCompilationJob; class UnoptimizedCompilationJob;
class UnoptimizedFrame;
class WorkerThreadRuntimeCallStats; class WorkerThreadRuntimeCallStats;
struct ScriptDetails; struct ScriptDetails;
struct ScriptStreamingData; struct ScriptStreamingData;
using UnoptimizedCompilationJobList =
std::forward_list<std::unique_ptr<UnoptimizedCompilationJob>>;
// The V8 compiler API. // The V8 compiler API.
// //
// This is the central hub for dispatching to the various compilers within V8. // This is the central hub for dispatching to the various compilers within V8.
...@@ -97,6 +91,13 @@ class V8_EXPORT_PRIVATE Compiler : public AllStatic { ...@@ -97,6 +91,13 @@ class V8_EXPORT_PRIVATE Compiler : public AllStatic {
static void CompileOptimized(Isolate* isolate, Handle<JSFunction> function, static void CompileOptimized(Isolate* isolate, Handle<JSFunction> function,
ConcurrencyMode mode, CodeKind code_kind); ConcurrencyMode mode, CodeKind code_kind);
// Generate and return optimized code for OSR. The empty handle is returned
// either on failure, or after spawning a concurrent OSR task (in which case
// a future OSR request will pick up the resulting code object).
V8_WARN_UNUSED_RESULT static MaybeHandle<CodeT> CompileOptimizedOSR(
Isolate* isolate, Handle<JSFunction> function, BytecodeOffset osr_offset,
UnoptimizedFrame* frame, ConcurrencyMode mode);
V8_WARN_UNUSED_RESULT static MaybeHandle<SharedFunctionInfo> V8_WARN_UNUSED_RESULT static MaybeHandle<SharedFunctionInfo>
CompileForLiveEdit(ParseInfo* parse_info, Handle<Script> script, CompileForLiveEdit(ParseInfo* parse_info, Handle<Script> script,
Isolate* isolate); Isolate* isolate);
...@@ -112,6 +113,10 @@ class V8_EXPORT_PRIVATE Compiler : public AllStatic { ...@@ -112,6 +113,10 @@ class V8_EXPORT_PRIVATE Compiler : public AllStatic {
Isolate* isolate, Isolate* isolate,
ClearExceptionFlag flag); ClearExceptionFlag flag);
// Dispose a job without finalization.
static void DisposeTurbofanCompilationJob(TurbofanCompilationJob* job,
bool restore_function_code);
// Finalize and install Turbofan code from a previously run job. // Finalize and install Turbofan code from a previously run job.
static bool FinalizeTurbofanCompilationJob(TurbofanCompilationJob* job, static bool FinalizeTurbofanCompilationJob(TurbofanCompilationJob* job,
Isolate* isolate); Isolate* isolate);
...@@ -223,20 +228,6 @@ class V8_EXPORT_PRIVATE Compiler : public AllStatic { ...@@ -223,20 +228,6 @@ class V8_EXPORT_PRIVATE Compiler : public AllStatic {
static Handle<SharedFunctionInfo> GetSharedFunctionInfo(FunctionLiteral* node, static Handle<SharedFunctionInfo> GetSharedFunctionInfo(FunctionLiteral* node,
Handle<Script> script, Handle<Script> script,
IsolateT* isolate); IsolateT* isolate);
// ===========================================================================
// The following family of methods provides support for OSR. Code generated
// for entry via OSR might not be suitable for normal entry, hence will be
// returned directly to the caller.
//
// Please note this interface is the only part dealing with {Code} objects
// directly. Other methods are agnostic to {Code} and can use an interpreter
// instead of generating JIT code for a function at all.
// Generate and return optimized code for OSR, or empty handle on failure.
V8_WARN_UNUSED_RESULT static MaybeHandle<CodeT> GetOptimizedCodeForOSR(
Isolate* isolate, Handle<JSFunction> function, BytecodeOffset osr_offset,
JavaScriptFrame* osr_frame);
}; };
// A base class for compilation jobs intended to run concurrent to the main // A base class for compilation jobs intended to run concurrent to the main
......
...@@ -9,35 +9,20 @@ ...@@ -9,35 +9,20 @@
#include "src/codegen/optimized-compilation-info.h" #include "src/codegen/optimized-compilation-info.h"
#include "src/execution/isolate.h" #include "src/execution/isolate.h"
#include "src/execution/local-isolate.h" #include "src/execution/local-isolate.h"
#include "src/handles/handles-inl.h"
#include "src/heap/local-heap.h" #include "src/heap/local-heap.h"
#include "src/heap/parked-scope.h" #include "src/heap/parked-scope.h"
#include "src/init/v8.h" #include "src/init/v8.h"
#include "src/logging/counters.h" #include "src/logging/counters.h"
#include "src/logging/log.h" #include "src/logging/log.h"
#include "src/logging/runtime-call-stats-scope.h" #include "src/logging/runtime-call-stats-scope.h"
#include "src/objects/objects-inl.h" #include "src/objects/js-function.h"
#include "src/tasks/cancelable-task.h" #include "src/tasks/cancelable-task.h"
#include "src/tracing/trace-event.h" #include "src/tracing/trace-event.h"
namespace v8 { namespace v8 {
namespace internal { namespace internal {
namespace {
// Destroys |job| without finalizing it.
//
// When |restore_function_code| is true, the job's closure is first pointed
// back at the code of its SharedFunctionInfo, and an in-progress tiering
// state on the closure (if any) is reset. The job is deleted in all cases.
void DisposeCompilationJob(TurbofanCompilationJob* job,
                           bool restore_function_code) {
  if (!restore_function_code) {
    delete job;
    return;
  }

  Handle<JSFunction> closure = job->compilation_info()->closure();
  closure->set_code(closure->shared().GetCode(), kReleaseStore);
  const bool tiering_in_progress = IsInProgress(closure->tiering_state());
  if (tiering_in_progress) closure->reset_tiering_state();

  delete job;
}
} // namespace
class OptimizingCompileDispatcher::CompileTask : public CancelableTask { class OptimizingCompileDispatcher::CompileTask : public CancelableTask {
public: public:
explicit CompileTask(Isolate* isolate, explicit CompileTask(Isolate* isolate,
...@@ -129,26 +114,27 @@ void OptimizingCompileDispatcher::CompileNext(TurbofanCompilationJob* job, ...@@ -129,26 +114,27 @@ void OptimizingCompileDispatcher::CompileNext(TurbofanCompilationJob* job,
void OptimizingCompileDispatcher::FlushOutputQueue(bool restore_function_code) { void OptimizingCompileDispatcher::FlushOutputQueue(bool restore_function_code) {
for (;;) { for (;;) {
TurbofanCompilationJob* job = nullptr; std::unique_ptr<TurbofanCompilationJob> job;
{ {
base::MutexGuard access_output_queue_(&output_queue_mutex_); base::MutexGuard access_output_queue_(&output_queue_mutex_);
if (output_queue_.empty()) return; if (output_queue_.empty()) return;
job = output_queue_.front(); job.reset(output_queue_.front());
output_queue_.pop(); output_queue_.pop();
} }
DisposeCompilationJob(job, restore_function_code); Compiler::DisposeTurbofanCompilationJob(job.get(), restore_function_code);
} }
} }
void OptimizingCompileDispatcher::FlushInputQueue() { void OptimizingCompileDispatcher::FlushInputQueue() {
base::MutexGuard access_input_queue_(&input_queue_mutex_); base::MutexGuard access_input_queue_(&input_queue_mutex_);
while (input_queue_length_ > 0) { while (input_queue_length_ > 0) {
TurbofanCompilationJob* job = input_queue_[InputQueueIndex(0)]; std::unique_ptr<TurbofanCompilationJob> job(
input_queue_[InputQueueIndex(0)]);
DCHECK_NOT_NULL(job); DCHECK_NOT_NULL(job);
input_queue_shift_ = InputQueueIndex(1); input_queue_shift_ = InputQueueIndex(1);
input_queue_length_--; input_queue_length_--;
DisposeCompilationJob(job, true); Compiler::DisposeTurbofanCompilationJob(job.get(), true);
} }
} }
...@@ -196,25 +182,29 @@ void OptimizingCompileDispatcher::InstallOptimizedFunctions() { ...@@ -196,25 +182,29 @@ void OptimizingCompileDispatcher::InstallOptimizedFunctions() {
HandleScope handle_scope(isolate_); HandleScope handle_scope(isolate_);
for (;;) { for (;;) {
TurbofanCompilationJob* job = nullptr; std::unique_ptr<TurbofanCompilationJob> job;
{ {
base::MutexGuard access_output_queue_(&output_queue_mutex_); base::MutexGuard access_output_queue_(&output_queue_mutex_);
if (output_queue_.empty()) return; if (output_queue_.empty()) return;
job = output_queue_.front(); job.reset(output_queue_.front());
output_queue_.pop(); output_queue_.pop();
} }
OptimizedCompilationInfo* info = job->compilation_info(); OptimizedCompilationInfo* info = job->compilation_info();
Handle<JSFunction> function(*info->closure(), isolate_); Handle<JSFunction> function(*info->closure(), isolate_);
if (function->HasAvailableCodeKind(info->code_kind())) {
// If another racing task has already finished compiling and installing the
// requested code kind on the function, throw out the current job.
if (!info->is_osr() && function->HasAvailableCodeKind(info->code_kind())) {
if (FLAG_trace_concurrent_recompilation) { if (FLAG_trace_concurrent_recompilation) {
PrintF(" ** Aborting compilation for "); PrintF(" ** Aborting compilation for ");
function->ShortPrint(); function->ShortPrint();
PrintF(" as it has already been optimized.\n"); PrintF(" as it has already been optimized.\n");
} }
DisposeCompilationJob(job, false); Compiler::DisposeTurbofanCompilationJob(job.get(), false);
} else { return;
Compiler::FinalizeTurbofanCompilationJob(job, isolate_);
} }
Compiler::FinalizeTurbofanCompilationJob(job.get(), isolate_);
} }
} }
......
...@@ -275,7 +275,10 @@ void TieringManager::MaybeOptimizeFrame(JSFunction function, ...@@ -275,7 +275,10 @@ void TieringManager::MaybeOptimizeFrame(JSFunction function,
UnoptimizedFrame* frame, UnoptimizedFrame* frame,
CodeKind code_kind) { CodeKind code_kind) {
const TieringState tiering_state = function.feedback_vector().tiering_state(); const TieringState tiering_state = function.feedback_vector().tiering_state();
if (V8_UNLIKELY(IsInProgress(tiering_state))) { const TieringState osr_tiering_state =
function.feedback_vector().osr_tiering_state();
if (V8_UNLIKELY(IsInProgress(tiering_state)) ||
V8_UNLIKELY(IsInProgress(osr_tiering_state))) {
// Note: This effectively disables OSR for the function while it is being // Note: This effectively disables OSR for the function while it is being
// compiled. // compiled.
TraceInOptimizationQueue(function); TraceInOptimizationQueue(function);
......
...@@ -882,6 +882,7 @@ DEFINE_BOOL(trace_turbo_inlining, false, "trace TurboFan inlining") ...@@ -882,6 +882,7 @@ DEFINE_BOOL(trace_turbo_inlining, false, "trace TurboFan inlining")
DEFINE_BOOL(turbo_inline_array_builtins, true, DEFINE_BOOL(turbo_inline_array_builtins, true,
"inline array builtins in TurboFan code") "inline array builtins in TurboFan code")
DEFINE_BOOL(use_osr, true, "use on-stack replacement") DEFINE_BOOL(use_osr, true, "use on-stack replacement")
DEFINE_BOOL(concurrent_osr, false, "enable concurrent OSR")
DEFINE_BOOL(trace_osr, false, "trace on-stack replacement") DEFINE_BOOL(trace_osr, false, "trace on-stack replacement")
DEFINE_BOOL(analyze_environment_liveness, true, DEFINE_BOOL(analyze_environment_liveness, true,
"analyze liveness of environment slots and zap dead values") "analyze liveness of environment slots and zap dead values")
......
...@@ -104,7 +104,6 @@ inline constexpr bool CodeKindIsStoredInOptimizedCodeCache(CodeKind kind) { ...@@ -104,7 +104,6 @@ inline constexpr bool CodeKindIsStoredInOptimizedCodeCache(CodeKind kind) {
} }
inline CodeKind CodeKindForTopTier() { return CodeKind::TURBOFAN; } inline CodeKind CodeKindForTopTier() { return CodeKind::TURBOFAN; }
inline CodeKind CodeKindForOSR() { return CodeKind::TURBOFAN; }
// The dedicated CodeKindFlag enum represents all code kinds in a format // The dedicated CodeKindFlag enum represents all code kinds in a format
// suitable for bit sets. // suitable for bit sets.
......
...@@ -26,6 +26,7 @@ extern class BytecodeArray extends FixedArrayBase { ...@@ -26,6 +26,7 @@ extern class BytecodeArray extends FixedArrayBase {
// into other fields. // into other fields.
osr_urgency: int8; osr_urgency: int8;
bytecode_age: int8; bytecode_age: int8;
todo_use_me: int16; // Placeholder for osr bytecode offset bits.
} }
extern class CodeDataContainer extends HeapObject; extern class CodeDataContainer extends HeapObject;
...@@ -427,9 +427,23 @@ void FeedbackVector::set_tiering_state(TieringState state) { ...@@ -427,9 +427,23 @@ void FeedbackVector::set_tiering_state(TieringState state) {
void FeedbackVector::reset_flags() { void FeedbackVector::reset_flags() {
set_flags(TieringStateBits::encode(TieringState::kNone) | set_flags(TieringStateBits::encode(TieringState::kNone) |
OsrTieringStateBit::encode(TieringState::kNone) |
MaybeHasOptimizedCodeBit::encode(false)); MaybeHasOptimizedCodeBit::encode(false));
} }
// Returns the OSR tiering state decoded from this vector's flags word.
TieringState FeedbackVector::osr_tiering_state() {
  const auto current_flags = flags();
  return OsrTieringStateBit::decode(current_flags);
}
// Stores |marker| into the OSR tiering-state bit of the flags word, leaving
// the remaining flag bits as they were. Only kNone and kInProgress are
// accepted (checked in debug builds).
void FeedbackVector::set_osr_tiering_state(TieringState marker) {
  // Both permitted values must be representable in the bit field.
  STATIC_ASSERT(TieringState::kNone <= OsrTieringStateBit::kMax);
  STATIC_ASSERT(TieringState::kInProgress <= OsrTieringStateBit::kMax);
  DCHECK(marker == TieringState::kInProgress || marker == TieringState::kNone);

  int32_t current_flags = flags();
  const int32_t updated_flags =
      OsrTieringStateBit::update(current_flags, marker);
  set_flags(updated_flags);
}
void FeedbackVector::EvictOptimizedCodeMarkedForDeoptimization( void FeedbackVector::EvictOptimizedCodeMarkedForDeoptimization(
SharedFunctionInfo shared, const char* reason) { SharedFunctionInfo shared, const char* reason) {
MaybeObject slot = maybe_optimized_code(kAcquireLoad); MaybeObject slot = maybe_optimized_code(kAcquireLoad);
......
...@@ -234,11 +234,13 @@ class FeedbackVector ...@@ -234,11 +234,13 @@ class FeedbackVector
const char* reason); const char* reason);
void ClearOptimizedCode(); void ClearOptimizedCode();
inline bool has_tiering_state() const;
inline TieringState tiering_state() const; inline TieringState tiering_state() const;
void set_tiering_state(TieringState state); void set_tiering_state(TieringState state);
void reset_tiering_state(); void reset_tiering_state();
TieringState osr_tiering_state();
void set_osr_tiering_state(TieringState marker);
void reset_flags(); void reset_flags();
// Conversion from a slot to an integer index to the underlying array. // Conversion from a slot to an integer index to the underlying array.
......
...@@ -10,7 +10,9 @@ bitfield struct FeedbackVectorFlags extends uint32 { ...@@ -10,7 +10,9 @@ bitfield struct FeedbackVectorFlags extends uint32 {
// because the flag may lag behind the actual state of the world (it will be // because the flag may lag behind the actual state of the world (it will be
// updated in time). // updated in time).
maybe_has_optimized_code: bool: 1 bit; maybe_has_optimized_code: bool: 1 bit;
all_your_bits_are_belong_to_jgruber: uint32: 28 bit; // Just one bit, since only {kNone,kInProgress} are relevant for OSR.
osr_tiering_state: TieringState: 1 bit;
all_your_bits_are_belong_to_jgruber: uint32: 27 bit;
} }
@generateBodyDescriptor @generateBodyDescriptor
......
...@@ -109,12 +109,20 @@ TieringState JSFunction::tiering_state() const { ...@@ -109,12 +109,20 @@ TieringState JSFunction::tiering_state() const {
void JSFunction::set_tiering_state(TieringState state) { void JSFunction::set_tiering_state(TieringState state) {
DCHECK(has_feedback_vector()); DCHECK(has_feedback_vector());
DCHECK(ChecksTieringState()); DCHECK(IsNone(state) || ChecksTieringState());
DCHECK(!ActiveTierIsTurbofan());
feedback_vector().set_tiering_state(state); feedback_vector().set_tiering_state(state);
} }
// Reads the OSR tiering state from this function's feedback vector.
// The function must have a feedback vector (checked in debug builds).
TieringState JSFunction::osr_tiering_state() {
  DCHECK(has_feedback_vector());
  auto vector = feedback_vector();
  return vector.osr_tiering_state();
}
// Writes |marker| as the OSR tiering state on this function's feedback
// vector. The function must have a feedback vector (checked in debug builds).
void JSFunction::set_osr_tiering_state(TieringState marker) {
  DCHECK(has_feedback_vector());
  auto vector = feedback_vector();
  vector.set_osr_tiering_state(marker);
}
bool JSFunction::has_feedback_vector() const { bool JSFunction::has_feedback_vector() const {
return shared().is_compiled() && return shared().is_compiled() &&
raw_feedback_cell().value().IsFeedbackVector(); raw_feedback_cell().value().IsFeedbackVector();
......
...@@ -180,6 +180,9 @@ class JSFunction : public TorqueGeneratedJSFunction< ...@@ -180,6 +180,9 @@ class JSFunction : public TorqueGeneratedJSFunction<
void MarkForOptimization(Isolate* isolate, CodeKind target_kind, void MarkForOptimization(Isolate* isolate, CodeKind target_kind,
ConcurrencyMode mode); ConcurrencyMode mode);
inline TieringState osr_tiering_state();
inline void set_osr_tiering_state(TieringState marker);
// Sets the interrupt budget based on whether the function has a feedback // Sets the interrupt budget based on whether the function has a feedback
// vector and any optimized code. // vector and any optimized code.
void SetInterruptBudget(Isolate* isolate); void SetInterruptBudget(Isolate* isolate);
......
...@@ -225,64 +225,6 @@ RUNTIME_FUNCTION(Runtime_VerifyType) { ...@@ -225,64 +225,6 @@ RUNTIME_FUNCTION(Runtime_VerifyType) {
return *obj; return *obj;
} }
namespace {
// Decides whether |function| may currently be compiled for on-stack
// replacement. Returns false as soon as any of the checks below fails.
bool IsSuitableForOnStackReplacement(Isolate* isolate,
                                     Handle<JSFunction> function) {
  // No OSR while the serializer is enabled.
  if (isolate->serializer_enabled()) return false;

  // Optimization has been disabled for this function's shared info.
  if (function->shared().optimization_disabled()) return false;

  // TODO(chromium:1031479): The OSR trigger currently lives on the bytecode
  // array, so a closure may get marked in one native context while a closure
  // from a different native context ends up being optimized. Guard against
  // that by requiring a feedback vector. This is not expected to be common.
  if (!function->has_feedback_vector()) return false;

  // An already-optimized activation of this function on the stack means that
  // (a) the function is directly or indirectly recursive and (b) an optimized
  // invocation was deoptimized, which is why we are in an unoptimized
  // activation now. Refuse OSR in that situation.
  JavaScriptFrameIterator frames(isolate);
  while (!frames.done()) {
    JavaScriptFrame* candidate = frames.frame();
    const bool is_optimized_activation =
        candidate->is_optimized() && candidate->function() == *function;
    if (is_optimized_activation) return false;
    frames.Advance();
  }

  return true;
}
// Resets the OSR urgency on the bytecode array of the frame that requested
// OSR, and returns the frame's current bytecode offset as the OSR entry
// point. |js_frame| is treated as an UnoptimizedFrame.
BytecodeOffset DetermineEntryAndDisarmOSRForUnoptimized(
    JavaScriptFrame* js_frame) {
  auto* unoptimized_frame = reinterpret_cast<UnoptimizedFrame*>(js_frame);

  // The bytecode array on the stack may differ from the one installed on the
  // function (for instance after debugger patching). That is acceptable: the
  // layouts are guaranteed to stay in sync, so a BytecodeOffset describing
  // the entry point is valid for every copy of the bytecode.
  Handle<BytecodeArray> active_bytecode(unoptimized_frame->GetBytecodeArray(),
                                        unoptimized_frame->isolate());

  DCHECK_IMPLIES(
      unoptimized_frame->is_interpreted(),
      unoptimized_frame->LookupCode().is_interpreter_trampoline_builtin());
  DCHECK_IMPLIES(unoptimized_frame->is_baseline(),
                 unoptimized_frame->LookupCode().kind() == CodeKind::BASELINE);
  DCHECK(unoptimized_frame->is_unoptimized());
  DCHECK(unoptimized_frame->function().shared().HasBytecodeArray());

  // Disarm every back edge so that no further requests are fired.
  active_bytecode->reset_osr_urgency();

  // The entry point is the bytecode offset of the back branch.
  const int back_branch_offset = unoptimized_frame->GetBytecodeOffset();
  return BytecodeOffset(back_branch_offset);
}
} // namespace
RUNTIME_FUNCTION(Runtime_CompileForOnStackReplacement) { RUNTIME_FUNCTION(Runtime_CompileForOnStackReplacement) {
HandleScope handle_scope(isolate); HandleScope handle_scope(isolate);
DCHECK_EQ(0, args.length()); DCHECK_EQ(0, args.length());
...@@ -290,37 +232,33 @@ RUNTIME_FUNCTION(Runtime_CompileForOnStackReplacement) { ...@@ -290,37 +232,33 @@ RUNTIME_FUNCTION(Runtime_CompileForOnStackReplacement) {
// Determine the frame that triggered the OSR request. // Determine the frame that triggered the OSR request.
JavaScriptFrameIterator it(isolate); JavaScriptFrameIterator it(isolate);
JavaScriptFrame* frame = it.frame(); UnoptimizedFrame* frame = UnoptimizedFrame::cast(it.frame());
DCHECK(frame->is_unoptimized());
// Determine the entry point for which this OSR request has been fired and DCHECK_IMPLIES(frame->is_interpreted(),
// also disarm all back edges in the calling code to stop new requests. frame->LookupCode().is_interpreter_trampoline_builtin());
BytecodeOffset osr_offset = DetermineEntryAndDisarmOSRForUnoptimized(frame); DCHECK_IMPLIES(frame->is_baseline(),
frame->LookupCode().kind() == CodeKind::BASELINE);
DCHECK(frame->function().shared().HasBytecodeArray());
// Determine the entry point for which this OSR request has been fired.
BytecodeOffset osr_offset = BytecodeOffset(frame->GetBytecodeOffset());
DCHECK(!osr_offset.IsNone()); DCHECK(!osr_offset.IsNone());
MaybeHandle<CodeT> maybe_result; // TODO(v8:12161): If cache exists with different offset: kSynchronous.
ConcurrencyMode mode =
isolate->concurrent_recompilation_enabled() && FLAG_concurrent_osr
? ConcurrencyMode::kConcurrent
: ConcurrencyMode::kSynchronous;
Handle<JSFunction> function(frame->function(), isolate); Handle<JSFunction> function(frame->function(), isolate);
if (IsSuitableForOnStackReplacement(isolate, function)) { MaybeHandle<CodeT> maybe_result =
if (FLAG_trace_osr) { Compiler::CompileOptimizedOSR(isolate, function, osr_offset, frame, mode);
CodeTracer::Scope scope(isolate->GetCodeTracer());
PrintF(scope.file(), "[OSR - Compiling: ");
function->PrintName(scope.file());
PrintF(scope.file(), " at OSR bytecode offset %d]\n", osr_offset.ToInt());
}
maybe_result =
Compiler::GetOptimizedCodeForOSR(isolate, function, osr_offset, frame);
}
Handle<CodeT> result; Handle<CodeT> result;
if (!maybe_result.ToHandle(&result)) { if (!maybe_result.ToHandle(&result)) {
// No OSR'd code available. // No OSR'd code available.
if (FLAG_trace_osr) { // TODO(v8:12161): Distinguish between actual failure and scheduling a
CodeTracer::Scope scope(isolate->GetCodeTracer()); // concurrent job.
PrintF(scope.file(), "[OSR - Failed: ");
function->PrintName(scope.file());
PrintF(scope.file(), " at OSR bytecode offset %d]\n", osr_offset.ToInt());
}
if (!function->HasAttachedOptimizedCode()) { if (!function->HasAttachedOptimizedCode()) {
function->set_code(function->shared().GetCode(), kReleaseStore); function->set_code(function->shared().GetCode(), kReleaseStore);
} }
...@@ -329,7 +267,7 @@ RUNTIME_FUNCTION(Runtime_CompileForOnStackReplacement) { ...@@ -329,7 +267,7 @@ RUNTIME_FUNCTION(Runtime_CompileForOnStackReplacement) {
} }
DCHECK(!result.is_null()); DCHECK(!result.is_null());
DCHECK(result->is_turbofanned()); DCHECK(result->is_turbofanned()); // TODO(v8:7700): Support Maglev.
DCHECK(CodeKindIsOptimizedJSFunction(result->kind())); DCHECK(CodeKindIsOptimizedJSFunction(result->kind()));
DeoptimizationData data = DeoptimizationData data =
...@@ -346,7 +284,11 @@ RUNTIME_FUNCTION(Runtime_CompileForOnStackReplacement) { ...@@ -346,7 +284,11 @@ RUNTIME_FUNCTION(Runtime_CompileForOnStackReplacement) {
} }
if (function->feedback_vector().invocation_count() <= 1 && if (function->feedback_vector().invocation_count() <= 1 &&
function->tiering_state() != TieringState::kNone) { !IsNone(function->tiering_state()) && V8_LIKELY(!FLAG_always_opt)) {
// Note: Why consider FLAG_always_opt? Because it makes invocation_count
// unreliable at low counts: the first entry may already be optimized, and
// thus won't increment invocation_count.
//
// With lazy feedback allocation we may not have feedback for the // With lazy feedback allocation we may not have feedback for the
// initial part of the function that was executed before we allocated a // initial part of the function that was executed before we allocated a
// feedback vector. Reset any tiering states for such functions. // feedback vector. Reset any tiering states for such functions.
......
...@@ -521,6 +521,47 @@ RUNTIME_FUNCTION(Runtime_PrepareFunctionForOptimization) { ...@@ -521,6 +521,47 @@ RUNTIME_FUNCTION(Runtime_PrepareFunctionForOptimization) {
return ReadOnlyRoots(isolate).undefined_value(); return ReadOnlyRoots(isolate).undefined_value();
} }
namespace {
// Waits for the optimizing compile dispatcher's compile tasks, installs the
// resulting optimized functions, and then sets the dispatcher's finalize
// flag. Concurrent recompilation must be enabled (checked in debug builds).
void FinalizeOptimization(Isolate* isolate) {
  DCHECK(isolate->concurrent_recompilation_enabled());
  auto* dispatcher = isolate->optimizing_compile_dispatcher();
  dispatcher->AwaitCompileTasks();
  dispatcher->InstallOptimizedFunctions();
  dispatcher->set_finalize(true);
}
// Picks the JumpLoop bytecode to target, scanning forward from the frame's
// current bytecode offset.
//
// Preference is given to a JumpLoop whose range (jump target up to the
// JumpLoop itself) covers the current offset. Failing that, the first
// JumpLoop at or after the current offset is used. Returns
// BytecodeOffset::None() when neither scan finds a JumpLoop.
BytecodeOffset OffsetOfNextJumpLoop(Isolate* isolate, UnoptimizedFrame* frame) {
  Handle<BytecodeArray> bytecodes(frame->GetBytecodeArray(), isolate);
  const int start_offset = frame->GetBytecodeOffset();
  interpreter::BytecodeArrayIterator cursor(bytecodes, start_offset);

  // Pass 1: a loop that encloses the current bytecode offset.
  while (!cursor.done()) {
    const bool is_jump_loop =
        cursor.current_bytecode() == interpreter::Bytecode::kJumpLoop;
    if (is_jump_loop &&
        base::IsInRange(start_offset, cursor.GetJumpTargetOffset(),
                        cursor.current_offset())) {
      return BytecodeOffset(cursor.current_offset());
    }
    cursor.Advance();
  }

  // Pass 2: rewind and settle for any loop after the current offset.
  for (cursor.SetOffset(start_offset); !cursor.done(); cursor.Advance()) {
    if (cursor.current_bytecode() != interpreter::Bytecode::kJumpLoop) {
      continue;
    }
    return BytecodeOffset(cursor.current_offset());
  }

  return BytecodeOffset::None();
}
} // namespace
RUNTIME_FUNCTION(Runtime_OptimizeOsr) { RUNTIME_FUNCTION(Runtime_OptimizeOsr) {
HandleScope handle_scope(isolate); HandleScope handle_scope(isolate);
DCHECK(args.length() == 0 || args.length() == 1); DCHECK(args.length() == 0 || args.length() == 1);
...@@ -540,7 +581,9 @@ RUNTIME_FUNCTION(Runtime_OptimizeOsr) { ...@@ -540,7 +581,9 @@ RUNTIME_FUNCTION(Runtime_OptimizeOsr) {
if (!it.done()) function = handle(it.frame()->function(), isolate); if (!it.done()) function = handle(it.frame()->function(), isolate);
if (function.is_null()) return CrashUnlessFuzzing(isolate); if (function.is_null()) return CrashUnlessFuzzing(isolate);
if (!FLAG_opt) return ReadOnlyRoots(isolate).undefined_value(); if (V8_UNLIKELY(!FLAG_opt) || V8_UNLIKELY(!FLAG_use_osr)) {
return ReadOnlyRoots(isolate).undefined_value();
}
if (!function->shared().allows_lazy_compilation()) { if (!function->shared().allows_lazy_compilation()) {
return CrashUnlessFuzzing(isolate); return CrashUnlessFuzzing(isolate);
...@@ -567,6 +610,11 @@ RUNTIME_FUNCTION(Runtime_OptimizeOsr) { ...@@ -567,6 +610,11 @@ RUNTIME_FUNCTION(Runtime_OptimizeOsr) {
return ReadOnlyRoots(isolate).undefined_value(); return ReadOnlyRoots(isolate).undefined_value();
} }
if (!it.frame()->is_unoptimized()) {
// Nothing to be done.
return ReadOnlyRoots(isolate).undefined_value();
}
// Ensure that the function is marked for non-concurrent optimization, so that // Ensure that the function is marked for non-concurrent optimization, so that
// subsequent runs don't also optimize. // subsequent runs don't also optimize.
if (FLAG_trace_osr) { if (FLAG_trace_osr) {
...@@ -581,8 +629,40 @@ RUNTIME_FUNCTION(Runtime_OptimizeOsr) { ...@@ -581,8 +629,40 @@ RUNTIME_FUNCTION(Runtime_OptimizeOsr) {
function->MarkForOptimization(isolate, CodeKind::TURBOFAN, function->MarkForOptimization(isolate, CodeKind::TURBOFAN,
ConcurrencyMode::kSynchronous); ConcurrencyMode::kSynchronous);
if (it.frame()->is_unoptimized()) {
isolate->tiering_manager()->RequestOsrAtNextOpportunity(*function); isolate->tiering_manager()->RequestOsrAtNextOpportunity(*function);
// If concurrent OSR is enabled, the testing workflow is a bit tricky. We
// must guarantee that the next JumpLoop installs the finished OSR'd code
// object, but we still want to exercise concurrent code paths. To do so,
// we attempt to find the next JumpLoop, start an OSR job for it now, and
// immediately force finalization.
// If this succeeds and we correctly match up the next JumpLoop, once we
// reach the JumpLoop we'll hit the OSR cache and install the generated code.
// If not (e.g. because we enter a nested loop first), the next JumpLoop will
// see the cached OSR code with a mismatched offset, and trigger
// non-concurrent OSR compilation and installation.
if (isolate->concurrent_recompilation_enabled() && FLAG_concurrent_osr) {
const BytecodeOffset osr_offset =
OffsetOfNextJumpLoop(isolate, UnoptimizedFrame::cast(it.frame()));
if (osr_offset.IsNone()) {
// The loop may have been elided by bytecode generation (e.g. for
// patterns such as `do { ... } while (false);`).
return ReadOnlyRoots(isolate).undefined_value();
}
// Finalize first to ensure all pending tasks are done (since we can't
// queue more than one OSR job for each function).
FinalizeOptimization(isolate);
// Queue the job.
auto unused_result = Compiler::CompileOptimizedOSR(
isolate, function, osr_offset, UnoptimizedFrame::cast(it.frame()),
ConcurrencyMode::kConcurrent);
USE(unused_result);
// Finalize again to finish the queued job. The next call into
// CompileForOnStackReplacement will pick up the cached Code object.
FinalizeOptimization(isolate);
} }
return ReadOnlyRoots(isolate).undefined_value(); return ReadOnlyRoots(isolate).undefined_value();
...@@ -746,9 +826,7 @@ RUNTIME_FUNCTION(Runtime_WaitForBackgroundOptimization) { ...@@ -746,9 +826,7 @@ RUNTIME_FUNCTION(Runtime_WaitForBackgroundOptimization) {
RUNTIME_FUNCTION(Runtime_FinalizeOptimization) { RUNTIME_FUNCTION(Runtime_FinalizeOptimization) {
DCHECK_EQ(0, args.length()); DCHECK_EQ(0, args.length());
if (isolate->concurrent_recompilation_enabled()) { if (isolate->concurrent_recompilation_enabled()) {
isolate->optimizing_compile_dispatcher()->AwaitCompileTasks(); FinalizeOptimization(isolate);
isolate->optimizing_compile_dispatcher()->InstallOptimizedFunctions();
isolate->optimizing_compile_dispatcher()->set_finalize(true);
} }
return ReadOnlyRoots(isolate).undefined_value(); return ReadOnlyRoots(isolate).undefined_value();
} }
......
...@@ -490,10 +490,10 @@ V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream& os, FeedbackSlot); ...@@ -490,10 +490,10 @@ V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream& os, FeedbackSlot);
class BytecodeOffset { class BytecodeOffset {
public: public:
explicit BytecodeOffset(int id) : id_(id) {} explicit constexpr BytecodeOffset(int id) : id_(id) {}
int ToInt() const { return id_; } int ToInt() const { return id_; }
static BytecodeOffset None() { return BytecodeOffset(kNoneId); } static constexpr BytecodeOffset None() { return BytecodeOffset(kNoneId); }
// Special bailout id support for deopting into the {JSConstructStub} stub. // Special bailout id support for deopting into the {JSConstructStub} stub.
// The following hard-coded deoptimization points are supported by the stub: // The following hard-coded deoptimization points are supported by the stub:
...@@ -506,7 +506,7 @@ class BytecodeOffset { ...@@ -506,7 +506,7 @@ class BytecodeOffset {
id_ == ConstructStubInvoke().ToInt(); id_ == ConstructStubInvoke().ToInt();
} }
bool IsNone() const { return id_ == kNoneId; } constexpr bool IsNone() const { return id_ == kNoneId; }
bool operator==(const BytecodeOffset& other) const { bool operator==(const BytecodeOffset& other) const {
return id_ == other.id_; return id_ == other.id_;
} }
......
// Copyright 2016 the V8 project authors. All rights reserved. // Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
//
// Flags: --allow-natives-syntax --opt // Flags: --allow-natives-syntax --opt --no-use-osr
//
// Why not OSR? Because it may inline the `store` function into OSR'd code
// below before it has a chance to be optimized, making
// `assertOptimized(store)` fail.
function load(o) { function load(o) {
return o.x; return o.x;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment