Commit d187c6c2 authored by Jakob Gruber, committed by V8 LUCI CQ

Reland "[osr] Basic support for concurrent OSR"

This is a reland of commit 3ce690ee

Changed for the reland:
- Remove the currently-unused BytecodeArray member to avoid MSAN
  failures.
- s/return/continue/ in optimizing-compile-dispatcher.

Original change's description:
> [osr] Basic support for concurrent OSR
>
> This CL adds basic support behind --concurrent-osr,
> disabled by default.
>
> When enabled:
> 1) the first OSR request starts a concurrent OSR compile job.
> 2) on completion, the code object is inserted into the OSR cache.
> 3) the next OSR request picks up the cached code (assuming the request
>    came from the same JumpLoop bytecode).
>
> We add a new osr optimization marker on the feedback vector to
> track whether an OSR compile is currently in progress.
>
> One fundamental issue remains: step 3) above is not guaranteed to
> hit the same JumpLoop, and a mismatch means the OSR'd code cannot
> be installed. This will be addressed in a followup by targeting
> specific bytecode offsets for the install request.
>
> This change is based on fanchen.kong@intel.com's earlier
> change crrev.com/c/3369361, thank you!
>
> Bug: v8:12161
> Change-Id: Ib162906dd4b6ba056f62870aea2990f1369df235
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3548820
> Reviewed-by: Leszek Swirski <leszeks@chromium.org>
> Commit-Queue: Jakob Linke <jgruber@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#79685}

Bug: v8:12161
Change-Id: I48b100e5980c909ec5e79d190aaea730c83e9386
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3565720
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Commit-Queue: Jakob Linke <jgruber@chromium.org>
Auto-Submit: Jakob Linke <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#79746}
parent 334016ac
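The sketch below is a toy, self-contained model of the request/complete/install flow described in steps 1)-3) of the description above. It is not V8 code: the names Function, OsrState, RequestOsr, OnOsrJobDone, and osr_cache are invented for illustration and only loosely correspond to FeedbackVector::osr_tiering_state(), the OSR code cache, and Compiler::CompileOptimizedOSR in the actual change.

// Toy model of concurrent OSR request handling (not V8 code).
#include <iostream>
#include <map>
#include <optional>
#include <string>

enum class OsrState { kNone, kInProgress };

struct Function {
  OsrState osr_state = OsrState::kNone;
  // Cache of OSR'd "code", keyed by the JumpLoop bytecode offset it targets.
  std::map<int, std::string> osr_cache;
};

// Models a single OSR request fired from a JumpLoop at `osr_offset`.
// Returns code to install, or nullopt if compilation was merely scheduled.
std::optional<std::string> RequestOsr(Function& f, int osr_offset) {
  // 3) A later request from the same JumpLoop finds the cached code.
  if (auto it = f.osr_cache.find(osr_offset); it != f.osr_cache.end()) {
    return it->second;
  }
  // A job is already running; don't start another one.
  if (f.osr_state == OsrState::kInProgress) return std::nullopt;
  // 1) First request: mark in-progress and (conceptually) start a
  // concurrent compile job for this offset.
  f.osr_state = OsrState::kInProgress;
  return std::nullopt;
}

// 2) Called when the concurrent job finishes: cache the result and clear
// the in-progress flag.
void OnOsrJobDone(Function& f, int osr_offset, std::string code) {
  f.osr_cache.emplace(osr_offset, std::move(code));
  f.osr_state = OsrState::kNone;
}

int main() {
  Function f;
  std::cout << RequestOsr(f, 42).has_value() << "\n";  // 0: job scheduled.
  OnOsrJobDone(f, 42, "turbofan-code@42");             // Background completion.
  std::cout << RequestOsr(f, 42).value() << "\n";      // Cache hit, install.
  // A request from a different JumpLoop misses the cache: this is the open
  // issue noted above (the install request may not hit the same JumpLoop).
  std::cout << RequestOsr(f, 7).has_value() << "\n";   // 0 again.
}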
@@ -30,26 +30,20 @@ namespace internal {
 // Forward declarations.
 class AlignedCachedData;
-class AstRawString;
 class BackgroundCompileTask;
 class IsCompiledScope;
-class JavaScriptFrame;
 class OptimizedCompilationInfo;
-class OptimizedCompilationJob;
 class ParseInfo;
-class Parser;
 class RuntimeCallStats;
 class TimedHistogram;
 class TurbofanCompilationJob;
 class UnoptimizedCompilationInfo;
 class UnoptimizedCompilationJob;
+class UnoptimizedFrame;
 class WorkerThreadRuntimeCallStats;
 struct ScriptDetails;
 struct ScriptStreamingData;
-
-using UnoptimizedCompilationJobList =
-    std::forward_list<std::unique_ptr<UnoptimizedCompilationJob>>;
 
 // The V8 compiler API.
 //
 // This is the central hub for dispatching to the various compilers within V8.
@@ -97,6 +91,13 @@ class V8_EXPORT_PRIVATE Compiler : public AllStatic {
   static void CompileOptimized(Isolate* isolate, Handle<JSFunction> function,
                                ConcurrencyMode mode, CodeKind code_kind);
 
+  // Generate and return optimized code for OSR. The empty handle is returned
+  // either on failure, or after spawning a concurrent OSR task (in which case
+  // a future OSR request will pick up the resulting code object).
+  V8_WARN_UNUSED_RESULT static MaybeHandle<CodeT> CompileOptimizedOSR(
+      Isolate* isolate, Handle<JSFunction> function, BytecodeOffset osr_offset,
+      UnoptimizedFrame* frame, ConcurrencyMode mode);
+
   V8_WARN_UNUSED_RESULT static MaybeHandle<SharedFunctionInfo>
   CompileForLiveEdit(ParseInfo* parse_info, Handle<Script> script,
                      Isolate* isolate);
@@ -112,6 +113,10 @@ class V8_EXPORT_PRIVATE Compiler : public AllStatic {
                                           Isolate* isolate,
                                           ClearExceptionFlag flag);
 
+  // Dispose a job without finalization.
+  static void DisposeTurbofanCompilationJob(TurbofanCompilationJob* job,
+                                            bool restore_function_code);
+
   // Finalize and install Turbofan code from a previously run job.
   static bool FinalizeTurbofanCompilationJob(TurbofanCompilationJob* job,
                                              Isolate* isolate);
@@ -223,20 +228,6 @@ class V8_EXPORT_PRIVATE Compiler : public AllStatic {
   static Handle<SharedFunctionInfo> GetSharedFunctionInfo(FunctionLiteral* node,
                                                           Handle<Script> script,
                                                           IsolateT* isolate);
-
-  // ===========================================================================
-  // The following family of methods provides support for OSR. Code generated
-  // for entry via OSR might not be suitable for normal entry, hence will be
-  // returned directly to the caller.
-  //
-  // Please note this interface is the only part dealing with {Code} objects
-  // directly. Other methods are agnostic to {Code} and can use an interpreter
-  // instead of generating JIT code for a function at all.
-
-  // Generate and return optimized code for OSR, or empty handle on failure.
-  V8_WARN_UNUSED_RESULT static MaybeHandle<CodeT> GetOptimizedCodeForOSR(
-      Isolate* isolate, Handle<JSFunction> function, BytecodeOffset osr_offset,
-      JavaScriptFrame* osr_frame);
 };
 
 // A base class for compilation jobs intended to run concurrent to the main
......
@@ -9,35 +9,20 @@
 #include "src/codegen/optimized-compilation-info.h"
 #include "src/execution/isolate.h"
 #include "src/execution/local-isolate.h"
+#include "src/handles/handles-inl.h"
 #include "src/heap/local-heap.h"
 #include "src/heap/parked-scope.h"
 #include "src/init/v8.h"
 #include "src/logging/counters.h"
 #include "src/logging/log.h"
 #include "src/logging/runtime-call-stats-scope.h"
-#include "src/objects/objects-inl.h"
+#include "src/objects/js-function.h"
 #include "src/tasks/cancelable-task.h"
 #include "src/tracing/trace-event.h"
 
 namespace v8 {
 namespace internal {
 
-namespace {
-
-void DisposeCompilationJob(TurbofanCompilationJob* job,
-                           bool restore_function_code) {
-  if (restore_function_code) {
-    Handle<JSFunction> function = job->compilation_info()->closure();
-    function->set_code(function->shared().GetCode(), kReleaseStore);
-    if (IsInProgress(function->tiering_state())) {
-      function->reset_tiering_state();
-    }
-  }
-  delete job;
-}
-
-}  // namespace
-
 class OptimizingCompileDispatcher::CompileTask : public CancelableTask {
  public:
   explicit CompileTask(Isolate* isolate,
@@ -129,26 +114,27 @@ void OptimizingCompileDispatcher::CompileNext(TurbofanCompilationJob* job,
 void OptimizingCompileDispatcher::FlushOutputQueue(bool restore_function_code) {
   for (;;) {
-    TurbofanCompilationJob* job = nullptr;
+    std::unique_ptr<TurbofanCompilationJob> job;
     {
       base::MutexGuard access_output_queue_(&output_queue_mutex_);
       if (output_queue_.empty()) return;
-      job = output_queue_.front();
+      job.reset(output_queue_.front());
       output_queue_.pop();
     }
 
-    DisposeCompilationJob(job, restore_function_code);
+    Compiler::DisposeTurbofanCompilationJob(job.get(), restore_function_code);
   }
 }
 
 void OptimizingCompileDispatcher::FlushInputQueue() {
   base::MutexGuard access_input_queue_(&input_queue_mutex_);
   while (input_queue_length_ > 0) {
-    TurbofanCompilationJob* job = input_queue_[InputQueueIndex(0)];
+    std::unique_ptr<TurbofanCompilationJob> job(
+        input_queue_[InputQueueIndex(0)]);
     DCHECK_NOT_NULL(job);
     input_queue_shift_ = InputQueueIndex(1);
     input_queue_length_--;
 
-    DisposeCompilationJob(job, true);
+    Compiler::DisposeTurbofanCompilationJob(job.get(), true);
   }
 }
@@ -196,25 +182,29 @@ void OptimizingCompileDispatcher::InstallOptimizedFunctions() {
   HandleScope handle_scope(isolate_);
 
   for (;;) {
-    TurbofanCompilationJob* job = nullptr;
+    std::unique_ptr<TurbofanCompilationJob> job;
     {
       base::MutexGuard access_output_queue_(&output_queue_mutex_);
       if (output_queue_.empty()) return;
-      job = output_queue_.front();
+      job.reset(output_queue_.front());
      output_queue_.pop();
     }
     OptimizedCompilationInfo* info = job->compilation_info();
     Handle<JSFunction> function(*info->closure(), isolate_);
-    if (function->HasAvailableCodeKind(info->code_kind())) {
+
+    // If another racing task has already finished compiling and installing the
+    // requested code kind on the function, throw out the current job.
+    if (!info->is_osr() && function->HasAvailableCodeKind(info->code_kind())) {
       if (FLAG_trace_concurrent_recompilation) {
         PrintF(" ** Aborting compilation for ");
         function->ShortPrint();
         PrintF(" as it has already been optimized.\n");
       }
-      DisposeCompilationJob(job, false);
-    } else {
-      Compiler::FinalizeTurbofanCompilationJob(job, isolate_);
+      Compiler::DisposeTurbofanCompilationJob(job.get(), false);
+      continue;
     }
+
+    Compiler::FinalizeTurbofanCompilationJob(job.get(), isolate_);
   }
 }
......
@@ -275,7 +275,10 @@ void TieringManager::MaybeOptimizeFrame(JSFunction function,
                                         UnoptimizedFrame* frame,
                                         CodeKind code_kind) {
   const TieringState tiering_state = function.feedback_vector().tiering_state();
-  if (V8_UNLIKELY(IsInProgress(tiering_state))) {
+  const TieringState osr_tiering_state =
+      function.feedback_vector().osr_tiering_state();
+  if (V8_UNLIKELY(IsInProgress(tiering_state)) ||
+      V8_UNLIKELY(IsInProgress(osr_tiering_state))) {
     // Note: This effectively disables OSR for the function while it is being
     // compiled.
     TraceInOptimizationQueue(function);
......
@@ -882,6 +882,7 @@ DEFINE_BOOL(trace_turbo_inlining, false, "trace TurboFan inlining")
 DEFINE_BOOL(turbo_inline_array_builtins, true,
             "inline array builtins in TurboFan code")
 DEFINE_BOOL(use_osr, true, "use on-stack replacement")
+DEFINE_BOOL(concurrent_osr, false, "enable concurrent OSR")
 DEFINE_BOOL(trace_osr, false, "trace on-stack replacement")
 DEFINE_BOOL(analyze_environment_liveness, true,
             "analyze liveness of environment slots and zap dead values")
......
@@ -104,7 +104,6 @@ inline constexpr bool CodeKindIsStoredInOptimizedCodeCache(CodeKind kind) {
 }
 
 inline CodeKind CodeKindForTopTier() { return CodeKind::TURBOFAN; }
-inline CodeKind CodeKindForOSR() { return CodeKind::TURBOFAN; }
 
 // The dedicated CodeKindFlag enum represents all code kinds in a format
 // suitable for bit sets.
......
@@ -427,9 +427,23 @@ void FeedbackVector::set_tiering_state(TieringState state) {
 void FeedbackVector::reset_flags() {
   set_flags(TieringStateBits::encode(TieringState::kNone) |
+            OsrTieringStateBit::encode(TieringState::kNone) |
             MaybeHasOptimizedCodeBit::encode(false));
 }
 
+TieringState FeedbackVector::osr_tiering_state() {
+  return OsrTieringStateBit::decode(flags());
+}
+
+void FeedbackVector::set_osr_tiering_state(TieringState marker) {
+  DCHECK(marker == TieringState::kNone || marker == TieringState::kInProgress);
+  STATIC_ASSERT(TieringState::kNone <= OsrTieringStateBit::kMax);
+  STATIC_ASSERT(TieringState::kInProgress <= OsrTieringStateBit::kMax);
+  int32_t state = flags();
+  state = OsrTieringStateBit::update(state, marker);
+  set_flags(state);
+}
+
 void FeedbackVector::EvictOptimizedCodeMarkedForDeoptimization(
     SharedFunctionInfo shared, const char* reason) {
   MaybeObject slot = maybe_optimized_code(kAcquireLoad);
......
@@ -234,11 +234,13 @@ class FeedbackVector
                                                 const char* reason);
 
   void ClearOptimizedCode();
-  inline bool has_tiering_state() const;
   inline TieringState tiering_state() const;
   void set_tiering_state(TieringState state);
   void reset_tiering_state();
+  TieringState osr_tiering_state();
+  void set_osr_tiering_state(TieringState marker);
+
   void reset_flags();
 
   // Conversion from a slot to an integer index to the underlying array.
......
@@ -10,7 +10,9 @@ bitfield struct FeedbackVectorFlags extends uint32 {
   // because they flag may lag behind the actual state of the world (it will be
   // updated in time).
   maybe_has_optimized_code: bool: 1 bit;
-  all_your_bits_are_belong_to_jgruber: uint32: 28 bit;
+  // Just one bit, since only {kNone,kInProgress} are relevant for OSR.
+  osr_tiering_state: TieringState: 1 bit;
+  all_your_bits_are_belong_to_jgruber: uint32: 27 bit;
 }
 
 @generateBodyDescriptor
......
@@ -109,12 +109,20 @@ TieringState JSFunction::tiering_state() const {
 void JSFunction::set_tiering_state(TieringState state) {
   DCHECK(has_feedback_vector());
-  DCHECK(ChecksTieringState());
-  DCHECK(!ActiveTierIsTurbofan());
+  DCHECK(IsNone(state) || ChecksTieringState());
   feedback_vector().set_tiering_state(state);
 }
 
+TieringState JSFunction::osr_tiering_state() {
+  DCHECK(has_feedback_vector());
+  return feedback_vector().osr_tiering_state();
+}
+
+void JSFunction::set_osr_tiering_state(TieringState marker) {
+  DCHECK(has_feedback_vector());
+  feedback_vector().set_osr_tiering_state(marker);
+}
+
 bool JSFunction::has_feedback_vector() const {
   return shared().is_compiled() &&
          raw_feedback_cell().value().IsFeedbackVector();
......
@@ -180,6 +180,9 @@ class JSFunction : public TorqueGeneratedJSFunction<
   void MarkForOptimization(Isolate* isolate, CodeKind target_kind,
                            ConcurrencyMode mode);
 
+  inline TieringState osr_tiering_state();
+  inline void set_osr_tiering_state(TieringState marker);
+
   // Sets the interrupt budget based on whether the function has a feedback
   // vector and any optimized code.
   void SetInterruptBudget(Isolate* isolate);
......
@@ -225,64 +225,6 @@ RUNTIME_FUNCTION(Runtime_VerifyType) {
   return *obj;
 }
 
-namespace {
-
-bool IsSuitableForOnStackReplacement(Isolate* isolate,
-                                     Handle<JSFunction> function) {
-  // Don't OSR during serialization.
-  if (isolate->serializer_enabled()) return false;
-  // Keep track of whether we've succeeded in optimizing.
-  if (function->shared().optimization_disabled()) return false;
-  // TODO(chromium:1031479): Currently, OSR triggering mechanism is tied to the
-  // bytecode array. So, it might be possible to mark closure in one native
-  // context and optimize a closure from a different native context. So check if
-  // there is a feedback vector before OSRing. We don't expect this to happen
-  // often.
-  if (!function->has_feedback_vector()) return false;
-  // If we are trying to do OSR when there are already optimized
-  // activations of the function, it means (a) the function is directly or
-  // indirectly recursive and (b) an optimized invocation has been
-  // deoptimized so that we are currently in an unoptimized activation.
-  // Check for optimized activations of this function.
-  for (JavaScriptFrameIterator it(isolate); !it.done(); it.Advance()) {
-    JavaScriptFrame* frame = it.frame();
-    if (frame->is_optimized() && frame->function() == *function) return false;
-  }
-
-  return true;
-}
-
-BytecodeOffset DetermineEntryAndDisarmOSRForUnoptimized(
-    JavaScriptFrame* js_frame) {
-  UnoptimizedFrame* frame = reinterpret_cast<UnoptimizedFrame*>(js_frame);
-
-  // Note that the bytecode array active on the stack might be different from
-  // the one installed on the function (e.g. patched by debugger). This however
-  // is fine because we guarantee the layout to be in sync, hence any
-  // BytecodeOffset representing the entry point will be valid for any copy of
-  // the bytecode.
-  Handle<BytecodeArray> bytecode(frame->GetBytecodeArray(), frame->isolate());
-
-  DCHECK_IMPLIES(frame->is_interpreted(),
-                 frame->LookupCode().is_interpreter_trampoline_builtin());
-  DCHECK_IMPLIES(frame->is_baseline(),
-                 frame->LookupCode().kind() == CodeKind::BASELINE);
-  DCHECK(frame->is_unoptimized());
-  DCHECK(frame->function().shared().HasBytecodeArray());
-
-  // Disarm all back edges.
-  bytecode->reset_osr_urgency();
-
-  // Return a BytecodeOffset representing the bytecode offset of the back
-  // branch.
-  return BytecodeOffset(frame->GetBytecodeOffset());
-}
-
-}  // namespace
-
 RUNTIME_FUNCTION(Runtime_CompileForOnStackReplacement) {
   HandleScope handle_scope(isolate);
   DCHECK_EQ(0, args.length());
@@ -290,37 +232,33 @@ RUNTIME_FUNCTION(Runtime_CompileForOnStackReplacement) {
   // Determine the frame that triggered the OSR request.
   JavaScriptFrameIterator it(isolate);
-  JavaScriptFrame* frame = it.frame();
+  UnoptimizedFrame* frame = UnoptimizedFrame::cast(it.frame());
 
-  DCHECK(frame->is_unoptimized());
+  DCHECK_IMPLIES(frame->is_interpreted(),
+                 frame->LookupCode().is_interpreter_trampoline_builtin());
+  DCHECK_IMPLIES(frame->is_baseline(),
+                 frame->LookupCode().kind() == CodeKind::BASELINE);
+  DCHECK(frame->function().shared().HasBytecodeArray());
 
-  // Determine the entry point for which this OSR request has been fired and
-  // also disarm all back edges in the calling code to stop new requests.
-  BytecodeOffset osr_offset = DetermineEntryAndDisarmOSRForUnoptimized(frame);
+  // Determine the entry point for which this OSR request has been fired.
+  BytecodeOffset osr_offset = BytecodeOffset(frame->GetBytecodeOffset());
   DCHECK(!osr_offset.IsNone());
 
-  MaybeHandle<CodeT> maybe_result;
+  // TODO(v8:12161): If cache exists with different offset: kSynchronous.
+  ConcurrencyMode mode =
+      isolate->concurrent_recompilation_enabled() && FLAG_concurrent_osr
+          ? ConcurrencyMode::kConcurrent
+          : ConcurrencyMode::kSynchronous;
+
   Handle<JSFunction> function(frame->function(), isolate);
-  if (IsSuitableForOnStackReplacement(isolate, function)) {
-    if (FLAG_trace_osr) {
-      CodeTracer::Scope scope(isolate->GetCodeTracer());
-      PrintF(scope.file(), "[OSR - Compiling: ");
-      function->PrintName(scope.file());
-      PrintF(scope.file(), " at OSR bytecode offset %d]\n", osr_offset.ToInt());
-    }
-    maybe_result =
-        Compiler::GetOptimizedCodeForOSR(isolate, function, osr_offset, frame);
-  }
+  MaybeHandle<CodeT> maybe_result =
+      Compiler::CompileOptimizedOSR(isolate, function, osr_offset, frame, mode);
 
   Handle<CodeT> result;
   if (!maybe_result.ToHandle(&result)) {
     // No OSR'd code available.
-    if (FLAG_trace_osr) {
-      CodeTracer::Scope scope(isolate->GetCodeTracer());
-      PrintF(scope.file(), "[OSR - Failed: ");
-      function->PrintName(scope.file());
-      PrintF(scope.file(), " at OSR bytecode offset %d]\n", osr_offset.ToInt());
-    }
+    // TODO(v8:12161): Distinguish between actual failure and scheduling a
+    // concurrent job.
     if (!function->HasAttachedOptimizedCode()) {
       function->set_code(function->shared().GetCode(), kReleaseStore);
     }
@@ -329,7 +267,7 @@ RUNTIME_FUNCTION(Runtime_CompileForOnStackReplacement) {
   }
 
   DCHECK(!result.is_null());
-  DCHECK(result->is_turbofanned());
+  DCHECK(result->is_turbofanned());  // TODO(v8:7700): Support Maglev.
   DCHECK(CodeKindIsOptimizedJSFunction(result->kind()));
 
   DeoptimizationData data =
@@ -346,7 +284,11 @@ RUNTIME_FUNCTION(Runtime_CompileForOnStackReplacement) {
   }
 
   if (function->feedback_vector().invocation_count() <= 1 &&
-      function->tiering_state() != TieringState::kNone) {
+      !IsNone(function->tiering_state()) && V8_LIKELY(!FLAG_always_opt)) {
+    // Note: Why consider FLAG_always_opt? Because it makes invocation_count
+    // unreliable at low counts: the first entry may already be optimized, and
+    // thus won't increment invocation_count.
+    //
     // With lazy feedback allocation we may not have feedback for the
     // initial part of the function that was executed before we allocated a
     // feedback vector. Reset any tiering states for such functions.
......
@@ -521,6 +521,47 @@ RUNTIME_FUNCTION(Runtime_PrepareFunctionForOptimization) {
   return ReadOnlyRoots(isolate).undefined_value();
 }
 
+namespace {
+
+void FinalizeOptimization(Isolate* isolate) {
+  DCHECK(isolate->concurrent_recompilation_enabled());
+  isolate->optimizing_compile_dispatcher()->AwaitCompileTasks();
+  isolate->optimizing_compile_dispatcher()->InstallOptimizedFunctions();
+  isolate->optimizing_compile_dispatcher()->set_finalize(true);
+}
+
+BytecodeOffset OffsetOfNextJumpLoop(Isolate* isolate, UnoptimizedFrame* frame) {
+  Handle<BytecodeArray> bytecode_array(frame->GetBytecodeArray(), isolate);
+  const int current_offset = frame->GetBytecodeOffset();
+
+  interpreter::BytecodeArrayIterator it(bytecode_array, current_offset);
+
+  // First, look for a loop that contains the current bytecode offset.
+  for (; !it.done(); it.Advance()) {
+    if (it.current_bytecode() != interpreter::Bytecode::kJumpLoop) {
+      continue;
+    }
+    if (!base::IsInRange(current_offset, it.GetJumpTargetOffset(),
+                         it.current_offset())) {
+      continue;
+    }
+    return BytecodeOffset(it.current_offset());
+  }
+
+  // Fall back to any loop after the current offset.
+  it.SetOffset(current_offset);
+  for (; !it.done(); it.Advance()) {
+    if (it.current_bytecode() == interpreter::Bytecode::kJumpLoop) {
+      return BytecodeOffset(it.current_offset());
+    }
+  }
+
+  return BytecodeOffset::None();
+}
+
+}  // namespace
+
 RUNTIME_FUNCTION(Runtime_OptimizeOsr) {
   HandleScope handle_scope(isolate);
   DCHECK(args.length() == 0 || args.length() == 1);
@@ -540,7 +581,9 @@ RUNTIME_FUNCTION(Runtime_OptimizeOsr) {
   if (!it.done()) function = handle(it.frame()->function(), isolate);
   if (function.is_null()) return CrashUnlessFuzzing(isolate);
 
-  if (!FLAG_opt) return ReadOnlyRoots(isolate).undefined_value();
+  if (V8_UNLIKELY(!FLAG_opt) || V8_UNLIKELY(!FLAG_use_osr)) {
+    return ReadOnlyRoots(isolate).undefined_value();
+  }
 
   if (!function->shared().allows_lazy_compilation()) {
     return CrashUnlessFuzzing(isolate);
@@ -567,6 +610,11 @@ RUNTIME_FUNCTION(Runtime_OptimizeOsr) {
     return ReadOnlyRoots(isolate).undefined_value();
   }
 
+  if (!it.frame()->is_unoptimized()) {
+    // Nothing to be done.
+    return ReadOnlyRoots(isolate).undefined_value();
+  }
+
   // Ensure that the function is marked for non-concurrent optimization, so that
   // subsequent runs don't also optimize.
   if (FLAG_trace_osr) {
@@ -581,8 +629,40 @@ RUNTIME_FUNCTION(Runtime_OptimizeOsr) {
   function->MarkForOptimization(isolate, CodeKind::TURBOFAN,
                                 ConcurrencyMode::kSynchronous);
 
-  if (it.frame()->is_unoptimized()) {
-    isolate->tiering_manager()->RequestOsrAtNextOpportunity(*function);
+  isolate->tiering_manager()->RequestOsrAtNextOpportunity(*function);
+
+  // If concurrent OSR is enabled, the testing workflow is a bit tricky. We
+  // must guarantee that the next JumpLoop installs the finished OSR'd code
+  // object, but we still want to exercise concurrent code paths. To do so,
+  // we attempt to find the next JumpLoop, start an OSR job for it now, and
+  // immediately force finalization.
+  // If this succeeds and we correctly match up the next JumpLoop, once we
+  // reach the JumpLoop we'll hit the OSR cache and install the generated code.
+  // If not (e.g. because we enter a nested loop first), the next JumpLoop will
+  // see the cached OSR code with a mismatched offset, and trigger
+  // non-concurrent OSR compilation and installation.
+  if (isolate->concurrent_recompilation_enabled() && FLAG_concurrent_osr) {
+    const BytecodeOffset osr_offset =
+        OffsetOfNextJumpLoop(isolate, UnoptimizedFrame::cast(it.frame()));
+    if (osr_offset.IsNone()) {
+      // The loop may have been elided by bytecode generation (e.g. for
+      // patterns such as `do { ... } while (false);`).
+      return ReadOnlyRoots(isolate).undefined_value();
+    }
+
+    // Finalize first to ensure all pending tasks are done (since we can't
+    // queue more than one OSR job for each function).
+    FinalizeOptimization(isolate);
+
+    // Queue the job.
+    auto unused_result = Compiler::CompileOptimizedOSR(
+        isolate, function, osr_offset, UnoptimizedFrame::cast(it.frame()),
+        ConcurrencyMode::kConcurrent);
+    USE(unused_result);
+
+    // Finalize again to finish the queued job. The next call into
+    // CompileForOnStackReplacement will pick up the cached Code object.
+    FinalizeOptimization(isolate);
   }
 
   return ReadOnlyRoots(isolate).undefined_value();
@@ -748,9 +828,7 @@ RUNTIME_FUNCTION(Runtime_WaitForBackgroundOptimization) {
 RUNTIME_FUNCTION(Runtime_FinalizeOptimization) {
   DCHECK_EQ(0, args.length());
   if (isolate->concurrent_recompilation_enabled()) {
-    isolate->optimizing_compile_dispatcher()->AwaitCompileTasks();
-    isolate->optimizing_compile_dispatcher()->InstallOptimizedFunctions();
-    isolate->optimizing_compile_dispatcher()->set_finalize(true);
+    FinalizeOptimization(isolate);
   }
   return ReadOnlyRoots(isolate).undefined_value();
 }
......
@@ -490,10 +490,10 @@ V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream& os, FeedbackSlot);
 class BytecodeOffset {
  public:
-  explicit BytecodeOffset(int id) : id_(id) {}
+  explicit constexpr BytecodeOffset(int id) : id_(id) {}
   int ToInt() const { return id_; }
 
-  static BytecodeOffset None() { return BytecodeOffset(kNoneId); }
+  static constexpr BytecodeOffset None() { return BytecodeOffset(kNoneId); }
 
   // Special bailout id support for deopting into the {JSConstructStub} stub.
   // The following hard-coded deoptimization points are supported by the stub:
@@ -506,7 +506,7 @@ class BytecodeOffset {
            id_ == ConstructStubInvoke().ToInt();
   }
 
-  bool IsNone() const { return id_ == kNoneId; }
+  constexpr bool IsNone() const { return id_ == kNoneId; }
   bool operator==(const BytecodeOffset& other) const {
     return id_ == other.id_;
   }
......
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Flags: --allow-natives-syntax
function __getProperties(obj) {
let properties = [];
for (let name of Object.getOwnPropertyNames(obj)) {
properties.push(name);
}
return properties;
}
function* __getObjects(root = this, level = 0) {
if (level > 4) return;
let obj_names = __getProperties(root);
for (let obj_name of obj_names) {
let obj = root[obj_name];
yield* __getObjects(obj, level + 1);
}
}
function __getRandomObject() {
for (let obj of __getObjects()) {}
}
%PrepareFunctionForOptimization(__f_23);
%OptimizeFunctionOnNextCall(__f_23);
try {
__getRandomObject(), {};
} catch (e) {}
function __f_23(__v_93) {
var __v_95 = "x";
return __v_93[__v_95] + __v_94[__v_95];
}
%PrepareFunctionForOptimization(__f_23);
try {
__f_23();
} catch (e) {}
try {
%OptimizeFunctionOnNextCall(__f_23);
__f_23();
} catch (e) {}
%DisableOptimizationFinalization();
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Flags: --stress-wasm-code-gc --gc-interval=46 --cache=code --no-lazy
// No contents - just the flag combination above triggered the MSAN failure.
 // Copyright 2016 the V8 project authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
-// Flags: --allow-natives-syntax --opt
+//
+// Flags: --allow-natives-syntax --opt --no-use-osr
+//
+// Why not OSR? Because it may inline the `store` function into OSR'd code
+// below before it has a chance to be optimized, making
+// `assertOptimized(store)` fail.
 
 function load(o) {
   return o.x;
......