Commit 070e3b0a authored by yangguo@chromium.org

Introduce concurrent on-stack replacement.

Currently disabled behind --concurrent-osr.

R=titzer@chromium.org
BUG=

Review URL: https://codereview.chromium.org/23710014

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@16527 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 45d6ef06
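
In outline: when a hot loop hits a patched back edge, the runtime queues an OSR compile job on the background compiler thread and keeps running unoptimized code; on later back-edge hits it asks whether a finished job matching this function and back-edge pc offset is ready, and only then switches frames. A simplified, self-contained sketch of that handshake (plain C++ with illustrative names only, not V8 code):

    #include <cstdint>
    #include <mutex>
    #include <vector>

    // Stand-in for a compile-for-OSR job. In the real change this is an
    // OptimizingCompiler whose CompilationInfo carries the closure and the
    // osr_pc_offset of the requesting back edge.
    struct OsrJob {
      const void* function;    // identity of the JS function (illustrative)
      uint32_t osr_pc_offset;  // back edge that asked for replacement
      bool done;               // set by the background compiler thread
    };

    class OsrQueue {
     public:
      // Main thread: remember that (function, pc_offset) asked for OSR.
      void Queue(OsrJob* job) {
        std::lock_guard<std::mutex> lock(mutex_);
        candidates_.push_back(job);
      }

      // Main thread, on a later back-edge hit: is a finished job waiting?
      OsrJob* FindReady(const void* function, uint32_t pc_offset) {
        std::lock_guard<std::mutex> lock(mutex_);
        for (size_t i = 0; i < candidates_.size(); ++i) {
          OsrJob* job = candidates_[i];
          if (job->done && job->function == function &&
              job->osr_pc_offset == pc_offset) {
            candidates_.erase(candidates_.begin() + i);
            return job;  // caller installs the code and switches frames
          }
        }
        return nullptr;  // not ready yet: keep running unoptimized code
      }

     private:
      std::mutex mutex_;
      std::vector<OsrJob*> candidates_;
    };

The actual change keeps two lists (osr_candidates_ and ready_for_osr_) behind a dedicated mutex on the compiler thread, as the optimizing-compiler-thread diff below shows.
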
......@@ -308,6 +308,14 @@ class CompilationInfo {
return abort_due_to_dependency_;
}
void set_osr_pc_offset(uint32_t pc_offset) {
osr_pc_offset_ = pc_offset;
}
bool HasSameOsrEntry(Handle<JSFunction> function, uint32_t pc_offset) {
return osr_pc_offset_ == pc_offset && function.is_identical_to(closure_);
}
protected:
CompilationInfo(Handle<Script> script,
Zone* zone);
......@@ -402,6 +410,9 @@ class CompilationInfo {
// Compilation mode flag and whether deoptimization is allowed.
Mode mode_;
BailoutId osr_ast_id_;
// The pc_offset corresponding to osr_ast_id_ in unoptimized code.
// We can look this up in the back edge table, but cache it for quick access.
uint32_t osr_pc_offset_;
// Flag whether compilation needs to be aborted due to dependency change.
bool abort_due_to_dependency_;
......@@ -600,7 +611,8 @@ class Compiler : public AllStatic {
// success and false if the compilation resulted in a stack overflow.
static bool CompileLazy(CompilationInfo* info);
static void RecompileConcurrent(Handle<JSFunction> function);
static bool RecompileConcurrent(Handle<JSFunction> function,
uint32_t osr_pc_offset = 0);
// Compile a shared function info object (the function is possibly lazily
// compiled).
......@@ -613,7 +625,11 @@ class Compiler : public AllStatic {
bool is_toplevel,
Handle<Script> script);
static void InstallOptimizedCode(OptimizingCompiler* info);
static bool InstallOptimizedCode(OptimizingCompiler* info);
static BailoutId CompileForOnStackReplacement(Handle<JSFunction> function);
static BailoutId CompileForConcurrentOSR(Handle<JSFunction> function);
#ifdef ENABLE_DEBUGGER_SUPPORT
static bool MakeCodeForLiveEdit(CompilationInfo* info);
......
......@@ -2591,71 +2591,68 @@ bool Deoptimizer::DoOsrTranslateCommand(TranslationIterator* iterator,
void Deoptimizer::PatchInterruptCode(Isolate* isolate,
Code* unoptimized_code) {
Code* unoptimized) {
DisallowHeapAllocation no_gc;
Code* replacement_code =
isolate->builtins()->builtin(Builtins::kOnStackReplacement);
// Iterate over the back edge table and patch every interrupt
// call to an unconditional call to the replacement code.
int loop_nesting_level = unoptimized_code->allow_osr_at_loop_nesting_level();
int loop_nesting_level = unoptimized->allow_osr_at_loop_nesting_level();
for (FullCodeGenerator::BackEdgeTableIterator back_edges(unoptimized_code);
for (FullCodeGenerator::BackEdgeTableIterator back_edges(unoptimized, &no_gc);
!back_edges.Done();
back_edges.Next()) {
if (static_cast<int>(back_edges.loop_depth()) == loop_nesting_level) {
ASSERT_EQ(NOT_PATCHED, GetInterruptPatchState(isolate,
unoptimized_code,
unoptimized,
back_edges.pc()));
PatchInterruptCodeAt(unoptimized_code,
PatchInterruptCodeAt(unoptimized,
back_edges.pc(),
replacement_code);
}
}
unoptimized_code->set_back_edges_patched_for_osr(true);
#ifdef DEBUG
Deoptimizer::VerifyInterruptCode(
isolate, unoptimized_code, loop_nesting_level);
#endif // DEBUG
unoptimized->set_back_edges_patched_for_osr(true);
ASSERT(Deoptimizer::VerifyInterruptCode(
isolate, unoptimized, loop_nesting_level));
}
void Deoptimizer::RevertInterruptCode(Isolate* isolate,
Code* unoptimized_code) {
Code* unoptimized) {
DisallowHeapAllocation no_gc;
Code* interrupt_code =
isolate->builtins()->builtin(Builtins::kInterruptCheck);
// Iterate over the back edge table and revert the patched interrupt calls.
ASSERT(unoptimized_code->back_edges_patched_for_osr());
int loop_nesting_level = unoptimized_code->allow_osr_at_loop_nesting_level();
ASSERT(unoptimized->back_edges_patched_for_osr());
int loop_nesting_level = unoptimized->allow_osr_at_loop_nesting_level();
for (FullCodeGenerator::BackEdgeTableIterator back_edges(unoptimized_code);
for (FullCodeGenerator::BackEdgeTableIterator back_edges(unoptimized, &no_gc);
!back_edges.Done();
back_edges.Next()) {
if (static_cast<int>(back_edges.loop_depth()) <= loop_nesting_level) {
ASSERT_EQ(PATCHED_FOR_OSR, GetInterruptPatchState(isolate,
unoptimized_code,
unoptimized,
back_edges.pc()));
RevertInterruptCodeAt(unoptimized_code, back_edges.pc(), interrupt_code);
RevertInterruptCodeAt(unoptimized, back_edges.pc(), interrupt_code);
}
}
unoptimized_code->set_back_edges_patched_for_osr(false);
unoptimized_code->set_allow_osr_at_loop_nesting_level(0);
#ifdef DEBUG
unoptimized->set_back_edges_patched_for_osr(false);
unoptimized->set_allow_osr_at_loop_nesting_level(0);
// Assert that none of the back edges are patched anymore.
Deoptimizer::VerifyInterruptCode(isolate, unoptimized_code, -1);
#endif // DEBUG
ASSERT(Deoptimizer::VerifyInterruptCode(isolate, unoptimized, -1));
}
#ifdef DEBUG
void Deoptimizer::VerifyInterruptCode(Isolate* isolate,
Code* unoptimized_code,
bool Deoptimizer::VerifyInterruptCode(Isolate* isolate,
Code* unoptimized,
int loop_nesting_level) {
for (FullCodeGenerator::BackEdgeTableIterator back_edges(unoptimized_code);
DisallowHeapAllocation no_gc;
for (FullCodeGenerator::BackEdgeTableIterator back_edges(unoptimized, &no_gc);
!back_edges.Done();
back_edges.Next()) {
uint32_t loop_depth = back_edges.loop_depth();
......@@ -2664,9 +2661,10 @@ void Deoptimizer::VerifyInterruptCode(Isolate* isolate,
// have already been patched.
CHECK_EQ((static_cast<int>(loop_depth) <= loop_nesting_level),
GetInterruptPatchState(isolate,
unoptimized_code,
unoptimized,
back_edges.pc()) != NOT_PATCHED);
}
return true;
}
#endif // DEBUG
......
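
The interrupt-code verifier changes from void to bool so a call can sit directly inside ASSERT(...) and disappear in release builds, replacing the previous #ifdef DEBUG blocks at the call sites. A minimal sketch of that idiom (standard assert standing in for V8's ASSERT):

    #include <cassert>

    // Returns true so the whole call can be wrapped in an assertion macro;
    // in release builds the macro drops its argument, so the verification
    // loop is not even compiled into the caller.
    static bool VerifyPatchedDepths(const int* depths, int count, int max_depth) {
      for (int i = 0; i < count; ++i) {
        if (depths[i] > max_depth) return false;
      }
      return true;
    }

    void PatchBackEdges(int* depths, int count, int max_depth) {
      // ... patching work would go here ...
      assert(VerifyPatchedDepths(depths, count, max_depth));  // no #ifdef needed
    }
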
......@@ -262,7 +262,7 @@ class Deoptimizer : public Malloced {
Address pc_after);
// Verify that all back edges of a certain loop depth are patched.
static void VerifyInterruptCode(Isolate* isolate,
static bool VerifyInterruptCode(Isolate* isolate,
Code* unoptimized_code,
int loop_nesting_level);
#endif // DEBUG
......
......@@ -331,6 +331,8 @@ DEFINE_int(concurrent_recompilation_queue_length, 8,
"the length of the concurrent compilation queue")
DEFINE_int(concurrent_recompilation_delay, 0,
"artificial compilation delay in ms")
DEFINE_bool(concurrent_osr, false,
"concurrent on-stack replacement")
DEFINE_bool(omit_map_checks_for_leaf_maps, true,
"do not emit check maps for constant values that have a leaf map, "
......
......@@ -141,7 +141,8 @@ class FullCodeGenerator: public AstVisitor {
class BackEdgeTableIterator {
public:
explicit BackEdgeTableIterator(Code* unoptimized) {
explicit BackEdgeTableIterator(Code* unoptimized,
DisallowHeapAllocation* required) {
ASSERT(unoptimized->kind() == Code::FUNCTION);
instruction_start_ = unoptimized->instruction_start();
cursor_ = instruction_start_ + unoptimized->back_edge_table_offset();
......@@ -192,7 +193,6 @@ class FullCodeGenerator: public AstVisitor {
Address end_;
Address instruction_start_;
uint32_t table_length_;
DisallowHeapAllocation no_gc_while_iterating_over_raw_addresses_;
DISALLOW_COPY_AND_ASSIGN(BackEdgeTableIterator);
};
......
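
BackEdgeTableIterator no longer carries its own DisallowHeapAllocation member; instead its constructor takes a pointer to one, so every construction site must visibly hold a no-GC scope while the iterator's raw addresses are in use. The same "scope witness" pattern in stand-alone C++ (illustrative types, not the V8 classes):

    // A scope object whose mere existence says "nothing may move objects here".
    class NoRelocationScope {
     public:
      NoRelocationScope() {}
     private:
      NoRelocationScope(const NoRelocationScope&);  // non-copyable
      void operator=(const NoRelocationScope&);
    };

    class RawTableIterator {
     public:
      // The second parameter is never used; it only forces every caller to
      // name an enclosing no-GC scope, mirroring BackEdgeTableIterator(code, &no_gc).
      RawTableIterator(const unsigned char* table_start,
                       NoRelocationScope* /* required */)
          : cursor_(table_start) {}

      const unsigned char* cursor() const { return cursor_; }

     private:
      const unsigned char* cursor_;
    };

    void Example(const unsigned char* table) {
      NoRelocationScope no_gc;           // the scope is visible at the call site
      RawTableIterator it(table, &no_gc);
      (void)it.cursor();
    }
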
......@@ -10376,6 +10376,18 @@ void Code::ClearTypeFeedbackCells(Heap* heap) {
}
BailoutId Code::TranslatePcOffsetToAstId(uint32_t pc_offset) {
DisallowHeapAllocation no_gc;
ASSERT(kind() == FUNCTION);
for (FullCodeGenerator::BackEdgeTableIterator it(this, &no_gc);
!it.Done();
it.Next()) {
if (it.pc_offset() == pc_offset) return it.ast_id();
}
return BailoutId::None();
}
bool Code::allowed_in_shared_map_code_cache() {
return is_keyed_load_stub() || is_keyed_store_stub() ||
(is_compare_ic_stub() &&
......@@ -10836,7 +10848,8 @@ void Code::Disassemble(const char* name, FILE* out) {
// If there is no back edge table, the "table start" will be at or after
// (due to alignment) the end of the instruction stream.
if (static_cast<int>(offset) < instruction_size()) {
FullCodeGenerator::BackEdgeTableIterator back_edges(this);
DisallowHeapAllocation no_gc;
FullCodeGenerator::BackEdgeTableIterator back_edges(this, &no_gc);
PrintF(out, "Back edges (size = %u)\n", back_edges.table_length());
PrintF(out, "ast_id pc_offset loop_depth\n");
......
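
The new Code::TranslatePcOffsetToAstId is a linear scan over the back edge table; the concurrent OSR path uses it to turn the cached pc offset back into an AST id once the optimized code is ready. A simplified stand-alone version of that lookup (made-up entry struct; the real table is packed into the code object):

    #include <cstdint>
    #include <vector>

    // One row of the (illustrative) back edge table.
    struct BackEdgeEntry {
      uint32_t ast_id;     // bailout point of the loop header
      uint32_t pc_offset;  // offset of the back edge in unoptimized code
      uint32_t loop_depth;
    };

    const uint32_t kNoAstId = 0xFFFFFFFFu;  // plays the role of BailoutId::None()

    // Linear scan, like the new Code::TranslatePcOffsetToAstId.
    uint32_t TranslatePcOffsetToAstId(const std::vector<BackEdgeEntry>& table,
                                      uint32_t pc_offset) {
      for (size_t i = 0; i < table.size(); ++i) {
        if (table[i].pc_offset == pc_offset) return table[i].ast_id;
      }
      return kNoAstId;
    }
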
......@@ -5107,6 +5107,8 @@ class Code: public HeapObject {
void ClearInlineCaches();
void ClearTypeFeedbackCells(Heap* heap);
BailoutId TranslatePcOffsetToAstId(uint32_t pc_offset);
#define DECLARE_CODE_AGE_ENUM(X) k##X##CodeAge,
enum Age {
kNoAge = 0,
......
......@@ -108,12 +108,18 @@ void OptimizingCompilerThread::CompileNext() {
// The function may have already been optimized by OSR. Simply continue.
// Use a mutex to make sure that functions marked for install
// are always also queued.
if (!optimizing_compiler->info()->osr_ast_id().IsNone()) {
ASSERT(FLAG_concurrent_osr);
LockGuard<Mutex> access_osr_lists(&osr_list_mutex_);
osr_candidates_.RemoveElement(optimizing_compiler);
ready_for_osr_.Add(optimizing_compiler);
} else {
LockGuard<Mutex> mark_and_queue(&install_mutex_);
{ Heap::RelocationLock relocation_lock(isolate_->heap());
Heap::RelocationLock relocation_lock(isolate_->heap());
AllowHandleDereference ahd;
optimizing_compiler->info()->closure()->MarkForInstallingRecompiledCode();
}
output_queue_.Enqueue(optimizing_compiler);
}
}
......@@ -145,6 +151,9 @@ void OptimizingCompilerThread::FlushOutputQueue(bool restore_function_code) {
}
delete info;
}
osr_candidates_.Clear();
RemoveStaleOSRCandidates(0);
}
......@@ -179,6 +188,10 @@ void OptimizingCompilerThread::Stop() {
PrintF(" ** Compiler thread did %.2f%% useful work\n", percentage);
}
if (FLAG_trace_osr && FLAG_concurrent_osr) {
PrintF("[COSR hit rate %d / %d]\n", osr_hits_, osr_attempts_);
}
Join();
}
......@@ -194,6 +207,10 @@ void OptimizingCompilerThread::InstallOptimizedFunctions() {
}
Compiler::InstallOptimizedCode(compiler);
}
// Remove the oldest OSR candidates that are ready so that we
// only have limited number of them waiting.
if (FLAG_concurrent_osr) RemoveStaleOSRCandidates();
}
......@@ -202,12 +219,62 @@ void OptimizingCompilerThread::QueueForOptimization(
ASSERT(IsQueueAvailable());
ASSERT(!IsOptimizerThread());
Barrier_AtomicIncrement(&queue_length_, static_cast<Atomic32>(1));
if (optimizing_compiler->info()->osr_ast_id().IsNone()) {
optimizing_compiler->info()->closure()->MarkInRecompileQueue();
} else {
LockGuard<Mutex> access_osr_lists(&osr_list_mutex_);
osr_candidates_.Add(optimizing_compiler);
osr_attempts_++;
}
input_queue_.Enqueue(optimizing_compiler);
input_queue_semaphore_.Signal();
}
OptimizingCompiler* OptimizingCompilerThread::FindReadyOSRCandidate(
Handle<JSFunction> function, uint32_t osr_pc_offset) {
ASSERT(!IsOptimizerThread());
LockGuard<Mutex> access_osr_lists(&osr_list_mutex_);
for (int i = 0; i < ready_for_osr_.length(); i++) {
if (ready_for_osr_[i]->info()->HasSameOsrEntry(function, osr_pc_offset)) {
osr_hits_++;
return ready_for_osr_.Remove(i);
}
}
return NULL;
}
bool OptimizingCompilerThread::IsQueuedForOSR(Handle<JSFunction> function,
uint32_t osr_pc_offset) {
ASSERT(!IsOptimizerThread());
LockGuard<Mutex> access_osr_lists(&osr_list_mutex_);
for (int i = 0; i < osr_candidates_.length(); i++) {
if (osr_candidates_[i]->info()->HasSameOsrEntry(function, osr_pc_offset)) {
return true;
}
}
return false;
}
void OptimizingCompilerThread::RemoveStaleOSRCandidates(int limit) {
ASSERT(!IsOptimizerThread());
LockGuard<Mutex> access_osr_lists(&osr_list_mutex_);
while (ready_for_osr_.length() > limit) {
OptimizingCompiler* compiler = ready_for_osr_.Remove(0);
CompilationInfo* throw_away = compiler->info();
if (FLAG_trace_osr) {
PrintF("[COSR - Discarded ");
throw_away->closure()->PrintName();
PrintF(", AST id %d]\n",
throw_away->osr_ast_id().ToInt());
}
delete throw_away;
}
}
#ifdef DEBUG
bool OptimizingCompilerThread::IsOptimizerThread() {
if (!FLAG_concurrent_recompilation) return false;
......
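
On the compiler thread, CompileNext now routes finished jobs two ways: regular recompilations are marked for install and pushed onto the output queue, while OSR jobs move from the candidate list to the ready list under the OSR mutex, where the main thread can later claim them. Roughly (simplified analog, not the V8 types):

    #include <deque>
    #include <mutex>
    #include <vector>

    struct Job {
      bool is_osr;  // compiled for on-stack replacement?
    };

    class CompilerThreadState {
     public:
      // Background thread: called after a job finishes compiling.
      void RouteFinishedJob(Job* job) {
        if (job->is_osr) {
          // OSR results are not installed eagerly; they wait until the running
          // frame reaches the matching back edge and asks for them.
          std::lock_guard<std::mutex> lock(osr_mutex_);
          Remove(&osr_candidates_, job);
          ready_for_osr_.push_back(job);
        } else {
          std::lock_guard<std::mutex> lock(install_mutex_);
          output_queue_.push_back(job);  // main thread installs it later
        }
      }

     private:
      static void Remove(std::vector<Job*>* list, Job* job) {
        for (size_t i = 0; i < list->size(); ++i) {
          if ((*list)[i] == job) {
            list->erase(list->begin() + i);
            return;
          }
        }
      }

      std::mutex osr_mutex_;
      std::mutex install_mutex_;
      std::vector<Job*> osr_candidates_;
      std::vector<Job*> ready_for_osr_;
      std::deque<Job*> output_queue_;
    };
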
......@@ -30,6 +30,7 @@
#include "atomicops.h"
#include "flags.h"
#include "list.h"
#include "platform.h"
#include "platform/mutex.h"
#include "platform/time.h"
......@@ -51,7 +52,11 @@ class OptimizingCompilerThread : public Thread {
#endif
isolate_(isolate),
stop_semaphore_(0),
input_queue_semaphore_(0) {
input_queue_semaphore_(0),
osr_candidates_(2),
ready_for_osr_(2),
osr_hits_(0),
osr_attempts_(0) {
NoBarrier_Store(&stop_thread_, static_cast<AtomicWord>(CONTINUE));
NoBarrier_Store(&queue_length_, static_cast<AtomicWord>(0));
}
......@@ -62,6 +67,13 @@ class OptimizingCompilerThread : public Thread {
void Flush();
void QueueForOptimization(OptimizingCompiler* optimizing_compiler);
void InstallOptimizedFunctions();
OptimizingCompiler* FindReadyOSRCandidate(Handle<JSFunction> function,
uint32_t osr_pc_offset);
bool IsQueuedForOSR(Handle<JSFunction> function, uint32_t osr_pc_offset);
// Remove the oldest OSR candidates that are ready so that we
// only have |limit| left waiting.
void RemoveStaleOSRCandidates(int limit = kReadyForOSRLimit);
inline bool IsQueueAvailable() {
// We don't need a barrier since we have a data dependency right
......@@ -86,7 +98,6 @@ class OptimizingCompilerThread : public Thread {
void FlushInputQueue(bool restore_function_code);
void FlushOutputQueue(bool restore_function_code);
void CompileNext();
#ifdef DEBUG
......@@ -97,13 +108,27 @@ class OptimizingCompilerThread : public Thread {
Isolate* isolate_;
Semaphore stop_semaphore_;
Semaphore input_queue_semaphore_;
// Queue of incoming recompilation tasks (including OSR).
UnboundQueue<OptimizingCompiler*> input_queue_;
// Queue of recompilation tasks ready to be installed (excluding OSR).
UnboundQueue<OptimizingCompiler*> output_queue_;
// List of all OSR related recompilation tasks (both incoming and ready ones).
List<OptimizingCompiler*> osr_candidates_;
// List of recompilation tasks ready for OSR.
List<OptimizingCompiler*> ready_for_osr_;
Mutex install_mutex_;
volatile AtomicWord stop_thread_;
volatile Atomic32 queue_length_;
TimeDelta time_spent_compiling_;
TimeDelta time_spent_total_;
Mutex osr_list_mutex_;
int osr_hits_;
int osr_attempts_;
static const int kReadyForOSRLimit = 4;
};
} } // namespace v8::internal
......
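
The ready_for_osr_ list is bounded: each time optimized functions are installed, ready OSR results beyond kReadyForOSRLimit (4 here) are discarded oldest-first, since an OSR result is only useful while the requesting activation is still spinning in its loop. A sketch of that eviction policy (illustrative types):

    #include <deque>

    struct ReadyJob { int id; };

    const int kReadyLimit = 4;  // mirrors kReadyForOSRLimit in this change

    // Drop the oldest ready results until at most `limit` remain; called with
    // limit = 0 when flushing, and with the default limit after each install pass.
    void RemoveStaleCandidates(std::deque<ReadyJob*>* ready,
                               int limit = kReadyLimit) {
      while (static_cast<int>(ready->size()) > limit) {
        ReadyJob* oldest = ready->front();
        ready->pop_front();
        delete oldest;  // the real code deletes the job's CompilationInfo
      }
    }
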
......@@ -230,8 +230,7 @@ class LockGuard V8_FINAL {
private:
Mutex* mutex_;
LockGuard(const LockGuard<Mutex>& other) V8_DELETE;
LockGuard<Mutex>& operator=(const LockGuard<Mutex>& other) V8_DELETE;
DISALLOW_COPY_AND_ASSIGN(LockGuard);
};
} } // namespace v8::internal
......
......@@ -172,9 +172,9 @@ void RuntimeProfiler::AttemptOnStackReplacement(JSFunction* function) {
// any back edge in any unoptimized frame will trigger on-stack
// replacement for that frame.
if (FLAG_trace_osr) {
PrintF("[patching back edges in ");
PrintF("[OSR - patching back edges in ");
function->PrintName();
PrintF(" for on-stack replacement]\n");
PrintF("]\n");
}
Deoptimizer::PatchInterruptCode(isolate_, shared->code());
......
......@@ -8343,7 +8343,9 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_ConcurrentRecompile) {
}
function->shared()->code()->set_profiler_ticks(0);
ASSERT(FLAG_concurrent_recompilation);
Compiler::RecompileConcurrent(function);
if (!Compiler::RecompileConcurrent(function)) {
function->ReplaceCode(function->shared()->code());
}
return isolate->heap()->undefined_value();
}
......@@ -8512,7 +8514,11 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_OptimizeFunctionOnNextCall) {
unoptimized->kind() == Code::FUNCTION) {
CONVERT_ARG_HANDLE_CHECKED(String, type, 1);
if (type->IsOneByteEqualTo(STATIC_ASCII_VECTOR("osr"))) {
for (int i = 0; i <= Code::kMaxLoopNestingMarker; i++) {
// Start patching from the currently patched loop nesting level.
int current_level = unoptimized->allow_osr_at_loop_nesting_level();
ASSERT(Deoptimizer::VerifyInterruptCode(
isolate, unoptimized, current_level));
for (int i = current_level + 1; i <= Code::kMaxLoopNestingMarker; i++) {
unoptimized->set_allow_osr_at_loop_nesting_level(i);
isolate->runtime_profiler()->AttemptOnStackReplacement(*function);
}
......@@ -8586,98 +8592,19 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_CompileForOnStackReplacement) {
// We're not prepared to handle a function with arguments object.
ASSERT(!function->shared()->uses_arguments());
// We have hit a back edge in an unoptimized frame for a function that was
// selected for on-stack replacement. Find the unoptimized code object.
Handle<Code> unoptimized(function->shared()->code(), isolate);
// Keep track of whether we've succeeded in optimizing.
bool succeeded = unoptimized->optimizable();
if (succeeded) {
// If we are trying to do OSR when there are already optimized
// activations of the function, it means (a) the function is directly or
// indirectly recursive and (b) an optimized invocation has been
// deoptimized so that we are currently in an unoptimized activation.
// Check for optimized activations of this function.
JavaScriptFrameIterator it(isolate);
while (succeeded && !it.done()) {
JavaScriptFrame* frame = it.frame();
succeeded = !frame->is_optimized() || frame->function() != *function;
it.Advance();
}
}
BailoutId ast_id = BailoutId::None();
if (succeeded) {
// The top JS function is this one, the PC is somewhere in the
// unoptimized code.
JavaScriptFrameIterator it(isolate);
JavaScriptFrame* frame = it.frame();
ASSERT(frame->function() == *function);
ASSERT(frame->LookupCode() == *unoptimized);
ASSERT(unoptimized->contains(frame->pc()));
// Use linear search of the unoptimized code's back edge table to find
// the AST id matching the PC.
uint32_t target_pc_offset =
static_cast<uint32_t>(frame->pc() - unoptimized->instruction_start());
uint32_t loop_depth = 0;
for (FullCodeGenerator::BackEdgeTableIterator back_edges(*unoptimized);
!back_edges.Done();
back_edges.Next()) {
if (back_edges.pc_offset() == target_pc_offset) {
ast_id = back_edges.ast_id();
loop_depth = back_edges.loop_depth();
break;
}
}
ASSERT(!ast_id.IsNone());
if (FLAG_trace_osr) {
PrintF("[replacing on-stack at AST id %d, loop depth %d in ",
ast_id.ToInt(), loop_depth);
function->PrintName();
PrintF("]\n");
}
// Try to compile the optimized code. A true return value from
// CompileOptimized means that compilation succeeded, not necessarily
// that optimization succeeded.
if (JSFunction::CompileOptimized(function, ast_id, CLEAR_EXCEPTION) &&
function->IsOptimized()) {
DeoptimizationInputData* data = DeoptimizationInputData::cast(
function->code()->deoptimization_data());
if (data->OsrPcOffset()->value() >= 0) {
if (FLAG_trace_osr) {
PrintF("[on-stack replacement offset %d in optimized code]\n",
data->OsrPcOffset()->value());
}
ASSERT(BailoutId(data->OsrAstId()->value()) == ast_id);
} else {
// We may never generate the desired OSR entry if we emit an
// early deoptimize.
succeeded = false;
}
} else {
succeeded = false;
}
}
// Revert to the original interrupt calls in the original unoptimized code.
if (FLAG_trace_osr) {
PrintF("[restoring original interrupt calls in ");
function->PrintName();
PrintF("]\n");
}
Deoptimizer::RevertInterruptCode(isolate, *unoptimized);
// If the optimization attempt succeeded, return the AST id tagged as a
// smi. This tells the builtin that we need to translate the unoptimized
// frame to an optimized one.
if (succeeded) {
BailoutId ast_id =
(FLAG_concurrent_recompilation && FLAG_concurrent_osr)
? Compiler::CompileForConcurrentOSR(function)
: Compiler::CompileForOnStackReplacement(function);
if (!ast_id.IsNone()) {
ASSERT(function->code()->kind() == Code::OPTIMIZED_FUNCTION);
return Smi::FromInt(ast_id.ToInt());
} else {
if (function->IsMarkedForLazyRecompilation()) {
if (function->IsMarkedForLazyRecompilation() ||
function->IsMarkedForConcurrentRecompilation()) {
function->ReplaceCode(function->shared()->code());
}
return Smi::FromInt(-1);
......
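
Runtime_CompileForOnStackReplacement now only dispatches: with --concurrent-recompilation and --concurrent-osr it goes through the compiler thread (queue a job on the first back-edge hit, pick up the finished one on a later hit), otherwise it compiles synchronously as before, and a BailoutId of None means "keep looping in unoptimized code". A tiny model of that per-request decision (illustrative, not the V8 signatures):

    // All state and names below are made up for illustration.
    struct OsrState {
      bool queued;      // a background job for this (function, pc) exists
      bool ready;       // that job has finished compiling
      int osr_ast_id;   // AST id recorded for the requesting back edge
    };

    const int kNoAstId = -1;  // plays the role of BailoutId::None()

    // One back-edge hit: either pick up a finished job, or start one and keep
    // running unoptimized code in the meantime.
    int CompileForConcurrentOsr(OsrState* s) {
      if (s->ready) {                  // finished: enter the optimized code
        s->queued = s->ready = false;
        return s->osr_ast_id;
      }
      if (s->queued) return kNoAstId;  // still compiling; loop on
      s->queued = true;                // first hit: hand the job off
      return kNoAstId;
    }

If no optimized entry ever materializes and the function is still marked for (concurrent) recompilation, the runtime falls back to the unoptimized code via ReplaceCode, as the hunk above shows.
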
......@@ -26,6 +26,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Flags: --optimize-for-in --allow-natives-syntax
// Flags: --no-concurrent-osr
// Test for-in support in Crankshaft. For simplicity this tests assumes certain
// fixed iteration order for properties and will have to be adjusted if V8
......
......@@ -39,6 +39,8 @@ function g() {
var o2 = [{ x: 1.5, y: 1 }];
return o2;
}
// Clear type feedback from previous stress runs.
%ClearFunctionTypeFeedback(f);
return f;
}
......