Commit 10b45a5b authored by yangguo@chromium.org

Less aggressive polling when concurrently compiling for OSR.

Changes include:
- completed concurrent OSR tasks trigger a stack check interrupt.
- polling for completion is now guarded by a stack check.
- circular buffer for completed OSR tasks instead of a list (see the sketch below).

R=titzer@chromium.org
BUG=

Review URL: https://codereview.chromium.org/24237009

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@16934 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent d268078c
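As a rough illustration of the cyclic-buffer change described above, here is a minimal, self-contained C++ sketch. The names OsrTask, OsrBuffer, Add and TakeReady are illustrative only and are not identifiers from this patch, which stores OptimizingCompiler* entries in osr_buffer_, checks IsWaitingForInstall(), and scans the buffer in FindReadyOSRCandidate.

// A minimal sketch of a cyclic buffer for completed OSR tasks, assuming a
// simplified task type. Not the V8 implementation.
#include <cstddef>
#include <vector>

struct OsrTask {
  int osr_pc_offset;          // OSR entry this task was compiled for.
  bool waiting_for_install;   // Compilation finished, result not yet used.
};

class OsrBuffer {
 public:
  // The patch sizes the buffer as recompilation queue length + 4, so an
  // empty or stale slot should always be reachable.
  explicit OsrBuffer(size_t size) : slots_(size, nullptr), cursor_(0) {}

  ~OsrBuffer() {
    for (OsrTask* task : slots_) delete task;
  }

  // Store into the next empty slot, or evict (and discard) the next task
  // that is merely waiting for install, in the spirit of AddToOsrBuffer.
  void Add(OsrTask* task) {
    for (size_t scanned = 0; scanned < slots_.size(); ++scanned) {
      OsrTask* current = slots_[cursor_];
      if (current == nullptr || current->waiting_for_install) break;
      Advance();
    }
    delete slots_[cursor_];   // No-op for an empty slot; discards a stale task.
    slots_[cursor_] = task;
    Advance();
  }

  // Linear scan for a completed task matching the requested OSR entry,
  // mirroring FindReadyOSRCandidate. The caller takes ownership.
  OsrTask* TakeReady(int osr_pc_offset) {
    for (size_t i = 0; i < slots_.size(); ++i) {
      OsrTask* task = slots_[i];
      if (task != nullptr && task->waiting_for_install &&
          task->osr_pc_offset == osr_pc_offset) {
        slots_[i] = nullptr;
        return task;
      }
    }
    return nullptr;
  }

 private:
  void Advance() { cursor_ = (cursor_ + 1) % slots_.size(); }

  std::vector<OsrTask*> slots_;
  size_t cursor_;
};

The patch pairs this buffer with the first two bullets: when a task completes, the compiler thread requests a stack-check interrupt, and back edges queued for OSR are patched to the new OsrAfterStackCheck builtin, so generated code only re-enters the runtime and polls the buffer once the stack limit check actually fires.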
......@@ -937,6 +937,24 @@ void Builtins::Generate_OnStackReplacement(MacroAssembler* masm) {
}
void Builtins::Generate_OsrAfterStackCheck(MacroAssembler* masm) {
// We check the stack limit as an indicator that recompilation might be done.
Label ok;
__ LoadRoot(ip, Heap::kStackLimitRootIndex);
__ cmp(sp, Operand(ip));
__ b(hs, &ok);
{
FrameScope scope(masm, StackFrame::INTERNAL);
__ CallRuntime(Runtime::kStackGuard, 0);
}
__ Jump(masm->isolate()->builtins()->OnStackReplacement(),
RelocInfo::CODE_TARGET);
__ bind(&ok);
__ Ret();
}
void Builtins::Generate_FunctionCall(MacroAssembler* masm) {
// 1. Make sure we have at least one argument.
// r0: actual number of arguments
......
......@@ -4893,96 +4893,86 @@ FullCodeGenerator::NestedStatement* FullCodeGenerator::TryFinally::Exit(
static const int32_t kBranchBeforeInterrupt = 0x5a000004;
// The back edge bookkeeping code matches the pattern:
//
// <decrement profiling counter>
// 2a 00 00 01 bpl ok
// e5 9f c? ?? ldr ip, [pc, <interrupt stub address>]
// e1 2f ff 3c blx ip
// ok-label
//
// We patch the code to the following form:
//
// <decrement profiling counter>
// e1 a0 00 00 mov r0, r0 (NOP)
// e5 9f c? ?? ldr ip, [pc, <on-stack replacement address>]
// e1 2f ff 3c blx ip
// ok-label
void BackEdgeTable::PatchAt(Code* unoptimized_code,
Address pc_after,
Address pc,
BackEdgeState target_state,
Code* replacement_code) {
static const int kInstrSize = Assembler::kInstrSize;
// Turn the jump into nops.
CodePatcher patcher(pc_after - 3 * kInstrSize, 1);
patcher.masm()->nop();
Address branch_address = pc - 3 * kInstrSize;
CodePatcher patcher(branch_address, 1);
switch (target_state) {
case INTERRUPT:
// <decrement profiling counter>
// 2a 00 00 01 bpl ok
// e5 9f c? ?? ldr ip, [pc, <interrupt stub address>]
// e1 2f ff 3c blx ip
// ok-label
patcher.masm()->b(4 * kInstrSize, pl); // Jump offset is 4 instructions.
ASSERT_EQ(kBranchBeforeInterrupt, Memory::int32_at(branch_address));
break;
case ON_STACK_REPLACEMENT:
case OSR_AFTER_STACK_CHECK:
// <decrement profiling counter>
// e1 a0 00 00 mov r0, r0 (NOP)
// e5 9f c? ?? ldr ip, [pc, <on-stack replacement address>]
// e1 2f ff 3c blx ip
// ok-label
patcher.masm()->nop();
break;
}
Address pc_immediate_load_address = pc - 2 * kInstrSize;
// Replace the call address.
uint32_t interrupt_address_offset = Memory::uint16_at(pc_after -
2 * kInstrSize) & 0xfff;
Address interrupt_address_pointer = pc_after + interrupt_address_offset;
uint32_t interrupt_address_offset =
Memory::uint16_at(pc_immediate_load_address) & 0xfff;
Address interrupt_address_pointer = pc + interrupt_address_offset;
Memory::uint32_at(interrupt_address_pointer) =
reinterpret_cast<uint32_t>(replacement_code->entry());
unoptimized_code->GetHeap()->incremental_marking()->RecordCodeTargetPatch(
unoptimized_code, pc_after - 2 * kInstrSize, replacement_code);
unoptimized_code, pc_immediate_load_address, replacement_code);
}
void BackEdgeTable::RevertAt(Code* unoptimized_code,
Address pc_after,
Code* interrupt_code) {
static const int kInstrSize = Assembler::kInstrSize;
// Restore the original jump.
CodePatcher patcher(pc_after - 3 * kInstrSize, 1);
patcher.masm()->b(4 * kInstrSize, pl); // ok-label is 4 instructions later.
ASSERT_EQ(kBranchBeforeInterrupt,
Memory::int32_at(pc_after - 3 * kInstrSize));
// Restore the original call address.
uint32_t interrupt_address_offset = Memory::uint16_at(pc_after -
2 * kInstrSize) & 0xfff;
Address interrupt_address_pointer = pc_after + interrupt_address_offset;
Memory::uint32_at(interrupt_address_pointer) =
reinterpret_cast<uint32_t>(interrupt_code->entry());
interrupt_code->GetHeap()->incremental_marking()->RecordCodeTargetPatch(
unoptimized_code, pc_after - 2 * kInstrSize, interrupt_code);
}
#ifdef DEBUG
BackEdgeTable::BackEdgeState BackEdgeTable::GetBackEdgeState(
Isolate* isolate,
Code* unoptimized_code,
Address pc_after) {
Address pc) {
static const int kInstrSize = Assembler::kInstrSize;
ASSERT(Memory::int32_at(pc_after - kInstrSize) == kBlxIp);
ASSERT(Memory::int32_at(pc - kInstrSize) == kBlxIp);
Address branch_address = pc - 3 * kInstrSize;
Address pc_immediate_load_address = pc - 2 * kInstrSize;
uint32_t interrupt_address_offset =
Memory::uint16_at(pc_after - 2 * kInstrSize) & 0xfff;
Address interrupt_address_pointer = pc_after + interrupt_address_offset;
Memory::uint16_at(pc_immediate_load_address) & 0xfff;
Address interrupt_address_pointer = pc + interrupt_address_offset;
if (Assembler::IsNop(Assembler::instr_at(pc_after - 3 * kInstrSize))) {
ASSERT(Assembler::IsLdrPcImmediateOffset(
Assembler::instr_at(pc_after - 2 * kInstrSize)));
Code* osr_builtin =
isolate->builtins()->builtin(Builtins::kOnStackReplacement);
ASSERT(reinterpret_cast<uint32_t>(osr_builtin->entry()) ==
Memory::uint32_at(interrupt_address_pointer));
return ON_STACK_REPLACEMENT;
} else {
// Get the interrupt stub code object to match against from the cache.
Code* interrupt_builtin =
isolate->builtins()->builtin(Builtins::kInterruptCheck);
if (Memory::int32_at(branch_address) == kBranchBeforeInterrupt) {
ASSERT(Memory::uint32_at(interrupt_address_pointer) ==
reinterpret_cast<uint32_t>(
isolate->builtins()->InterruptCheck()->entry()));
ASSERT(Assembler::IsLdrPcImmediateOffset(
Assembler::instr_at(pc_after - 2 * kInstrSize)));
ASSERT_EQ(kBranchBeforeInterrupt,
Memory::int32_at(pc_after - 3 * kInstrSize));
ASSERT(reinterpret_cast<uint32_t>(interrupt_builtin->entry()) ==
Memory::uint32_at(interrupt_address_pointer));
Assembler::instr_at(pc_immediate_load_address)));
return INTERRUPT;
}
ASSERT(Assembler::IsNop(Assembler::instr_at(branch_address)));
ASSERT(Assembler::IsLdrPcImmediateOffset(
Assembler::instr_at(pc_immediate_load_address)));
if (Memory::uint32_at(interrupt_address_pointer) ==
reinterpret_cast<uint32_t>(
isolate->builtins()->OnStackReplacement()->entry())) {
return ON_STACK_REPLACEMENT;
}
ASSERT(Memory::uint32_at(interrupt_address_pointer) ==
reinterpret_cast<uint32_t>(
isolate->builtins()->OsrAfterStackCheck()->entry()));
return OSR_AFTER_STACK_CHECK;
}
#endif // DEBUG
} } // namespace v8::internal
......
......@@ -214,6 +214,8 @@ enum BuiltinExtraArguments {
Code::kNoExtraICState) \
V(InterruptCheck, BUILTIN, UNINITIALIZED, \
Code::kNoExtraICState) \
V(OsrAfterStackCheck, BUILTIN, UNINITIALIZED, \
Code::kNoExtraICState) \
V(StackCheck, BUILTIN, UNINITIALIZED, \
Code::kNoExtraICState) \
CODE_AGE_LIST_WITH_ARG(DECLARE_CODE_AGE_BUILTIN, V)
......@@ -397,7 +399,7 @@ class Builtins {
static void Generate_StringConstructCode(MacroAssembler* masm);
static void Generate_OnStackReplacement(MacroAssembler* masm);
static void Generate_OsrAfterStackCheck(MacroAssembler* masm);
static void Generate_InterruptCheck(MacroAssembler* masm);
static void Generate_StackCheck(MacroAssembler* masm);
......
......@@ -86,6 +86,7 @@ class CompilationInfo {
ScriptDataImpl* pre_parse_data() const { return pre_parse_data_; }
Handle<Context> context() const { return context_; }
BailoutId osr_ast_id() const { return osr_ast_id_; }
uint32_t osr_pc_offset() const { return osr_pc_offset_; }
int opt_count() const { return opt_count_; }
int num_parameters() const;
int num_heap_slots() const;
......@@ -532,6 +533,13 @@ class OptimizingCompiler: public ZoneObject {
return SetLastStatus(BAILED_OUT);
}
void WaitForInstall() {
ASSERT(!info_->osr_ast_id().IsNone());
awaiting_install_ = true;
}
bool IsWaitingForInstall() { return awaiting_install_; }
private:
CompilationInfo* info_;
HOptimizedGraphBuilder* graph_builder_;
......@@ -541,6 +549,7 @@ class OptimizingCompiler: public ZoneObject {
TimeDelta time_taken_to_optimize_;
TimeDelta time_taken_to_codegen_;
Status last_status_;
bool awaiting_install_;
MUST_USE_RESULT Status SetLastStatus(Status status) {
last_status_ = status;
......
......@@ -322,6 +322,7 @@ DEFINE_int(concurrent_recompilation_delay, 0,
"artificial compilation delay in ms")
DEFINE_bool(concurrent_osr, false,
"concurrent on-stack replacement")
DEFINE_implication(concurrent_osr, concurrent_recompilation)
DEFINE_bool(omit_map_checks_for_leaf_maps, true,
"do not emit check maps for constant values that have a leaf map, "
......
......@@ -1618,8 +1618,7 @@ bool FullCodeGenerator::TryLiteralCompare(CompareOperation* expr) {
void BackEdgeTable::Patch(Isolate* isolate,
Code* unoptimized) {
DisallowHeapAllocation no_gc;
Code* replacement_code =
isolate->builtins()->builtin(Builtins::kOnStackReplacement);
Code* patch = isolate->builtins()->builtin(Builtins::kOnStackReplacement);
// Iterate over the back edge table and patch every interrupt
// call to an unconditional call to the replacement code.
......@@ -1631,7 +1630,7 @@ void BackEdgeTable::Patch(Isolate* isolate,
ASSERT_EQ(INTERRUPT, GetBackEdgeState(isolate,
unoptimized,
back_edges.pc(i)));
PatchAt(unoptimized, back_edges.pc(i), replacement_code);
PatchAt(unoptimized, back_edges.pc(i), ON_STACK_REPLACEMENT, patch);
}
}
......@@ -1643,8 +1642,7 @@ void BackEdgeTable::Patch(Isolate* isolate,
void BackEdgeTable::Revert(Isolate* isolate,
Code* unoptimized) {
DisallowHeapAllocation no_gc;
Code* interrupt_code =
isolate->builtins()->builtin(Builtins::kInterruptCheck);
Code* patch = isolate->builtins()->builtin(Builtins::kInterruptCheck);
// Iterate over the back edge table and revert the patched interrupt calls.
ASSERT(unoptimized->back_edges_patched_for_osr());
......@@ -1653,10 +1651,10 @@ void BackEdgeTable::Revert(Isolate* isolate,
BackEdgeTable back_edges(unoptimized, &no_gc);
for (uint32_t i = 0; i < back_edges.length(); i++) {
if (static_cast<int>(back_edges.loop_depth(i)) <= loop_nesting_level) {
ASSERT_EQ(ON_STACK_REPLACEMENT, GetBackEdgeState(isolate,
unoptimized,
back_edges.pc(i)));
RevertAt(unoptimized, back_edges.pc(i), interrupt_code);
ASSERT_NE(INTERRUPT, GetBackEdgeState(isolate,
unoptimized,
back_edges.pc(i)));
PatchAt(unoptimized, back_edges.pc(i), INTERRUPT, patch);
}
}
......@@ -1667,6 +1665,29 @@ void BackEdgeTable::Revert(Isolate* isolate,
}
void BackEdgeTable::AddStackCheck(CompilationInfo* info) {
DisallowHeapAllocation no_gc;
Isolate* isolate = info->isolate();
Code* code = info->shared_info()->code();
Address pc = code->instruction_start() + info->osr_pc_offset();
ASSERT_EQ(ON_STACK_REPLACEMENT, GetBackEdgeState(isolate, code, pc));
Code* patch = isolate->builtins()->builtin(Builtins::kOsrAfterStackCheck);
PatchAt(code, pc, OSR_AFTER_STACK_CHECK, patch);
}
void BackEdgeTable::RemoveStackCheck(CompilationInfo* info) {
DisallowHeapAllocation no_gc;
Isolate* isolate = info->isolate();
Code* code = info->shared_info()->code();
Address pc = code->instruction_start() + info->osr_pc_offset();
if (GetBackEdgeState(isolate, code, pc) == OSR_AFTER_STACK_CHECK) {
Code* patch = isolate->builtins()->builtin(Builtins::kOnStackReplacement);
PatchAt(code, pc, ON_STACK_REPLACEMENT, patch);
}
}
#ifdef DEBUG
bool BackEdgeTable::Verify(Isolate* isolate,
Code* unoptimized,
......
......@@ -912,7 +912,8 @@ class BackEdgeTable {
enum BackEdgeState {
INTERRUPT,
ON_STACK_REPLACEMENT
ON_STACK_REPLACEMENT,
OSR_AFTER_STACK_CHECK
};
// Patch all interrupts with allowed loop depth in the unoptimized code to
......@@ -920,28 +921,29 @@ class BackEdgeTable {
static void Patch(Isolate* isolate,
Code* unoptimized_code);
// Patch the interrupt at the instruction before pc_after in
// the unoptimized code to unconditionally call replacement_code.
// Patch the back edge to the target state, provided the correct callee.
static void PatchAt(Code* unoptimized_code,
Address pc_after,
Address pc,
BackEdgeState target_state,
Code* replacement_code);
// Change all patched interrupts patched in the unoptimized code
// back to normal interrupts.
// Change all patched back edges back to normal interrupts.
static void Revert(Isolate* isolate,
Code* unoptimized_code);
// Change patched interrupt in the unoptimized code
// back to a normal interrupt.
static void RevertAt(Code* unoptimized_code,
Address pc_after,
Code* interrupt_code);
// Change a back edge patched for on-stack replacement to perform a
// stack check first.
static void AddStackCheck(CompilationInfo* info);
#ifdef DEBUG
// Remove the stack check, if present, and replace it with on-stack replacement.
static void RemoveStackCheck(CompilationInfo* info);
// Return the current patch state of the back edge.
static BackEdgeState GetBackEdgeState(Isolate* isolate,
Code* unoptimized_code,
Address pc_after);
#ifdef DEBUG
// Verify that all back edges of a certain loop depth are patched.
static bool Verify(Isolate* isolate,
Code* unoptimized_code,
......
......@@ -1307,6 +1307,24 @@ void Builtins::Generate_OnStackReplacement(MacroAssembler* masm) {
}
void Builtins::Generate_OsrAfterStackCheck(MacroAssembler* masm) {
// We check the stack limit as an indicator that recompilation might be done.
Label ok;
ExternalReference stack_limit =
ExternalReference::address_of_stack_limit(masm->isolate());
__ cmp(esp, Operand::StaticVariable(stack_limit));
__ j(above_equal, &ok, Label::kNear);
{
FrameScope scope(masm, StackFrame::INTERNAL);
__ CallRuntime(Runtime::kStackGuard, 0);
}
__ jmp(masm->isolate()->builtins()->OnStackReplacement(),
RelocInfo::CODE_TARGET);
__ bind(&ok);
__ ret(0);
}
#undef __
}
} // namespace v8::internal
......
......@@ -4898,79 +4898,70 @@ static const byte kCallInstruction = 0xe8;
static const byte kNopByteOne = 0x66;
static const byte kNopByteTwo = 0x90;
// The back edge bookkeeping code matches the pattern:
//
// sub <profiling_counter>, <delta>
// jns ok
// call <interrupt stub>
// ok:
//
// The patched back edge looks like this:
//
// sub <profiling_counter>, <delta> ;; Not changed
// nop
// nop
// call <on-stack replacement>
// ok:
void BackEdgeTable::PatchAt(Code* unoptimized_code,
Address pc,
BackEdgeState target_state,
Code* replacement_code) {
// Turn the jump into nops.
Address call_target_address = pc - kIntSize;
*(call_target_address - 3) = kNopByteOne;
*(call_target_address - 2) = kNopByteTwo;
// Replace the call address.
Address jns_instr_address = call_target_address - 3;
Address jns_offset_address = call_target_address - 2;
switch (target_state) {
case INTERRUPT:
// sub <profiling_counter>, <delta> ;; Not changed
// jns ok
// call <interrupt stub>
// ok:
*jns_instr_address = kJnsInstruction;
*jns_offset_address = kJnsOffset;
break;
case ON_STACK_REPLACEMENT:
case OSR_AFTER_STACK_CHECK:
// sub <profiling_counter>, <delta> ;; Not changed
// nop
// nop
// call <on-stack replacement>
// ok:
*jns_instr_address = kNopByteOne;
*jns_offset_address = kNopByteTwo;
break;
}
Assembler::set_target_address_at(call_target_address,
replacement_code->entry());
unoptimized_code->GetHeap()->incremental_marking()->RecordCodeTargetPatch(
unoptimized_code, call_target_address, replacement_code);
}
void BackEdgeTable::RevertAt(Code* unoptimized_code,
Address pc,
Code* interrupt_code) {
// Restore the original jump.
Address call_target_address = pc - kIntSize;
*(call_target_address - 3) = kJnsInstruction;
*(call_target_address - 2) = kJnsOffset;
// Restore the original call address.
Assembler::set_target_address_at(call_target_address,
interrupt_code->entry());
interrupt_code->GetHeap()->incremental_marking()->RecordCodeTargetPatch(
unoptimized_code, call_target_address, interrupt_code);
}
#ifdef DEBUG
BackEdgeTable::BackEdgeState BackEdgeTable::GetBackEdgeState(
Isolate* isolate,
Code* unoptimized_code,
Address pc) {
Address call_target_address = pc - kIntSize;
Address jns_instr_address = call_target_address - 3;
ASSERT_EQ(kCallInstruction, *(call_target_address - 1));
if (*(call_target_address - 3) == kNopByteOne) {
ASSERT_EQ(kNopByteTwo, *(call_target_address - 2));
Code* osr_builtin =
isolate->builtins()->builtin(Builtins::kOnStackReplacement);
ASSERT_EQ(osr_builtin->entry(),
Assembler::target_address_at(call_target_address));
return ON_STACK_REPLACEMENT;
} else {
// Get the interrupt stub code object to match against from the cache.
Code* interrupt_builtin =
isolate->builtins()->builtin(Builtins::kInterruptCheck);
ASSERT_EQ(interrupt_builtin->entry(),
if (*jns_instr_address == kJnsInstruction) {
ASSERT_EQ(kJnsOffset, *(call_target_address - 2));
ASSERT_EQ(isolate->builtins()->InterruptCheck()->entry(),
Assembler::target_address_at(call_target_address));
ASSERT_EQ(kJnsInstruction, *(call_target_address - 3));
ASSERT_EQ(kJnsOffset, *(call_target_address - 2));
return INTERRUPT;
}
ASSERT_EQ(kNopByteOne, *jns_instr_address);
ASSERT_EQ(kNopByteTwo, *(call_target_address - 2));
if (Assembler::target_address_at(call_target_address) ==
isolate->builtins()->OnStackReplacement()->entry()) {
return ON_STACK_REPLACEMENT;
}
ASSERT_EQ(isolate->builtins()->OsrAfterStackCheck()->entry(),
Assembler::target_address_at(call_target_address));
return OSR_AFTER_STACK_CHECK;
}
#endif // DEBUG
} } // namespace v8::internal
......
......@@ -29,6 +29,7 @@
#include "v8.h"
#include "full-codegen.h"
#include "hydrogen.h"
#include "isolate.h"
#include "v8threads.h"
......@@ -107,16 +108,20 @@ void OptimizingCompilerThread::CompileNext() {
// The function may have already been optimized by OSR. Simply continue.
// Use a mutex to make sure that functions marked for install
// are always also queued.
if (!optimizing_compiler->info()->osr_ast_id().IsNone()) {
ASSERT(FLAG_concurrent_osr);
LockGuard<Mutex> access_osr_lists(&osr_list_mutex_);
osr_candidates_.RemoveElement(optimizing_compiler);
ready_for_osr_.Add(optimizing_compiler);
} else {
LockGuard<Mutex> access_queue(&queue_mutex_);
output_queue_.Enqueue(optimizing_compiler);
isolate_->stack_guard()->RequestInstallCode();
LockGuard<Mutex> access_queue(&queue_mutex_);
output_queue_.Enqueue(optimizing_compiler);
isolate_->stack_guard()->RequestInstallCode();
}
static void DisposeOptimizingCompiler(OptimizingCompiler* compiler,
bool restore_function_code) {
CompilationInfo* info = compiler->info();
if (restore_function_code) {
Handle<JSFunction> function = info->closure();
function->ReplaceCode(function->shared()->code());
}
delete info;
}
......@@ -127,17 +132,12 @@ void OptimizingCompilerThread::FlushInputQueue(bool restore_function_code) {
// This should not block, since we have one signal on the input queue
// semaphore corresponding to each element in the input queue.
input_queue_semaphore_.Wait();
CompilationInfo* info = optimizing_compiler->info();
if (restore_function_code) {
Handle<JSFunction> function = info->closure();
function->ReplaceCode(function->shared()->code());
if (optimizing_compiler->info()->osr_ast_id().IsNone()) {
// OSR jobs are dealt with separately.
DisposeOptimizingCompiler(optimizing_compiler, restore_function_code);
}
delete info;
}
Release_Store(&queue_length_, static_cast<AtomicWord>(0));
LockGuard<Mutex> access_osr_lists(&osr_list_mutex_);
osr_candidates_.Clear();
}
......@@ -148,15 +148,23 @@ void OptimizingCompilerThread::FlushOutputQueue(bool restore_function_code) {
{ LockGuard<Mutex> access_queue(&queue_mutex_);
if (!output_queue_.Dequeue(&optimizing_compiler)) break;
}
CompilationInfo* info = optimizing_compiler->info();
if (restore_function_code) {
Handle<JSFunction> function = info->closure();
function->ReplaceCode(function->shared()->code());
if (optimizing_compiler->info()->osr_ast_id().IsNone()) {
// OSR jobs are dealt with separately.
DisposeOptimizingCompiler(optimizing_compiler, restore_function_code);
}
delete info;
}
}
RemoveStaleOSRCandidates(0);
void OptimizingCompilerThread::FlushOsrBuffer(bool restore_function_code) {
OptimizingCompiler* optimizing_compiler;
for (int i = 0; i < osr_buffer_size_; i++) {
optimizing_compiler = osr_buffer_[i];
if (optimizing_compiler != NULL) {
DisposeOptimizingCompiler(optimizing_compiler, restore_function_code);
}
}
osr_cursor_ = 0;
}
......@@ -166,6 +174,7 @@ void OptimizingCompilerThread::Flush() {
input_queue_semaphore_.Signal();
stop_semaphore_.Wait();
FlushOutputQueue(true);
if (FLAG_concurrent_osr) FlushOsrBuffer(true);
}
......@@ -186,12 +195,15 @@ void OptimizingCompilerThread::Stop() {
FlushOutputQueue(false);
}
if (FLAG_concurrent_osr) FlushOsrBuffer(false);
if (FLAG_trace_concurrent_recompilation) {
double percentage = time_spent_compiling_.PercentOf(time_spent_total_);
PrintF(" ** Compiler thread did %.2f%% useful work\n", percentage);
}
if (FLAG_trace_osr && FLAG_concurrent_osr) {
if ((FLAG_trace_osr || FLAG_trace_concurrent_recompilation) &&
FLAG_concurrent_osr) {
PrintF("[COSR hit rate %d / %d]\n", osr_hits_, osr_attempts_);
}
......@@ -208,12 +220,20 @@ void OptimizingCompilerThread::InstallOptimizedFunctions() {
{ LockGuard<Mutex> access_queue(&queue_mutex_);
if (!output_queue_.Dequeue(&compiler)) break;
}
Compiler::InstallOptimizedCode(compiler);
CompilationInfo* info = compiler->info();
if (info->osr_ast_id().IsNone()) {
Compiler::InstallOptimizedCode(compiler);
} else {
if (FLAG_trace_osr) {
PrintF("[COSR - ");
info->closure()->PrintName();
PrintF(" is ready for install and entry at AST id %d]\n",
info->osr_ast_id().ToInt());
}
compiler->WaitForInstall();
BackEdgeTable::RemoveStackCheck(info);
}
}
// Remove the oldest OSR candidates that are ready so that we
// only have a limited number of them waiting.
if (FLAG_concurrent_osr) RemoveStaleOSRCandidates();
}
......@@ -222,12 +242,18 @@ void OptimizingCompilerThread::QueueForOptimization(
ASSERT(IsQueueAvailable());
ASSERT(!IsOptimizerThread());
Barrier_AtomicIncrement(&queue_length_, static_cast<Atomic32>(1));
if (optimizing_compiler->info()->osr_ast_id().IsNone()) {
optimizing_compiler->info()->closure()->MarkInRecompileQueue();
CompilationInfo* info = optimizing_compiler->info();
if (info->osr_ast_id().IsNone()) {
info->closure()->MarkInRecompileQueue();
} else {
LockGuard<Mutex> access_osr_lists(&osr_list_mutex_);
osr_candidates_.Add(optimizing_compiler);
if (FLAG_trace_concurrent_recompilation) {
PrintF(" ** Queueing ");
info->closure()->PrintName();
PrintF(" for concurrent on-stack replacement.\n");
}
AddToOsrBuffer(optimizing_compiler);
osr_attempts_++;
BackEdgeTable::AddStackCheck(info);
}
input_queue_.Enqueue(optimizing_compiler);
input_queue_semaphore_.Signal();
......@@ -238,27 +264,27 @@ OptimizingCompiler* OptimizingCompilerThread::FindReadyOSRCandidate(
Handle<JSFunction> function, uint32_t osr_pc_offset) {
ASSERT(!IsOptimizerThread());
OptimizingCompiler* result = NULL;
{ LockGuard<Mutex> access_osr_lists(&osr_list_mutex_);
for (int i = 0; i < ready_for_osr_.length(); i++) {
if (ready_for_osr_[i]->info()->HasSameOsrEntry(function, osr_pc_offset)) {
osr_hits_++;
result = ready_for_osr_.Remove(i);
break;
}
for (int i = 0; i < osr_buffer_size_; i++) {
result = osr_buffer_[i];
if (result == NULL) continue;
if (result->IsWaitingForInstall() &&
result->info()->HasSameOsrEntry(function, osr_pc_offset)) {
osr_hits_++;
osr_buffer_[i] = NULL;
return result;
}
}
RemoveStaleOSRCandidates();
return result;
return NULL;
}
bool OptimizingCompilerThread::IsQueuedForOSR(Handle<JSFunction> function,
uint32_t osr_pc_offset) {
ASSERT(!IsOptimizerThread());
LockGuard<Mutex> access_osr_lists(&osr_list_mutex_);
for (int i = 0; i < osr_candidates_.length(); i++) {
if (osr_candidates_[i]->info()->HasSameOsrEntry(function, osr_pc_offset)) {
return true;
for (int i = 0; i < osr_buffer_size_; i++) {
if (osr_buffer_[i] != NULL &&
osr_buffer_[i]->info()->HasSameOsrEntry(function, osr_pc_offset)) {
return !osr_buffer_[i]->IsWaitingForInstall();
}
}
return false;
......@@ -267,30 +293,40 @@ bool OptimizingCompilerThread::IsQueuedForOSR(Handle<JSFunction> function,
bool OptimizingCompilerThread::IsQueuedForOSR(JSFunction* function) {
ASSERT(!IsOptimizerThread());
LockGuard<Mutex> access_osr_lists(&osr_list_mutex_);
for (int i = 0; i < osr_candidates_.length(); i++) {
if (*osr_candidates_[i]->info()->closure() == function) {
return true;
for (int i = 0; i < osr_buffer_size_; i++) {
if (osr_buffer_[i] != NULL &&
*osr_buffer_[i]->info()->closure() == function) {
return !osr_buffer_[i]->IsWaitingForInstall();
}
}
return false;
}
void OptimizingCompilerThread::RemoveStaleOSRCandidates(int limit) {
void OptimizingCompilerThread::AddToOsrBuffer(OptimizingCompiler* compiler) {
ASSERT(!IsOptimizerThread());
LockGuard<Mutex> access_osr_lists(&osr_list_mutex_);
while (ready_for_osr_.length() > limit) {
OptimizingCompiler* compiler = ready_for_osr_.Remove(0);
CompilationInfo* throw_away = compiler->info();
if (FLAG_trace_osr) {
PrintF("[COSR - Discarded ");
throw_away->closure()->PrintName();
PrintF(", AST id %d]\n",
throw_away->osr_ast_id().ToInt());
// Store into the next empty slot or replace the next stale OSR job that's
// waiting in vain. Dispose of it in the latter case.
OptimizingCompiler* stale;
while (true) {
stale = osr_buffer_[osr_cursor_];
if (stale == NULL) break;
if (stale->IsWaitingForInstall()) {
CompilationInfo* info = stale->info();
if (FLAG_trace_osr) {
PrintF("[COSR - Discarded ");
info->closure()->PrintName();
PrintF(", AST id %d]\n", info->osr_ast_id().ToInt());
}
BackEdgeTable::RemoveStackCheck(info);
DisposeOptimizingCompiler(stale, false);
break;
}
delete throw_away;
AdvanceOsrCursor();
}
osr_buffer_[osr_cursor_] = compiler;
AdvanceOsrCursor();
}
......
......@@ -53,14 +53,21 @@ class OptimizingCompilerThread : public Thread {
isolate_(isolate),
stop_semaphore_(0),
input_queue_semaphore_(0),
osr_candidates_(2),
ready_for_osr_(2),
osr_cursor_(0),
osr_hits_(0),
osr_attempts_(0) {
NoBarrier_Store(&stop_thread_, static_cast<AtomicWord>(CONTINUE));
NoBarrier_Store(&queue_length_, static_cast<AtomicWord>(0));
if (FLAG_concurrent_osr) {
osr_buffer_size_ = FLAG_concurrent_recompilation_queue_length + 4;
osr_buffer_ = NewArray<OptimizingCompiler*>(osr_buffer_size_);
for (int i = 0; i < osr_buffer_size_; i++) osr_buffer_[i] = NULL;
}
}
~OptimizingCompilerThread() {
if (FLAG_concurrent_osr) DeleteArray(osr_buffer_);
}
~OptimizingCompilerThread() {}
void Run();
void Stop();
......@@ -94,14 +101,18 @@ class OptimizingCompilerThread : public Thread {
private:
enum StopFlag { CONTINUE, STOP, FLUSH };
// Remove the oldest OSR candidates that are ready so that we
// only have |limit| left waiting.
void RemoveStaleOSRCandidates(int limit = kReadyForOSRLimit);
void FlushInputQueue(bool restore_function_code);
void FlushOutputQueue(bool restore_function_code);
void FlushOsrBuffer(bool restore_function_code);
void CompileNext();
// Add a recompilation task for OSR to the cyclic buffer, awaiting OSR entry.
// Tasks evicted from the cyclic buffer are discarded.
void AddToOsrBuffer(OptimizingCompiler* compiler);
void AdvanceOsrCursor() {
osr_cursor_ = (osr_cursor_ + 1) % osr_buffer_size_;
}
#ifdef DEBUG
int thread_id_;
Mutex thread_id_mutex_;
......@@ -115,10 +126,13 @@ class OptimizingCompilerThread : public Thread {
UnboundQueue<OptimizingCompiler*> input_queue_;
// Queue of recompilation tasks ready to be installed (excluding OSR).
UnboundQueue<OptimizingCompiler*> output_queue_;
// List of recompilation tasks for OSR in the input queue.
List<OptimizingCompiler*> osr_candidates_;
// List of recompilation tasks ready for OSR.
List<OptimizingCompiler*> ready_for_osr_;
// Cyclic buffer of recompilation tasks for OSR.
// TODO(yangguo): This may keep zombie tasks indefinitely, holding on to
// a lot of memory. Fix this.
OptimizingCompiler** osr_buffer_;
// Cursor for the cyclic buffer.
int osr_cursor_;
int osr_buffer_size_;
volatile AtomicWord stop_thread_;
volatile Atomic32 queue_length_;
......@@ -127,11 +141,8 @@ class OptimizingCompilerThread : public Thread {
// TODO(yangguo): remove this once the memory leak has been figured out.
Mutex queue_mutex_;
Mutex osr_list_mutex_;
int osr_hits_;
int osr_attempts_;
static const int kReadyForOSRLimit = 4;
};
} } // namespace v8::internal
......
......@@ -33,7 +33,6 @@
#include "bootstrapper.h"
#include "code-stubs.h"
#include "compilation-cache.h"
#include "deoptimizer.h"
#include "execution.h"
#include "full-codegen.h"
#include "global-handles.h"
......
......@@ -8609,7 +8609,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_CompileForOnStackReplacement) {
Handle<Code> result = Handle<Code>::null();
BailoutId ast_id = BailoutId::None();
if (FLAG_concurrent_recompilation && FLAG_concurrent_osr) {
if (FLAG_concurrent_osr) {
if (isolate->optimizing_compiler_thread()->
IsQueuedForOSR(function, pc_offset)) {
// Still waiting for the optimizing compiler thread to finish. Carry on.
......@@ -8631,7 +8631,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_CompileForOnStackReplacement) {
function->IsMarkedForConcurrentRecompilation()) {
// Prevent regular recompilation if we queue this for OSR.
// TODO(yangguo): remove this as soon as OSR becomes one-shot.
function->ReplaceCode(function->shared()->code());
function->ReplaceCode(*unoptimized);
}
return NULL;
}
......
......@@ -1386,6 +1386,23 @@ void Builtins::Generate_OnStackReplacement(MacroAssembler* masm) {
}
void Builtins::Generate_OsrAfterStackCheck(MacroAssembler* masm) {
// We check the stack limit as an indicator that recompilation might be done.
Label ok;
__ CompareRoot(rsp, Heap::kStackLimitRootIndex);
__ j(above_equal, &ok);
{
FrameScope scope(masm, StackFrame::INTERNAL);
__ CallRuntime(Runtime::kStackGuard, 0);
}
__ jmp(masm->isolate()->builtins()->OnStackReplacement(),
RelocInfo::CODE_TARGET);
__ bind(&ok);
__ ret(0);
}
#undef __
} } // namespace v8::internal
......
......@@ -4884,79 +4884,70 @@ static const byte kCallInstruction = 0xe8;
static const byte kNopByteOne = 0x66;
static const byte kNopByteTwo = 0x90;
// The back edge bookkeeping code matches the pattern:
//
// add <profiling_counter>, <-delta>
// jns ok
// call <stack guard>
// ok:
//
// We will patch away the branch so the code is:
//
// add <profiling_counter>, <-delta> ;; Not changed
// nop
// nop
// call <on-stack replacement>
// ok:
void BackEdgeTable::PatchAt(Code* unoptimized_code,
Address pc_after,
Address pc,
BackEdgeState target_state,
Code* replacement_code) {
// Turn the jump into nops.
Address call_target_address = pc_after - kIntSize;
*(call_target_address - 3) = kNopByteOne;
*(call_target_address - 2) = kNopByteTwo;
// Replace the call address.
Address call_target_address = pc - kIntSize;
Address jns_instr_address = call_target_address - 3;
Address jns_offset_address = call_target_address - 2;
switch (target_state) {
case INTERRUPT:
// sub <profiling_counter>, <delta> ;; Not changed
// jns ok
// call <interrupt stub>
// ok:
*jns_instr_address = kJnsInstruction;
*jns_offset_address = kJnsOffset;
break;
case ON_STACK_REPLACEMENT:
case OSR_AFTER_STACK_CHECK:
// sub <profiling_counter>, <delta> ;; Not changed
// nop
// nop
// call <on-stack replacement>
// ok:
*jns_instr_address = kNopByteOne;
*jns_offset_address = kNopByteTwo;
break;
}
Assembler::set_target_address_at(call_target_address,
replacement_code->entry());
unoptimized_code->GetHeap()->incremental_marking()->RecordCodeTargetPatch(
unoptimized_code, call_target_address, replacement_code);
}
void BackEdgeTable::RevertAt(Code* unoptimized_code,
Address pc_after,
Code* interrupt_code) {
// Restore the original jump.
Address call_target_address = pc_after - kIntSize;
*(call_target_address - 3) = kJnsInstruction;
*(call_target_address - 2) = kJnsOffset;
// Restore the original call address.
Assembler::set_target_address_at(call_target_address,
interrupt_code->entry());
interrupt_code->GetHeap()->incremental_marking()->RecordCodeTargetPatch(
unoptimized_code, call_target_address, interrupt_code);
}
#ifdef DEBUG
BackEdgeTable::BackEdgeState BackEdgeTable::GetBackEdgeState(
Isolate* isolate,
Code* unoptimized_code,
Address pc_after) {
Address call_target_address = pc_after - kIntSize;
Address pc) {
Address call_target_address = pc - kIntSize;
Address jns_instr_address = call_target_address - 3;
ASSERT_EQ(kCallInstruction, *(call_target_address - 1));
if (*(call_target_address - 3) == kNopByteOne) {
ASSERT_EQ(kNopByteTwo, *(call_target_address - 2));
Code* osr_builtin =
isolate->builtins()->builtin(Builtins::kOnStackReplacement);
ASSERT_EQ(osr_builtin->entry(),
Assembler::target_address_at(call_target_address));
return ON_STACK_REPLACEMENT;
} else {
// Get the interrupt stub code object to match against from the cache.
Code* interrupt_builtin =
isolate->builtins()->builtin(Builtins::kInterruptCheck);
ASSERT_EQ(interrupt_builtin->entry(),
if (*jns_instr_address == kJnsInstruction) {
ASSERT_EQ(kJnsOffset, *(call_target_address - 2));
ASSERT_EQ(isolate->builtins()->InterruptCheck()->entry(),
Assembler::target_address_at(call_target_address));
ASSERT_EQ(kJnsInstruction, *(call_target_address - 3));
ASSERT_EQ(kJnsOffset, *(call_target_address - 2));
return INTERRUPT;
}
ASSERT_EQ(kNopByteOne, *jns_instr_address);
ASSERT_EQ(kNopByteTwo, *(call_target_address - 2));
if (Assembler::target_address_at(call_target_address) ==
isolate->builtins()->OnStackReplacement()->entry()) {
return ON_STACK_REPLACEMENT;
}
ASSERT_EQ(isolate->builtins()->OsrAfterStackCheck()->entry(),
Assembler::target_address_at(call_target_address));
return OSR_AFTER_STACK_CHECK;
}
#endif // DEBUG
} } // namespace v8::internal
......