Added the simplest call stack sampling and call profile in tick processor output.

Currently only two stack frames are sampled (current function and its caller).

Output of tick processor looks like this:

 [Call profile]:
   total  call path
   15.2%  LazyCompile: am3 crypto.js:108  <-  LazyCompile: montReduce crypto.js:583
    6.5%  LazyCompile: am3 crypto.js:108  <-  LazyCompile: bnpSquareTo crypto.js:431
    2.9%  Builtin: KeyedStoreIC_Generic  <-  LazyCompile: montReduce crypto.js:583
    2.3%  LazyCompile: am3 crypto.js:108  <-  LazyCompile: bnpMultiplyTo crypto.js:415

Tested under Windows, Linux and OS X.

Review URL: http://codereview.chromium.org/21403


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1292 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 969d43e7
...@@ -139,12 +139,14 @@ bool Profiler::paused_ = false; ...@@ -139,12 +139,14 @@ bool Profiler::paused_ = false;
// //
class Ticker: public Sampler { class Ticker: public Sampler {
public: public:
explicit Ticker(int interval): explicit Ticker(int interval, unsigned int low_stack_bound):
Sampler(interval, FLAG_prof), window_(NULL), profiler_(NULL) {} Sampler(interval, FLAG_prof), window_(NULL), profiler_(NULL),
low_stack_bound_(low_stack_bound) {}
~Ticker() { if (IsActive()) Stop(); } ~Ticker() { if (IsActive()) Stop(); }
void Tick(TickSample* sample) { void Tick(TickSample* sample) {
if (IsProfiling()) SampleStack(sample);
if (profiler_) profiler_->Insert(sample); if (profiler_) profiler_->Insert(sample);
if (window_) window_->AddState(sample->state); if (window_) window_->AddState(sample->state);
} }
...@@ -170,8 +172,21 @@ class Ticker: public Sampler { ...@@ -170,8 +172,21 @@ class Ticker: public Sampler {
} }
private: private:
void SampleStack(TickSample* sample) {
// Assuming that stack grows from lower addresses
if (sample->sp < sample->fp && sample->fp < low_stack_bound_) {
sample->InitStack(1);
sample->stack[0] = Memory::Address_at(
(Address)(sample->fp + StandardFrameConstants::kCallerPCOffset));
} else {
// FP seems to be in some intermediate state, better discard this sample
sample->InitStack(0);
}
}
SlidingStateWindow* window_; SlidingStateWindow* window_;
Profiler* profiler_; Profiler* profiler_;
unsigned int low_stack_bound_;
}; };
...@@ -239,9 +254,6 @@ void Profiler::Disengage() { ...@@ -239,9 +254,6 @@ void Profiler::Disengage() {
// the thread to terminate. // the thread to terminate.
running_ = false; running_ = false;
TickSample sample; TickSample sample;
sample.pc = 0;
sample.sp = 0;
sample.state = OTHER;
Insert(&sample); Insert(&sample);
Join(); Join();
...@@ -900,6 +912,11 @@ void Logger::TickEvent(TickSample* sample, bool overflow) { ...@@ -900,6 +912,11 @@ void Logger::TickEvent(TickSample* sample, bool overflow) {
if (overflow) { if (overflow) {
msg.Append(",overflow"); msg.Append(",overflow");
} }
if (*(sample->stack)) {
for (size_t i = 0; sample->stack[i]; ++i) {
msg.Append(",0x%x", reinterpret_cast<unsigned int>(sample->stack[i]));
}
}
msg.Append('\n'); msg.Append('\n');
msg.WriteToLogFile(); msg.WriteToLogFile();
} }
...@@ -990,7 +1007,10 @@ bool Logger::Setup() { ...@@ -990,7 +1007,10 @@ bool Logger::Setup() {
current_state_ = new VMState(OTHER); current_state_ = new VMState(OTHER);
ticker_ = new Ticker(10); // as log is initialized early with V8, we can assume that JS execution
// frames can never reach this point on stack
int stack_var;
ticker_ = new Ticker(10, reinterpret_cast<unsigned int>(&stack_var));
if (FLAG_sliding_state_window && sliding_state_window_ == NULL) { if (FLAG_sliding_state_window && sliding_state_window_ == NULL) {
sliding_state_window_ = new SlidingStateWindow(); sliding_state_window_ = new SlidingStateWindow();
......
...@@ -634,9 +634,11 @@ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) { ...@@ -634,9 +634,11 @@ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
#if defined (__arm__) || defined(__thumb__) #if defined (__arm__) || defined(__thumb__)
sample.pc = mcontext.mc_r15; sample.pc = mcontext.mc_r15;
sample.sp = mcontext.mc_r13; sample.sp = mcontext.mc_r13;
sample.fp = mcontext.mc_r11;
#else #else
sample.pc = mcontext.mc_eip; sample.pc = mcontext.mc_eip;
sample.sp = mcontext.mc_esp; sample.sp = mcontext.mc_esp;
sample.fp = mcontext.mc_ebp;
#endif #endif
} }
......
...@@ -617,9 +617,11 @@ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) { ...@@ -617,9 +617,11 @@ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
#if defined (__arm__) || defined(__thumb__) #if defined (__arm__) || defined(__thumb__)
sample.pc = mcontext.gregs[R15]; sample.pc = mcontext.gregs[R15];
sample.sp = mcontext.gregs[R13]; sample.sp = mcontext.gregs[R13];
sample.fp = mcontext.gregs[R11];
#else #else
sample.pc = mcontext.gregs[REG_EIP]; sample.pc = mcontext.gregs[REG_EIP];
sample.sp = mcontext.gregs[REG_ESP]; sample.sp = mcontext.gregs[REG_ESP];
sample.fp = mcontext.gregs[REG_EBP];
#endif #endif
} }
......
...@@ -583,9 +583,11 @@ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) { ...@@ -583,9 +583,11 @@ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
#if __DARWIN_UNIX03 #if __DARWIN_UNIX03
sample.pc = mcontext->__ss.__eip; sample.pc = mcontext->__ss.__eip;
sample.sp = mcontext->__ss.__esp; sample.sp = mcontext->__ss.__esp;
sample.fp = mcontext->__ss.__ebp;
#else // !__DARWIN_UNIX03 #else // !__DARWIN_UNIX03
sample.pc = mcontext->ss.eip; sample.pc = mcontext->ss.eip;
sample.sp = mcontext->ss.esp; sample.sp = mcontext->ss.esp;
sample.fp = mcontext->ss.ebp;
#endif // __DARWIN_UNIX03 #endif // __DARWIN_UNIX03
} }
......
...@@ -1585,6 +1585,7 @@ class Sampler::PlatformData : public Malloced { ...@@ -1585,6 +1585,7 @@ class Sampler::PlatformData : public Malloced {
// Invoke tick handler with program counter and stack pointer. // Invoke tick handler with program counter and stack pointer.
sample.pc = context.Eip; sample.pc = context.Eip;
sample.sp = context.Esp; sample.sp = context.Esp;
sample.fp = context.Ebp;
} }
// We always sample the VM state. // We always sample the VM state.
......
...@@ -422,10 +422,29 @@ class Semaphore { ...@@ -422,10 +422,29 @@ class Semaphore {
// TickSample captures the information collected for each sample. // TickSample captures the information collected for each sample.
class TickSample { class TickSample {
public: public:
TickSample() : pc(0), sp(0), state(OTHER) {} TickSample() : pc(0), sp(0), fp(0), state(OTHER) {}
unsigned int pc; // Instruction pointer. unsigned int pc; // Instruction pointer.
unsigned int sp; // Stack pointer. unsigned int sp; // Stack pointer.
unsigned int fp; // Frame pointer.
StateTag state; // The state of the VM. StateTag state; // The state of the VM.
SmartPointer<Address> stack; // Call stack, null-terminated.
inline TickSample& operator=(const TickSample& rhs) {
if (this == &rhs) return *this;
pc = rhs.pc;
sp = rhs.sp;
fp = rhs.fp;
state = rhs.state;
DeleteArray(stack.Detach());
stack = rhs.stack;
return *this;
}
inline void InitStack(int depth) {
stack = SmartPointer<Address>(NewArray<Address>(depth + 1));
// null-terminate
stack[depth] = 0;
}
}; };
class Sampler { class Sampler {
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import csv, splaytree, sys import csv, splaytree, sys
from operator import itemgetter
class CodeEntry(object): class CodeEntry(object):
...@@ -34,9 +35,14 @@ class CodeEntry(object): ...@@ -34,9 +35,14 @@ class CodeEntry(object):
self.start_addr = start_addr self.start_addr = start_addr
self.tick_count = 0 self.tick_count = 0
self.name = name self.name = name
self.stacks = {}
def Tick(self, pc): def Tick(self, pc, stack):
self.tick_count += 1 self.tick_count += 1
if len(stack) > 0:
stack.insert(0, self.ToString())
stack_key = tuple(stack)
self.stacks[stack_key] = self.stacks.setdefault(stack_key, 0) + 1
def RegionTicks(self): def RegionTicks(self):
return None return None
...@@ -69,8 +75,8 @@ class JSCodeEntry(CodeEntry): ...@@ -69,8 +75,8 @@ class JSCodeEntry(CodeEntry):
self.assembler = assembler self.assembler = assembler
self.region_ticks = None self.region_ticks = None
def Tick(self, pc): def Tick(self, pc, stack):
super(JSCodeEntry, self).Tick(pc) super(JSCodeEntry, self).Tick(pc, stack)
if not pc is None: if not pc is None:
offset = pc - self.start_addr offset = pc - self.start_addr
seen = [] seen = []
...@@ -162,7 +168,7 @@ class TickProcessor(object): ...@@ -162,7 +168,7 @@ class TickProcessor(object):
logreader = csv.reader(logfile) logreader = csv.reader(logfile)
for row in logreader: for row in logreader:
if row[0] == 'tick': if row[0] == 'tick':
self.ProcessTick(int(row[1], 16), int(row[2], 16), int(row[3])) self.ProcessTick(int(row[1], 16), int(row[2], 16), int(row[3]), row[4:])
elif row[0] == 'code-creation': elif row[0] == 'code-creation':
self.ProcessCodeCreation(row[1], int(row[2], 16), int(row[3]), row[4]) self.ProcessCodeCreation(row[1], int(row[2], 16), int(row[3]), row[4])
elif row[0] == 'code-move': elif row[0] == 'code-move':
...@@ -237,25 +243,41 @@ class TickProcessor(object): ...@@ -237,25 +243,41 @@ class TickProcessor(object):
def IncludeTick(self, pc, sp, state): def IncludeTick(self, pc, sp, state):
return (self.included_state is None) or (self.included_state == state) return (self.included_state is None) or (self.included_state == state)
def ProcessTick(self, pc, sp, state): def FindEntry(self, pc):
if not self.IncludeTick(pc, sp, state):
self.excluded_number_of_ticks += 1;
return
self.total_number_of_ticks += 1
page = pc >> 12 page = pc >> 12
if page in self.vm_extent: if page in self.vm_extent:
entry = self.cpp_entries.FindGreatestsLessThan(pc).value entry = self.cpp_entries.FindGreatestsLessThan(pc)
if entry.IsSharedLibraryEntry(): if entry != None:
self.number_of_library_ticks += 1 return entry.value
entry.Tick(None) else:
return return entry
max = self.js_entries.FindMax() max = self.js_entries.FindMax()
min = self.js_entries.FindMin() min = self.js_entries.FindMin()
if max != None and pc < max.key and pc > min.key: if max != None and pc < max.key and pc > min.key:
code_obj = self.js_entries.FindGreatestsLessThan(pc).value return self.js_entries.FindGreatestsLessThan(pc).value
code_obj.Tick(pc) return None
def ProcessStack(self, stack):
result = []
for frame in stack:
if frame.startswith('0x'):
entry = self.FindEntry(int(frame, 16))
if entry != None:
result.append(entry.ToString())
return result
def ProcessTick(self, pc, sp, state, stack):
if not self.IncludeTick(pc, sp, state):
self.excluded_number_of_ticks += 1;
return return
self.unaccounted_number_of_ticks += 1 self.total_number_of_ticks += 1
entry = self.FindEntry(pc)
if entry == None:
self.unaccounted_number_of_ticks += 1
return
if entry.IsSharedLibraryEntry():
self.number_of_library_ticks += 1
entry.Tick(pc, self.ProcessStack(stack))
def PrintResults(self): def PrintResults(self):
print('Statistical profiling result from %s, (%d ticks, %d unaccounted, %d excluded).' % print('Statistical profiling result from %s, (%d ticks, %d unaccounted, %d excluded).' %
...@@ -276,6 +298,11 @@ class TickProcessor(object): ...@@ -276,6 +298,11 @@ class TickProcessor(object):
# Print the C++ ticks. # Print the C++ ticks.
self.PrintHeader('C++') self.PrintHeader('C++')
self.PrintEntries(cpp_entries, lambda e:not e.IsSharedLibraryEntry()) self.PrintEntries(cpp_entries, lambda e:not e.IsSharedLibraryEntry())
# Print call profile.
print('\n [Call profile]:')
print(' total call path')
js_entries.extend(cpp_entries)
self.PrintCallProfile(js_entries)
def PrintHeader(self, header_title): def PrintHeader(self, header_title):
print('\n [%s]:' % header_title) print('\n [%s]:' % header_title)
...@@ -309,5 +336,21 @@ class TickProcessor(object): ...@@ -309,5 +336,21 @@ class TickProcessor(object):
'name': name 'name': name
}) })
def PrintCallProfile(self, entries):
all_stacks = {}
total_stacks = 0
for entry in entries:
all_stacks.update(entry.stacks)
for count in entry.stacks.itervalues():
total_stacks += count
all_stacks_items = all_stacks.items();
all_stacks_items.sort(key = itemgetter(1), reverse=True)
for stack, count in all_stacks_items:
total_percentage = count * 100.0 / total_stacks
print(' %(total)5.1f%% %(call_path)s' % {
'total' : total_percentage,
'call_path' : stack[0] + ' <- ' + stack[1]
})
if __name__ == '__main__': if __name__ == '__main__':
sys.exit('You probably want to run windows-tick-processor.py or linux-tick-processor.py.') sys.exit('You probably want to run windows-tick-processor.py or linux-tick-processor.py.')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment