Commit 3fc96631 authored by Michael Achenbach, committed by Commit Bot

[test] Print hanging tests on linux on test-runner termination

This will print the list of processes still running before and after
joining workers during termination. This will help debugging hanging
tests during flake-bisect or with num-fuzzer, which both terminate
on total timeout and currently still sometimes hang without printing
processes.

Bug: v8:8292
Change-Id: I124b65fa35b8d7a6aa198fcf50f2c20df94dc51a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1735312
Reviewed-by: Tamer Tas <tmrts@chromium.org>
Commit-Queue: Michael Achenbach <machenbach@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63065}
parent 20a7055c
...@@ -115,7 +115,15 @@ class Pool(): ...@@ -115,7 +115,15 @@ class Pool():
# Necessary to not overflow the queue's pipe if a keyboard interrupt happens. # Necessary to not overflow the queue's pipe if a keyboard interrupt happens.
BUFFER_FACTOR = 4 BUFFER_FACTOR = 4
def __init__(self, num_workers, heartbeat_timeout=1): def __init__(self, num_workers, heartbeat_timeout=1, notify_fun=None):
"""
Args:
num_workers: Number of worker processes to run in parallel.
heartbeat_timeout: Timeout in seconds for waiting for results. Each time
the timeout is reached, a heartbeat is signalled and timeout is reset.
notify_fun: Callable called to signal some events like termination. The
event name is passed as string.
"""
self.num_workers = num_workers self.num_workers = num_workers
self.processes = [] self.processes = []
self.terminated = False self.terminated = False
...@@ -130,6 +138,7 @@ class Pool(): ...@@ -130,6 +138,7 @@ class Pool():
# work_queue. # work_queue.
self.processing_count = 0 self.processing_count = 0
self.heartbeat_timeout = heartbeat_timeout self.heartbeat_timeout = heartbeat_timeout
self.notify = notify_fun or (lambda x: x)
# Disable sigint and sigterm to prevent subprocesses from capturing the # Disable sigint and sigterm to prevent subprocesses from capturing the
# signals. # signals.
...@@ -261,11 +270,13 @@ class Pool(): ...@@ -261,11 +270,13 @@ class Pool():
for p in self.processes: for p in self.processes:
os.kill(p.pid, signal.SIGTERM) os.kill(p.pid, signal.SIGTERM)
self.notify("Joining workers")
for p in self.processes: for p in self.processes:
p.join() p.join()
# Drain the queues to prevent stderr chatter when queues are garbage # Drain the queues to prevent stderr chatter when queues are garbage
# collected. # collected.
self.notify("Draining queues")
try: try:
while True: self.work_queue.get(False) while True: self.work_queue.get(False)
except: except:
......
...@@ -109,6 +109,19 @@ class TestProc(object): ...@@ -109,6 +109,19 @@ class TestProc(object):
### Communication ### Communication
def notify_previous(self, event):
self._on_event(event)
if self._prev_proc:
self._prev_proc.notify_previous(event)
def _on_event(self, event):
"""Called when processors to the right signal events, e.g. termination.
Args:
event: A text describing the signalled event.
"""
pass
def _send_test(self, test): def _send_test(self, test):
"""Helper method for sending test to the next processor.""" """Helper method for sending test to the next processor."""
return self._next_proc.next_test(test) return self._next_proc.next_test(test)
...@@ -120,7 +133,6 @@ class TestProc(object): ...@@ -120,7 +133,6 @@ class TestProc(object):
self._prev_proc.result_for(test, result) self._prev_proc.result_for(test, result)
class TestProcObserver(TestProc): class TestProcObserver(TestProc):
"""Processor used for observing the data.""" """Processor used for observing the data."""
def __init__(self): def __init__(self):
......
...@@ -45,7 +45,7 @@ class ExecutionProc(base.TestProc): ...@@ -45,7 +45,7 @@ class ExecutionProc(base.TestProc):
def __init__(self, jobs, outproc_factory=None): def __init__(self, jobs, outproc_factory=None):
super(ExecutionProc, self).__init__() super(ExecutionProc, self).__init__()
self._pool = pool.Pool(jobs) self._pool = pool.Pool(jobs, notify_fun=self.notify_previous)
self._outproc_factory = outproc_factory or (lambda t: t.output_proc) self._outproc_factory = outproc_factory or (lambda t: t.output_proc)
self._tests = {} self._tests = {}
......
...@@ -149,6 +149,10 @@ class VerboseProgressIndicator(SimpleProgressIndicator): ...@@ -149,6 +149,10 @@ class VerboseProgressIndicator(SimpleProgressIndicator):
self._print('Still working...') self._print('Still working...')
self._print_processes_linux() self._print_processes_linux()
def _on_event(self, event):
self._print(event)
self._print_processes_linux()
class DotsProgressIndicator(SimpleProgressIndicator): class DotsProgressIndicator(SimpleProgressIndicator):
def __init__(self): def __init__(self):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment