Commit d161bc3c authored by Michal Majewski, committed by Commit Bot

[test] Added signal handling to test processors

Bug: v8:6917
Change-Id: If91defd11c91fd26bdbacc146992745ea772a941
Reviewed-on: https://chromium-review.googlesource.com/890938
Reviewed-by: Michael Achenbach <machenbach@chromium.org>
Commit-Queue: Michał Majewski <majeski@google.com>
Cr-Commit-Position: refs/heads/master@{#50985}
parent 957ac364
...@@ -21,6 +21,7 @@ from testrunner.local import testsuite ...@@ -21,6 +21,7 @@ from testrunner.local import testsuite
from testrunner.local import utils from testrunner.local import utils
from testrunner.test_config import TestConfig from testrunner.test_config import TestConfig
from testrunner.testproc.shard import ShardProc from testrunner.testproc.shard import ShardProc
from testrunner.testproc.sigproc import SignalProc
from testrunner.testproc.timeout import TimeoutProc from testrunner.testproc.timeout import TimeoutProc
...@@ -567,3 +568,6 @@ class BaseTestRunner(object): ...@@ -567,3 +568,6 @@ class BaseTestRunner(object):
if not options.total_timeout_sec: if not options.total_timeout_sec:
return None return None
return TimeoutProc(options.total_timeout_sec) return TimeoutProc(options.total_timeout_sec)
def _create_signal_proc(self):
  """Create the processor that turns Ctrl-C into a graceful test-run stop.

  Returns:
    A fresh SignalProc instance; installed into the processor chain by
    subclasses (see numfuzz and the standard runner).
  """
  return SignalProc()
...@@ -4,7 +4,9 @@ ...@@ -4,7 +4,9 @@
# found in the LICENSE file. # found in the LICENSE file.
from Queue import Empty from Queue import Empty
from contextlib import contextmanager
from multiprocessing import Event, Process, Queue from multiprocessing import Event, Process, Queue
import signal
import traceback import traceback
...@@ -27,19 +29,10 @@ class NormalResult(): ...@@ -27,19 +29,10 @@ class NormalResult():
def __init__(self, result): def __init__(self, result):
self.result = result self.result = result
self.exception = False self.exception = False
self.break_now = False
class ExceptionResult(): class ExceptionResult():
def __init__(self): def __init__(self):
self.exception = True self.exception = True
self.break_now = False
class BreakResult():
def __init__(self):
self.exception = False
self.break_now = True
class MaybeResult(): class MaybeResult():
...@@ -56,18 +49,22 @@ class MaybeResult(): ...@@ -56,18 +49,22 @@ class MaybeResult():
return MaybeResult(False, value) return MaybeResult(False, value)
def Worker(fn, work_queue, done_queue, done, def Worker(fn, work_queue, done_queue, pause_event, read_again_event,
process_context_fn=None, process_context_args=None): process_context_fn=None, process_context_args=None):
"""Worker to be run in a child process. """Worker to be run in a child process.
The worker stops on two conditions. 1. When the poison pill "STOP" is The worker stops when the poison pill "STOP" is reached.
reached or 2. when the event "done" is set.""" It pauses when pause event is set and waits until read again event is true.
"""
try: try:
kwargs = {} kwargs = {}
if process_context_fn and process_context_args is not None: if process_context_fn and process_context_args is not None:
kwargs.update(process_context=process_context_fn(*process_context_args)) kwargs.update(process_context=process_context_fn(*process_context_args))
for args in iter(work_queue.get, "STOP"): for args in iter(work_queue.get, "STOP"):
if done.is_set(): if pause_event.is_set():
break done_queue.put(NormalResult(None))
read_again_event.wait()
continue
try: try:
done_queue.put(NormalResult(fn(*args, **kwargs))) done_queue.put(NormalResult(fn(*args, **kwargs)))
except Exception, e: except Exception, e:
...@@ -75,7 +72,14 @@ def Worker(fn, work_queue, done_queue, done, ...@@ -75,7 +72,14 @@ def Worker(fn, work_queue, done_queue, done,
print(">>> EXCEPTION: %s" % e) print(">>> EXCEPTION: %s" % e)
done_queue.put(ExceptionResult()) done_queue.put(ExceptionResult())
except KeyboardInterrupt: except KeyboardInterrupt:
done_queue.put(BreakResult()) assert False, 'Unreachable'
@contextmanager
def without_sigint():
  """Temporarily ignore SIGINT inside the `with` block.

  Used so that multiprocessing queues/events and worker processes created
  inside the block inherit SIG_IGN and therefore never see Ctrl-C; only the
  parent's own handler reacts to it.

  Fix: restore the previous handler in a `finally` clause so that an
  exception raised inside the block cannot leave SIGINT permanently ignored
  in the parent process.
  """
  handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
  try:
    yield
  finally:
    signal.signal(signal.SIGINT, handler)
class Pool(): class Pool():
...@@ -93,19 +97,24 @@ class Pool(): ...@@ -93,19 +97,24 @@ class Pool():
self.processes = [] self.processes = []
self.terminated = False self.terminated = False
# Invariant: count >= #work_queue + #done_queue. It is greater when a # Invariant: processing_count >= #work_queue + #done_queue. It is greater
# worker takes an item from the work_queue and before the result is # when a worker takes an item from the work_queue and before the result is
# submitted to the done_queue. It is equal when no worker is working, # submitted to the done_queue. It is equal when no worker is working,
# e.g. when all workers have finished, and when no results are processed. # e.g. when all workers have finished, and when no results are processed.
# Count is only accessed by the parent process. Only the parent process is # Count is only accessed by the parent process. Only the parent process is
# allowed to remove items from the done_queue and to add items to the # allowed to remove items from the done_queue and to add items to the
# work_queue. # work_queue.
self.count = 0 self.processing_count = 0
self.work_queue = Queue()
self.done_queue = Queue()
self.done = Event()
self.heartbeat_timeout = heartbeat_timeout self.heartbeat_timeout = heartbeat_timeout
# Disable sigint to make multiprocessing data structure inherit it and
# ignore ctrl-c
with without_sigint():
self.work_queue = Queue()
self.done_queue = Queue()
self.pause_event = Event()
self.read_again_event = Event()
def imap_unordered(self, fn, gen, def imap_unordered(self, fn, gen,
process_context_fn=None, process_context_args=None): process_context_fn=None, process_context_args=None):
"""Maps function "fn" to items in generator "gen" on the worker processes """Maps function "fn" to items in generator "gen" on the worker processes
...@@ -128,35 +137,35 @@ class Pool(): ...@@ -128,35 +137,35 @@ class Pool():
gen = iter(gen) gen = iter(gen)
self.advance = self._advance_more self.advance = self._advance_more
for w in xrange(self.num_workers): # Disable sigint to make workers inherit it and ignore ctrl-c
p = Process(target=Worker, args=(fn, with without_sigint():
self.work_queue, for w in xrange(self.num_workers):
self.done_queue, p = Process(target=Worker, args=(fn,
self.done, self.work_queue,
process_context_fn, self.done_queue,
process_context_args)) self.pause_event,
p.start() self.read_again_event,
self.processes.append(p) process_context_fn,
process_context_args))
p.start()
self.processes.append(p)
self.advance(gen) self.advance(gen)
while self.count > 0: while self.processing_count > 0:
while True: while True:
try: try:
result = self.done_queue.get(timeout=self.heartbeat_timeout) result = self.done_queue.get(timeout=self.heartbeat_timeout)
self.processing_count -= 1
break break
except Empty: except Empty:
# Indicate a heartbeat. The iterator will continue fetching the # Indicate a heartbeat. The iterator will continue fetching the
# next result. # next result.
yield MaybeResult.create_heartbeat() yield MaybeResult.create_heartbeat()
self.count -= 1
if result.exception: if result.exception:
# TODO(machenbach): Handle a few known types of internal errors # TODO(machenbach): Handle a few known types of internal errors
# gracefully, e.g. missing test files. # gracefully, e.g. missing test files.
internal_error = True internal_error = True
continue continue
elif result.break_now:
# A keyboard interrupt happened in one of the worker processes.
raise KeyboardInterrupt
else: else:
yield MaybeResult.create_result(result.result) yield MaybeResult.create_result(result.result)
self.advance(gen) self.advance(gen)
...@@ -166,15 +175,17 @@ class Pool(): ...@@ -166,15 +175,17 @@ class Pool():
traceback.print_exc() traceback.print_exc()
print(">>> EXCEPTION: %s" % e) print(">>> EXCEPTION: %s" % e)
finally: finally:
# Ignore results
self.terminate() self.terminate()
if internal_error: if internal_error:
raise Exception("Internal error in a worker process.") raise Exception("Internal error in a worker process.")
def _advance_more(self, gen): def _advance_more(self, gen):
while self.count < self.num_workers * self.BUFFER_FACTOR: while self.processing_count < self.num_workers * self.BUFFER_FACTOR:
try: try:
self.work_queue.put(gen.next()) self.work_queue.put(gen.next())
self.count += 1 self.processing_count += 1
except StopIteration: except StopIteration:
self.advance = self._advance_empty self.advance = self._advance_empty
break break
...@@ -186,31 +197,44 @@ class Pool(): ...@@ -186,31 +197,44 @@ class Pool():
"""Adds an item to the work queue. Can be called dynamically while """Adds an item to the work queue. Can be called dynamically while
processing the results from imap_unordered.""" processing the results from imap_unordered."""
self.work_queue.put(args) self.work_queue.put(args)
self.count += 1 self.processing_count += 1
def terminate(self): def terminate(self):
if self.terminated: if self.terminated:
return return
self.terminated = True self.terminated = True
results = []
# For exceptional tear down set the "done" event to stop the workers before self.pause_event.set()
# they empty the queue buffer.
self.done.set()
# Drain out work queue from tests
try:
while self.processing_count:
self.work_queue.get(True, 1)
self.processing_count -= 1
except Empty:
pass
# Make sure all processes stop
for p in self.processes: for p in self.processes:
# During normal tear down the workers block on get(). Feed a poison pill # During normal tear down the workers block on get(). Feed a poison pill
# per worker to make them stop. # per worker to make them stop.
self.work_queue.put("STOP") self.work_queue.put("STOP")
# Workers stopped reading work queue if stop event is true to not overtake
# draining queue, but they should read again to consume poison pill and
# possibly more tests that we couldn't get during draining.
self.read_again_event.set()
# Wait for results
while self.processing_count:
# TODO(majeski): terminate as generator to return results and heartbeats,
result = self.done_queue.get()
if result.result:
results.append(MaybeResult.create_result(result.result))
self.processing_count -= 1
for p in self.processes: for p in self.processes:
p.join() p.join()
# Drain the queues to prevent failures when queues are garbage collected. return results
try:
while True: self.work_queue.get(False)
except:
pass
try:
while True: self.done_queue.get(False)
except:
pass
...@@ -40,3 +40,7 @@ class PoolTest(unittest.TestCase): ...@@ -40,3 +40,7 @@ class PoolTest(unittest.TestCase):
pool.add([result.value + 20]) pool.add([result.value + 20])
self.assertEquals(set(range(0, 10) + range(20, 30) + range(40, 50)), self.assertEquals(set(range(0, 10) + range(20, 30) + range(40, 50)),
results) results)
# Allow running this test file directly (in addition to via a test runner).
if __name__ == '__main__':
  unittest.main()
...@@ -175,7 +175,8 @@ class NumFuzzer(base_runner.BaseTestRunner): ...@@ -175,7 +175,8 @@ class NumFuzzer(base_runner.BaseTestRunner):
# different random seeds for shards instead of splitting tests. # different random seeds for shards instead of splitting tests.
self._create_shard_proc(options), self._create_shard_proc(options),
combiner, combiner,
self._create_fuzzer(fuzzer_rng, options) self._create_fuzzer(fuzzer_rng, options),
self._create_signal_proc(),
] + indicators + [ ] + indicators + [
results, results,
self._create_timeout_proc(options), self._create_timeout_proc(options),
......
...@@ -589,6 +589,7 @@ class StandardTestRunner(base_runner.BaseTestRunner): ...@@ -589,6 +589,7 @@ class StandardTestRunner(base_runner.BaseTestRunner):
VariantProc(self._variants), VariantProc(self._variants),
StatusFileFilterProc(options.slow_tests, options.pass_fail_tests), StatusFileFilterProc(options.slow_tests, options.pass_fail_tests),
self._create_seed_proc(options), self._create_seed_proc(options),
self._create_signal_proc(),
] + indicators + [ ] + indicators + [
results, results,
self._create_timeout_proc(options), self._create_timeout_proc(options),
...@@ -615,7 +616,7 @@ class StandardTestRunner(base_runner.BaseTestRunner): ...@@ -615,7 +616,7 @@ class StandardTestRunner(base_runner.BaseTestRunner):
for indicator in indicators: for indicator in indicators:
indicator.finished() indicator.finished()
print '>>> %d tests ran' % results.total print '>>> %d tests ran' % (results.total - results.remaining)
exit_code = 0 exit_code = 0
if results.failed: if results.failed:
......
...@@ -94,9 +94,12 @@ class TestProc(object): ...@@ -94,9 +94,12 @@ class TestProc(object):
self._prev_proc.heartbeat() self._prev_proc.heartbeat()
def stop(self): def stop(self):
self._stopped = True if not self._stopped:
if self._prev_proc: self._stopped = True
self._prev_proc.stop() if self._prev_proc:
self._prev_proc.stop()
if self._next_proc:
self._next_proc.stop()
@property @property
def is_stopped(self): def is_stopped(self):
......
...@@ -64,20 +64,7 @@ class ExecutionProc(base.TestProc): ...@@ -64,20 +64,7 @@ class ExecutionProc(base.TestProc):
process_context_args=[self._prev_requirement], process_context_args=[self._prev_requirement],
) )
for pool_result in it: for pool_result in it:
if pool_result.heartbeat: self._unpack_result(pool_result)
continue
job_result = pool_result.value
test_id, result = job_result
test, result.cmd = self._tests[test_id]
del self._tests[test_id]
self._send_result(test, result)
except KeyboardInterrupt:
raise
except:
traceback.print_exc()
raise
finally: finally:
self._pool.terminate() self._pool.terminate()
...@@ -91,3 +78,19 @@ class ExecutionProc(base.TestProc): ...@@ -91,3 +78,19 @@ class ExecutionProc(base.TestProc):
def result_for(self, test, result): def result_for(self, test, result):
assert False, 'ExecutionProc cannot receive results' assert False, 'ExecutionProc cannot receive results'
def stop(self):
  """Stop the pool and flush results of tests that already finished.

  terminate() drains the work queue, feeds poison pills to the workers and
  yields the results (and heartbeats) still sitting in the done queue, so
  tests completed before Ctrl-C are not lost.
  """
  for pool_result in self._pool.terminate():
    self._unpack_result(pool_result)
def _unpack_result(self, pool_result):
  """Translate one pool result into a processor event.

  Heartbeats are forwarded as-is; a real result is matched back to its
  test (by id), removed from the in-flight table and sent downstream.
  """
  if pool_result.heartbeat:
    self.heartbeat()
    return
  test_id, result = pool_result.value
  # pop() both looks up and forgets the in-flight test in one step.
  test, result.cmd = self._tests.pop(test_id)
  self._send_result(test, result)
...@@ -34,7 +34,7 @@ class RerunProc(base.TestProcProducer): ...@@ -34,7 +34,7 @@ class RerunProc(base.TestProcProducer):
results = self._results[test.procid] results = self._results[test.procid]
results.append(result) results.append(result)
if self._needs_rerun(test, result): if not self.is_stopped and self._needs_rerun(test, result):
self._rerun[test.procid] += 1 self._rerun[test.procid] += 1
if self._rerun_total_left is not None: if self._rerun_total_left is not None:
self._rerun_total_left -= 1 self._rerun_total_left -= 1
......
# Copyright 2018 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import signal
from . import base
class SignalProc(base.TestProcObserver):
def __init__(self):
super(SignalProc, self).__init__()
self._ctrlc = False
signal.signal(signal.SIGINT, self._on_ctrlc)
def _on_next_test(self, _test):
self._on_event()
def _on_result_for(self, _test, _result):
self._on_event()
def _on_ctrlc(self, _signum, _stack_frame):
print '>>> Ctrl-C detected, waiting for ongoing tests to finish...'
self._ctrlc = True
def _on_event(self):
if self._ctrlc:
self.stop()
...@@ -7,7 +7,6 @@ import time ...@@ -7,7 +7,6 @@ import time
from . import base from . import base
# TODO(majeski): Signal handler
class TimeoutProc(base.TestProcObserver): class TimeoutProc(base.TestProcObserver):
def __init__(self, duration_sec): def __init__(self, duration_sec):
super(TimeoutProc, self).__init__() super(TimeoutProc, self).__init__()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment