Let test runner rerun failures to test for flakes.

When enabled, the runner dynamically adds the jobs of failed tests back to the pool so that they are executed again. Special JSON output for flakes will be handled in a separate CL.

BUG=374134
LOG=n
R=jkummerow@chromium.org

Review URL: https://codereview.chromium.org/360113003

git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@22143 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 2adbfced
...@@ -369,9 +369,11 @@ def Execute(arch, mode, args, options, suites, workspace): ...@@ -369,9 +369,11 @@ def Execute(arch, mode, args, options, suites, workspace):
timeout, options.isolates, timeout, options.isolates,
options.command_prefix, options.command_prefix,
options.extra_flags, options.extra_flags,
False, False, # Keep i18n on by default.
options.random_seed, options.random_seed,
True) True, # No sorting of test cases.
0, # Don't rerun failing tests.
0) # No use of a rerun-failing-tests maximum.
# Find available test suites and read test cases from them. # Find available test suites and read test cases from them.
variables = { variables = {
......
...@@ -181,6 +181,13 @@ def BuildOptions(): ...@@ -181,6 +181,13 @@ def BuildOptions():
default=False, action="store_true") default=False, action="store_true")
result.add_option("--json-test-results", result.add_option("--json-test-results",
help="Path to a file for storing json results.") help="Path to a file for storing json results.")
result.add_option("--rerun-failures-count",
help=("Number of times to rerun each failing test case. "
"Very slow tests will be rerun only once."),
default=0, type="int")
result.add_option("--rerun-failures-max",
help="Maximum number of failing test cases to rerun.",
default=100, type="int")
result.add_option("--shard-count", result.add_option("--shard-count",
help="Split testsuites into this number of shards", help="Split testsuites into this number of shards",
default=1, type="int") default=1, type="int")
...@@ -416,7 +423,9 @@ def Execute(arch, mode, args, options, suites, workspace): ...@@ -416,7 +423,9 @@ def Execute(arch, mode, args, options, suites, workspace):
options.extra_flags, options.extra_flags,
options.no_i18n, options.no_i18n,
options.random_seed, options.random_seed,
options.no_sorting) options.no_sorting,
options.rerun_failures_count,
options.rerun_failures_max)
# TODO(all): Combine "simulator" and "simulator_run". # TODO(all): Combine "simulator" and "simulator_run".
simulator_run = not options.dont_skip_simulator_slow_tests and \ simulator_run = not options.dont_skip_simulator_slow_tests and \
......
...@@ -81,6 +81,7 @@ class Runner(object): ...@@ -81,6 +81,7 @@ class Runner(object):
self.remaining = num_tests self.remaining = num_tests
self.failed = [] self.failed = []
self.crashed = 0 self.crashed = 0
self.reran_tests = 0
def _RunPerfSafe(self, fun): def _RunPerfSafe(self, fun):
try: try:
...@@ -89,6 +90,42 @@ class Runner(object): ...@@ -89,6 +90,42 @@ class Runner(object):
print("PerfData exception: %s" % e) print("PerfData exception: %s" % e)
self.perf_failures = True self.perf_failures = True
def _GetJob(self, test):
  """Builds the Job object that executes the given test case.

  The command line is obtained from self.GetCommand(test). The job timeout
  starts out as the context's timeout and is multiplied by four when
  "--stress-opt" is found in the test's flags, the context's mode flags or
  the context's extra flags. For a test with a dependency, a second command
  is derived by replacing the test's path with the dependency in every
  element of the command; otherwise the dependency command is None.
  """
  command = self.GetCommand(test)
  context = self.context
  timeout = context.timeout
  stress_flag = "--stress-opt"
  is_stress_run = (stress_flag in test.flags or
                   stress_flag in context.mode_flags or
                   stress_flag in context.extra_flags)
  if is_stress_run:
    # Stress runs get four times the regular timeout.
    timeout = timeout * 4
  dep_command = None
  if test.dependency is not None:
    dep_command = [part.replace(test.path, test.dependency)
                   for part in command]
  return Job(command, dep_command, test.id, timeout, context.verbose)
def _MaybeRerun(self, pool, test):
  """Adds a job for the given test back to the pool if it may run again.

  A test is rerun only while its run count does not exceed the context's
  rerun_failures_count. On a test's first rerun, the overall counter of
  rerun tests is checked against the context's rerun_failures_max and
  incremented. From the second run on, tests whose duration exceeded a
  twentieth of the context's timeout are not rerun any more.

  When the test is rerun, its duration and output are reset, its run count
  is incremented, a new job is added to the pool and the number of
  remaining tests is increased by one.
  """
  context = self.context
  if test.run > context.rerun_failures_count:
    # The maximum number of runs per test has been reached.
    return

  if test.run == 1:
    # Count the overall number of rerun tests on the first rerun.
    if self.reran_tests >= context.rerun_failures_max:
      # Don't rerun this test, as the overall number of rerun tests has
      # been reached.
      return
    self.reran_tests += 1
  elif test.run >= 2 and test.duration > context.timeout / 20:
    # Rerun slow tests at most once.
    return

  # Rerun this test.
  test.duration = None
  test.output = None
  test.run += 1
  pool.add([self._GetJob(test)])
  self.remaining += 1
def Run(self, jobs): def Run(self, jobs):
self.indicator.Starting() self.indicator.Starting()
self._RunInternal(jobs) self._RunInternal(jobs)
...@@ -109,23 +146,12 @@ class Runner(object): ...@@ -109,23 +146,12 @@ class Runner(object):
assert test.id >= 0 assert test.id >= 0
test_map[test.id] = test test_map[test.id] = test
try: try:
command = self.GetCommand(test) queue.append([self._GetJob(test)])
except Exception, e: except Exception, e:
# If this failed, save the exception and re-raise it later (after # If this failed, save the exception and re-raise it later (after
# all other tests have had a chance to run). # all other tests have had a chance to run).
queued_exception = e queued_exception = e
continue continue
timeout = self.context.timeout
if ("--stress-opt" in test.flags or
"--stress-opt" in self.context.mode_flags or
"--stress-opt" in self.context.extra_flags):
timeout *= 4
if test.dependency is not None:
dep_command = [ c.replace(test.path, test.dependency) for c in command ]
else:
dep_command = None
job = Job(command, dep_command, test.id, timeout, self.context.verbose)
queue.append([job])
try: try:
it = pool.imap_unordered(RunTest, queue) it = pool.imap_unordered(RunTest, queue)
for result in it: for result in it:
...@@ -143,6 +169,9 @@ class Runner(object): ...@@ -143,6 +169,9 @@ class Runner(object):
self.succeeded += 1 self.succeeded += 1
self.remaining -= 1 self.remaining -= 1
self.indicator.HasRun(test, has_unexpected_output) self.indicator.HasRun(test, has_unexpected_output)
if has_unexpected_output:
# Rerun test failures after the indicator has processed the results.
self._MaybeRerun(pool, test)
finally: finally:
pool.terminate() pool.terminate()
self._RunPerfSafe(lambda: self.perf_data_manager.close()) self._RunPerfSafe(lambda: self.perf_data_manager.close())
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
class Context(): class Context():
def __init__(self, arch, mode, shell_dir, mode_flags, verbose, timeout, def __init__(self, arch, mode, shell_dir, mode_flags, verbose, timeout,
isolates, command_prefix, extra_flags, noi18n, random_seed, isolates, command_prefix, extra_flags, noi18n, random_seed,
no_sorting): no_sorting, rerun_failures_count, rerun_failures_max):
self.arch = arch self.arch = arch
self.mode = mode self.mode = mode
self.shell_dir = shell_dir self.shell_dir = shell_dir
...@@ -42,15 +42,18 @@ class Context(): ...@@ -42,15 +42,18 @@ class Context():
self.noi18n = noi18n self.noi18n = noi18n
self.random_seed = random_seed self.random_seed = random_seed
self.no_sorting = no_sorting self.no_sorting = no_sorting
self.rerun_failures_count = rerun_failures_count
self.rerun_failures_max = rerun_failures_max
def Pack(self): def Pack(self):
return [self.arch, self.mode, self.mode_flags, self.timeout, self.isolates, return [self.arch, self.mode, self.mode_flags, self.timeout, self.isolates,
self.command_prefix, self.extra_flags, self.noi18n, self.command_prefix, self.extra_flags, self.noi18n,
self.random_seed, self.no_sorting] self.random_seed, self.no_sorting, self.rerun_failures_count,
self.rerun_failures_max]
@staticmethod @staticmethod
def Unpack(packed): def Unpack(packed):
# For the order of the fields, refer to Pack() above. # For the order of the fields, refer to Pack() above.
return Context(packed[0], packed[1], None, packed[2], False, return Context(packed[0], packed[1], None, packed[2], False,
packed[3], packed[4], packed[5], packed[6], packed[7], packed[3], packed[4], packed[5], packed[6], packed[7],
packed[8], packed[9]) packed[8], packed[9], packed[10], packed[11])
...@@ -38,6 +38,7 @@ class TestCase(object): ...@@ -38,6 +38,7 @@ class TestCase(object):
self.output = None self.output = None
self.id = None # int, used to map result back to TestCase instance self.id = None # int, used to map result back to TestCase instance
self.duration = None # assigned during execution self.duration = None # assigned during execution
self.run = 1 # The nth time this test is executed.
def CopyAddingFlags(self, flags): def CopyAddingFlags(self, flags):
copy = TestCase(self.suite, self.path, self.flags + flags, self.dependency) copy = TestCase(self.suite, self.path, self.flags + flags, self.dependency)
...@@ -60,6 +61,7 @@ class TestCase(object): ...@@ -60,6 +61,7 @@ class TestCase(object):
test = TestCase(str(task[0]), task[1], task[2], task[3]) test = TestCase(str(task[0]), task[1], task[2], task[3])
test.outcomes = set(task[4]) test.outcomes = set(task[4])
test.id = task[5] test.id = task[5]
test.run = 1
return test return test
def SetSuiteObject(self, suites): def SetSuiteObject(self, suites):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment