Commit cca29094 authored by Michael Achenbach, committed by Commit Bot

[foozzie] Compare baseline/default in every run

Previously we ran baseline (e.g. ignition) and one random secondary
comparison configuration (e.g. turbofan) from the list of experiments.
But Clusterfuzz imposes limitations on the total amount of fuzz tests.
Therefore this change enables more throughput by always running the
default configuration (ignition_turbofan, as V8 is shipped)
in addition to the baseline and the secondary configuration.

This doubles the number of comparisons we run, with less than
50% additional runtime, since the slow baseline configuration is only
run once.

The experiments table is updated accordingly. Explicit entries running
ignition_turbofan are removed (as it always runs now); instead, the
relative percentages of some of the other configurations are
increased. We also get a few new configurations that didn't run
before (e.g. forcing the slow path on x86).

No-Try: true
Bug: chromium:1100114
Change-Id: I69b2a41d78c06e556b309743a2aace1053c22f91
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2270307
Reviewed-by: Liviu Rau <liviurau@chromium.org>
Commit-Queue: Michael Achenbach <machenbach@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68607}
parent c1a57bc2
......@@ -208,11 +208,11 @@ class ExecutionArgumentsConfig(object):
'default: bundled in the directory of this script',
default=DEFAULT_D8)
def make_options(self, options):
def make_options(self, options, default_config=None):
def get(name):
return getattr(options, '%s_%s' % (self.label, name))
config = get('config')
config = default_config or get('config')
assert config in CONFIGS
d8 = get('d8')
......@@ -270,6 +270,8 @@ def parse_args():
options.first = first_config_arguments.make_options(options)
options.second = second_config_arguments.make_options(options)
options.default = second_config_arguments.make_options(
options, DEFAULT_CONFIG)
# Ensure we make a valid comparison.
if (options.first.d8 == options.second.d8 and
......@@ -367,14 +369,14 @@ def cluster_failures(source, known_failures=None):
return long_key[:ORIGINAL_SOURCE_HASH_LENGTH]
def run_comparisons(suppress, first_config, second_config, test_case, timeout,
def run_comparisons(suppress, execution_configs, test_case, timeout,
verbose=True, ignore_crashes=True, source_key=None):
"""Runs two configurations and bails out on output difference.
"""Runs different configurations and bails out on output difference.
Args:
suppress: The helper object for textual suppressions.
first_config: The baseline configuration to run and compare.
second_config: The secondary configuration to run and compare.
execution_configs: Two or more configurations to run. The first one will be
used as baseline to compare all others to.
test_case: The test case to run.
timeout: Timeout in seconds for one run.
verbose: Prints the executed commands.
......@@ -384,27 +386,33 @@ def run_comparisons(suppress, first_config, second_config, test_case, timeout,
source_key: A fixed source key. If not given, it will be inferred from the
output.
"""
first_config_output = first_config.command.run(
run_test_case = lambda config: config.command.run(
test_case, timeout=timeout, verbose=verbose)
second_config_output = second_config.command.run(
test_case, timeout=timeout, verbose=verbose)
difference, source = suppress.diff(first_config_output, second_config_output)
if difference:
# Only bail out due to suppressed output if there was a difference. If a
# suppression doesn't show up anymore in the statistics, we might want to
# remove it.
fail_bailout(first_config_output, suppress.ignore_by_output)
fail_bailout(second_config_output, suppress.ignore_by_output)
source_key = source_key or cluster_failures(source)
raise FailException(format_difference(
source_key, first_config, second_config,
first_config_output, second_config_output, difference, source))
if first_config_output.HasCrashed() or second_config_output.HasCrashed():
# Run the baseline configuration.
baseline_config = execution_configs[0]
baseline_output = run_test_case(baseline_config)
has_crashed = baseline_output.HasCrashed()
# Iterate over the remaining configurations, run and compare.
for comparison_config in execution_configs[1:]:
comparison_output = run_test_case(comparison_config)
has_crashed = has_crashed or comparison_output.HasCrashed()
difference, source = suppress.diff(baseline_output, comparison_output)
if difference:
# Only bail out due to suppressed output if there was a difference. If a
# suppression doesn't show up anymore in the statistics, we might want to
# remove it.
fail_bailout(baseline_output, suppress.ignore_by_output)
fail_bailout(comparison_output, suppress.ignore_by_output)
source_key = source_key or cluster_failures(source)
raise FailException(format_difference(
source_key, baseline_config, comparison_config,
baseline_output, comparison_output, difference, source))
if has_crashed:
if ignore_crashes:
# Show if a crash has happened in one of the runs and no difference was
# detected. This is only for the statistics during experiments.
......@@ -429,14 +437,20 @@ def main():
content_bailout(get_meta_data(content), suppress.ignore_by_metadata)
content_bailout(content, suppress.ignore_by_content)
first_config = ExecutionConfig(options, 'first')
second_config = ExecutionConfig(options, 'second')
# Prepare the baseline, default and a secondary configuration to compare to.
# The default (turbofan) takes precedence as many of the secondary configs
# are based on the turbofan config with additional parameters.
execution_configs = [
ExecutionConfig(options, 'first'),
ExecutionConfig(options, 'default'),
ExecutionConfig(options, 'second'),
]
# First, run some fixed smoke tests in all configs to ensure nothing
# is fundamentally wrong, in order to prevent bug flooding.
if not options.skip_sanity_checks:
run_comparisons(
suppress, first_config, second_config,
suppress, execution_configs,
test_case=SANITY_CHECKS,
timeout=SANITY_CHECK_TIMEOUT_SEC,
verbose=False,
......@@ -450,7 +464,7 @@ def main():
# Second, run all configs against the fuzz test case.
run_comparisons(
suppress, first_config, second_config,
suppress, execution_configs,
test_case=options.testcase,
timeout=TEST_TIMEOUT_SEC,
)
......
......@@ -49,10 +49,6 @@ class ConfigTest(unittest.TestCase):
assert all(map(lambda x: x[2] in CONFIGS, EXPERIMENTS))
# The last config item points to a known build configuration.
assert all(map(lambda x: x[3] in KNOWN_BUILDS, EXPERIMENTS))
# Ensure we compare different configs and same d8, or same config
# to different d8.
is_valid_comparison = lambda x: (x[1] == x[2]) == ('d8' != x[3])
assert all(map(is_valid_comparison, EXPERIMENTS))
# All flags have a probability.
first_is_float = lambda x: type(x[0]) == float
assert all(map(first_is_float, FLAGS))
......@@ -238,9 +234,10 @@ other weird stuff
check('123', '45', True, True, '12', '45')
def cut_verbose_output(stdout):
# This removes first lines containing d8 commands.
return '\n'.join(stdout.split('\n')[4:])
def cut_verbose_output(stdout, n_comp):
# This removes the first lines containing d8 commands of `n_comp` comparison
# runs.
return '\n'.join(stdout.split('\n')[n_comp * 2:])
def run_foozzie(second_d8_dir, *extra_flags, **kwargs):
......@@ -274,7 +271,8 @@ class SystemTest(unittest.TestCase):
"""
def testSyntaxErrorDiffPass(self):
stdout = run_foozzie('build1', '--skip-sanity-checks')
self.assertEqual('# V8 correctness - pass\n', cut_verbose_output(stdout))
self.assertEqual('# V8 correctness - pass\n',
cut_verbose_output(stdout, 3))
# Default comparison includes suppressions.
self.assertIn('v8_suppressions.js', stdout)
# Default comparison doesn't include any specific mock files.
......@@ -291,7 +289,7 @@ class SystemTest(unittest.TestCase):
'--second-config-extra-flags=--flag3')
e = ctx.exception
self.assertEqual(v8_foozzie.RETURN_FAIL, e.returncode)
self.assertEqual(expected_output, cut_verbose_output(e.output))
self.assertEqual(expected_output, cut_verbose_output(e.output, 2))
def testSanityCheck(self):
with open(os.path.join(TEST_DATA, 'sanity_check_output.txt')) as f:
......
[
[10, "ignition", "jitless", "d8"],
[15, "ignition", "jitless", "d8"],
[10, "ignition", "slow_path", "d8"],
[10, "ignition_no_ic", "slow_path", "d8"],
[5, "ignition", "slow_path_opt", "d8"],
[25, "ignition", "ignition_turbo", "d8"],
[2, "ignition_no_ic", "ignition_turbo", "d8"],
[2, "ignition", "ignition_turbo_no_ic", "d8"],
[15, "ignition", "ignition_turbo_opt", "d8"],
[5, "ignition", "ignition_turbo_no_ic", "d8"],
[20, "ignition", "ignition_turbo_opt", "d8"],
[5, "ignition_no_ic", "ignition_turbo_opt", "d8"],
[3, "ignition_turbo_opt", "ignition_turbo_opt", "clang_x64_pointer_compression/d8"],
[3, "ignition_turbo", "ignition_turbo", "clang_x64_pointer_compression/d8"],
[4, "ignition", "ignition", "clang_x86/d8"],
[4, "ignition_turbo", "ignition_turbo", "clang_x86/d8"],
[4, "ignition_turbo_opt", "ignition_turbo_opt", "clang_x86/d8"],
[4, "ignition_turbo", "ignition_turbo", "clang_x64_v8_arm64/d8"],
[4, "ignition_turbo", "ignition_turbo", "clang_x86_v8_arm/d8"]
[5, "ignition_turbo", "ignition_turbo_opt", "clang_x64_pointer_compression/d8"],
[5, "ignition", "ignition", "clang_x86/d8"],
[5, "ignition", "ignition_turbo_opt", "clang_x86/d8"],
[5, "ignition", "slow_path", "clang_x86/d8"],
[5, "ignition", "ignition_turbo_opt", "clang_x64_v8_arm64/d8"],
[5, "ignition", "ignition_turbo_opt", "clang_x86_v8_arm/d8"]
]
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment