#!/usr/bin/env python
# Copyright 2020 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# This is the main driver for the gcmole tool. See README for more details.
# Usage: CLANG_BIN=clang-bin-dir python tools/gcmole/gcmole.py [arm|arm64|ia32|x64]

# for py2/py3 compatibility
from __future__ import print_function

import collections
import difflib
import multiprocessing
import os
import re
import subprocess
import sys

ArchCfg = collections.namedtuple("ArchCfg",
                                 ["triple", "arch_define", "arch_options"])

ARCHITECTURES = {
    "ia32":
        ArchCfg(
            triple="i586-unknown-linux",
            arch_define="V8_TARGET_ARCH_IA32",
            arch_options=["-m32"],
        ),
    "arm":
        ArchCfg(
            triple="i586-unknown-linux",
            arch_define="V8_TARGET_ARCH_ARM",
            arch_options=["-m32"],
        ),
    "x64":
        ArchCfg(
            triple="x86_64-unknown-linux",
            arch_define="V8_TARGET_ARCH_X64",
            arch_options=[]),
    "arm64":
        ArchCfg(
            triple="x86_64-unknown-linux",
            arch_define="V8_TARGET_ARCH_ARM64",
            arch_options=[],
        ),
}


def log(format, *args):
  """Prints `format` expanded with `args` via str.format.

  `format` is always treated as a format string, so literal braces in it must
  be escaped; pass untrusted text as an argument ("{}", text) instead.
  """
  print(format.format(*args))


def fatal(format, *args):
  """Logs the formatted message and terminates the process with status 1."""
  log(format, *args)
  sys.exit(1)


# -----------------------------------------------------------------------------
# Clang invocation


def MakeClangCommandLine(plugin, plugin_args, arch_cfg, clang_bin_dir,
                         clang_plugins_dir):
  """Builds the clang++ argv (without the source file) that runs `plugin`.

  Args:
    plugin: name of the gcmole plugin action to run.
    plugin_args: iterable of arguments forwarded to the plugin (may be empty
      or None).
    arch_cfg: ArchCfg describing the target triple, define and extra options.
    clang_bin_dir: directory containing the clang++ binary.
    clang_plugins_dir: directory containing libgcmole.so.

  Returns:
    A list of command line arguments.
  """
  prefixed_plugin_args = []
  if plugin_args:
    for arg in plugin_args:
      prefixed_plugin_args += [
          "-Xclang",
          "-plugin-arg-{}".format(plugin),
          "-Xclang",
          arg,
      ]

  return ([
      os.path.join(clang_bin_dir, "clang++"),
      "-std=c++14",
      "-c",
      "-Xclang",
      "-load",
      "-Xclang",
      os.path.join(clang_plugins_dir, "libgcmole.so"),
      "-Xclang",
      "-plugin",
      "-Xclang",
      plugin,
  ] + prefixed_plugin_args + [
      "-Xclang",
      "-triple",
      "-Xclang",
      arch_cfg.triple,
      "-fno-exceptions",
      "-D",
      arch_cfg.arch_define,
      "-DENABLE_DEBUGGER_SUPPORT",
      "-DV8_INTL_SUPPORT",
      "-I./",
      "-Iinclude/",
      "-Iout/build/gen",
      "-Ithird_party/icu/source/common",
      "-Ithird_party/icu/source/i18n",
  ] + arch_cfg.arch_options)


def _ToText(data):
  """Returns subprocess output as `str` on both Python 2 and Python 3."""
  if isinstance(data, str):
    return data
  # Python 3 pipes yield bytes; the callers split and print text.
  return data.decode("utf-8", "replace")


def InvokeClangPluginForFile(filename, cmd_line, verbose):
  """Runs `cmd_line` on `filename` and captures its output.

  Args:
    filename: source file appended as the last command line argument.
    cmd_line: list of arguments, as produced by MakeClangCommandLine.
    verbose: when true, print the full command before running it.

  Returns:
    A (returncode, stdout, stderr) tuple; stdout and stderr are text. The
    tuple always has three elements, also when the run is interrupted or the
    binary cannot be started, because every caller unpacks three values.
  """
  args = cmd_line + [filename]
  try:
    log("-- {}", filename)
    if verbose:
      print("popen ", " ".join(args))
    p = subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    return p.returncode, _ToText(stdout), _ToText(stderr)
  except KeyboardInterrupt:
    log("-- Interrupting {}", filename)
    return 1, "", ""
  except OSError as e:
    # Report a missing/unrunnable binary through the normal error path. If the
    # exception escaped inside a pool worker, the completion callback would
    # never fire and the submitting loop would block on its semaphore forever.
    return 1, "", "Failed to run {}: {}\n".format(args[0], e)


def _RecordOutput(outputs, filename, result):
  """Stores a successful result in `outputs`; exits on a failed invocation."""
  returncode, stdout, stderr = result
  if returncode != 0:
    sys.stderr.write(stderr)
    sys.exit(returncode)
  outputs[filename] = (stdout, stderr)


def InvokeClangPluginForEachFile(
    filenames,
    plugin,
    plugin_args,
    arch_cfg,
    flags,
    clang_bin_dir,
    clang_plugins_dir,
):
  """Runs the given plugin over every file, sequentially or in a process pool.

  Reads flags["verbose"] and flags["sequential"]. The first invocation that
  returns a non-zero status has its stderr written out and terminates the
  process with that status.

  Returns:
    A dict mapping filename to a (stdout, stderr) tuple.
  """
  cmd_line = MakeClangCommandLine(plugin, plugin_args, arch_cfg, clang_bin_dir,
                                  clang_plugins_dir)
  verbose = flags["verbose"]

  outputs = {}
  if flags["sequential"]:
    log("** Sequential execution.")
    for filename in filenames:
      _RecordOutput(outputs, filename,
                    InvokeClangPluginForFile(filename, cmd_line, verbose))
  else:
    log("** Parallel execution.")
    cpus = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(cpus)
    try:
      # Track active invokes with a semaphore, to prevent submitting too many
      # concurrent invokes to the pool.
      execution_slots = multiprocessing.BoundedSemaphore(cpus)

      def release_slot(_result):
        execution_slots.release()

      async_outputs = {}
      for filename in filenames:
        execution_slots.acquire()
        async_outputs[filename] = pool.apply_async(
            InvokeClangPluginForFile, (filename, cmd_line, verbose),
            callback=release_slot)

      for filename, pending in async_outputs.items():
        _RecordOutput(outputs, filename, pending.get())
    except KeyboardInterrupt:
      pool.terminate()
      pool.join()
      raise
    finally:
      pool.close()

  return outputs


# -----------------------------------------------------------------------------

# Matches one annotated source list: "### gcmole(<condition>) ### ... ]".
_GCMOLE_SECTION_RE = re.compile(r"### gcmole\((.*?)\) ###(.*?)\]",
                                re.MULTILINE | re.DOTALL)
_CONDITION_RE = re.compile(r"(\w+):(\w+)")


def ParseGNFile(for_test):
  """Collects the .cc files listed in gcmole-annotated GN source lists.

  Args:
    for_test: when true read tools/gcmole/GCMOLE.gn, otherwise BUILD.gn and
      test/cctest/BUILD.gn. Paths are relative to the current directory.

  Returns:
    A dict mapping each condition string to the list of matching file paths.
  """
  result = {}
  if for_test:
    gn_files = [("tools/gcmole/GCMOLE.gn", re.compile(r'"([^"]*?\.cc)"'), "")]
  else:
    gn_files = [
        ("BUILD.gn", re.compile(r'"([^"]*?\.cc)"'), ""),
        ("test/cctest/BUILD.gn", re.compile(r'"(test-[^"]*?\.cc)"'),
         "test/cctest/"),
    ]

  for filename, pattern, prefix in gn_files:
    with open(filename) as gn_file:
      gn = gn_file.read()
    for condition, sources in _GCMOLE_SECTION_RE.findall(gn):
      result.setdefault(condition, []).extend(
          prefix + source for source in pattern.findall(sources))

  return result


def EvaluateCondition(cond, props):
  """Returns whether `cond` ("all" or "<property>:<value>") holds for `props`.

  Exits the process if the condition cannot be parsed or names a property
  that is not in `props`.
  """
  if cond == "all":
    return True

  m = _CONDITION_RE.match(cond)
  if m is None:
    fatal("failed to parse condition: {}", cond)
  p, v = m.groups()
  if p not in props:
    fatal("undefined configuration property: {}", p)

  return props[p] == v


def BuildFileList(sources, props):
  """Concatenates the file lists whose condition evaluates true for `props`."""
  ret = []
  for condition, files in sources.items():
    if EvaluateCondition(condition, props):
      ret += files
  return ret


# Parsed GN sources keyed by `for_test`. They are loaded on first use rather
# than at import time, so that importing this module (which also happens in
# every spawned pool worker) does not read files from the current directory.
_gn_sources_cache = {}


def _GNSources(for_test):
  """Returns the cached ParseGNFile result for the given mode."""
  key = bool(for_test)
  if key not in _gn_sources_cache:
    _gn_sources_cache[key] = ParseGNFile(for_test=key)
  return _gn_sources_cache[key]


def __getattr__(name):
  # PEP 562 (Python 3.7+): keeps the former module attributes `gn_sources` and
  # `gn_test_sources` readable, now computed on first access.
  if name == "gn_sources":
    return _GNSources(False)
  if name == "gn_test_sources":
    return _GNSources(True)
  raise AttributeError("module {!r} has no attribute {!r}".format(
      __name__, name))


def FilesForArch(arch):
  """Returns the production source files to analyse for `arch`."""
  return BuildFileList(_GNSources(False), {
      "os": "linux",
      "arch": arch,
      "mode": "debug",
      "simulator": ""
  })


def FilesForTest(arch):
  """Returns the gcmole self-test source files for `arch`."""
  return BuildFileList(_GNSources(True), {
      "os": "linux",
      "arch": arch,
      "mode": "debug",
      "simulator": ""
  })


# -----------------------------------------------------------------------------
# GCSuspects Generation

# Note that the gcsuspects file lists functions in the form:
#  mangled_name,unmangled_function_name
#
# This means that we can match just the function name by matching only
# after a comma.
ALLOWLIST = set([
    # The following functions call CEntryStub which is always present.
    "MacroAssembler.*,CallRuntime",
    "CompileCallLoadPropertyWithInterceptor",
    "CallIC.*,GenerateMiss",
    # DirectCEntryStub is a special stub used on ARM.
    # It is pinned and always present.
    "DirectCEntryStub.*,GenerateCall",
    # TODO GCMole currently is not sensitive enough to understand that certain
    # functions only cause GC and return Failure simultaneously.
    # Callsites of such functions are safe as long as they properly
    # check the return value and propagate the Failure to the caller.
    # It should be possible to extend GCMole to understand this.
    "Heap.*,TryEvacuateObject",
    # Ignore all StateTag methods.
    "StateTag",
    # Ignore printing of elements transition.
    "PrintElementsTransition",
    # CodeCreateEvent receives AbstractCode (a raw ptr) as an argument.
    "CodeCreateEvent",
    "WriteField",
])

_ALLOWLIST_RES = [re.compile(pattern) for pattern in ALLOWLIST]
_COLLECT_GARBAGE_RE = re.compile(r",.*Collect.*Garbage")
_ENTER_SAFEPOINT_RE = re.compile(r",EnterSafepoint")


class GCSuspectsCollector(object):
  """Builds the call graph from dump-callees output and marks GC suspects.

  Attributes:
    gc: function name -> True (may cause GC) or False (allowlisted).
    gc_caused: function name -> list of reasons it was linked to GC.
    funcs: function name -> dict whose keys are that function's callers.
    current_scope: the function whose callees are currently being parsed.
    allowlist: whether ALLOWLIST matches are forced to "does not cause GC".
  """

  def __init__(self, flags):
    self.gc = {}
    self.gc_caused = collections.defaultdict(list)
    self.funcs = {}
    self.current_scope = None
    self.allowlist = flags["allowlist"]

  def AddCause(self, name, cause):
    """Records `cause` as a reason why `name` is linked to GC."""
    self.gc_caused[name].append(cause)

  def Parse(self, lines):
    """Consumes plugin output lines.

    A line without a leading tab starts a new scope (a function); a
    tab-prefixed line names a function called from the current scope.
    """
    for funcname in lines:
      if not funcname:
        continue

      if funcname[0] != "\t":
        self.Resolve(funcname)
        self.current_scope = funcname
      else:
        name = funcname[1:]
        callers_for_name = self.Resolve(name)
        callers_for_name[self.current_scope] = True

  def Resolve(self, name):
    """Returns the callers dict of `name`, classifying it on first sight."""
    if name not in self.funcs:
      self.funcs[name] = {}

      if _COLLECT_GARBAGE_RE.search(name):
        self.gc[name] = True
        self.AddCause(name, "<GC>")

      if _ENTER_SAFEPOINT_RE.search(name):
        self.gc[name] = True
        self.AddCause(name, "<Safepoint>")

      if self.allowlist:
        # An allowlist match wins over the GC patterns above.
        if any(allow.search(name) for allow in _ALLOWLIST_RES):
          self.gc[name] = False

    return self.funcs[name]

  def Propagate(self):
    """Marks every transitive caller of a GC-causing function as a suspect."""
    log("** Propagating GC information")

    def mark(funcname, callers):
      for caller in callers:
        # Functions already classified (including allowlisted ones) are not
        # revisited, which also stops the walk on call-graph cycles.
        if caller not in self.gc:
          self.gc[caller] = True
          mark(caller, self.funcs[caller])
        self.AddCause(caller, funcname)

    for funcname, callers in self.funcs.items():
      if self.gc.get(funcname, False):
        mark(funcname, callers)


def GenerateGCSuspects(arch, files, arch_cfg, flags, clang_bin_dir,
                       clang_plugins_dir):
  """Runs the dump-callees plugin and writes `gcsuspects` and `gccauses`.

  Both files are written to the current directory.
  """
  # Start from a fresh collector so nothing leaks in from a previous arch.
  collector = GCSuspectsCollector(flags)

  log("** Building GC Suspects for {}", arch)
  plugin_outputs = InvokeClangPluginForEachFile(files, "dump-callees", [],
                                                arch_cfg, flags, clang_bin_dir,
                                                clang_plugins_dir)
  for stdout, _ in plugin_outputs.values():
    collector.Parse(stdout.split("\n"))

  collector.Propagate()

  with open("gcsuspects", "w") as out:
    for name, value in collector.gc.items():
      if value:
        out.write(name + "\n")

  with open("gccauses", "w") as out:
    out.write("GC = {\n")
    for name, causes in collector.gc_caused.items():
      out.write(" '{}': [\n".format(name))
      for cause in causes:
        out.write(" '{}',\n".format(cause))
      out.write(" ],\n")
    out.write("}\n")

  log("** GCSuspects generated for {}", arch)


# ------------------------------------------------------------------------------
# Analysis

# A clang diagnostic line: "<file>:<line>:<column>: warning|error".
_DIAGNOSTIC_RE = re.compile(r"^[^:]+:\d+:\d+: (warning|error)")


def CheckCorrectnessForArch(arch, for_test, flags, clang_bin_dir,
                            clang_plugins_dir):
  """Runs the find-problems plugin over the files selected for `arch`.

  Args:
    arch: key into ARCHITECTURES.
    for_test: analyse the self-test sources and collect the plugin's stderr
      instead of printing it.
    flags: dict of driver flags (see main).
    clang_bin_dir: directory containing clang++.
    clang_plugins_dir: directory containing libgcmole.so.

  Returns:
    (errors_found, output): whether any warning/error diagnostic was seen, and
    the collected stderr (each line prefixed by a newline) when `for_test` is
    true, otherwise "".
  """
  if for_test:
    files = FilesForTest(arch)
  else:
    files = FilesForArch(arch)
  arch_cfg = ARCHITECTURES[arch]

  if not flags["reuse_gcsuspects"]:
    GenerateGCSuspects(arch, files, arch_cfg, flags, clang_bin_dir,
                       clang_plugins_dir)
  else:
    log("** Reusing GCSuspects for {}", arch)

  processed_files = 0
  errors_found = False
  output_parts = []

  log(
      "** Searching for evaluation order problems{} for {}",
      " and dead variables" if flags["dead_vars"] else "",
      arch,
  )
  plugin_args = []
  if flags["dead_vars"]:
    plugin_args.append("--dead-vars")
  if flags["verbose_trace"]:
    plugin_args.append("--verbose")

  plugin_outputs = InvokeClangPluginForEachFile(
      files,
      "find-problems",
      plugin_args,
      arch_cfg,
      flags,
      clang_bin_dir,
      clang_plugins_dir,
  )
  for _, stderr in plugin_outputs.values():
    processed_files += 1

    for line in stderr.split("\n"):
      if not errors_found:
        errors_found = _DIAGNOSTIC_RE.match(line) is not None
      if for_test:
        output_parts.append("\n" + line)
      else:
        print(line)

  log(
      "** Done processing {} files. {}",
      processed_files,
      "Errors found" if errors_found else "No errors found",
  )

  return errors_found, "".join(output_parts)


def TestRun(flags, clang_bin_dir, clang_plugins_dir):
  """Runs gcmole on its own test input and compares with the expectations.

  Returns:
    True when the test sources produce diagnostics that match
    tools/gcmole/test-expectations.txt exactly, False otherwise.
  """
  errors_found, output = CheckCorrectnessForArch("x64", True, flags,
                                                 clang_bin_dir,
                                                 clang_plugins_dir)

  if not errors_found:
    log("** Test file should produce errors, but none were found. Output:")
    # The output is compiler text and may contain braces, so it must be
    # passed as an argument rather than used as the format string.
    log("{}", output)
    return False

  filename = "tools/gcmole/test-expectations.txt"
  with open(filename) as exp_file:
    expectations = exp_file.read()

  if output != expectations:
    log("** Output mismatch from running tests. Please run them manually.")

    for line in difflib.context_diff(
        expectations.split("\n"),
        output.split("\n"),
        fromfile=filename,
        tofile="output",
        lineterm="",
    ):
      log("{}", line)

    log("--- Full output ---")
    log("{}", output)
    log("------")

    return False

  log("** Tests ran successfully")
  return True


def main(args):
  """Entry point: parses flags and architectures from `args` and runs gcmole.

  Args:
    args: full argv; args[0] is the script path, used to locate the plugin
      directory when CLANG_PLUGINS is not set.

  Exits with status 1 if the self-test fails, any problem is reported, or the
  command line is invalid; otherwise with status 0.
  """
  DIR = os.path.dirname(args[0])

  clang_bin_dir = os.getenv("CLANG_BIN")
  clang_plugins_dir = os.getenv("CLANG_PLUGINS")

  if not clang_bin_dir:
    fatal("CLANG_BIN not set")

  if not clang_plugins_dir:
    clang_plugins_dir = DIR

  flags = {
      # Do not build gcsuspects file and reuse previously generated one.
      "reuse_gcsuspects": False,
      # Don't use parallel python runner.
      "sequential": False,
      # Print commands to console before executing them.
      "verbose": False,
      # Perform dead variable analysis.
      "dead_vars": True,
      # Enable verbose tracing from the plugin itself.
      "verbose_trace": False,
      # When building gcsuspects, treat the functions matched by ALLOWLIST as
      # if they cannot cause GC. Currently used to reduce number of false
      # positives in dead variables analysis. See TODO for ALLOWLIST
      "allowlist": True,
  }
  pos_args = []

  flag_regexp = re.compile(r"^--(no[-_]?)?([\w\-_]+)$")
  for arg in args[1:]:
    m = flag_regexp.match(arg)
    if m:
      no, flag = m.groups()
      flag = flag.replace("-", "_")
      if flag in flags:
        flags[flag] = no is None
      else:
        fatal("Unknown flag: {}", flag)
    else:
      pos_args.append(arg)

  archs = pos_args if pos_args else ["ia32", "arm", "x64", "arm64"]

  # Reject unknown architectures up front, before the lengthy test run.
  for arch in archs:
    if arch not in ARCHITECTURES:
      fatal("Unknown arch: {}", arch)

  any_errors_found = False
  if not TestRun(flags, clang_bin_dir, clang_plugins_dir):
    any_errors_found = True
  else:
    for arch in archs:
      errors_found, _ = CheckCorrectnessForArch(arch, False, flags,
                                                clang_bin_dir,
                                                clang_plugins_dir)
      any_errors_found = any_errors_found or errors_found

  sys.exit(1 if any_errors_found else 0)


if __name__ == "__main__":
  main(sys.argv)