#!/usr/bin/env python
# Copyright 2020 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# This is the main driver for the gcmole tool. See README for more details.
# Usage: CLANG_BIN=clang-bin-dir python tools/gcmole/gcmole.py [arm|arm64|ia32|x64]

# for py2/py3 compatibility
from __future__ import print_function

import collections
import difflib
import multiprocessing
import os
import re
import subprocess
import sys

# Clang settings for one target architecture: the target triple, the
# V8_TARGET_ARCH_* preprocessor define and any extra compiler options.
ArchCfg = collections.namedtuple(
    "ArchCfg", ["triple", "arch_define", "arch_options"])

# Supported architectures, keyed by architecture name. The 32-bit targets
# use an i586 triple together with -m32, the 64-bit targets an x86_64 triple.
ARCHITECTURES = dict(
    ia32=ArchCfg(
        triple="i586-unknown-linux",
        arch_define="V8_TARGET_ARCH_IA32",
        arch_options=["-m32"],
    ),
    arm=ArchCfg(
        triple="i586-unknown-linux",
        arch_define="V8_TARGET_ARCH_ARM",
        arch_options=["-m32"],
    ),
    x64=ArchCfg(
        triple="x86_64-unknown-linux",
        arch_define="V8_TARGET_ARCH_X64",
        arch_options=[],
    ),
    arm64=ArchCfg(
        triple="x86_64-unknown-linux",
        arch_define="V8_TARGET_ARCH_ARM64",
        arch_options=[],
    ),
)


def log(format, *args):
  """Print `format` expanded via str.format(*args) to stdout."""
  message = format.format(*args)
  print(message)


def fatal(format, *args):
  """Log the formatted message, then terminate the process with status 1."""
  log(format, *args)
  # Equivalent to sys.exit(1), which raises SystemExit(1).
  raise SystemExit(1)


# -----------------------------------------------------------------------------
# Clang invocation


def MakeClangCommandLine(plugin, plugin_args, arch_cfg, clang_bin_dir,
                         clang_plugins_dir):
  """Build the clang++ command line that runs a gcmole plugin.

  The file to compile is not part of the result; it is expected to be
  appended by the caller.

  Args:
    plugin: Name of the plugin to run from libgcmole.so.
    plugin_args: Arguments forwarded to the plugin (may be empty or None).
    arch_cfg: Object providing `triple`, `arch_define` and `arch_options`.
    clang_bin_dir: Directory containing the clang++ binary.
    clang_plugins_dir: Directory containing libgcmole.so.

  Returns:
    The command line as a list of strings.
  """
  clang_binary = os.path.join(clang_bin_dir, "clang++")
  plugin_library = os.path.join(clang_plugins_dir, "libgcmole.so")

  # Each plugin argument is forwarded as its own
  # "-Xclang -plugin-arg-<plugin> -Xclang <arg>" group.
  plugin_arg_option = "-plugin-arg-{}".format(plugin)
  forwarded_plugin_args = []
  for plugin_arg in plugin_args or []:
    forwarded_plugin_args.extend(
        ["-Xclang", plugin_arg_option, "-Xclang", plugin_arg])

  load_plugin = [
      clang_binary,
      "-std=c++14",
      "-c",
      "-Xclang", "-load",
      "-Xclang", plugin_library,
      "-Xclang", "-plugin",
      "-Xclang", plugin,
  ]
  target_and_includes = [
      "-Xclang", "-triple",
      "-Xclang", arch_cfg.triple,
      "-fno-exceptions",
      "-D", arch_cfg.arch_define,
      "-DENABLE_DEBUGGER_SUPPORT",
      "-DV8_INTL_SUPPORT",
      "-I./",
      "-Iinclude/",
      "-Iout/build/gen",
      "-Ithird_party/icu/source/common",
      "-Ithird_party/icu/source/i18n",
  ]
  return (load_plugin + forwarded_plugin_args + target_and_includes +
          arch_cfg.arch_options)


def InvokeClangPluginForFile(filename, cmd_line, verbose):
  """Run the clang plugin command on a single source file.

  Args:
    filename: Source file; appended as the last command line argument.
    cmd_line: Command line prefix as a list of strings.
    verbose: If true, print the full command before executing it.

  Returns:
    A (returncode, stdout, stderr) tuple with the captured output as text.
    If the invocation is interrupted by KeyboardInterrupt, (1, "", "") is
    returned instead.
  """
  try:
    log("-- {}", filename)
    full_cmd_line = cmd_line + [filename]
    if verbose:
      print("popen ", " ".join(full_cmd_line))
    p = subprocess.Popen(
        full_cmd_line, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    # On Python 3 the pipes yield bytes, but callers split the output on
    # "\n" and write it to sys.stderr, which requires text. Python 2 `str`
    # output passes the isinstance check and is left untouched.
    if not isinstance(stdout, str):
      stdout = stdout.decode("utf-8", "replace")
    if not isinstance(stderr, str):
      stderr = stderr.decode("utf-8", "replace")
    return p.returncode, stdout, stderr
  except KeyboardInterrupt:
    log("-- Interrupting {}", filename)
    # Callers unpack three values, so keep the tuple shape on this path too.
    return 1, "", ""

def InvokeClangPluginForEachFile(
    filenames,
    plugin,
    plugin_args,
    arch_cfg,
    flags,
    clang_bin_dir,
    clang_plugins_dir,
):
  """Run a gcmole plugin over every file and collect the outputs.

  Files are processed one by one when flags["sequential"] is set, otherwise
  through a multiprocessing pool with one worker per CPU. If an invocation
  returns a non-zero code, its stderr is written to sys.stderr and the
  process exits with that code.

  Returns:
    A dict mapping each filename to its (stdout, stderr) pair.
  """
  cmd_line = MakeClangCommandLine(plugin, plugin_args, arch_cfg, clang_bin_dir,
                                  clang_plugins_dir)
  verbose = flags["verbose"]
  outputs = {}

  def record(filename, result):
    # Store a successful result; bail out of the whole run on failure.
    returncode, stdout, stderr = result
    if returncode != 0:
      sys.stderr.write(stderr)
      sys.exit(returncode)
    outputs[filename] = (stdout, stderr)

  if flags["sequential"]:
    log("** Sequential execution.")
    for filename in filenames:
      record(filename, InvokeClangPluginForFile(filename, cmd_line, verbose))
    return outputs

  log("** Parallel execution.")
  worker_count = multiprocessing.cpu_count()
  pool = multiprocessing.Pool(worker_count)
  try:
    # Track active invokes with a semaphore, to prevent submitting too many
    # concurrent invokes to the pool.
    slots = multiprocessing.BoundedSemaphore(worker_count)

    def release_slot(_result):
      slots.release()

    pending = {}
    for filename in filenames:
      slots.acquire()
      pending[filename] = pool.apply_async(
          InvokeClangPluginForFile, (filename, cmd_line, verbose),
          callback=release_slot)

    for filename, pending_result in pending.items():
      record(filename, pending_result.get())
  except KeyboardInterrupt as interrupt:
    pool.terminate()
    pool.join()
    raise interrupt
  finally:
    pool.close()

  return outputs


# -----------------------------------------------------------------------------


def ParseGNFile(for_test):
  """Collect the .cc files listed in gcmole-annotated sections of GN files.

  A section starts at a "### gcmole(<condition>) ###" marker and runs up to
  the next "]". Every quoted .cc file name found in it is recorded under
  <condition>. Files are opened relative to the current working directory.

  Args:
    for_test: If true, only tools/gcmole/GCMOLE.gn is read. Otherwise
      BUILD.gn and test/cctest/BUILD.gn are read; from the latter only
      test-*.cc entries are taken and prefixed with "test/cctest/".

  Returns:
    A dict mapping each condition string to the list of matching file paths,
    in the order they were found.
  """
  # Raw strings keep the regex escapes (\. \( \]) from being treated as
  # invalid string escape sequences.
  any_cc_file = re.compile(r'"([^"]*?\.cc)"')
  if for_test:
    gn_files = [("tools/gcmole/GCMOLE.gn", any_cc_file, "")]
  else:
    gn_files = [
        ("BUILD.gn", any_cc_file, ""),
        ("test/cctest/BUILD.gn", re.compile(r'"(test-[^"]*?\.cc)"'),
         "test/cctest/"),
    ]
  section_pattern = re.compile(r"### gcmole\((.*?)\) ###(.*?)\]",
                               re.MULTILINE | re.DOTALL)

  result = {}
  for filename, pattern, prefix in gn_files:
    with open(filename) as gn_file:
      gn = gn_file.read()
    for condition, sources in section_pattern.findall(gn):
      # A section without matching files still registers its condition.
      result.setdefault(condition, []).extend(
          prefix + source for source in pattern.findall(sources))

  return result


def EvaluateCondition(cond, props):
  """Evaluate a gcmole condition against the configuration properties.

  Args:
    cond: Either "all" or a string starting with "<property>:<value>".
    props: Dict mapping property names to their configured values.

  Returns:
    True if `cond` is "all" or the named property equals the given value,
    False otherwise. Terminates via fatal() if the condition cannot be
    parsed or names a property that is not in `props`.
  """
  if cond == "all":
    return True

  # Raw string so that \w is a regex class, not an invalid string escape.
  m = re.match(r"(\w+):(\w+)", cond)
  if m is None:
    fatal("failed to parse condition: {}", cond)
  p, v = m.groups()
  if p not in props:
    fatal("undefined configuration property: {}", p)

  return props[p] == v


def BuildFileList(sources, props):
  """Return the files of every condition in `sources` that holds for `props`.

  Args:
    sources: Dict mapping condition strings to lists of files.
    props: Configuration properties the conditions are evaluated against.
  """
  return [
      selected
      for condition, files in sources.items()
      if EvaluateCondition(condition, props)
      for selected in files
  ]


# Source lists keyed by gcmole condition. They are parsed from the GN files
# as soon as this module is loaded; the GN files are opened relative to the
# current working directory.
gn_sources = ParseGNFile(for_test=False)
gn_test_sources = ParseGNFile(for_test=True)


def FilesForArch(arch):
  """Return the regular source files to analyze for `arch`.

  Conditions are evaluated for a linux debug configuration with an empty
  "simulator" property.
  """
  props = {"os": "linux", "arch": arch, "mode": "debug", "simulator": ""}
  return BuildFileList(gn_sources, props)


def FilesForTest(arch):
  """Return the gcmole self-test source files to analyze for `arch`.

  Conditions are evaluated for a linux debug configuration with an empty
  "simulator" property.
  """
  props = {"os": "linux", "arch": arch, "mode": "debug", "simulator": ""}
  return BuildFileList(gn_test_sources, props)


# -----------------------------------------------------------------------------
# GCSuspects Generation

# Note that the gcsuspects file lists functions in the form:
#  mangled_name,unmangled_function_name
#
# This means that we can match just the function name by matching only
# after a comma.
# Regular expressions that are searched for in gcsuspects entries.
ALLOWLIST = {
    # The following functions call CEntryStub which is always present.
    "MacroAssembler.*,CallRuntime",
    "CompileCallLoadPropertyWithInterceptor",
    "CallIC.*,GenerateMiss",
    # DirectCEntryStub is a special stub used on ARM.
    # It is pinned and always present.
    "DirectCEntryStub.*,GenerateCall",
    # TODO: GCMole currently is not sensitive enough to understand that
    #    certain functions only cause GC and return Failure simultaneously.
    #    Call sites of such functions are safe as long as they properly
    #    check the return value and propagate the Failure to the caller.
    #    It should be possible to extend GCMole to understand this.
    "Heap.*,TryEvacuateObject",
    # Ignore all StateTag methods.
    "StateTag",
    # Ignore printing of elements transition.
    "PrintElementsTransition",
    # CodeCreateEvent receives AbstractCode (a raw ptr) as an argument.
    "CodeCreateEvent",
    "WriteField",
}


class GCSuspectsCollector:
  """Builds the set of GC-causing functions from caller/callee listings."""

  def __init__(self, flags):
    # Whether names matching ALLOWLIST are forced to "does not cause GC".
    self.allowlist = flags["allowlist"]
    # Function whose callees are currently being read by Parse().
    self.current_scope = None
    # Function name -> dict whose keys are the names of its callers.
    self.funcs = {}
    # Function name -> True (may cause GC) or False (allowlisted).
    self.gc = {}
    # Function name -> list of reasons why it was associated with GC.
    self.gc_caused = collections.defaultdict(list)

  def AddCause(self, name, cause):
    """Record `cause` as one more reason attached to function `name`."""
    self.gc_caused[name].append(cause)

  def Parse(self, lines):
    """Consume listing lines.

    A line without a leading tab names a function and opens its scope; a
    tab-indented line names a callee of the function in the current scope.
    Empty lines are skipped.
    """
    for line in lines:
      if not line:
        continue

      if line[0] == "\t":
        callee = line[1:]
        callers = self.Resolve(callee)
        callers[self.current_scope] = True
      else:
        self.Resolve(line)
        self.current_scope = line

  def Resolve(self, name):
    """Return the callers dict of `name`, registering the function if new.

    On first sight the name is classified: collect-garbage and safepoint
    entry functions are marked as GC-causing, and, if enabled, names matching
    an ALLOWLIST pattern are marked as not GC-causing.
    """
    known_callers = self.funcs.get(name)
    if known_callers is not None:
      return known_callers

    callers = self.funcs[name] = {}

    gc_markers = (
        (",.*Collect.*Garbage", "<GC>"),
        (",EnterSafepoint", "<Safepoint>"),
    )
    for marker, cause in gc_markers:
      if re.search(marker, name):
        self.gc[name] = True
        self.AddCause(name, cause)

    # The allowlist is applied last, so it overrides the markers above.
    if self.allowlist:
      if any(re.search(allow, name) for allow in ALLOWLIST):
        self.gc[name] = False

    return callers

  def Propagate(self):
    """Mark, transitively, the callers of every GC-causing function."""
    log("** Propagating GC information")

    def mark_callers(callee, callers):
      for caller in callers:
        # Callers that already have a verdict (including allowlisted ones)
        # are not revisited, which also terminates on call cycles.
        if caller not in self.gc:
          self.gc[caller] = True
          mark_callers(caller, self.funcs[caller])

        self.AddCause(caller, callee)

    for name, callers in self.funcs.items():
      if self.gc.get(name):
        mark_callers(name, callers)


def GenerateGCSuspects(arch, files, arch_cfg, flags, clang_bin_dir,
                       clang_plugins_dir):
  """Run the dump-callees plugin and write the gcsuspects/gccauses files.

  The stdout of every plugin invocation is fed to a fresh
  GCSuspectsCollector. After propagation, the names of all GC-causing
  functions are written to "gcsuspects" (one per line) and the recorded
  causes to "gccauses", both in the current working directory.
  """
  collector = GCSuspectsCollector(flags)

  log("** Building GC Suspects for {}", arch)
  callee_dumps = InvokeClangPluginForEachFile(
      files, "dump-callees", [], arch_cfg, flags, clang_bin_dir,
      clang_plugins_dir)
  for stdout, _ in callee_dumps.values():
    collector.Parse(stdout.split('\n'))

  collector.Propagate()

  with open("gcsuspects", "w") as out:
    out.writelines(
        name + "\n" for name, is_suspect in collector.gc.items() if is_suspect)

  with open("gccauses", "w") as out:
    out.write("GC = {\n")
    for name, causes in collector.gc_caused.items():
      out.write("  '{}': [\n".format(name))
      out.writelines("    '{}',\n".format(cause) for cause in causes)
      out.write("  ],\n")
    out.write("}\n")

  log("** GCSuspects generated for {}", arch)


# ------------------------------------------------------------------------------
# Analysis


def CheckCorrectnessForArch(arch, for_test, flags, clang_bin_dir,
                            clang_plugins_dir):
  """Run the find-problems plugin over all files of one architecture.

  Unless flags["reuse_gcsuspects"] is set, the GC suspects are regenerated
  first.

  Args:
    arch: Architecture name; must be a key of ARCHITECTURES.
    for_test: If true, analyze the gcmole test sources and collect the plugin
      stderr instead of printing it.
    flags: Dict of option flags.
    clang_bin_dir: Directory containing the clang++ binary.
    clang_plugins_dir: Directory containing libgcmole.so.

  Returns:
    An (errors_found, output) tuple. `errors_found` is True if any stderr
    line looks like a "<file>:<line>:<col>: warning|error" diagnostic.
    `output` holds every stderr line preceded by a newline when `for_test`
    is true and is "" otherwise.
  """
  if for_test:
    files = FilesForTest(arch)
  else:
    files = FilesForArch(arch)
  arch_cfg = ARCHITECTURES[arch]

  if not flags["reuse_gcsuspects"]:
    GenerateGCSuspects(arch, files, arch_cfg, flags, clang_bin_dir,
                       clang_plugins_dir)
  else:
    log("** Reusing GCSuspects for {}", arch)

  processed_files = 0
  errors_found = False
  # Collected pieces of the test output; joined once at the end instead of
  # repeatedly concatenating a growing string.
  output_parts = []

  log(
      "** Searching for evaluation order problems{} for {}",
      " and dead variables" if flags["dead_vars"] else "",
      arch,
  )
  plugin_args = []
  if flags["dead_vars"]:
    plugin_args.append("--dead-vars")
  if flags["verbose_trace"]:
    plugin_args.append("--verbose")

  # Raw string so that \d is a regex class, not an invalid string escape.
  diagnostic_re = re.compile(r"^[^:]+:\d+:\d+: (warning|error)")

  plugin_outputs = InvokeClangPluginForEachFile(
      files,
      "find-problems",
      plugin_args,
      arch_cfg,
      flags,
      clang_bin_dir,
      clang_plugins_dir,
  )
  for _, stderr in plugin_outputs.values():
    processed_files += 1
    for l in stderr.split('\n'):
      if not errors_found:
        errors_found = diagnostic_re.match(l) is not None
      if for_test:
        output_parts.append("\n" + l)
      else:
        print(l)

  log(
      "** Done processing {} files. {}",
      processed_files,
      "Errors found" if errors_found else "No errors found",
  )

  return errors_found, "".join(output_parts)


def TestRun(flags, clang_bin_dir, clang_plugins_dir):
  """Run gcmole on its own test sources and compare with the expectations.

  The test sources are analyzed for x64; the collected plugin output has to
  contain at least one diagnostic and be equal to the contents of
  tools/gcmole/test-expectations.txt.

  Returns:
    True if the output matches the expectations, False otherwise.
  """
  errors_found, output = CheckCorrectnessForArch("x64", True, flags,
                                                 clang_bin_dir,
                                                 clang_plugins_dir)
  if not errors_found:
    log("** Test file should produce errors, but none were found. Output:")
    # The output is data, not a format string: it may contain "{" or "}"
    # (e.g. quoted source lines), which str.format would try to expand.
    log("{}", output)
    return False

  filename = "tools/gcmole/test-expectations.txt"
  with open(filename) as exp_file:
    expectations = exp_file.read()

  if output != expectations:
    log("** Output mismatch from running tests. Please run them manually.")

    for line in difflib.context_diff(
        expectations.split("\n"),
        output.split("\n"),
        fromfile=filename,
        tofile="output",
        lineterm="",
    ):
      log("{}", line)

    log("--- Full output ---")
    log("{}", output)
    log("------")

    return False

  log("** Tests ran successfully")
  return True


def main(args):
  """Entry point: parse the command line, self-test and run the analysis.

  Args:
    args: Full argument vector (args[0] is the script path). Arguments of
      the form --<flag> / --no-<flag> toggle the options in `flags`; all
      other arguments are architecture names. Without any architecture, all
      supported ones are checked.

  Reads CLANG_BIN (required) and CLANG_PLUGINS (defaults to the directory of
  the script) from the environment. Always leaves via sys.exit(): status 1 if
  the self-test failed or any problem was found, 0 otherwise.
  """
  DIR = os.path.dirname(args[0])

  clang_bin_dir = os.getenv("CLANG_BIN")
  clang_plugins_dir = os.getenv("CLANG_PLUGINS")

  if not clang_bin_dir:
    fatal("CLANG_BIN not set")

  if not clang_plugins_dir:
    clang_plugins_dir = DIR

  flags = {
      # Do not build gcsuspects file and reuse previously generated one.
      "reuse_gcsuspects": False,
      # Don't use parallel python runner.
      "sequential": False,
      # Print commands to console before executing them.
      "verbose": False,
      # Perform dead variable analysis.
      "dead_vars": True,
      # Enable verbose tracing from the plugin itself.
      "verbose_trace": False,
      # When building gcsuspects, treat functions matching ALLOWLIST as not
      # causing GC. Currently used to reduce number of false positives in
      # dead variables analysis. See TODO for ALLOWLIST
      "allowlist": True,
  }
  pos_args = []

  # Raw string so that \w and \- are regex escapes, not string escapes.
  flag_regexp = re.compile(r"^--(no[-_]?)?([\w\-_]+)$")
  for arg in args[1:]:
    m = flag_regexp.match(arg)
    if m:
      no, flag = m.groups()
      flag = flag.replace("-", "_")
      if flag in flags:
        # "--flag" switches the option on, "--no-flag" switches it off.
        flags[flag] = no is None
      else:
        fatal("Unknown flag: {}", flag)
    else:
      pos_args.append(arg)

  archs = pos_args if pos_args else ["ia32", "arm", "x64", "arm64"]

  # Reject unknown architectures before any (slow) analysis starts. A plain
  # ARCHITECTURES[arch] lookup would raise KeyError instead of reporting it.
  for arch in archs:
    if arch not in ARCHITECTURES:
      fatal("Unknown arch: {}", arch)

  any_errors_found = False
  if not TestRun(flags, clang_bin_dir, clang_plugins_dir):
    any_errors_found = True
  else:
    for arch in archs:
      errors_found, _ = CheckCorrectnessForArch(arch, False, flags,
                                                clang_bin_dir,
                                                clang_plugins_dir)
      any_errors_found = any_errors_found or errors_found

  sys.exit(1 if any_errors_found else 0)


# Run the tool only when executed as a script; main() receives the full
# argument vector, including the script path in position 0.
if __name__ == "__main__":
  main(sys.argv)