#!/usr/bin/env python
# Copyright 2020 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# This is main driver for gcmole tool. See README for more details.
# Usage: CLANG_BIN=clang-bin-dir python tools/gcmole/gcmole.py [arm|arm64|ia32|x64]

# for py2/py3 compatibility
from __future__ import print_function

import collections
import difflib
from multiprocessing import cpu_count
import os
import re
import subprocess
import sys
import threading
if sys.version_info.major > 2:
  import queue
else:
  import Queue as queue

ArchCfg = collections.namedtuple("ArchCfg",
                                 ["triple", "arch_define", "arch_options"])

ARCHITECTURES = {
    "ia32":
        ArchCfg(
            triple="i586-unknown-linux",
            arch_define="V8_TARGET_ARCH_IA32",
            arch_options=["-m32"],
        ),
    "arm":
        ArchCfg(
            triple="i586-unknown-linux",
            arch_define="V8_TARGET_ARCH_ARM",
            arch_options=["-m32"],
        ),
    "x64":
        ArchCfg(
            triple="x86_64-unknown-linux",
            arch_define="V8_TARGET_ARCH_X64",
            arch_options=[]),
    "arm64":
        ArchCfg(
            triple="x86_64-unknown-linux",
            arch_define="V8_TARGET_ARCH_ARM64",
            arch_options=[],
        ),
}


def log(format, *args):
  print(format.format(*args))


def fatal(format, *args):
  log(format, *args)
  sys.exit(1)


# -----------------------------------------------------------------------------
# Clang invocation


def MakeClangCommandLine(plugin, plugin_args, arch_cfg, clang_bin_dir,
                         clang_plugins_dir):
  prefixed_plugin_args = []
  if plugin_args:
    for arg in plugin_args:
      prefixed_plugin_args += [
          "-Xclang",
          "-plugin-arg-{}".format(plugin),
          "-Xclang",
          arg,
      ]

  return ([
      os.path.join(clang_bin_dir, "clang++"),
      "-std=c++14",
      "-c",
      "-Xclang",
      "-load",
      "-Xclang",
      os.path.join(clang_plugins_dir, "libgcmole.so"),
      "-Xclang",
      "-plugin",
      "-Xclang",
      plugin,
  ] + prefixed_plugin_args + [
      "-Xclang",
      "-triple",
      "-Xclang",
      arch_cfg.triple,
      "-fno-exceptions",
      "-D",
      arch_cfg.arch_define,
      "-DENABLE_DEBUGGER_SUPPORT",
      "-DV8_INTL_SUPPORT",
      "-DV8_ENABLE_WEBASSEMBLY",
      "-I./",
      "-Iinclude/",
      "-Iout/build/gen",
      "-Ithird_party/icu/source/common",
      "-Ithird_party/icu/source/i18n",
  ] + arch_cfg.arch_options)


def InvokeClangPluginForFile(filename, cmd_line, verbose):
  if verbose:
    print("popen ", " ".join(cmd_line + [filename]))
  p = subprocess.Popen(
      cmd_line + [filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout, stderr = p.communicate()
  return p.returncode, stdout, stderr


def InvokeClangPluginForFilesInQueue(i, input_queue, output_queue, cancel_event,
                                     cmd_line, verbose):
  success = False
  try:
    while not cancel_event.is_set():
      filename = input_queue.get_nowait()
      ret, stdout, stderr = InvokeClangPluginForFile(filename, cmd_line,
                                                     verbose)
      output_queue.put_nowait((filename, ret, stdout.decode('utf-8'), stderr.decode('utf-8')))
      if ret != 0:
        break
  except KeyboardInterrupt:
    log("-- [{}] Interrupting", i)
  except queue.Empty:
    success = True
  finally:
    # Emit a success bool so that the reader knows that there was either an
    # error or all files were processed.
    output_queue.put_nowait(success)


def InvokeClangPluginForEachFile(
    filenames,
    plugin,
    plugin_args,
    arch_cfg,
    flags,
    clang_bin_dir,
    clang_plugins_dir,
):
  cmd_line = MakeClangCommandLine(plugin, plugin_args, arch_cfg, clang_bin_dir,
                                  clang_plugins_dir)
  verbose = flags["verbose"]
  if flags["sequential"]:
    log("** Sequential execution.")
    for filename in filenames:
      log("-- {}", filename)
      returncode, stdout, stderr = InvokeClangPluginForFile(
          filename, cmd_line, verbose)
      if returncode != 0:
        sys.stderr.write(stderr)
        sys.exit(returncode)
      yield filename, stdout, stderr
  else:
    log("** Parallel execution.")
    cpus = cpu_count()
    input_queue = queue.Queue()
    output_queue = queue.Queue()
    threads = []
    try:
      for filename in filenames:
        input_queue.put(filename)

      cancel_event = threading.Event()

      for i in range(min(len(filenames), cpus)):
        threads.append(
            threading.Thread(
                target=InvokeClangPluginForFilesInQueue,
                args=(i, input_queue, output_queue, cancel_event, cmd_line,
                      verbose)))

      for t in threads:
        t.start()

      num_finished = 0
      while num_finished < len(threads):
        output = output_queue.get()
        if type(output) == bool:
          if output:
            num_finished += 1
            continue
          else:
            break
        filename, returncode, stdout, stderr = output
        log("-- {}", filename)
        if returncode != 0:
          sys.stderr.write(stderr)
          sys.exit(returncode)
        yield filename, stdout, stderr

    finally:
      cancel_event.set()
      for t in threads:
        t.join()


# -----------------------------------------------------------------------------


def ParseGNFile(for_test):
  result = {}
  if for_test:
    gn_files = [("tools/gcmole/GCMOLE.gn", re.compile('"([^"]*?\.cc)"'), "")]
  else:
    gn_files = [
        ("BUILD.gn", re.compile('"([^"]*?\.cc)"'), ""),
        ("test/cctest/BUILD.gn", re.compile('"(test-[^"]*?\.cc)"'),
         "test/cctest/"),
    ]

  for filename, pattern, prefix in gn_files:
    with open(filename) as gn_file:
      gn = gn_file.read()
      for condition, sources in re.findall("### gcmole\((.*?)\) ###(.*?)\]", gn,
                                           re.MULTILINE | re.DOTALL):
        if condition not in result:
          result[condition] = []
        for file in pattern.findall(sources):
          result[condition].append(prefix + file)

  return result


def EvaluateCondition(cond, props):
  if cond == "all":
    return True

  m = re.match("(\w+):(\w+)", cond)
  if m is None:
    fatal("failed to parse condition: {}", cond)
  p, v = m.groups()
  if p not in props:
    fatal("undefined configuration property: {}", p)

  return props[p] == v


def BuildFileList(sources, props):
  ret = []
  for condition, files in sources.items():
    if EvaluateCondition(condition, props):
      ret += files
  return ret


gn_sources = ParseGNFile(for_test=False)
gn_test_sources = ParseGNFile(for_test=True)


def FilesForArch(arch):
  return BuildFileList(gn_sources, {
      "os": "linux",
      "arch": arch,
      "mode": "debug",
      "simulator": ""
  })


def FilesForTest(arch):
  return BuildFileList(gn_test_sources, {
      "os": "linux",
      "arch": arch,
      "mode": "debug",
      "simulator": ""
  })


# -----------------------------------------------------------------------------
# GCSuspects Generation

# Note that the gcsuspects file lists functions in the form:
#  mangled_name,unmangled_function_name
#
# This means that we can match just the function name by matching only
# after a comma.
ALLOWLIST = [
    # The following functions call CEntryStub which is always present.
    "MacroAssembler.*,CallRuntime",
    "CompileCallLoadPropertyWithInterceptor",
    "CallIC.*,GenerateMiss",
    # DirectCEntryStub is a special stub used on ARM.
    # It is pinned and always present.
    "DirectCEntryStub.*,GenerateCall",
    # TODO GCMole currently is sensitive enough to understand that certain
    #    functions only cause GC and return Failure simulataneously.
    #    Callsites of such functions are safe as long as they are properly
    #    check return value and propagate the Failure to the caller.
    #    It should be possible to extend GCMole to understand this.
    "Heap.*,TryEvacuateObject",
    # Ignore all StateTag methods.
    "StateTag",
    # Ignore printing of elements transition.
    "PrintElementsTransition",
    # CodeCreateEvent receives AbstractCode (a raw ptr) as an argument.
    "CodeCreateEvent",
    "WriteField",
]

GC_PATTERN = ",.*Collect.*Garbage"
SAFEPOINT_PATTERN = ",SafepointSlowPath"
ALLOWLIST_PATTERN = "|".join("(?:%s)" % p for p in ALLOWLIST)


def MergeRegexp(pattern_dict):
  return re.compile("|".join(
      "(?P<%s>%s)" % (key, value) for (key, value) in pattern_dict.items()))


IS_SPECIAL_WITHOUT_ALLOW_LIST = MergeRegexp({
    "gc": GC_PATTERN,
    "safepoint": SAFEPOINT_PATTERN
})
IS_SPECIAL_WITH_ALLOW_LIST = MergeRegexp({
    "gc": GC_PATTERN,
    "safepoint": SAFEPOINT_PATTERN,
    "allow": ALLOWLIST_PATTERN
})


class GCSuspectsCollector:

  def __init__(self, flags):
    self.gc = {}
    self.gc_caused = collections.defaultdict(lambda: [])
    self.funcs = {}
    self.current_caller = None
    self.allowlist = flags["allowlist"]
    self.is_special = IS_SPECIAL_WITH_ALLOW_LIST if self.allowlist else IS_SPECIAL_WITHOUT_ALLOW_LIST

  def AddCause(self, name, cause):
    self.gc_caused[name].append(cause)

  def Parse(self, lines):
    for funcname in lines:
      if not funcname:
        continue

      if funcname[0] != "\t":
        self.Resolve(funcname)
        self.current_caller = funcname
      else:
        name = funcname[1:]
        callers_for_name = self.Resolve(name)
        callers_for_name.add(self.current_caller)

  def Resolve(self, name):
    if name not in self.funcs:
      self.funcs[name] = set()
      m = self.is_special.search(name)
      if m:
        if m.group("gc"):
          self.gc[name] = True
          self.AddCause(name, "<GC>")
        elif m.group("safepoint"):
          self.gc[name] = True
          self.AddCause(name, "<Safepoint>")
        elif m.group("allow"):
          self.gc[name] = False

    return self.funcs[name]

  def Propagate(self):
    log("** Propagating GC information")

    def mark(funcname, callers):
      for caller in callers:
        if caller not in self.gc:
          self.gc[caller] = True
          mark(caller, self.funcs[caller])

        self.AddCause(caller, funcname)

    for funcname, callers in self.funcs.items():
      if self.gc.get(funcname, False):
        mark(funcname, callers)


def GenerateGCSuspects(arch, files, arch_cfg, flags, clang_bin_dir,
                       clang_plugins_dir):
  # Reset the global state.
  collector = GCSuspectsCollector(flags)

  log("** Building GC Suspects for {}", arch)
  for filename, stdout, stderr in InvokeClangPluginForEachFile(
      files, "dump-callees", [], arch_cfg, flags, clang_bin_dir,
      clang_plugins_dir):
    collector.Parse(stdout.splitlines())

  collector.Propagate()

  with open("gcsuspects", "w") as out:
    for name, value in collector.gc.items():
      if value:
        out.write(name + "\n")

  with open("gccauses", "w") as out:
    out.write("GC = {\n")
    for name, causes in collector.gc_caused.items():
      out.write("  '{}': [\n".format(name))
      for cause in causes:
        out.write("    '{}',\n".format(cause))
      out.write("  ],\n")
    out.write("}\n")

  log("** GCSuspects generated for {}", arch)


# ------------------------------------------------------------------------------
# Analysis


def CheckCorrectnessForArch(arch, for_test, flags, clang_bin_dir,
                            clang_plugins_dir):
  if for_test:
    files = FilesForTest(arch)
  else:
    files = FilesForArch(arch)
  arch_cfg = ARCHITECTURES[arch]

  if not flags["reuse_gcsuspects"]:
    GenerateGCSuspects(arch, files, arch_cfg, flags, clang_bin_dir,
                       clang_plugins_dir)
  else:
    log("** Reusing GCSuspects for {}", arch)

  processed_files = 0
  errors_found = False
  output = ""

  log(
      "** Searching for evaluation order problems{} for {}",
      " and dead variables" if flags["dead_vars"] else "",
      arch,
  )
  plugin_args = []
  if flags["dead_vars"]:
    plugin_args.append("--dead-vars")
  if flags["verbose_trace"]:
    plugin_args.append("--verbose")
  for filename, stdout, stderr in InvokeClangPluginForEachFile(
      files,
      "find-problems",
      plugin_args,
      arch_cfg,
      flags,
      clang_bin_dir,
      clang_plugins_dir,
  ):
    processed_files = processed_files + 1
    if not errors_found:
      errors_found = re.search("^[^:]+:\d+:\d+: (warning|error)", stderr,
                               re.MULTILINE) is not None
    if for_test:
      output = output + stderr
    else:
      sys.stdout.write(stderr)

  log(
      "** Done processing {} files. {}",
      processed_files,
      "Errors found" if errors_found else "No errors found",
  )

  return errors_found, output


def TestRun(flags, clang_bin_dir, clang_plugins_dir):
  log("** Test Run")
  errors_found, output = CheckCorrectnessForArch("x64", True, flags,
                                                 clang_bin_dir,
                                                 clang_plugins_dir)
  if not errors_found:
    log("** Test file should produce errors, but none were found. Output:")
    log(output)
    return False

  filename = "tools/gcmole/test-expectations.txt"
  with open(filename) as exp_file:
    expectations = exp_file.read()

  if output != expectations:
    log("** Output mismatch from running tests. Please run them manually.")

    for line in difflib.unified_diff(
        expectations.splitlines(),
        output.splitlines(),
        fromfile=filename,
        tofile="output",
        lineterm="",
    ):
      log("{}", line)

    log("------")
    log("--- Full output ---")
    log(output)
    log("------")

    return False

  log("** Tests ran successfully")
  return True


def main(args):
  DIR = os.path.dirname(args[0])

  clang_bin_dir = os.getenv("CLANG_BIN")
  clang_plugins_dir = os.getenv("CLANG_PLUGINS")

  if not clang_bin_dir or clang_bin_dir == "":
    fatal("CLANG_BIN not set")

  if not clang_plugins_dir or clang_plugins_dir == "":
    clang_plugins_dir = DIR

  flags = {
      #: not build gcsuspects file and reuse previously generated one.
      "reuse_gcsuspects": False,
      #:n't use parallel python runner.
      "sequential": False,
      # Print commands to console before executing them.
      "verbose": True,
      # Perform dead variable analysis.
      "dead_vars": True,
      # Enable verbose tracing from the plugin itself.
      "verbose_trace": False,
      # When building gcsuspects allowlist certain functions as if they can be
      # causing GC. Currently used to reduce number of false positives in dead
      # variables analysis. See TODO for ALLOWLIST
      "allowlist": True,
  }
  pos_args = []

  flag_regexp = re.compile("^--(no[-_]?)?([\w\-_]+)$")
  for arg in args[1:]:
    m = flag_regexp.match(arg)
    if m:
      no, flag = m.groups()
      flag = flag.replace("-", "_")
      if flag in flags:
        flags[flag] = no is None
      else:
        fatal("Unknown flag: {}", flag)
    else:
      pos_args.append(arg)

  archs = pos_args if len(pos_args) > 0 else ["ia32", "arm", "x64", "arm64"]

  any_errors_found = False
  if not TestRun(flags, clang_bin_dir, clang_plugins_dir):
    any_errors_found = True
  else:
    for arch in archs:
      if not ARCHITECTURES[arch]:
        fatal("Unknown arch: {}", arch)

      errors_found, output = CheckCorrectnessForArch(arch, False, flags,
                                                     clang_bin_dir,
                                                     clang_plugins_dir)
      any_errors_found = any_errors_found or errors_found

  sys.exit(1 if any_errors_found else 0)


if __name__ == "__main__":
  main(sys.argv)