// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_CCTEST_DISASM_REGEX_HELPER_H_
#define V8_CCTEST_DISASM_REGEX_HELPER_H_

#include <iostream>
#include <map>
#include <regex>  // NOLINT(build/c++11)
#include <vector>

#include "src/base/logging.h"
#include "src/base/macros.h"

namespace v8 {
namespace internal {

// This class provides methods for regular expression matching with an extra
// feature of user defined named capture groups which are alive across
// regex search calls.
//
// The main use case for the class is to test multiple-line assembly
// output with an ability to express dataflow or dependencies by allowing single
// definition / multiple use symbols. When processing output lines and trying to
// match them against the set of patterns a user can define a named group - a
// symbol - and a regex for matching it. If the regex with the definitions is
// matched then whenever this symbol appears again (no redefinitions though) in
// the following patterns the parser will replace the symbol reference in the
// pattern by an actual literal value matched during processing symbol
// definition. This effectively checks that all of the output lines have
// the same literal for the described symbol. To track the symbols this class
// implements a simple single-definition symbol table.
//
// Example: Lets consider a case when we want to test that the assembly
// output consists of two instructions - a load and a store; we also want
// to check that the loaded value is used as store value for the store,
// like here:
//
//    ldr x3, [x4]
//    str x3, [x5]
//
// Using special syntax for symbol definitions and uses one could write the
// following regex making sure that the load register is used by the store:
//
//    'ldr <<NamedReg:x[0-9]+>>, [x[0-9]+]'
//    'str <<NamedReg>>, [x[0-9]+]'
//
// See 'ProcessPattern' for more details.
class RegexParser {
 public:
  RegexParser()
      // Regex to parse symbol references: definitions or uses.
      //                  <<SymbolName[:'def regex']>>
      : symbol_ref_regex_("<<([a-zA-Z_][a-zA-Z0-9_]*)(?::(.*?))?>>") {}

  // Status codes used for return values and error diagnostics.
  enum class Status {
    kSuccess = 0,
    kNotMatched,
    kWrongPattern,
    kDefNotFound,
    kRedefinition,
  };

  // This class holds info on a symbol definition.
  class SymbolInfo {
   public:
    explicit SymbolInfo(const std::string& matched_value)
        : matched_value_(matched_value) {}

    // Returns an actual matched value for the symbol.
    const std::string& matched_value() const { return matched_value_; }

   private:
    std::string matched_value_;
  };

  // This class holds temporary info on a symbol while processing an input line.
  class SymbolVectorElem {
   public:
    SymbolVectorElem(bool is_def, const std::string& symbol_name)
        : is_def_(is_def), symbol_name_(symbol_name) {}

    bool is_def() const { return is_def_; }
    const std::string& symbol_name() const { return symbol_name_; }

   private:
    bool is_def_;
    std::string symbol_name_;
  };

  using SymbolMap = std::map<std::string, SymbolInfo>;
  using MatchVector = std::vector<SymbolVectorElem>;

  // Tries to match (actually search, similar to std::regex_serach) the line
  // against the pattern (possibly containing symbols references) and if
  // matched commits symbols definitions from the pattern to the symbol table.
  //
  // Returns: status of the matching attempt.
  //
  // Important: the format of pattern regexs is based on std::ECMAScript syntax
  // (http://www.cplusplus.com/reference/regex/ECMAScript/) with a few extra
  // restrictions:
  //   * no backreference (or submatch) groups
  //     - when a group (e.g. "(a|b)+") is needed use a passive group
  //       (e.g. "(?:a|b)+").
  //   * special syntax for symbol definitions: <<Name:regex>>
  //     - 'Name' must be c-ctyle variable name ([a-zA-Z_][a-zA-Z0-9_]*).
  //     - 'regex' - is a regex for the actual literal expected in the symbol
  //       definition line. It must not contain any symbol references.
  //   * special syntax for symbol uses <<Name>>
  //
  // Semantical restrictions on symbols references:
  //   * symbols mustn't be referenced before they are defined.
  //     - a pattern R1 which uses symbol 'A' mustn't be processed if a pattern
  //       R2 with the symbol 'A' definition hasn't been yet matched (R1!=R2).
  //     - A pattern mustn't define a symbol and use it inside the same regex.
  //   * symbols mustn't be redefined.
  //     - if a line has been matched against a pattern R1 with symbol 'A'
  //       then other patterns mustn't define symbol 'A'.
  //   * symbols defininitions are only committed and registered if the whole
  //     pattern is successfully matched.
  //
  // Notes:
  //   * A pattern may contain uses of the same or different symbols and
  //     definitions of different symbols however if a symbol is defined in the
  //     pattern it can't be used in the same pattern.
  //
  // Pattern example: "<<A:[0-9]+>> <<B>>, <<B> <<C:[a-z]+>>" (assuming 'B' is
  // defined and matched).
  Status ProcessPattern(const std::string& line, const std::string& pattern) {
    // Processed pattern which is going to be used for std::regex_search; symbol
    // references are replaced accordingly to the reference type - def or use.
    std::string final_pattern;
    // A vector of records for symbols references in the pattern. The format is
    // {is_definition, symbol_name}.
    MatchVector symbols_refs;
    Status status =
        ParseSymbolsInPattern(pattern, &final_pattern, &symbols_refs);
    if (status != Status::kSuccess) {
      return status;
    }

    std::smatch match;
    if (!std::regex_search(line, match, std::regex(final_pattern))) {
      return Status::kNotMatched;
    }

    // This checks that no backreference groups were used in the pattern except
    // for those added by ParseSymbolsInPattern.
    if (symbols_refs.size() != (match.size() - 1)) {
      return Status::kWrongPattern;
    }

    status = CheckSymbolsMatchedValues(symbols_refs, match);
    if (status != Status::kSuccess) {
      return status;
    }

    CommitSymbolsDefinitions(symbols_refs, match);

    return Status::kSuccess;
  }

  // Returns whether a symbol is defined in the symbol name.
  bool IsSymbolDefined(const std::string& symbol_name) const {
    auto symbol_map_iter = map_.find(symbol_name);
    return symbol_map_iter != std::end(map_);
  }

  // Returns the matched value for a symbol.
  std::string GetSymbolMatchedValue(const std::string& symbol_name) const {
    DCHECK(IsSymbolDefined(symbol_name));
    return map_.find(symbol_name)->second.matched_value();
  }

  // Prints the symbol table.
  void PrintSymbols(std::ostream& os) const {
    os << "Printing symbol table..." << std::endl;
    for (const auto& t : map_) {
      const std::string& sym_name = t.first;
      const SymbolInfo& sym_info = t.second;
      os << "<<" << sym_name << ">>: \"" << sym_info.matched_value() << "\""
         << std::endl;
    }
  }

 protected:
  // Fixed layout for the symbol reference match.
  enum SymbolMatchIndex {
    kFullSubmatch = 0,
    kName = 1,
    kDefRegex = 2,
    kSize = kDefRegex + 1,
  };

  // Processes a symbol reference: for definitions it adds the symbol regex, for
  // uses it adds actual literal from a previously matched definition. Also
  // fills the symbol references vector.
  Status ProcessSymbol(const std::smatch& match, MatchVector* symbols_refs,
                       std::string* new_pattern) const {
    bool is_def = match[SymbolMatchIndex::kDefRegex].length() != 0;
    const std::string& symbol_name = match[SymbolMatchIndex::kName];

    if (is_def) {
      // Make sure the symbol isn't already defined.
      auto symbol_iter =
          std::find_if(symbols_refs->begin(), symbols_refs->end(),
                       [symbol_name](const SymbolVectorElem& ref) -> bool {
                         return ref.symbol_name() == symbol_name;
                       });
      if (symbol_iter != std::end(*symbols_refs)) {
        return Status::kRedefinition;
      }

      symbols_refs->emplace_back(true, symbol_name);
      new_pattern->append("(");
      new_pattern->append(match[SymbolMatchIndex::kDefRegex]);
      new_pattern->append(")");
    } else {
      auto symbol_map_iter = map_.find(symbol_name);
      if (symbol_map_iter == std::end(map_)) {
        return Status::kDefNotFound;
      }

      const SymbolInfo& sym_info = symbol_map_iter->second;
      new_pattern->append("(");
      new_pattern->append(sym_info.matched_value());
      new_pattern->append(")");

      symbols_refs->emplace_back(false, symbol_name);
    }
    return Status::kSuccess;
  }

  // Parses the input pattern regex, processes symbols defs and uses inside
  // it, fills a raw pattern used for std::regex_search.
  Status ParseSymbolsInPattern(const std::string& pattern,
                               std::string* raw_pattern,
                               MatchVector* symbols_refs) const {
    std::string::const_iterator low = pattern.cbegin();
    std::string::const_iterator high = pattern.cend();
    std::smatch match;

    while (low != high) {
      // Search for a symbol reference.
      if (!std::regex_search(low, high, match, symbol_ref_regex_)) {
        raw_pattern->append(low, high);
        break;
      }

      if (match.size() != SymbolMatchIndex::kSize) {
        return Status::kWrongPattern;
      }

      raw_pattern->append(match.prefix());

      Status status = ProcessSymbol(match, symbols_refs, raw_pattern);
      if (status != Status::kSuccess) {
        return status;
      }
      low = match[SymbolMatchIndex::kFullSubmatch].second;
    }
    return Status::kSuccess;
  }

  // Checks that there are no symbol redefinitions and the symbols uses matched
  // literal values are equal to corresponding matched definitions.
  Status CheckSymbolsMatchedValues(const MatchVector& symbols_refs,
                                   const std::smatch& match) const {
    // There is a one-to-one correspondence between matched subexpressions and
    // symbols refences in the vector (by construction).
    for (size_t vec_pos = 0, size = symbols_refs.size(); vec_pos < size;
         vec_pos++) {
      auto elem = symbols_refs[vec_pos];
      auto map_iter = map_.find(elem.symbol_name());
      if (elem.is_def()) {
        if (map_iter != std::end(map_)) {
          return Status::kRedefinition;
        }
      } else {
        DCHECK(map_iter != std::end(map_));
        // We replaced use with matched definition value literal.
        DCHECK_EQ(map_iter->second.matched_value().compare(match[vec_pos + 1]),
                  0);
      }
    }
    return Status::kSuccess;
  }

  // Commits symbols definitions and their matched values to the symbol table.
  void CommitSymbolsDefinitions(const MatchVector& groups_vector,
                                const std::smatch& match) {
    for (size_t vec_pos = 0, size = groups_vector.size(); vec_pos < size;
         vec_pos++) {
      size_t match_pos = vec_pos + 1;
      auto elem = groups_vector[vec_pos];
      if (elem.is_def()) {
        auto emplace_res =
            map_.emplace(elem.symbol_name(), SymbolInfo(match[match_pos]));
        USE(emplace_res);  // Silence warning about unused variable.
        DCHECK(emplace_res.second == true);
      }
    }
  }

  const std::regex symbol_ref_regex_;
  SymbolMap map_;
};

bool CheckDisassemblyRegexPatterns(
    const char* function_name, const std::vector<std::string>& patterns_array);

}  // namespace internal
}  // namespace v8

#endif  // V8_CCTEST_DISASM_REGEX_HELPER_H_