// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/regexp/regexp-bytecode-peephole.h"

#include "src/execution/isolate.h"
#include "src/flags/flags.h"
#include "src/objects/fixed-array.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/regexp-bytecodes.h"
#include "src/utils/memcopy.h"
#include "src/utils/utils.h"
#include "src/zone/zone-containers.h"
#include "src/zone/zone.h"

namespace v8 {
namespace internal {

namespace {

struct BytecodeArgument {
  int offset;
  int length;

  BytecodeArgument(int offset, int length) : offset(offset), length(length) {}
};

struct BytecodeArgumentMapping : BytecodeArgument {
  int new_length;

  BytecodeArgumentMapping(int offset, int length, int new_length)
      : BytecodeArgument(offset, length), new_length(new_length) {}
};

struct BytecodeArgumentCheck : BytecodeArgument {
  enum CheckType { kCheckAddress = 0, kCheckValue };
  CheckType type;
  int check_offset;
  int check_length;

  BytecodeArgumentCheck(int offset, int length, int check_offset)
      : BytecodeArgument(offset, length),
        type(kCheckAddress),
        check_offset(check_offset) {}
  BytecodeArgumentCheck(int offset, int length, int check_offset,
                        int check_length)
      : BytecodeArgument(offset, length),
        type(kCheckValue),
        check_offset(check_offset),
        check_length(check_length) {}
};

// Trie-Node for storing bytecode sequences we want to optimize.
class BytecodeSequenceNode {
 public:
  // Dummy bytecode used when we need to store/return a bytecode but it's not a
  // valid bytecode in the current context.
  static constexpr int kDummyBytecode = -1;

  BytecodeSequenceNode(int bytecode, Zone* zone);
  // Adds a new node as child of the current node if it isn't a child already.
  BytecodeSequenceNode& FollowedBy(int bytecode);
  // Marks the end of a sequence and sets optimized bytecode to replace all
  // bytecodes of the sequence with.
  BytecodeSequenceNode& ReplaceWith(int bytecode);
  // Maps arguments of bytecodes in the sequence to the optimized bytecode.
  // Order of invocation determines order of arguments in the optimized
  // bytecode.
  // Invoking this method is only allowed on nodes that mark the end of a valid
  // sequence (i.e. after ReplaceWith()).
  // bytecode_index_in_sequence: Zero-based index of the referred bytecode
  // within the sequence (e.g. the bytecode passed to CreateSequence() has
  // index 0).
  // argument_offset: Zero-based offset to the argument within the bytecode
  // (e.g. the first argument that's not packed with the bytecode has offset 4).
  // argument_byte_length: Length of the argument.
  // new_argument_byte_length: Length of the argument in the new bytecode
  // (= argument_byte_length if omitted).
  BytecodeSequenceNode& MapArgument(int bytecode_index_in_sequence,
                                    int argument_offset,
                                    int argument_byte_length,
                                    int new_argument_byte_length = 0);
  // Adds a check to the sequence node making it only a valid sequence when the
  // argument of the current bytecode at the specified offset matches the offset
  // to check against.
  // argument_offset: Zero-based offset to the argument within the bytecode
  // (e.g. the first argument that's not packed with the bytecode has offset 4).
  // argument_byte_length: Length of the argument.
  // check_byte_offset: Zero-based offset relative to the beginning of the
  // sequence that needs to match the value given by argument_offset. (e.g.
  // check_byte_offset 0 matches the address of the first bytecode in the
  // sequence).
  BytecodeSequenceNode& IfArgumentEqualsOffset(int argument_offset,
                                               int argument_byte_length,
                                               int check_byte_offset);
  // Adds a check to the sequence node making it only a valid sequence when the
  // argument of the current bytecode at the specified offset matches the
  // argument of another bytecode in the sequence.
  // This is similar to IfArgumentEqualsOffset, except that this method matches
  // the values of both arguments.
  BytecodeSequenceNode& IfArgumentEqualsValueAtOffset(
      int argument_offset, int argument_byte_length,
      int other_bytecode_index_in_sequence, int other_argument_offset,
      int other_argument_byte_length);
  // Marks an argument as unused.
  // All arguments that are not mapped explicitly have to be marked as unused.
  // bytecode_index_in_sequence: Zero-based index of the referred bytecode
  // within the sequence (e.g. the bytecode passed to CreateSequence() has
  // index 0).
  // argument_offset: Zero-based offset to the argument within the bytecode
  // (e.g. the first argument that's not packed with the bytecode has offset 4).
  // argument_byte_length: Length of the argument.
  BytecodeSequenceNode& IgnoreArgument(int bytecode_index_in_sequence,
                                       int argument_offset,
                                       int argument_byte_length);
  // Checks if the current node is valid for the sequence. I.e. all conditions
  // set by IfArgumentEqualsOffset and IfArgumentEquals are fulfilled by this
  // node for the actual bytecode sequence.
  bool CheckArguments(const byte* bytecode, int pc);
  // Returns whether this node marks the end of a valid sequence (i.e. can be
  // replaced with an optimized bytecode).
  bool IsSequence() const;
  // Returns the length of the sequence in bytes.
  int SequenceLength() const;
  // Returns the optimized bytecode for the node or kDummyBytecode if it is not
  // the end of a valid sequence.
  int OptimizedBytecode() const;
  // Returns the child of the current node matching the given bytecode or
  // nullptr if no such child is found.
  BytecodeSequenceNode* Find(int bytecode) const;
  // Returns number of arguments mapped to the current node.
  // Invoking this method is only allowed on nodes that mark the end of a valid
  // sequence (i.e. if IsSequence())
  size_t ArgumentSize() const;
  // Returns the argument-mapping of the argument at index.
  // Invoking this method is only allowed on nodes that mark the end of a valid
  // sequence (i.e. if IsSequence())
  BytecodeArgumentMapping ArgumentMapping(size_t index) const;
  // Returns an iterator to begin of ignored arguments.
  // Invoking this method is only allowed on nodes that mark the end of a valid
  // sequence (i.e. if IsSequence())
  ZoneLinkedList<BytecodeArgument>::iterator ArgumentIgnoredBegin() const;
  // Returns an iterator to end of ignored arguments.
  // Invoking this method is only allowed on nodes that mark the end of a valid
  // sequence (i.e. if IsSequence())
  ZoneLinkedList<BytecodeArgument>::iterator ArgumentIgnoredEnd() const;
  // Returns whether the current node has ignored argument or not.
  bool HasIgnoredArguments() const;

 private:
  // Returns a node in the sequence specified by its index within the sequence.
  BytecodeSequenceNode& GetNodeByIndexInSequence(int index_in_sequence);
  Zone* zone() const;

  int bytecode_;
  int bytecode_replacement_;
  int index_in_sequence_;
  int start_offset_;
  BytecodeSequenceNode* parent_;
  ZoneUnorderedMap<int, BytecodeSequenceNode*> children_;
  ZoneVector<BytecodeArgumentMapping>* argument_mapping_;
  ZoneLinkedList<BytecodeArgumentCheck>* argument_check_;
  ZoneLinkedList<BytecodeArgument>* argument_ignored_;

  Zone* zone_;
};

// These definitions are here in order to please the linker, which in debug mode
// sometimes requires static constants to be defined in .cc files.
constexpr int BytecodeSequenceNode::kDummyBytecode;

class RegExpBytecodePeephole {
 public:
  RegExpBytecodePeephole(Zone* zone, size_t buffer_size,
                         const ZoneUnorderedMap<int, int>& jump_edges);

  // Parses bytecode and fills the internal buffer with the potentially
  // optimized bytecode. Returns true when optimizations were performed, false
  // otherwise.
  bool OptimizeBytecode(const byte* bytecode, int length);
  // Copies the internal bytecode buffer to another buffer. The caller is
  // responsible for allocating/freeing the memory.
  void CopyOptimizedBytecode(byte* to_address) const;
  int Length() const;

 private:
  // Sets up all sequences that are going to be used.
  void DefineStandardSequences();
  // Starts a new bytecode sequence.
  BytecodeSequenceNode& CreateSequence(int bytecode);
  // Checks for optimization candidates at pc and emits optimized bytecode to
  // the internal buffer. Returns the length of replaced bytecodes in bytes.
  int TryOptimizeSequence(const byte* bytecode, int bytecode_length,
                          int start_pc);
  // Emits optimized bytecode to the internal buffer. start_pc points to the
  // start of the sequence in bytecode and last_node is the last
  // BytecodeSequenceNode of the matching sequence found.
  void EmitOptimization(int start_pc, const byte* bytecode,
                        const BytecodeSequenceNode& last_node);
  // Adds a relative jump source fixup at pos.
  // Jump source fixups are used to find offsets in the new bytecode that
  // contain jump sources.
  void AddJumpSourceFixup(int fixup, int pos);
  // Adds a relative jump destination fixup at pos.
  // Jump destination fixups are used to find offsets in the new bytecode that
  // can be jumped to.
  void AddJumpDestinationFixup(int fixup, int pos);
  // Sets an absolute jump destination fixup at pos.
  void SetJumpDestinationFixup(int fixup, int pos);
  // Prepare internal structures used to fixup jumps.
  void PrepareJumpStructures(const ZoneUnorderedMap<int, int>& jump_edges);
  // Updates all jump targets in the new bytecode.
  void FixJumps();
  // Update a single jump.
  void FixJump(int jump_source, int jump_destination);
  void AddSentinelFixups(int pos);
  template <typename T>
  void EmitValue(T value);
  template <typename T>
  void OverwriteValue(int offset, T value);
  void CopyRangeToOutput(const byte* orig_bytecode, int start, int length);
  void SetRange(byte value, int count);
  void EmitArgument(int start_pc, const byte* bytecode,
                    BytecodeArgumentMapping arg);
  int pc() const;
  Zone* zone() const;

  ZoneVector<byte> optimized_bytecode_buffer_;
  BytecodeSequenceNode* sequences_;
  // Jumps used in old bytecode.
  // Key: Jump source (offset where destination is stored in old bytecode)
  // Value: Destination
  ZoneMap<int, int> jump_edges_;
  // Jumps used in new bytecode.
  // Key: Jump source (offset where destination is stored in new bytecode)
  // Value: Destination
  ZoneMap<int, int> jump_edges_mapped_;
  // Number of times a jump destination is used within the bytecode.
  // Key: Jump destination (offset in old bytecode).
  // Value: Number of times jump destination is used.
  ZoneMap<int, int> jump_usage_counts_;
  // Maps offsets in old bytecode to fixups of sources (delta to new bytecode).
  // Key: Offset in old bytecode from where the fixup is valid.
  // Value: Delta to map jump source from old bytecode to new bytecode in bytes.
  ZoneMap<int, int> jump_source_fixups_;
  // Maps offsets in old bytecode to fixups of destinations (delta to new
  // bytecode).
  // Key: Offset in old bytecode from where the fixup is valid.
  // Value: Delta to map jump destinations from old bytecode to new bytecode in
  // bytes.
  ZoneMap<int, int> jump_destination_fixups_;

  Zone* zone_;

  DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpBytecodePeephole);
};

template <typename T>
T GetValue(const byte* buffer, int pos) {
  DCHECK(IsAligned(reinterpret_cast<Address>(buffer + pos), alignof(T)));
  return *reinterpret_cast<const T*>(buffer + pos);
}

int32_t GetArgumentValue(const byte* bytecode, int offset, int length) {
  switch (length) {
    case 1:
      return GetValue<byte>(bytecode, offset);
      break;
    case 2:
      return GetValue<int16_t>(bytecode, offset);
      break;
    case 4:
      return GetValue<int32_t>(bytecode, offset);
      break;
    default:
      UNREACHABLE();
  }
}

BytecodeSequenceNode::BytecodeSequenceNode(int bytecode, Zone* zone)
    : bytecode_(bytecode),
      bytecode_replacement_(kDummyBytecode),
      index_in_sequence_(0),
      start_offset_(0),
      parent_(nullptr),
      children_(ZoneUnorderedMap<int, BytecodeSequenceNode*>(zone)),
      argument_mapping_(zone->New<ZoneVector<BytecodeArgumentMapping>>(zone)),
      argument_check_(zone->New<ZoneLinkedList<BytecodeArgumentCheck>>(zone)),
      argument_ignored_(zone->New<ZoneLinkedList<BytecodeArgument>>(zone)),
      zone_(zone) {}

BytecodeSequenceNode& BytecodeSequenceNode::FollowedBy(int bytecode) {
  DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount);

  if (children_.find(bytecode) == children_.end()) {
    BytecodeSequenceNode* new_node =
        zone()->New<BytecodeSequenceNode>(bytecode, zone());
    // If node is not the first in the sequence, set offsets and parent.
    if (bytecode_ != kDummyBytecode) {
      new_node->start_offset_ = start_offset_ + RegExpBytecodeLength(bytecode_);
      new_node->index_in_sequence_ = index_in_sequence_ + 1;
      new_node->parent_ = this;
    }
    children_[bytecode] = new_node;
  }

  return *children_[bytecode];
}

BytecodeSequenceNode& BytecodeSequenceNode::ReplaceWith(int bytecode) {
  DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount);

  bytecode_replacement_ = bytecode;

  return *this;
}

BytecodeSequenceNode& BytecodeSequenceNode::MapArgument(
    int bytecode_index_in_sequence, int argument_offset,
    int argument_byte_length, int new_argument_byte_length) {
  DCHECK(IsSequence());
  DCHECK_LE(bytecode_index_in_sequence, index_in_sequence_);

  BytecodeSequenceNode& ref_node =
      GetNodeByIndexInSequence(bytecode_index_in_sequence);
  DCHECK_LT(argument_offset, RegExpBytecodeLength(ref_node.bytecode_));

  int absolute_offset = ref_node.start_offset_ + argument_offset;
  if (new_argument_byte_length == 0) {
    new_argument_byte_length = argument_byte_length;
  }

  argument_mapping_->push_back(BytecodeArgumentMapping{
      absolute_offset, argument_byte_length, new_argument_byte_length});

  return *this;
}

BytecodeSequenceNode& BytecodeSequenceNode::IfArgumentEqualsOffset(
    int argument_offset, int argument_byte_length, int check_byte_offset) {
  DCHECK_LT(argument_offset, RegExpBytecodeLength(bytecode_));
  DCHECK(argument_byte_length == 1 || argument_byte_length == 2 ||
         argument_byte_length == 4);

  int absolute_offset = start_offset_ + argument_offset;

  argument_check_->push_back(BytecodeArgumentCheck{
      absolute_offset, argument_byte_length, check_byte_offset});

  return *this;
}

BytecodeSequenceNode& BytecodeSequenceNode::IfArgumentEqualsValueAtOffset(
    int argument_offset, int argument_byte_length,
    int other_bytecode_index_in_sequence, int other_argument_offset,
    int other_argument_byte_length) {
  DCHECK_LT(argument_offset, RegExpBytecodeLength(bytecode_));
  DCHECK_LE(other_bytecode_index_in_sequence, index_in_sequence_);
  DCHECK_EQ(argument_byte_length, other_argument_byte_length);

  BytecodeSequenceNode& ref_node =
      GetNodeByIndexInSequence(other_bytecode_index_in_sequence);
  DCHECK_LT(other_argument_offset, RegExpBytecodeLength(ref_node.bytecode_));

  int absolute_offset = start_offset_ + argument_offset;
  int other_absolute_offset = ref_node.start_offset_ + other_argument_offset;

  argument_check_->push_back(
      BytecodeArgumentCheck{absolute_offset, argument_byte_length,
                            other_absolute_offset, other_argument_byte_length});

  return *this;
}

BytecodeSequenceNode& BytecodeSequenceNode::IgnoreArgument(
    int bytecode_index_in_sequence, int argument_offset,
    int argument_byte_length) {
  DCHECK(IsSequence());
  DCHECK_LE(bytecode_index_in_sequence, index_in_sequence_);

  BytecodeSequenceNode& ref_node =
      GetNodeByIndexInSequence(bytecode_index_in_sequence);
  DCHECK_LT(argument_offset, RegExpBytecodeLength(ref_node.bytecode_));

  int absolute_offset = ref_node.start_offset_ + argument_offset;

  argument_ignored_->push_back(
      BytecodeArgument{absolute_offset, argument_byte_length});

  return *this;
}

bool BytecodeSequenceNode::CheckArguments(const byte* bytecode, int pc) {
  bool is_valid = true;
  for (auto check_iter = argument_check_->begin();
       check_iter != argument_check_->end() && is_valid; check_iter++) {
    auto value =
        GetArgumentValue(bytecode, pc + check_iter->offset, check_iter->length);
    if (check_iter->type == BytecodeArgumentCheck::kCheckAddress) {
      is_valid &= value == pc + check_iter->check_offset;
    } else if (check_iter->type == BytecodeArgumentCheck::kCheckValue) {
      auto other_value = GetArgumentValue(
          bytecode, pc + check_iter->check_offset, check_iter->check_length);
      is_valid &= value == other_value;
    } else {
      UNREACHABLE();
    }
  }
  return is_valid;
}

bool BytecodeSequenceNode::IsSequence() const {
  return bytecode_replacement_ != kDummyBytecode;
}

int BytecodeSequenceNode::SequenceLength() const {
  return start_offset_ + RegExpBytecodeLength(bytecode_);
}

int BytecodeSequenceNode::OptimizedBytecode() const {
  return bytecode_replacement_;
}

BytecodeSequenceNode* BytecodeSequenceNode::Find(int bytecode) const {
  auto found = children_.find(bytecode);
  if (found == children_.end()) return nullptr;
  return found->second;
}

size_t BytecodeSequenceNode::ArgumentSize() const {
  DCHECK(IsSequence());
  return argument_mapping_->size();
}

BytecodeArgumentMapping BytecodeSequenceNode::ArgumentMapping(
    size_t index) const {
  DCHECK(IsSequence());
  DCHECK(argument_mapping_ != nullptr);
  DCHECK_LT(index, argument_mapping_->size());

  return argument_mapping_->at(index);
}

ZoneLinkedList<BytecodeArgument>::iterator
BytecodeSequenceNode::ArgumentIgnoredBegin() const {
  DCHECK(IsSequence());
  DCHECK(argument_ignored_ != nullptr);
  return argument_ignored_->begin();
}

ZoneLinkedList<BytecodeArgument>::iterator
BytecodeSequenceNode::ArgumentIgnoredEnd() const {
  DCHECK(IsSequence());
  DCHECK(argument_ignored_ != nullptr);
  return argument_ignored_->end();
}

bool BytecodeSequenceNode::HasIgnoredArguments() const {
  return argument_ignored_ != nullptr;
}

BytecodeSequenceNode& BytecodeSequenceNode::GetNodeByIndexInSequence(
    int index_in_sequence) {
  DCHECK_LE(index_in_sequence, index_in_sequence_);

  if (index_in_sequence < index_in_sequence_) {
    DCHECK(parent_ != nullptr);
    return parent_->GetNodeByIndexInSequence(index_in_sequence);
  } else {
    return *this;
  }
}

Zone* BytecodeSequenceNode::zone() const { return zone_; }

RegExpBytecodePeephole::RegExpBytecodePeephole(
    Zone* zone, size_t buffer_size,
    const ZoneUnorderedMap<int, int>& jump_edges)
    : optimized_bytecode_buffer_(zone),
      sequences_(zone->New<BytecodeSequenceNode>(
          BytecodeSequenceNode::kDummyBytecode, zone)),
      jump_edges_(zone),
      jump_edges_mapped_(zone),
      jump_usage_counts_(zone),
      jump_source_fixups_(zone),
      jump_destination_fixups_(zone),
      zone_(zone) {
  optimized_bytecode_buffer_.reserve(buffer_size);
  PrepareJumpStructures(jump_edges);
  DefineStandardSequences();
  // Sentinel fixups at beginning of bytecode (position -1) so we don't have to
  // check for end of iterator inside the fixup loop.
  // In general fixups are deltas of original offsets of jump
  // sources/destinations (in the old bytecode) to find them in the new
  // bytecode. All jump targets are fixed after the new bytecode is fully
  // emitted in the internal buffer.
  AddSentinelFixups(-1);
  // Sentinel fixups at end of (old) bytecode so we don't have to check for
  // end of iterator inside the fixup loop.
  DCHECK_LE(buffer_size, std::numeric_limits<int>::max());
  AddSentinelFixups(static_cast<int>(buffer_size));
}

void RegExpBytecodePeephole::DefineStandardSequences() {
  // Commonly used sequences can be found by creating regexp bytecode traces
  // (--trace-regexp-bytecodes) and using v8/tools/regexp-sequences.py.
  CreateSequence(BC_LOAD_CURRENT_CHAR)
      .FollowedBy(BC_CHECK_BIT_IN_TABLE)
      .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
      // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
      // first bytecode in this sequence.
      .IfArgumentEqualsOffset(4, 4, 0)
      .ReplaceWith(BC_SKIP_UNTIL_BIT_IN_TABLE)
      .MapArgument(0, 1, 3)      // load offset
      .MapArgument(2, 1, 3, 4)   // advance by
      .MapArgument(1, 8, 16)     // bit table
      .MapArgument(1, 4, 4)      // goto when match
      .MapArgument(0, 4, 4)      // goto on failure
      .IgnoreArgument(2, 4, 4);  // loop jump

  CreateSequence(BC_CHECK_CURRENT_POSITION)
      .FollowedBy(BC_LOAD_CURRENT_CHAR_UNCHECKED)
      .FollowedBy(BC_CHECK_CHAR)
      .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
      // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
      // first bytecode in this sequence.
      .IfArgumentEqualsOffset(4, 4, 0)
      .ReplaceWith(BC_SKIP_UNTIL_CHAR_POS_CHECKED)
      .MapArgument(1, 1, 3)      // load offset
      .MapArgument(3, 1, 3, 2)   // advance_by
      .MapArgument(2, 1, 3, 2)   // c
      .MapArgument(0, 1, 3, 4)   // eats at least
      .MapArgument(2, 4, 4)      // goto when match
      .MapArgument(0, 4, 4)      // goto on failure
      .IgnoreArgument(3, 4, 4);  // loop jump

  CreateSequence(BC_CHECK_CURRENT_POSITION)
      .FollowedBy(BC_LOAD_CURRENT_CHAR_UNCHECKED)
      .FollowedBy(BC_AND_CHECK_CHAR)
      .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
      // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
      // first bytecode in this sequence.
      .IfArgumentEqualsOffset(4, 4, 0)
      .ReplaceWith(BC_SKIP_UNTIL_CHAR_AND)
      .MapArgument(1, 1, 3)      // load offset
      .MapArgument(3, 1, 3, 2)   // advance_by
      .MapArgument(2, 1, 3, 2)   // c
      .MapArgument(2, 4, 4)      // mask
      .MapArgument(0, 1, 3, 4)   // eats at least
      .MapArgument(2, 8, 4)      // goto when match
      .MapArgument(0, 4, 4)      // goto on failure
      .IgnoreArgument(3, 4, 4);  // loop jump

  // TODO(pthier): It might make sense for short sequences like this one to only
  // optimize them if the resulting optimization is not longer than the current
  // one. This could be the case if there are jumps inside the sequence and we
  // have to replicate parts of the sequence. A method to mark such sequences
  // might be useful.
  CreateSequence(BC_LOAD_CURRENT_CHAR)
      .FollowedBy(BC_CHECK_CHAR)
      .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
      // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
      // first bytecode in this sequence.
      .IfArgumentEqualsOffset(4, 4, 0)
      .ReplaceWith(BC_SKIP_UNTIL_CHAR)
      .MapArgument(0, 1, 3)      // load offset
      .MapArgument(2, 1, 3, 2)   // advance by
      .MapArgument(1, 1, 3, 2)   // character
      .MapArgument(1, 4, 4)      // goto when match
      .MapArgument(0, 4, 4)      // goto on failure
      .IgnoreArgument(2, 4, 4);  // loop jump

  CreateSequence(BC_LOAD_CURRENT_CHAR)
      .FollowedBy(BC_CHECK_CHAR)
      .FollowedBy(BC_CHECK_CHAR)
      // Sequence is only valid if the jump targets of both CHECK_CHAR bytecodes
      // are equal.
      .IfArgumentEqualsValueAtOffset(4, 4, 1, 4, 4)
      .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
      // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
      // first bytecode in this sequence.
      .IfArgumentEqualsOffset(4, 4, 0)
      .ReplaceWith(BC_SKIP_UNTIL_CHAR_OR_CHAR)
      .MapArgument(0, 1, 3)      // load offset
      .MapArgument(3, 1, 3, 4)   // advance by
      .MapArgument(1, 1, 3, 2)   // character 1
      .MapArgument(2, 1, 3, 2)   // character 2
      .MapArgument(1, 4, 4)      // goto when match
      .MapArgument(0, 4, 4)      // goto on failure
      .IgnoreArgument(2, 4, 4)   // goto when match 2
      .IgnoreArgument(3, 4, 4);  // loop jump

  CreateSequence(BC_LOAD_CURRENT_CHAR)
      .FollowedBy(BC_CHECK_GT)
      // Sequence is only valid if the jump target of CHECK_GT is the first
      // bytecode AFTER the whole sequence.
      .IfArgumentEqualsOffset(4, 4, 56)
      .FollowedBy(BC_CHECK_BIT_IN_TABLE)
      // Sequence is only valid if the jump target of CHECK_BIT_IN_TABLE is
      // the ADVANCE_CP_AND_GOTO bytecode at the end of the sequence.
      .IfArgumentEqualsOffset(4, 4, 48)
      .FollowedBy(BC_GOTO)
      // Sequence is only valid if the jump target of GOTO is the same as the
      // jump target of CHECK_GT (i.e. both jump to the first bytecode AFTER the
      // whole sequence.
      .IfArgumentEqualsValueAtOffset(4, 4, 1, 4, 4)
      .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
      // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
      // first bytecode in this sequence.
      .IfArgumentEqualsOffset(4, 4, 0)
      .ReplaceWith(BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE)
      .MapArgument(0, 1, 3)      // load offset
      .MapArgument(4, 1, 3, 2)   // advance by
      .MapArgument(1, 1, 3, 2)   // character
      .MapArgument(2, 8, 16)     // bit table
      .MapArgument(1, 4, 4)      // goto when match
      .MapArgument(0, 4, 4)      // goto on failure
      .IgnoreArgument(2, 4, 4)   // indirect loop jump
      .IgnoreArgument(3, 4, 4)   // jump out of loop
      .IgnoreArgument(4, 4, 4);  // loop jump
}

bool RegExpBytecodePeephole::OptimizeBytecode(const byte* bytecode,
                                              int length) {
  int old_pc = 0;
  bool did_optimize = false;

  while (old_pc < length) {
    int replaced_len = TryOptimizeSequence(bytecode, length, old_pc);
    if (replaced_len > 0) {
      old_pc += replaced_len;
      did_optimize = true;
    } else {
      int bc = bytecode[old_pc];
      int bc_len = RegExpBytecodeLength(bc);
      CopyRangeToOutput(bytecode, old_pc, bc_len);
      old_pc += bc_len;
    }
  }

  if (did_optimize) {
    FixJumps();
  }

  return did_optimize;
}

void RegExpBytecodePeephole::CopyOptimizedBytecode(byte* to_address) const {
  MemCopy(to_address, &(*optimized_bytecode_buffer_.begin()), Length());
}

int RegExpBytecodePeephole::Length() const { return pc(); }

BytecodeSequenceNode& RegExpBytecodePeephole::CreateSequence(int bytecode) {
  DCHECK(sequences_ != nullptr);
  DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount);

  return sequences_->FollowedBy(bytecode);
}

int RegExpBytecodePeephole::TryOptimizeSequence(const byte* bytecode,
                                                int bytecode_length,
                                                int start_pc) {
  BytecodeSequenceNode* seq_node = sequences_;
  BytecodeSequenceNode* valid_seq_end = nullptr;

  int current_pc = start_pc;

  // Check for the longest valid sequence matching any of the pre-defined
  // sequences in the Trie data structure.
  while (current_pc < bytecode_length) {
    seq_node = seq_node->Find(bytecode[current_pc]);
    if (seq_node == nullptr) break;
    if (!seq_node->CheckArguments(bytecode, start_pc)) break;

    if (seq_node->IsSequence()) valid_seq_end = seq_node;
    current_pc += RegExpBytecodeLength(bytecode[current_pc]);
  }

  if (valid_seq_end) {
    EmitOptimization(start_pc, bytecode, *valid_seq_end);
    return valid_seq_end->SequenceLength();
  }

  return 0;
}

void RegExpBytecodePeephole::EmitOptimization(
    int start_pc, const byte* bytecode, const BytecodeSequenceNode& last_node) {
#ifdef DEBUG
  int optimized_start_pc = pc();
#endif
  // Jump sources that are mapped or marked as unused will be deleted at the end
  // of this method. We don't delete them immediately as we might need the
  // information when we have to preserve bytecodes at the end.
  // TODO(pthier): Replace with a stack-allocated data structure.
  ZoneLinkedList<int> delete_jumps = ZoneLinkedList<int>(zone());

  uint32_t bc = last_node.OptimizedBytecode();
  EmitValue(bc);

  for (size_t arg = 0; arg < last_node.ArgumentSize(); arg++) {
    BytecodeArgumentMapping arg_map = last_node.ArgumentMapping(arg);
    int arg_pos = start_pc + arg_map.offset;
    // If we map any jump source we mark the old source for deletion and insert
    // a new jump.
    auto jump_edge_iter = jump_edges_.find(arg_pos);
    if (jump_edge_iter != jump_edges_.end()) {
      int jump_source = jump_edge_iter->first;
      int jump_destination = jump_edge_iter->second;
      // Add new jump edge add current position.
      jump_edges_mapped_.emplace(Length(), jump_destination);
      // Mark old jump edge for deletion.
      delete_jumps.push_back(jump_source);
      // Decrement usage count of jump destination.
      auto jump_count_iter = jump_usage_counts_.find(jump_destination);
      DCHECK(jump_count_iter != jump_usage_counts_.end());
      int& usage_count = jump_count_iter->second;
      --usage_count;
    }
    // TODO(pthier): DCHECK that mapped arguments are never sources of jumps
    // to destinations inside the sequence.
    EmitArgument(start_pc, bytecode, arg_map);
  }
  DCHECK_EQ(pc(), optimized_start_pc +
                      RegExpBytecodeLength(last_node.OptimizedBytecode()));

  // Remove jumps from arguments we ignore.
  if (last_node.HasIgnoredArguments()) {
    for (auto ignored_arg = last_node.ArgumentIgnoredBegin();
         ignored_arg != last_node.ArgumentIgnoredEnd(); ignored_arg++) {
      auto jump_edge_iter = jump_edges_.find(start_pc + ignored_arg->offset);
      if (jump_edge_iter != jump_edges_.end()) {
        int jump_source = jump_edge_iter->first;
        int jump_destination = jump_edge_iter->second;
        // Mark old jump edge for deletion.
        delete_jumps.push_back(jump_source);
        // Decrement usage count of jump destination.
        auto jump_count_iter = jump_usage_counts_.find(jump_destination);
        DCHECK(jump_count_iter != jump_usage_counts_.end());
        int& usage_count = jump_count_iter->second;
        --usage_count;
      }
    }
  }

  int fixup_length = RegExpBytecodeLength(bc) - last_node.SequenceLength();

  // Check if there are any jumps inside the old sequence.
  // If so we have to keep the bytecodes that are jumped to around.
  auto jump_destination_candidate = jump_usage_counts_.upper_bound(start_pc);
  int jump_candidate_destination = jump_destination_candidate->first;
  int jump_candidate_count = jump_destination_candidate->second;
  // Jump destinations only jumped to from inside the sequence will be ignored.
  while (jump_destination_candidate != jump_usage_counts_.end() &&
         jump_candidate_count == 0) {
    ++jump_destination_candidate;
    jump_candidate_destination = jump_destination_candidate->first;
    jump_candidate_count = jump_destination_candidate->second;
  }

  int preserve_from = start_pc + last_node.SequenceLength();
  if (jump_destination_candidate != jump_usage_counts_.end() &&
      jump_candidate_destination < start_pc + last_node.SequenceLength()) {
    preserve_from = jump_candidate_destination;
    // Check if any jump in the sequence we are preserving has a jump
    // destination inside the optimized sequence before the current position we
    // want to preserve. If so we have to preserve all bytecodes starting at
    // this jump destination.
    for (auto jump_iter = jump_edges_.lower_bound(preserve_from);
         jump_iter != jump_edges_.end() &&
         jump_iter->first /* jump source */ <
             start_pc + last_node.SequenceLength();
         ++jump_iter) {
      int jump_destination = jump_iter->second;
      if (jump_destination > start_pc && jump_destination < preserve_from) {
        preserve_from = jump_destination;
      }
    }

    // We preserve everything to the end of the sequence. This is conservative
    // since it would be enough to preserve all bytecudes up to an unconditional
    // jump.
    int preserve_length = start_pc + last_node.SequenceLength() - preserve_from;
    fixup_length += preserve_length;
    // Jumps after the start of the preserved sequence need fixup.
    AddJumpSourceFixup(fixup_length,
                       start_pc + last_node.SequenceLength() - preserve_length);
    // All jump targets after the start of the optimized sequence need to be
    // fixed relative to the length of the optimized sequence including
    // bytecodes we preserved.
    AddJumpDestinationFixup(fixup_length, start_pc + 1);
    // Jumps to the sequence we preserved need absolute fixup as they could
    // occur before or after the sequence.
    SetJumpDestinationFixup(pc() - preserve_from, preserve_from);
    CopyRangeToOutput(bytecode, preserve_from, preserve_length);
  } else {
    AddJumpDestinationFixup(fixup_length, start_pc + 1);
    // Jumps after the end of the old sequence need fixup.
    AddJumpSourceFixup(fixup_length, start_pc + last_node.SequenceLength());
  }

  // Delete jumps we definitely don't need anymore
  for (int del : delete_jumps) {
    if (del < preserve_from) {
      jump_edges_.erase(del);
    }
  }
}

void RegExpBytecodePeephole::AddJumpSourceFixup(int fixup, int pos) {
  auto previous_fixup = jump_source_fixups_.lower_bound(pos);
  DCHECK(previous_fixup != jump_source_fixups_.end());
  DCHECK(previous_fixup != jump_source_fixups_.begin());

  int previous_fixup_value = (--previous_fixup)->second;
  jump_source_fixups_[pos] = previous_fixup_value + fixup;
}

void RegExpBytecodePeephole::AddJumpDestinationFixup(int fixup, int pos) {
  auto previous_fixup = jump_destination_fixups_.lower_bound(pos);
  DCHECK(previous_fixup != jump_destination_fixups_.end());
  DCHECK(previous_fixup != jump_destination_fixups_.begin());

  int previous_fixup_value = (--previous_fixup)->second;
  jump_destination_fixups_[pos] = previous_fixup_value + fixup;
}

void RegExpBytecodePeephole::SetJumpDestinationFixup(int fixup, int pos) {
  auto previous_fixup = jump_destination_fixups_.lower_bound(pos);
  DCHECK(previous_fixup != jump_destination_fixups_.end());
  DCHECK(previous_fixup != jump_destination_fixups_.begin());

  int previous_fixup_value = (--previous_fixup)->second;
  jump_destination_fixups_.emplace(pos, fixup);
  jump_destination_fixups_.emplace(pos + 1, previous_fixup_value);
}

void RegExpBytecodePeephole::PrepareJumpStructures(
    const ZoneUnorderedMap<int, int>& jump_edges) {
  for (auto jump_edge : jump_edges) {
    int jump_source = jump_edge.first;
    int jump_destination = jump_edge.second;

    jump_edges_.emplace(jump_source, jump_destination);
    jump_usage_counts_[jump_destination]++;
  }
}

void RegExpBytecodePeephole::FixJumps() {
  int position_fixup = 0;
  // Next position where fixup changes.
  auto next_source_fixup = jump_source_fixups_.lower_bound(0);
  int next_source_fixup_offset = next_source_fixup->first;
  int next_source_fixup_value = next_source_fixup->second;

  for (auto jump_edge : jump_edges_) {
    int jump_source = jump_edge.first;
    int jump_destination = jump_edge.second;
    while (jump_source >= next_source_fixup_offset) {
      position_fixup = next_source_fixup_value;
      ++next_source_fixup;
      next_source_fixup_offset = next_source_fixup->first;
      next_source_fixup_value = next_source_fixup->second;
    }
    jump_source += position_fixup;

    FixJump(jump_source, jump_destination);
  }

  // Mapped jump edges don't need source fixups, as the position already is an
  // offset in the new bytecode.
  for (auto jump_edge : jump_edges_mapped_) {
    int jump_source = jump_edge.first;
    int jump_destination = jump_edge.second;

    FixJump(jump_source, jump_destination);
  }
}

void RegExpBytecodePeephole::FixJump(int jump_source, int jump_destination) {
  int fixed_jump_destination =
      jump_destination +
      (--jump_destination_fixups_.upper_bound(jump_destination))->second;
  DCHECK_LT(fixed_jump_destination, Length());
#ifdef DEBUG
  // TODO(pthier): This check could be better if we track the bytecodes
  // actually used and check if we jump to one of them.
  byte jump_bc = optimized_bytecode_buffer_[fixed_jump_destination];
  DCHECK_GT(jump_bc, 0);
  DCHECK_LT(jump_bc, kRegExpBytecodeCount);
#endif

  if (jump_destination != fixed_jump_destination) {
    OverwriteValue<uint32_t>(jump_source, fixed_jump_destination);
  }
}

void RegExpBytecodePeephole::AddSentinelFixups(int pos) {
  jump_source_fixups_.emplace(pos, 0);
  jump_destination_fixups_.emplace(pos, 0);
}

template <typename T>
void RegExpBytecodePeephole::EmitValue(T value) {
  DCHECK(optimized_bytecode_buffer_.begin() + pc() ==
         optimized_bytecode_buffer_.end());
  byte* value_byte_iter = reinterpret_cast<byte*>(&value);
  optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(),
                                    value_byte_iter,
                                    value_byte_iter + sizeof(T));
}

template <typename T>
void RegExpBytecodePeephole::OverwriteValue(int offset, T value) {
  byte* value_byte_iter = reinterpret_cast<byte*>(&value);
  byte* value_byte_iter_end = value_byte_iter + sizeof(T);
  while (value_byte_iter < value_byte_iter_end) {
    optimized_bytecode_buffer_[offset++] = *value_byte_iter++;
  }
}

void RegExpBytecodePeephole::CopyRangeToOutput(const byte* orig_bytecode,
                                               int start, int length) {
  DCHECK(optimized_bytecode_buffer_.begin() + pc() ==
         optimized_bytecode_buffer_.end());
  optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(),
                                    orig_bytecode + start,
                                    orig_bytecode + start + length);
}

void RegExpBytecodePeephole::SetRange(byte value, int count) {
  DCHECK(optimized_bytecode_buffer_.begin() + pc() ==
         optimized_bytecode_buffer_.end());
  optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(), count,
                                    value);
}

void RegExpBytecodePeephole::EmitArgument(int start_pc, const byte* bytecode,
                                          BytecodeArgumentMapping arg) {
  int arg_pos = start_pc + arg.offset;
  switch (arg.length) {
    case 1:
      DCHECK_EQ(arg.new_length, arg.length);
      EmitValue(GetValue<byte>(bytecode, arg_pos));
      break;
    case 2:
      DCHECK_EQ(arg.new_length, arg.length);
      EmitValue(GetValue<uint16_t>(bytecode, arg_pos));
      break;
    case 3: {
      // Length 3 only occurs in 'packed' arguments where the lowermost byte is
      // the current bytecode, and the remaining 3 bytes are the packed value.
      //
      // We load 4 bytes from position - 1 and shift out the bytecode.
#ifdef V8_TARGET_BIG_ENDIAN
      UNIMPLEMENTED();
      int32_t val = 0;
#else
      int32_t val = GetValue<int32_t>(bytecode, arg_pos - 1) >> kBitsPerByte;
#endif  // V8_TARGET_BIG_ENDIAN

      switch (arg.new_length) {
        case 2:
          EmitValue<uint16_t>(val);
          break;
        case 3: {
          // Pack with previously emitted value.
          auto prev_val =
              GetValue<int32_t>(&(*optimized_bytecode_buffer_.begin()),
                                Length() - sizeof(uint32_t));
#ifdef V8_TARGET_BIG_ENDIAN
      UNIMPLEMENTED();
      USE(prev_val);
#else
          DCHECK_EQ(prev_val & 0xFFFFFF00, 0);
          OverwriteValue<uint32_t>(
              pc() - sizeof(uint32_t),
              (static_cast<uint32_t>(val) << 8) | (prev_val & 0xFF));
#endif  // V8_TARGET_BIG_ENDIAN
          break;
        }
        case 4:
          EmitValue<uint32_t>(val);
          break;
      }
      break;
    }
    case 4:
      DCHECK_EQ(arg.new_length, arg.length);
      EmitValue(GetValue<uint32_t>(bytecode, arg_pos));
      break;
    case 8:
      DCHECK_EQ(arg.new_length, arg.length);
      EmitValue(GetValue<uint64_t>(bytecode, arg_pos));
      break;
    default:
      CopyRangeToOutput(bytecode, arg_pos, Min(arg.length, arg.new_length));
      if (arg.length < arg.new_length) {
        SetRange(0x00, arg.new_length - arg.length);
      }
      break;
  }
}

int RegExpBytecodePeephole::pc() const {
  DCHECK_LE(optimized_bytecode_buffer_.size(), std::numeric_limits<int>::max());
  return static_cast<int>(optimized_bytecode_buffer_.size());
}

Zone* RegExpBytecodePeephole::zone() const { return zone_; }

}  // namespace

// static
Handle<ByteArray> RegExpBytecodePeepholeOptimization::OptimizeBytecode(
    Isolate* isolate, Zone* zone, Handle<String> source, const byte* bytecode,
    int length, const ZoneUnorderedMap<int, int>& jump_edges) {
  RegExpBytecodePeephole peephole(zone, length, jump_edges);
  bool did_optimize = peephole.OptimizeBytecode(bytecode, length);
  Handle<ByteArray> array = isolate->factory()->NewByteArray(peephole.Length());
  peephole.CopyOptimizedBytecode(array->GetDataStartAddress());

  if (did_optimize && FLAG_trace_regexp_peephole_optimization) {
    PrintF("Original Bytecode:\n");
    RegExpBytecodeDisassemble(bytecode, length, source->ToCString().get());
    PrintF("Optimized Bytecode:\n");
    RegExpBytecodeDisassemble(array->GetDataStartAddress(), peephole.Length(),
                              source->ToCString().get());
  }

  return array;
}

}  // namespace internal
}  // namespace v8