[regexp] Move AST-to-Node code to a dedicated file

Prior to this CL, jsregexp contains a bunch of things that are slightly
related but would be cleaner in separate files, including: AST-to-Node
transformations, the compiler implementation, and a debugging printer.

This CL extracts AST-to-Node transformations.

Bug: v8:9359
Change-Id: I030cfca5c40cfd72e3a7abe2188e4654cfe2277c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1655303
Auto-Submit: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Yang Guo <yangguo@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62148}

[regexp] Move AST-to-Node code to a dedicated file
Prior to this CL, jsregexp contains a bunch of things that are slightly
related but would be cleaner in separate files, including: AST-to-Node
transformations, the compiler implementation, and a debugging printer.

This CL extracts AST-to-Node transformations.

Bug: v8:9359
Change-Id: I030cfca5c40cfd72e3a7abe2188e4654cfe2277c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1655303
Auto-Submit: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Yang Guo <yangguo@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62148}
811bfbbc · Jakob Gruber · Commit Bot · df13503d · 811bfbbc · 811bfbbc
Commit 811bfbbc authored Jun 13, 2019 by Jakob Gruber Committed by Commit Bot Jun 13, 2019
6 changed files
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -2683,6 +2683,8 @@ v8_source_set("v8_base_without_compiler") {
    "src/regexp/regexp-ast.cc",
    "src/regexp/regexp-ast.h",
    "src/regexp/regexp-bytecodes.h",
+    "src/regexp/regexp-compiler-tonode.cc",
+    "src/regexp/regexp-compiler.h",
    "src/regexp/regexp-interpreter.cc",
    "src/regexp/regexp-interpreter.h",
    "src/regexp/regexp-macro-assembler-arch.h",

--- a/src/regexp/jsregexp.cc
+++ b/src/regexp/jsregexp.cc
@@ -17,6 +17,7 @@
 #include "src/heap/heap-inl.h"
 #include "src/objects/elements.h"
 #include "src/regexp/jsregexp-inl.h"
+#include "src/regexp/regexp-compiler.h"
 #include "src/regexp/regexp-interpreter.h"
 #include "src/regexp/regexp-macro-assembler-arch.h"
 #include "src/regexp/regexp-macro-assembler-irregexp.h"
@@ -40,6 +41,8 @@
 namespace v8 {
 namespace internal {

+using namespace regexp_compiler_constants;  // NOLINT(build/namespaces)
+
 V8_WARN_UNUSED_RESULT
 static inline MaybeHandle<Object> ThrowRegExpException(
    Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
@@ -844,149 +847,6 @@ DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
 }


-class FrequencyCollator {
- public:
-  FrequencyCollator() : total_samples_(0) {
-    for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
-      frequencies_[i] = CharacterFrequency(i);
-    }
-  }
-
-  void CountCharacter(int character) {
-    int index = (character & RegExpMacroAssembler::kTableMask);
-    frequencies_[index].Increment();
-    total_samples_++;
-  }
-
-  // Does not measure in percent, but rather per-128 (the table size from the
-  // regexp macro assembler).
-  int Frequency(int in_character) {
-    DCHECK((in_character & RegExpMacroAssembler::kTableMask) == in_character);
-    if (total_samples_ < 1) return 1;  // Division by zero.
-    int freq_in_per128 =
-        (frequencies_[in_character].counter() * 128) / total_samples_;
-    return freq_in_per128;
-  }
-
- private:
-  class CharacterFrequency {
-   public:
-    CharacterFrequency() : counter_(0), character_(-1) { }
-    explicit CharacterFrequency(int character)
-        : counter_(0), character_(character) { }
-
-    void Increment() { counter_++; }
-    int counter() { return counter_; }
-    int character() { return character_; }
-
-   private:
-    int counter_;
-    int character_;
-  };
-
-
- private:
-  CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
-  int total_samples_;
-};
-
-
-class RegExpCompiler {
- public:
-  RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
-                 bool is_one_byte);
-
-  int AllocateRegister() {
-    if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
-      reg_exp_too_big_ = true;
-      return next_register_;
-    }
-    return next_register_++;
-  }
-
-  // Lookarounds to match lone surrogates for unicode character class matches
-  // are never nested. We can therefore reuse registers.
-  int UnicodeLookaroundStackRegister() {
-    if (unicode_lookaround_stack_register_ == kNoRegister) {
-      unicode_lookaround_stack_register_ = AllocateRegister();
-    }
-    return unicode_lookaround_stack_register_;
-  }
-
-  int UnicodeLookaroundPositionRegister() {
-    if (unicode_lookaround_position_register_ == kNoRegister) {
-      unicode_lookaround_position_register_ = AllocateRegister();
-    }
-    return unicode_lookaround_position_register_;
-  }
-
-  RegExpEngine::CompilationResult Assemble(Isolate* isolate,
-                                           RegExpMacroAssembler* assembler,
-                                           RegExpNode* start, int capture_count,
-                                           Handle<String> pattern);
-
-  inline void AddWork(RegExpNode* node) {
-    if (!node->on_work_list() && !node->label()->is_bound()) {
-      node->set_on_work_list(true);
-      work_list_->push_back(node);
-    }
-  }
-
-  static const int kImplementationOffset = 0;
-  static const int kNumberOfRegistersOffset = 0;
-  static const int kCodeOffset = 1;
-
-  RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
-  EndNode* accept() { return accept_; }
-
-  static const int kMaxRecursion = 100;
-  inline int recursion_depth() { return recursion_depth_; }
-  inline void IncrementRecursionDepth() { recursion_depth_++; }
-  inline void DecrementRecursionDepth() { recursion_depth_--; }
-
-  void SetRegExpTooBig() { reg_exp_too_big_ = true; }
-
-  inline bool one_byte() { return one_byte_; }
-  inline bool optimize() { return optimize_; }
-  inline void set_optimize(bool value) { optimize_ = value; }
-  inline bool limiting_recursion() { return limiting_recursion_; }
-  inline void set_limiting_recursion(bool value) {
-    limiting_recursion_ = value;
-  }
-  bool read_backward() { return read_backward_; }
-  void set_read_backward(bool value) { read_backward_ = value; }
-  FrequencyCollator* frequency_collator() { return &frequency_collator_; }
-
-  int current_expansion_factor() { return current_expansion_factor_; }
-  void set_current_expansion_factor(int value) {
-    current_expansion_factor_ = value;
-  }
-
-  Isolate* isolate() const { return isolate_; }
-  Zone* zone() const { return zone_; }
-
-  static const int kNoRegister = -1;
-
- private:
-  EndNode* accept_;
-  int next_register_;
-  int unicode_lookaround_stack_register_;
-  int unicode_lookaround_position_register_;
-  std::vector<RegExpNode*>* work_list_;
-  int recursion_depth_;
-  RegExpMacroAssembler* macro_assembler_;
-  bool one_byte_;
-  bool reg_exp_too_big_;
-  bool limiting_recursion_;
-  bool optimize_;
-  bool read_backward_;
-  int current_expansion_factor_;
-  FrequencyCollator frequency_collator_;
-  Isolate* isolate_;
-  Zone* zone_;
-};
-
-
 class RecursionCheck {
 public:
  explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
@@ -2700,13 +2560,6 @@ RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) {
  return set_replacement(this);
 }

-// We need to check for the following characters: 0x39C 0x3BC 0x178.
-static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
-  // TODO(dcarney): this could be a lot more efficient.
-  return range.Contains(0x039C) || range.Contains(0x03BC) ||
-         range.Contains(0x0178);
-}
-

 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
  for (int i = 0; i < ranges->length(); i++) {
@@ -3518,30 +3371,6 @@ class AlternativeGenerationList {
  AlternativeGeneration a_few_alt_gens_[kAFew];
 };

-
-static const uc32 kRangeEndMarker = 0x110000;
-
-// The '2' variant is has inclusive from and exclusive to.
-// This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
-// which include WhiteSpace (7.2) or LineTerminator (7.3) values.
-static const int kSpaceRanges[] = {
-    '\t',   '\r' + 1, ' ',    ' ' + 1, 0x00A0, 0x00A1, 0x1680,
-    0x1681, 0x2000,   0x200B, 0x2028,  0x202A, 0x202F, 0x2030,
-    0x205F, 0x2060,   0x3000, 0x3001,  0xFEFF, 0xFF00, kRangeEndMarker};
-static const int kSpaceRangeCount = arraysize(kSpaceRanges);
-
-static const int kWordRanges[] = {
-    '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, kRangeEndMarker};
-static const int kWordRangeCount = arraysize(kWordRanges);
-static const int kDigitRanges[] = {'0', '9' + 1, kRangeEndMarker};
-static const int kDigitRangeCount = arraysize(kDigitRanges);
-static const int kSurrogateRanges[] = {
-    kLeadSurrogateStart, kLeadSurrogateStart + 1, kRangeEndMarker};
-static const int kSurrogateRangeCount = arraysize(kSurrogateRanges);
-static const int kLineTerminatorRanges[] = {
-    0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, kRangeEndMarker};
-static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);
-
 void BoyerMoorePositionInfo::Set(int character) {
  SetInterval(Interval(character, character));
 }
@@ -4684,1530 +4513,6 @@ void RegExpEngine::DotPrint(const char* label,

 #endif  // DEBUG

-
-// -------------------------------------------------------------------
-// Tree to graph conversion
-
-RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
-                               RegExpNode* on_success) {
-  ZoneList<TextElement>* elms =
-      new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());
-  elms->Add(TextElement::Atom(this), compiler->zone());
-  return new (compiler->zone())
-      TextNode(elms, compiler->read_backward(), on_success);
-}
-
-
-RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
-                               RegExpNode* on_success) {
-  return new (compiler->zone())
-      TextNode(elements(), compiler->read_backward(), on_success);
-}
-
-
-static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
-                                 const int* special_class,
-                                 int length) {
-  length--;  // Remove final marker.
-  DCHECK_EQ(kRangeEndMarker, special_class[length]);
-  DCHECK_NE(0, ranges->length());
-  DCHECK_NE(0, length);
-  DCHECK_NE(0, special_class[0]);
-  if (ranges->length() != (length >> 1) + 1) {
-    return false;
-  }
-  CharacterRange range = ranges->at(0);
-  if (range.from() != 0) {
-    return false;
-  }
-  for (int i = 0; i < length; i += 2) {
-    if (special_class[i] != (range.to() + 1)) {
-      return false;
-    }
-    range = ranges->at((i >> 1) + 1);
-    if (special_class[i+1] != range.from()) {
-      return false;
-    }
-  }
-  if (range.to() != String::kMaxCodePoint) {
-    return false;
-  }
-  return true;
-}
-
-
-static bool CompareRanges(ZoneList<CharacterRange>* ranges,
-                          const int* special_class,
-                          int length) {
-  length--;  // Remove final marker.
-  DCHECK_EQ(kRangeEndMarker, special_class[length]);
-  if (ranges->length() * 2 != length) {
-    return false;
-  }
-  for (int i = 0; i < length; i += 2) {
-    CharacterRange range = ranges->at(i >> 1);
-    if (range.from() != special_class[i] ||
-        range.to() != special_class[i + 1] - 1) {
-      return false;
-    }
-  }
-  return true;
-}
-
-
-bool RegExpCharacterClass::is_standard(Zone* zone) {
-  // TODO(lrn): Remove need for this function, by not throwing away information
-  // along the way.
-  if (is_negated()) {
-    return false;
-  }
-  if (set_.is_standard()) {
-    return true;
-  }
-  if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
-    set_.set_standard_set_type('s');
-    return true;
-  }
-  if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
-    set_.set_standard_set_type('S');
-    return true;
-  }
-  if (CompareInverseRanges(set_.ranges(zone),
-                           kLineTerminatorRanges,
-                           kLineTerminatorRangeCount)) {
-    set_.set_standard_set_type('.');
-    return true;
-  }
-  if (CompareRanges(set_.ranges(zone),
-                    kLineTerminatorRanges,
-                    kLineTerminatorRangeCount)) {
-    set_.set_standard_set_type('n');
-    return true;
-  }
-  if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
-    set_.set_standard_set_type('w');
-    return true;
-  }
-  if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
-    set_.set_standard_set_type('W');
-    return true;
-  }
-  return false;
-}
-
-
-UnicodeRangeSplitter::UnicodeRangeSplitter(Zone* zone,
-                                           ZoneList<CharacterRange>* base)
-    : zone_(zone),
-      table_(zone),
-      bmp_(nullptr),
-      lead_surrogates_(nullptr),
-      trail_surrogates_(nullptr),
-      non_bmp_(nullptr) {
-  // The unicode range splitter categorizes given character ranges into:
-  // - Code points from the BMP representable by one code unit.
-  // - Code points outside the BMP that need to be split into surrogate pairs.
-  // - Lone lead surrogates.
-  // - Lone trail surrogates.
-  // Lone surrogates are valid code points, even though no actual characters.
-  // They require special matching to make sure we do not split surrogate pairs.
-  // We use the dispatch table to accomplish this. The base range is split up
-  // by the table by the overlay ranges, and the Call callback is used to
-  // filter and collect ranges for each category.
-  for (int i = 0; i < base->length(); i++) {
-    table_.AddRange(base->at(i), kBase, zone_);
-  }
-  // Add overlay ranges.
-  table_.AddRange(CharacterRange::Range(0, kLeadSurrogateStart - 1),
-                  kBmpCodePoints, zone_);
-  table_.AddRange(CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd),
-                  kLeadSurrogates, zone_);
-  table_.AddRange(
-      CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
-      kTrailSurrogates, zone_);
-  table_.AddRange(
-      CharacterRange::Range(kTrailSurrogateEnd + 1, kNonBmpStart - 1),
-      kBmpCodePoints, zone_);
-  table_.AddRange(CharacterRange::Range(kNonBmpStart, kNonBmpEnd),
-                  kNonBmpCodePoints, zone_);
-  table_.ForEach(this);
-}
-
-
-void UnicodeRangeSplitter::Call(uc32 from, DispatchTable::Entry entry) {
-  OutSet* outset = entry.out_set();
-  if (!outset->Get(kBase)) return;
-  ZoneList<CharacterRange>** target = nullptr;
-  if (outset->Get(kBmpCodePoints)) {
-    target = &bmp_;
-  } else if (outset->Get(kLeadSurrogates)) {
-    target = &lead_surrogates_;
-  } else if (outset->Get(kTrailSurrogates)) {
-    target = &trail_surrogates_;
-  } else {
-    DCHECK(outset->Get(kNonBmpCodePoints));
-    target = &non_bmp_;
-  }
-  if (*target == nullptr)
-    *target = new (zone_) ZoneList<CharacterRange>(2, zone_);
-  (*target)->Add(CharacterRange::Range(entry.from(), entry.to()), zone_);
-}
-
-void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
-                      RegExpNode* on_success, UnicodeRangeSplitter* splitter) {
-  ZoneList<CharacterRange>* bmp = splitter->bmp();
-  if (bmp == nullptr) return;
-  JSRegExp::Flags default_flags = JSRegExp::Flags();
-  result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
-      compiler->zone(), bmp, compiler->read_backward(), on_success,
-      default_flags)));
-}
-
-void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
-                             RegExpNode* on_success,
-                             UnicodeRangeSplitter* splitter) {
-  ZoneList<CharacterRange>* non_bmp = splitter->non_bmp();
-  if (non_bmp == nullptr) return;
-  DCHECK(!compiler->one_byte());
-  Zone* zone = compiler->zone();
-  JSRegExp::Flags default_flags = JSRegExp::Flags();
-  CharacterRange::Canonicalize(non_bmp);
-  for (int i = 0; i < non_bmp->length(); i++) {
-    // Match surrogate pair.
-    // E.g. [\u10005-\u11005] becomes
-    //      \ud800[\udc05-\udfff]|
-    //      [\ud801-\ud803][\udc00-\udfff]|
-    //      \ud804[\udc00-\udc05]
-    uc32 from = non_bmp->at(i).from();
-    uc32 to = non_bmp->at(i).to();
-    uc16 from_l = unibrow::Utf16::LeadSurrogate(from);
-    uc16 from_t = unibrow::Utf16::TrailSurrogate(from);
-    uc16 to_l = unibrow::Utf16::LeadSurrogate(to);
-    uc16 to_t = unibrow::Utf16::TrailSurrogate(to);
-    if (from_l == to_l) {
-      // The lead surrogate is the same.
-      result->AddAlternative(
-          GuardedAlternative(TextNode::CreateForSurrogatePair(
-              zone, CharacterRange::Singleton(from_l),
-              CharacterRange::Range(from_t, to_t), compiler->read_backward(),
-              on_success, default_flags)));
-    } else {
-      if (from_t != kTrailSurrogateStart) {
-        // Add [from_l][from_t-\udfff]
-        result->AddAlternative(
-            GuardedAlternative(TextNode::CreateForSurrogatePair(
-                zone, CharacterRange::Singleton(from_l),
-                CharacterRange::Range(from_t, kTrailSurrogateEnd),
-                compiler->read_backward(), on_success, default_flags)));
-        from_l++;
-      }
-      if (to_t != kTrailSurrogateEnd) {
-        // Add [to_l][\udc00-to_t]
-        result->AddAlternative(
-            GuardedAlternative(TextNode::CreateForSurrogatePair(
-                zone, CharacterRange::Singleton(to_l),
-                CharacterRange::Range(kTrailSurrogateStart, to_t),
-                compiler->read_backward(), on_success, default_flags)));
-        to_l--;
-      }
-      if (from_l <= to_l) {
-        // Add [from_l-to_l][\udc00-\udfff]
-        result->AddAlternative(
-            GuardedAlternative(TextNode::CreateForSurrogatePair(
-                zone, CharacterRange::Range(from_l, to_l),
-                CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
-                compiler->read_backward(), on_success, default_flags)));
-      }
-    }
-  }
-}
-
-RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
-    RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,
-    ZoneList<CharacterRange>* match, RegExpNode* on_success, bool read_backward,
-    JSRegExp::Flags flags) {
-  Zone* zone = compiler->zone();
-  RegExpNode* match_node = TextNode::CreateForCharacterRanges(
-      zone, match, read_backward, on_success, flags);
-  int stack_register = compiler->UnicodeLookaroundStackRegister();
-  int position_register = compiler->UnicodeLookaroundPositionRegister();
-  RegExpLookaround::Builder lookaround(false, match_node, stack_register,
-                                       position_register);
-  RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
-      zone, lookbehind, !read_backward, lookaround.on_match_success(), flags);
-  return lookaround.ForMatch(negative_match);
-}
-
-RegExpNode* MatchAndNegativeLookaroundInReadDirection(
-    RegExpCompiler* compiler, ZoneList<CharacterRange>* match,
-    ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,
-    bool read_backward, JSRegExp::Flags flags) {
-  Zone* zone = compiler->zone();
-  int stack_register = compiler->UnicodeLookaroundStackRegister();
-  int position_register = compiler->UnicodeLookaroundPositionRegister();
-  RegExpLookaround::Builder lookaround(false, on_success, stack_register,
-                                       position_register);
-  RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
-      zone, lookahead, read_backward, lookaround.on_match_success(), flags);
-  return TextNode::CreateForCharacterRanges(
-      zone, match, read_backward, lookaround.ForMatch(negative_match), flags);
-}
-
-void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
-                           RegExpNode* on_success,
-                           UnicodeRangeSplitter* splitter) {
-  JSRegExp::Flags default_flags = JSRegExp::Flags();
-  ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates();
-  if (lead_surrogates == nullptr) return;
-  Zone* zone = compiler->zone();
-  // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]).
-  ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
-      zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
-
-  RegExpNode* match;
-  if (compiler->read_backward()) {
-    // Reading backward. Assert that reading forward, there is no trail
-    // surrogate, and then backward match the lead surrogate.
-    match = NegativeLookaroundAgainstReadDirectionAndMatch(
-        compiler, trail_surrogates, lead_surrogates, on_success, true,
-        default_flags);
-  } else {
-    // Reading forward. Forward match the lead surrogate and assert that
-    // no trail surrogate follows.
-    match = MatchAndNegativeLookaroundInReadDirection(
-        compiler, lead_surrogates, trail_surrogates, on_success, false,
-        default_flags);
-  }
-  result->AddAlternative(GuardedAlternative(match));
-}
-
-void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
-                            RegExpNode* on_success,
-                            UnicodeRangeSplitter* splitter) {
-  JSRegExp::Flags default_flags = JSRegExp::Flags();
-  ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates();
-  if (trail_surrogates == nullptr) return;
-  Zone* zone = compiler->zone();
-  // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01
-  ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
-      zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
-
-  RegExpNode* match;
-  if (compiler->read_backward()) {
-    // Reading backward. Backward match the trail surrogate and assert that no
-    // lead surrogate precedes it.
-    match = MatchAndNegativeLookaroundInReadDirection(
-        compiler, trail_surrogates, lead_surrogates, on_success, true,
-        default_flags);
-  } else {
-    // Reading forward. Assert that reading backward, there is no lead
-    // surrogate, and then forward match the trail surrogate.
-    match = NegativeLookaroundAgainstReadDirectionAndMatch(
-        compiler, lead_surrogates, trail_surrogates, on_success, false,
-        default_flags);
-  }
-  result->AddAlternative(GuardedAlternative(match));
-}
-
-RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
-                              RegExpNode* on_success) {
-  // This implements ES2015 21.2.5.2.3, AdvanceStringIndex.
-  DCHECK(!compiler->read_backward());
-  Zone* zone = compiler->zone();
-  // Advance any character. If the character happens to be a lead surrogate and
-  // we advanced into the middle of a surrogate pair, it will work out, as
-  // nothing will match from there. We will have to advance again, consuming
-  // the associated trail surrogate.
-  ZoneList<CharacterRange>* range = CharacterRange::List(
-      zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit));
-  JSRegExp::Flags default_flags = JSRegExp::Flags();
-  return TextNode::CreateForCharacterRanges(zone, range, false, on_success,
-                                            default_flags);
-}
-
-void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
-#ifdef V8_INTL_SUPPORT
-  DCHECK(CharacterRange::IsCanonical(ranges));
-
-  // Micro-optimization to avoid passing large ranges to UnicodeSet::closeOver.
-  // See also https://crbug.com/v8/6727.
-  // TODO(jgruber): This only covers the special case of the {0,0x10FFFF} range,
-  // which we use frequently internally. But large ranges can also easily be
-  // created by the user. We might want to have a more general caching mechanism
-  // for such ranges.
-  if (ranges->length() == 1 && ranges->at(0).IsEverything(kNonBmpEnd)) return;
-
-  // Use ICU to compute the case fold closure over the ranges.
-  icu::UnicodeSet set;
-  for (int i = 0; i < ranges->length(); i++) {
-    set.add(ranges->at(i).from(), ranges->at(i).to());
-  }
-  ranges->Clear();
-  set.closeOver(USET_CASE_INSENSITIVE);
-  // Full case mapping map single characters to multiple characters.
-  // Those are represented as strings in the set. Remove them so that
-  // we end up with only simple and common case mappings.
-  set.removeAllStrings();
-  for (int i = 0; i < set.getRangeCount(); i++) {
-    ranges->Add(CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
-                zone);
-  }
-  // No errors and everything we collected have been ranges.
-  CharacterRange::Canonicalize(ranges);
-#endif  // V8_INTL_SUPPORT
-}
-
-
-RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
-                                         RegExpNode* on_success) {
-  set_.Canonicalize();
-  Zone* zone = compiler->zone();
-  ZoneList<CharacterRange>* ranges = this->ranges(zone);
-  if (NeedsUnicodeCaseEquivalents(flags_)) {
-    AddUnicodeCaseEquivalents(ranges, zone);
-  }
-  if (IsUnicode(flags_) && !compiler->one_byte() &&
-      !contains_split_surrogate()) {
-    if (is_negated()) {
-      ZoneList<CharacterRange>* negated =
-          new (zone) ZoneList<CharacterRange>(2, zone);
-      CharacterRange::Negate(ranges, negated, zone);
-      ranges = negated;
-    }
-    if (ranges->length() == 0) {
-      JSRegExp::Flags default_flags;
-      RegExpCharacterClass* fail =
-          new (zone) RegExpCharacterClass(zone, ranges, default_flags);
-      return new (zone) TextNode(fail, compiler->read_backward(), on_success);
-    }
-    if (standard_type() == '*') {
-      return UnanchoredAdvance(compiler, on_success);
-    } else {
-      ChoiceNode* result = new (zone) ChoiceNode(2, zone);
-      UnicodeRangeSplitter splitter(zone, ranges);
-      AddBmpCharacters(compiler, result, on_success, &splitter);
-      AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
-      AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
-      AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
-      return result;
-    }
-  } else {
-    return new (zone) TextNode(this, compiler->read_backward(), on_success);
-  }
-}
-
-
-int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
-  RegExpAtom* atom1 = (*a)->AsAtom();
-  RegExpAtom* atom2 = (*b)->AsAtom();
-  uc16 character1 = atom1->data().at(0);
-  uc16 character2 = atom2->data().at(0);
-  if (character1 < character2) return -1;
-  if (character1 > character2) return 1;
-  return 0;
-}
-
-#ifdef V8_INTL_SUPPORT
-
-// Case Insensitve comparesion
-int CompareFirstCharCaseInsensitve(RegExpTree* const* a, RegExpTree* const* b) {
-  RegExpAtom* atom1 = (*a)->AsAtom();
-  RegExpAtom* atom2 = (*b)->AsAtom();
-  icu::UnicodeString character1(atom1->data().at(0));
-  return character1.caseCompare(atom2->data().at(0), U_FOLD_CASE_DEFAULT);
-}
-
-#else
-
-static unibrow::uchar Canonical(
-    unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
-    unibrow::uchar c) {
-  unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth];
-  int length = canonicalize->get(c, '\0', chars);
-  DCHECK_LE(length, 1);
-  unibrow::uchar canonical = c;
-  if (length == 1) canonical = chars[0];
-  return canonical;
-}
-
-int CompareFirstCharCaseIndependent(
-    unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
-    RegExpTree* const* a, RegExpTree* const* b) {
-  RegExpAtom* atom1 = (*a)->AsAtom();
-  RegExpAtom* atom2 = (*b)->AsAtom();
-  unibrow::uchar character1 = atom1->data().at(0);
-  unibrow::uchar character2 = atom2->data().at(0);
-  if (character1 == character2) return 0;
-  if (character1 >= 'a' || character2 >= 'a') {
-    character1 = Canonical(canonicalize, character1);
-    character2 = Canonical(canonicalize, character2);
-  }
-  return static_cast<int>(character1) - static_cast<int>(character2);
-}
-#endif  // V8_INTL_SUPPORT
-
-// We can stable sort runs of atoms, since the order does not matter if they
-// start with different characters.
-// Returns true if any consecutive atoms were found.
-bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
-  ZoneList<RegExpTree*>* alternatives = this->alternatives();
-  int length = alternatives->length();
-  bool found_consecutive_atoms = false;
-  for (int i = 0; i < length; i++) {
-    while (i < length) {
-      RegExpTree* alternative = alternatives->at(i);
-      if (alternative->IsAtom()) break;
-      i++;
-    }
-    // i is length or it is the index of an atom.
-    if (i == length) break;
-    int first_atom = i;
-    JSRegExp::Flags flags = alternatives->at(i)->AsAtom()->flags();
-    i++;
-    while (i < length) {
-      RegExpTree* alternative = alternatives->at(i);
-      if (!alternative->IsAtom()) break;
-      if (alternative->AsAtom()->flags() != flags) break;
-      i++;
-    }
-    // Sort atoms to get ones with common prefixes together.
-    // This step is more tricky if we are in a case-independent regexp,
-    // because it would change /is|I/ to /I|is/, and order matters when
-    // the regexp parts don't match only disjoint starting points. To fix
-    // this we have a version of CompareFirstChar that uses case-
-    // independent character classes for comparison.
-    DCHECK_LT(first_atom, alternatives->length());
-    DCHECK_LE(i, alternatives->length());
-    DCHECK_LE(first_atom, i);
-    if (IgnoreCase(flags)) {
-#ifdef V8_INTL_SUPPORT
-      alternatives->StableSort(CompareFirstCharCaseInsensitve, first_atom,
-                               i - first_atom);
-#else
-      unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
-          compiler->isolate()->regexp_macro_assembler_canonicalize();
-      auto compare_closure =
-          [canonicalize](RegExpTree* const* a, RegExpTree* const* b) {
-            return CompareFirstCharCaseIndependent(canonicalize, a, b);
-          };
-      alternatives->StableSort(compare_closure, first_atom, i - first_atom);
-#endif  // V8_INTL_SUPPORT
-    } else {
-      alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom);
-    }
-    if (i - first_atom > 1) found_consecutive_atoms = true;
-  }
-  return found_consecutive_atoms;
-}
-
-
-// Optimizes ab|ac|az to a(?:b|c|z).
-//
-// Walks the alternatives looking for runs of consecutive atoms that have the
-// same flags and the same first character (compared case-insensitively when
-// IgnoreCase(flags) holds). A run of more than two atoms is replaced, in
-// place, by one alternative made of the common prefix followed by a
-// disjunction of the remaining suffixes. Everything else is copied through
-// unchanged and the list is trimmed to its new length at the end.
-void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
-  Zone* zone = compiler->zone();
-  ZoneList<RegExpTree*>* alternatives = this->alternatives();
-  int length = alternatives->length();
-
-  // The list is rewritten in place: write_posn is the next output slot and
-  // i is the next input element still to be examined.
-  int write_posn = 0;
-  int i = 0;
-  while (i < length) {
-    RegExpTree* alternative = alternatives->at(i);
-    if (!alternative->IsAtom()) {
-      // Non-atoms cannot take part in a run; copy them through.
-      alternatives->at(write_posn++) = alternatives->at(i);
-      i++;
-      continue;
-    }
-    RegExpAtom* const atom = alternative->AsAtom();
-    JSRegExp::Flags flags = atom->flags();
-#ifdef V8_INTL_SUPPORT
-    icu::UnicodeString common_prefix(atom->data().at(0));
-#else
-    unibrow::uchar common_prefix = atom->data().at(0);
-#endif  // V8_INTL_SUPPORT
-    int first_with_prefix = i;
-    int prefix_length = atom->length();
-    i++;
-    // Extend the run while the following alternatives are atoms with the same
-    // flags and an equivalent first character.
-    while (i < length) {
-      alternative = alternatives->at(i);
-      if (!alternative->IsAtom()) break;
-      RegExpAtom* const atom = alternative->AsAtom();
-      if (atom->flags() != flags) break;
-#ifdef V8_INTL_SUPPORT
-      icu::UnicodeString new_prefix(atom->data().at(0));
-      if (new_prefix != common_prefix) {
-        if (!IgnoreCase(flags)) break;
-        if (common_prefix.caseCompare(new_prefix, U_FOLD_CASE_DEFAULT) != 0)
-          break;
-      }
-#else
-      unibrow::uchar new_prefix = atom->data().at(0);
-      if (new_prefix != common_prefix) {
-        if (!IgnoreCase(flags)) break;
-        unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
-            compiler->isolate()->regexp_macro_assembler_canonicalize();
-        new_prefix = Canonical(canonicalize, new_prefix);
-        common_prefix = Canonical(canonicalize, common_prefix);
-        if (new_prefix != common_prefix) break;
-      }
-#endif  // V8_INTL_SUPPORT
-      // The shared prefix cannot be longer than the shortest atom in the run.
-      prefix_length = Min(prefix_length, atom->length());
-      i++;
-    }
-    if (i > first_with_prefix + 2) {
-      // Found worthwhile run of alternatives with common prefix of at least one
-      // character.  The sorting function above did not sort on more than one
-      // character for reasons of correctness, but there may still be a longer
-      // common prefix if the terms were similar or presorted in the input.
-      // Find out how long the common prefix is.
-      int run_length = i - first_with_prefix;
-      RegExpAtom* const atom = alternatives->at(first_with_prefix)->AsAtom();
-      for (int j = 1; j < run_length && prefix_length > 1; j++) {
-        RegExpAtom* old_atom =
-            alternatives->at(j + first_with_prefix)->AsAtom();
-        for (int k = 1; k < prefix_length; k++) {
-          if (atom->data().at(k) != old_atom->data().at(k)) {
-            prefix_length = k;
-            break;
-          }
-        }
-      }
-      // Build prefix(?:suffix1|suffix2|...). The prefix text is taken from the
-      // first atom of the run.
-      RegExpAtom* prefix = new (zone)
-          RegExpAtom(atom->data().SubVector(0, prefix_length), flags);
-      ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone);
-      pair->Add(prefix, zone);
-      ZoneList<RegExpTree*>* suffixes =
-          new (zone) ZoneList<RegExpTree*>(run_length, zone);
-      for (int j = 0; j < run_length; j++) {
-        RegExpAtom* old_atom =
-            alternatives->at(j + first_with_prefix)->AsAtom();
-        int len = old_atom->length();
-        if (len == prefix_length) {
-          // The atom is exactly the prefix, so its suffix is empty.
-          suffixes->Add(new (zone) RegExpEmpty(), zone);
-        } else {
-          RegExpTree* suffix = new (zone) RegExpAtom(
-              old_atom->data().SubVector(prefix_length, old_atom->length()),
-              flags);
-          suffixes->Add(suffix, zone);
-        }
-      }
-      pair->Add(new (zone) RegExpDisjunction(suffixes), zone);
-      alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair);
-    } else {
-      // Just copy any non-worthwhile alternatives.
-      for (int j = first_with_prefix; j < i; j++) {
-        alternatives->at(write_posn++) = alternatives->at(j);
-      }
-    }
-  }
-  alternatives->Rewind(write_posn);  // Trim end of array.
-}
-
-
-// Optimizes b|c|z to [bcz].
-//
-// Each run of two or more adjacent single-character atoms with identical
-// flags is collapsed, in place, into one character class. All other
-// alternatives are kept as they are, in their original order.
-void RegExpDisjunction::FixSingleCharacterDisjunctions(
-    RegExpCompiler* compiler) {
-  Zone* zone = compiler->zone();
-  ZoneList<RegExpTree*>* alternatives = this->alternatives();
-  const int count = alternatives->length();
-
-  // |out| is the next free slot of the compacted list, |in| the next
-  // alternative to look at.
-  int out = 0;
-  for (int in = 0; in < count;) {
-    RegExpTree* candidate = alternatives->at(in);
-    // Only a one-character atom can start a run.
-    if (!candidate->IsAtom() || candidate->AsAtom()->length() != 1) {
-      alternatives->at(out++) = candidate;
-      in++;
-      continue;
-    }
-
-    RegExpAtom* const head = candidate->AsAtom();
-    JSRegExp::Flags flags = head->flags();
-    DCHECK_IMPLIES(IsUnicode(flags),
-                   !unibrow::Utf16::IsLeadSurrogate(head->data().at(0)));
-    bool saw_trail_surrogate =
-        unibrow::Utf16::IsTrailSurrogate(head->data().at(0));
-    const int run_start = in++;
-
-    // Grow the run over following single-character atoms that carry the
-    // same flags (case independence and unicode-ness).
-    for (; in < count; in++) {
-      RegExpTree* follower = alternatives->at(in);
-      if (!follower->IsAtom()) break;
-      RegExpAtom* const follower_atom = follower->AsAtom();
-      if (follower_atom->length() != 1) break;
-      if (follower_atom->flags() != flags) break;
-      DCHECK_IMPLIES(
-          IsUnicode(flags),
-          !unibrow::Utf16::IsLeadSurrogate(follower_atom->data().at(0)));
-      saw_trail_surrogate |=
-          unibrow::Utf16::IsTrailSurrogate(follower_atom->data().at(0));
-    }
-
-    if (in - run_start < 2) {
-      // A lone single-character atom is not worth a class; keep it.
-      alternatives->at(out++) = alternatives->at(run_start);
-      continue;
-    }
-
-    // Non-trivial run: gather one singleton range per atom.
-    ZoneList<CharacterRange>* ranges =
-        new (zone) ZoneList<CharacterRange>(2, zone);
-    for (int k = run_start; k < in; k++) {
-      RegExpAtom* member = alternatives->at(k)->AsAtom();
-      DCHECK_EQ(member->length(), 1);
-      ranges->Add(CharacterRange::Singleton(member->data().at(0)), zone);
-    }
-    RegExpCharacterClass::CharacterClassFlags class_flags;
-    if (IsUnicode(flags) && saw_trail_surrogate) {
-      class_flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
-    }
-    alternatives->at(out++) =
-        new (zone) RegExpCharacterClass(zone, ranges, flags, class_flags);
-  }
-  alternatives->Rewind(out);  // Drop the now-unused tail.
-}
-
-
-// Converts this disjunction into a ChoiceNode with one alternative per
-// branch, each continuing at |on_success|. Lists with more than two
-// alternatives are simplified first; if that leaves a single alternative its
-// node is returned directly.
-RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
-                                      RegExpNode* on_success) {
-  ZoneList<RegExpTree*>* alternatives = this->alternatives();
-
-  if (alternatives->length() > 2) {
-    if (SortConsecutiveAtoms(compiler)) RationalizeConsecutiveAtoms(compiler);
-    FixSingleCharacterDisjunctions(compiler);
-    if (alternatives->length() == 1) {
-      return alternatives->at(0)->ToNode(compiler, on_success);
-    }
-  }
-
-  const int count = alternatives->length();
-  Zone* zone = compiler->zone();
-  ChoiceNode* choice = new (zone) ChoiceNode(count, zone);
-  for (int index = 0; index < count; index++) {
-    RegExpNode* branch = alternatives->at(index)->ToNode(compiler, on_success);
-    GuardedAlternative guarded(branch);
-    choice->AddAlternative(guarded);
-  }
-  return choice;
-}
-
-
-// Builds the node for this quantifier by forwarding its own bounds,
-// greediness and body to the general ToNode overload.
-RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
-                                     RegExpNode* on_success) {
-  return ToNode(min(), max(), is_greedy(), body(), compiler, on_success);
-}
-
-
-// Scoped helper that tracks how far quantifier loops have been unrolled by
-// the regexp graph generator. On construction the compiler's current
-// expansion factor is multiplied by |factor|; on destruction the previous
-// value is restored.
-class RegExpExpansionLimiter {
- public:
-  static const int kMaxExpansionFactor = 6;
-
-  RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
-      : compiler_(compiler),
-        saved_expansion_factor_(compiler->current_expansion_factor()),
-        ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
-    DCHECK_LT(0, factor);
-    // Already over the limit: leave the compiler's factor untouched.
-    if (!ok_to_expand_) return;
-
-    if (factor > kMaxExpansionFactor) {
-      // Skip the multiplication so the expansion factor cannot overflow.
-      ok_to_expand_ = false;
-      compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
-      return;
-    }
-
-    const int scaled_factor = saved_expansion_factor_ * factor;
-    ok_to_expand_ = (scaled_factor <= kMaxExpansionFactor);
-    compiler->set_current_expansion_factor(scaled_factor);
-  }
-
-  ~RegExpExpansionLimiter() {
-    compiler_->set_current_expansion_factor(saved_expansion_factor_);
-  }
-
-  // True if unrolling by the requested factor stays within the limit.
-  bool ok_to_expand() { return ok_to_expand_; }
-
- private:
-  RegExpCompiler* compiler_;
-  int saved_expansion_factor_;
-  bool ok_to_expand_;
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
-};
-
-
-// Builds the node graph for |body| repeated between |min| and |max| times
-// (|max| may be kInfinity), continuing at |on_success|. Greedy quantifiers
-// try the body before the continuation, non-greedy ones the other way round.
-// When |not_at_start| is set and the compiler is not reading backward, the
-// generated choice nodes are marked with set_not_at_start().
-RegExpNode* RegExpQuantifier::ToNode(int min,
-                                     int max,
-                                     bool is_greedy,
-                                     RegExpTree* body,
-                                     RegExpCompiler* compiler,
-                                     RegExpNode* on_success,
-                                     bool not_at_start) {
-  // x{f, t} becomes this:
-  //
-  //             (r++)<-.
-  //               |     `
-  //               |     (x)
-  //               v     ^
-  //      (r=0)-->(?)---/ [if r < t]
-  //               |
-  //   [if r >= f] \----> ...
-  //
-
-  // 15.10.2.5 RepeatMatcher algorithm.
-  // The parser has already eliminated the case where max is 0.  In the case
-  // where max_match is zero the parser has removed the quantifier if min was
-  // > 0 and removed the atom if min was 0.  See AddQuantifierToAtom.
-
-  // If we know that we cannot match zero length then things are a little
-  // simpler since we don't need to make the special zero length match check
-  // from step 2.1.  If the min and max are small we can unroll a little in
-  // this case.
-  static const int kMaxUnrolledMinMatches = 3;  // Unroll (foo)+ and (foo){3,}
-  static const int kMaxUnrolledMaxMatches = 3;  // Unroll (foo)? and (foo){x,3}
-  if (max == 0) return on_success;  // This can happen due to recursion.
-  bool body_can_be_empty = (body->min_match() == 0);
-  int body_start_reg = RegExpCompiler::kNoRegister;
-  Interval capture_registers = body->CaptureRegisters();
-  bool needs_capture_clearing = !capture_registers.is_empty();
-  Zone* zone = compiler->zone();
-
-  if (body_can_be_empty) {
-    // Register that will hold the position at which the body started, used
-    // for the empty-match check below.
-    body_start_reg = compiler->AllocateRegister();
-  } else if (compiler->optimize() && !needs_capture_clearing) {
-    // Only unroll if there are no captures and the body can't be
-    // empty.
-    {
-      RegExpExpansionLimiter limiter(
-          compiler, min + ((max != min) ? 1 : 0));
-      if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
-        int new_max = (max == kInfinity) ? max : max - min;
-        // Recurse once to get the loop or optional matches after the fixed
-        // ones.
-        RegExpNode* answer = ToNode(
-            0, new_max, is_greedy, body, compiler, on_success, true);
-        // Unroll the forced matches from 0 to min.  This can cause chains of
-        // TextNodes (which the parser does not generate).  These should be
-        // combined if it turns out they hinder good code generation.
-        for (int i = 0; i < min; i++) {
-          answer = body->ToNode(compiler, answer);
-        }
-        return answer;
-      }
-    }
-    if (max <= kMaxUnrolledMaxMatches && min == 0) {
-      DCHECK_LT(0, max);  // Due to the 'if' above.
-      RegExpExpansionLimiter limiter(compiler, max);
-      if (limiter.ok_to_expand()) {
-        // Unroll the optional matches up to max.
-        RegExpNode* answer = on_success;
-        for (int i = 0; i < max; i++) {
-          ChoiceNode* alternation = new(zone) ChoiceNode(2, zone);
-          if (is_greedy) {
-            alternation->AddAlternative(
-                GuardedAlternative(body->ToNode(compiler, answer)));
-            alternation->AddAlternative(GuardedAlternative(on_success));
-          } else {
-            alternation->AddAlternative(GuardedAlternative(on_success));
-            alternation->AddAlternative(
-                GuardedAlternative(body->ToNode(compiler, answer)));
-          }
-          answer = alternation;
-          if (not_at_start && !compiler->read_backward()) {
-            alternation->set_not_at_start();
-          }
-        }
-        return answer;
-      }
-    }
-  }
-  // General case: build an explicit loop around a LoopChoiceNode. A counter
-  // register is only needed when there is a lower or a finite upper bound.
-  bool has_min = min > 0;
-  bool has_max = max < RegExpTree::kInfinity;
-  bool needs_counter = has_min || has_max;
-  int reg_ctr = needs_counter
-      ? compiler->AllocateRegister()
-      : RegExpCompiler::kNoRegister;
-  LoopChoiceNode* center = new (zone)
-      LoopChoiceNode(body->min_match() == 0, compiler->read_backward(), zone);
-  if (not_at_start && !compiler->read_backward()) center->set_not_at_start();
-  // Node the body continues to after one iteration: back to the loop centre,
-  // incrementing the counter first when one is in use.
-  RegExpNode* loop_return = needs_counter
-      ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
-      : static_cast<RegExpNode*>(center);
-  if (body_can_be_empty) {
-    // If the body can be empty we need to check if it was and then
-    // backtrack.
-    loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
-                                              reg_ctr,
-                                              min,
-                                              loop_return);
-  }
-  RegExpNode* body_node = body->ToNode(compiler, loop_return);
-  if (body_can_be_empty) {
-    // If the body can be empty we need to store the start position
-    // so we can bail out if it was empty.
-    body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
-  }
-  if (needs_capture_clearing) {
-    // Before entering the body of this loop we need to clear captures.
-    body_node = ActionNode::ClearCaptures(capture_registers, body_node);
-  }
-  GuardedAlternative body_alt(body_node);
-  if (has_max) {
-    // Another iteration is only allowed while the counter is below max.
-    Guard* body_guard =
-        new(zone) Guard(reg_ctr, Guard::LT, max);
-    body_alt.AddGuard(body_guard, zone);
-  }
-  GuardedAlternative rest_alt(on_success);
-  if (has_min) {
-    // Leaving the loop is only allowed once the counter has reached min.
-    Guard* rest_guard = new(compiler->zone()) Guard(reg_ctr, Guard::GEQ, min);
-    rest_alt.AddGuard(rest_guard, zone);
-  }
-  // The order in which the alternatives are added depends on greediness.
-  if (is_greedy) {
-    center->AddLoopAlternative(body_alt);
-    center->AddContinueAlternative(rest_alt);
-  } else {
-    center->AddContinueAlternative(rest_alt);
-    center->AddLoopAlternative(body_alt);
-  }
-  if (needs_counter) {
-    // Reset the counter to zero before entering the loop.
-    return ActionNode::SetRegister(reg_ctr, 0, center);
-  } else {
-    return center;
-  }
-}
-
-namespace {
-// Desugar \b to (?<=\w)(?=\W)|(?<=\W)(?=\w) and
-//         \B to (?<=\w)(?=\w)|(?<=\W)(?=\W)
-//
-// The word class used here includes unicode case equivalents. The result is
-// a two-way ChoiceNode whose branches each chain a lookbehind and a
-// lookahead before continuing at |on_success|.
-RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
-                                          RegExpNode* on_success,
-                                          RegExpAssertion::AssertionType type,
-                                          JSRegExp::Flags flags) {
-  DCHECK(NeedsUnicodeCaseEquivalents(flags));
-  Zone* zone = compiler->zone();
-  ZoneList<CharacterRange>* word_chars =
-      new (zone) ZoneList<CharacterRange>(2, zone);
-  CharacterRange::AddClassEscape('w', word_chars, true, zone);
-  const int stack_reg = compiler->UnicodeLookaroundStackRegister();
-  const int position_reg = compiler->UnicodeLookaroundPositionRegister();
-  const bool is_boundary = (type == RegExpAssertion::BOUNDARY);
-
-  ChoiceNode* choice = new (zone) ChoiceNode(2, zone);
-  // First branch: a word character to the left; second branch: a non-word
-  // character to the left.
-  for (int branch = 0; branch < 2; branch++) {
-    const bool word_behind = (branch == 0);
-    // For \b the two sides must differ, for \B they must agree.
-    const bool word_ahead = (is_boundary != word_behind);
-
-    // Left-hand side of the position.
-    RegExpLookaround::Builder behind(word_behind, on_success, stack_reg,
-                                     position_reg);
-    RegExpNode* behind_text = TextNode::CreateForCharacterRanges(
-        zone, word_chars, true, behind.on_match_success(), flags);
-
-    // Right-hand side of the position; on success it continues into the
-    // lookbehind built above.
-    RegExpLookaround::Builder ahead(word_ahead, behind.ForMatch(behind_text),
-                                    stack_reg, position_reg);
-    RegExpNode* ahead_text = TextNode::CreateForCharacterRanges(
-        zone, word_chars, false, ahead.on_match_success(), flags);
-
-    choice->AddAlternative(GuardedAlternative(ahead.ForMatch(ahead_text)));
-  }
-  return choice;
-}
-}  // anonymous namespace
-
-// Lowers this assertion to its node graph, continuing at |on_success|.
-// Word-boundary assertions are desugared into lookarounds when
-// NeedsUnicodeCaseEquivalents(flags_) holds; END_OF_LINE is compiled as a
-// choice between a newline lookahead and an end-of-input check.
-RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
-                                    RegExpNode* on_success) {
-  Zone* zone = compiler->zone();
-
-  switch (assertion_type()) {
-    case START_OF_LINE:
-      return AssertionNode::AfterNewline(on_success);
-    case START_OF_INPUT:
-      return AssertionNode::AtStart(on_success);
-    case BOUNDARY:
-      return NeedsUnicodeCaseEquivalents(flags_)
-                 ? BoundaryAssertionAsLookaround(compiler, on_success, BOUNDARY,
-                                                 flags_)
-                 : AssertionNode::AtBoundary(on_success);
-    case NON_BOUNDARY:
-      return NeedsUnicodeCaseEquivalents(flags_)
-                 ? BoundaryAssertionAsLookaround(compiler, on_success,
-                                                 NON_BOUNDARY, flags_)
-                 : AssertionNode::AtNonBoundary(on_success);
-    case END_OF_INPUT:
-      return AssertionNode::AtEnd(on_success);
-    case END_OF_LINE: {
-      // Compile $ in multiline regexps as an alternation with a positive
-      // lookahead in one side and an end-of-input on the other side.
-      // We need two registers for the lookahead.
-      int stack_pointer_register = compiler->AllocateRegister();
-      int position_register = compiler->AllocateRegister();
-      // The ChoiceNode to distinguish between a newline and end-of-input.
-      ChoiceNode* result = new(zone) ChoiceNode(2, zone);
-      // Create a newline atom. The character class is built from the 'n'
-      // standard set type, so no explicit range list is needed here.
-      JSRegExp::Flags default_flags = JSRegExp::Flags();
-      RegExpCharacterClass* newline_atom =
-          new (zone) RegExpCharacterClass('n', default_flags);
-      TextNode* newline_matcher = new (zone) TextNode(
-          newline_atom, false, ActionNode::PositiveSubmatchSuccess(
-                                   stack_pointer_register, position_register,
-                                   0,   // No captures inside.
-                                   -1,  // Ignored if no captures.
-                                   on_success));
-      // Wrap the newline matcher in a submatch so that it acts as a
-      // lookahead.
-      RegExpNode* end_of_line = ActionNode::BeginSubmatch(
-          stack_pointer_register,
-          position_register,
-          newline_matcher);
-      // Add the two alternatives to the ChoiceNode.
-      GuardedAlternative eol_alternative(end_of_line);
-      result->AddAlternative(eol_alternative);
-      GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
-      result->AddAlternative(end_alternative);
-      return result;
-    }
-    default:
-      UNREACHABLE();
-  }
-  return on_success;
-}
-
-
-// Creates a BackReferenceNode that refers to the start/end registers of the
-// capture with this back reference's index and continues at |on_success|.
-RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
-                                        RegExpNode* on_success) {
-  const int start_register = RegExpCapture::StartRegister(index());
-  const int end_register = RegExpCapture::EndRegister(index());
-  Zone* zone = compiler->zone();
-  return new (zone) BackReferenceNode(start_register, end_register, flags_,
-                                      compiler->read_backward(), on_success);
-}
-
-
-// The empty regexp adds no nodes of its own: matching simply continues at
-// |on_success|. |compiler| is unused.
-RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
-                                RegExpNode* on_success) {
-  return on_success;
-}
-
-
-// Prepares the node that the lookaround body must continue to when it
-// matches: a positive-submatch-success action leading to |on_success| for
-// positive lookarounds, or a NegativeSubmatchSuccess node for negative ones.
-RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,
-                                   int stack_pointer_register,
-                                   int position_register,
-                                   int capture_register_count,
-                                   int capture_register_start)
-    : is_positive_(is_positive),
-      on_success_(on_success),
-      stack_pointer_register_(stack_pointer_register),
-      position_register_(position_register) {
-  if (!is_positive_) {
-    Zone* zone = on_success_->zone();
-    on_match_success_ = new (zone) NegativeSubmatchSuccess(
-        stack_pointer_register, position_register, capture_register_count,
-        capture_register_start, zone);
-    return;
-  }
-  on_match_success_ = ActionNode::PositiveSubmatchSuccess(
-      stack_pointer_register, position_register, capture_register_count,
-      capture_register_start, on_success_);
-}
-
-
-// Wraps |match| (the compiled lookaround body) in a BeginSubmatch action.
-RegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) {
-  RegExpNode* submatch_body = match;
-  if (!is_positive_) {
-    // A negative lookaround is represented by a choice node. Its first
-    // alternative is the negative match; when that succeeds the end node
-    // backtracks. When it fails the second alternative is tried and leads to
-    // success. NegativeLookaroundChoiceNode is a special ChoiceNode that
-    // ignores the first exit when calculating quick checks.
-    Zone* zone = on_success_->zone();
-    submatch_body = new (zone) NegativeLookaroundChoiceNode(
-        GuardedAlternative(match), GuardedAlternative(on_success_), zone);
-  }
-  return ActionNode::BeginSubmatch(stack_pointer_register_, position_register_,
-                                   submatch_body);
-}
-
-
-// Compiles this lookaround. Two fresh registers are allocated for the
-// submatch, the body is compiled with the compiler's read direction set
-// according to the lookaround type, and the previous direction is restored
-// before returning.
-RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
-                                     RegExpNode* on_success) {
-  const int stack_pointer_register = compiler->AllocateRegister();
-  const int position_register = compiler->AllocateRegister();
-
-  // Range of registers used by the captures inside the body.
-  constexpr int kRegistersPerCapture = 2;
-  constexpr int kRegisterOfFirstCapture = 2;
-  const int register_count = capture_count_ * kRegistersPerCapture;
-  const int register_start =
-      kRegisterOfFirstCapture + capture_from_ * kRegistersPerCapture;
-
-  const bool previous_read_backward = compiler->read_backward();
-  compiler->set_read_backward(type() == LOOKBEHIND);
-
-  Builder builder(is_positive(), on_success, stack_pointer_register,
-                  position_register, register_count, register_start);
-  RegExpNode* body_node = body_->ToNode(compiler, builder.on_match_success());
-  RegExpNode* lookaround = builder.ForMatch(body_node);
-
-  compiler->set_read_backward(previous_read_backward);
-  return lookaround;
-}
-
-
-// Compiles this capture group by forwarding its body and index to the
-// general overload.
-RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
-                                  RegExpNode* on_success) {
-  RegExpTree* const capture_body = body();
-  const int capture_index = index();
-  return ToNode(capture_body, capture_index, compiler, on_success);
-}
-
-
-// Surrounds the compiled |body| with two StorePosition actions that record
-// the capture's boundaries in the registers belonging to |index|. When the
-// compiler is reading backward the two registers swap roles.
-RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
-                                  int index,
-                                  RegExpCompiler* compiler,
-                                  RegExpNode* on_success) {
-  DCHECK_NOT_NULL(body);
-  const int start_reg = RegExpCapture::StartRegister(index);
-  const int end_reg = RegExpCapture::EndRegister(index);
-  const bool backward = compiler->read_backward();
-  // Register written before the body runs, and the one written after it.
-  const int before_body_reg = backward ? end_reg : start_reg;
-  const int after_body_reg = backward ? start_reg : end_reg;
-
-  RegExpNode* store_after =
-      ActionNode::StorePosition(after_body_reg, true, on_success);
-  RegExpNode* body_node = body->ToNode(compiler, store_after);
-  return ActionNode::StorePosition(before_body_reg, true, body_node);
-}
-
-
-// Chains the child nodes of this alternative in front of |on_success|.
-// Children are compiled last-to-first when reading forward and
-// first-to-last when reading backward, each one continuing into the node
-// built before it.
-RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
-                                      RegExpNode* on_success) {
-  ZoneList<RegExpTree*>* parts = nodes();
-  RegExpNode* chain = on_success;
-  if (!compiler->read_backward()) {
-    for (int index = parts->length(); index-- > 0;) {
-      chain = parts->at(index)->ToNode(compiler, chain);
-    }
-    return chain;
-  }
-  for (int index = 0; index < parts->length(); index++) {
-    chain = parts->at(index)->ToNode(compiler, chain);
-  }
-  return chain;
-}
-
-
-// Appends the ranges described by the table |elmv| to |ranges|. The table
-// holds |elmc| entries: pairs of (first, one-past-last) values followed by a
-// kRangeEndMarker entry.
-static void AddClass(const int* elmv,
-                     int elmc,
-                     ZoneList<CharacterRange>* ranges,
-                     Zone* zone) {
-  const int marker_index = elmc - 1;
-  DCHECK_EQ(kRangeEndMarker, elmv[marker_index]);
-  for (int lo = 0; lo < marker_index; lo += 2) {
-    const int hi = lo + 1;
-    DCHECK(elmv[lo] < elmv[hi]);
-    // The table stores an exclusive upper bound; ranges are inclusive.
-    ranges->Add(CharacterRange::Range(elmv[lo], elmv[hi] - 1), zone);
-  }
-}
-
-
-// Appends the complement of the table |elmv| to |ranges|: the gaps before,
-// between and after the table's ranges, up to String::kMaxCodePoint. The
-// table has the same layout as for AddClass and must neither start at 0 nor
-// end at String::kMaxCodePoint.
-static void AddClassNegated(const int *elmv,
-                            int elmc,
-                            ZoneList<CharacterRange>* ranges,
-                            Zone* zone) {
-  const int marker_index = elmc - 1;
-  DCHECK_EQ(kRangeEndMarker, elmv[marker_index]);
-  DCHECK_NE(0x0000, elmv[0]);
-  DCHECK_NE(String::kMaxCodePoint, elmv[marker_index - 1]);
-  // First character of the gap currently being built.
-  uc16 gap_start = 0x0000;
-  for (int lo = 0; lo < marker_index; lo += 2) {
-    const int hi = lo + 1;
-    DCHECK(gap_start <= elmv[lo] - 1);
-    DCHECK(elmv[lo] < elmv[hi]);
-    ranges->Add(CharacterRange::Range(gap_start, elmv[lo] - 1), zone);
-    gap_start = elmv[hi];
-  }
-  ranges->Add(CharacterRange::Range(gap_start, String::kMaxCodePoint), zone);
-}
-
-// Adds the ranges for the class escape |type| to |ranges|. For \w and \W
-// with |add_unicode_case_equivalents| set, the word class is first closed
-// over unicode case equivalents (and only then negated for \W); every other
-// combination is handled by the three-argument overload.
-void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
-                                    bool add_unicode_case_equivalents,
-                                    Zone* zone) {
-  const bool is_word_escape = (type == 'w' || type == 'W');
-  if (!add_unicode_case_equivalents || !is_word_escape) {
-    AddClassEscape(type, ranges, zone);
-    return;
-  }
-
-  // See #sec-runtime-semantics-wordcharacters-abstract-operation
-  // With unicode and ignore_case the closure over case equivalent characters
-  // has to be computed before negating.
-  ZoneList<CharacterRange>* word_ranges =
-      new (zone) ZoneList<CharacterRange>(2, zone);
-  AddClass(kWordRanges, kWordRangeCount, word_ranges, zone);
-  AddUnicodeCaseEquivalents(word_ranges, zone);
-  if (type == 'W') {
-    ZoneList<CharacterRange>* complement =
-        new (zone) ZoneList<CharacterRange>(2, zone);
-    CharacterRange::Negate(word_ranges, complement, zone);
-    word_ranges = complement;
-  }
-  ranges->AddAll(*word_ranges, zone);
-}
-
-// Adds the ranges of the standard character class |type| to |ranges|.
-// Lower-case letters select a class and the matching upper-case letter its
-// complement; '.', '*' and 'n' are internal shorthands described below.
-void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
-                                    Zone* zone) {
-  switch (type) {
-    case 's':
-      AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
-      return;
-    case 'S':
-      AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);
-      return;
-    case 'w':
-      AddClass(kWordRanges, kWordRangeCount, ranges, zone);
-      return;
-    case 'W':
-      AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);
-      return;
-    case 'd':
-      AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);
-      return;
-    case 'D':
-      AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);
-      return;
-    case '.':
-      // Everything except line terminators.
-      AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges,
-                      zone);
-      return;
-    case '*':
-      // Not a character range as defined by the spec, but a convenient
-      // shorthand for a character class that matches any character.
-      ranges->Add(CharacterRange::Everything(), zone);
-      return;
-    case 'n':
-      // The set of characters matched by the $ and ^ symbols in multiline
-      // mode.
-      AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges, zone);
-      return;
-    default:
-      UNREACHABLE();
-  }
-}
-
-
-// Returns a view of the word-character range table with its last entry
-// left out.
-Vector<const int> CharacterRange::GetWordBounds() {
-  const int bound_count = kWordRangeCount - 1;
-  return Vector<const int>(kWordRanges, bound_count);
-}
-
-// static
-// Extends |ranges| with the case equivalents of the characters it already
-// contains. The list is canonicalized first and only the ranges present at
-// that point are expanded; new ranges are appended at the end. Only values up
-// to String::kMaxUtf16CodeUnit are considered and ranges lying entirely
-// between kLeadSurrogateStart and kTrailSurrogateEnd are skipped. When
-// |is_one_byte| is set, ranges for which RangeContainsLatin1Equivalents() is
-// false are clipped to String::kMaxOneByteCharCode.
-void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
-                                        ZoneList<CharacterRange>* ranges,
-                                        bool is_one_byte) {
-  CharacterRange::Canonicalize(ranges);
-  int range_count = ranges->length();
-#ifdef V8_INTL_SUPPORT
-  // already_added collects the (clipped) input ranges, others the case
-  // equivalents found for them.
-  icu::UnicodeSet already_added;
-  icu::UnicodeSet others;
-  for (int i = 0; i < range_count; i++) {
-    CharacterRange range = ranges->at(i);
-    uc32 bottom = range.from();
-    if (bottom > String::kMaxUtf16CodeUnit) continue;
-    uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);
-    // Nothing to be done for surrogates.
-    if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) continue;
-    if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
-      if (bottom > String::kMaxOneByteCharCode) continue;
-      if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
-    }
-    already_added.add(bottom, top);
-    icu::Locale locale = icu::Locale::getRoot();
-    // Expand every character of the range individually.
-    while (bottom <= top) {
-      icu::UnicodeString upper(bottom);
-      upper.toUpper(locale);
-      icu::UnicodeSet expanded(bottom, bottom);
-      expanded.closeOver(USET_CASE_INSENSITIVE);
-      for (int32_t i = 0; i < expanded.getRangeCount(); i++) {
-        UChar32 start = expanded.getRangeStart(i);
-        UChar32 end = expanded.getRangeEnd(i);
-        while (start <= end) {
-          icu::UnicodeString upper2(start);
-          upper2.toUpper(locale);
-          // Only add if the upper case are the same.
-          if (upper[0] == upper2[0]) {
-            // #sec-runtime-semantics-canonicalize-ch
-            // 3.g. If the numeric value of ch ≥ 128 and the numeric value of
-            // cu < 128, return ch.
-            if (bottom >= 128 && start < 128) {
-              others.add(bottom);
-            } else {
-              // 3.h. Return cu.
-              others.add(start);
-            }
-          }
-          start++;
-        }
-      }
-      bottom++;
-    }
-  }
-  // Characters that were part of the input must not be added a second time.
-  others.removeAll(already_added);
-  for (int32_t i = 0; i < others.getRangeCount(); i++) {
-    UChar32 start = others.getRangeStart(i);
-    UChar32 end = others.getRangeEnd(i);
-    if (start == end) {
-      ranges->Add(CharacterRange::Singleton(start), zone);
-    } else {
-      ranges->Add(CharacterRange::Range(start, end), zone);
-    }
-  }
-#else
-  for (int i = 0; i < range_count; i++) {
-    CharacterRange range = ranges->at(i);
-    uc32 bottom = range.from();
-    if (bottom > String::kMaxUtf16CodeUnit) continue;
-    uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);
-    // Nothing to be done for surrogates.
-    if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) continue;
-    if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
-      if (bottom > String::kMaxOneByteCharCode) continue;
-      if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
-    }
-    unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
-    if (top == bottom) {
-      // If this is a singleton we just expand the one character.
-      int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
-      for (int i = 0; i < length; i++) {
-        uc32 chr = chars[i];
-        if (chr != bottom) {
-          ranges->Add(CharacterRange::Singleton(chars[i]), zone);
-        }
-      }
-    } else {
-      // If this is a range we expand the characters block by block, expanding
-      // contiguous subranges (blocks) one at a time.  The approach is as
-      // follows.  For a given start character we look up the remainder of the
-      // block that contains it (represented by the end point), for instance we
-      // find 'z' if the character is 'c'.  A block is characterized by the
-      // property that all characters uncanonicalize in the same way, except
-      // that each entry in the result is incremented by the distance from the
-      // first element.  So a-z is a block because 'a' uncanonicalizes to ['a',
-      // 'A'] and the k'th letter uncanonicalizes to ['a' + k, 'A' + k].  Once
-      // we've found the end point we look up its uncanonicalization and
-      // produce a range for each element.  For instance for [c-f] we look up
-      // ['z', 'Z'] and produce [c-f] and [C-F].  We then only add a range if
-      // it is not already contained in the input, so [c-f] will be skipped but
-      // [C-F] will be added.  If this range is not completely contained in a
-      // block we do this for all the blocks covered by the range (handling
-      // characters that is not in a block as a "singleton block").
-      unibrow::uchar equivalents[unibrow::Ecma262UnCanonicalize::kMaxWidth];
-      int pos = bottom;
-      while (pos <= top) {
-        int length =
-            isolate->jsregexp_canonrange()->get(pos, '\0', equivalents);
-        uc32 block_end;
-        if (length == 0) {
-          // No block information for pos: treat it as a singleton block.
-          block_end = pos;
-        } else {
-          DCHECK_EQ(1, length);
-          block_end = equivalents[0];
-        }
-        // Part of the block that lies inside the current range.
-        int end = (block_end > top) ? top : block_end;
-        length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0',
-                                                         equivalents);
-        for (int i = 0; i < length; i++) {
-          uc32 c = equivalents[i];
-          // Shift the equivalent of block_end back to cover [pos, end].
-          uc32 range_from = c - (block_end - pos);
-          uc32 range_to = c - (block_end - end);
-          if (!(bottom <= range_from && range_to <= top)) {
-            ranges->Add(CharacterRange::Range(range_from, range_to), zone);
-          }
-        }
-        pos = end + 1;
-      }
-    }
-  }
-#endif  // V8_INTL_SUPPORT
-}
-
-bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
-  DCHECK_NOT_NULL(ranges);
-  int n = ranges->length();
-  if (n <= 1) return true;
-  int max = ranges->at(0).to();
-  for (int i = 1; i < n; i++) {
-    CharacterRange next_range = ranges->at(i);
-    if (next_range.from() <= max + 1) return false;
-    max = next_range.to();
-  }
-  return true;
-}
-
-
-ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
-  if (ranges_ == nullptr) {
-    ranges_ = new(zone) ZoneList<CharacterRange>(2, zone);
-    CharacterRange::AddClassEscape(standard_set_type_, ranges_, false, zone);
-  }
-  return ranges_;
-}
-
-
-// Move a number of elements in a zonelist to another position
-// in the same list. Handles overlapping source and target areas.
-static void MoveRanges(ZoneList<CharacterRange>* list,
-                       int from,
-                       int to,
-                       int count) {
-  // Ranges are potentially overlapping.
-  if (from < to) {
-    for (int i = count - 1; i >= 0; i--) {
-      list->at(to + i) = list->at(from + i);
-    }
-  } else {
-    for (int i = 0; i < count; i++) {
-      list->at(to + i) = list->at(from + i);
-    }
-  }
-}
-
-
-static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,
-                                      int count,
-                                      CharacterRange insert) {
-  // Inserts a range into list[0..count[, which must be sorted
-  // by from value and non-overlapping and non-adjacent, using at most
-  // list[0..count] for the result. Returns the number of resulting
-  // canonicalized ranges. Inserting a range may collapse existing ranges into
-  // fewer ranges, so the return value can be anything in the range 1..count+1.
-  uc32 from = insert.from();
-  uc32 to = insert.to();
-  int start_pos = 0;
-  int end_pos = count;
-  for (int i = count - 1; i >= 0; i--) {
-    CharacterRange current = list->at(i);
-    if (current.from() > to + 1) {
-      end_pos = i;
-    } else if (current.to() + 1 < from) {
-      start_pos = i + 1;
-      break;
-    }
-  }
-
-  // Inserted range overlaps, or is adjacent to, ranges at positions
-  // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
-  // not affected by the insertion.
-  // If start_pos == end_pos, the range must be inserted before start_pos.
-  // if start_pos < end_pos, the entire range from start_pos to end_pos
-  // must be merged with the insert range.
-
-  if (start_pos == end_pos) {
-    // Insert between existing ranges at position start_pos.
-    if (start_pos < count) {
-      MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
-    }
-    list->at(start_pos) = insert;
-    return count + 1;
-  }
-  if (start_pos + 1 == end_pos) {
-    // Replace single existing range at position start_pos.
-    CharacterRange to_replace = list->at(start_pos);
-    int new_from = Min(to_replace.from(), from);
-    int new_to = Max(to_replace.to(), to);
-    list->at(start_pos) = CharacterRange::Range(new_from, new_to);
-    return count;
-  }
-  // Replace a number of existing ranges from start_pos to end_pos - 1.
-  // Move the remaining ranges down.
-
-  int new_from = Min(list->at(start_pos).from(), from);
-  int new_to = Max(list->at(end_pos - 1).to(), to);
-  if (end_pos < count) {
-    MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
-  }
-  list->at(start_pos) = CharacterRange::Range(new_from, new_to);
-  return count - (end_pos - start_pos) + 1;
-}
-
-
-void CharacterSet::Canonicalize() {
-  // Special/default classes are always considered canonical. The result
-  // of calling ranges() will be sorted.
-  if (ranges_ == nullptr) return;
-  CharacterRange::Canonicalize(ranges_);
-}
-
-
-void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
-  if (character_ranges->length() <= 1) return;
-  // Check whether ranges are already canonical (increasing, non-overlapping,
-  // non-adjacent).
-  int n = character_ranges->length();
-  int max = character_ranges->at(0).to();
-  int i = 1;
-  while (i < n) {
-    CharacterRange current = character_ranges->at(i);
-    if (current.from() <= max + 1) {
-      break;
-    }
-    max = current.to();
-    i++;
-  }
-  // Canonical until the i'th range. If that's all of them, we are done.
-  if (i == n) return;
-
-  // The ranges at index i and forward are not canonicalized. Make them so by
-  // doing the equivalent of insertion sort (inserting each into the previous
-  // list, in order).
-  // Notice that inserting a range can reduce the number of ranges in the
-  // result due to combining of adjacent and overlapping ranges.
-  int read = i;  // Range to insert.
-  int num_canonical = i;  // Length of canonicalized part of list.
-  do {
-    num_canonical = InsertRangeInCanonicalList(character_ranges,
-                                               num_canonical,
-                                               character_ranges->at(read));
-    read++;
-  } while (read < n);
-  character_ranges->Rewind(num_canonical);
-
-  DCHECK(CharacterRange::IsCanonical(character_ranges));
-}
-
-
-void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
-                            ZoneList<CharacterRange>* negated_ranges,
-                            Zone* zone) {
-  DCHECK(CharacterRange::IsCanonical(ranges));
-  DCHECK_EQ(0, negated_ranges->length());
-  int range_count = ranges->length();
-  uc32 from = 0;
-  int i = 0;
-  if (range_count > 0 && ranges->at(0).from() == 0) {
-    from = ranges->at(0).to() + 1;
-    i = 1;
-  }
-  while (i < range_count) {
-    CharacterRange range = ranges->at(i);
-    negated_ranges->Add(CharacterRange::Range(from, range.from() - 1), zone);
-    from = range.to() + 1;
-    i++;
-  }
-  if (from < String::kMaxCodePoint) {
-    negated_ranges->Add(CharacterRange::Range(from, String::kMaxCodePoint),
-                        zone);
-  }
-}
-
-
 // -------------------------------------------------------------------
 // Splay tree

@@ -6953,5 +5258,12 @@ void RegExpResultsCache::Clear(FixedArray cache) {
  }
 }

+// Reports whether |range| contains any of 0x39C, 0x3BC or 0x178.
+bool RangeContainsLatin1Equivalents(CharacterRange range) {
+  // TODO(dcarney): this could be a lot more efficient.
+  if (range.Contains(0x039C) || range.Contains(0x03BC)) return true;
+  return range.Contains(0x0178);
+}
+
 }  // namespace internal
 }  // namespace v8
--- a/src/regexp/jsregexp.h
+++ b/src/regexp/jsregexp.h
@@ -290,35 +290,6 @@ class DispatchTable : public ZoneObject {
  ZoneSplayTree<Config> tree_;
 };

-
-// Categorizes character ranges into BMP, non-BMP, lead, and trail surrogates.
-class UnicodeRangeSplitter {
- public:
-  V8_EXPORT_PRIVATE UnicodeRangeSplitter(Zone* zone,
-                                         ZoneList<CharacterRange>* base);
-  void Call(uc32 from, DispatchTable::Entry entry);
-
-  ZoneList<CharacterRange>* bmp() { return bmp_; }
-  ZoneList<CharacterRange>* lead_surrogates() { return lead_surrogates_; }
-  ZoneList<CharacterRange>* trail_surrogates() { return trail_surrogates_; }
-  ZoneList<CharacterRange>* non_bmp() const { return non_bmp_; }
-
- private:
-  static const int kBase = 0;
-  // Separate ranges into
-  static const int kBmpCodePoints = 1;
-  static const int kLeadSurrogates = 2;
-  static const int kTrailSurrogates = 3;
-  static const int kNonBmpCodePoints = 4;
-
-  Zone* zone_;
-  DispatchTable table_;
-  ZoneList<CharacterRange>* bmp_;
-  ZoneList<CharacterRange>* lead_surrogates_;
-  ZoneList<CharacterRange>* trail_surrogates_;
-  ZoneList<CharacterRange>* non_bmp_;
-};
-
 #define FOR_EACH_NODE_TYPE(VISIT)                                    \
  VISIT(End)                                                         \
  VISIT(Action)                                                      \

--- a/src/regexp/regexp-compiler-tonode.cc
+++ b/src/regexp/regexp-compiler-tonode.cc
+// Copyright 2019 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/regexp/regexp-compiler.h"
+
+#include "src/regexp/jsregexp.h"
+#include "src/utils/splay-tree-inl.h"
+#include "src/zone/zone-list-inl.h"
+
+#ifdef V8_INTL_SUPPORT
+#include "unicode/locid.h"
+#include "unicode/uniset.h"
+#include "unicode/utypes.h"
+#endif  // V8_INTL_SUPPORT
+
+namespace v8 {
+namespace internal {
+
+using namespace regexp_compiler_constants;  // NOLINT(build/namespaces)
+
+// Explicit template instantiations.
+template class ZoneSplayTree<DispatchTable::Config>;
+template void DispatchTable::ForEach<UnicodeRangeSplitter>(
+    UnicodeRangeSplitter*);
+
+// -------------------------------------------------------------------
+// Tree to graph conversion
+
+// Wraps this atom in a one-element text list and returns a TextNode over it
+// that continues at |on_success|, reading in the compiler's current direction.
+RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
+                               RegExpNode* on_success) {
+  Zone* zone = compiler->zone();
+  ZoneList<TextElement>* text_elements =
+      new (zone) ZoneList<TextElement>(1, zone);
+  text_elements->Add(TextElement::Atom(this), zone);
+  return new (zone)
+      TextNode(text_elements, compiler->read_backward(), on_success);
+}
+
+// Returns a TextNode over this text's element list that continues at
+// |on_success|, reading in the compiler's current direction.
+RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
+                               RegExpNode* on_success) {
+  Zone* zone = compiler->zone();
+  const bool backward = compiler->read_backward();
+  return new (zone) TextNode(elements(), backward, on_success);
+}
+
+// Returns true if |ranges| is exactly the complement of |special_class|.
+// |special_class| holds |length| ints: (from, exclusive-to) pairs followed by
+// a kRangeEndMarker. The complement must start at 0, have one range between
+// each pair of neighbouring class ranges, and end at String::kMaxCodePoint.
+static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
+                                 const int* special_class, int length) {
+  length--;  // Remove final marker.
+  DCHECK_EQ(kRangeEndMarker, special_class[length]);
+  DCHECK_NE(0, ranges->length());
+  DCHECK_NE(0, length);
+  DCHECK_NE(0, special_class[0]);
+  // Each (from, to) pair of the class is one gap in the complement, so the
+  // complement has one more range than the class has pairs.
+  const int gap_count = length >> 1;
+  if (ranges->length() != gap_count + 1) return false;
+
+  CharacterRange current = ranges->at(0);
+  if (current.from() != 0) return false;
+
+  for (int gap = 0; 2 * gap < length; gap++) {
+    // The gap has to open right after the current range ends ...
+    if (special_class[2 * gap] != (current.to() + 1)) return false;
+    // ... and close exactly where the next range begins.
+    current = ranges->at(gap + 1);
+    if (special_class[2 * gap + 1] != current.from()) return false;
+  }
+  return current.to() == String::kMaxCodePoint;
+}
+
+// Returns true if |ranges| lists exactly the ranges of |special_class|.
+// |special_class| holds |length| ints: (from, exclusive-to) pairs followed by
+// a kRangeEndMarker, whereas CharacterRange::to() is inclusive.
+static bool CompareRanges(ZoneList<CharacterRange>* ranges,
+                          const int* special_class, int length) {
+  length--;  // Remove final marker.
+  DCHECK_EQ(kRangeEndMarker, special_class[length]);
+  const int range_count = ranges->length();
+  if (range_count * 2 != length) return false;
+
+  for (int r = 0; r < range_count; r++) {
+    CharacterRange candidate = ranges->at(r);
+    const bool same_from = candidate.from() == special_class[2 * r];
+    const bool same_to = candidate.to() == special_class[2 * r + 1] - 1;
+    if (!(same_from && same_to)) return false;
+  }
+  return true;
+}
+
+// Returns true if this (non-negated) class is one of the standard sets. When
+// the set is not already marked as standard, its ranges are compared against
+// the space, line-terminator and word range tables (and their complements);
+// on the first match the set is tagged with the corresponding type character.
+bool RegExpCharacterClass::is_standard(Zone* zone) {
+  // TODO(lrn): Remove need for this function, by not throwing away information
+  // along the way.
+  if (is_negated()) return false;
+  if (set_.is_standard()) return true;
+
+  using Matcher = bool (*)(ZoneList<CharacterRange>*, const int*, int);
+  struct Candidate {
+    Matcher matches;
+    const int* special_class;
+    int length;
+    char type;
+  };
+  // Tried in this order; the first match wins.
+  const Candidate kCandidates[] = {
+      {CompareRanges, kSpaceRanges, kSpaceRangeCount, 's'},
+      {CompareInverseRanges, kSpaceRanges, kSpaceRangeCount, 'S'},
+      {CompareInverseRanges, kLineTerminatorRanges,
+       kLineTerminatorRangeCount, '.'},
+      {CompareRanges, kLineTerminatorRanges, kLineTerminatorRangeCount, 'n'},
+      {CompareRanges, kWordRanges, kWordRangeCount, 'w'},
+      {CompareInverseRanges, kWordRanges, kWordRangeCount, 'W'},
+  };
+  for (const Candidate& candidate : kCandidates) {
+    if (candidate.matches(set_.ranges(zone), candidate.special_class,
+                          candidate.length)) {
+      set_.set_standard_set_type(candidate.type);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Splits the ranges in |base| into the four categories described below. The
+// per-category lists stay null until Call() adds a first range to them.
+UnicodeRangeSplitter::UnicodeRangeSplitter(Zone* zone,
+                                           ZoneList<CharacterRange>* base)
+    : zone_(zone),
+      table_(zone),
+      bmp_(nullptr),
+      lead_surrogates_(nullptr),
+      trail_surrogates_(nullptr),
+      non_bmp_(nullptr) {
+  // The unicode range splitter categorizes given character ranges into:
+  // - Code points from the BMP representable by one code unit.
+  // - Code points outside the BMP that need to be split into surrogate pairs.
+  // - Lone lead surrogates.
+  // - Lone trail surrogates.
+  // Lone surrogates are valid code points, even though no actual characters.
+  // They require special matching to make sure we do not split surrogate pairs.
+  // We use the dispatch table to accomplish this. The base range is split up
+  // by the table by the overlay ranges, and the Call callback is used to
+  // filter and collect ranges for each category.
+  const int base_count = base->length();
+  for (int i = 0; i < base_count; i++) {
+    table_.AddRange(base->at(i), kBase, zone_);
+  }
+
+  // Registers the overlay range [from, to] under |category|.
+  auto add_overlay = [this](uc32 from, uc32 to, int category) {
+    table_.AddRange(CharacterRange::Range(from, to), category, zone_);
+  };
+  add_overlay(0, kLeadSurrogateStart - 1, kBmpCodePoints);
+  add_overlay(kLeadSurrogateStart, kLeadSurrogateEnd, kLeadSurrogates);
+  add_overlay(kTrailSurrogateStart, kTrailSurrogateEnd, kTrailSurrogates);
+  add_overlay(kTrailSurrogateEnd + 1, kNonBmpStart - 1, kBmpCodePoints);
+  add_overlay(kNonBmpStart, kNonBmpEnd, kNonBmpCodePoints);
+
+  table_.ForEach(this);
+}
+
+// Callback for DispatchTable::ForEach. Entries not tagged with kBase are
+// ignored; every other entry is appended, as the range
+// [entry.from(), entry.to()], to the list of the category it is tagged with.
+// The |from| argument is not used.
+void UnicodeRangeSplitter::Call(uc32 from, DispatchTable::Entry entry) {
+  OutSet* categories = entry.out_set();
+  if (!categories->Get(kBase)) return;
+
+  // Anything that is not BMP or a surrogate must be a non-BMP code point.
+  ZoneList<CharacterRange>** bucket = &non_bmp_;
+  if (categories->Get(kBmpCodePoints)) {
+    bucket = &bmp_;
+  } else if (categories->Get(kLeadSurrogates)) {
+    bucket = &lead_surrogates_;
+  } else if (categories->Get(kTrailSurrogates)) {
+    bucket = &trail_surrogates_;
+  } else {
+    DCHECK(categories->Get(kNonBmpCodePoints));
+  }
+
+  // Category lists are created on first use.
+  if (*bucket == nullptr) {
+    *bucket = new (zone_) ZoneList<CharacterRange>(2, zone_);
+  }
+  (*bucket)->Add(CharacterRange::Range(entry.from(), entry.to()), zone_);
+}
+
+// Adds an alternative to |result| that matches the BMP ranges collected by
+// |splitter| and continues at |on_success|. Does nothing if there are none.
+void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
+                      RegExpNode* on_success, UnicodeRangeSplitter* splitter) {
+  ZoneList<CharacterRange>* bmp_ranges = splitter->bmp();
+  if (bmp_ranges != nullptr) {
+    RegExpNode* bmp_match = TextNode::CreateForCharacterRanges(
+        compiler->zone(), bmp_ranges, compiler->read_backward(), on_success,
+        JSRegExp::Flags());
+    result->AddAlternative(GuardedAlternative(bmp_match));
+  }
+}
+
+// Adds alternatives to |result| that match the non-BMP ranges collected by
+// |splitter| as lead/trail surrogate pairs, each continuing at |on_success|.
+// Does nothing if there are no non-BMP ranges.
+void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
+                             RegExpNode* on_success,
+                             UnicodeRangeSplitter* splitter) {
+  ZoneList<CharacterRange>* non_bmp = splitter->non_bmp();
+  if (non_bmp == nullptr) return;
+  DCHECK(!compiler->one_byte());
+  Zone* zone = compiler->zone();
+  JSRegExp::Flags default_flags = JSRegExp::Flags();
+
+  // Adds the alternative <lead><trail> in the compiler's read direction.
+  auto add_pair = [&](CharacterRange lead, CharacterRange trail) {
+    result->AddAlternative(GuardedAlternative(TextNode::CreateForSurrogatePair(
+        zone, lead, trail, compiler->read_backward(), on_success,
+        default_flags)));
+  };
+
+  CharacterRange::Canonicalize(non_bmp);
+  for (int i = 0; i < non_bmp->length(); i++) {
+    // Match surrogate pair.
+    // E.g. [\u{10005}-\u{11005}] becomes
+    //      \ud800[\udc05-\udfff]|
+    //      [\ud801-\ud803][\udc00-\udfff]|
+    //      \ud804[\udc00-\udc05]
+    const uc32 first = non_bmp->at(i).from();
+    const uc32 last = non_bmp->at(i).to();
+    uc16 lead_lo = unibrow::Utf16::LeadSurrogate(first);
+    const uc16 trail_lo = unibrow::Utf16::TrailSurrogate(first);
+    uc16 lead_hi = unibrow::Utf16::LeadSurrogate(last);
+    const uc16 trail_hi = unibrow::Utf16::TrailSurrogate(last);
+
+    if (lead_lo == lead_hi) {
+      // The lead surrogate is the same.
+      add_pair(CharacterRange::Singleton(lead_lo),
+               CharacterRange::Range(trail_lo, trail_hi));
+      continue;
+    }
+    if (trail_lo != kTrailSurrogateStart) {
+      // Partial first row: [lead_lo][trail_lo-\udfff]
+      add_pair(CharacterRange::Singleton(lead_lo),
+               CharacterRange::Range(trail_lo, kTrailSurrogateEnd));
+      lead_lo++;
+    }
+    if (trail_hi != kTrailSurrogateEnd) {
+      // Partial last row: [lead_hi][\udc00-trail_hi]
+      add_pair(CharacterRange::Singleton(lead_hi),
+               CharacterRange::Range(kTrailSurrogateStart, trail_hi));
+      lead_hi--;
+    }
+    if (lead_lo <= lead_hi) {
+      // Remaining full rows: [lead_lo-lead_hi][\udc00-\udfff]
+      add_pair(CharacterRange::Range(lead_lo, lead_hi),
+               CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
+    }
+  }
+}
+
+// Builds a node sequence that first runs a negative lookaround for
+// |lookbehind|, read against the direction given by |read_backward|, and then
+// matches |match| in that direction before continuing at |on_success|.
+RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
+    RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,
+    ZoneList<CharacterRange>* match, RegExpNode* on_success, bool read_backward,
+    JSRegExp::Flags flags) {
+  Zone* zone = compiler->zone();
+  // The real match is what the lookaround continues with.
+  RegExpNode* after_lookaround = TextNode::CreateForCharacterRanges(
+      zone, match, read_backward, on_success, flags);
+  // Keep these as separate statements so the stack register is always
+  // requested before the position register.
+  int stack_reg = compiler->UnicodeLookaroundStackRegister();
+  int position_reg = compiler->UnicodeLookaroundPositionRegister();
+  RegExpLookaround::Builder builder(false, after_lookaround, stack_reg,
+                                    position_reg);
+  const bool against_read_direction = !read_backward;
+  RegExpNode* lookaround_body = TextNode::CreateForCharacterRanges(
+      zone, lookbehind, against_read_direction, builder.on_match_success(),
+      flags);
+  return builder.ForMatch(lookaround_body);
+}
+
+// Builds a node sequence that matches |match| in the direction given by
+// |read_backward| and then runs a negative lookaround for |lookahead| in the
+// same direction before continuing at |on_success|.
+RegExpNode* MatchAndNegativeLookaroundInReadDirection(
+    RegExpCompiler* compiler, ZoneList<CharacterRange>* match,
+    ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,
+    bool read_backward, JSRegExp::Flags flags) {
+  Zone* zone = compiler->zone();
+  // Keep these as separate statements so the stack register is always
+  // requested before the position register.
+  int stack_reg = compiler->UnicodeLookaroundStackRegister();
+  int position_reg = compiler->UnicodeLookaroundPositionRegister();
+  RegExpLookaround::Builder builder(false, on_success, stack_reg,
+                                    position_reg);
+  RegExpNode* lookaround_body = TextNode::CreateForCharacterRanges(
+      zone, lookahead, read_backward, builder.on_match_success(), flags);
+  RegExpNode* after_match = builder.ForMatch(lookaround_body);
+  return TextNode::CreateForCharacterRanges(zone, match, read_backward,
+                                            after_match, flags);
+}
+
+// Adds an alternative to |result| that matches the lead surrogates collected
+// by |splitter| only when they are not followed by a trail surrogate.
+// Does nothing if there are no lead surrogate ranges.
+void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
+                           RegExpNode* on_success,
+                           UnicodeRangeSplitter* splitter) {
+  JSRegExp::Flags default_flags = JSRegExp::Flags();
+  ZoneList<CharacterRange>* leads = splitter->lead_surrogates();
+  if (leads == nullptr) return;
+  Zone* zone = compiler->zone();
+  // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]).
+  ZoneList<CharacterRange>* any_trail = CharacterRange::List(
+      zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
+
+  RegExpNode* lone_lead;
+  if (!compiler->read_backward()) {
+    // Reading forward. Forward match the lead surrogate and assert that
+    // no trail surrogate follows.
+    lone_lead = MatchAndNegativeLookaroundInReadDirection(
+        compiler, leads, any_trail, on_success, false, default_flags);
+  } else {
+    // Reading backward. Assert that reading forward, there is no trail
+    // surrogate, and then backward match the lead surrogate.
+    lone_lead = NegativeLookaroundAgainstReadDirectionAndMatch(
+        compiler, any_trail, leads, on_success, true, default_flags);
+  }
+  result->AddAlternative(GuardedAlternative(lone_lead));
+}
+
+// Adds an alternative to |result| that matches the trail surrogates collected
+// by |splitter| only when they are not preceded by a lead surrogate.
+// Does nothing if there are no trail surrogate ranges.
+void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
+                            RegExpNode* on_success,
+                            UnicodeRangeSplitter* splitter) {
+  JSRegExp::Flags default_flags = JSRegExp::Flags();
+  ZoneList<CharacterRange>* trails = splitter->trail_surrogates();
+  if (trails == nullptr) return;
+  Zone* zone = compiler->zone();
+  // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01
+  ZoneList<CharacterRange>* any_lead = CharacterRange::List(
+      zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
+
+  RegExpNode* lone_trail;
+  if (!compiler->read_backward()) {
+    // Reading forward. Assert that reading backward, there is no lead
+    // surrogate, and then forward match the trail surrogate.
+    lone_trail = NegativeLookaroundAgainstReadDirectionAndMatch(
+        compiler, any_lead, trails, on_success, false, default_flags);
+  } else {
+    // Reading backward. Backward match the trail surrogate and assert that no
+    // lead surrogate precedes it.
+    lone_trail = MatchAndNegativeLookaroundInReadDirection(
+        compiler, trails, any_lead, on_success, true, default_flags);
+  }
+  result->AddAlternative(GuardedAlternative(lone_trail));
+}
+
+// Returns a forward-reading node that consumes one arbitrary UTF-16 code unit
+// and continues at |on_success|. Must not be used while reading backward.
+RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
+                              RegExpNode* on_success) {
+  // This implements ES2015 21.2.5.2.3, AdvanceStringIndex.
+  DCHECK(!compiler->read_backward());
+  Zone* zone = compiler->zone();
+  // Advance any character. If the character happens to be a lead surrogate and
+  // we advanced into the middle of a surrogate pair, it will work out, as
+  // nothing will match from there. We will have to advance again, consuming
+  // the associated trail surrogate.
+  const CharacterRange any_code_unit =
+      CharacterRange::Range(0, String::kMaxUtf16CodeUnit);
+  ZoneList<CharacterRange>* ranges = CharacterRange::List(zone, any_code_unit);
+  const bool read_backward = false;
+  return TextNode::CreateForCharacterRanges(zone, ranges, read_backward,
+                                            on_success, JSRegExp::Flags());
+}
+
+// Replaces the contents of |ranges| (which must be canonical) with its
+// case-insensitive closure as computed by ICU, re-canonicalized. Without
+// V8_INTL_SUPPORT this function does nothing.
+void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
+#ifdef V8_INTL_SUPPORT
+  DCHECK(CharacterRange::IsCanonical(ranges));
+
+  // Micro-optimization to avoid passing large ranges to UnicodeSet::closeOver.
+  // See also https://crbug.com/v8/6727.
+  // TODO(jgruber): This only covers the special case of the {0,0x10FFFF} range,
+  // which we use frequently internally. But large ranges can also easily be
+  // created by the user. We might want to have a more general caching mechanism
+  // for such ranges.
+  const bool is_single_range = ranges->length() == 1;
+  if (is_single_range && ranges->at(0).IsEverything(kNonBmpEnd)) return;
+
+  // Use ICU to compute the case fold closure over the ranges.
+  icu::UnicodeSet closure;
+  const int input_count = ranges->length();
+  for (int i = 0; i < input_count; i++) {
+    CharacterRange input = ranges->at(i);
+    closure.add(input.from(), input.to());
+  }
+  ranges->Clear();
+  closure.closeOver(USET_CASE_INSENSITIVE);
+  // Full case mappings map single characters to multiple characters.
+  // Those are represented as strings in the set. Remove them so that
+  // we end up with only simple and common case mappings.
+  closure.removeAllStrings();
+  const int output_count = closure.getRangeCount();
+  for (int i = 0; i < output_count; i++) {
+    ranges->Add(
+        CharacterRange::Range(closure.getRangeStart(i), closure.getRangeEnd(i)),
+        zone);
+  }
+  // No errors and everything we collected have been ranges.
+  CharacterRange::Canonicalize(ranges);
+#endif  // V8_INTL_SUPPORT
+}
+
+// Converts this character class into a node that continues at |on_success|.
+// Unicode classes compiled for two-byte subjects (and not containing a split
+// surrogate) are expanded into a choice between BMP characters, surrogate
+// pairs and lone surrogates; everything else becomes a single TextNode.
+RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
+                                         RegExpNode* on_success) {
+  set_.Canonicalize();
+  Zone* zone = compiler->zone();
+  ZoneList<CharacterRange>* ranges = this->ranges(zone);
+  if (NeedsUnicodeCaseEquivalents(flags_)) {
+    AddUnicodeCaseEquivalents(ranges, zone);
+  }
+
+  const bool needs_surrogate_handling = IsUnicode(flags_) &&
+                                        !compiler->one_byte() &&
+                                        !contains_split_surrogate();
+  if (!needs_surrogate_handling) {
+    return new (zone) TextNode(this, compiler->read_backward(), on_success);
+  }
+
+  if (is_negated()) {
+    // Negation is resolved up front so that the splitter below only ever
+    // sees the ranges that should match.
+    ZoneList<CharacterRange>* negated =
+        new (zone) ZoneList<CharacterRange>(2, zone);
+    CharacterRange::Negate(ranges, negated, zone);
+    ranges = negated;
+  }
+  if (ranges->length() == 0) {
+    // Nothing to match: emit a text node over a class with no ranges.
+    JSRegExp::Flags default_flags;
+    RegExpCharacterClass* fail =
+        new (zone) RegExpCharacterClass(zone, ranges, default_flags);
+    return new (zone) TextNode(fail, compiler->read_backward(), on_success);
+  }
+  if (standard_type() == '*') {
+    return UnanchoredAdvance(compiler, on_success);
+  }
+
+  ChoiceNode* result = new (zone) ChoiceNode(2, zone);
+  UnicodeRangeSplitter splitter(zone, ranges);
+  AddBmpCharacters(compiler, result, on_success, &splitter);
+  AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
+  AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
+  AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
+  return result;
+}
+
+// Three-way comparison of the first code units of two atoms. Both |*a| and
+// |*b| are treated as atoms; returns -1, 0 or 1.
+int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
+  const uc16 lhs = (*a)->AsAtom()->data().at(0);
+  const uc16 rhs = (*b)->AsAtom()->data().at(0);
+  if (lhs == rhs) return 0;
+  return lhs < rhs ? -1 : 1;
+}
+
+#ifdef V8_INTL_SUPPORT
+
+// Case-insensitive comparison of the first code units of two atoms, using
+// ICU's caseCompare with default case folding.
+int CompareFirstCharCaseInsensitve(RegExpTree* const* a, RegExpTree* const* b) {
+  RegExpAtom* lhs_atom = (*a)->AsAtom();
+  RegExpAtom* rhs_atom = (*b)->AsAtom();
+  icu::UnicodeString lhs_first(lhs_atom->data().at(0));
+  return lhs_first.caseCompare(rhs_atom->data().at(0), U_FOLD_CASE_DEFAULT);
+}
+
+#else
+
+// Returns the single character that |canonicalize| maps |c| to, or |c| itself
+// when the mapping yields nothing.
+static unibrow::uchar Canonical(
+    unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
+    unibrow::uchar c) {
+  unibrow::uchar mapped[unibrow::Ecma262Canonicalize::kMaxWidth];
+  const int count = canonicalize->get(c, '\0', mapped);
+  DCHECK_LE(count, 1);
+  return count == 1 ? mapped[0] : c;
+}
+
+// Compares the first code units of two atoms after canonicalizing them with
+// |canonicalize|. Returns zero for equal characters, otherwise the difference
+// of the (possibly canonicalized) values.
+int CompareFirstCharCaseIndependent(
+    unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
+    RegExpTree* const* a, RegExpTree* const* b) {
+  unibrow::uchar lhs = (*a)->AsAtom()->data().at(0);
+  unibrow::uchar rhs = (*b)->AsAtom()->data().at(0);
+  if (lhs == rhs) return 0;
+  // Canonicalization is skipped when both characters are below 'a'.
+  const bool both_below_a = lhs < 'a' && rhs < 'a';
+  if (!both_below_a) {
+    lhs = Canonical(canonicalize, lhs);
+    rhs = Canonical(canonicalize, rhs);
+  }
+  return static_cast<int>(lhs) - static_cast<int>(rhs);
+}
+#endif  // V8_INTL_SUPPORT
+
+// We can stable sort runs of atoms, since the order does not matter if they
+// start with different characters.
+// A run is a maximal sequence of adjacent atom alternatives that all carry
+// the same flags; each run is sorted by its atoms' first character.
+// Returns true if any consecutive atoms were found.
+bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
+  ZoneList<RegExpTree*>* alternatives = this->alternatives();
+  int length = alternatives->length();
+  bool found_consecutive_atoms = false;
+  // |i| is only advanced inside the loop body. The alternative that ends a
+  // run is therefore looked at again by the next iteration, so an atom whose
+  // flags differ from the preceding run starts a run of its own rather than
+  // being stepped over.
+  int i = 0;
+  while (i < length) {
+    while (i < length) {
+      RegExpTree* alternative = alternatives->at(i);
+      if (alternative->IsAtom()) break;
+      i++;
+    }
+    // i is length or it is the index of an atom.
+    if (i == length) break;
+    int first_atom = i;
+    JSRegExp::Flags flags = alternatives->at(i)->AsAtom()->flags();
+    i++;
+    while (i < length) {
+      RegExpTree* alternative = alternatives->at(i);
+      if (!alternative->IsAtom()) break;
+      if (alternative->AsAtom()->flags() != flags) break;
+      i++;
+    }
+    // Sort atoms to get ones with common prefixes together.
+    // This step is more tricky if we are in a case-independent regexp,
+    // because it would change /is|I/ to /I|is/, and order matters when
+    // the regexp parts don't match only disjoint starting points. To fix
+    // this we have a version of CompareFirstChar that uses case-
+    // independent character classes for comparison.
+    DCHECK_LT(first_atom, alternatives->length());
+    DCHECK_LE(i, alternatives->length());
+    DCHECK_LE(first_atom, i);
+    if (IgnoreCase(flags)) {
+#ifdef V8_INTL_SUPPORT
+      alternatives->StableSort(CompareFirstCharCaseInsensitve, first_atom,
+                               i - first_atom);
+#else
+      unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
+          compiler->isolate()->regexp_macro_assembler_canonicalize();
+      auto compare_closure = [canonicalize](RegExpTree* const* a,
+                                            RegExpTree* const* b) {
+        return CompareFirstCharCaseIndependent(canonicalize, a, b);
+      };
+      alternatives->StableSort(compare_closure, first_atom, i - first_atom);
+#endif  // V8_INTL_SUPPORT
+    } else {
+      alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom);
+    }
+    if (i - first_atom > 1) found_consecutive_atoms = true;
+  }
+  return found_consecutive_atoms;
+}
+
+// Optimizes ab|ac|az to a(?:b|c|z).
+// Runs of three or more adjacent atoms that carry the same flags and start
+// with the same character (compared case-independently when the flags ignore
+// case) are replaced by a single alternative: their longest common prefix
+// followed by a disjunction of the remaining suffixes. All other alternatives
+// are kept, in order. The list is compacted in place.
+void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
+  Zone* zone = compiler->zone();
+  ZoneList<RegExpTree*>* alternatives = this->alternatives();
+  int length = alternatives->length();
+
+  int write_posn = 0;
+  int i = 0;
+  while (i < length) {
+    RegExpTree* alternative = alternatives->at(i);
+    if (!alternative->IsAtom()) {
+      alternatives->at(write_posn++) = alternatives->at(i);
+      i++;
+      continue;
+    }
+    // First atom of a potential run; it also supplies the prefix text.
+    RegExpAtom* const first_atom = alternative->AsAtom();
+    JSRegExp::Flags flags = first_atom->flags();
+#ifdef V8_INTL_SUPPORT
+    icu::UnicodeString common_prefix(first_atom->data().at(0));
+#else
+    unibrow::uchar common_prefix = first_atom->data().at(0);
+#endif  // V8_INTL_SUPPORT
+    int first_with_prefix = i;
+    // Upper bound for the common prefix: the shortest atom in the run.
+    int prefix_length = first_atom->length();
+    i++;
+    // Extend the run while the following atoms have the same flags and the
+    // same first character.
+    while (i < length) {
+      alternative = alternatives->at(i);
+      if (!alternative->IsAtom()) break;
+      RegExpAtom* const candidate = alternative->AsAtom();
+      if (candidate->flags() != flags) break;
+#ifdef V8_INTL_SUPPORT
+      icu::UnicodeString new_prefix(candidate->data().at(0));
+      if (new_prefix != common_prefix) {
+        if (!IgnoreCase(flags)) break;
+        if (common_prefix.caseCompare(new_prefix, U_FOLD_CASE_DEFAULT) != 0)
+          break;
+      }
+#else
+      unibrow::uchar new_prefix = candidate->data().at(0);
+      if (new_prefix != common_prefix) {
+        if (!IgnoreCase(flags)) break;
+        unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
+            compiler->isolate()->regexp_macro_assembler_canonicalize();
+        new_prefix = Canonical(canonicalize, new_prefix);
+        common_prefix = Canonical(canonicalize, common_prefix);
+        if (new_prefix != common_prefix) break;
+      }
+#endif  // V8_INTL_SUPPORT
+      prefix_length = Min(prefix_length, candidate->length());
+      i++;
+    }
+    if (i > first_with_prefix + 2) {
+      // Found worthwhile run of alternatives with common prefix of at least one
+      // character.  The sorting function above did not sort on more than one
+      // character for reasons of correctness, but there may still be a longer
+      // common prefix if the terms were similar or presorted in the input.
+      // Find out how long the common prefix is.
+      int run_length = i - first_with_prefix;
+      for (int j = 1; j < run_length && prefix_length > 1; j++) {
+        RegExpAtom* old_atom =
+            alternatives->at(j + first_with_prefix)->AsAtom();
+        // Position 0 was already compared when the run was formed.
+        for (int k = 1; k < prefix_length; k++) {
+          if (first_atom->data().at(k) != old_atom->data().at(k)) {
+            prefix_length = k;
+            break;
+          }
+        }
+      }
+      RegExpAtom* prefix = new (zone)
+          RegExpAtom(first_atom->data().SubVector(0, prefix_length), flags);
+      ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone);
+      pair->Add(prefix, zone);
+      ZoneList<RegExpTree*>* suffixes =
+          new (zone) ZoneList<RegExpTree*>(run_length, zone);
+      for (int j = 0; j < run_length; j++) {
+        RegExpAtom* old_atom =
+            alternatives->at(j + first_with_prefix)->AsAtom();
+        int len = old_atom->length();
+        if (len == prefix_length) {
+          // The whole atom is the prefix; its suffix is empty.
+          suffixes->Add(new (zone) RegExpEmpty(), zone);
+        } else {
+          RegExpTree* suffix = new (zone) RegExpAtom(
+              old_atom->data().SubVector(prefix_length, old_atom->length()),
+              flags);
+          suffixes->Add(suffix, zone);
+        }
+      }
+      pair->Add(new (zone) RegExpDisjunction(suffixes), zone);
+      alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair);
+    } else {
+      // Just copy any non-worthwhile alternatives.
+      for (int j = first_with_prefix; j < i; j++) {
+        alternatives->at(write_posn++) = alternatives->at(j);
+      }
+    }
+  }
+  alternatives->Rewind(write_posn);  // Trim end of array.
+}
+
+// Collapses every run of adjacent single-character atoms that share the same
+// flags into one character class, e.g. b|c|z becomes [bcz]. Alternatives that
+// are not part of such a run keep their relative order; the list is compacted
+// in place and finally trimmed to its new length.
+void RegExpDisjunction::FixSingleCharacterDisjunctions(
+    RegExpCompiler* compiler) {
+  Zone* zone = compiler->zone();
+  ZoneList<RegExpTree*>* alternatives = this->alternatives();
+  const int length = alternatives->length();
+
+  int write_posn = 0;
+  int read_posn = 0;
+  while (read_posn < length) {
+    RegExpTree* candidate = alternatives->at(read_posn);
+    // Anything that is not a one-character atom is carried over unchanged.
+    if (!candidate->IsAtom() || candidate->AsAtom()->length() != 1) {
+      alternatives->at(write_posn++) = candidate;
+      read_posn++;
+      continue;
+    }
+
+    // The candidate opens a run. Extend it over the following alternatives for
+    // as long as they are one-character atoms with identical flags (case
+    // independence and unicode-ness).
+    JSRegExp::Flags flags = candidate->AsAtom()->flags();
+    const int first_in_run = read_posn;
+    bool contains_trail_surrogate = false;
+    for (;;) {
+      RegExpAtom* const member = alternatives->at(read_posn)->AsAtom();
+      DCHECK_IMPLIES(IsUnicode(flags),
+                     !unibrow::Utf16::IsLeadSurrogate(member->data().at(0)));
+      contains_trail_surrogate |=
+          unibrow::Utf16::IsTrailSurrogate(member->data().at(0));
+      read_posn++;
+      if (read_posn >= length) break;
+      RegExpTree* next = alternatives->at(read_posn);
+      if (!next->IsAtom()) break;
+      RegExpAtom* const next_atom = next->AsAtom();
+      if (next_atom->length() != 1) break;
+      if (next_atom->flags() != flags) break;
+    }
+
+    const int run_length = read_posn - first_in_run;
+    if (run_length < 2) {
+      // A run of one is trivial; keep the atom itself.
+      alternatives->at(write_posn++) = candidate;
+      continue;
+    }
+
+    // Non-trivial run: replace it by a single class holding one singleton
+    // range per atom.
+    ZoneList<CharacterRange>* ranges =
+        new (zone) ZoneList<CharacterRange>(2, zone);
+    for (int j = first_in_run; j < read_posn; j++) {
+      RegExpAtom* old_atom = alternatives->at(j)->AsAtom();
+      DCHECK_EQ(old_atom->length(), 1);
+      ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
+    }
+    RegExpCharacterClass::CharacterClassFlags character_class_flags;
+    if (IsUnicode(flags) && contains_trail_surrogate) {
+      character_class_flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
+    }
+    alternatives->at(write_posn++) = new (zone)
+        RegExpCharacterClass(zone, ranges, flags, character_class_flags);
+  }
+  alternatives->Rewind(write_posn);  // Trim end of array.
+}
+
+// Builds the node graph for this disjunction. Lists with more than two
+// alternatives are first rewritten in place (atom sorting, prefix
+// rationalization and single-character merging); whatever remains becomes one
+// ChoiceNode whose alternatives all continue at on_success.
+RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
+                                      RegExpNode* on_success) {
+  ZoneList<RegExpTree*>* alternatives = this->alternatives();
+
+  if (alternatives->length() > 2) {
+    // Rationalization only runs when sorting reported consecutive atoms.
+    if (SortConsecutiveAtoms(compiler)) RationalizeConsecutiveAtoms(compiler);
+    FixSingleCharacterDisjunctions(compiler);
+    // The rewrites may leave a single alternative, in which case no choice
+    // node is needed.
+    if (alternatives->length() == 1) {
+      return alternatives->at(0)->ToNode(compiler, on_success);
+    }
+  }
+
+  Zone* zone = compiler->zone();
+  const int count = alternatives->length();
+  ChoiceNode* choice = new (zone) ChoiceNode(count, zone);
+  for (int i = 0; i < count; i++) {
+    RegExpNode* branch = alternatives->at(i)->ToNode(compiler, on_success);
+    choice->AddAlternative(GuardedAlternative(branch));
+  }
+  return choice;
+}
+
+// Lowers this quantifier by forwarding its own bounds, greediness and body to
+// the static ToNode overload.
+RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
+                                     RegExpNode* on_success) {
+  const int min_count = min();
+  const int max_count = max();
+  const bool greedy = is_greedy();
+  RegExpTree* const repeated = body();
+  return ToNode(min_count, max_count, greedy, repeated, compiler, on_success);
+}
+
+namespace {
+// Expresses a word-boundary assertion through lookarounds:
+//   \b becomes (?<=\w)(?=\W)|(?<=\W)(?=\w)
+//   \B becomes (?<=\w)(?=\w)|(?<=\W)(?=\W)
+// Callers must only use this when the flags need unicode case equivalents
+// (checked by the DCHECK below); \w is built with those equivalents added.
+RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
+                                          RegExpNode* on_success,
+                                          RegExpAssertion::AssertionType type,
+                                          JSRegExp::Flags flags) {
+  DCHECK(NeedsUnicodeCaseEquivalents(flags));
+  Zone* zone = compiler->zone();
+  ZoneList<CharacterRange>* word_range =
+      new (zone) ZoneList<CharacterRange>(2, zone);
+  CharacterRange::AddClassEscape('w', word_range, true, zone);
+  int stack_register = compiler->UnicodeLookaroundStackRegister();
+  int position_register = compiler->UnicodeLookaroundPositionRegister();
+  ChoiceNode* result = new (zone) ChoiceNode(2, zone);
+
+  const bool is_boundary = (type == RegExpAssertion::BOUNDARY);
+  // One alternative per possibility on the left-hand side: a word character
+  // first, then a non-word character.
+  const bool left_side_options[] = {true, false};
+  for (bool lookbehind_for_word : left_side_options) {
+    // For a boundary both sides must differ, for a non-boundary they agree.
+    const bool lookahead_for_word = (is_boundary != lookbehind_for_word);
+    // Look to the left.
+    RegExpLookaround::Builder lookbehind(lookbehind_for_word, on_success,
+                                         stack_register, position_register);
+    RegExpNode* backward = TextNode::CreateForCharacterRanges(
+        zone, word_range, true, lookbehind.on_match_success(), flags);
+    // Look to the right; on success it continues with the lookbehind.
+    RegExpLookaround::Builder lookahead(lookahead_for_word,
+                                        lookbehind.ForMatch(backward),
+                                        stack_register, position_register);
+    RegExpNode* forward = TextNode::CreateForCharacterRanges(
+        zone, word_range, false, lookahead.on_match_success(), flags);
+    result->AddAlternative(GuardedAlternative(lookahead.ForMatch(forward)));
+  }
+  return result;
+}
+}  // anonymous namespace
+
+// Builds the node for this assertion, continuing at on_success.
+//
+// Most assertion types map directly onto an AssertionNode. Word boundaries are
+// desugared to lookarounds when the flags need unicode case equivalents, and
+// END_OF_LINE becomes a choice between a newline lookahead and end-of-input.
+RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
+                                    RegExpNode* on_success) {
+  switch (assertion_type()) {
+    case START_OF_LINE:
+      return AssertionNode::AfterNewline(on_success);
+    case START_OF_INPUT:
+      return AssertionNode::AtStart(on_success);
+    case BOUNDARY:
+      return NeedsUnicodeCaseEquivalents(flags_)
+                 ? BoundaryAssertionAsLookaround(compiler, on_success, BOUNDARY,
+                                                 flags_)
+                 : AssertionNode::AtBoundary(on_success);
+    case NON_BOUNDARY:
+      return NeedsUnicodeCaseEquivalents(flags_)
+                 ? BoundaryAssertionAsLookaround(compiler, on_success,
+                                                 NON_BOUNDARY, flags_)
+                 : AssertionNode::AtNonBoundary(on_success);
+    case END_OF_INPUT:
+      return AssertionNode::AtEnd(on_success);
+    case END_OF_LINE: {
+      // Compile $ in multiline regexps as an alternation with a positive
+      // lookahead in one side and an end-of-input on the other side.
+      // We need two registers for the lookahead.
+      Zone* zone = compiler->zone();
+      int stack_pointer_register = compiler->AllocateRegister();
+      int position_register = compiler->AllocateRegister();
+      // The ChoiceNode to distinguish between a newline and end-of-input.
+      ChoiceNode* result = new (zone) ChoiceNode(2, zone);
+      // Create a newline atom from the standard 'n' class. No explicit range
+      // list is needed for it.
+      JSRegExp::Flags default_flags = JSRegExp::Flags();
+      RegExpCharacterClass* newline_atom =
+          new (zone) RegExpCharacterClass('n', default_flags);
+      TextNode* newline_matcher =
+          new (zone) TextNode(newline_atom, false,
+                              ActionNode::PositiveSubmatchSuccess(
+                                  stack_pointer_register, position_register,
+                                  0,   // No captures inside.
+                                  -1,  // Ignored if no captures.
+                                  on_success));
+      // Wrap the newline matcher in a submatch so that it acts as a lookahead.
+      RegExpNode* end_of_line = ActionNode::BeginSubmatch(
+          stack_pointer_register, position_register, newline_matcher);
+      // Add the two alternatives to the ChoiceNode: the newline lookahead
+      // first, then the end-of-input assertion.
+      GuardedAlternative eol_alternative(end_of_line);
+      result->AddAlternative(eol_alternative);
+      GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
+      result->AddAlternative(end_alternative);
+      return result;
+    }
+    default:
+      UNREACHABLE();
+  }
+  return on_success;
+}
+
+// Creates a BackReferenceNode over the start/end registers of the referenced
+// capture, using the compiler's current read direction.
+RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
+                                        RegExpNode* on_success) {
+  const int start_register = RegExpCapture::StartRegister(index());
+  const int end_register = RegExpCapture::EndRegister(index());
+  const bool backward = compiler->read_backward();
+  return new (compiler->zone()) BackReferenceNode(
+      start_register, end_register, flags_, backward, on_success);
+}
+
+// Contributes no node of its own: the continuation on_success is returned
+// unchanged and compiler is not used.
+RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
+                                RegExpNode* on_success) {
+  return on_success;
+}
+
+// Records the lookaround parameters and creates the node that the lookaround
+// body must continue with when it matches (on_match_success_). A positive
+// lookaround gets a PositiveSubmatchSuccess action leading to on_success; a
+// negative one gets a NegativeSubmatchSuccess node, which is not given
+// on_success as a continuation.
+RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,
+                                   int stack_pointer_register,
+                                   int position_register,
+                                   int capture_register_count,
+                                   int capture_register_start)
+    : is_positive_(is_positive),
+      on_success_(on_success),
+      stack_pointer_register_(stack_pointer_register),
+      position_register_(position_register) {
+  if (!is_positive_) {
+    Zone* zone = on_success_->zone();
+    on_match_success_ = new (zone) NegativeSubmatchSuccess(
+        stack_pointer_register, position_register, capture_register_count,
+        capture_register_start, zone);
+    return;
+  }
+  on_match_success_ = ActionNode::PositiveSubmatchSuccess(
+      stack_pointer_register, position_register, capture_register_count,
+      capture_register_start, on_success_);
+}
+
+// Wraps the compiled lookaround body `match` in a BeginSubmatch action and
+// returns the entry node of the whole lookaround.
+RegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) {
+  RegExpNode* submatch_body = match;
+  if (!is_positive_) {
+    // A negative lookaround is represented by a ChoiceNode. The first
+    // alternative is the negative match; on success, its end node backtracks.
+    // On failure, the second alternative is tried and leads to success.
+    // NegativeLookaroundChoiceNode is a special ChoiceNode that ignores the
+    // first exit when calculating quick checks.
+    Zone* zone = on_success_->zone();
+    submatch_body = new (zone) NegativeLookaroundChoiceNode(
+        GuardedAlternative(match), GuardedAlternative(on_success_), zone);
+  }
+  return ActionNode::BeginSubmatch(stack_pointer_register_, position_register_,
+                                   submatch_body);
+}
+
+// Compiles this lookaround. Two fresh registers are allocated for the
+// submatch, and the compiler's read direction is switched to match the
+// lookaround type while the body is compiled, then restored.
+RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
+                                     RegExpNode* on_success) {
+  int stack_pointer_register = compiler->AllocateRegister();
+  int position_register = compiler->AllocateRegister();
+
+  // Register range covered by the captures inside the body: two registers per
+  // capture, with the first capture starting at register 2.
+  const int kRegistersPerCapture = 2;
+  const int kRegisterOfFirstCapture = 2;
+  const int register_count = capture_count_ * kRegistersPerCapture;
+  const int register_start =
+      kRegisterOfFirstCapture + capture_from_ * kRegistersPerCapture;
+
+  const bool was_reading_backward = compiler->read_backward();
+  compiler->set_read_backward(type() == LOOKBEHIND);
+  Builder builder(is_positive(), on_success, stack_pointer_register,
+                  position_register, register_count, register_start);
+  RegExpNode* match = body_->ToNode(compiler, builder.on_match_success());
+  RegExpNode* const lookaround = builder.ForMatch(match);
+  compiler->set_read_backward(was_reading_backward);
+  return lookaround;
+}
+
+// Compiles this capture by handing its body and index to the static overload.
+RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
+                                  RegExpNode* on_success) {
+  RegExpTree* const captured = body();
+  const int capture_index = index();
+  return ToNode(captured, capture_index, compiler, on_success);
+}
+
+// Surrounds the compiled body with two StorePosition actions, one per capture
+// register. When the compiler reads backward the roles of the start and end
+// registers are exchanged, so the register stored before the body is the
+// capture's end register.
+RegExpNode* RegExpCapture::ToNode(RegExpTree* body, int index,
+                                  RegExpCompiler* compiler,
+                                  RegExpNode* on_success) {
+  DCHECK_NOT_NULL(body);
+  const bool backward = compiler->read_backward();
+  const int before_body_reg = backward ? RegExpCapture::EndRegister(index)
+                                       : RegExpCapture::StartRegister(index);
+  const int after_body_reg = backward ? RegExpCapture::StartRegister(index)
+                                      : RegExpCapture::EndRegister(index);
+  // The graph is built back to front: trailing store, body, leading store.
+  RegExpNode* trailing_store =
+      ActionNode::StorePosition(after_body_reg, true, on_success);
+  RegExpNode* body_node = body->ToNode(compiler, trailing_store);
+  return ActionNode::StorePosition(before_body_reg, true, body_node);
+}
+
+// Chains the child nodes of this sequence. Each child is compiled with the
+// chain built so far as its continuation, so children are visited last-to-first
+// when reading forward and first-to-last when reading backward.
+RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
+                                      RegExpNode* on_success) {
+  ZoneList<RegExpTree*>* children = nodes();
+  const int count = children->length();
+  const bool backward = compiler->read_backward();
+  RegExpNode* current = on_success;
+  for (int step = 0; step < count; step++) {
+    const int child_index = backward ? step : count - 1 - step;
+    current = children->at(child_index)->ToNode(compiler, current);
+  }
+  return current;
+}
+
+// Appends the ranges described by the table elmv (elmc entries) to `ranges`.
+// The table holds [from, to) pairs, with the upper bound exclusive, followed by
+// a final kRangeEndMarker entry.
+static void AddClass(const int* elmv, int elmc,
+                     ZoneList<CharacterRange>* ranges, Zone* zone) {
+  const int marker_index = elmc - 1;
+  DCHECK_EQ(kRangeEndMarker, elmv[marker_index]);
+  for (int i = 0; i < marker_index; i += 2) {
+    const int from = elmv[i];
+    const int end = elmv[i + 1];  // Exclusive upper bound.
+    DCHECK(from < end);
+    ranges->Add(CharacterRange::Range(from, end - 1), zone);
+  }
+}
+
+// Appends the complement of the table elmv (same layout as for AddClass) to
+// `ranges`: one range for every gap between table entries, plus a final range
+// reaching up to String::kMaxCodePoint. The table must neither start at 0 nor
+// end at String::kMaxCodePoint.
+static void AddClassNegated(const int* elmv, int elmc,
+                            ZoneList<CharacterRange>* ranges, Zone* zone) {
+  const int marker_index = elmc - 1;
+  DCHECK_EQ(kRangeEndMarker, elmv[marker_index]);
+  DCHECK_NE(0x0000, elmv[0]);
+  DCHECK_NE(String::kMaxCodePoint, elmv[marker_index - 1]);
+  // First character of the gap that is currently being collected.
+  uc16 gap_start = 0x0000;
+  for (int i = 0; i < marker_index; i += 2) {
+    DCHECK(gap_start <= elmv[i] - 1);
+    DCHECK(elmv[i] < elmv[i + 1]);
+    ranges->Add(CharacterRange::Range(gap_start, elmv[i] - 1), zone);
+    gap_start = elmv[i + 1];
+  }
+  ranges->Add(CharacterRange::Range(gap_start, String::kMaxCodePoint), zone);
+}
+
+// Appends the ranges of class escape `type` to `ranges`. For \w and \W with
+// add_unicode_case_equivalents set, the word class is closed over unicode case
+// equivalents first (and only then negated for \W); every other combination
+// is handled by the three-argument overload.
+void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
+                                    bool add_unicode_case_equivalents,
+                                    Zone* zone) {
+  const bool is_word_class = (type == 'w' || type == 'W');
+  if (!add_unicode_case_equivalents || !is_word_class) {
+    AddClassEscape(type, ranges, zone);
+    return;
+  }
+
+  // See #sec-runtime-semantics-wordcharacters-abstract-operation
+  // In case of unicode and ignore_case, we need to create the closure over
+  // case equivalent characters before negating.
+  ZoneList<CharacterRange>* closure =
+      new (zone) ZoneList<CharacterRange>(2, zone);
+  AddClass(kWordRanges, kWordRangeCount, closure, zone);
+  AddUnicodeCaseEquivalents(closure, zone);
+  if (type == 'w') {
+    ranges->AddAll(*closure, zone);
+    return;
+  }
+  ZoneList<CharacterRange>* negated =
+      new (zone) ZoneList<CharacterRange>(2, zone);
+  CharacterRange::Negate(closure, negated, zone);
+  ranges->AddAll(*negated, zone);
+}
+
+// Appends the ranges of the standard class identified by `type` to `ranges`.
+// Lower-case letters select a class and the matching upper-case letter its
+// complement; '.', '*' and 'n' are additional shorthands described below. Any
+// other type is unreachable.
+void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
+                                    Zone* zone) {
+  switch (type) {
+    case 's':
+      return AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
+    case 'S':
+      return AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);
+    case 'w':
+      return AddClass(kWordRanges, kWordRangeCount, ranges, zone);
+    case 'W':
+      return AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);
+    case 'd':
+      return AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);
+    case 'D':
+      return AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);
+    // Everything except line terminators.
+    case '.':
+      return AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount,
+                             ranges, zone);
+    // Not a character range as defined by the spec, but a convenient
+    // shorthand for a character class that matches any character.
+    case '*':
+      ranges->Add(CharacterRange::Everything(), zone);
+      return;
+    // The set of characters matched by the $ and ^ symbols in multiline
+    // mode.
+    case 'n':
+      return AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges,
+                      zone);
+    default:
+      UNREACHABLE();
+  }
+}
+
+// Returns a view of the word-range table without its last entry (the entry
+// AddClass checks to be kRangeEndMarker).
+Vector<const int> CharacterRange::GetWordBounds() {
+  const int bound_count = kWordRangeCount - 1;
+  return Vector<const int>(kWordRanges, bound_count);
+}
+
+// static
+// Extends `ranges` with the case-insensitive equivalents of the characters it
+// contains. The list is canonicalized first, and only the ranges present at
+// that point are expanded; the equivalents are appended after them.
+//
+// Ranges starting above String::kMaxUtf16CodeUnit are skipped, as are ranges
+// lying entirely within the surrogate block. When is_one_byte is set and
+// RangeContainsLatin1Equivalents(range) is false, the range is clipped to
+// String::kMaxOneByteCharCode (or skipped if it starts above it).
+void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
+                                        ZoneList<CharacterRange>* ranges,
+                                        bool is_one_byte) {
+  CharacterRange::Canonicalize(ranges);
+  int range_count = ranges->length();
+#ifdef V8_INTL_SUPPORT
+  icu::UnicodeSet already_added;
+  icu::UnicodeSet others;
+  // The root locale does not depend on the range; look it up once.
+  const icu::Locale& locale = icu::Locale::getRoot();
+  for (int i = 0; i < range_count; i++) {
+    CharacterRange range = ranges->at(i);
+    uc32 bottom = range.from();
+    if (bottom > String::kMaxUtf16CodeUnit) continue;
+    uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);
+    // Nothing to be done for surrogates.
+    if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) continue;
+    if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
+      if (bottom > String::kMaxOneByteCharCode) continue;
+      if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
+    }
+    already_added.add(bottom, top);
+    while (bottom <= top) {
+      icu::UnicodeString upper(bottom);
+      upper.toUpper(locale);
+      icu::UnicodeSet expanded(bottom, bottom);
+      expanded.closeOver(USET_CASE_INSENSITIVE);
+      for (int32_t r = 0; r < expanded.getRangeCount(); r++) {
+        UChar32 start = expanded.getRangeStart(r);
+        UChar32 end = expanded.getRangeEnd(r);
+        while (start <= end) {
+          icu::UnicodeString upper2(start);
+          upper2.toUpper(locale);
+          // Only add if the upper case are the same.
+          if (upper[0] == upper2[0]) {
+            // #sec-runtime-semantics-canonicalize-ch
+            // 3.g. If the numeric value of ch ≥ 128 and the numeric value of
+            // cu < 128, return ch.
+            if (bottom >= 128 && start < 128) {
+              others.add(bottom);
+            } else {
+              // 3.h. Return cu.
+              others.add(start);
+            }
+          }
+          start++;
+        }
+      }
+      bottom++;
+    }
+  }
+  // Characters that were part of the input need not be added again.
+  others.removeAll(already_added);
+  for (int32_t r = 0; r < others.getRangeCount(); r++) {
+    UChar32 start = others.getRangeStart(r);
+    UChar32 end = others.getRangeEnd(r);
+    if (start == end) {
+      ranges->Add(CharacterRange::Singleton(start), zone);
+    } else {
+      ranges->Add(CharacterRange::Range(start, end), zone);
+    }
+  }
+#else
+  for (int i = 0; i < range_count; i++) {
+    CharacterRange range = ranges->at(i);
+    uc32 bottom = range.from();
+    if (bottom > String::kMaxUtf16CodeUnit) continue;
+    uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);
+    // Nothing to be done for surrogates.
+    if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) continue;
+    if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
+      if (bottom > String::kMaxOneByteCharCode) continue;
+      if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
+    }
+    unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+    if (top == bottom) {
+      // If this is a singleton we just expand the one character.
+      int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
+      for (int j = 0; j < length; j++) {
+        uc32 chr = chars[j];
+        if (chr != bottom) {
+          ranges->Add(CharacterRange::Singleton(chars[j]), zone);
+        }
+      }
+    } else {
+      // If this is a range we expand the characters block by block, expanding
+      // contiguous subranges (blocks) one at a time.  The approach is as
+      // follows.  For a given start character we look up the remainder of the
+      // block that contains it (represented by the end point), for instance we
+      // find 'z' if the character is 'c'.  A block is characterized by the
+      // property that all characters uncanonicalize in the same way, except
+      // that each entry in the result is incremented by the distance from the
+      // first element.  So a-z is a block because 'a' uncanonicalizes to ['a',
+      // 'A'] and the k'th letter uncanonicalizes to ['a' + k, 'A' + k].  Once
+      // we've found the end point we look up its uncanonicalization and
+      // produce a range for each element.  For instance for [c-f] we look up
+      // ['z', 'Z'] and produce [c-f] and [C-F].  We then only add a range if
+      // it is not already contained in the input, so [c-f] will be skipped but
+      // [C-F] will be added.  If this range is not completely contained in a
+      // block we do this for all the blocks covered by the range (handling
+      // characters that are not in a block as a "singleton block").
+      unibrow::uchar equivalents[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+      int pos = bottom;
+      while (pos <= top) {
+        int length =
+            isolate->jsregexp_canonrange()->get(pos, '\0', equivalents);
+        uc32 block_end;
+        if (length == 0) {
+          block_end = pos;
+        } else {
+          DCHECK_EQ(1, length);
+          block_end = equivalents[0];
+        }
+        int end = (block_end > top) ? top : block_end;
+        length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0',
+                                                         equivalents);
+        for (int j = 0; j < length; j++) {
+          uc32 c = equivalents[j];
+          uc32 range_from = c - (block_end - pos);
+          uc32 range_to = c - (block_end - end);
+          if (!(bottom <= range_from && range_to <= top)) {
+            ranges->Add(CharacterRange::Range(range_from, range_to), zone);
+          }
+        }
+        pos = end + 1;
+      }
+    }
+  }
+#endif  // V8_INTL_SUPPORT
+}
+
+// Returns true if the ranges are in increasing order with no two neighbours
+// overlapping or adjacent. Lists with at most one range are always canonical.
+bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
+  DCHECK_NOT_NULL(ranges);
+  const int count = ranges->length();
+  if (count <= 1) return true;
+  int previous_to = ranges->at(0).to();
+  for (int i = 1; i < count; i++) {
+    CharacterRange candidate = ranges->at(i);
+    // The next range must leave a gap of at least one character.
+    const bool leaves_gap = candidate.from() > previous_to + 1;
+    if (!leaves_gap) return false;
+    previous_to = candidate.to();
+  }
+  return true;
+}
+
+// Returns the explicit range list of this set. If none exists yet, it is
+// created on first use from the standard set type and cached in ranges_.
+ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
+  if (ranges_ != nullptr) return ranges_;
+  ranges_ = new (zone) ZoneList<CharacterRange>(2, zone);
+  CharacterRange::AddClassEscape(standard_set_type_, ranges_, false, zone);
+  return ranges_;
+}
+
+// Copies `count` elements of `list` from index `from` onwards to index `to`
+// onwards within the same list. The copy direction is chosen so that
+// overlapping source and target areas are handled correctly.
+static void MoveRanges(ZoneList<CharacterRange>* list, int from, int to,
+                       int count) {
+  if (from < to) {
+    // Moving up: copy back to front so that no source element is overwritten
+    // before it has been read.
+    int offset = count;
+    while (offset-- > 0) {
+      list->at(to + offset) = list->at(from + offset);
+    }
+    return;
+  }
+  // Moving down (or in place): copy front to back.
+  int offset = 0;
+  while (offset < count) {
+    list->at(to + offset) = list->at(from + offset);
+    offset++;
+  }
+}
+
+// Inserts `insert` into list[0..count[, which must be sorted by from value,
+// non-overlapping and non-adjacent. At most list[0..count] is used for the
+// result. Returns the number of canonical ranges afterwards; since an
+// insertion may collapse several existing ranges into one, this can be
+// anything in the range 1..count+1.
+static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, int count,
+                                      CharacterRange insert) {
+  uc32 from = insert.from();
+  uc32 to = insert.to();
+
+  // Find [start_pos..end_pos[, the existing ranges that overlap or are
+  // adjacent to the inserted one. Ranges before start_pos and at or after
+  // end_pos are not affected by the insertion.
+  int start_pos = 0;
+  int end_pos = count;
+  for (int i = count - 1; i >= 0; i--) {
+    CharacterRange current = list->at(i);
+    if (current.from() > to + 1) {
+      // Entirely above the inserted range, with a gap in between.
+      end_pos = i;
+      continue;
+    }
+    if (current.to() + 1 < from) {
+      // Entirely below the inserted range; so is everything before it.
+      start_pos = i + 1;
+      break;
+    }
+  }
+
+  const int affected = end_pos - start_pos;
+
+  if (affected == 0) {
+    // Nothing to merge with: open a slot at start_pos and put the range there.
+    if (start_pos < count) {
+      MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
+    }
+    list->at(start_pos) = insert;
+    return count + 1;
+  }
+
+  if (affected == 1) {
+    // Widen the single affected range in place.
+    CharacterRange to_replace = list->at(start_pos);
+    int new_from = Min(to_replace.from(), from);
+    int new_to = Max(to_replace.to(), to);
+    list->at(start_pos) = CharacterRange::Range(new_from, new_to);
+    return count;
+  }
+
+  // Several ranges are affected: merge them all into one range stored at
+  // start_pos and move the remaining ranges down behind it.
+  int new_from = Min(list->at(start_pos).from(), from);
+  int new_to = Max(list->at(end_pos - 1).to(), to);
+  if (end_pos < count) {
+    MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
+  }
+  list->at(start_pos) = CharacterRange::Range(new_from, new_to);
+  return count - affected + 1;
+}
+
+// Canonicalizes the explicit range list, if there is one. Special/default
+// classes (no explicit list yet) are always considered canonical; the result
+// of calling ranges() for them will be sorted.
+void CharacterSet::Canonicalize() {
+  if (ranges_ != nullptr) {
+    CharacterRange::Canonicalize(ranges_);
+  }
+}
+
+// Brings `character_ranges` into canonical form in place: increasing,
+// non-overlapping and non-adjacent ranges. The list may become shorter because
+// overlapping and adjacent ranges are combined.
+void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
+  const int n = character_ranges->length();
+  if (n <= 1) return;
+
+  // Determine the length of the prefix that is already canonical.
+  int canonical_prefix = 1;
+  int max = character_ranges->at(0).to();
+  for (; canonical_prefix < n; canonical_prefix++) {
+    CharacterRange current = character_ranges->at(canonical_prefix);
+    if (current.from() <= max + 1) break;
+    max = current.to();
+  }
+  // If the prefix covers the whole list, there is nothing left to do.
+  if (canonical_prefix == n) return;
+
+  // The remaining ranges are not canonicalized. Make them so by doing the
+  // equivalent of insertion sort: each one is inserted, in order, into the
+  // canonical part in front of it. Inserting a range can reduce the number of
+  // ranges in the result due to combining of adjacent and overlapping ranges.
+  int num_canonical = canonical_prefix;
+  for (int read = canonical_prefix; read < n; read++) {
+    num_canonical = InsertRangeInCanonicalList(character_ranges, num_canonical,
+                                               character_ranges->at(read));
+  }
+  character_ranges->Rewind(num_canonical);
+
+  DCHECK(CharacterRange::IsCanonical(character_ranges));
+}
+
+// Writes the complement of `ranges` within [0, String::kMaxCodePoint] to
+// `negated_ranges`. `ranges` must be canonical and `negated_ranges` must be
+// empty on entry.
+void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
+                            ZoneList<CharacterRange>* negated_ranges,
+                            Zone* zone) {
+  DCHECK(CharacterRange::IsCanonical(ranges));
+  DCHECK_EQ(0, negated_ranges->length());
+  int range_count = ranges->length();
+  // First character that has not been covered by an input range so far.
+  uc32 from = 0;
+  int i = 0;
+  if (range_count > 0 && ranges->at(0).from() == 0) {
+    // The input starts at 0, so there is no gap in front of the first range.
+    from = ranges->at(0).to() + 1;
+    i = 1;
+  }
+  while (i < range_count) {
+    CharacterRange range = ranges->at(i);
+    negated_ranges->Add(CharacterRange::Range(from, range.from() - 1), zone);
+    from = range.to() + 1;
+    i++;
+  }
+  // Emit the tail up to the last code point. The comparison is inclusive so
+  // that a tail consisting only of String::kMaxCodePoint itself (input ending
+  // at kMaxCodePoint - 1) is not lost.
+  if (from <= String::kMaxCodePoint) {
+    negated_ranges->Add(CharacterRange::Range(from, String::kMaxCodePoint),
+                        zone);
+  }
+}
+
+// Scoped object that tracks how far quantifier loops are unrolled in the
+// regexp graph generator. Constructing it multiplies the compiler's current
+// expansion factor by `factor`; destroying it restores the previous value.
+class RegExpExpansionLimiter {
+ public:
+  static const int kMaxExpansionFactor = 6;
+
+  RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
+      : compiler_(compiler),
+        saved_expansion_factor_(compiler->current_expansion_factor()),
+        ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
+    DCHECK_LT(0, factor);
+    // Already over the limit: leave the compiler's factor untouched.
+    if (!ok_to_expand_) return;
+
+    int new_factor;
+    if (factor > kMaxExpansionFactor) {
+      // Avoid integer overflow of the current expansion factor by clamping to
+      // the smallest value that is over the limit.
+      new_factor = kMaxExpansionFactor + 1;
+    } else {
+      new_factor = saved_expansion_factor_ * factor;
+    }
+    ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
+    compiler->set_current_expansion_factor(new_factor);
+  }
+
+  ~RegExpExpansionLimiter() {
+    compiler_->set_current_expansion_factor(saved_expansion_factor_);
+  }
+
+  // True if the expansion requested at construction stays within the limit.
+  bool ok_to_expand() { return ok_to_expand_; }
+
+ private:
+  RegExpCompiler* compiler_;
+  int saved_expansion_factor_;
+  bool ok_to_expand_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
+};
+
+RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy,
+                                     RegExpTree* body, RegExpCompiler* compiler,
+                                     RegExpNode* on_success,
+                                     bool not_at_start) {
+  // Builds nodes matching |body| min..max times, then going to |on_success|.
+  // In the general (non-unrolled) case x{f, t} becomes this:
+  //             (r++)<-.
+  //               |     `
+  //               |     (x)
+  //               v     ^
+  //      (r=0)-->(?)---/ [if r < t]
+  //               |
+  //   [if r >= f] \----> ...
+  // where r is the counter register, only allocated if f > 0 or t is finite.
+
+  // See ECMA-262 5.1, 15.10.2.5 (RepeatMatcher algorithm).
+  // The parser has already eliminated the case where max is 0.  In the case
+  // where max_match is zero the parser has removed the quantifier if min was
+  // > 0 and removed the atom if min was 0.  See AddQuantifierToAtom.
+
+  // If we know that we cannot match zero length then things are a little
+  // simpler since we don't need to make the special zero length match check
+  // from step 2.1.  If the min and max are small we can unroll a little in
+  // this case.
+  static const int kMaxUnrolledMinMatches = 3;  // Unroll (foo)+ and (foo){3,}
+  static const int kMaxUnrolledMaxMatches = 3;  // Unroll (foo)? and (foo){x,3}
+  if (max == 0) return on_success;  // Happens on recursion if max == min.
+  bool body_can_be_empty = (body->min_match() == 0);
+  int body_start_reg = RegExpCompiler::kNoRegister;  // For empty check.
+  Interval capture_registers = body->CaptureRegisters();
+  bool needs_capture_clearing = !capture_registers.is_empty();
+  Zone* zone = compiler->zone();
+
+  if (body_can_be_empty) {
+    body_start_reg = compiler->AllocateRegister();
+  } else if (compiler->optimize() && !needs_capture_clearing) {
+    // Only unroll if there are no captures and the body can't be empty.  Each
+    // limiter below bounds the compound expansion from nested unrolling.
+    {
+      RegExpExpansionLimiter limiter(compiler, min + ((max != min) ? 1 : 0));
+      if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
+        int new_max = (max == kInfinity) ? max : max - min;
+        // Recurse once to get the loop or optional matches after the fixed
+        // ones; new_max is what remains of max once min matches are forced.
+        RegExpNode* answer =
+            ToNode(0, new_max, is_greedy, body, compiler, on_success, true);
+        // Unroll the forced matches from 0 to min.  This can cause chains of
+        // TextNodes (which the parser does not generate).  These should be
+        // combined if it turns out they hinder good code generation.
+        for (int i = 0; i < min; i++) {
+          answer = body->ToNode(compiler, answer);
+        }
+        return answer;
+      }
+    }
+    if (max <= kMaxUnrolledMaxMatches && min == 0) {
+      DCHECK_LT(0, max);  // Due to the early return on max == 0.
+      RegExpExpansionLimiter limiter(compiler, max);
+      if (limiter.ok_to_expand()) {
+        // Unroll the optional matches up to max as nested two-way choices.
+        RegExpNode* answer = on_success;
+        for (int i = 0; i < max; i++) {
+          ChoiceNode* alternation = new (zone) ChoiceNode(2, zone);
+          if (is_greedy) {  // Body alternative is added first.
+            alternation->AddAlternative(
+                GuardedAlternative(body->ToNode(compiler, answer)));
+            alternation->AddAlternative(GuardedAlternative(on_success));
+          } else {  // Exit alternative is added first.
+            alternation->AddAlternative(GuardedAlternative(on_success));
+            alternation->AddAlternative(
+                GuardedAlternative(body->ToNode(compiler, answer)));
+          }
+          answer = alternation;
+          if (not_at_start && !compiler->read_backward()) {
+            alternation->set_not_at_start();
+          }
+        }
+        return answer;
+      }
+    }
+  }
+  bool has_min = min > 0;
+  bool has_max = max < RegExpTree::kInfinity;
+  bool needs_counter = has_min || has_max;  // Plain {0,} needs none.
+  int reg_ctr = needs_counter ? compiler->AllocateRegister()
+                              : RegExpCompiler::kNoRegister;
+  LoopChoiceNode* center = new (zone)  // The (?) in the diagram.
+      LoopChoiceNode(body->min_match() == 0, compiler->read_backward(), zone);
+  if (not_at_start && !compiler->read_backward()) center->set_not_at_start();
+  RegExpNode* loop_return =  // The (r++) edge, if counting.
+      needs_counter ? static_cast<RegExpNode*>(
+                          ActionNode::IncrementRegister(reg_ctr, center))
+                    : static_cast<RegExpNode*>(center);
+  if (body_can_be_empty) {
+    // If the body can be empty we need to check if it was and then
+    // backtrack.
+    loop_return =
+        ActionNode::EmptyMatchCheck(body_start_reg, reg_ctr, min, loop_return);
+  }
+  RegExpNode* body_node = body->ToNode(compiler, loop_return);
+  if (body_can_be_empty) {
+    // If the body can be empty we need to store the start position
+    // so we can bail out if it was empty.
+    body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
+  }
+  if (needs_capture_clearing) {
+    // Before entering the body of this loop we need to clear captures.
+    body_node = ActionNode::ClearCaptures(capture_registers, body_node);
+  }
+  GuardedAlternative body_alt(body_node);
+  if (has_max) {  // Guard the loop edge: [if r < t].
+    Guard* body_guard = new (zone) Guard(reg_ctr, Guard::LT, max);
+    body_alt.AddGuard(body_guard, zone);
+  }
+  GuardedAlternative rest_alt(on_success);
+  if (has_min) {  // Guard the exit edge: [if r >= f].
+    Guard* rest_guard = new (compiler->zone()) Guard(reg_ctr, Guard::GEQ, min);
+    rest_alt.AddGuard(rest_guard, zone);
+  }
+  if (is_greedy) {  // Greedy: the loop alternative is added first.
+    center->AddLoopAlternative(body_alt);
+    center->AddContinueAlternative(rest_alt);
+  } else {  // Non-greedy: the continue alternative is added first.
+    center->AddContinueAlternative(rest_alt);
+    center->AddLoopAlternative(body_alt);
+  }
+  if (needs_counter) {  // Prepend the (r=0) entry node.
+    return ActionNode::SetRegister(reg_ctr, 0, center);
+  } else {
+    return center;
+  }
+}
+
+}  // namespace internal
+}  // namespace v8
--- a/src/regexp/regexp-compiler.h
+++ b/src/regexp/regexp-compiler.h
+// Copyright 2019 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_REGEXP_REGEXP_COMPILER_H_
+#define V8_REGEXP_REGEXP_COMPILER_H_
+
+#include "src/regexp/jsregexp.h"  // TODO(jgruber): Remove if possible.
+#include "src/regexp/regexp-macro-assembler-arch.h"
+
+namespace v8 {
+namespace internal {
+
+class Isolate;
+
+namespace regexp_compiler_constants {
+
+// The '2' variant has inclusive from and exclusive to.
+// kSpaceRanges covers \s as defined in ECMA-262 5.1, 15.10.2.12,
+// which includes WhiteSpace (7.2) and LineTerminator (7.3) values.
+constexpr uc32 kRangeEndMarker = 0x110000;  // Ends each table below.
+constexpr int kSpaceRanges[] = {
+    '\t',   '\r' + 1, ' ',    ' ' + 1, 0x00A0, 0x00A1, 0x1680,
+    0x1681, 0x2000,   0x200B, 0x2028,  0x202A, 0x202F, 0x2030,
+    0x205F, 0x2060,   0x3000, 0x3001,  0xFEFF, 0xFF00, kRangeEndMarker};
+constexpr int kSpaceRangeCount = arraysize(kSpaceRanges);  // Incl. marker.
+
+constexpr int kWordRanges[] = {'0',     '9' + 1, 'A',     'Z' + 1,        '_',
+                               '_' + 1, 'a',     'z' + 1, kRangeEndMarker};
+constexpr int kWordRangeCount = arraysize(kWordRanges);
+constexpr int kDigitRanges[] = {'0', '9' + 1, kRangeEndMarker};
+constexpr int kDigitRangeCount = arraysize(kDigitRanges);
+constexpr int kSurrogateRanges[] = {kLeadSurrogateStart,
+                                    kLeadSurrogateStart + 1, kRangeEndMarker};
+constexpr int kSurrogateRangeCount = arraysize(kSurrogateRanges);
+constexpr int kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D,         0x000E,
+                                         0x2028, 0x202A, kRangeEndMarker};
+constexpr int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);
+
+}  // namespace regexp_compiler_constants
+
+class FrequencyCollator {  // Tallies sampled characters per table slot.
+ public:
+  FrequencyCollator() : total_samples_(0) {
+    for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
+      frequencies_[i] = CharacterFrequency(i);
+    }
+  }
+
+  void CountCharacter(int character) {  // Records one sample.
+    int index = (character & RegExpMacroAssembler::kTableMask);  // Slot.
+    frequencies_[index].Increment();
+    total_samples_++;
+  }
+
+  // Returns the share of samples in slot |in_character|, not in percent but
+  // per-128 (the table size from the regexp macro assembler).
+  int Frequency(int in_character) {
+    DCHECK((in_character & RegExpMacroAssembler::kTableMask) == in_character);
+    if (total_samples_ < 1) return 1;  // Avoid division by zero.
+    int freq_in_per128 =
+        (frequencies_[in_character].counter() * 128) / total_samples_;
+    return freq_in_per128;
+  }
+
+ private:
+  class CharacterFrequency {  // Sample counter for one table slot.
+   public:
+    CharacterFrequency() : counter_(0), character_(-1) {}
+    explicit CharacterFrequency(int character)
+        : counter_(0), character_(character) {}
+
+    void Increment() { counter_++; }
+    int counter() { return counter_; }
+    int character() { return character_; }
+
+   private:
+    int counter_;    // Number of samples recorded so far.
+    int character_;  // Slot index, or -1 when default-constructed.
+  };
+
+ private:
+  CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
+  int total_samples_;  // Sum of all CountCharacter() calls.
+};
+
+class RegExpCompiler {  // Compiler state: registers, work list, mode flags.
+ public:
+  RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
+                 bool is_one_byte);
+
+  int AllocateRegister() {  // Returns the next free register index.
+    if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
+      reg_exp_too_big_ = true;  // Out of registers: flag the regexp.
+      return next_register_;    // Not advanced in this case.
+    }
+    return next_register_++;
+  }
+
+  // Lookarounds to match lone surrogates for unicode character class matches
+  // are never nested. We can therefore reuse registers.
+  int UnicodeLookaroundStackRegister() {
+    if (unicode_lookaround_stack_register_ == kNoRegister) {  // First use.
+      unicode_lookaround_stack_register_ = AllocateRegister();
+    }
+    return unicode_lookaround_stack_register_;
+  }
+
+  int UnicodeLookaroundPositionRegister() {
+    if (unicode_lookaround_position_register_ == kNoRegister) {  // First use.
+      unicode_lookaround_position_register_ = AllocateRegister();
+    }
+    return unicode_lookaround_position_register_;
+  }
+
+  RegExpEngine::CompilationResult Assemble(Isolate* isolate,
+                                           RegExpMacroAssembler* assembler,
+                                           RegExpNode* start, int capture_count,
+                                           Handle<String> pattern);
+
+  inline void AddWork(RegExpNode* node) {  // Queues |node| at most once.
+    if (!node->on_work_list() && !node->label()->is_bound()) {
+      node->set_on_work_list(true);
+      work_list_->push_back(node);
+    }
+  }
+
+  static const int kImplementationOffset = 0;
+  static const int kNumberOfRegistersOffset = 0;
+  static const int kCodeOffset = 1;
+
+  RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
+  EndNode* accept() { return accept_; }
+
+  static const int kMaxRecursion = 100;
+  inline int recursion_depth() { return recursion_depth_; }
+  inline void IncrementRecursionDepth() { recursion_depth_++; }
+  inline void DecrementRecursionDepth() { recursion_depth_--; }
+
+  void SetRegExpTooBig() { reg_exp_too_big_ = true; }
+
+  inline bool one_byte() { return one_byte_; }
+  inline bool optimize() { return optimize_; }
+  inline void set_optimize(bool value) { optimize_ = value; }
+  inline bool limiting_recursion() { return limiting_recursion_; }
+  inline void set_limiting_recursion(bool value) {
+    limiting_recursion_ = value;
+  }
+  bool read_backward() { return read_backward_; }
+  void set_read_backward(bool value) { read_backward_ = value; }
+  FrequencyCollator* frequency_collator() { return &frequency_collator_; }
+
+  int current_expansion_factor() { return current_expansion_factor_; }
+  void set_current_expansion_factor(int value) {
+    current_expansion_factor_ = value;
+  }
+
+  Isolate* isolate() const { return isolate_; }
+  Zone* zone() const { return zone_; }
+
+  static const int kNoRegister = -1;  // Sentinel: no register assigned.
+
+ private:
+  EndNode* accept_;
+  int next_register_;  // Next index AllocateRegister() hands out.
+  int unicode_lookaround_stack_register_;     // Allocated on first use.
+  int unicode_lookaround_position_register_;  // Allocated on first use.
+  std::vector<RegExpNode*>* work_list_;  // Filled by AddWork().
+  int recursion_depth_;
+  RegExpMacroAssembler* macro_assembler_;
+  bool one_byte_;
+  bool reg_exp_too_big_;  // Set when registers run out or on request.
+  bool limiting_recursion_;
+  bool optimize_;
+  bool read_backward_;
+  int current_expansion_factor_;  // See RegExpExpansionLimiter.
+  FrequencyCollator frequency_collator_;
+  Isolate* isolate_;
+  Zone* zone_;
+};
+
+// Categorizes character ranges into BMP, non-BMP, lead, and trail surrogates.
+class UnicodeRangeSplitter {
+ public:
+  V8_EXPORT_PRIVATE UnicodeRangeSplitter(Zone* zone,
+                                         ZoneList<CharacterRange>* base);
+  void Call(uc32 from, DispatchTable::Entry entry);
+
+  ZoneList<CharacterRange>* bmp() { return bmp_; }
+  ZoneList<CharacterRange>* lead_surrogates() { return lead_surrogates_; }
+  ZoneList<CharacterRange>* trail_surrogates() { return trail_surrogates_; }
+  ZoneList<CharacterRange>* non_bmp() const { return non_bmp_; }
+
+ private:
+  static const int kBase = 0;  // NOTE(review): presumably the input ranges.
+  // Separate ranges into the following categories:
+  static const int kBmpCodePoints = 1;
+  static const int kLeadSurrogates = 2;
+  static const int kTrailSurrogates = 3;
+  static const int kNonBmpCodePoints = 4;
+
+  Zone* zone_;
+  DispatchTable table_;
+  ZoneList<CharacterRange>* bmp_;
+  ZoneList<CharacterRange>* lead_surrogates_;
+  ZoneList<CharacterRange>* trail_surrogates_;
+  ZoneList<CharacterRange>* non_bmp_;
+};
+
+// Tests |range| against the code points 0x39C, 0x3BC and 0x178.
+// TODO(jgruber): Move to CharacterRange.
+bool RangeContainsLatin1Equivalents(CharacterRange range);
+
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_REGEXP_REGEXP_COMPILER_H_
--- a/test/cctest/test-regexp.cc
+++ b/test/cctest/test-regexp.cc
@@ -37,6 +37,7 @@
 #include "src/init/v8.h"
 #include "src/objects/objects-inl.h"
 #include "src/regexp/jsregexp.h"
+#include "src/regexp/regexp-compiler.h"
 #include "src/regexp/regexp-interpreter.h"
 #include "src/regexp/regexp-macro-assembler-arch.h"
 #include "src/regexp/regexp-macro-assembler-irregexp.h"