runtime-regexp.cc 66.6 KB
Newer Older
1 2 3 4
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5 6
#include <functional>

7
#include "src/arguments-inl.h"
8
#include "src/conversions-inl.h"
9
#include "src/counters.h"
10
#include "src/heap/heap-inl.h"  // For ToBoolean. TODO(jkummerow): Drop.
11
#include "src/isolate-inl.h"
12
#include "src/message-template.h"
13
#include "src/objects/js-array-inl.h"
14
#include "src/regexp/jsregexp-inl.h"
15
#include "src/regexp/regexp-utils.h"
16
#include "src/runtime/runtime-utils.h"
17
#include "src/string-builder-inl.h"
18
#include "src/string-search.h"
19
#include "src/zone/zone-chunk-list.h"
20 21 22 23

namespace v8 {
namespace internal {

24 25
namespace {

26 27 28 29 30 31 32 33 34 35 36 37 38 39
// Returns -1 for failure.
uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
                                   bool has_named_captures) {
  const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
  const uint32_t kAdditionalArgsWithNamedCaptures = 3;
  if (num_captures > Code::kMaxArguments) return -1;
  uint32_t argc = has_named_captures
                      ? num_captures + kAdditionalArgsWithNamedCaptures
                      : num_captures + kAdditionalArgsWithoutNamedCaptures;
  STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
                                          kAdditionalArgsWithNamedCaptures);
  return (argc > Code::kMaxArguments) ? -1 : argc;
}

40 41
// Looks up the capture of the given name. Returns the (1-based) numbered
// capture index or -1 on failure.
42
int LookupNamedCapture(const std::function<bool(String)>& name_matches,
43
                       FixedArray capture_name_map) {
44 45 46 47 48 49 50 51 52 53 54
  // TODO(jgruber): Sort capture_name_map and do binary search via
  // internalized strings.

  int maybe_capture_index = -1;
  const int named_capture_count = capture_name_map->length() >> 1;
  for (int j = 0; j < named_capture_count; j++) {
    // The format of {capture_name_map} is documented at
    // JSRegExp::kIrregexpCaptureNameMapIndex.
    const int name_ix = j * 2;
    const int index_ix = j * 2 + 1;

55
    String capture_name = String::cast(capture_name_map->get(name_ix));
56 57
    if (!name_matches(capture_name)) continue;

jgruber's avatar
jgruber committed
58
    maybe_capture_index = Smi::ToInt(capture_name_map->get(index_ix));
59 60 61 62 63 64 65 66
    break;
  }

  return maybe_capture_index;
}

}  // namespace

67 68 69
class CompiledReplacement {
 public:
  explicit CompiledReplacement(Zone* zone)
70
      : parts_(zone), replacement_substrings_(zone) {}
71

72
  // Return whether the replacement is simple.
73 74 75
  bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
               Handle<String> replacement, int capture_count,
               int subject_length);
76 77 78 79 80 81

  // Use Apply only if Compile returned false.
  void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
             int32_t* match);

  // Number of distinct parts of the replacement pattern.
82
  int parts() { return static_cast<int>(parts_.size()); }
83 84 85 86 87 88 89 90

 private:
  enum PartType {
    SUBJECT_PREFIX = 1,
    SUBJECT_SUFFIX,
    SUBJECT_CAPTURE,
    REPLACEMENT_SUBSTRING,
    REPLACEMENT_STRING,
91
    EMPTY_REPLACEMENT,
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
    NUMBER_OF_PART_TYPES
  };

  struct ReplacementPart {
    static inline ReplacementPart SubjectMatch() {
      return ReplacementPart(SUBJECT_CAPTURE, 0);
    }
    static inline ReplacementPart SubjectCapture(int capture_index) {
      return ReplacementPart(SUBJECT_CAPTURE, capture_index);
    }
    static inline ReplacementPart SubjectPrefix() {
      return ReplacementPart(SUBJECT_PREFIX, 0);
    }
    static inline ReplacementPart SubjectSuffix(int subject_length) {
      return ReplacementPart(SUBJECT_SUFFIX, subject_length);
    }
    static inline ReplacementPart ReplacementString() {
      return ReplacementPart(REPLACEMENT_STRING, 0);
    }
111 112 113
    static inline ReplacementPart EmptyReplacement() {
      return ReplacementPart(EMPTY_REPLACEMENT, 0);
    }
114
    static inline ReplacementPart ReplacementSubString(int from, int to) {
115 116
      DCHECK_LE(0, from);
      DCHECK_GT(to, from);
117 118 119 120 121 122 123 124 125 126 127 128 129 130
      return ReplacementPart(-from, to);
    }

    // If tag <= 0 then it is the negation of a start index of a substring of
    // the replacement pattern, otherwise it's a value from PartType.
    ReplacementPart(int tag, int data) : tag(tag), data(data) {
      // Must be non-positive or a PartType value.
      DCHECK(tag < NUMBER_OF_PART_TYPES);
    }
    // Either a value of PartType or a non-positive number that is
    // the negation of an index into the replacement string.
    int tag;
    // The data value's interpretation depends on the value of tag:
    // tag == SUBJECT_PREFIX ||
131
    // tag == SUBJECT_SUFFIX:  data is unused.
132 133 134 135
    // tag == SUBJECT_CAPTURE: data is the number of the capture.
    // tag == REPLACEMENT_SUBSTRING ||
    // tag == REPLACEMENT_STRING:    data is index into array of substrings
    //                               of the replacement string.
136
    // tag == EMPTY_REPLACEMENT: data is unused.
137 138 139 140 141 142 143 144
    // tag <= 0: Temporary representation of the substring of the replacement
    //           string ranging over -tag .. data.
    //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
    //           substring objects.
    int data;
  };

  template <typename Char>
145
  bool ParseReplacementPattern(ZoneChunkList<ReplacementPart>* parts,
146
                               Vector<Char> characters,
147
                               FixedArray capture_name_map, int capture_count,
148
                               int subject_length) {
149 150 151
    // Equivalent to String::GetSubstitution, except that this method converts
    // the replacement string into an internal representation that avoids
    // repeated parsing when used repeatedly.
152 153 154 155 156 157 158 159 160 161 162 163 164 165
    int length = characters.length();
    int last = 0;
    for (int i = 0; i < length; i++) {
      Char c = characters[i];
      if (c == '$') {
        int next_index = i + 1;
        if (next_index == length) {  // No next character!
          break;
        }
        Char c2 = characters[next_index];
        switch (c2) {
          case '$':
            if (i > last) {
              // There is a substring before. Include the first "$".
166 167
              parts->push_back(
                  ReplacementPart::ReplacementSubString(last, next_index));
168 169 170 171 172 173 174 175 176
              last = next_index + 1;  // Continue after the second "$".
            } else {
              // Let the next substring start with the second "$".
              last = next_index;
            }
            i = next_index;
            break;
          case '`':
            if (i > last) {
177
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
178
            }
179
            parts->push_back(ReplacementPart::SubjectPrefix());
180 181 182 183 184
            i = next_index;
            last = i + 1;
            break;
          case '\'':
            if (i > last) {
185
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
186
            }
187
            parts->push_back(ReplacementPart::SubjectSuffix(subject_length));
188 189 190 191 192
            i = next_index;
            last = i + 1;
            break;
          case '&':
            if (i > last) {
193
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
194
            }
195
            parts->push_back(ReplacementPart::SubjectMatch());
196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
            i = next_index;
            last = i + 1;
            break;
          case '0':
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
          case '8':
          case '9': {
            int capture_ref = c2 - '0';
            if (capture_ref > capture_count) {
              i = next_index;
              continue;
            }
            int second_digit_index = next_index + 1;
            if (second_digit_index < length) {
              // Peek ahead to see if we have two digits.
              Char c3 = characters[second_digit_index];
              if ('0' <= c3 && c3 <= '9') {  // Double digits.
                int double_digit_ref = capture_ref * 10 + c3 - '0';
                if (double_digit_ref <= capture_count) {
                  next_index = second_digit_index;
                  capture_ref = double_digit_ref;
                }
              }
            }
            if (capture_ref > 0) {
              if (i > last) {
228 229
                parts->push_back(
                    ReplacementPart::ReplacementSubString(last, i));
230 231
              }
              DCHECK(capture_ref <= capture_count);
232
              parts->push_back(ReplacementPart::SubjectCapture(capture_ref));
233 234 235 236 237
              last = next_index + 1;
            }
            i = next_index;
            break;
          }
238
          case '<': {
239
            if (capture_name_map.is_null()) {
240 241 242 243
              i = next_index;
              break;
            }

244 245
            // Scan until the next '>', and let the enclosed substring be the
            // groupName.
246 247 248 249 250 251 252 253 254 255

            const int name_start_index = next_index + 1;
            int closing_bracket_index = -1;
            for (int j = name_start_index; j < length; j++) {
              if (characters[j] == '>') {
                closing_bracket_index = j;
                break;
              }
            }

256 257 258 259 260 261
            // If no closing bracket is found, '$<' is treated as a string
            // literal.
            if (closing_bracket_index == -1) {
              i = next_index;
              break;
            }
262 263 264 265 266 267

            Vector<Char> requested_name =
                characters.SubVector(name_start_index, closing_bracket_index);

            // Let capture be ? Get(namedCaptures, groupName).

268
            const int capture_index = LookupNamedCapture(
269
                [=](String capture_name) {
270 271 272 273
                  return capture_name->IsEqualTo(requested_name);
                },
                capture_name_map);

274 275
            // If capture is undefined or does not exist, replace the text
            // through the following '>' with the empty string.
276 277 278
            // Otherwise, replace the text through the following '>' with
            // ? ToString(capture).

279 280
            DCHECK(capture_index == -1 ||
                   (1 <= capture_index && capture_index <= capture_count));
281 282

            if (i > last) {
283
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
284
            }
285 286 287 288
            parts->push_back(
                (capture_index == -1)
                    ? ReplacementPart::EmptyReplacement()
                    : ReplacementPart::SubjectCapture(capture_index));
289 290 291 292
            last = closing_bracket_index + 1;
            i = closing_bracket_index;
            break;
          }
293 294 295 296 297 298 299 300 301
          default:
            i = next_index;
            break;
        }
      }
    }
    if (length > last) {
      if (last == 0) {
        // Replacement is simple.  Do not use Apply to do the replacement.
302
        return true;
303
      } else {
304
        parts->push_back(ReplacementPart::ReplacementSubString(last, length));
305 306
      }
    }
307
    return false;
308 309
  }

310 311
  ZoneChunkList<ReplacementPart> parts_;
  ZoneVector<Handle<String>> replacement_substrings_;
312 313
};

314
bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
315 316
                                  Handle<String> replacement, int capture_count,
                                  int subject_length) {
317 318
  {
    DisallowHeapAllocation no_gc;
319
    String::FlatContent content = replacement->GetFlatContent(no_gc);
320
    DCHECK(content.IsFlat());
321

322
    FixedArray capture_name_map;
323 324
    if (capture_count > 0) {
      DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
325
      Object maybe_capture_name_map = regexp->CaptureNameMap();
326 327 328 329 330
      if (maybe_capture_name_map->IsFixedArray()) {
        capture_name_map = FixedArray::cast(maybe_capture_name_map);
      }
    }

331
    bool simple;
332 333
    if (content.IsOneByte()) {
      simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
334
                                       capture_name_map, capture_count,
335
                                       subject_length);
336 337 338
    } else {
      DCHECK(content.IsTwoByte());
      simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
339
                                       capture_name_map, capture_count,
340
                                       subject_length);
341
    }
342
    if (simple) return true;
343 344 345 346
  }

  // Find substrings of replacement string and create them as String objects.
  int substring_index = 0;
347 348
  for (ReplacementPart& part : parts_) {
    int tag = part.tag;
349 350
    if (tag <= 0) {  // A replacement string slice.
      int from = -tag;
351 352 353 354 355
      int to = part.data;
      replacement_substrings_.push_back(
          isolate->factory()->NewSubString(replacement, from, to));
      part.tag = REPLACEMENT_SUBSTRING;
      part.data = substring_index;
356 357
      substring_index++;
    } else if (tag == REPLACEMENT_STRING) {
358 359
      replacement_substrings_.push_back(replacement);
      part.data = substring_index;
360 361 362
      substring_index++;
    }
  }
363
  return false;
364 365 366 367 368
}


void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
                                int match_from, int match_to, int32_t* match) {
369 370
  DCHECK_LT(0, parts_.size());
  for (ReplacementPart& part : parts_) {
371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
    switch (part.tag) {
      case SUBJECT_PREFIX:
        if (match_from > 0) builder->AddSubjectSlice(0, match_from);
        break;
      case SUBJECT_SUFFIX: {
        int subject_length = part.data;
        if (match_to < subject_length) {
          builder->AddSubjectSlice(match_to, subject_length);
        }
        break;
      }
      case SUBJECT_CAPTURE: {
        int capture = part.data;
        int from = match[capture * 2];
        int to = match[capture * 2 + 1];
        if (from >= 0 && to > from) {
          builder->AddSubjectSlice(from, to);
        }
        break;
      }
      case REPLACEMENT_SUBSTRING:
      case REPLACEMENT_STRING:
        builder->AddString(replacement_substrings_[part.data]);
        break;
395 396
      case EMPTY_REPLACEMENT:
        break;
397 398 399 400 401 402
      default:
        UNREACHABLE();
    }
  }
}

403
void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
404
                              std::vector<int>* indices, unsigned int limit) {
405
  DCHECK_LT(0, limit);
406 407
  // Collect indices of pattern in subject using memchr.
  // Stop after finding at most limit values.
408
  const uint8_t* subject_start = subject.begin();
409 410 411 412 413
  const uint8_t* subject_end = subject_start + subject.length();
  const uint8_t* pos = subject_start;
  while (limit > 0) {
    pos = reinterpret_cast<const uint8_t*>(
        memchr(pos, pattern, subject_end - pos));
414
    if (pos == nullptr) return;
415
    indices->push_back(static_cast<int>(pos - subject_start));
416 417 418 419 420 421
    pos++;
    limit--;
  }
}

void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
422
                              std::vector<int>* indices, unsigned int limit) {
423
  DCHECK_LT(0, limit);
424
  const uc16* subject_start = subject.begin();
425 426 427
  const uc16* subject_end = subject_start + subject.length();
  for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
    if (*pos == pattern) {
428
      indices->push_back(static_cast<int>(pos - subject_start));
429 430 431 432 433 434 435
      limit--;
    }
  }
}

template <typename SubjectChar, typename PatternChar>
void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
436 437
                       Vector<const PatternChar> pattern,
                       std::vector<int>* indices, unsigned int limit) {
438
  DCHECK_LT(0, limit);
439 440 441 442 443 444 445 446
  // Collect indices of pattern in subject.
  // Stop after finding at most limit values.
  int pattern_length = pattern.length();
  int index = 0;
  StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
  while (limit > 0) {
    index = search.Search(subject, index);
    if (index < 0) return;
447
    indices->push_back(index);
448 449 450 451 452
    index += pattern_length;
    limit--;
  }
}

453 454
void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern,
                               std::vector<int>* indices, unsigned int limit) {
455 456
  {
    DisallowHeapAllocation no_gc;
457 458
    String::FlatContent subject_content = subject->GetFlatContent(no_gc);
    String::FlatContent pattern_content = pattern->GetFlatContent(no_gc);
459 460 461 462 463 464 465 466 467
    DCHECK(subject_content.IsFlat());
    DCHECK(pattern_content.IsFlat());
    if (subject_content.IsOneByte()) {
      Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
      if (pattern_content.IsOneByte()) {
        Vector<const uint8_t> pattern_vector =
            pattern_content.ToOneByteVector();
        if (pattern_vector.length() == 1) {
          FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
468
                                   limit);
469 470
        } else {
          FindStringIndices(isolate, subject_vector, pattern_vector, indices,
471
                            limit);
472 473 474
        }
      } else {
        FindStringIndices(isolate, subject_vector,
475
                          pattern_content.ToUC16Vector(), indices, limit);
476 477 478 479 480 481 482 483
      }
    } else {
      Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
      if (pattern_content.IsOneByte()) {
        Vector<const uint8_t> pattern_vector =
            pattern_content.ToOneByteVector();
        if (pattern_vector.length() == 1) {
          FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
484
                                   limit);
485 486
        } else {
          FindStringIndices(isolate, subject_vector, pattern_vector, indices,
487
                            limit);
488 489 490 491 492
        }
      } else {
        Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
        if (pattern_vector.length() == 1) {
          FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
493
                                   limit);
494 495
        } else {
          FindStringIndices(isolate, subject_vector, pattern_vector, indices,
496
                            limit);
497 498 499 500 501 502
        }
      }
    }
  }
}

503
namespace {
504 505 506
std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
  std::vector<int>* list = isolate->regexp_indices();
  list->clear();
507 508 509
  return list;
}

heimbuef's avatar
heimbuef committed
510 511 512
void TruncateRegexpIndicesList(Isolate* isolate) {
  // Same size as smallest zone segment, preserving behavior from the
  // runtime zone.
513
  static const int kMaxRegexpIndicesListCapacity = 8 * KB;
514 515 516 517 518
  std::vector<int>* indicies = isolate->regexp_indices();
  if (indicies->capacity() > kMaxRegexpIndicesListCapacity) {
    // Throw away backing storage.
    indicies->clear();
    indicies->shrink_to_fit();
519 520 521 522
  }
}
}  // namespace

523
template <typename ResultSeqString>
524
V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
525
    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
526
    Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
527 528 529
  DCHECK(subject->IsFlat());
  DCHECK(replacement->IsFlat());

530
  std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
531

532
  DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
533
  String pattern =
534 535 536 537 538
      String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
  int subject_len = subject->length();
  int pattern_len = pattern->length();
  int replacement_len = replacement->length();

539
  FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
540

541
  if (indices->empty()) return *subject;
542 543 544 545

  // Detect integer overflow.
  int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
                           static_cast<int64_t>(pattern_len)) *
546
                              static_cast<int64_t>(indices->size()) +
547 548 549 550 551 552 553 554
                          static_cast<int64_t>(subject_len);
  int result_len;
  if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
    STATIC_ASSERT(String::kMaxLength < kMaxInt);
    result_len = kMaxInt;  // Provoke exception.
  } else {
    result_len = static_cast<int>(result_len_64);
  }
555
  if (result_len == 0) {
556
    return ReadOnlyRoots(isolate).empty_string();
557
  }
558 559 560 561 562 563 564 565 566 567 568 569 570 571

  int subject_pos = 0;
  int result_pos = 0;

  MaybeHandle<SeqString> maybe_res;
  if (ResultSeqString::kHasOneByteEncoding) {
    maybe_res = isolate->factory()->NewRawOneByteString(result_len);
  } else {
    maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
  }
  Handle<SeqString> untyped_res;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
  Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);

572
  DisallowHeapAllocation no_gc;
573
  for (int index : *indices) {
574
    // Copy non-matched subject content.
575
    if (subject_pos < index) {
576
      String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
577 578
                          subject_pos, index);
      result_pos += index - subject_pos;
579 580 581 582
    }

    // Replace match.
    if (replacement_len > 0) {
583
      String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
584 585 586 587
                          replacement_len);
      result_pos += replacement_len;
    }

588
    subject_pos = index + pattern_len;
589 590 591
  }
  // Add remaining subject content at the end.
  if (subject_pos < subject_len) {
592 593
    String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
                        subject_pos, subject_len);
594 595
  }

596
  int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
597 598
  RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, 0,
                               match_indices);
599

600 601
  TruncateRegexpIndicesList(isolate);

602 603 604
  return *result;
}

605
V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
606
    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
607
    Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
608 609 610 611 612 613
  DCHECK(subject->IsFlat());
  DCHECK(replacement->IsFlat());

  int capture_count = regexp->CaptureCount();
  int subject_length = subject->length();

614 615 616
  JSRegExp::Type typeTag = regexp->TypeTag();
  if (typeTag == JSRegExp::IRREGEXP) {
    // Ensure the RegExp is compiled so we can access the capture-name map.
617
    if (RegExpImpl::IrregexpPrepare(isolate, regexp, subject) == -1) {
618
      DCHECK(isolate->has_pending_exception());
619
      return ReadOnlyRoots(isolate).exception();
620
    }
621 622
  }

623
  // CompiledReplacement uses zone allocation.
624
  Zone zone(isolate->allocator(), ZONE_NAME);
625
  CompiledReplacement compiled_replacement(&zone);
626
  const bool simple_replace = compiled_replacement.Compile(
627
      isolate, regexp, replacement, capture_count, subject_length);
628 629

  // Shortcut for simple non-regexp global replacements
630
  if (typeTag == JSRegExp::ATOM && simple_replace) {
631 632
    if (subject->IsOneByteRepresentation() &&
        replacement->IsOneByteRepresentation()) {
633 634 635 636 637 638 639 640
      return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
          isolate, subject, regexp, replacement, last_match_info);
    } else {
      return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
          isolate, subject, regexp, replacement, last_match_info);
    }
  }

641
  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
642
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
643 644

  int32_t* current_match = global_cache.FetchNext();
645
  if (current_match == nullptr) {
646
    if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673
    return *subject;
  }

  // Guessing the number of parts that the final result string is built
  // from. Global regexps can match any number of times, so we guess
  // conservatively.
  int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
  ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);

  int prev = 0;

  do {
    int start = current_match[0];
    int end = current_match[1];

    if (prev < start) {
      builder.AddSubjectSlice(prev, start);
    }

    if (simple_replace) {
      builder.AddString(replacement);
    } else {
      compiled_replacement.Apply(&builder, start, end, current_match);
    }
    prev = end;

    current_match = global_cache.FetchNext();
674
  } while (current_match != nullptr);
675

676
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
677 678 679 680 681

  if (prev < subject_length) {
    builder.AddSubjectSlice(prev, subject_length);
  }

682
  RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
683 684
                               global_cache.LastSuccessfulMatch());

685
  RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
686 687 688
}

template <typename ResultSeqString>
689
V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
690
    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
691
    Handle<RegExpMatchInfo> last_match_info) {
692 693 694 695 696 697 698 699 700 701 702 703 704 705
  DCHECK(subject->IsFlat());

  // Shortcut for simple non-regexp global replacements
  if (regexp->TypeTag() == JSRegExp::ATOM) {
    Handle<String> empty_string = isolate->factory()->empty_string();
    if (subject->IsOneByteRepresentation()) {
      return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
          isolate, subject, regexp, empty_string, last_match_info);
    } else {
      return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
          isolate, subject, regexp, empty_string, last_match_info);
    }
  }

706
  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
707
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
708 709

  int32_t* current_match = global_cache.FetchNext();
710
  if (current_match == nullptr) {
711
    if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
712 713 714 715 716 717 718 719 720
    return *subject;
  }

  int start = current_match[0];
  int end = current_match[1];
  int capture_count = regexp->CaptureCount();
  int subject_length = subject->length();

  int new_length = subject_length - (end - start);
721
  if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
722 723 724 725 726 727 728 729 730 731 732 733 734

  Handle<ResultSeqString> answer;
  if (ResultSeqString::kHasOneByteEncoding) {
    answer = Handle<ResultSeqString>::cast(
        isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
  } else {
    answer = Handle<ResultSeqString>::cast(
        isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
  }

  int prev = 0;
  int position = 0;

735
  DisallowHeapAllocation no_gc;
736 737 738 739 740
  do {
    start = current_match[0];
    end = current_match[1];
    if (prev < start) {
      // Add substring subject[prev;start] to answer string.
741 742
      String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
                          start);
743 744 745 746 747
      position += start - prev;
    }
    prev = end;

    current_match = global_cache.FetchNext();
748
  } while (current_match != nullptr);
749

750
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
751

752
  RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
753 754 755 756
                               global_cache.LastSuccessfulMatch());

  if (prev < subject_length) {
    // Add substring subject[prev;length] to answer string.
757
    String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
758 759 760 761
                        subject_length);
    position += subject_length - prev;
  }

762
  if (position == 0) return ReadOnlyRoots(isolate).empty_string();
763 764 765 766 767 768 769 770 771 772 773 774 775

  // Shorten string and fill
  int string_size = ResultSeqString::SizeFor(position);
  int allocated_string_size = ResultSeqString::SizeFor(new_length);
  int delta = allocated_string_size - string_size;

  answer->set_length(position);
  if (delta == 0) return *answer;

  Address end_of_string = answer->address() + string_size;
  Heap* heap = isolate->heap();

  // The trimming is performed on a newly allocated object, which is on a
776
  // freshly allocated page or on an already swept page. Hence, the sweeper
777 778
  // thread can not get confused with the filler creation. No synchronization
  // needed.
779 780
  // TODO(hpayer): We should shrink the large object page if the size
  // of the object changed significantly.
781
  if (!heap->IsLargeObject(*answer)) {
782
    heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
783
  }
784 785 786 787 788
  return *answer;
}

RUNTIME_FUNCTION(Runtime_StringSplit) {
  HandleScope handle_scope(isolate);
789
  DCHECK_EQ(3, args.length());
790 791 792
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
  CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
793
  CHECK_LT(0, limit);
794 795 796

  int subject_length = subject->length();
  int pattern_length = pattern->length();
797
  CHECK_LT(0, pattern_length);
798

799
  if (limit == 0xFFFFFFFFu) {
800
    FixedArray last_match_cache_unused;
801 802
    Handle<Object> cached_answer(
        RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
803
                                   &last_match_cache_unused,
804 805
                                   RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
        isolate);
806
    if (*cached_answer != Smi::kZero) {
807 808 809 810 811 812 813
      // The cache FixedArray is a COW-array and can therefore be reused.
      Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
          Handle<FixedArray>::cast(cached_answer));
      return *result;
    }
  }

814
  // The limit can be very large (0xFFFFFFFFu), but since the pattern
815 816 817
  // isn't empty, we can never create more parts than ~half the length
  // of the subject.

818 819
  subject = String::Flatten(isolate, subject);
  pattern = String::Flatten(isolate, pattern);
820

821
  std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
822

823
  FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
824

825 826
  if (static_cast<uint32_t>(indices->size()) < limit) {
    indices->push_back(subject_length);
827 828 829 830 831
  }

  // The list indices now contains the end of each part to create.

  // Create JSArray of substrings separated by separator.
832
  int part_count = static_cast<int>(indices->size());
833

834
  Handle<JSArray> result =
835
      isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
836
                                     INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
837

838
  DCHECK(result->HasObjectElements());
839

840
  Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);
841

842
  if (part_count == 1 && indices->at(0) == subject_length) {
843 844 845
    elements->set(0, *subject);
  } else {
    int part_start = 0;
846
    FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
847
      int part_end = indices->at(i);
848 849 850 851
      Handle<String> substring =
          isolate->factory()->NewProperSubString(subject, part_start, part_end);
      elements->set(i, *substring);
      part_start = part_end + pattern_length;
852
    });
853 854
  }

855
  if (limit == 0xFFFFFFFFu) {
856
    if (result->HasObjectElements()) {
857
      RegExpResultsCache::Enter(isolate, subject, pattern, elements,
858
                                isolate->factory()->empty_fixed_array(),
859 860 861 862
                                RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
    }
  }

863 864
  TruncateRegexpIndicesList(isolate);

865 866 867
  return *result;
}

868
RUNTIME_FUNCTION(Runtime_RegExpExec) {
869
  HandleScope scope(isolate);
870
  DCHECK_EQ(4, args.length());
871 872 873
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
  CONVERT_INT32_ARG_CHECKED(index, 2);
874
  CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
875 876
  // Due to the way the JS calls are constructed this must be less than the
  // length of a string, i.e. it is always a Smi.  We check anyway for security.
877 878
  CHECK_LE(0, index);
  CHECK_GE(subject->length(), index);
879
  isolate->counters()->regexp_entry_runtime()->Increment();
880 881
  RETURN_RESULT_OR_FAILURE(isolate, RegExpImpl::Exec(isolate, regexp, subject,
                                                     index, last_match_info));
882 883
}

884 885 886 887
namespace {

class MatchInfoBackedMatch : public String::Match {
 public:
888 889
  MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
                       Handle<String> subject,
890
                       Handle<RegExpMatchInfo> match_info)
891
      : isolate_(isolate), match_info_(match_info) {
892
    subject_ = String::Flatten(isolate, subject);
893 894

    if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
895
      Object o = regexp->CaptureNameMap();
896 897
      has_named_captures_ = o->IsFixedArray();
      if (has_named_captures_) {
898
        capture_name_map_ = handle(FixedArray::cast(o), isolate);
899 900 901 902
      }
    } else {
      has_named_captures_ = false;
    }
903 904 905 906 907 908 909
  }

  Handle<String> GetMatch() override {
    return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
  }

  Handle<String> GetPrefix() override {
910
    const int match_start = match_info_->Capture(0);
911 912 913 914
    return isolate_->factory()->NewSubString(subject_, 0, match_start);
  }

  Handle<String> GetSuffix() override {
915
    const int match_end = match_info_->Capture(1);
916 917 918 919
    return isolate_->factory()->NewSubString(subject_, match_end,
                                             subject_->length());
  }

920 921
  bool HasNamedCaptures() override { return has_named_captures_; }

922
  int CaptureCount() override {
923
    return match_info_->NumberOfCaptureRegisters() / 2;
924 925
  }

926 927 928 929 930 931 932 933
  MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
    Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
        isolate_, match_info_, i, capture_exists);
    return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
                             : isolate_->factory()->empty_string();
  }

  MaybeHandle<String> GetNamedCapture(Handle<String> name,
934
                                      CaptureState* state) override {
935 936
    DCHECK(has_named_captures_);
    const int capture_index = LookupNamedCapture(
937
        [=](String capture_name) { return capture_name->Equals(*name); },
938 939 940
        *capture_name_map_);

    if (capture_index == -1) {
941
      *state = INVALID;
942 943 944 945
      return name;  // Arbitrary string handle.
    }

    DCHECK(1 <= capture_index && capture_index <= CaptureCount());
946 947 948 949 950 951 952 953 954 955 956 957 958 959

    bool capture_exists;
    Handle<String> capture_value;
    ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
                               GetCapture(capture_index, &capture_exists),
                               String);

    if (!capture_exists) {
      *state = UNMATCHED;
      return isolate_->factory()->empty_string();
    } else {
      *state = MATCHED;
      return capture_value;
    }
960
  }
961

962 963 964
 private:
  Isolate* isolate_;
  Handle<String> subject_;
965
  Handle<RegExpMatchInfo> match_info_;
966 967 968

  bool has_named_captures_;
  Handle<FixedArray> capture_name_map_;
969 970 971 972 973 974
};

class VectorBackedMatch : public String::Match {
 public:
  VectorBackedMatch(Isolate* isolate, Handle<String> subject,
                    Handle<String> match, int match_position,
975
                    ZoneVector<Handle<Object>>* captures,
976
                    Handle<Object> groups_obj)
977 978 979 980
      : isolate_(isolate),
        match_(match),
        match_position_(match_position),
        captures_(captures) {
981
    subject_ = String::Flatten(isolate, subject);
982 983 984 985

    DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
    has_named_captures_ = !groups_obj->IsUndefined(isolate);
    if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
986 987 988 989 990 991 992 993 994 995 996 997 998 999
  }

  Handle<String> GetMatch() override { return match_; }

  Handle<String> GetPrefix() override {
    return isolate_->factory()->NewSubString(subject_, 0, match_position_);
  }

  Handle<String> GetSuffix() override {
    const int match_end_position = match_position_ + match_->length();
    return isolate_->factory()->NewSubString(subject_, match_end_position,
                                             subject_->length());
  }

1000 1001
  bool HasNamedCaptures() override { return has_named_captures_; }

1002 1003
  int CaptureCount() override { return static_cast<int>(captures_->size()); }

1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014
  MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
    Handle<Object> capture_obj = captures_->at(i);
    if (capture_obj->IsUndefined(isolate_)) {
      *capture_exists = false;
      return isolate_->factory()->empty_string();
    }
    *capture_exists = true;
    return Object::ToString(isolate_, capture_obj);
  }

  MaybeHandle<String> GetNamedCapture(Handle<String> name,
1015
                                      CaptureState* state) override {
1016
    DCHECK(has_named_captures_);
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026

    Maybe<bool> maybe_capture_exists =
        JSReceiver::HasProperty(groups_obj_, name);
    if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();

    if (!maybe_capture_exists.FromJust()) {
      *state = INVALID;
      return name;  // Arbitrary string handle.
    }

1027 1028
    Handle<Object> capture_obj;
    ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1029 1030
                               Object::GetProperty(isolate_, groups_obj_, name),
                               String);
1031
    if (capture_obj->IsUndefined(isolate_)) {
1032 1033
      *state = UNMATCHED;
      return isolate_->factory()->empty_string();
1034
    } else {
1035
      *state = MATCHED;
1036 1037 1038
      return Object::ToString(isolate_, capture_obj);
    }
  }
1039 1040 1041 1042 1043 1044

 private:
  Isolate* isolate_;
  Handle<String> subject_;
  Handle<String> match_;
  const int match_position_;
1045
  ZoneVector<Handle<Object>>* captures_;
1046 1047 1048

  bool has_named_captures_;
  Handle<JSReceiver> groups_obj_;
1049 1050
};

1051 1052 1053 1054
// Create the groups object (see also the RegExp result creation in
// RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
Handle<JSObject> ConstructNamedCaptureGroupsObject(
    Isolate* isolate, Handle<FixedArray> capture_map,
1055
    const std::function<Object(int)>& f_get_capture) {
1056 1057 1058 1059 1060 1061 1062
  Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();

  const int capture_count = capture_map->length() >> 1;
  for (int i = 0; i < capture_count; i++) {
    const int name_ix = i * 2;
    const int index_ix = i * 2 + 1;

1063 1064
    Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
                                isolate);
jgruber's avatar
jgruber committed
1065
    const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1066 1067 1068
    DCHECK(1 <= capture_ix && capture_ix <= capture_count);

    Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1069
    DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1070

1071
    JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1072 1073 1074 1075 1076
  }

  return groups;
}

1077
// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1078 1079
// separate last match info.  See comment on that function.
template <bool has_capture>
1080 1081 1082 1083
static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
                                   Handle<JSRegExp> regexp,
                                   Handle<RegExpMatchInfo> last_match_array,
                                   Handle<JSArray> result_array) {
1084
  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1085
  DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
1086
  DCHECK(subject->IsFlat());
1087 1088 1089 1090 1091 1092 1093

  int capture_count = regexp->CaptureCount();
  int subject_length = subject->length();

  static const int kMinLengthToCache = 0x1000;

  if (subject_length > kMinLengthToCache) {
1094
    FixedArray last_match_cache;
1095
    Object cached_answer = RegExpResultsCache::Lookup(
1096 1097 1098 1099 1100 1101
        isolate->heap(), *subject, regexp->data(), &last_match_cache,
        RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    if (cached_answer->IsFixedArray()) {
      int capture_registers = (capture_count + 1) * 2;
      int32_t* last_match = NewArray<int32_t>(capture_registers);
      for (int i = 0; i < capture_registers; i++) {
jgruber's avatar
jgruber committed
1102
        last_match[i] = Smi::ToInt(last_match_cache->get(i));
1103
      }
1104
      Handle<FixedArray> cached_fixed_array =
1105
          Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
1106 1107 1108 1109 1110
      // The cache FixedArray is a COW-array and we need to return a copy.
      Handle<FixedArray> copied_fixed_array =
          isolate->factory()->CopyFixedArrayWithMap(
              cached_fixed_array, isolate->factory()->fixed_array_map());
      JSArray::SetContent(result_array, copied_fixed_array);
1111 1112
      RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
                                   capture_count, last_match);
1113
      DeleteArray(last_match);
1114
      return *result_array;
1115 1116 1117
    }
  }

1118
  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
1119
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1120 1121

  // Ensured in Runtime_RegExpExecMultiple.
1122
  DCHECK(result_array->HasObjectElements());
1123 1124
  Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
                                     isolate);
1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140
  if (result_elements->length() < 16) {
    result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
  }

  FixedArrayBuilder builder(result_elements);

  // Position to search from.
  int match_start = -1;
  int match_end = 0;
  bool first = true;

  // Two smis before and after the match, for very long strings.
  static const int kMaxBuilderEntriesPerRegExpMatch = 5;

  while (true) {
    int32_t* current_match = global_cache.FetchNext();
1141
    if (current_match == nullptr) break;
1142
    match_start = current_match[0];
1143
    builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163
    if (match_end < match_start) {
      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
                                                match_start);
    }
    match_end = current_match[1];
    {
      // Avoid accumulating new handles inside loop.
      HandleScope temp_scope(isolate);
      Handle<String> match;
      if (!first) {
        match = isolate->factory()->NewProperSubString(subject, match_start,
                                                       match_end);
      } else {
        match =
            isolate->factory()->NewSubString(subject, match_start, match_end);
        first = false;
      }

      if (has_capture) {
        // Arguments array to replace function is match, captures, index and
1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174
        // subject, i.e., 3 + capture count in total. If the RegExp contains
        // named captures, they are also passed as the last argument.

        Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
        const bool has_named_captures = maybe_capture_map->IsFixedArray();

        const int argc =
            has_named_captures ? 4 + capture_count : 3 + capture_count;

        Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
        int cursor = 0;
1175

1176
        elements->set(cursor++, *match);
1177 1178 1179 1180 1181 1182 1183
        for (int i = 1; i <= capture_count; i++) {
          int start = current_match[i * 2];
          if (start >= 0) {
            int end = current_match[i * 2 + 1];
            DCHECK(start <= end);
            Handle<String> substring =
                isolate->factory()->NewSubString(subject, start, end);
1184
            elements->set(cursor++, *substring);
1185
          } else {
1186
            DCHECK_GT(0, current_match[i * 2 + 1]);
1187
            elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
1188 1189
          }
        }
1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202

        elements->set(cursor++, Smi::FromInt(match_start));
        elements->set(cursor++, *subject);

        if (has_named_captures) {
          Handle<FixedArray> capture_map =
              Handle<FixedArray>::cast(maybe_capture_map);
          Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
              isolate, capture_map, [=](int ix) { return elements->get(ix); });
          elements->set(cursor++, *groups);
        }

        DCHECK_EQ(cursor, argc);
1203 1204 1205 1206 1207 1208 1209
        builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
      } else {
        builder.Add(*match);
      }
    }
  }

1210
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1211 1212 1213 1214 1215 1216 1217 1218

  if (match_start >= 0) {
    // Finished matching, with at least one match.
    if (match_end < subject_length) {
      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
                                                subject_length);
    }

1219 1220
    RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
                                 capture_count,
1221
                                 global_cache.LastSuccessfulMatch());
1222 1223

    if (subject_length > kMinLengthToCache) {
1224 1225 1226 1227 1228 1229 1230 1231 1232
      // Store the last successful match into the array for caching.
      // TODO(yangguo): do not expose last match to JS and simplify caching.
      int capture_registers = (capture_count + 1) * 2;
      Handle<FixedArray> last_match_cache =
          isolate->factory()->NewFixedArray(capture_registers);
      int32_t* last_match = global_cache.LastSuccessfulMatch();
      for (int i = 0; i < capture_registers; i++) {
        last_match_cache->set(i, Smi::FromInt(last_match[i]));
      }
1233
      Handle<FixedArray> result_fixed_array =
1234
          FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
1235 1236 1237 1238
      // Cache the result and copy the FixedArray into a COW array.
      Handle<FixedArray> copied_fixed_array =
          isolate->factory()->CopyFixedArrayWithMap(
              result_fixed_array, isolate->factory()->fixed_array_map());
1239
      RegExpResultsCache::Enter(
1240
          isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1241
          last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1242
    }
1243
    return *builder.ToJSArray(result_array);
1244
  } else {
1245
    return ReadOnlyRoots(isolate).null_value();  // No matches at all.
1246 1247 1248 1249 1250
  }
}

// Legacy implementation of RegExp.prototype[Symbol.replace] which
// doesn't properly call the underlying exec method.
1251 1252
V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1253
    Handle<String> replace) {
1254 1255 1256
  // Functional fast-paths are dispatched directly by replace builtin.
  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));

1257 1258 1259 1260
  Factory* factory = isolate->factory();

  const int flags = regexp->GetFlags();
  const bool global = (flags & JSRegExp::kGlobal) != 0;
1261
  const bool sticky = (flags & JSRegExp::kSticky) != 0;
1262

1263
  replace = String::Flatten(isolate, replace);
1264

1265
  Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1266

1267 1268
  if (!global) {
    // Non-global regexp search, string replace.
1269

1270 1271
    uint32_t last_index = 0;
    if (sticky) {
1272
      Handle<Object> last_index_obj(regexp->last_index(), isolate);
1273 1274 1275 1276 1277 1278
      ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
                                 Object::ToLength(isolate, last_index_obj),
                                 String);
      last_index = PositiveNumberToUint32(*last_index_obj);
    }

1279 1280 1281
    Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
                                     isolate);

1282 1283
    // A lastIndex exceeding the string length always returns null (signalling
    // failure) in RegExpBuiltinExec, thus we can skip the call.
1284 1285 1286 1287 1288 1289
    if (last_index <= static_cast<uint32_t>(string->length())) {
      ASSIGN_RETURN_ON_EXCEPTION(isolate, match_indices_obj,
                                 RegExpImpl::Exec(isolate, regexp, string,
                                                  last_index, last_match_info),
                                 String);
    }
1290

1291
    if (match_indices_obj->IsNull(isolate)) {
1292
      if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1293 1294
      return string;
    }
1295

1296
    auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1297

1298 1299
    const int start_index = match_indices->Capture(0);
    const int end_index = match_indices->Capture(1);
1300

1301
    if (sticky) {
1302
      regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1303
    }
1304

1305 1306
    IncrementalStringBuilder builder(isolate);
    builder.AppendString(factory->NewSubString(string, 0, start_index));
1307

1308
    if (replace->length() > 0) {
1309
      MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1310 1311 1312 1313 1314 1315
      Handle<String> replacement;
      ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
                                 String::GetSubstitution(isolate, &m, replace),
                                 String);
      builder.AppendString(replacement);
    }
1316

1317 1318 1319 1320 1321 1322 1323 1324
    builder.AppendString(
        factory->NewSubString(string, end_index, string->length()));
    return builder.Finish();
  } else {
    // Global regexp search, string replace.
    DCHECK(global);
    RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
                        String);
1325

1326
    if (replace->length() == 0) {
1327
      if (string->IsOneByteRepresentation()) {
1328
        Object result =
1329 1330
            StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
                isolate, string, regexp, last_match_info);
1331 1332
        return handle(String::cast(result), isolate);
      } else {
1333
        Object result =
1334 1335 1336
            StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
                isolate, string, regexp, last_match_info);
        return handle(String::cast(result), isolate);
1337 1338
      }
    }
1339

1340
    Object result = StringReplaceGlobalRegExpWithString(
1341 1342 1343
        isolate, string, regexp, replace, last_match_info);
    if (result->IsString()) {
      return handle(String::cast(result), isolate);
1344
    } else {
1345
      return MaybeHandle<String>();
1346 1347 1348 1349 1350 1351 1352 1353
    }
  }

  UNREACHABLE();
}

}  // namespace

1354 1355 1356
// This is only called for StringReplaceGlobalRegExpWithFunction.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
  HandleScope handles(isolate);
1357
  DCHECK_EQ(4, args.length());
1358

1359 1360 1361 1362
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
  CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
  CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1363 1364

  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1365
  CHECK(result_array->HasObjectElements());
1366

1367
  subject = String::Flatten(isolate, subject);
1368 1369
  CHECK(regexp->GetFlags() & JSRegExp::kGlobal);

1370
  Object result;
1371
  if (regexp->CaptureCount() == 0) {
1372 1373
    result = SearchRegExpMultiple<false>(isolate, subject, regexp,
                                         last_match_info, result_array);
1374
  } else {
1375 1376
    result = SearchRegExpMultiple<true>(isolate, subject, regexp,
                                        last_match_info, result_array);
1377
  }
1378 1379
  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
  return result;
1380 1381 1382 1383
}

RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
  HandleScope scope(isolate);
1384
  DCHECK_EQ(3, args.length());
1385 1386
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
1387
  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);
1388

1389
  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1390
  DCHECK(replace_obj->map()->is_callable());
1391

1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402
  Factory* factory = isolate->factory();
  Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();

  const int flags = regexp->GetFlags();
  DCHECK_EQ(flags & JSRegExp::kGlobal, 0);

  // TODO(jgruber): This should be an easy port to CSA with massive payback.

  const bool sticky = (flags & JSRegExp::kSticky) != 0;
  uint32_t last_index = 0;
  if (sticky) {
1403
    Handle<Object> last_index_obj(regexp->last_index(), isolate);
1404 1405 1406 1407 1408
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
    last_index = PositiveNumberToUint32(*last_index_obj);
  }

1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419
  Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
                                   isolate);

  // A lastIndex exceeding the string length always returns null (signalling
  // failure) in RegExpBuiltinExec, thus we can skip the call.
  if (last_index <= static_cast<uint32_t>(subject->length())) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, match_indices_obj,
        RegExpImpl::Exec(isolate, regexp, subject, last_index,
                         last_match_info));
  }
1420 1421

  if (match_indices_obj->IsNull(isolate)) {
1422
    if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1423 1424 1425 1426 1427 1428 1429 1430 1431
    return *subject;
  }

  Handle<RegExpMatchInfo> match_indices =
      Handle<RegExpMatchInfo>::cast(match_indices_obj);

  const int index = match_indices->Capture(0);
  const int end_of_match = match_indices->Capture(1);

1432
  if (sticky) {
1433
    regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
1434
  }
1435 1436 1437 1438 1439

  IncrementalStringBuilder builder(isolate);
  builder.AppendString(factory->NewSubString(subject, 0, index));

  // Compute the parameter list consisting of the match, captures, index,
1440 1441 1442
  // and subject for the replace function invocation. If the RegExp contains
  // named captures, they are also passed as the last argument.

1443 1444 1445
  // The number of captures plus one for the match.
  const int m = match_indices->NumberOfCaptureRegisters() / 2;

1446 1447 1448 1449 1450 1451
  bool has_named_captures = false;
  Handle<FixedArray> capture_map;
  if (m > 1) {
    // The existence of capture groups implies IRREGEXP kind.
    DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);

1452
    Object maybe_capture_map = regexp->CaptureNameMap();
1453 1454
    if (maybe_capture_map->IsFixedArray()) {
      has_named_captures = true;
1455
      capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
1456 1457 1458
    }
  }

1459 1460 1461 1462 1463
  const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
  if (argc == static_cast<uint32_t>(-1)) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewRangeError(MessageTemplate::kTooManyArguments));
  }
1464 1465
  ScopedVector<Handle<Object>> argv(argc);

1466
  int cursor = 0;
1467 1468 1469 1470 1471
  for (int j = 0; j < m; j++) {
    bool ok;
    Handle<String> capture =
        RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
    if (ok) {
1472
      argv[cursor++] = capture;
1473
    } else {
1474
      argv[cursor++] = factory->undefined_value();
1475 1476 1477
    }
  }

1478 1479 1480 1481 1482 1483 1484 1485 1486
  argv[cursor++] = handle(Smi::FromInt(index), isolate);
  argv[cursor++] = subject;

  if (has_named_captures) {
    argv[cursor++] = ConstructNamedCaptureGroupsObject(
        isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
  }

  DCHECK_EQ(cursor, argc);
1487 1488 1489 1490 1491

  Handle<Object> replacement_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, replacement_obj,
      Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1492
                      argv.begin()));
1493 1494 1495 1496 1497 1498 1499 1500 1501 1502

  Handle<String> replacement;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, replacement, Object::ToString(isolate, replacement_obj));

  builder.AppendString(replacement);
  builder.AppendString(
      factory->NewSubString(subject, end_of_match, subject->length()));

  RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1503 1504
}

1505 1506
namespace {

1507 1508 1509
V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
                                                   Handle<Object> object,
                                                   uint32_t* out) {
1510 1511 1512 1513 1514 1515
  if (object->IsUndefined(isolate)) {
    *out = kMaxUInt32;
    return object;
  }

  Handle<Object> number;
1516 1517
  ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
                             Object);
1518 1519 1520 1521 1522 1523 1524
  *out = NumberToUint32(*number);
  return object;
}

Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
                                       Handle<FixedArray> elems,
                                       int num_elems) {
1525
  return isolate->factory()->NewJSArrayWithElements(
1526
      FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1527 1528 1529 1530 1531 1532 1533 1534 1535
}

}  // namespace

// Slow path for:
// ES#sec-regexp.prototype-@@replace
// RegExp.prototype [ @@split ] ( string, limit )
RUNTIME_FUNCTION(Runtime_RegExpSplit) {
  HandleScope scope(isolate);
1536
  DCHECK_EQ(3, args.length());
1537 1538 1539 1540 1541 1542 1543 1544 1545 1546

  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
  CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);

  Factory* factory = isolate->factory();

  Handle<JSFunction> regexp_fun = isolate->regexp_function();
  Handle<Object> ctor;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1547
      isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1548 1549 1550

  Handle<Object> flags_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1551 1552
      isolate, flags_obj,
      JSObject::GetProperty(isolate, recv, factory->flags_string()));
1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579

  Handle<String> flags;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
                                     Object::ToString(isolate, flags_obj));

  Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
  const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);

  Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
  const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);

  Handle<String> new_flags = flags;
  if (!sticky) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
                                       factory->NewConsString(flags, y_str));
  }

  Handle<JSReceiver> splitter;
  {
    const int argc = 2;

    ScopedVector<Handle<Object>> argv(argc);
    argv[0] = recv;
    argv[1] = new_flags;

    Handle<Object> splitter_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1580
        isolate, splitter_obj,
1581
        Execution::New(isolate, ctor, argc, argv.begin()));
1582 1583 1584 1585 1586 1587 1588

    splitter = Handle<JSReceiver>::cast(splitter_obj);
  }

  uint32_t limit;
  RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));

1589
  const uint32_t length = string->length();
1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607

  if (limit == 0) return *factory->NewJSArray(0);

  if (length == 0) {
    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
                                                 factory->undefined_value()));

    if (!result->IsNull(isolate)) return *factory->NewJSArray(0);

    Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
    elems->set(0, *string);
    return *factory->NewJSArrayWithElements(elems);
  }

  static const int kInitialArraySize = 8;
  Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1608
  uint32_t num_elems = 0;
1609

1610 1611
  uint32_t string_index = 0;
  uint32_t prev_string_index = 0;
1612 1613 1614 1615 1616 1617 1618 1619 1620 1621
  while (string_index < length) {
    RETURN_FAILURE_ON_EXCEPTION(
        isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));

    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
                                                 factory->undefined_value()));

    if (result->IsNull(isolate)) {
1622 1623
      string_index = static_cast<uint32_t>(
          RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1624 1625 1626 1627 1628 1629 1630 1631 1632 1633
      continue;
    }

    Handle<Object> last_index_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));

1634 1635
    const uint32_t end =
        std::min(PositiveNumberToUint32(*last_index_obj), length);
1636
    if (end == prev_string_index) {
1637 1638
      string_index = static_cast<uint32_t>(
          RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1639 1640 1641 1642 1643 1644
      continue;
    }

    {
      Handle<String> substr =
          factory->NewSubString(string, prev_string_index, string_index);
1645
      elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1646
      if (num_elems == limit) {
1647 1648 1649 1650 1651 1652 1653 1654 1655
        return *NewJSArrayWithElements(isolate, elems, num_elems);
      }
    }

    prev_string_index = end;

    Handle<Object> num_captures_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, num_captures_obj,
1656 1657
        Object::GetProperty(isolate, result,
                            isolate->factory()->length_string()));
1658 1659 1660

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1661
    const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);
1662

1663
    for (uint32_t i = 1; i < num_captures; i++) {
1664 1665 1666
      Handle<Object> capture;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, capture, Object::GetElement(isolate, result, i));
1667
      elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
1668
      if (num_elems == limit) {
1669 1670 1671 1672 1673 1674 1675 1676 1677 1678
        return *NewJSArrayWithElements(isolate, elems, num_elems);
      }
    }

    string_index = prev_string_index;
  }

  {
    Handle<String> substr =
        factory->NewSubString(string, prev_string_index, length);
1679
    elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1680 1681 1682 1683 1684
  }

  return *NewJSArrayWithElements(isolate, elems, num_elems);
}

1685 1686 1687
// Slow path for:
// ES#sec-regexp.prototype-@@replace
// RegExp.prototype [ @@replace ] ( string, replaceValue )
1688
RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) {
1689
  HandleScope scope(isolate);
1690
  DCHECK_EQ(3, args.length());
1691 1692 1693

  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1694
  Handle<Object> replace_obj = args.at(2);
1695 1696 1697

  Factory* factory = isolate->factory();

1698
  string = String::Flatten(isolate, string);
1699

1700 1701
  const bool functional_replace = replace_obj->IsCallable();

1702 1703 1704 1705 1706 1707
  Handle<String> replace;
  if (!functional_replace) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
                                       Object::ToString(isolate, replace_obj));
  }

1708
  // Fast-path for unmodified JSRegExps (and non-functional replace).
1709
  if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1710 1711 1712
    // We should never get here with functional replace because unmodified
    // regexp and functional replace should be fully handled in CSA code.
    CHECK(!functional_replace);
1713 1714 1715 1716 1717 1718
    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result,
        RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace));
    DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv));
    return *result;
1719 1720
  }

1721
  const uint32_t length = string->length();
1722 1723 1724 1725

  Handle<Object> global_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, global_obj,
1726
      JSReceiver::GetProperty(isolate, recv, factory->global_string()));
1727
  const bool global = global_obj->BooleanValue(isolate);
1728 1729 1730 1731 1732 1733

  bool unicode = false;
  if (global) {
    Handle<Object> unicode_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, unicode_obj,
1734
        JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
1735
    unicode = unicode_obj->BooleanValue(isolate);
1736 1737 1738 1739 1740

    RETURN_FAILURE_ON_EXCEPTION(isolate,
                                RegExpUtils::SetLastIndex(isolate, recv, 0));
  }

1741
  Zone zone(isolate->allocator(), ZONE_NAME);
1742 1743 1744 1745 1746
  ZoneVector<Handle<Object>> results(&zone);

  while (true) {
    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1747 1748
        isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
                                                 factory->undefined_value()));
1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770

    if (result->IsNull(isolate)) break;

    results.push_back(result);
    if (!global) break;

    Handle<Object> match_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
                                       Object::GetElement(isolate, result, 0));

    Handle<String> match;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
                                       Object::ToString(isolate, match_obj));

    if (match->length() == 0) {
      RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
                                               isolate, recv, string, unicode));
    }
  }

  // TODO(jgruber): Look into ReplacementStringBuilder instead.
  IncrementalStringBuilder builder(isolate);
1771
  uint32_t next_source_position = 0;
1772 1773

  for (const auto& result : results) {
1774
    HandleScope handle_scope(isolate);
1775 1776 1777
    Handle<Object> captures_length_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, captures_length_obj,
1778
        Object::GetProperty(isolate, result, factory->length_string()));
1779 1780 1781 1782

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, captures_length_obj,
        Object::ToLength(isolate, captures_length_obj));
1783 1784
    const uint32_t captures_length =
        PositiveNumberToUint32(*captures_length_obj);
1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798

    Handle<Object> match_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
                                       Object::GetElement(isolate, result, 0));

    Handle<String> match;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
                                       Object::ToString(isolate, match_obj));

    const int match_length = match->length();

    Handle<Object> position_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, position_obj,
1799
        Object::GetProperty(isolate, result, factory->index_string()));
1800 1801 1802

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, position_obj, Object::ToInteger(isolate, position_obj));
1803 1804
    const uint32_t position =
        std::min(PositiveNumberToUint32(*position_obj), length);
1805

1806 1807
    // Do not reserve capacity since captures_length is user-controlled.
    ZoneVector<Handle<Object>> captures(&zone);
1808

1809
    for (uint32_t n = 0; n < captures_length; n++) {
1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820
      Handle<Object> capture;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, capture, Object::GetElement(isolate, result, n));

      if (!capture->IsUndefined(isolate)) {
        ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
                                           Object::ToString(isolate, capture));
      }
      captures.push_back(capture);
    }

1821
    Handle<Object> groups_obj = isolate->factory()->undefined_value();
1822 1823
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, groups_obj,
1824
        Object::GetProperty(isolate, result, factory->groups_string()));
1825 1826 1827

    const bool has_named_captures = !groups_obj->IsUndefined(isolate);

1828 1829
    Handle<String> replacement;
    if (functional_replace) {
1830 1831 1832 1833 1834 1835 1836
      const uint32_t argc =
          GetArgcForReplaceCallable(captures_length, has_named_captures);
      if (argc == static_cast<uint32_t>(-1)) {
        THROW_NEW_ERROR_RETURN_FAILURE(
            isolate, NewRangeError(MessageTemplate::kTooManyArguments));
      }

1837 1838
      ScopedVector<Handle<Object>> argv(argc);

1839
      int cursor = 0;
1840
      for (uint32_t j = 0; j < captures_length; j++) {
1841
        argv[cursor++] = captures[j];
1842 1843
      }

1844 1845 1846 1847 1848
      argv[cursor++] = handle(Smi::FromInt(position), isolate);
      argv[cursor++] = string;
      if (has_named_captures) argv[cursor++] = groups_obj;

      DCHECK_EQ(cursor, argc);
1849 1850 1851 1852 1853

      Handle<Object> replacement_obj;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, replacement_obj,
          Execution::Call(isolate, replace_obj, factory->undefined_value(),
1854
                          argc, argv.begin()));
1855 1856 1857 1858

      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, replacement, Object::ToString(isolate, replacement_obj));
    } else {
1859
      DCHECK(!functional_replace);
1860 1861
      if (!groups_obj->IsUndefined(isolate)) {
        ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1862
            isolate, groups_obj, Object::ToObject(isolate, groups_obj));
1863 1864 1865
      }
      VectorBackedMatch m(isolate, string, match, position, &captures,
                          groups_obj);
1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, replacement, String::GetSubstitution(isolate, &m, replace));
    }

    if (position >= next_source_position) {
      builder.AppendString(
          factory->NewSubString(string, next_source_position, position));
      builder.AppendString(replacement);

      next_source_position = position + match_length;
    }
  }

  if (next_source_position < length) {
    builder.AppendString(
        factory->NewSubString(string, next_source_position, length));
  }

  RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
}
1886

1887 1888
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
  HandleScope scope(isolate);
1889
  DCHECK_EQ(3, args.length());
1890 1891 1892
  // TODO(pwong): To follow the spec more closely and simplify calling code,
  // this could handle the canonicalization of pattern and flags. See
  // https://tc39.github.io/ecma262/#sec-regexpinitialize
1893 1894 1895 1896 1897 1898 1899 1900 1901
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);

  RETURN_FAILURE_ON_EXCEPTION(isolate,
                              JSRegExp::Initialize(regexp, source, flags));

  return *regexp;
}
1902

1903
RUNTIME_FUNCTION(Runtime_IsRegExp) {
1904
  SealHandleScope shs(isolate);
1905
  DCHECK_EQ(1, args.length());
1906 1907 1908
  CONVERT_ARG_CHECKED(Object, obj, 0);
  return isolate->heap()->ToBoolean(obj->IsJSRegExp());
}
1909

1910 1911
}  // namespace internal
}  // namespace v8