// Copyright 2014 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include <functional> #include "src/common/message-template.h" #include "src/execution/arguments-inl.h" #include "src/execution/isolate-inl.h" #include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop. #include "src/logging/counters.h" #include "src/numbers/conversions-inl.h" #include "src/objects/js-array-inl.h" #include "src/objects/js-regexp-inl.h" #include "src/regexp/regexp-utils.h" #include "src/regexp/regexp.h" #include "src/runtime/runtime-utils.h" #include "src/strings/string-builder-inl.h" #include "src/strings/string-search.h" #include "src/zone/zone-chunk-list.h" namespace v8 { namespace internal { namespace { // Returns -1 for failure. uint32_t GetArgcForReplaceCallable(uint32_t num_captures, bool has_named_captures) { const uint32_t kAdditionalArgsWithoutNamedCaptures = 2; const uint32_t kAdditionalArgsWithNamedCaptures = 3; if (num_captures > Code::kMaxArguments) return -1; uint32_t argc = has_named_captures ? num_captures + kAdditionalArgsWithNamedCaptures : num_captures + kAdditionalArgsWithoutNamedCaptures; STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() - kAdditionalArgsWithNamedCaptures); return (argc > Code::kMaxArguments) ? -1 : argc; } // Looks up the capture of the given name. Returns the (1-based) numbered // capture index or -1 on failure. int LookupNamedCapture(const std::function<bool(String)>& name_matches, FixedArray capture_name_map) { // TODO(jgruber): Sort capture_name_map and do binary search via // internalized strings. int maybe_capture_index = -1; const int named_capture_count = capture_name_map.length() >> 1; for (int j = 0; j < named_capture_count; j++) { // The format of {capture_name_map} is documented at // JSRegExp::kIrregexpCaptureNameMapIndex. const int name_ix = j * 2; const int index_ix = j * 2 + 1; String capture_name = String::cast(capture_name_map.get(name_ix)); if (!name_matches(capture_name)) continue; maybe_capture_index = Smi::ToInt(capture_name_map.get(index_ix)); break; } return maybe_capture_index; } } // namespace class CompiledReplacement { public: explicit CompiledReplacement(Zone* zone) : parts_(zone), replacement_substrings_(zone) {} // Return whether the replacement is simple. bool Compile(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> replacement, int capture_count, int subject_length); // Use Apply only if Compile returned false. void Apply(ReplacementStringBuilder* builder, int match_from, int match_to, int32_t* match); // Number of distinct parts of the replacement pattern. int parts() { return static_cast<int>(parts_.size()); } private: enum PartType { SUBJECT_PREFIX = 1, SUBJECT_SUFFIX, SUBJECT_CAPTURE, REPLACEMENT_SUBSTRING, REPLACEMENT_STRING, EMPTY_REPLACEMENT, NUMBER_OF_PART_TYPES }; struct ReplacementPart { static inline ReplacementPart SubjectMatch() { return ReplacementPart(SUBJECT_CAPTURE, 0); } static inline ReplacementPart SubjectCapture(int capture_index) { return ReplacementPart(SUBJECT_CAPTURE, capture_index); } static inline ReplacementPart SubjectPrefix() { return ReplacementPart(SUBJECT_PREFIX, 0); } static inline ReplacementPart SubjectSuffix(int subject_length) { return ReplacementPart(SUBJECT_SUFFIX, subject_length); } static inline ReplacementPart ReplacementString() { return ReplacementPart(REPLACEMENT_STRING, 0); } static inline ReplacementPart EmptyReplacement() { return ReplacementPart(EMPTY_REPLACEMENT, 0); } static inline ReplacementPart ReplacementSubString(int from, int to) { DCHECK_LE(0, from); DCHECK_GT(to, from); return ReplacementPart(-from, to); } // If tag <= 0 then it is the negation of a start index of a substring of // the replacement pattern, otherwise it's a value from PartType. ReplacementPart(int tag, int data) : tag(tag), data(data) { // Must be non-positive or a PartType value. DCHECK(tag < NUMBER_OF_PART_TYPES); } // Either a value of PartType or a non-positive number that is // the negation of an index into the replacement string. int tag; // The data value's interpretation depends on the value of tag: // tag == SUBJECT_PREFIX || // tag == SUBJECT_SUFFIX: data is unused. // tag == SUBJECT_CAPTURE: data is the number of the capture. // tag == REPLACEMENT_SUBSTRING || // tag == REPLACEMENT_STRING: data is index into array of substrings // of the replacement string. // tag == EMPTY_REPLACEMENT: data is unused. // tag <= 0: Temporary representation of the substring of the replacement // string ranging over -tag .. data. // Is replaced by REPLACEMENT_{SUB,}STRING when we create the // substring objects. int data; }; template <typename Char> bool ParseReplacementPattern(ZoneChunkList<ReplacementPart>* parts, Vector<Char> characters, FixedArray capture_name_map, int capture_count, int subject_length) { // Equivalent to String::GetSubstitution, except that this method converts // the replacement string into an internal representation that avoids // repeated parsing when used repeatedly. int length = characters.length(); int last = 0; for (int i = 0; i < length; i++) { Char c = characters[i]; if (c == '$') { int next_index = i + 1; if (next_index == length) { // No next character! break; } Char c2 = characters[next_index]; switch (c2) { case '$': if (i > last) { // There is a substring before. Include the first "$". parts->push_back( ReplacementPart::ReplacementSubString(last, next_index)); last = next_index + 1; // Continue after the second "$". } else { // Let the next substring start with the second "$". last = next_index; } i = next_index; break; case '`': if (i > last) { parts->push_back(ReplacementPart::ReplacementSubString(last, i)); } parts->push_back(ReplacementPart::SubjectPrefix()); i = next_index; last = i + 1; break; case '\'': if (i > last) { parts->push_back(ReplacementPart::ReplacementSubString(last, i)); } parts->push_back(ReplacementPart::SubjectSuffix(subject_length)); i = next_index; last = i + 1; break; case '&': if (i > last) { parts->push_back(ReplacementPart::ReplacementSubString(last, i)); } parts->push_back(ReplacementPart::SubjectMatch()); i = next_index; last = i + 1; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { int capture_ref = c2 - '0'; if (capture_ref > capture_count) { i = next_index; continue; } int second_digit_index = next_index + 1; if (second_digit_index < length) { // Peek ahead to see if we have two digits. Char c3 = characters[second_digit_index]; if ('0' <= c3 && c3 <= '9') { // Double digits. int double_digit_ref = capture_ref * 10 + c3 - '0'; if (double_digit_ref <= capture_count) { next_index = second_digit_index; capture_ref = double_digit_ref; } } } if (capture_ref > 0) { if (i > last) { parts->push_back( ReplacementPart::ReplacementSubString(last, i)); } DCHECK(capture_ref <= capture_count); parts->push_back(ReplacementPart::SubjectCapture(capture_ref)); last = next_index + 1; } i = next_index; break; } case '<': { if (capture_name_map.is_null()) { i = next_index; break; } // Scan until the next '>', and let the enclosed substring be the // groupName. const int name_start_index = next_index + 1; int closing_bracket_index = -1; for (int j = name_start_index; j < length; j++) { if (characters[j] == '>') { closing_bracket_index = j; break; } } // If no closing bracket is found, '$<' is treated as a string // literal. if (closing_bracket_index == -1) { i = next_index; break; } Vector<Char> requested_name = characters.SubVector(name_start_index, closing_bracket_index); // Let capture be ? Get(namedCaptures, groupName). const int capture_index = LookupNamedCapture( [=](String capture_name) { return capture_name.IsEqualTo(requested_name); }, capture_name_map); // If capture is undefined or does not exist, replace the text // through the following '>' with the empty string. // Otherwise, replace the text through the following '>' with // ? ToString(capture). DCHECK(capture_index == -1 || (1 <= capture_index && capture_index <= capture_count)); if (i > last) { parts->push_back(ReplacementPart::ReplacementSubString(last, i)); } parts->push_back( (capture_index == -1) ? ReplacementPart::EmptyReplacement() : ReplacementPart::SubjectCapture(capture_index)); last = closing_bracket_index + 1; i = closing_bracket_index; break; } default: i = next_index; break; } } } if (length > last) { if (last == 0) { // Replacement is simple. Do not use Apply to do the replacement. return true; } else { parts->push_back(ReplacementPart::ReplacementSubString(last, length)); } } return false; } ZoneChunkList<ReplacementPart> parts_; ZoneVector<Handle<String>> replacement_substrings_; }; bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> replacement, int capture_count, int subject_length) { { DisallowHeapAllocation no_gc; String::FlatContent content = replacement->GetFlatContent(no_gc); DCHECK(content.IsFlat()); FixedArray capture_name_map; if (capture_count > 0) { DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); Object maybe_capture_name_map = regexp->CaptureNameMap(); if (maybe_capture_name_map.IsFixedArray()) { capture_name_map = FixedArray::cast(maybe_capture_name_map); } } bool simple; if (content.IsOneByte()) { simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(), capture_name_map, capture_count, subject_length); } else { DCHECK(content.IsTwoByte()); simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(), capture_name_map, capture_count, subject_length); } if (simple) return true; } // Find substrings of replacement string and create them as String objects. int substring_index = 0; for (ReplacementPart& part : parts_) { int tag = part.tag; if (tag <= 0) { // A replacement string slice. int from = -tag; int to = part.data; replacement_substrings_.push_back( isolate->factory()->NewSubString(replacement, from, to)); part.tag = REPLACEMENT_SUBSTRING; part.data = substring_index; substring_index++; } else if (tag == REPLACEMENT_STRING) { replacement_substrings_.push_back(replacement); part.data = substring_index; substring_index++; } } return false; } void CompiledReplacement::Apply(ReplacementStringBuilder* builder, int match_from, int match_to, int32_t* match) { DCHECK_LT(0, parts_.size()); for (ReplacementPart& part : parts_) { switch (part.tag) { case SUBJECT_PREFIX: if (match_from > 0) builder->AddSubjectSlice(0, match_from); break; case SUBJECT_SUFFIX: { int subject_length = part.data; if (match_to < subject_length) { builder->AddSubjectSlice(match_to, subject_length); } break; } case SUBJECT_CAPTURE: { int capture = part.data; int from = match[capture * 2]; int to = match[capture * 2 + 1]; if (from >= 0 && to > from) { builder->AddSubjectSlice(from, to); } break; } case REPLACEMENT_SUBSTRING: case REPLACEMENT_STRING: builder->AddString(replacement_substrings_[part.data]); break; case EMPTY_REPLACEMENT: break; default: UNREACHABLE(); } } } void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern, std::vector<int>* indices, unsigned int limit) { DCHECK_LT(0, limit); // Collect indices of pattern in subject using memchr. // Stop after finding at most limit values. const uint8_t* subject_start = subject.begin(); const uint8_t* subject_end = subject_start + subject.length(); const uint8_t* pos = subject_start; while (limit > 0) { pos = reinterpret_cast<const uint8_t*>( memchr(pos, pattern, subject_end - pos)); if (pos == nullptr) return; indices->push_back(static_cast<int>(pos - subject_start)); pos++; limit--; } } void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern, std::vector<int>* indices, unsigned int limit) { DCHECK_LT(0, limit); const uc16* subject_start = subject.begin(); const uc16* subject_end = subject_start + subject.length(); for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) { if (*pos == pattern) { indices->push_back(static_cast<int>(pos - subject_start)); limit--; } } } template <typename SubjectChar, typename PatternChar> void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject, Vector<const PatternChar> pattern, std::vector<int>* indices, unsigned int limit) { DCHECK_LT(0, limit); // Collect indices of pattern in subject. // Stop after finding at most limit values. int pattern_length = pattern.length(); int index = 0; StringSearch<PatternChar, SubjectChar> search(isolate, pattern); while (limit > 0) { index = search.Search(subject, index); if (index < 0) return; indices->push_back(index); index += pattern_length; limit--; } } void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern, std::vector<int>* indices, unsigned int limit) { { DisallowHeapAllocation no_gc; String::FlatContent subject_content = subject.GetFlatContent(no_gc); String::FlatContent pattern_content = pattern.GetFlatContent(no_gc); DCHECK(subject_content.IsFlat()); DCHECK(pattern_content.IsFlat()); if (subject_content.IsOneByte()) { Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); if (pattern_content.IsOneByte()) { Vector<const uint8_t> pattern_vector = pattern_content.ToOneByteVector(); if (pattern_vector.length() == 1) { FindOneByteStringIndices(subject_vector, pattern_vector[0], indices, limit); } else { FindStringIndices(isolate, subject_vector, pattern_vector, indices, limit); } } else { FindStringIndices(isolate, subject_vector, pattern_content.ToUC16Vector(), indices, limit); } } else { Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); if (pattern_content.IsOneByte()) { Vector<const uint8_t> pattern_vector = pattern_content.ToOneByteVector(); if (pattern_vector.length() == 1) { FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, limit); } else { FindStringIndices(isolate, subject_vector, pattern_vector, indices, limit); } } else { Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector(); if (pattern_vector.length() == 1) { FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, limit); } else { FindStringIndices(isolate, subject_vector, pattern_vector, indices, limit); } } } } } namespace { std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) { std::vector<int>* list = isolate->regexp_indices(); list->clear(); return list; } void TruncateRegexpIndicesList(Isolate* isolate) { // Same size as smallest zone segment, preserving behavior from the // runtime zone. static const int kMaxRegexpIndicesListCapacity = 8 * KB; std::vector<int>* indicies = isolate->regexp_indices(); if (indicies->capacity() > kMaxRegexpIndicesListCapacity) { // Throw away backing storage. indicies->clear(); indicies->shrink_to_fit(); } } } // namespace template <typename ResultSeqString> V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString( Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp, Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) { DCHECK(subject->IsFlat()); DCHECK(replacement->IsFlat()); std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate); DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); String pattern = String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); int subject_len = subject->length(); int pattern_len = pattern.length(); int replacement_len = replacement->length(); FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF); if (indices->empty()) return *subject; // Detect integer overflow. int64_t result_len_64 = (static_cast<int64_t>(replacement_len) - static_cast<int64_t>(pattern_len)) * static_cast<int64_t>(indices->size()) + static_cast<int64_t>(subject_len); int result_len; if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) { STATIC_ASSERT(String::kMaxLength < kMaxInt); result_len = kMaxInt; // Provoke exception. } else { result_len = static_cast<int>(result_len_64); } if (result_len == 0) { return ReadOnlyRoots(isolate).empty_string(); } int subject_pos = 0; int result_pos = 0; MaybeHandle<SeqString> maybe_res; if (ResultSeqString::kHasOneByteEncoding) { maybe_res = isolate->factory()->NewRawOneByteString(result_len); } else { maybe_res = isolate->factory()->NewRawTwoByteString(result_len); } Handle<SeqString> untyped_res; ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res); Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res); DisallowHeapAllocation no_gc; for (int index : *indices) { // Copy non-matched subject content. if (subject_pos < index) { String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos, subject_pos, index); result_pos += index - subject_pos; } // Replace match. if (replacement_len > 0) { String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0, replacement_len); result_pos += replacement_len; } subject_pos = index + pattern_len; } // Add remaining subject content at the end. if (subject_pos < subject_len) { String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos, subject_pos, subject_len); } int32_t match_indices[] = {indices->back(), indices->back() + pattern_len}; RegExp::SetLastMatchInfo(isolate, last_match_info, subject, 0, match_indices); TruncateRegexpIndicesList(isolate); return *result; } V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString( Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) { DCHECK(subject->IsFlat()); DCHECK(replacement->IsFlat()); int capture_count = regexp->CaptureCount(); int subject_length = subject->length(); JSRegExp::Type typeTag = regexp->TypeTag(); if (typeTag == JSRegExp::IRREGEXP) { // Ensure the RegExp is compiled so we can access the capture-name map. if (RegExp::IrregexpPrepare(isolate, regexp, subject) == -1) { DCHECK(isolate->has_pending_exception()); return ReadOnlyRoots(isolate).exception(); } } // CompiledReplacement uses zone allocation. Zone zone(isolate->allocator(), ZONE_NAME); CompiledReplacement compiled_replacement(&zone); const bool simple_replace = compiled_replacement.Compile( isolate, regexp, replacement, capture_count, subject_length); // Shortcut for simple non-regexp global replacements if (typeTag == JSRegExp::ATOM && simple_replace) { if (subject->IsOneByteRepresentation() && replacement->IsOneByteRepresentation()) { return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( isolate, subject, regexp, replacement, last_match_info); } else { return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( isolate, subject, regexp, replacement, last_match_info); } } RegExpGlobalCache global_cache(regexp, subject, isolate); if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); int32_t* current_match = global_cache.FetchNext(); if (current_match == nullptr) { if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); return *subject; } // Guessing the number of parts that the final result string is built // from. Global regexps can match any number of times, so we guess // conservatively. int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1; ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts); int prev = 0; do { int start = current_match[0]; int end = current_match[1]; if (prev < start) { builder.AddSubjectSlice(prev, start); } if (simple_replace) { builder.AddString(replacement); } else { compiled_replacement.Apply(&builder, start, end, current_match); } prev = end; current_match = global_cache.FetchNext(); } while (current_match != nullptr); if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); if (prev < subject_length) { builder.AddSubjectSlice(prev, subject_length); } RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count, global_cache.LastSuccessfulMatch()); RETURN_RESULT_OR_FAILURE(isolate, builder.ToString()); } template <typename ResultSeqString> V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString( Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, Handle<RegExpMatchInfo> last_match_info) { DCHECK(subject->IsFlat()); // Shortcut for simple non-regexp global replacements if (regexp->TypeTag() == JSRegExp::ATOM) { Handle<String> empty_string = isolate->factory()->empty_string(); if (subject->IsOneByteRepresentation()) { return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( isolate, subject, regexp, empty_string, last_match_info); } else { return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( isolate, subject, regexp, empty_string, last_match_info); } } RegExpGlobalCache global_cache(regexp, subject, isolate); if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); int32_t* current_match = global_cache.FetchNext(); if (current_match == nullptr) { if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); return *subject; } int start = current_match[0]; int end = current_match[1]; int capture_count = regexp->CaptureCount(); int subject_length = subject->length(); int new_length = subject_length - (end - start); if (new_length == 0) return ReadOnlyRoots(isolate).empty_string(); Handle<ResultSeqString> answer; if (ResultSeqString::kHasOneByteEncoding) { answer = Handle<ResultSeqString>::cast( isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked()); } else { answer = Handle<ResultSeqString>::cast( isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked()); } int prev = 0; int position = 0; DisallowHeapAllocation no_gc; do { start = current_match[0]; end = current_match[1]; if (prev < start) { // Add substring subject[prev;start] to answer string. String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev, start); position += start - prev; } prev = end; current_match = global_cache.FetchNext(); } while (current_match != nullptr); if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count, global_cache.LastSuccessfulMatch()); if (prev < subject_length) { // Add substring subject[prev;length] to answer string. String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev, subject_length); position += subject_length - prev; } if (position == 0) return ReadOnlyRoots(isolate).empty_string(); // Shorten string and fill int string_size = ResultSeqString::SizeFor(position); int allocated_string_size = ResultSeqString::SizeFor(new_length); int delta = allocated_string_size - string_size; answer->set_length(position); if (delta == 0) return *answer; Address end_of_string = answer->address() + string_size; Heap* heap = isolate->heap(); // The trimming is performed on a newly allocated object, which is on a // freshly allocated page or on an already swept page. Hence, the sweeper // thread can not get confused with the filler creation. No synchronization // needed. // TODO(hpayer): We should shrink the large object page if the size // of the object changed significantly. if (!heap->IsLargeObject(*answer)) { heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo); } return *answer; } RUNTIME_FUNCTION(Runtime_StringSplit) { HandleScope handle_scope(isolate); DCHECK_EQ(3, args.length()); CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1); CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); CHECK_LT(0, limit); int subject_length = subject->length(); int pattern_length = pattern->length(); CHECK_LT(0, pattern_length); if (limit == 0xFFFFFFFFu) { FixedArray last_match_cache_unused; Handle<Object> cached_answer( RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern, &last_match_cache_unused, RegExpResultsCache::STRING_SPLIT_SUBSTRINGS), isolate); if (*cached_answer != Smi::zero()) { // The cache FixedArray is a COW-array and can therefore be reused. Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements( Handle<FixedArray>::cast(cached_answer)); return *result; } } // The limit can be very large (0xFFFFFFFFu), but since the pattern // isn't empty, we can never create more parts than ~half the length // of the subject. subject = String::Flatten(isolate, subject); pattern = String::Flatten(isolate, pattern); std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate); FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit); if (static_cast<uint32_t>(indices->size()) < limit) { indices->push_back(subject_length); } // The list indices now contains the end of each part to create. // Create JSArray of substrings separated by separator. int part_count = static_cast<int>(indices->size()); Handle<JSArray> result = isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count, INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE); DCHECK(result->HasObjectElements()); Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate); if (part_count == 1 && indices->at(0) == subject_length) { elements->set(0, *subject); } else { int part_start = 0; FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, { int part_end = indices->at(i); Handle<String> substring = isolate->factory()->NewProperSubString(subject, part_start, part_end); elements->set(i, *substring); part_start = part_end + pattern_length; }); } if (limit == 0xFFFFFFFFu) { if (result->HasObjectElements()) { RegExpResultsCache::Enter(isolate, subject, pattern, elements, isolate->factory()->empty_fixed_array(), RegExpResultsCache::STRING_SPLIT_SUBSTRINGS); } } TruncateRegexpIndicesList(isolate); return *result; } RUNTIME_FUNCTION(Runtime_RegExpExec) { HandleScope scope(isolate); DCHECK_EQ(4, args.length()); CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); CONVERT_INT32_ARG_CHECKED(index, 2); CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3); // Due to the way the JS calls are constructed this must be less than the // length of a string, i.e. it is always a Smi. We check anyway for security. CHECK_LE(0, index); CHECK_GE(subject->length(), index); isolate->counters()->regexp_entry_runtime()->Increment(); RETURN_RESULT_OR_FAILURE( isolate, RegExp::Exec(isolate, regexp, subject, index, last_match_info)); } namespace { class MatchInfoBackedMatch : public String::Match { public: MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, Handle<RegExpMatchInfo> match_info) : isolate_(isolate), match_info_(match_info) { subject_ = String::Flatten(isolate, subject); if (regexp->TypeTag() == JSRegExp::IRREGEXP) { Object o = regexp->CaptureNameMap(); has_named_captures_ = o.IsFixedArray(); if (has_named_captures_) { capture_name_map_ = handle(FixedArray::cast(o), isolate); } } else { has_named_captures_ = false; } } Handle<String> GetMatch() override { return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr); } Handle<String> GetPrefix() override { const int match_start = match_info_->Capture(0); return isolate_->factory()->NewSubString(subject_, 0, match_start); } Handle<String> GetSuffix() override { const int match_end = match_info_->Capture(1); return isolate_->factory()->NewSubString(subject_, match_end, subject_->length()); } bool HasNamedCaptures() override { return has_named_captures_; } int CaptureCount() override { return match_info_->NumberOfCaptureRegisters() / 2; } MaybeHandle<String> GetCapture(int i, bool* capture_exists) override { Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter( isolate_, match_info_, i, capture_exists); return (*capture_exists) ? Object::ToString(isolate_, capture_obj) : isolate_->factory()->empty_string(); } MaybeHandle<String> GetNamedCapture(Handle<String> name, CaptureState* state) override { DCHECK(has_named_captures_); const int capture_index = LookupNamedCapture( [=](String capture_name) { return capture_name.Equals(*name); }, *capture_name_map_); if (capture_index == -1) { *state = UNMATCHED; return isolate_->factory()->empty_string(); } DCHECK(1 <= capture_index && capture_index <= CaptureCount()); bool capture_exists; Handle<String> capture_value; ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value, GetCapture(capture_index, &capture_exists), String); if (!capture_exists) { *state = UNMATCHED; return isolate_->factory()->empty_string(); } else { *state = MATCHED; return capture_value; } } private: Isolate* isolate_; Handle<String> subject_; Handle<RegExpMatchInfo> match_info_; bool has_named_captures_; Handle<FixedArray> capture_name_map_; }; class VectorBackedMatch : public String::Match { public: VectorBackedMatch(Isolate* isolate, Handle<String> subject, Handle<String> match, int match_position, ZoneVector<Handle<Object>>* captures, Handle<Object> groups_obj) : isolate_(isolate), match_(match), match_position_(match_position), captures_(captures) { subject_ = String::Flatten(isolate, subject); DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver()); has_named_captures_ = !groups_obj->IsUndefined(isolate); if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj); } Handle<String> GetMatch() override { return match_; } Handle<String> GetPrefix() override { return isolate_->factory()->NewSubString(subject_, 0, match_position_); } Handle<String> GetSuffix() override { const int match_end_position = match_position_ + match_->length(); return isolate_->factory()->NewSubString(subject_, match_end_position, subject_->length()); } bool HasNamedCaptures() override { return has_named_captures_; } int CaptureCount() override { return static_cast<int>(captures_->size()); } MaybeHandle<String> GetCapture(int i, bool* capture_exists) override { Handle<Object> capture_obj = captures_->at(i); if (capture_obj->IsUndefined(isolate_)) { *capture_exists = false; return isolate_->factory()->empty_string(); } *capture_exists = true; return Object::ToString(isolate_, capture_obj); } MaybeHandle<String> GetNamedCapture(Handle<String> name, CaptureState* state) override { DCHECK(has_named_captures_); Handle<Object> capture_obj; ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj, Object::GetProperty(isolate_, groups_obj_, name), String); if (capture_obj->IsUndefined(isolate_)) { *state = UNMATCHED; return isolate_->factory()->empty_string(); } else { *state = MATCHED; return Object::ToString(isolate_, capture_obj); } } private: Isolate* isolate_; Handle<String> subject_; Handle<String> match_; const int match_position_; ZoneVector<Handle<Object>>* captures_; bool has_named_captures_; Handle<JSReceiver> groups_obj_; }; // Create the groups object (see also the RegExp result creation in // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo). Handle<JSObject> ConstructNamedCaptureGroupsObject( Isolate* isolate, Handle<FixedArray> capture_map, const std::function<Object(int)>& f_get_capture) { Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto(); const int named_capture_count = capture_map->length() >> 1; for (int i = 0; i < named_capture_count; i++) { const int name_ix = i * 2; const int index_ix = i * 2 + 1; Handle<String> capture_name(String::cast(capture_map->get(name_ix)), isolate); const int capture_ix = Smi::ToInt(capture_map->get(index_ix)); DCHECK_GE(capture_ix, 1); // Explicit groups start at index 1. Handle<Object> capture_value(f_get_capture(capture_ix), isolate); DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString()); JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE); } return groups; } // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain // separate last match info. See comment on that function. template <bool has_capture> static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, Handle<RegExpMatchInfo> last_match_array, Handle<JSArray> result_array) { DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); DCHECK_NE(has_capture, regexp->CaptureCount() == 0); DCHECK(subject->IsFlat()); // Force tier up to native code for global replaces. The global replace is // implemented differently for native code and bytecode execution, where the // native code expects an array to store all the matches, and the bytecode // matches one at a time, so it's easier to tier-up to native code from the // start. if (FLAG_regexp_tier_up && regexp->TypeTag() == JSRegExp::IRREGEXP) { regexp->MarkTierUpForNextExec(); if (FLAG_trace_regexp_tier_up) { PrintF("Forcing tier-up of JSRegExp object %p in SearchRegExpMultiple\n", reinterpret_cast<void*>(regexp->ptr())); } } int capture_count = regexp->CaptureCount(); int subject_length = subject->length(); static const int kMinLengthToCache = 0x1000; if (subject_length > kMinLengthToCache) { FixedArray last_match_cache; Object cached_answer = RegExpResultsCache::Lookup( isolate->heap(), *subject, regexp->data(), &last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES); if (cached_answer.IsFixedArray()) { int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count); int32_t* last_match = NewArray<int32_t>(capture_registers); for (int i = 0; i < capture_registers; i++) { last_match[i] = Smi::ToInt(last_match_cache.get(i)); } Handle<FixedArray> cached_fixed_array = Handle<FixedArray>(FixedArray::cast(cached_answer), isolate); // The cache FixedArray is a COW-array and we need to return a copy. Handle<FixedArray> copied_fixed_array = isolate->factory()->CopyFixedArrayWithMap( cached_fixed_array, isolate->factory()->fixed_array_map()); JSArray::SetContent(result_array, copied_fixed_array); RegExp::SetLastMatchInfo(isolate, last_match_array, subject, capture_count, last_match); DeleteArray(last_match); return *result_array; } } RegExpGlobalCache global_cache(regexp, subject, isolate); if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); // Ensured in Runtime_RegExpExecMultiple. DCHECK(result_array->HasObjectElements()); Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()), isolate); if (result_elements->length() < 16) { result_elements = isolate->factory()->NewFixedArrayWithHoles(16); } FixedArrayBuilder builder(result_elements); // Position to search from. int match_start = -1; int match_end = 0; bool first = true; // Two smis before and after the match, for very long strings. static const int kMaxBuilderEntriesPerRegExpMatch = 5; while (true) { int32_t* current_match = global_cache.FetchNext(); if (current_match == nullptr) break; match_start = current_match[0]; builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch); if (match_end < match_start) { ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, match_start); } match_end = current_match[1]; { // Avoid accumulating new handles inside loop. HandleScope temp_scope(isolate); Handle<String> match; if (!first) { match = isolate->factory()->NewProperSubString(subject, match_start, match_end); } else { match = isolate->factory()->NewSubString(subject, match_start, match_end); first = false; } if (has_capture) { // Arguments array to replace function is match, captures, index and // subject, i.e., 3 + capture count in total. If the RegExp contains // named captures, they are also passed as the last argument. Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate); const bool has_named_captures = maybe_capture_map->IsFixedArray(); const int argc = has_named_captures ? 4 + capture_count : 3 + capture_count; Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc); int cursor = 0; elements->set(cursor++, *match); for (int i = 1; i <= capture_count; i++) { int start = current_match[i * 2]; if (start >= 0) { int end = current_match[i * 2 + 1]; DCHECK(start <= end); Handle<String> substring = isolate->factory()->NewSubString(subject, start, end); elements->set(cursor++, *substring); } else { DCHECK_GT(0, current_match[i * 2 + 1]); elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value()); } } elements->set(cursor++, Smi::FromInt(match_start)); elements->set(cursor++, *subject); if (has_named_captures) { Handle<FixedArray> capture_map = Handle<FixedArray>::cast(maybe_capture_map); Handle<JSObject> groups = ConstructNamedCaptureGroupsObject( isolate, capture_map, [=](int ix) { return elements->get(ix); }); elements->set(cursor++, *groups); } DCHECK_EQ(cursor, argc); builder.Add(*isolate->factory()->NewJSArrayWithElements(elements)); } else { builder.Add(*match); } } } if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); if (match_start >= 0) { // Finished matching, with at least one match. if (match_end < subject_length) { ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, subject_length); } RegExp::SetLastMatchInfo(isolate, last_match_array, subject, capture_count, global_cache.LastSuccessfulMatch()); if (subject_length > kMinLengthToCache) { // Store the last successful match into the array for caching. int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count); Handle<FixedArray> last_match_cache = isolate->factory()->NewFixedArray(capture_registers); int32_t* last_match = global_cache.LastSuccessfulMatch(); for (int i = 0; i < capture_registers; i++) { last_match_cache->set(i, Smi::FromInt(last_match[i])); } Handle<FixedArray> result_fixed_array = FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length()); // Cache the result and copy the FixedArray into a COW array. Handle<FixedArray> copied_fixed_array = isolate->factory()->CopyFixedArrayWithMap( result_fixed_array, isolate->factory()->fixed_array_map()); RegExpResultsCache::Enter( isolate, subject, handle(regexp->data(), isolate), copied_fixed_array, last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES); } return *builder.ToJSArray(result_array); } else { return ReadOnlyRoots(isolate).null_value(); // No matches at all. } } // Legacy implementation of RegExp.prototype[Symbol.replace] which // doesn't properly call the underlying exec method. V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string, Handle<String> replace) { // Functional fast-paths are dispatched directly by replace builtin. DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); Factory* factory = isolate->factory(); const int flags = regexp->GetFlags(); const bool global = (flags & JSRegExp::kGlobal) != 0; const bool sticky = (flags & JSRegExp::kSticky) != 0; replace = String::Flatten(isolate, replace); Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info(); if (!global) { // Non-global regexp search, string replace. uint32_t last_index = 0; if (sticky) { Handle<Object> last_index_obj(regexp->last_index(), isolate); ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj, Object::ToLength(isolate, last_index_obj), String); last_index = PositiveNumberToUint32(*last_index_obj); } Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(), isolate); // A lastIndex exceeding the string length always returns null (signalling // failure) in RegExpBuiltinExec, thus we can skip the call. if (last_index <= static_cast<uint32_t>(string->length())) { ASSIGN_RETURN_ON_EXCEPTION( isolate, match_indices_obj, RegExp::Exec(isolate, regexp, string, last_index, last_match_info), String); } if (match_indices_obj->IsNull(isolate)) { if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER); return string; } auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj); const int start_index = match_indices->Capture(0); const int end_index = match_indices->Capture(1); if (sticky) { regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER); } IncrementalStringBuilder builder(isolate); builder.AppendString(factory->NewSubString(string, 0, start_index)); if (replace->length() > 0) { MatchInfoBackedMatch m(isolate, regexp, string, match_indices); Handle<String> replacement; ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement, String::GetSubstitution(isolate, &m, replace), String); builder.AppendString(replacement); } builder.AppendString( factory->NewSubString(string, end_index, string->length())); return builder.Finish(); } else { // Global regexp search, string replace. DCHECK(global); RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0), String); // Force tier up to native code for global replaces. The global replace is // implemented differently for native code and bytecode execution, where the // native code expects an array to store all the matches, and the bytecode // matches one at a time, so it's easier to tier-up to native code from the // start. if (FLAG_regexp_tier_up && regexp->TypeTag() == JSRegExp::IRREGEXP) { regexp->MarkTierUpForNextExec(); if (FLAG_trace_regexp_tier_up) { PrintF("Forcing tier-up of JSRegExp object %p in RegExpReplace\n", reinterpret_cast<void*>(regexp->ptr())); } } if (replace->length() == 0) { if (string->IsOneByteRepresentation()) { Object result = StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>( isolate, string, regexp, last_match_info); return handle(String::cast(result), isolate); } else { Object result = StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>( isolate, string, regexp, last_match_info); return handle(String::cast(result), isolate); } } Object result = StringReplaceGlobalRegExpWithString( isolate, string, regexp, replace, last_match_info); if (result.IsString()) { return handle(String::cast(result), isolate); } else { return MaybeHandle<String>(); } } UNREACHABLE(); } } // namespace // This is only called for StringReplaceGlobalRegExpWithFunction. RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) { HandleScope handles(isolate); DCHECK_EQ(4, args.length()); CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2); CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); CHECK(result_array->HasObjectElements()); subject = String::Flatten(isolate, subject); CHECK(regexp->GetFlags() & JSRegExp::kGlobal); Object result; if (regexp->CaptureCount() == 0) { result = SearchRegExpMultiple<false>(isolate, subject, regexp, last_match_info, result_array); } else { result = SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info, result_array); } DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); return result; } RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) { HandleScope scope(isolate); DCHECK_EQ(3, args.length()); CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2); DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); DCHECK(replace_obj->map().is_callable()); Factory* factory = isolate->factory(); Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info(); const int flags = regexp->GetFlags(); DCHECK_EQ(flags & JSRegExp::kGlobal, 0); // TODO(jgruber): This should be an easy port to CSA with massive payback. const bool sticky = (flags & JSRegExp::kSticky) != 0; uint32_t last_index = 0; if (sticky) { Handle<Object> last_index_obj(regexp->last_index(), isolate); ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, last_index_obj, Object::ToLength(isolate, last_index_obj)); last_index = PositiveNumberToUint32(*last_index_obj); } Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(), isolate); // A lastIndex exceeding the string length always returns null (signalling // failure) in RegExpBuiltinExec, thus we can skip the call. if (last_index <= static_cast<uint32_t>(subject->length())) { ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, match_indices_obj, RegExp::Exec(isolate, regexp, subject, last_index, last_match_info)); } if (match_indices_obj->IsNull(isolate)) { if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER); return *subject; } Handle<RegExpMatchInfo> match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj); const int index = match_indices->Capture(0); const int end_of_match = match_indices->Capture(1); if (sticky) { regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER); } IncrementalStringBuilder builder(isolate); builder.AppendString(factory->NewSubString(subject, 0, index)); // Compute the parameter list consisting of the match, captures, index, // and subject for the replace function invocation. If the RegExp contains // named captures, they are also passed as the last argument. // The number of captures plus one for the match. const int m = match_indices->NumberOfCaptureRegisters() / 2; bool has_named_captures = false; Handle<FixedArray> capture_map; if (m > 1) { // The existence of capture groups implies IRREGEXP kind. DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); Object maybe_capture_map = regexp->CaptureNameMap(); if (maybe_capture_map.IsFixedArray()) { has_named_captures = true; capture_map = handle(FixedArray::cast(maybe_capture_map), isolate); } } const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures); if (argc == static_cast<uint32_t>(-1)) { THROW_NEW_ERROR_RETURN_FAILURE( isolate, NewRangeError(MessageTemplate::kTooManyArguments)); } ScopedVector<Handle<Object>> argv(argc); int cursor = 0; for (int j = 0; j < m; j++) { bool ok; Handle<String> capture = RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok); if (ok) { argv[cursor++] = capture; } else { argv[cursor++] = factory->undefined_value(); } } argv[cursor++] = handle(Smi::FromInt(index), isolate); argv[cursor++] = subject; if (has_named_captures) { argv[cursor++] = ConstructNamedCaptureGroupsObject( isolate, capture_map, [&argv](int ix) { return *argv[ix]; }); } DCHECK_EQ(cursor, argc); Handle<Object> replacement_obj; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, replacement_obj, Execution::Call(isolate, replace_obj, factory->undefined_value(), argc, argv.begin())); Handle<String> replacement; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, replacement, Object::ToString(isolate, replacement_obj)); builder.AppendString(replacement); builder.AppendString( factory->NewSubString(subject, end_of_match, subject->length())); RETURN_RESULT_OR_FAILURE(isolate, builder.Finish()); } namespace { V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate, Handle<Object> object, uint32_t* out) { if (object->IsUndefined(isolate)) { *out = kMaxUInt32; return object; } Handle<Object> number; ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object), Object); *out = NumberToUint32(*number); return object; } Handle<JSArray> NewJSArrayWithElements(Isolate* isolate, Handle<FixedArray> elems, int num_elems) { return isolate->factory()->NewJSArrayWithElements( FixedArray::ShrinkOrEmpty(isolate, elems, num_elems)); } } // namespace // Slow path for: // ES#sec-regexp.prototype-@@replace // RegExp.prototype [ @@split ] ( string, limit ) RUNTIME_FUNCTION(Runtime_RegExpSplit) { HandleScope scope(isolate); DCHECK_EQ(3, args.length()); CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0); CONVERT_ARG_HANDLE_CHECKED(String, string, 1); CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2); Factory* factory = isolate->factory(); Handle<JSFunction> regexp_fun = isolate->regexp_function(); Handle<Object> ctor; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun)); Handle<Object> flags_obj; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, flags_obj, JSObject::GetProperty(isolate, recv, factory->flags_string())); Handle<String> flags; ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags, Object::ToString(isolate, flags_obj)); Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u'); const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0); Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y'); const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0); Handle<String> new_flags = flags; if (!sticky) { ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags, factory->NewConsString(flags, y_str)); } Handle<JSReceiver> splitter; { const int argc = 2; ScopedVector<Handle<Object>> argv(argc); argv[0] = recv; argv[1] = new_flags; Handle<Object> splitter_obj; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, splitter_obj, Execution::New(isolate, ctor, argc, argv.begin())); splitter = Handle<JSReceiver>::cast(splitter_obj); } uint32_t limit; RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit)); const uint32_t length = string->length(); if (limit == 0) return *factory->NewJSArray(0); if (length == 0) { Handle<Object> result; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string, factory->undefined_value())); if (!result->IsNull(isolate)) return *factory->NewJSArray(0); Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1); elems->set(0, *string); return *factory->NewJSArrayWithElements(elems); } static const int kInitialArraySize = 8; Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize); uint32_t num_elems = 0; uint32_t string_index = 0; uint32_t prev_string_index = 0; while (string_index < length) { RETURN_FAILURE_ON_EXCEPTION( isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index)); Handle<Object> result; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string, factory->undefined_value())); if (result->IsNull(isolate)) { string_index = static_cast<uint32_t>( RegExpUtils::AdvanceStringIndex(string, string_index, unicode)); continue; } Handle<Object> last_index_obj; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter)); ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, last_index_obj, Object::ToLength(isolate, last_index_obj)); const uint32_t end = std::min(PositiveNumberToUint32(*last_index_obj), length); if (end == prev_string_index) { string_index = static_cast<uint32_t>( RegExpUtils::AdvanceStringIndex(string, string_index, unicode)); continue; } { Handle<String> substr = factory->NewSubString(string, prev_string_index, string_index); elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr); if (num_elems == limit) { return *NewJSArrayWithElements(isolate, elems, num_elems); } } prev_string_index = end; Handle<Object> num_captures_obj; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, num_captures_obj, Object::GetProperty(isolate, result, isolate->factory()->length_string())); ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj)); const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj); for (uint32_t i = 1; i < num_captures; i++) { Handle<Object> capture; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, capture, Object::GetElement(isolate, result, i)); elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture); if (num_elems == limit) { return *NewJSArrayWithElements(isolate, elems, num_elems); } } string_index = prev_string_index; } { Handle<String> substr = factory->NewSubString(string, prev_string_index, length); elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr); } return *NewJSArrayWithElements(isolate, elems, num_elems); } // Slow path for: // ES#sec-regexp.prototype-@@replace // RegExp.prototype [ @@replace ] ( string, replaceValue ) RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) { HandleScope scope(isolate); DCHECK_EQ(3, args.length()); CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0); CONVERT_ARG_HANDLE_CHECKED(String, string, 1); Handle<Object> replace_obj = args.at(2); Factory* factory = isolate->factory(); string = String::Flatten(isolate, string); const bool functional_replace = replace_obj->IsCallable(); Handle<String> replace; if (!functional_replace) { ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace, Object::ToString(isolate, replace_obj)); } // Fast-path for unmodified JSRegExps (and non-functional replace). if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) { // We should never get here with functional replace because unmodified // regexp and functional replace should be fully handled in CSA code. CHECK(!functional_replace); Handle<Object> result; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, result, RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace)); DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv)); return *result; } const uint32_t length = string->length(); Handle<Object> global_obj; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, global_obj, JSReceiver::GetProperty(isolate, recv, factory->global_string())); const bool global = global_obj->BooleanValue(isolate); bool unicode = false; if (global) { Handle<Object> unicode_obj; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, unicode_obj, JSReceiver::GetProperty(isolate, recv, factory->unicode_string())); unicode = unicode_obj->BooleanValue(isolate); RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, recv, 0)); } Zone zone(isolate->allocator(), ZONE_NAME); ZoneVector<Handle<Object>> results(&zone); while (true) { Handle<Object> result; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, result, RegExpUtils::RegExpExec(isolate, recv, string, factory->undefined_value())); if (result->IsNull(isolate)) break; results.push_back(result); if (!global) break; Handle<Object> match_obj; ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj, Object::GetElement(isolate, result, 0)); Handle<String> match; ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match, Object::ToString(isolate, match_obj)); if (match->length() == 0) { RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex( isolate, recv, string, unicode)); } } // TODO(jgruber): Look into ReplacementStringBuilder instead. IncrementalStringBuilder builder(isolate); uint32_t next_source_position = 0; for (const auto& result : results) { HandleScope handle_scope(isolate); Handle<Object> captures_length_obj; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, captures_length_obj, Object::GetProperty(isolate, result, factory->length_string())); ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, captures_length_obj, Object::ToLength(isolate, captures_length_obj)); const uint32_t captures_length = PositiveNumberToUint32(*captures_length_obj); Handle<Object> match_obj; ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj, Object::GetElement(isolate, result, 0)); Handle<String> match; ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match, Object::ToString(isolate, match_obj)); const int match_length = match->length(); Handle<Object> position_obj; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, position_obj, Object::GetProperty(isolate, result, factory->index_string())); ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, position_obj, Object::ToInteger(isolate, position_obj)); const uint32_t position = std::min(PositiveNumberToUint32(*position_obj), length); // Do not reserve capacity since captures_length is user-controlled. ZoneVector<Handle<Object>> captures(&zone); for (uint32_t n = 0; n < captures_length; n++) { Handle<Object> capture; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, capture, Object::GetElement(isolate, result, n)); if (!capture->IsUndefined(isolate)) { ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture, Object::ToString(isolate, capture)); } captures.push_back(capture); } Handle<Object> groups_obj = isolate->factory()->undefined_value(); ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, groups_obj, Object::GetProperty(isolate, result, factory->groups_string())); const bool has_named_captures = !groups_obj->IsUndefined(isolate); Handle<String> replacement; if (functional_replace) { const uint32_t argc = GetArgcForReplaceCallable(captures_length, has_named_captures); if (argc == static_cast<uint32_t>(-1)) { THROW_NEW_ERROR_RETURN_FAILURE( isolate, NewRangeError(MessageTemplate::kTooManyArguments)); } ScopedVector<Handle<Object>> argv(argc); int cursor = 0; for (uint32_t j = 0; j < captures_length; j++) { argv[cursor++] = captures[j]; } argv[cursor++] = handle(Smi::FromInt(position), isolate); argv[cursor++] = string; if (has_named_captures) argv[cursor++] = groups_obj; DCHECK_EQ(cursor, argc); Handle<Object> replacement_obj; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, replacement_obj, Execution::Call(isolate, replace_obj, factory->undefined_value(), argc, argv.begin())); ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, replacement, Object::ToString(isolate, replacement_obj)); } else { DCHECK(!functional_replace); if (!groups_obj->IsUndefined(isolate)) { ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, groups_obj, Object::ToObject(isolate, groups_obj)); } VectorBackedMatch m(isolate, string, match, position, &captures, groups_obj); ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, replacement, String::GetSubstitution(isolate, &m, replace)); } if (position >= next_source_position) { builder.AppendString( factory->NewSubString(string, next_source_position, position)); builder.AppendString(replacement); next_source_position = position + match_length; } } if (next_source_position < length) { builder.AppendString( factory->NewSubString(string, next_source_position, length)); } RETURN_RESULT_OR_FAILURE(isolate, builder.Finish()); } RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) { HandleScope scope(isolate); DCHECK_EQ(3, args.length()); // TODO(pwong): To follow the spec more closely and simplify calling code, // this could handle the canonicalization of pattern and flags. See // https://tc39.github.io/ecma262/#sec-regexpinitialize CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); CONVERT_ARG_HANDLE_CHECKED(String, source, 1); CONVERT_ARG_HANDLE_CHECKED(String, flags, 2); RETURN_FAILURE_ON_EXCEPTION(isolate, JSRegExp::Initialize(regexp, source, flags)); return *regexp; } RUNTIME_FUNCTION(Runtime_IsRegExp) { SealHandleScope shs(isolate); DCHECK_EQ(1, args.length()); CONVERT_ARG_CHECKED(Object, obj, 0); return isolate->heap()->ToBoolean(obj.IsJSRegExp()); } } // namespace internal } // namespace v8