string.cc 57.5 KB
Newer Older
1 2 3 4 5 6
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/objects/string.h"

7
#include "src/common/globals.h"
8
#include "src/handles/handles-inl.h"
9 10
#include "src/heap/heap-inl.h"
#include "src/heap/memory-chunk.h"
11
#include "src/heap/read-only-heap.h"
12
#include "src/numbers/conversions.h"
13 14 15
#include "src/objects/map.h"
#include "src/objects/oddball.h"
#include "src/objects/string-comparator.h"
16
#include "src/objects/string-inl.h"
17 18 19 20 21 22
#include "src/strings/char-predicates.h"
#include "src/strings/string-builder-inl.h"
#include "src/strings/string-hasher.h"
#include "src/strings/string-search.h"
#include "src/strings/string-stream.h"
#include "src/strings/unicode-inl.h"
23
#include "src/utils/ostreams.h"
24 25 26 27 28

namespace v8 {
namespace internal {

Handle<String> String::SlowFlatten(Isolate* isolate, Handle<ConsString> cons,
29
                                   AllocationType allocation) {
30
  DCHECK_NE(cons->second().length(), 0);
31 32

  // TurboFan can create cons strings with empty first parts.
33
  while (cons->first().length() == 0) {
34 35 36
    // We do not want to call this function recursively. Therefore we call
    // String::Flatten only in those cases where String::SlowFlatten is not
    // called again.
37
    if (cons->second().IsConsString() && !cons->second().IsFlat()) {
38 39 40 41 42
      cons = handle(ConsString::cast(cons->second()), isolate);
    } else {
      return String::Flatten(isolate, handle(cons->second(), isolate));
    }
  }
43 44 45

  DCHECK(AllowHeapAllocation::IsAllowed());
  int length = cons->length();
46 47
  allocation =
      ObjectInYoungGeneration(*cons) ? allocation : AllocationType::kOld;
48 49
  Handle<SeqString> result;
  if (cons->IsOneByteRepresentation()) {
50 51 52 53
    Handle<SeqOneByteString> flat =
        isolate->factory()
            ->NewRawOneByteString(length, allocation)
            .ToHandleChecked();
54 55 56 57
    DisallowHeapAllocation no_gc;
    WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
    result = flat;
  } else {
58 59 60 61
    Handle<SeqTwoByteString> flat =
        isolate->factory()
            ->NewRawTwoByteString(length, allocation)
            .ToHandleChecked();
62 63 64 65
    DisallowHeapAllocation no_gc;
    WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
    result = flat;
  }
66 67
  cons->set_first(*result);
  cons->set_second(ReadOnlyRoots(isolate).empty_string());
68 69 70 71
  DCHECK(result->IsFlat());
  return result;
}

72 73 74 75 76 77
namespace {

template <class StringClass>
void MigrateExternalStringResource(Isolate* isolate, String from, String to) {
  StringClass cast_from = StringClass::cast(from);
  StringClass cast_to = StringClass::cast(to);
78
  const typename StringClass::Resource* to_resource = cast_to.resource();
79 80
  if (to_resource == nullptr) {
    // |to| is a just-created internalized copy of |from|. Migrate the resource.
81
    cast_to.SetResource(isolate, cast_from.resource());
82 83 84
    // Zap |from|'s resource pointer to reflect the fact that |from| has
    // relinquished ownership of its resource.
    isolate->heap()->UpdateExternalString(
85 86 87
        from, ExternalString::cast(from).ExternalPayloadSize(), 0);
    cast_from.SetResource(isolate, nullptr);
  } else if (to_resource != cast_from.resource()) {
88 89 90 91 92 93 94 95 96 97
    // |to| already existed and has its own resource. Finalize |from|.
    isolate->heap()->FinalizeExternalString(from);
  }
}

}  // namespace

void String::MakeThin(Isolate* isolate, String internalized) {
  DisallowHeapAllocation no_gc;
  DCHECK_NE(*this, internalized);
98
  DCHECK(internalized.IsInternalizedString());
99 100

  if (this->IsExternalString()) {
101
    if (internalized.IsExternalOneByteString()) {
102 103
      MigrateExternalStringResource<ExternalOneByteString>(isolate, *this,
                                                           internalized);
104
    } else if (internalized.IsExternalTwoByteString()) {
105 106 107 108 109 110 111 112 113 114
      MigrateExternalStringResource<ExternalTwoByteString>(isolate, *this,
                                                           internalized);
    } else {
      // If the external string is duped into an existing non-external
      // internalized string, free its resource (it's about to be rewritten
      // into a ThinString below).
      isolate->heap()->FinalizeExternalString(*this);
    }
  }

115 116
  bool has_pointers = StringShape(*this).IsIndirect();

117
  int old_size = this->Size();
118 119 120 121
  // Slot invalidation is not necessary here: ThinString only stores tagged
  // value, so it can't store an untagged value in a recorded slot.
  isolate->heap()->NotifyObjectLayoutChange(*this, no_gc,
                                            InvalidateRecordedSlots::kNo);
122
  bool one_byte = internalized.IsOneByteRepresentation();
123 124 125 126 127
  Handle<Map> map = one_byte ? isolate->factory()->thin_one_byte_string_map()
                             : isolate->factory()->thin_string_map();
  DCHECK_GE(old_size, ThinString::kSize);
  this->synchronized_set_map(*map);
  ThinString thin = ThinString::cast(*this);
128 129
  thin.set_actual(internalized);
  Address thin_end = thin.address() + ThinString::kSize;
130 131 132
  int size_delta = old_size - ThinString::kSize;
  if (size_delta != 0) {
    Heap* heap = isolate->heap();
133 134 135
    heap->CreateFillerObjectAt(
        thin_end, size_delta,
        has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
136 137 138
  }
}

139 140 141 142 143 144 145 146 147 148 149
bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
  DisallowHeapAllocation no_allocation;
  // Externalizing twice leaks the external resource, so it's
  // prohibited by the API.
  DCHECK(this->SupportsExternalization());
  DCHECK(resource->IsCacheable());
#ifdef ENABLE_SLOW_DCHECKS
  if (FLAG_enable_slow_asserts) {
    // Assert that the resource and the string are equivalent.
    DCHECK(static_cast<size_t>(this->length()) == resource->length());
    ScopedVector<uc16> smart_chars(this->length());
150 151
    String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
    DCHECK_EQ(0, memcmp(smart_chars.begin(), resource->data(),
152 153 154 155 156 157 158 159
                        resource->length() * sizeof(smart_chars[0])));
  }
#endif                      // DEBUG
  int size = this->Size();  // Byte size of the original string.
  // Abort if size does not allow in-place conversion.
  if (size < ExternalString::kUncachedSize) return false;
  // Read-only strings cannot be made external, since that would mutate the
  // string.
160 161
  if (IsReadOnlyHeapObject(*this)) return false;
  Isolate* isolate = GetIsolateFromWritableObject(*this);
162 163
  bool is_internalized = this->IsInternalizedString();
  bool has_pointers = StringShape(*this).IsIndirect();
164

165
  if (has_pointers) {
166 167
    isolate->heap()->NotifyObjectLayoutChange(*this, no_allocation,
                                              InvalidateRecordedSlots::kYes);
168 169 170 171 172 173 174 175
  }
  // Morph the string to an external string by replacing the map and
  // reinitializing the fields.  This won't work if the space the existing
  // string occupies is too small for a regular external string.  Instead, we
  // resort to an uncached external string instead, omitting the field caching
  // the address of the backing store.  When we encounter uncached external
  // strings in generated code, we need to bailout to runtime.
  Map new_map;
176
  ReadOnlyRoots roots(isolate);
177
  if (size < ExternalString::kSizeOfAllExternalStrings) {
178
    if (is_internalized) {
179
      new_map = roots.uncached_external_internalized_string_map();
180
    } else {
181
      new_map = roots.uncached_external_string_map();
182 183
    }
  } else {
184 185
    new_map = is_internalized ? roots.external_internalized_string_map()
                              : roots.external_string_map();
186 187 188 189
  }

  // Byte size of the external String object.
  int new_size = this->SizeFromMap(new_map);
190
  isolate->heap()->CreateFillerObjectAt(
191 192
      this->address() + new_size, size - new_size,
      has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
193 194 195 196 197 198

  // We are storing the new map using release store after creating a filler for
  // the left-over space to avoid races with the sweeper thread.
  this->synchronized_set_map(new_map);

  ExternalTwoByteString self = ExternalTwoByteString::cast(*this);
199
  self.SetResource(isolate, resource);
200
  isolate->heap()->RegisterExternalString(*this);
201
  if (is_internalized) self.Hash();  // Force regeneration of the hash value.
202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
  return true;
}

bool String::MakeExternal(v8::String::ExternalOneByteStringResource* resource) {
  DisallowHeapAllocation no_allocation;
  // Externalizing twice leaks the external resource, so it's
  // prohibited by the API.
  DCHECK(this->SupportsExternalization());
  DCHECK(resource->IsCacheable());
#ifdef ENABLE_SLOW_DCHECKS
  if (FLAG_enable_slow_asserts) {
    // Assert that the resource and the string are equivalent.
    DCHECK(static_cast<size_t>(this->length()) == resource->length());
    if (this->IsTwoByteRepresentation()) {
      ScopedVector<uint16_t> smart_chars(this->length());
217 218
      String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
      DCHECK(String::IsOneByte(smart_chars.begin(), this->length()));
219 220
    }
    ScopedVector<char> smart_chars(this->length());
221 222
    String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
    DCHECK_EQ(0, memcmp(smart_chars.begin(), resource->data(),
223 224 225 226 227 228 229 230
                        resource->length() * sizeof(smart_chars[0])));
  }
#endif                      // DEBUG
  int size = this->Size();  // Byte size of the original string.
  // Abort if size does not allow in-place conversion.
  if (size < ExternalString::kUncachedSize) return false;
  // Read-only strings cannot be made external, since that would mutate the
  // string.
231 232
  if (IsReadOnlyHeapObject(*this)) return false;
  Isolate* isolate = GetIsolateFromWritableObject(*this);
233 234 235 236
  bool is_internalized = this->IsInternalizedString();
  bool has_pointers = StringShape(*this).IsIndirect();

  if (has_pointers) {
237 238
    isolate->heap()->NotifyObjectLayoutChange(*this, no_allocation,
                                              InvalidateRecordedSlots::kYes);
239 240 241 242 243 244 245 246
  }
  // Morph the string to an external string by replacing the map and
  // reinitializing the fields.  This won't work if the space the existing
  // string occupies is too small for a regular external string.  Instead, we
  // resort to an uncached external string instead, omitting the field caching
  // the address of the backing store.  When we encounter uncached external
  // strings in generated code, we need to bailout to runtime.
  Map new_map;
247
  ReadOnlyRoots roots(isolate);
248
  if (size < ExternalString::kSizeOfAllExternalStrings) {
249 250 251 252 253 254 255 256 257 258 259
    new_map = is_internalized
                  ? roots.uncached_external_one_byte_internalized_string_map()
                  : roots.uncached_external_one_byte_string_map();
  } else {
    new_map = is_internalized
                  ? roots.external_one_byte_internalized_string_map()
                  : roots.external_one_byte_string_map();
  }

  // Byte size of the external String object.
  int new_size = this->SizeFromMap(new_map);
260
  isolate->heap()->CreateFillerObjectAt(
261 262
      this->address() + new_size, size - new_size,
      has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
263 264 265 266 267 268

  // We are storing the new map using release store after creating a filler for
  // the left-over space to avoid races with the sweeper thread.
  this->synchronized_set_map(new_map);

  ExternalOneByteString self = ExternalOneByteString::cast(*this);
269
  self.SetResource(isolate, resource);
270
  isolate->heap()->RegisterExternalString(*this);
271
  if (is_internalized) self.Hash();  // Force regeneration of the hash value.
272 273 274 275 276
  return true;
}

bool String::SupportsExternalization() {
  if (this->IsThinString()) {
277
    return i::ThinString::cast(*this).actual().SupportsExternalization();
278 279 280
  }

  // RO_SPACE strings cannot be externalized.
281
  if (IsReadOnlyHeapObject(*this)) {
282 283 284 285 286 287 288 289
    return false;
  }

  // Already an external string.
  if (StringShape(*this).IsExternal()) {
    return false;
  }

290 291 292 293 294 295 296
#ifdef V8_COMPRESS_POINTERS
  // Small strings may not be in-place externalizable.
  if (this->Size() < ExternalString::kUncachedSize) return false;
#else
  DCHECK_LE(ExternalString::kUncachedSize, this->Size());
#endif

297
  Isolate* isolate = GetIsolateFromWritableObject(*this);
298 299 300
  return !isolate->heap()->IsInGCPostProcessing();
}

301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
const char* String::PrefixForDebugPrint() const {
  StringShape shape(*this);
  if (IsTwoByteRepresentation()) {
    StringShape shape(*this);
    if (shape.IsInternalized()) {
      return "u#";
    } else if (shape.IsCons()) {
      return "uc\"";
    } else if (shape.IsThin()) {
      return "u>\"";
    } else {
      return "u\"";
    }
  } else {
    StringShape shape(*this);
    if (shape.IsInternalized()) {
      return "#";
    } else if (shape.IsCons()) {
      return "c\"";
    } else if (shape.IsThin()) {
      return ">\"";
    } else {
      return "\"";
    }
325
  }
326 327 328 329 330 331 332 333
  UNREACHABLE();
}

const char* String::SuffixForDebugPrint() const {
  StringShape shape(*this);
  if (shape.IsInternalized()) return "";
  return "\"";
}
334

335
void String::StringShortPrint(StringStream* accumulator) {
336 337 338 339 340
  if (!LooksValid()) {
    accumulator->Add("<Invalid String>");
    return;
  }

341 342 343
  const int len = length();
  accumulator->Add("<String[%u]: ", len);
  accumulator->Add(PrefixForDebugPrint());
344 345

  if (len > kMaxShortPrintLength) {
346 347 348 349
    accumulator->Add("...<truncated>>");
    accumulator->Add(SuffixForDebugPrint());
    accumulator->Put('>');
    return;
350 351
  }

352 353 354
  PrintUC16(accumulator, 0, len);
  accumulator->Add(SuffixForDebugPrint());
  accumulator->Put('>');
355 356 357 358 359 360 361 362 363 364
}

void String::PrintUC16(std::ostream& os, int start, int end) {  // NOLINT
  if (end < 0) end = length();
  StringCharacterStream stream(*this, start);
  for (int i = start; i < end && stream.HasMore(); i++) {
    os << AsUC16(stream.GetNext());
  }
}

365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
void String::PrintUC16(StringStream* accumulator, int start, int end) {
  if (end < 0) end = length();
  StringCharacterStream stream(*this, start);
  for (int i = start; i < end && stream.HasMore(); i++) {
    uint16_t c = stream.GetNext();
    if (c == '\n') {
      accumulator->Add("\\n");
    } else if (c == '\r') {
      accumulator->Add("\\r");
    } else if (c == '\\') {
      accumulator->Add("\\\\");
    } else if (!std::isprint(c)) {
      accumulator->Add("\\x%02x", c);
    } else {
      accumulator->Put(static_cast<char>(c));
    }
  }
}

384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409
// static
Handle<String> String::Trim(Isolate* isolate, Handle<String> string,
                            TrimMode mode) {
  string = String::Flatten(isolate, string);
  int const length = string->length();

  // Perform left trimming if requested.
  int left = 0;
  if (mode == kTrim || mode == kTrimStart) {
    while (left < length && IsWhiteSpaceOrLineTerminator(string->Get(left))) {
      left++;
    }
  }

  // Perform right trimming if requested.
  int right = length;
  if (mode == kTrim || mode == kTrimEnd) {
    while (right > left &&
           IsWhiteSpaceOrLineTerminator(string->Get(right - 1))) {
      right--;
    }
  }

  return isolate->factory()->NewSubString(string, left, right);
}

410 411 412 413 414 415 416 417 418 419
int32_t String::ToArrayIndex(Address addr) {
  DisallowHeapAllocation no_gc;
  String key(addr);

  uint32_t index;
  if (!key.AsArrayIndex(&index)) return -1;
  if (index <= INT_MAX) return index;
  return -1;
}

420 421 422 423
bool String::LooksValid() {
  // TODO(leszeks): Maybe remove this check entirely, Heap::Contains uses
  // basically the same logic as the way we access the heap in the first place.
  // RO_SPACE objects should always be valid.
424
  if (ReadOnlyHeap::Contains(*this)) return true;
425
  BasicMemoryChunk* chunk = BasicMemoryChunk::FromHeapObject(*this);
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
  if (chunk->heap() == nullptr) return false;
  return chunk->heap()->Contains(*this);
}

namespace {

bool AreDigits(const uint8_t* s, int from, int to) {
  for (int i = from; i < to; i++) {
    if (s[i] < '0' || s[i] > '9') return false;
  }

  return true;
}

int ParseDecimalInteger(const uint8_t* s, int from, int to) {
  DCHECK_LT(to - from, 10);  // Overflow is not possible.
  DCHECK(from < to);
  int d = s[from] - '0';

  for (int i = from + 1; i < to; i++) {
    d = 10 * d + (s[i] - '0');
  }

  return d;
}

}  // namespace

// static
Handle<Object> String::ToNumber(Isolate* isolate, Handle<String> subject) {
  // Flatten {subject} string first.
  subject = String::Flatten(isolate, subject);

  // Fast array index case.
  uint32_t index;
  if (subject->AsArrayIndex(&index)) {
    return isolate->factory()->NewNumberFromUint(index);
  }

  // Fast case: short integer or some sorts of junk values.
  if (subject->IsSeqOneByteString()) {
    int len = subject->length();
468
    if (len == 0) return handle(Smi::zero(), isolate);
469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522

    DisallowHeapAllocation no_gc;
    uint8_t const* data =
        Handle<SeqOneByteString>::cast(subject)->GetChars(no_gc);
    bool minus = (data[0] == '-');
    int start_pos = (minus ? 1 : 0);

    if (start_pos == len) {
      return isolate->factory()->nan_value();
    } else if (data[start_pos] > '9') {
      // Fast check for a junk value. A valid string may start from a
      // whitespace, a sign ('+' or '-'), the decimal point, a decimal digit
      // or the 'I' character ('Infinity'). All of that have codes not greater
      // than '9' except 'I' and &nbsp;.
      if (data[start_pos] != 'I' && data[start_pos] != 0xA0) {
        return isolate->factory()->nan_value();
      }
    } else if (len - start_pos < 10 && AreDigits(data, start_pos, len)) {
      // The maximal/minimal smi has 10 digits. If the string has less digits
      // we know it will fit into the smi-data type.
      int d = ParseDecimalInteger(data, start_pos, len);
      if (minus) {
        if (d == 0) return isolate->factory()->minus_zero_value();
        d = -d;
      } else if (!subject->HasHashCode() && len <= String::kMaxArrayIndexSize &&
                 (len == 1 || data[0] != '0')) {
        // String hash is not calculated yet but all the data are present.
        // Update the hash field to speed up sequential convertions.
        uint32_t hash = StringHasher::MakeArrayIndexHash(d, len);
#ifdef DEBUG
        subject->Hash();  // Force hash calculation.
        DCHECK_EQ(static_cast<int>(subject->hash_field()),
                  static_cast<int>(hash));
#endif
        subject->set_hash_field(hash);
      }
      return handle(Smi::FromInt(d), isolate);
    }
  }

  // Slower case.
  int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY;
  return isolate->factory()->NewNumber(StringToDouble(isolate, subject, flags));
}

String::FlatContent String::GetFlatContent(
    const DisallowHeapAllocation& no_gc) {
  USE(no_gc);
  int length = this->length();
  StringShape shape(*this);
  String string = *this;
  int offset = 0;
  if (shape.representation_tag() == kConsStringTag) {
    ConsString cons = ConsString::cast(string);
523
    if (cons.second().length() != 0) {
524 525
      return FlatContent();
    }
526
    string = cons.first();
527 528 529
    shape = StringShape(string);
  } else if (shape.representation_tag() == kSlicedStringTag) {
    SlicedString slice = SlicedString::cast(string);
530 531
    offset = slice.offset();
    string = slice.parent();
532 533 534 535 536 537
    shape = StringShape(string);
    DCHECK(shape.representation_tag() != kConsStringTag &&
           shape.representation_tag() != kSlicedStringTag);
  }
  if (shape.representation_tag() == kThinStringTag) {
    ThinString thin = ThinString::cast(string);
538
    string = thin.actual();
539 540 541 542 543 544 545
    shape = StringShape(string);
    DCHECK(!shape.IsCons());
    DCHECK(!shape.IsSliced());
  }
  if (shape.encoding_tag() == kOneByteStringTag) {
    const uint8_t* start;
    if (shape.representation_tag() == kSeqStringTag) {
546
      start = SeqOneByteString::cast(string).GetChars(no_gc);
547
    } else {
548
      start = ExternalOneByteString::cast(string).GetChars();
549 550 551 552 553 554
    }
    return FlatContent(start + offset, length);
  } else {
    DCHECK_EQ(shape.encoding_tag(), kTwoByteStringTag);
    const uc16* start;
    if (shape.representation_tag() == kSeqStringTag) {
555
      start = SeqTwoByteString::cast(string).GetChars(no_gc);
556
    } else {
557
      start = ExternalTwoByteString::cast(string).GetChars();
558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619
    }
    return FlatContent(start + offset, length);
  }
}

std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
                                          RobustnessFlag robust_flag,
                                          int offset, int length,
                                          int* length_return) {
  if (robust_flag == ROBUST_STRING_TRAVERSAL && !LooksValid()) {
    return std::unique_ptr<char[]>();
  }
  // Negative length means the to the end of the string.
  if (length < 0) length = kMaxInt - offset;

  // Compute the size of the UTF-8 string. Start at the specified offset.
  StringCharacterStream stream(*this, offset);
  int character_position = offset;
  int utf8_bytes = 0;
  int last = unibrow::Utf16::kNoPreviousCharacter;
  while (stream.HasMore() && character_position++ < offset + length) {
    uint16_t character = stream.GetNext();
    utf8_bytes += unibrow::Utf8::Length(character, last);
    last = character;
  }

  if (length_return) {
    *length_return = utf8_bytes;
  }

  char* result = NewArray<char>(utf8_bytes + 1);

  // Convert the UTF-16 string to a UTF-8 buffer. Start at the specified offset.
  stream.Reset(*this, offset);
  character_position = offset;
  int utf8_byte_position = 0;
  last = unibrow::Utf16::kNoPreviousCharacter;
  while (stream.HasMore() && character_position++ < offset + length) {
    uint16_t character = stream.GetNext();
    if (allow_nulls == DISALLOW_NULLS && character == 0) {
      character = ' ';
    }
    utf8_byte_position +=
        unibrow::Utf8::Encode(result + utf8_byte_position, character, last);
    last = character;
  }
  result[utf8_byte_position] = 0;
  return std::unique_ptr<char[]>(result);
}

std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
                                          RobustnessFlag robust_flag,
                                          int* length_return) {
  return ToCString(allow_nulls, robust_flag, 0, -1, length_return);
}

template <typename sinkchar>
void String::WriteToFlat(String src, sinkchar* sink, int f, int t) {
  DisallowHeapAllocation no_gc;
  String source = src;
  int from = f;
  int to = t;
620
  while (from < to) {
621
    DCHECK_LE(0, from);
622
    DCHECK_LE(to, source.length());
623 624
    switch (StringShape(source).full_representation_tag()) {
      case kOneByteStringTag | kExternalStringTag: {
625
        CopyChars(sink, ExternalOneByteString::cast(source).GetChars() + from,
626 627 628 629
                  to - from);
        return;
      }
      case kTwoByteStringTag | kExternalStringTag: {
630
        const uc16* data = ExternalTwoByteString::cast(source).GetChars();
631 632 633 634
        CopyChars(sink, data + from, to - from);
        return;
      }
      case kOneByteStringTag | kSeqStringTag: {
635
        CopyChars(sink, SeqOneByteString::cast(source).GetChars(no_gc) + from,
636 637 638 639
                  to - from);
        return;
      }
      case kTwoByteStringTag | kSeqStringTag: {
640
        CopyChars(sink, SeqTwoByteString::cast(source).GetChars(no_gc) + from,
641 642 643 644 645 646
                  to - from);
        return;
      }
      case kOneByteStringTag | kConsStringTag:
      case kTwoByteStringTag | kConsStringTag: {
        ConsString cons_string = ConsString::cast(source);
647 648
        String first = cons_string.first();
        int boundary = first.length();
649 650 651 652
        if (to - boundary >= boundary - from) {
          // Right hand side is longer.  Recurse over left.
          if (from < boundary) {
            WriteToFlat(first, sink, from, boundary);
653
            if (from == 0 && cons_string.second() == first) {
654 655 656 657 658 659 660 661 662
              CopyChars(sink + boundary, sink, boundary);
              return;
            }
            sink += boundary - from;
            from = 0;
          } else {
            from -= boundary;
          }
          to -= boundary;
663
          source = cons_string.second();
664 665 666
        } else {
          // Left hand side is longer.  Recurse over right.
          if (to > boundary) {
667
            String second = cons_string.second();
668 669 670 671
            // When repeatedly appending to a string, we get a cons string that
            // is unbalanced to the left, a list, essentially.  We inline the
            // common case of sequential one-byte right child.
            if (to - boundary == 1) {
672 673
              sink[boundary - from] = static_cast<sinkchar>(second.Get(0));
            } else if (second.IsSeqOneByteString()) {
674
              CopyChars(sink + boundary - from,
675
                        SeqOneByteString::cast(second).GetChars(no_gc),
676 677 678 679 680 681 682 683 684 685 686 687 688
                        to - boundary);
            } else {
              WriteToFlat(second, sink + boundary - from, 0, to - boundary);
            }
            to = boundary;
          }
          source = first;
        }
        break;
      }
      case kOneByteStringTag | kSlicedStringTag:
      case kTwoByteStringTag | kSlicedStringTag: {
        SlicedString slice = SlicedString::cast(source);
689 690
        unsigned offset = slice.offset();
        WriteToFlat(slice.parent(), sink, from + offset, to + offset);
691 692 693 694
        return;
      }
      case kOneByteStringTag | kThinStringTag:
      case kTwoByteStringTag | kThinStringTag:
695
        source = ThinString::cast(source).actual();
696 697 698
        break;
    }
  }
699
  DCHECK_EQ(from, to);
700 701 702
}

template <typename SourceChar>
703
static void CalculateLineEndsImpl(std::vector<int>* line_ends,
704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722
                                  Vector<const SourceChar> src,
                                  bool include_ending_line) {
  const int src_len = src.length();
  for (int i = 0; i < src_len - 1; i++) {
    SourceChar current = src[i];
    SourceChar next = src[i + 1];
    if (IsLineTerminatorSequence(current, next)) line_ends->push_back(i);
  }

  if (src_len > 0 && IsLineTerminatorSequence(src[src_len - 1], 0)) {
    line_ends->push_back(src_len - 1);
  }
  if (include_ending_line) {
    // Include one character beyond the end of script. The rewriter uses that
    // position for the implicit return statement.
    line_ends->push_back(src_len);
  }
}

723 724 725 726
template <typename LocalIsolate>
Handle<FixedArray> String::CalculateLineEnds(LocalIsolate* isolate,
                                             Handle<String> src,
                                             bool include_ending_line) {
727 728 729 730 731 732 733 734 735 736 737 738
  src = Flatten(isolate, src);
  // Rough estimate of line count based on a roughly estimated average
  // length of (unpacked) code.
  int line_count_estimate = src->length() >> 4;
  std::vector<int> line_ends;
  line_ends.reserve(line_count_estimate);
  {
    DisallowHeapAllocation no_allocation;  // ensure vectors stay valid.
    // Dispatch on type of strings.
    String::FlatContent content = src->GetFlatContent(no_allocation);
    DCHECK(content.IsFlat());
    if (content.IsOneByte()) {
739
      CalculateLineEndsImpl(&line_ends, content.ToOneByteVector(),
740 741
                            include_ending_line);
    } else {
742
      CalculateLineEndsImpl(&line_ends, content.ToUC16Vector(),
743 744 745 746
                            include_ending_line);
    }
  }
  int line_count = static_cast<int>(line_ends.size());
747
  Handle<FixedArray> array =
748
      isolate->factory()->NewFixedArray(line_count, AllocationType::kOld);
749 750 751 752 753 754
  for (int i = 0; i < line_count; i++) {
    array->set(i, Smi::FromInt(line_ends[i]));
  }
  return array;
}

755 756 757
template Handle<FixedArray> String::CalculateLineEnds(Isolate* isolate,
                                                      Handle<String> src,
                                                      bool include_ending_line);
758 759 760
template Handle<FixedArray> String::CalculateLineEnds(OffThreadIsolate* isolate,
                                                      Handle<String> src,
                                                      bool include_ending_line);
761

762 763 764 765
bool String::SlowEquals(String other) {
  DisallowHeapAllocation no_gc;
  // Fast check: negative check with lengths.
  int len = length();
766
  if (len != other.length()) return false;
767 768 769 770
  if (len == 0) return true;

  // Fast check: if at least one ThinString is involved, dereference it/them
  // and restart.
771 772
  if (this->IsThinString() || other.IsThinString()) {
    if (other.IsThinString()) other = ThinString::cast(other).actual();
773
    if (this->IsThinString()) {
774
      return ThinString::cast(*this).actual().Equals(other);
775 776 777 778 779 780 781
    } else {
      return this->Equals(other);
    }
  }

  // Fast check: if hash code is computed for both strings
  // a fast negative check can be performed.
782
  if (HasHashCode() && other.HasHashCode()) {
783 784
#ifdef ENABLE_SLOW_DCHECKS
    if (FLAG_enable_slow_asserts) {
785
      if (Hash() != other.Hash()) {
786 787
        bool found_difference = false;
        for (int i = 0; i < len; i++) {
788
          if (Get(i) != other.Get(i)) {
789 790 791 792 793 794 795 796
            found_difference = true;
            break;
          }
        }
        DCHECK(found_difference);
      }
    }
#endif
797
    if (Hash() != other.Hash()) return false;
798 799 800 801
  }

  // We know the strings are both non-empty. Compare the first chars
  // before we try to flatten the strings.
802
  if (this->Get(0) != other.Get(0)) return false;
803

804 805 806
  if (IsSeqOneByteString() && other.IsSeqOneByteString()) {
    const uint8_t* str1 = SeqOneByteString::cast(*this).GetChars(no_gc);
    const uint8_t* str2 = SeqOneByteString::cast(other).GetChars(no_gc);
807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824
    return CompareRawStringContents(str1, str2, len);
  }

  StringComparator comparator;
  return comparator.Equals(*this, other);
}

bool String::SlowEquals(Isolate* isolate, Handle<String> one,
                        Handle<String> two) {
  // Fast check: negative check with lengths.
  int one_length = one->length();
  if (one_length != two->length()) return false;
  if (one_length == 0) return true;

  // Fast check: if at least one ThinString is involved, dereference it/them
  // and restart.
  if (one->IsThinString() || two->IsThinString()) {
    if (one->IsThinString())
825
      one = handle(ThinString::cast(*one).actual(), isolate);
826
    if (two->IsThinString())
827
      two = handle(ThinString::cast(*two).actual(), isolate);
828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862
    return String::Equals(isolate, one, two);
  }

  // Fast check: if hash code is computed for both strings
  // a fast negative check can be performed.
  if (one->HasHashCode() && two->HasHashCode()) {
#ifdef ENABLE_SLOW_DCHECKS
    if (FLAG_enable_slow_asserts) {
      if (one->Hash() != two->Hash()) {
        bool found_difference = false;
        for (int i = 0; i < one_length; i++) {
          if (one->Get(i) != two->Get(i)) {
            found_difference = true;
            break;
          }
        }
        DCHECK(found_difference);
      }
    }
#endif
    if (one->Hash() != two->Hash()) return false;
  }

  // We know the strings are both non-empty. Compare the first chars
  // before we try to flatten the strings.
  if (one->Get(0) != two->Get(0)) return false;

  one = String::Flatten(isolate, one);
  two = String::Flatten(isolate, two);

  DisallowHeapAllocation no_gc;
  String::FlatContent flat1 = one->GetFlatContent(no_gc);
  String::FlatContent flat2 = two->GetFlatContent(no_gc);

  if (flat1.IsOneByte() && flat2.IsOneByte()) {
863 864
    return CompareRawStringContents(flat1.ToOneByteVector().begin(),
                                    flat2.ToOneByteVector().begin(),
865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913
                                    one_length);
  } else {
    for (int i = 0; i < one_length; i++) {
      if (flat1.Get(i) != flat2.Get(i)) return false;
    }
    return true;
  }
}

// static
ComparisonResult String::Compare(Isolate* isolate, Handle<String> x,
                                 Handle<String> y) {
  // A few fast case tests before we flatten.
  if (x.is_identical_to(y)) {
    return ComparisonResult::kEqual;
  } else if (y->length() == 0) {
    return x->length() == 0 ? ComparisonResult::kEqual
                            : ComparisonResult::kGreaterThan;
  } else if (x->length() == 0) {
    return ComparisonResult::kLessThan;
  }

  int const d = x->Get(0) - y->Get(0);
  if (d < 0) {
    return ComparisonResult::kLessThan;
  } else if (d > 0) {
    return ComparisonResult::kGreaterThan;
  }

  // Slow case.
  x = String::Flatten(isolate, x);
  y = String::Flatten(isolate, y);

  DisallowHeapAllocation no_gc;
  ComparisonResult result = ComparisonResult::kEqual;
  int prefix_length = x->length();
  if (y->length() < prefix_length) {
    prefix_length = y->length();
    result = ComparisonResult::kGreaterThan;
  } else if (y->length() > prefix_length) {
    result = ComparisonResult::kLessThan;
  }
  int r;
  String::FlatContent x_content = x->GetFlatContent(no_gc);
  String::FlatContent y_content = y->GetFlatContent(no_gc);
  if (x_content.IsOneByte()) {
    Vector<const uint8_t> x_chars = x_content.ToOneByteVector();
    if (y_content.IsOneByte()) {
      Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
914
      r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
915 916
    } else {
      Vector<const uc16> y_chars = y_content.ToUC16Vector();
917
      r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
918 919 920 921 922
    }
  } else {
    Vector<const uc16> x_chars = x_content.ToUC16Vector();
    if (y_content.IsOneByte()) {
      Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
923
      r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
924 925
    } else {
      Vector<const uc16> y_chars = y_content.ToUC16Vector();
926
      r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098
    }
  }
  if (r < 0) {
    result = ComparisonResult::kLessThan;
  } else if (r > 0) {
    result = ComparisonResult::kGreaterThan;
  }
  return result;
}

Object String::IndexOf(Isolate* isolate, Handle<Object> receiver,
                       Handle<Object> search, Handle<Object> position) {
  if (receiver->IsNullOrUndefined(isolate)) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
                              isolate->factory()->NewStringFromAsciiChecked(
                                  "String.prototype.indexOf")));
  }
  Handle<String> receiver_string;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
                                     Object::ToString(isolate, receiver));

  Handle<String> search_string;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
                                     Object::ToString(isolate, search));

  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
                                     Object::ToInteger(isolate, position));

  uint32_t index = receiver_string->ToValidIndex(*position);
  return Smi::FromInt(
      String::IndexOf(isolate, receiver_string, search_string, index));
}

namespace {

template <typename T>
int SearchString(Isolate* isolate, String::FlatContent receiver_content,
                 Vector<T> pat_vector, int start_index) {
  if (receiver_content.IsOneByte()) {
    return SearchString(isolate, receiver_content.ToOneByteVector(), pat_vector,
                        start_index);
  }
  return SearchString(isolate, receiver_content.ToUC16Vector(), pat_vector,
                      start_index);
}

}  // namespace

int String::IndexOf(Isolate* isolate, Handle<String> receiver,
                    Handle<String> search, int start_index) {
  DCHECK_LE(0, start_index);
  DCHECK(start_index <= receiver->length());

  uint32_t search_length = search->length();
  if (search_length == 0) return start_index;

  uint32_t receiver_length = receiver->length();
  if (start_index + search_length > receiver_length) return -1;

  receiver = String::Flatten(isolate, receiver);
  search = String::Flatten(isolate, search);

  DisallowHeapAllocation no_gc;  // ensure vectors stay valid
  // Extract flattened substrings of cons strings before getting encoding.
  String::FlatContent receiver_content = receiver->GetFlatContent(no_gc);
  String::FlatContent search_content = search->GetFlatContent(no_gc);

  // dispatch on type of strings
  if (search_content.IsOneByte()) {
    Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
    return SearchString<const uint8_t>(isolate, receiver_content, pat_vector,
                                       start_index);
  }
  Vector<const uc16> pat_vector = search_content.ToUC16Vector();
  return SearchString<const uc16>(isolate, receiver_content, pat_vector,
                                  start_index);
}

MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
                                            Handle<String> replacement,
                                            int start_index) {
  DCHECK_GE(start_index, 0);

  Factory* factory = isolate->factory();

  const int replacement_length = replacement->length();
  const int captures_length = match->CaptureCount();

  replacement = String::Flatten(isolate, replacement);

  Handle<String> dollar_string =
      factory->LookupSingleCharacterStringFromCode('$');
  int next_dollar_ix =
      String::IndexOf(isolate, replacement, dollar_string, start_index);
  if (next_dollar_ix < 0) {
    return replacement;
  }

  IncrementalStringBuilder builder(isolate);

  if (next_dollar_ix > 0) {
    builder.AppendString(factory->NewSubString(replacement, 0, next_dollar_ix));
  }

  while (true) {
    const int peek_ix = next_dollar_ix + 1;
    if (peek_ix >= replacement_length) {
      builder.AppendCharacter('$');
      return builder.Finish();
    }

    int continue_from_ix = -1;
    const uint16_t peek = replacement->Get(peek_ix);
    switch (peek) {
      case '$':  // $$
        builder.AppendCharacter('$');
        continue_from_ix = peek_ix + 1;
        break;
      case '&':  // $& - match
        builder.AppendString(match->GetMatch());
        continue_from_ix = peek_ix + 1;
        break;
      case '`':  // $` - prefix
        builder.AppendString(match->GetPrefix());
        continue_from_ix = peek_ix + 1;
        break;
      case '\'':  // $' - suffix
        builder.AppendString(match->GetSuffix());
        continue_from_ix = peek_ix + 1;
        break;
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9': {
        // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
        int scaled_index = (peek - '0');
        int advance = 1;

        if (peek_ix + 1 < replacement_length) {
          const uint16_t next_peek = replacement->Get(peek_ix + 1);
          if (next_peek >= '0' && next_peek <= '9') {
            const int new_scaled_index = scaled_index * 10 + (next_peek - '0');
            if (new_scaled_index < captures_length) {
              scaled_index = new_scaled_index;
              advance = 2;
            }
          }
        }

        if (scaled_index == 0 || scaled_index >= captures_length) {
          builder.AppendCharacter('$');
          continue_from_ix = peek_ix;
          break;
        }

        bool capture_exists;
        Handle<String> capture;
        ASSIGN_RETURN_ON_EXCEPTION(
            isolate, capture, match->GetCapture(scaled_index, &capture_exists),
            String);
        if (capture_exists) builder.AppendString(capture);
        continue_from_ix = peek_ix + advance;
        break;
      }
      case '<': {  // $<name> - named capture
1099
        using CaptureState = String::Match::CaptureState;
1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126

        if (!match->HasNamedCaptures()) {
          builder.AppendCharacter('$');
          continue_from_ix = peek_ix;
          break;
        }

        Handle<String> bracket_string =
            factory->LookupSingleCharacterStringFromCode('>');
        const int closing_bracket_ix =
            String::IndexOf(isolate, replacement, bracket_string, peek_ix + 1);

        if (closing_bracket_ix == -1) {
          // No closing bracket was found, treat '$<' as a string literal.
          builder.AppendCharacter('$');
          continue_from_ix = peek_ix;
          break;
        }

        Handle<String> capture_name =
            factory->NewSubString(replacement, peek_ix + 1, closing_bracket_ix);
        Handle<String> capture;
        CaptureState capture_state;
        ASSIGN_RETURN_ON_EXCEPTION(
            isolate, capture,
            match->GetNamedCapture(capture_name, &capture_state), String);

1127 1128
        if (capture_state == CaptureState::MATCHED) {
          builder.AppendString(capture);
1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283
        }

        continue_from_ix = closing_bracket_ix + 1;
        break;
      }
      default:
        builder.AppendCharacter('$');
        continue_from_ix = peek_ix;
        break;
    }

    // Go the the next $ in the replacement.
    // TODO(jgruber): Single-char lookups could be much more efficient.
    DCHECK_NE(continue_from_ix, -1);
    next_dollar_ix =
        String::IndexOf(isolate, replacement, dollar_string, continue_from_ix);

    // Return if there are no more $ characters in the replacement. If we
    // haven't reached the end, we need to append the suffix.
    if (next_dollar_ix < 0) {
      if (continue_from_ix < replacement_length) {
        builder.AppendString(factory->NewSubString(
            replacement, continue_from_ix, replacement_length));
      }
      return builder.Finish();
    }

    // Append substring between the previous and the next $ character.
    if (next_dollar_ix > continue_from_ix) {
      builder.AppendString(
          factory->NewSubString(replacement, continue_from_ix, next_dollar_ix));
    }
  }

  UNREACHABLE();
}

namespace {  // for String.Prototype.lastIndexOf

template <typename schar, typename pchar>
int StringMatchBackwards(Vector<const schar> subject,
                         Vector<const pchar> pattern, int idx) {
  int pattern_length = pattern.length();
  DCHECK_GE(pattern_length, 1);
  DCHECK(idx + pattern_length <= subject.length());

  if (sizeof(schar) == 1 && sizeof(pchar) > 1) {
    for (int i = 0; i < pattern_length; i++) {
      uc16 c = pattern[i];
      if (c > String::kMaxOneByteCharCode) {
        return -1;
      }
    }
  }

  pchar pattern_first_char = pattern[0];
  for (int i = idx; i >= 0; i--) {
    if (subject[i] != pattern_first_char) continue;
    int j = 1;
    while (j < pattern_length) {
      if (pattern[j] != subject[i + j]) {
        break;
      }
      j++;
    }
    if (j == pattern_length) {
      return i;
    }
  }
  return -1;
}

}  // namespace

Object String::LastIndexOf(Isolate* isolate, Handle<Object> receiver,
                           Handle<Object> search, Handle<Object> position) {
  if (receiver->IsNullOrUndefined(isolate)) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
                              isolate->factory()->NewStringFromAsciiChecked(
                                  "String.prototype.lastIndexOf")));
  }
  Handle<String> receiver_string;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
                                     Object::ToString(isolate, receiver));

  Handle<String> search_string;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
                                     Object::ToString(isolate, search));

  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
                                     Object::ToNumber(isolate, position));

  uint32_t start_index;

  if (position->IsNaN()) {
    start_index = receiver_string->length();
  } else {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
                                       Object::ToInteger(isolate, position));
    start_index = receiver_string->ToValidIndex(*position);
  }

  uint32_t pattern_length = search_string->length();
  uint32_t receiver_length = receiver_string->length();

  if (start_index + pattern_length > receiver_length) {
    start_index = receiver_length - pattern_length;
  }

  if (pattern_length == 0) {
    return Smi::FromInt(start_index);
  }

  receiver_string = String::Flatten(isolate, receiver_string);
  search_string = String::Flatten(isolate, search_string);

  int last_index = -1;
  DisallowHeapAllocation no_gc;  // ensure vectors stay valid

  String::FlatContent receiver_content = receiver_string->GetFlatContent(no_gc);
  String::FlatContent search_content = search_string->GetFlatContent(no_gc);

  if (search_content.IsOneByte()) {
    Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
    if (receiver_content.IsOneByte()) {
      last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
                                        pat_vector, start_index);
    } else {
      last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
                                        pat_vector, start_index);
    }
  } else {
    Vector<const uc16> pat_vector = search_content.ToUC16Vector();
    if (receiver_content.IsOneByte()) {
      last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
                                        pat_vector, start_index);
    } else {
      last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
                                        pat_vector, start_index);
    }
  }
  return Smi::FromInt(last_index);
}

template <>
bool String::IsEqualTo(Vector<const uint8_t> str) {
  return IsOneByteEqualTo(str);
}

template <>
bool String::IsEqualTo(Vector<const uc16> str) {
  return IsTwoByteEqualTo(str);
}

1284 1285 1286 1287 1288 1289
bool String::HasOneBytePrefix(Vector<const char> str) {
  int slen = str.length();
  if (slen > length()) return false;
  DisallowHeapAllocation no_gc;
  FlatContent content = GetFlatContent(no_gc);
  if (content.IsOneByte()) {
1290
    return CompareChars(content.ToOneByteVector().begin(), str.begin(), slen) ==
1291 1292
           0;
  }
1293
  return CompareChars(content.ToUC16Vector().begin(), str.begin(), slen) == 0;
1294 1295
}

1296 1297 1298 1299 1300 1301
bool String::IsOneByteEqualTo(Vector<const uint8_t> str) {
  int slen = length();
  if (str.length() != slen) return false;
  DisallowHeapAllocation no_gc;
  FlatContent content = GetFlatContent(no_gc);
  if (content.IsOneByte()) {
1302
    return CompareChars(content.ToOneByteVector().begin(), str.begin(), slen) ==
1303 1304
           0;
  }
1305
  return CompareChars(content.ToUC16Vector().begin(), str.begin(), slen) == 0;
1306 1307 1308 1309 1310 1311 1312 1313
}

bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
  int slen = length();
  if (str.length() != slen) return false;
  DisallowHeapAllocation no_gc;
  FlatContent content = GetFlatContent(no_gc);
  if (content.IsOneByte()) {
1314
    return CompareChars(content.ToOneByteVector().begin(), str.begin(), slen) ==
1315 1316
           0;
  }
1317
  return CompareChars(content.ToUC16Vector().begin(), str.begin(), slen) == 0;
1318 1319
}

1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347
namespace {

template <typename Char>
uint32_t HashString(String string, size_t start, int length, uint64_t seed) {
  DisallowHeapAllocation no_gc;

  if (length > String::kMaxHashCalcLength) {
    return StringHasher::GetTrivialHash(length);
  }

  std::unique_ptr<Char[]> buffer;
  const Char* chars;

  if (string.IsConsString()) {
    DCHECK_EQ(0, start);
    DCHECK(!string.IsFlat());
    buffer.reset(new Char[length]);
    String::WriteToFlat(string, buffer.get(), 0, length);
    chars = buffer.get();
  } else {
    chars = string.GetChars<Char>(no_gc) + start;
  }

  return StringHasher::HashSequentialString<Char>(chars, length, seed);
}

}  // namespace

1348
uint32_t String::ComputeAndSetHash() {
1349 1350 1351 1352 1353
  DisallowHeapAllocation no_gc;
  // Should only be called if hash code has not yet been computed.
  DCHECK(!HasHashCode());

  // Store the hash code in the object.
1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366
  uint64_t seed = HashSeed(GetReadOnlyRoots());
  size_t start = 0;
  String string = *this;
  if (string.IsSlicedString()) {
    SlicedString sliced = SlicedString::cast(string);
    start = sliced.offset();
    string = sliced.parent();
  }
  if (string.IsConsString() && string.IsFlat()) {
    string = ConsString::cast(string).first();
  }
  if (string.IsThinString()) {
    string = ThinString::cast(string).actual();
1367
    if (length() == string.length()) {
1368 1369 1370 1371 1372 1373 1374
      set_hash_field(string.hash_field());
      return hash_field() >> kHashShift;
    }
  }
  uint32_t field = string.IsOneByteRepresentation()
                       ? HashString<uint8_t>(string, start, length(), seed)
                       : HashString<uint16_t>(string, start, length(), seed);
1375 1376 1377 1378 1379 1380 1381 1382 1383
  set_hash_field(field);

  // Check the hash code is there.
  DCHECK(HasHashCode());
  uint32_t result = field >> kHashShift;
  DCHECK_NE(result, 0);  // Ensure that the hash value of 0 is never computed.
  return result;
}

1384 1385
bool String::SlowAsArrayIndex(uint32_t* index) {
  DisallowHeapAllocation no_gc;
1386
  int length = this->length();
1387 1388 1389
  if (length <= kMaxCachedArrayIndexLength) {
    Hash();  // Force computation of hash code.
    uint32_t field = hash_field();
1390
    if ((field & kIsNotIntegerIndexMask) != 0) return false;
1391 1392 1393
    *index = ArrayIndexValueBits::decode(field);
    return true;
  }
1394 1395
  if (length == 0 || length > kMaxArrayIndexSize) return false;
  StringCharacterStream stream(*this);
1396
  return StringToIndex(&stream, index);
1397 1398
}

1399
bool String::SlowAsIntegerIndex(size_t* index) {
1400
  DisallowHeapAllocation no_gc;
1401 1402 1403
  int length = this->length();
  if (length <= kMaxCachedArrayIndexLength) {
    Hash();  // Force computation of hash code.
1404
    uint32_t field = hash_field();
1405
    if ((field & kIsNotIntegerIndexMask) != 0) return false;
1406 1407 1408
    *index = ArrayIndexValueBits::decode(field);
    return true;
  }
1409 1410
  if (length == 0 || length > kMaxIntegerIndexSize) return false;
  StringCharacterStream stream(*this);
1411 1412
  return StringToIndex<StringCharacterStream, size_t, kToIntegerIndex>(&stream,
                                                                       index);
1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440
}

void String::PrintOn(FILE* file) {
  int length = this->length();
  for (int i = 0; i < length; i++) {
    PrintF(file, "%c", Get(i));
  }
}

Handle<String> SeqString::Truncate(Handle<SeqString> string, int new_length) {
  if (new_length == 0) return string->GetReadOnlyRoots().empty_string_handle();

  int new_size, old_size;
  int old_length = string->length();
  if (old_length <= new_length) return string;

  if (string->IsSeqOneByteString()) {
    old_size = SeqOneByteString::SizeFor(old_length);
    new_size = SeqOneByteString::SizeFor(new_length);
  } else {
    DCHECK(string->IsSeqTwoByteString());
    old_size = SeqTwoByteString::SizeFor(old_length);
    new_size = SeqTwoByteString::SizeFor(new_length);
  }

  int delta = old_size - new_size;

  Address start_of_string = string->address();
1441 1442
  DCHECK(IsAligned(start_of_string, kObjectAlignment));
  DCHECK(IsAligned(start_of_string + new_size, kObjectAlignment));
1443 1444 1445 1446

  Heap* heap = Heap::FromWritableHeapObject(*string);
  // Sizes are pointer size aligned, so that we can use filler objects
  // that are a multiple of pointer size.
1447 1448
  heap->CreateFillerObjectAt(start_of_string + new_size, delta,
                             ClearRecordedSlots::kNo);
1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467
  // We are storing the new length using release store after creating a filler
  // for the left-over space to avoid races with the sweeper thread.
  string->synchronized_set_length(new_length);

  return string;
}

void SeqOneByteString::clear_padding() {
  int data_size = SeqString::kHeaderSize + length() * kOneByteSize;
  memset(reinterpret_cast<void*>(address() + data_size), 0,
         SizeFor(length()) - data_size);
}

void SeqTwoByteString::clear_padding() {
  int data_size = SeqString::kHeaderSize + length() * kUC16Size;
  memset(reinterpret_cast<void*>(address() + data_size), 0,
         SizeFor(length()) - data_size);
}

1468
uint16_t ConsString::Get(int index) {
1469 1470 1471
  DCHECK(index >= 0 && index < this->length());

  // Check for a flattened cons string
1472
  if (second().length() == 0) {
1473
    String left = first();
1474
    return left.Get(index);
1475 1476 1477 1478 1479 1480 1481
  }

  String string = String::cast(*this);

  while (true) {
    if (StringShape(string).IsCons()) {
      ConsString cons_string = ConsString::cast(string);
1482 1483
      String left = cons_string.first();
      if (left.length() > index) {
1484 1485
        string = left;
      } else {
1486 1487
        index -= left.length();
        string = cons_string.second();
1488 1489
      }
    } else {
1490
      return string.Get(index);
1491 1492 1493 1494 1495 1496
    }
  }

  UNREACHABLE();
}

1497
uint16_t ThinString::Get(int index) { return actual().Get(index); }
1498

1499
uint16_t SlicedString::Get(int index) { return parent().Get(offset() + index); }
1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515

int ExternalString::ExternalPayloadSize() const {
  int length_multiplier = IsTwoByteRepresentation() ? i::kShortSize : kCharSize;
  return length() * length_multiplier;
}

FlatStringReader::FlatStringReader(Isolate* isolate, Handle<String> str)
    : Relocatable(isolate), str_(str.location()), length_(str->length()) {
  PostGarbageCollection();
}

FlatStringReader::FlatStringReader(Isolate* isolate, Vector<const char> input)
    : Relocatable(isolate),
      str_(nullptr),
      is_one_byte_(true),
      length_(input.length()),
1516
      start_(input.begin()) {}
1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527

void FlatStringReader::PostGarbageCollection() {
  if (str_ == nullptr) return;
  Handle<String> str(str_);
  DCHECK(str->IsFlat());
  DisallowHeapAllocation no_gc;
  // This does not actually prevent the vector from being relocated later.
  String::FlatContent content = str->GetFlatContent(no_gc);
  DCHECK(content.IsFlat());
  is_one_byte_ = content.IsOneByte();
  if (is_one_byte_) {
1528
    start_ = content.ToOneByteVector().begin();
1529
  } else {
1530
    start_ = content.ToUC16Vector().begin();
1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570
  }
}

void ConsStringIterator::Initialize(ConsString cons_string, int offset) {
  DCHECK(!cons_string.is_null());
  root_ = cons_string;
  consumed_ = offset;
  // Force stack blown condition to trigger restart.
  depth_ = 1;
  maximum_depth_ = kStackSize + depth_;
  DCHECK(StackBlown());
}

String ConsStringIterator::Continue(int* offset_out) {
  DCHECK_NE(depth_, 0);
  DCHECK_EQ(0, *offset_out);
  bool blew_stack = StackBlown();
  String string;
  // Get the next leaf if there is one.
  if (!blew_stack) string = NextLeaf(&blew_stack);
  // Restart search from root.
  if (blew_stack) {
    DCHECK(string.is_null());
    string = Search(offset_out);
  }
  // Ensure future calls return null immediately.
  if (string.is_null()) Reset(ConsString());
  return string;
}

String ConsStringIterator::Search(int* offset_out) {
  ConsString cons_string = root_;
  // Reset the stack, pushing the root string.
  depth_ = 1;
  maximum_depth_ = 1;
  frames_[0] = cons_string;
  const int consumed = consumed_;
  int offset = 0;
  while (true) {
    // Loop until the string is found which contains the target offset.
1571 1572
    String string = cons_string.first();
    int length = string.length();
1573 1574 1575 1576
    int32_t type;
    if (consumed < offset + length) {
      // Target offset is in the left branch.
      // Keep going if we're still in a ConString.
1577
      type = string.map().instance_type();
1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589
      if ((type & kStringRepresentationMask) == kConsStringTag) {
        cons_string = ConsString::cast(string);
        PushLeft(cons_string);
        continue;
      }
      // Tell the stack we're done descending.
      AdjustMaximumDepth();
    } else {
      // Descend right.
      // Update progress through the string.
      offset += length;
      // Keep going if we're still in a ConString.
1590 1591
      string = cons_string.second();
      type = string.map().instance_type();
1592 1593 1594 1595 1596 1597
      if ((type & kStringRepresentationMask) == kConsStringTag) {
        cons_string = ConsString::cast(string);
        PushRight(cons_string);
        continue;
      }
      // Need this to be updated for the current string.
1598
      length = string.length();
1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633
      // Account for the possibility of an empty right leaf.
      // This happens only if we have asked for an offset outside the string.
      if (length == 0) {
        // Reset so future operations will return null immediately.
        Reset(ConsString());
        return String();
      }
      // Tell the stack we're done descending.
      AdjustMaximumDepth();
      // Pop stack so next iteration is in correct place.
      Pop();
    }
    DCHECK_NE(length, 0);
    // Adjust return values and exit.
    consumed_ = offset + length;
    *offset_out = consumed - offset;
    return string;
  }
  UNREACHABLE();
}

String ConsStringIterator::NextLeaf(bool* blew_stack) {
  while (true) {
    // Tree traversal complete.
    if (depth_ == 0) {
      *blew_stack = false;
      return String();
    }
    // We've lost track of higher nodes.
    if (StackBlown()) {
      *blew_stack = true;
      return String();
    }
    // Go right.
    ConsString cons_string = frames_[OffsetForDepth(depth_ - 1)];
1634 1635
    String string = cons_string.second();
    int32_t type = string.map().instance_type();
1636 1637 1638
    if ((type & kStringRepresentationMask) != kConsStringTag) {
      // Pop stack so next iteration is in correct place.
      Pop();
1639
      int length = string.length();
1640 1641 1642 1643 1644 1645 1646 1647 1648 1649
      // Could be a flattened ConsString.
      if (length == 0) continue;
      consumed_ += length;
      return string;
    }
    cons_string = ConsString::cast(string);
    PushRight(cons_string);
    // Need to traverse all the way left.
    while (true) {
      // Continue left.
1650 1651
      string = cons_string.first();
      type = string.map().instance_type();
1652 1653
      if ((type & kStringRepresentationMask) != kConsStringTag) {
        AdjustMaximumDepth();
1654
        int length = string.length();
1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665
        if (length == 0) break;  // Skip empty left-hand sides of ConsStrings.
        consumed_ += length;
        return string;
      }
      cons_string = ConsString::cast(string);
      PushLeft(cons_string);
    }
  }
  UNREACHABLE();
}

1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696
const byte* String::AddressOfCharacterAt(int start_index,
                                         const DisallowHeapAllocation& no_gc) {
  DCHECK(IsFlat());
  String subject = *this;
  if (subject.IsConsString()) {
    subject = ConsString::cast(subject).first();
  } else if (subject.IsSlicedString()) {
    start_index += SlicedString::cast(subject).offset();
    subject = SlicedString::cast(subject).parent();
  }
  if (subject.IsThinString()) {
    subject = ThinString::cast(subject).actual();
  }
  CHECK_LE(0, start_index);
  CHECK_LE(start_index, subject.length());
  if (subject.IsSeqOneByteString()) {
    return reinterpret_cast<const byte*>(
        SeqOneByteString::cast(subject).GetChars(no_gc) + start_index);
  } else if (subject.IsSeqTwoByteString()) {
    return reinterpret_cast<const byte*>(
        SeqTwoByteString::cast(subject).GetChars(no_gc) + start_index);
  } else if (subject.IsExternalOneByteString()) {
    return reinterpret_cast<const byte*>(
        ExternalOneByteString::cast(subject).GetChars() + start_index);
  } else {
    DCHECK(subject.IsExternalTwoByteString());
    return reinterpret_cast<const byte*>(
        ExternalTwoByteString::cast(subject).GetChars() + start_index);
  }
}

1697 1698 1699
template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::WriteToFlat(
    String source, uint16_t* sink, int from, int to);

1700 1701
}  // namespace internal
}  // namespace v8