// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 28

// Check that we can traverse very deep stacks of ConsStrings using
// StringCharacterStream.  Check that Get(int) works on very deep stacks
// of ConsStrings.  These operations may not be very fast, but they
// should be possible without getting errors due to too deep recursion.

#include <stdlib.h>

35
#include "src/api/api-inl.h"
36
#include "src/base/platform/elapsed-timer.h"
37
#include "src/execution/messages.h"
38
#include "src/heap/factory.h"
39
#include "src/heap/heap-inl.h"
40
#include "src/init/v8.h"
41
#include "src/objects/objects-inl.h"
42
#include "src/strings/unicode-decoder.h"
43
#include "test/cctest/cctest.h"
44
#include "test/cctest/heap/heap-utils.h"
45

46
// Adapted from http://en.wikipedia.org/wiki/Multiply-with-carry
47
class MyRandomNumberGenerator {
48
 public:
49
  MyRandomNumberGenerator() { init(); }
50

51 52
  void init(uint32_t seed = 0x5688C73E) {
    static const uint32_t phi = 0x9E3779B9;
53
    c = 362436;
54
    i = kQSize - 1;
55 56 57 58 59 60 61 62 63 64
    Q[0] = seed;
    Q[1] = seed + phi;
    Q[2] = seed + phi + phi;
    for (unsigned j = 3; j < kQSize; j++) {
      Q[j] = Q[j - 3] ^ Q[j - 2] ^ phi ^ j;
    }
  }

  uint32_t next() {
    uint64_t a = 18782;
65
    uint32_t r = 0xFFFFFFFE;
66
    i = (i + 1) & (kQSize - 1);
67 68 69 70 71 72 73 74 75 76
    uint64_t t = a * Q[i] + c;
    c = (t >> 32);
    uint32_t x = static_cast<uint32_t>(t + c);
    if (x < c) {
      x++;
      c++;
    }
    return (Q[i] = r - x);
  }

77
  uint32_t next(int max) { return next() % max; }
78 79

  bool next(double threshold) {
80
    CHECK(threshold >= 0.0 && threshold <= 1.0);
81 82 83
    if (threshold == 1.0) return true;
    if (threshold == 0.0) return false;
    uint32_t value = next() % 100000;
84
    return threshold > static_cast<double>(value) / 100000.0;
85 86 87 88 89 90 91 92 93
  }

 private:
  static const uint32_t kQSize = 4096;
  uint32_t Q[kQSize];
  uint32_t c;
  uint32_t i;
};

94 95
namespace v8 {
namespace internal {
96
namespace test_strings {
97 98 99 100

// Number of building blocks used for the "deep" cons strings: the default
// depth of ConstructBalanced and the depth passed to ConstructLeft /
// ConstructRight in TEST(Traverse).
static const int DEEP_DEPTH = 8 * 1024;
// Larger depth used by TEST(Traverse) for the "deep asymmetric" strings.
static const int SUPER_DEEP_DEPTH = 80 * 1024;

101
class Resource : public v8::String::ExternalStringResource {
102
 public:
103
  Resource(const uc16* data, size_t length) : data_(data), length_(length) {}
104 105 106
  ~Resource() override { i::DeleteArray(data_); }
  const uint16_t* data() const override { return data_; }
  size_t length() const override { return length_; }
107 108 109 110 111 112

 private:
  const uc16* data_;
  size_t length_;
};

113
class OneByteResource : public v8::String::ExternalOneByteStringResource {
114
 public:
115
  OneByteResource(const char* data, size_t length)
116
      : data_(data), length_(length) {}
117 118 119
  ~OneByteResource() override { i::DeleteArray(data_); }
  const char* data() const override { return data_; }
  size_t length() const override { return length_; }
120 121 122 123 124 125

 private:
  const char* data_;
  size_t length_;
};

126
static void InitializeBuildingBlocks(Handle<String>* building_blocks,
127
                                     int bb_length, bool long_blocks,
128
                                     MyRandomNumberGenerator* rng) {
129 130
  // A list of pointers that we don't have any interest in cleaning up.
  // If they are reachable from a root then leak detection won't complain.
131
  Isolate* isolate = CcTest::i_isolate();
132
  Factory* factory = isolate->factory();
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
  for (int i = 0; i < bb_length; i++) {
    int len = rng->next(16);
    int slice_head_chars = 0;
    int slice_tail_chars = 0;
    int slice_depth = 0;
    for (int j = 0; j < 3; j++) {
      if (rng->next(0.35)) slice_depth++;
    }
    // Must truncate something for a slice string. Loop until
    // at least one end will be sliced.
    while (slice_head_chars == 0 && slice_tail_chars == 0) {
      slice_head_chars = rng->next(15);
      slice_tail_chars = rng->next(12);
    }
    if (long_blocks) {
      // Generate building blocks which will never be merged
      len += ConsString::kMinLength + 1;
    } else if (len > 14) {
151 152
      len += 1234;
    }
153 154
    // Don't slice 0 length strings.
    if (len == 0) slice_depth = 0;
155
    int slice_length = slice_depth * (slice_head_chars + slice_tail_chars);
156 157
    len += slice_length;
    switch (rng->next(4)) {
158 159 160
      case 0: {
        uc16 buf[2000];
        for (int j = 0; j < len; j++) {
161
          buf[j] = rng->next(0x10000);
162
        }
163 164 165
        building_blocks[i] =
            factory->NewStringFromTwoByte(Vector<const uc16>(buf, len))
                .ToHandleChecked();
166
        for (int j = 0; j < len; j++) {
167
          CHECK_EQ(buf[j], building_blocks[i]->Get(j));
168 169 170 171 172 173
        }
        break;
      }
      case 1: {
        char buf[2000];
        for (int j = 0; j < len; j++) {
174
          buf[j] = rng->next(0x80);
175
        }
176 177 178
        building_blocks[i] =
            factory->NewStringFromOneByte(OneByteVector(buf, len))
                .ToHandleChecked();
179
        for (int j = 0; j < len; j++) {
180
          CHECK_EQ(buf[j], building_blocks[i]->Get(j));
181 182 183 184
        }
        break;
      }
      case 2: {
185
        uc16* buf = NewArray<uc16>(len);
186
        for (int j = 0; j < len; j++) {
187
          buf[j] = rng->next(0x10000);
188
        }
189
        Resource* resource = new Resource(buf, len);
190 191 192
        building_blocks[i] = v8::Utils::OpenHandle(
            *v8::String::NewExternalTwoByte(CcTest::isolate(), resource)
                 .ToLocalChecked());
193
        for (int j = 0; j < len; j++) {
194
          CHECK_EQ(buf[j], building_blocks[i]->Get(j));
195 196 197 198
        }
        break;
      }
      case 3: {
199
        char* buf = NewArray<char>(len);
200
        for (int j = 0; j < len; j++) {
201
          buf[j] = rng->next(0x80);
202
        }
203
        OneByteResource* resource = new OneByteResource(buf, len);
204 205 206
        building_blocks[i] = v8::Utils::OpenHandle(
            *v8::String::NewExternalOneByte(CcTest::isolate(), resource)
                 .ToLocalChecked());
207
        for (int j = 0; j < len; j++) {
208
          CHECK_EQ(buf[j], building_blocks[i]->Get(j));
209 210 211 212
        }
        break;
      }
    }
213
    for (int j = slice_depth; j > 0; j--) {
214
      building_blocks[i] = factory->NewSubString(
215
          building_blocks[i], slice_head_chars,
216 217 218
          building_blocks[i]->length() - slice_tail_chars);
    }
    CHECK(len == building_blocks[i]->length() + slice_length);
219 220 221
  }
}

222 223
// Plain counters describing the shape of a cons-string tree.  The fields are
// public and are updated directly by the construction and verification
// helpers in this file.  Not copyable.
class ConsStringStats {
 public:
  ConsStringStats() { Reset(); }
  ConsStringStats(const ConsStringStats&) = delete;
  ConsStringStats& operator=(const ConsStringStats&) = delete;
  // Sets every counter back to zero.
  void Reset();
  // CHECK-fails unless every counter equals the matching counter of |that|.
  void VerifyEqual(const ConsStringStats& that) const;
  // Number of leaf strings, empty ones included.
  int leaves_;
  // Number of leaves with length zero.
  int empty_leaves_;
  // Total number of characters over all leaves.
  int chars_;
  // Number of cons nodes whose first child is itself a cons string.
  int left_traversals_;
  // Number of cons nodes whose second child is itself a cons string.
  int right_traversals_;
};

// Zeroes all five counters.
void ConsStringStats::Reset() {
  leaves_ = empty_leaves_ = chars_ = 0;
  left_traversals_ = right_traversals_ = 0;
}

void ConsStringStats::VerifyEqual(const ConsStringStats& that) const {
247 248 249 250 251
  CHECK_EQ(this->leaves_, that.leaves_);
  CHECK_EQ(this->empty_leaves_, that.empty_leaves_);
  CHECK_EQ(this->chars_, that.chars_);
  CHECK_EQ(this->left_traversals_, that.left_traversals_);
  CHECK_EQ(this->right_traversals_, that.right_traversals_);
252 253 254 255
}

class ConsStringGenerationData {
 public:
256
  static const int kNumberOfBuildingBlocks = 256;
257
  explicit ConsStringGenerationData(bool long_blocks);
258 259
  ConsStringGenerationData(const ConsStringGenerationData&) = delete;
  ConsStringGenerationData& operator=(const ConsStringGenerationData&) = delete;
260
  void Reset();
261 262
  inline Handle<String> block(int offset);
  inline Handle<String> block(uint32_t offset);
263 264 265 266 267
  // Input variables.
  double early_termination_threshold_;
  double leftness_;
  double rightness_;
  double empty_leaf_threshold_;
268
  int max_leaves_;
269
  // Cached data.
270
  Handle<String> building_blocks_[kNumberOfBuildingBlocks];
271
  String empty_string_;
272
  MyRandomNumberGenerator rng_;
273 274
  // Stats.
  ConsStringStats stats_;
275
  int early_terminations_;
276 277
};

278
// Seeds the rng, generates all building blocks from it, caches the empty
// string and finally applies the default parameters via Reset() (which also
// reseeds the rng).
ConsStringGenerationData::ConsStringGenerationData(bool long_blocks) {
  rng_.init();
  InitializeBuildingBlocks(building_blocks_, kNumberOfBuildingBlocks,
                           long_blocks, &rng_);
  empty_string_ = ReadOnlyRoots(CcTest::heap()).empty_string();
  Reset();
}

286
// Returns the building block selected by |offset|, wrapping around the pool
// size so that any unsigned value is a valid selector.
Handle<String> ConsStringGenerationData::block(uint32_t offset) {
  return building_blocks_[offset % kNumberOfBuildingBlocks];
}

// Signed variant of block(): |offset| must be non-negative and is reduced
// modulo the pool size.
Handle<String> ConsStringGenerationData::block(int offset) {
  CHECK_GE(offset, 0);
  const int index = offset % kNumberOfBuildingBlocks;
  return building_blocks_[index];
}

295 296 297 298 299 300 301 302
void ConsStringGenerationData::Reset() {
  early_termination_threshold_ = 0.01;
  leftness_ = 0.75;
  rightness_ = 0.75;
  empty_leaf_threshold_ = 0.02;
  max_leaves_ = 1000;
  stats_.Reset();
  early_terminations_ = 0;
303
  rng_.init();
304 305
}

306
// Recursively walks |cons_string| and adds what it finds to |stats|:
// children that are cons strings count as left/right traversals and are
// descended into, all other children count as leaves.  An empty leaf is only
// accepted on the right side, and only when the left side is not a cons
// string.
void AccumulateStats(ConsString cons_string, ConsStringStats* stats) {
  int left_length = cons_string.first().length();
  int right_length = cons_string.second().length();
  CHECK(cons_string.length() == left_length + right_length);
  // Check left side.
  bool left_is_cons = cons_string.first().IsConsString();
  if (left_is_cons) {
    stats->left_traversals_++;
    AccumulateStats(ConsString::cast(cons_string.first()), stats);
  } else {
    CHECK_NE(left_length, 0);
    stats->leaves_++;
    stats->chars_ += left_length;
  }
  // Check right side.
  if (cons_string.second().IsConsString()) {
    stats->right_traversals_++;
    AccumulateStats(ConsString::cast(cons_string.second()), stats);
  } else {
    if (right_length == 0) {
      stats->empty_leaves_++;
      CHECK(!left_is_cons);
    }
    stats->leaves_++;
    stats->chars_ += right_length;
  }
}

334
// Handle-based entry point.  If |cons_string| is a cons string the full
// recursive accumulation runs; otherwise only its length is added to
// stats->chars_.
void AccumulateStats(Handle<String> cons_string, ConsStringStats* stats) {
  DisallowGarbageCollection no_gc;
  if (cons_string->IsConsString()) {
    return AccumulateStats(ConsString::cast(*cons_string), stats);
  }
  // This string got flattened by gc.
  stats->chars_ += cons_string->length();
}

343 344
// Iterative counterpart of AccumulateStats: visits the strings produced by
// ConsStringIterator and counts each one as a leaf.  Only leaves_ and chars_
// are updated; every returned offset is expected to be 0.
void AccumulateStatsWithOperator(ConsString cons_string,
                                 ConsStringStats* stats) {
  ConsStringIterator iter(cons_string);
  int offset;
  for (String string = iter.Next(&offset); !string.is_null();
       string = iter.Next(&offset)) {
    // Accumulate stats.
    CHECK_EQ(0, offset);
    stats->leaves_++;
    stats->chars_ += string.length();
  }
}

// Checks that |root| is a cons string whose shape matches the statistics
// recorded in data->stats_ while it was built.  The tree is measured twice:
// once recursively and once with ConsStringIterator.
void VerifyConsString(Handle<String> root, ConsStringGenerationData* data) {
  // Verify basic data.
  CHECK(root->IsConsString());
  CHECK_EQ(root->length(), data->stats_.chars_);
  // Recursive verify.
  ConsStringStats stats;
  AccumulateStats(ConsString::cast(*root), &stats);
  stats.VerifyEqual(data->stats_);
  // Iteratively verify.
  stats.Reset();
  AccumulateStatsWithOperator(ConsString::cast(*root), &stats);
  // Don't see these. Must copy over.
  stats.empty_leaves_ = data->stats_.empty_leaves_;
  stats.left_traversals_ = data->stats_.left_traversals_;
  stats.right_traversals_ = data->stats_.right_traversals_;
  // Adjust total leaves to compensate.
  stats.leaves_ += stats.empty_leaves_;
  stats.VerifyEqual(data->stats_);
}

// Recursively builds a random cons-string tree driven by the parameters and
// rng in |data|, recording leaves, characters and traversals in
// data->stats_ as it goes.  |max_recursion| bounds the remaining depth.
//
// The order of the rng calls determines the generated tree, so statements
// that draw from data->rng_ must not be reordered.
static Handle<String> ConstructRandomString(ConsStringGenerationData* data,
                                            unsigned max_recursion) {
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  // Compute termination characteristics.
  bool terminate = false;
  bool flat = data->rng_.next(data->empty_leaf_threshold_);
  bool terminate_early = data->rng_.next(data->early_termination_threshold_);
  if (terminate_early) data->early_terminations_++;
  // The obvious condition.
  terminate |= max_recursion == 0;
  // Flat cons string terminate by definition.
  terminate |= flat;
  // Cap for max leaves.
  terminate |= data->stats_.leaves_ >= data->max_leaves_;
  // Roll the dice.
  terminate |= terminate_early;
  // Compute termination characteristics for each side.
  bool terminate_left = terminate || !data->rng_.next(data->leftness_);
  bool terminate_right = terminate || !data->rng_.next(data->rightness_);
  // Generate left string.
  Handle<String> left;
  if (terminate_left) {
    left = data->block(data->rng_.next());
    data->stats_.leaves_++;
    data->stats_.chars_ += left->length();
  } else {
    data->stats_.left_traversals_++;
  }
  // Generate right string.
  Handle<String> right;
  if (terminate_right) {
    right = data->block(data->rng_.next());
    data->stats_.leaves_++;
    data->stats_.chars_ += right->length();
  } else {
    data->stats_.right_traversals_++;
  }
  // Generate the necessary sub-nodes recursively.
  if (!terminate_right) {
    // Need to balance generation fairly.
    if (!terminate_left && data->rng_.next(0.5)) {
      left = ConstructRandomString(data, max_recursion - 1);
    }
    right = ConstructRandomString(data, max_recursion - 1);
  }
  // The left subtree is built last if it was not built before the right one.
  if (!terminate_left && left.is_null()) {
    left = ConstructRandomString(data, max_recursion - 1);
  }
  // Build the cons string.
  Handle<String> root = factory->NewConsString(left, right).ToHandleChecked();
  CHECK(root->IsConsString() && !root->IsFlat());
  // Special work needed for flat string.
  if (flat) {
    // After flattening the node is still a cons string, now flat; it is
    // counted as carrying one additional empty leaf.
    data->stats_.empty_leaves_++;
    String::Flatten(isolate, root);
    CHECK(root->IsConsString() && root->IsFlat());
  }
  return root;
}

437
// Builds a left-deep cons string: starting from the empty string, each of
// the first |depth| blocks is appended as the second child of a new cons
// node whose first child is the string built so far.  Updates the leaf,
// character and left-traversal counters in data->stats_.
static Handle<String> ConstructLeft(ConsStringGenerationData* data, int depth) {
  Factory* factory = CcTest::i_isolate()->factory();
  Handle<String> answer = factory->NewStringFromStaticChars("");
  // Counts the initial empty string.
  data->stats_.leaves_++;
  for (int i = 0; i < depth; i++) {
    Handle<String> block = data->block(i);
    Handle<String> next =
        factory->NewConsString(answer, block).ToHandleChecked();
    // Only count a new leaf when a cons node was actually created.
    if (next->IsConsString()) data->stats_.leaves_++;
    data->stats_.chars_ += block->length();
    answer = next;
  }
  data->stats_.left_traversals_ = data->stats_.leaves_ - 2;
  return answer;
}
452

453 454
// Builds a right-deep cons string: starting from the empty string, blocks
// depth-1 down to 0 are each prepended as the first child of a new cons node
// whose second child is the string built so far.  Updates the leaf,
// character and right-traversal counters in data->stats_.
static Handle<String> ConstructRight(ConsStringGenerationData* data,
                                     int depth) {
  Factory* factory = CcTest::i_isolate()->factory();
  Handle<String> answer = factory->NewStringFromStaticChars("");
  // Counts the initial empty string.
  data->stats_.leaves_++;
  for (int i = depth - 1; i >= 0; i--) {
    Handle<String> block = data->block(i);
    Handle<String> next =
        factory->NewConsString(block, answer).ToHandleChecked();
    // Only count a new leaf when a cons node was actually created.
    if (next->IsConsString()) data->stats_.leaves_++;
    data->stats_.chars_ += block->length();
    answer = next;
  }
  data->stats_.right_traversals_ = data->stats_.leaves_ - 2;
  return answer;
}

470 471
// Builds a balanced cons string from blocks [from, to) by splitting the range
// in half recursively.  Ranges of one block return the block itself; ranges
// of two blocks return a single cons node.  Adds to chars_ and to the
// left/right traversal counters in data->stats_; leaves_ is not touched here
// (ConstructBalanced derives it afterwards).
static Handle<String> ConstructBalancedHelper(ConsStringGenerationData* data,
                                              int from, int to) {
  Factory* factory = CcTest::i_isolate()->factory();
  CHECK(to > from);
  if (to - from == 1) {
    data->stats_.chars_ += data->block(from)->length();
    return data->block(from);
  }
  if (to - from == 2) {
    data->stats_.chars_ += data->block(from)->length();
    data->stats_.chars_ += data->block(from + 1)->length();
    return factory->NewConsString(data->block(from), data->block(from + 1))
        .ToHandleChecked();
  }
  Handle<String> part1 =
      ConstructBalancedHelper(data, from, from + ((to - from) / 2));
  Handle<String> part2 =
      ConstructBalancedHelper(data, from + ((to - from) / 2), to);
  if (part1->IsConsString()) data->stats_.left_traversals_++;
  if (part2->IsConsString()) data->stats_.right_traversals_++;
  return factory->NewConsString(part1, part2).ToHandleChecked();
}

493 494
// Builds a balanced cons string from the first |depth| blocks and sets
// data->stats_.leaves_ from the traversal counts recorded by the helper.
static Handle<String> ConstructBalanced(ConsStringGenerationData* data,
                                        int depth = DEEP_DEPTH) {
  Handle<String> string = ConstructBalancedHelper(data, 0, depth);
  data->stats_.leaves_ =
      data->stats_.left_traversals_ + data->stats_.right_traversals_ + 2;
  return string;
}

// Reads |s1| and |s2| in lock step through StringCharacterStream and checks
// that they yield the same characters, end at the same time, and that the
// number of characters read equals the length of both strings.
static void Traverse(Handle<String> s1, Handle<String> s2) {
  int i = 0;
  StringCharacterStream character_stream_1(*s1);
  StringCharacterStream character_stream_2(*s2);
  while (character_stream_1.HasMore()) {
    CHECK(character_stream_2.HasMore());
    uint16_t c = character_stream_1.GetNext();
    CHECK_EQ(c, character_stream_2.GetNext());
    i++;
  }
  CHECK(!character_stream_1.HasMore());
  CHECK(!character_stream_2.HasMore());
  CHECK_EQ(s1->length(), i);
  CHECK_EQ(s2->length(), i);
}

// Compares at most the first |chars| characters of |s1| and |s2| through
// StringCharacterStream, then reads the last character of each string with
// Get() (the result is unused; the calls only exercise the accessor).
static void TraverseFirst(Handle<String> s1, Handle<String> s2, int chars) {
  int i = 0;
  StringCharacterStream character_stream_1(*s1);
  StringCharacterStream character_stream_2(*s2);
  while (character_stream_1.HasMore() && i < chars) {
    CHECK(character_stream_2.HasMore());
    uint16_t c = character_stream_1.GetNext();
    CHECK_EQ(c, character_stream_2.GetNext());
    i++;
  }
  s1->Get(s1->length() - 1);
  s2->Get(s2->length() - 1);
}

// Builds balanced, left-deep and right-deep cons strings from the same
// blocks and checks that they all read back the same characters as a
// flattened reference, both before and after flattening.  The printf calls
// are progress markers.
TEST(Traverse) {
  printf("TestTraverse\n");
  CcTest::InitializeVM();
  Isolate* isolate = CcTest::i_isolate();
  v8::HandleScope scope(CcTest::isolate());
  ConsStringGenerationData data(false);
  // Flattened reference string.
  Handle<String> flat = ConstructBalanced(&data);
  String::Flatten(isolate, flat);
  Handle<String> left_asymmetric = ConstructLeft(&data, DEEP_DEPTH);
  Handle<String> right_asymmetric = ConstructRight(&data, DEEP_DEPTH);
  Handle<String> symmetric = ConstructBalanced(&data);
  printf("1\n");
  Traverse(flat, symmetric);
  printf("2\n");
  Traverse(flat, left_asymmetric);
  printf("3\n");
  Traverse(flat, right_asymmetric);
  printf("4\n");
  Handle<String> left_deep_asymmetric = ConstructLeft(&data, SUPER_DEEP_DEPTH);
  Handle<String> right_deep_asymmetric =
      ConstructRight(&data, SUPER_DEEP_DEPTH);
  printf("5\n");
  TraverseFirst(left_asymmetric, left_deep_asymmetric, 1050);
  printf("6\n");
  TraverseFirst(left_asymmetric, right_deep_asymmetric, 65536);
  printf("7\n");
  // Repeat the comparisons after flattening each string.
  String::Flatten(isolate, left_asymmetric);
  printf("10\n");
  Traverse(flat, left_asymmetric);
  printf("11\n");
  String::Flatten(isolate, right_asymmetric);
  printf("12\n");
  Traverse(flat, right_asymmetric);
  printf("14\n");
  String::Flatten(isolate, symmetric);
  printf("15\n");
  Traverse(flat, symmetric);
  printf("16\n");
  String::Flatten(isolate, left_deep_asymmetric);
  printf("18\n");
}

573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596
// Rewrites a cons string so that its first part is the empty string and its
// second part carries all characters, then flattens it inside a
// DisallowGarbageCollection scope and checks the result is flat with the
// original length.
TEST(ConsStringWithEmptyFirstFlatten) {
  printf("ConsStringWithEmptyFirstFlatten\n");
  CcTest::InitializeVM();
  v8::HandleScope scope(CcTest::isolate());
  Isolate* isolate = CcTest::i_isolate();

  i::Handle<i::String> initial_fst =
      isolate->factory()->NewStringFromAsciiChecked("fst012345");
  i::Handle<i::String> initial_snd =
      isolate->factory()->NewStringFromAsciiChecked("snd012345");
  i::Handle<i::String> str = isolate->factory()
                                 ->NewConsString(initial_fst, initial_snd)
                                 .ToHandleChecked();
  CHECK(str->IsConsString());
  auto cons = i::Handle<i::ConsString>::cast(str);

  const int initial_length = cons->length();

  // set_first / set_second does not update the length (which the heap verifier
  // checks), so we need to ensure the length stays the same.

  i::Handle<i::String> new_fst = isolate->factory()->empty_string();
  i::Handle<i::String> new_snd =
      isolate->factory()->NewStringFromAsciiChecked("snd012345012345678");
  cons->set_first(*new_fst);
  cons->set_second(*new_snd);
  CHECK(!cons->IsFlat());
  CHECK_EQ(initial_length, new_fst->length() + new_snd->length());
  CHECK_EQ(initial_length, cons->length());

  // Make sure Flatten doesn't alloc a new string.
  DisallowGarbageCollection no_alloc;
  i::Handle<i::String> flat = i::String::Flatten(isolate, cons);
  CHECK(flat->IsFlat());
  CHECK_EQ(initial_length, flat->length());
}

610
// Checks that a StringCharacterStream over |cons_string| yields the same
// characters as |flat_string|, starting from up to 21 evenly spaced offsets
// (offset == length included).  |flat_string| must be flat and not a cons
// string, |cons_string| must be a cons string, and the string must not be
// empty.
static void VerifyCharacterStream(String flat_string, String cons_string) {
  // Do not want to test ConString traversal on flat string.
  CHECK(flat_string.IsFlat() && !flat_string.IsConsString());
  CHECK(cons_string.IsConsString());
  // TODO(dcarney) Test stream reset as well.
  int length = flat_string.length();
  // The offset computation below divides by min(length, 20).
  CHECK_NE(0, length);
  // Iterate start search in multiple places in the string.
  int outer_iterations = length > 20 ? 20 : length;
  for (int j = 0; j <= outer_iterations; j++) {
    int offset = length * j / outer_iterations;
    if (offset < 0) offset = 0;
    // Want to test the offset == length case.
    if (offset > length) offset = length;
    StringCharacterStream flat_stream(flat_string, offset);
    StringCharacterStream cons_stream(cons_string, offset);
    for (int i = offset; i < length; i++) {
      uint16_t c = flat_string.Get(i);
      CHECK(flat_stream.HasMore());
      CHECK(cons_stream.HasMore());
      CHECK_EQ(c, flat_stream.GetNext());
      CHECK_EQ(c, cons_stream.GetNext());
    }
    CHECK(!flat_stream.HasMore());
    CHECK(!cons_stream.HasMore());
  }
}

637 638
// Prints the generation statistics on one line.  Compiled to a no-op unless
// DEBUG is defined.
static inline void PrintStats(const ConsStringGenerationData& data) {
#ifdef DEBUG
  // All counters are plain ints, so they are printed with %d.
  printf("%s: [%d], %s: [%d], %s: [%d], %s: [%d], %s: [%d], %s: [%d]\n",
         "leaves", data.stats_.leaves_, "empty", data.stats_.empty_leaves_,
         "chars", data.stats_.chars_, "lefts", data.stats_.left_traversals_,
         "rights", data.stats_.right_traversals_, "early_terminations",
         data.early_terminations_);
#endif
}

647
template <typename BuildString>
648
void TestStringCharacterStream(BuildString build, int test_cases) {
649
  FLAG_gc_global = true;
650
  CcTest::InitializeVM();
651
  Isolate* isolate = CcTest::i_isolate();
652
  HandleScope outer_scope(isolate);
653
  ConsStringGenerationData data(true);
654
  for (int i = 0; i < test_cases; i++) {
655 656
    printf("%d\n", i);
    HandleScope inner_scope(isolate);
657
    AlwaysAllocateScopeForTesting always_allocate(isolate->heap());
658 659 660 661 662
    // Build flat version of cons string.
    Handle<String> flat_string = build(i, &data);
    ConsStringStats flat_string_stats;
    AccumulateStats(flat_string, &flat_string_stats);
    // Flatten string.
663
    String::Flatten(isolate, flat_string);
664 665 666 667
    // Build unflattened version of cons string to test.
    Handle<String> cons_string = build(i, &data);
    ConsStringStats cons_string_stats;
    AccumulateStats(cons_string, &cons_string_stats);
668
    DisallowGarbageCollection no_gc;
669 670 671 672 673
    PrintStats(data);
    // Full verify of cons string.
    cons_string_stats.VerifyEqual(flat_string_stats);
    cons_string_stats.VerifyEqual(data.stats_);
    VerifyConsString(cons_string, &data);
674
    String flat_string_ptr = flat_string->IsConsString()
675
                                 ? ConsString::cast(*flat_string).first()
676
                                 : *flat_string;
677 678 679 680 681 682
    VerifyCharacterStream(flat_string_ptr, *cons_string);
  }
}

// Number of test cases TEST(StringCharacterStreamEdgeCases) runs through
// BuildEdgeCaseConsString (cases 0 .. kCharacterStreamNonRandomCases - 1).
static const int kCharacterStreamNonRandomCases = 8;

683 684 685 686
// Builds one of a fixed set of hand-picked cons-string shapes, selected by
// |test_case|, and records the expected statistics in data->stats_.  |data|
// is reset first, so the same |test_case| always produces the same string.
static Handle<String> BuildEdgeCaseConsString(int test_case,
                                              ConsStringGenerationData* data) {
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  data->Reset();
  switch (test_case) {
    case 0:
      return ConstructBalanced(data, 71);
    case 1:
      return ConstructLeft(data, 71);
    case 2:
      return ConstructRight(data, 71);
    case 3:
      return ConstructLeft(data, 10);
    case 4:
      return ConstructRight(data, 10);
    case 5:
      // 2 element balanced tree.
      data->stats_.chars_ += data->block(0)->length();
      data->stats_.chars_ += data->block(1)->length();
      data->stats_.leaves_ += 2;
      return factory->NewConsString(data->block(0), data->block(1))
          .ToHandleChecked();
    case 6:
      // Simple flattened tree.
      data->stats_.chars_ += data->block(0)->length();
      data->stats_.chars_ += data->block(1)->length();
      data->stats_.leaves_ += 2;
      data->stats_.empty_leaves_ += 1;
      {
        Handle<String> string =
            factory->NewConsString(data->block(0), data->block(1))
                .ToHandleChecked();
        String::Flatten(isolate, string);
        return string;
      }
    case 7:
      // Left node flattened.
      data->stats_.chars_ += data->block(0)->length();
      data->stats_.chars_ += data->block(1)->length();
      data->stats_.chars_ += data->block(2)->length();
      data->stats_.leaves_ += 3;
      data->stats_.empty_leaves_ += 1;
      data->stats_.left_traversals_ += 1;
      {
        Handle<String> left =
            factory->NewConsString(data->block(0), data->block(1))
                .ToHandleChecked();
        String::Flatten(isolate, left);
        return factory->NewConsString(left, data->block(2)).ToHandleChecked();
      }
    case 8:
      // Left node and right node flattened.
      // NOTE(review): not reached while kCharacterStreamNonRandomCases is 8
      // (only cases 0..7 are run) — confirm whether that is intentional.
      data->stats_.chars_ += data->block(0)->length();
      data->stats_.chars_ += data->block(1)->length();
      data->stats_.chars_ += data->block(2)->length();
      data->stats_.chars_ += data->block(3)->length();
      data->stats_.leaves_ += 4;
      data->stats_.empty_leaves_ += 2;
      data->stats_.left_traversals_ += 1;
      data->stats_.right_traversals_ += 1;
      {
        Handle<String> left =
            factory->NewConsString(data->block(0), data->block(1))
                .ToHandleChecked();
        String::Flatten(isolate, left);
        // The right node must use blocks 2 and 3 to match the character
        // count recorded above.
        Handle<String> right =
            factory->NewConsString(data->block(2), data->block(3))
                .ToHandleChecked();
        String::Flatten(isolate, right);
        return factory->NewConsString(left, right).ToHandleChecked();
      }
  }
  UNREACHABLE();
}

// Runs the character-stream checks over the hand-picked shapes produced by
// BuildEdgeCaseConsString.
TEST(StringCharacterStreamEdgeCases) {
  printf("TestStringCharacterStreamEdgeCases\n");
  TestStringCharacterStream(BuildEdgeCaseConsString,
                            kCharacterStreamNonRandomCases);
}

// Sizes of the three parameter dimensions that InitializeGenerationData
// decodes from a test case number: tree balance, tree length and empty-leaf
// threshold.
static const int kBalances = 3;
static const int kTreeLengths = 4;
static const int kEmptyLeaves = 4;
// Number of distinct parameter combinations.
static const int kUniqueRandomParameters =
    kBalances * kTreeLengths * kEmptyLeaves;
770

771 772
static void InitializeGenerationData(int test_case,
                                     ConsStringGenerationData* data) {
773 774 775 776
  // Clear the settings and reinit the rng.
  data->Reset();
  // Spin up the rng to a known location that is unique per test.
  static const int kPerTestJump = 501;
777
  for (int j = 0; j < test_case * kPerTestJump; j++) {
778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819
    data->rng_.next();
  }
  // Choose balanced, left or right heavy trees.
  switch (test_case % kBalances) {
    case 0:
      // Nothing to do.  Already balanced.
      break;
    case 1:
      // Left balanced.
      data->leftness_ = 0.90;
      data->rightness_ = 0.15;
      break;
    case 2:
      // Right balanced.
      data->leftness_ = 0.15;
      data->rightness_ = 0.90;
      break;
    default:
      UNREACHABLE();
  }
  // Must remove the influence of the above decision.
  test_case /= kBalances;
  // Choose tree length.
  switch (test_case % kTreeLengths) {
    case 0:
      data->max_leaves_ = 16;
      data->early_termination_threshold_ = 0.2;
      break;
    case 1:
      data->max_leaves_ = 50;
      data->early_termination_threshold_ = 0.05;
      break;
    case 2:
      data->max_leaves_ = 500;
      data->early_termination_threshold_ = 0.03;
      break;
    case 3:
      data->max_leaves_ = 5000;
      data->early_termination_threshold_ = 0.001;
      break;
    default:
      UNREACHABLE();
820
  }
821 822 823 824 825 826 827
  // Must remove the influence of the above decision.
  test_case /= kTreeLengths;
  // Choose how much we allow empty nodes, including not at all.
  data->empty_leaf_threshold_ =
      0.03 * static_cast<double>(test_case % kEmptyLeaves);
}

828 829
// String builder used by StringCharacterStreamRandom: configures |data| for
// |test_case| and then constructs a random string from it.
static Handle<String> BuildRandomConsString(int test_case,
                                            ConsStringGenerationData* data) {
  InitializeGenerationData(test_case, data);
  Handle<String> random_string = ConstructRandomString(data, 200);
  return random_string;
}

TEST(StringCharacterStreamRandom) {
  printf("StringCharacterStreamRandom\n");
  // Seven times as many test cases as there are unique parameter
  // combinations.
  const int test_case_count = kUniqueRandomParameters * 7;
  TestStringCharacterStream(BuildRandomConsString, test_case_count);
}

839
static const int kDeepOneByteDepth = 100000;
840

841
TEST(DeepOneByte) {
842
  CcTest::InitializeVM();
843 844
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
845
  v8::HandleScope scope(CcTest::isolate());
846

847 848
  char* foo = NewArray<char>(kDeepOneByteDepth);
  for (int i = 0; i < kDeepOneByteDepth; i++) {
849 850
    foo[i] = "foo "[i % 4];
  }
851 852 853 854 855
  Handle<String> string =
      factory->NewStringFromOneByte(OneByteVector(foo, kDeepOneByteDepth))
          .ToHandleChecked();
  Handle<String> foo_string = factory->NewStringFromStaticChars("foo");
  for (int i = 0; i < kDeepOneByteDepth; i += 10) {
856
    string = factory->NewConsString(string, foo_string).ToHandleChecked();
857
  }
858 859
  Handle<String> flat_string =
      factory->NewConsString(string, foo_string).ToHandleChecked();
860
  String::Flatten(isolate, flat_string);
861 862

  for (int i = 0; i < 500; i++) {
863
    TraverseFirst(flat_string, string, kDeepOneByteDepth);
864
  }
865
  DeleteArray<char>(foo);
866
}
867 868 869

TEST(Utf8Conversion) {
  // Smoke test for converting strings to utf-8: checks Utf8Length and then
  // WriteUtf8 for every buffer capacity from 0 to 11 bytes.
  CcTest::InitializeVM();
  v8::Isolate* const isolate = CcTest::isolate();
  v8::HandleScope handle_scope(isolate);

  // A simple one-byte string: the utf-8 length equals the character count.
  const char* one_byte_string = "abcdef12345";
  v8::Local<v8::String> one_byte =
      v8::String::NewFromUtf8(isolate, one_byte_string,
                              v8::NewStringType::kNormal,
                              static_cast<int>(strlen(one_byte_string)))
          .ToLocalChecked();
  int len = one_byte->Utf8Length(isolate);
  CHECK_EQ(strlen(one_byte_string), len);

  // A mixed one-byte and two-byte string, and the bytes it encodes to:
  //   U+02E4 -> CB A4
  //   U+0064 -> 64
  //   U+12E4 -> E1 8B A4
  //   U+0030 -> 30
  //   U+3045 -> E3 81 85
  const uint16_t mixed_string[] = {0x02E4, 0x0064, 0x12E4, 0x0030, 0x3045};
  const unsigned char as_utf8[11] = {0xCB, 0xA4, 0x64, 0xE1, 0x8B, 0xA4,
                                     0x30, 0xE3, 0x81, 0x85, 0x00};
  // Indexed by buffer capacity: the number of bytes expected to be written...
  const int lengths[12] = {0, 0, 2, 3, 3, 3, 6, 7, 7, 7, 10, 11};
  // ...and the number of characters expected to be reported as written.
  const int char_lengths[12] = {0, 0, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5};
  v8::Local<v8::String> mixed =
      v8::String::NewFromTwoByte(isolate, mixed_string,
                                 v8::NewStringType::kNormal, 5)
          .ToLocalChecked();
  CHECK_EQ(10, mixed->Utf8Length(isolate));

  // Try encoding the string with all capacities.
  char buffer[11];
  const int kBufferSize = static_cast<int>(sizeof(buffer));
  const char kNoChar = static_cast<char>(-1);
  for (int capacity = 0; capacity <= kBufferSize; capacity++) {
    // Poison the whole buffer so untouched bytes can be recognised.
    memset(buffer, kNoChar, sizeof(buffer));

    int chars_written;
    const int written =
        mixed->WriteUtf8(isolate, buffer, capacity, &chars_written);
    const int expected_bytes = lengths[capacity];
    CHECK_EQ(expected_bytes, written);
    CHECK_EQ(char_lengths[capacity], chars_written);

    // The written prefix must match the expected encoding...
    for (int pos = 0; pos < expected_bytes; pos++) {
      CHECK_EQ(as_utf8[pos], static_cast<unsigned char>(buffer[pos]));
    }
    // ...and the rest of the buffer must not have been touched.
    for (int pos = expected_bytes; pos < kBufferSize; pos++) {
      CHECK_EQ(kNoChar, buffer[pos]);
    }
  }
}
916

917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019
TEST(Utf8ConversionPerf) {
  // Informal timing of String::WriteUtf8 on four large strings. Nothing is
  // asserted; the elapsed time of every call is only printed.
  LocalContext context;
  v8::HandleScope handle_scope(CcTest::isolate());

  // The strings to encode, each paired with the label printed next to its
  // timings. All of them are created before the first measurement.
  // NOTE: the "one byte string" label is historical; its code points
  // (U+0253..U+0255) lie above U+00FF.
  struct Subject {
    const char* label;
    v8::Local<v8::String> string;
  };
  const Subject subjects[] = {
      {"ascii string", CompileRun("'abc'.repeat(1E6)").As<v8::String>()},
      {"one byte string",
       CompileRun("'\\u0255\\u0254\\u0253'.repeat(1E6)").As<v8::String>()},
      {"two byte string",
       CompileRun("'\\u2255\\u2254\\u2253'.repeat(1E6)").As<v8::String>()},
      {"surrogate string",
       CompileRun("'\\u{12345}\\u2244'.repeat(1E6)").As<v8::String>()},
  };

  const int size = 10 * 1000 * 1000;
  char* buffer = new char[4 * size];

  // Every string is written three times: twice with |size| bytes of capacity
  // and once with the capacity of the whole buffer.
  const int capacities[] = {size, size, 4 * size};
  for (const Subject& subject : subjects) {
    for (int capacity : capacities) {
      v8::base::ElapsedTimer timer;
      timer.Start();
      subject.string->WriteUtf8(CcTest::isolate(), buffer, capacity, nullptr);
      printf("%s %0.3f\n", subject.label, timer.Elapsed().InMillisecondsF());
      timer.Stop();
    }
  }
  delete[] buffer;
}
1020

1021
TEST(ExternalShortStringAdd) {
  // Builds external one-byte and two-byte strings of every length from 0 to
  // kMaxLength, exposes them to script, and checks from JavaScript that
  // concatenating them (with each other and with ordinary strings) yields the
  // expected results.
  LocalContext context;
  v8::Isolate* const isolate = CcTest::isolate();
  v8::HandleScope handle_scope(isolate);

  // Make sure we cover all always-flat lengths and at least one above.
  static const int kMaxLength = 20;
  CHECK_GT(kMaxLength, i::ConsString::kMinLength);

  // Allocate two JavaScript arrays for holding short strings; slot i holds
  // the external string of length i.
  v8::Local<v8::Array> one_byte_external_strings =
      v8::Array::New(isolate, kMaxLength + 1);
  v8::Local<v8::Array> non_one_byte_external_strings =
      v8::Array::New(isolate, kMaxLength + 1);

  // Generate short one-byte and two-byte external strings.
  for (int length = 0; length <= kMaxLength; length++) {
    // One-byte flavour: |length| copies of 'a'.
    // Terminating '\0' is left out on purpose. It is not required for external
    // string data.
    char* one_byte_chars = NewArray<char>(length + 1);
    for (int pos = 0; pos < length; pos++) {
      one_byte_chars[pos] = 'a';
    }
    OneByteResource* one_byte_resource =
        new OneByteResource(one_byte_chars, length);
    v8::Local<v8::String> one_byte_external_string =
        v8::String::NewExternalOneByte(isolate, one_byte_resource)
            .ToLocalChecked();
    one_byte_external_strings
        ->Set(context.local(), v8::Integer::New(isolate, length),
              one_byte_external_string)
        .FromJust();

    // Two-byte flavour: |length| copies of U+1234.
    // Terminating '\0' is left out on purpose. It is not required for external
    // string data.
    uc16* two_byte_chars = NewArray<uc16>(length + 1);
    for (int pos = 0; pos < length; pos++) {
      two_byte_chars[pos] = 0x1234;
    }
    Resource* two_byte_resource = new Resource(two_byte_chars, length);
    v8::Local<v8::String> non_one_byte_external_string =
        v8::String::NewExternalTwoByte(isolate, two_byte_resource)
            .ToLocalChecked();
    non_one_byte_external_strings
        ->Set(context.local(), v8::Integer::New(isolate, length),
              non_one_byte_external_string)
        .FromJust();
  }

  // Add the arrays with the short external strings in the global object,
  // together with the maximum length the script should iterate up to.
  v8::Local<v8::Object> global = context->Global();
  global
      ->Set(context.local(), v8_str("external_one_byte"),
            one_byte_external_strings)
      .FromJust();
  global
      ->Set(context.local(), v8_str("external_non_one_byte"),
            non_one_byte_external_strings)
      .FromJust();
  global
      ->Set(context.local(), v8_str("max_length"),
            v8::Integer::New(isolate, kMaxLength))
      .FromJust();

  // Add short external one-byte and two-byte strings checking the result.
  // The script returns 0 on success and the number of the first failing
  // comparison otherwise.
  static const char* source =
      "function test() {"
      "  var one_byte_chars = 'aaaaaaaaaaaaaaaaaaaa';"
      "  var non_one_byte_chars = "
      "'\\u1234\\u1234\\u1234\\u1234\\u1234\\u1234\\u1234\\u1234\\u1234\\u1"
      "234\\u1234\\u1234\\u1234\\u1234\\u1234\\u1234\\u1234\\u1234\\u1234\\"
      "u1234';"  // NOLINT
      "  if (one_byte_chars.length != max_length) return 1;"
      "  if (non_one_byte_chars.length != max_length) return 2;"
      "  var one_byte = Array(max_length + 1);"
      "  var non_one_byte = Array(max_length + 1);"
      "  for (var i = 0; i <= max_length; i++) {"
      "    one_byte[i] = one_byte_chars.substring(0, i);"
      "    non_one_byte[i] = non_one_byte_chars.substring(0, i);"
      "  };"
      "  for (var i = 0; i <= max_length; i++) {"
      "    if (one_byte[i] != external_one_byte[i]) return 3;"
      "    if (non_one_byte[i] != external_non_one_byte[i]) return 4;"
      "    for (var j = 0; j < i; j++) {"
      "      if (external_one_byte[i] !="
      "          (external_one_byte[j] + external_one_byte[i - j])) return "
      "5;"
      "      if (external_non_one_byte[i] !="
      "          (external_non_one_byte[j] + external_non_one_byte[i - "
      "j])) return 6;"
      "      if (non_one_byte[i] != (non_one_byte[j] + non_one_byte[i - "
      "j])) return 7;"
      "      if (one_byte[i] != (one_byte[j] + one_byte[i - j])) return 8;"
      "      if (one_byte[i] != (external_one_byte[j] + one_byte[i - j])) "
      "return 9;"
      "      if (one_byte[i] != (one_byte[j] + external_one_byte[i - j])) "
      "return 10;"
      "      if (non_one_byte[i] !="
      "          (external_non_one_byte[j] + non_one_byte[i - j])) return "
      "11;"
      "      if (non_one_byte[i] !="
      "          (non_one_byte[j] + external_non_one_byte[i - j])) return "
      "12;"
      "    }"
      "  }"
      "  return 0;"
      "};"
      "test()";
  CHECK_EQ(0, CompileRun(source)->Int32Value(context.local()).FromJust());
}
1129

1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150
TEST(ReplaceInvalidUtf8) {
  // With REPLACE_INVALID_UTF8 the lone surrogate in 'ab\ud800cd' is expected
  // to be written as EF BF BD, both when the whole string fits and when the
  // buffer is one byte too small for the final character.
  LocalContext context;
  v8::HandleScope handle_scope(CcTest::isolate());
  v8::Local<v8::String> string = CompileRun("'ab\\ud800cd'").As<v8::String>();

  // Expected encoding of the complete string.
  static const char kExpected[] = "\x61\x62\xef\xbf\xbd\x63\x64";
  char buffer[7];

  // Capacity for all seven bytes: five characters are written.
  memset(buffer, 0, sizeof(buffer));
  int chars_written = 0;
  int size = string->WriteUtf8(CcTest::isolate(), buffer, 7, &chars_written,
                               v8::String::REPLACE_INVALID_UTF8);
  CHECK_EQ(7, size);
  CHECK_EQ(5, chars_written);
  CHECK_EQ(0, memcmp(kExpected, buffer, 7));

  // Capacity for six bytes: the trailing 'd' is dropped.
  memset(buffer, 0, sizeof(buffer));
  chars_written = 0;
  size = string->WriteUtf8(CcTest::isolate(), buffer, 6, &chars_written,
                           v8::String::REPLACE_INVALID_UTF8);
  CHECK_EQ(6, size);
  CHECK_EQ(4, chars_written);
  CHECK_EQ(0, memcmp(kExpected, buffer, 6));
}
1151

1152
TEST(JSONStringifySliceMadeExternal) {
  if (!FLAG_string_slices) return;
  CcTest::InitializeVM();
  // Create a sliced string from a one-byte string.  The latter is turned
  // into a two-byte external string.  Check that JSON.stringify works.
  v8::Isolate* const isolate = CcTest::isolate();
  v8::HandleScope handle_scope(isolate);

  v8::Local<v8::Value> underlying_value = CompileRun(
      "var underlying = 'abcdefghijklmnopqrstuvwxyz';"
      "underlying");
  v8::Local<v8::String> underlying =
      underlying_value->ToString(isolate->GetCurrentContext())
          .ToLocalChecked();
  v8::Local<v8::Value> slice_value = CompileRun(
      "var slice = '';"
      "slice = underlying.slice(1);"
      "slice");
  v8::Local<v8::String> slice =
      slice_value->ToString(isolate->GetCurrentContext()).ToLocalChecked();

  // Starting point: a slice on top of a sequential one-byte string.
  CHECK(v8::Utils::OpenHandle(*slice)->IsSlicedString());
  CHECK(v8::Utils::OpenHandle(*underlying)->IsSeqOneByteString());

  // Copy the characters into a two-byte buffer and externalize the
  // underlying string with it.
  const int length = underlying->Length();
  uc16* two_byte = NewArray<uc16>(length + 1);
  underlying->Write(isolate, two_byte);
  Resource* resource = new Resource(two_byte, length);
  CHECK(underlying->MakeExternal(resource));

  // The slice is still a slice, but its underlying string changed
  // representation.
  CHECK(v8::Utils::OpenHandle(*slice)->IsSlicedString());
  CHECK(v8::Utils::OpenHandle(*underlying)->IsExternalTwoByteString());

  v8::String::Utf8Value stringified(isolate,
                                    CompileRun("JSON.stringify(slice)"));
  CHECK_EQ(0, strcmp("\"bcdefghijklmnopqrstuvwxyz\"", *stringified));
}

1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278
TEST(JSONStringifyWellFormed) {
  // JSON.stringify of a lone surrogate must produce the lower-case \uXXXX
  // escape, and that JSON text must survive a parse/stringify round trip.
  CcTest::InitializeVM();
  v8::HandleScope handle_scope(CcTest::isolate());
  v8::Local<v8::Context> context = CcTest::isolate()->GetCurrentContext();

  struct LoneSurrogateCase {
    const char* script;    // Script stringifying a single lone surrogate.
    const char* expected;  // JSON text the script must evaluate to.
  };
  static const LoneSurrogateCase kCases[] = {
      // Some leading surrogates (U+D800 to U+DBFF).
      {"JSON.stringify('\\uD800')", "\"\\ud800\""},
      {"JSON.stringify('\\uDAAA')", "\"\\udaaa\""},
      {"JSON.stringify('\\uDBFF')", "\"\\udbff\""},
      // Some trailing surrogates (U+DC00 to U+DFFF).
      {"JSON.stringify('\\uDC00')", "\"\\udc00\""},
      {"JSON.stringify('\\uDDDD')", "\"\\udddd\""},
      {"JSON.stringify('\\uDFFF')", "\"\\udfff\""},
  };

  for (const LoneSurrogateCase& test_case : kCases) {
    // Script-level check of the produced JSON text.
    CHECK_EQ(0, strcmp(test_case.expected,
                       *v8::String::Utf8Value(CcTest::isolate(),
                                              CompileRun(test_case.script))));

    // API-level round trip: parsing the JSON text and stringifying the result
    // must give back the same text.
    v8::Local<v8::String> json = v8_str(test_case.expected);
    v8::Local<v8::Value> parsed =
        v8::JSON::Parse(context, json).ToLocalChecked();
    CHECK(v8::JSON::Stringify(context, parsed)
              .ToLocalChecked()
              ->Equals(context, json)
              .FromJust());
  }
}
1279

1280
TEST(CachedHashOverflow) {
  CcTest::InitializeVM();
  // We incorrectly allowed strings to be tagged as array indices even if their
  // values didn't fit in the hash field.
  // See http://code.google.com/p/v8/issues/detail?id=728
  Isolate* isolate = CcTest::i_isolate();

  v8::HandleScope handle_scope(CcTest::isolate());
  // Lines must be executed sequentially. Combining them into one script
  // makes the bug go away.
  const char* lines[] = {"var x = [];", "x[4] = 42;", "var s = \"1073741828\";",
                         "x[s];",       "x[s] = 37;", "x[4];",
                         "x[s];"};

  // Expected completion value of each line above, in the same order.
  Handle<Smi> fortytwo(Smi::FromInt(42), isolate);
  Handle<Smi> thirtyseven(Smi::FromInt(37), isolate);
  Handle<Object> results[] = {
      isolate->factory()->undefined_value(),
      fortytwo,
      isolate->factory()->undefined_value(),
      isolate->factory()->undefined_value(),
      thirtyseven,
      fortytwo,
      thirtyseven  // Bug yielded 42 here.
  };
  // The loop below indexes |results| with indices bounded by |lines|, so the
  // two tables must stay the same length.
  static_assert(sizeof(lines) / sizeof(lines[0]) ==
                    sizeof(results) / sizeof(results[0]),
                "every script line needs exactly one expected result");

  v8::Local<v8::Context> context = CcTest::isolate()->GetCurrentContext();
  for (size_t i = 0; i < arraysize(lines); i++) {
    const char* line = lines[i];
    printf("%s\n", line);

    // Compile and run each line as a script of its own.
    v8::Local<v8::String> source =
        v8::String::NewFromUtf8(CcTest::isolate(), line).ToLocalChecked();
    v8::Local<v8::Script> script =
        v8::Script::Compile(context, source).ToLocalChecked();
    v8::Local<v8::Value> result = script->Run(context).ToLocalChecked();

    // The result must agree with the expectation on being undefined, on being
    // a number and, for numbers, on the int32 value.
    Handle<Object> expected = results[i];
    CHECK_EQ(expected->IsUndefined(CcTest::i_isolate()),
             result->IsUndefined());
    CHECK_EQ(expected->IsNumber(), result->IsNumber());
    if (result->IsNumber()) {
      int32_t value = 0;
      CHECK(expected->ToInt32(&value));
      CHECK_EQ(value, result->ToInt32(context).ToLocalChecked()->Value());
    }
  }
}
1327 1328

TEST(SliceFromCons) {
  // Taking a substring of a non-flat cons string must flatten the cons and
  // produce a slice whose parent is a sequential string.
  if (!FLAG_string_slices) return;
  CcTest::InitializeVM();
  Factory* factory = CcTest::i_isolate()->factory();
  v8::HandleScope scope(CcTest::isolate());

  Handle<String> half = factory->NewStringFromStaticChars("parentparentparent");
  Handle<String> parent = factory->NewConsString(half, half).ToHandleChecked();
  CHECK(parent->IsConsString());
  CHECK(!parent->IsFlat());

  Handle<String> slice = factory->NewSubString(parent, 1, 25);
  // After slicing, the original string becomes a flat cons.
  CHECK(parent->IsFlat());
  CHECK(slice->IsSlicedString());

  String slice_parent = SlicedString::cast(*slice).parent();
  // Parent could have been short-circuited.
  String expected_parent =
      parent->IsConsString() ? ConsString::cast(*parent).first() : *parent;
  CHECK_EQ(slice_parent, expected_parent);
  CHECK(slice_parent.IsSeqString());
  CHECK(slice->IsFlat());
}

1351
class OneByteVectorResource : public v8::String::ExternalOneByteStringResource {
1352
 public:
1353
  explicit OneByteVectorResource(i::Vector<const char> vector)
1354
      : data_(vector) {}
1355 1356
  ~OneByteVectorResource() override = default;
  size_t length() const override { return data_.length(); }
1357 1358
  const char* data() const override { return data_.begin(); }

1359 1360 1361 1362
 private:
  i::Vector<const char> data_;
};

1363
TEST(InternalizeExternal) {
  // Internalizing an external string must leave it external, mark it
  // internalized, and not place it in the young generation.
#ifdef ENABLE_MINOR_MC
  // TODO(mlippautz): Remove once we add support for forwarding ThinStrings in
  // minor MC
  if (FLAG_minor_mc) return;
#endif  // ENABLE_MINOR_MC
  FLAG_thin_strings = true;
  FLAG_stress_incremental_marking = false;
  CcTest::InitializeVM();
  i::Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  // This won't leak; the external string mechanism will call Dispose() on it.
  OneByteVectorResource* resource =
      new OneByteVectorResource(i::Vector<const char>("prop-1234", 9));
  {
    v8::HandleScope scope(CcTest::isolate());
    v8::Local<v8::String> ext_string =
        v8::String::NewExternalOneByte(CcTest::isolate(), resource)
            .ToLocalChecked();
    Handle<String> string = v8::Utils::OpenHandle(*ext_string);

    // Before: external, not internalized, and the string table lookup
    // reports the kNotFound sentinel.
    CHECK(string->IsExternalString());
    CHECK(!string->IsInternalizedString());
    CHECK(!i::Heap::InYoungGeneration(*string));
    const auto not_found = Smi::FromInt(ResultSentinel::kNotFound).ptr();
    CHECK_EQ(isolate->string_table()->TryStringToIndexOrLookupExisting(
                 isolate, string->ptr()),
             not_found);

    factory->InternalizeName(string);

    // After: still external, now internalized.
    CHECK(string->IsExternalString());
    CHECK(string->IsInternalizedString());
    CHECK(!i::Heap::InYoungGeneration(*string));
  }
  // Two old-space collections once the handle scope above has been closed.
  CcTest::CollectGarbage(i::OLD_SPACE);
  CcTest::CollectGarbage(i::OLD_SPACE);
}
1397 1398

TEST(SliceFromExternal) {
  // A substring of an external string must be a slice that points directly at
  // the external string, which itself stays external.
  if (!FLAG_string_slices) return;
  CcTest::InitializeVM();
  Factory* factory = CcTest::i_isolate()->factory();
  v8::HandleScope scope(CcTest::isolate());

  // Note: the resource lives on the stack of this test.
  OneByteVectorResource resource(
      i::Vector<const char>("abcdefghijklmnopqrstuvwxyz", 26));
  Handle<String> external =
      factory->NewExternalStringFromOneByte(&resource).ToHandleChecked();
  CHECK(external->IsExternalString());

  Handle<String> slice = factory->NewSubString(external, 1, 25);
  CHECK(slice->IsSlicedString());
  CHECK(external->IsExternalString());

  String slice_parent = SlicedString::cast(*slice).parent();
  CHECK_EQ(slice_parent, *external);
  CHECK(SlicedString::cast(*slice).parent().IsExternalString());
  CHECK(slice->IsFlat());

  // This avoids the GC from trying to free stack allocated resources.
  i::Handle<i::ExternalOneByteString>::cast(external)->SetResource(
      CcTest::i_isolate(), nullptr);
}

1419 1420 1421
TEST(TrivialSlice) {
  // This tests whether a slice that contains the entire parent string
  // actually creates a new string (it should not).
  if (!FLAG_string_slices) return;
  CcTest::InitializeVM();
  Factory* factory = CcTest::i_isolate()->factory();
  v8::HandleScope scope(CcTest::isolate());

  CompileRun("var str = 'abcdefghijklmnopqrstuvwxyz';");

  // Slicing the full range from script must not give a sliced string...
  v8::Local<v8::Value> result = CompileRun("str.slice(0,26)");
  CHECK(result->IsString());
  Handle<String> string = v8::Utils::OpenHandle(v8::String::Cast(*result));
  CHECK(!string->IsSlicedString());

  // ...and neither must taking the full range through the factory.
  string = factory->NewSubString(string, 0, 26);
  CHECK(!string->IsSlicedString());

  // Cross-check: a proper sub-range does give a sliced string.
  result = CompileRun("str.slice(1,25)");
  CHECK(result->IsString());
  string = v8::Utils::OpenHandle(v8::String::Cast(*result));
  CHECK(string->IsSlicedString());
  CHECK_EQ(0, strcmp("bcdefghijklmnopqrstuvwxy", string->ToCString().get()));
}
1447 1448 1449 1450

TEST(SliceFromSlice) {
  // This tests slicing a string that is itself a slice: the result must again
  // be a sliced string whose parent is a sequential string, with the expected
  // contents.
  if (!FLAG_string_slices) return;
  CcTest::InitializeVM();
  v8::HandleScope scope(CcTest::isolate());

  CompileRun("var str = 'abcdefghijklmnopqrstuvwxyz';");

  // First level: a slice of the original string.
  v8::Local<v8::Value> result =
      CompileRun("var slice = ''; slice = str.slice(1,-1); slice");
  CHECK(result->IsString());
  Handle<String> string = v8::Utils::OpenHandle(v8::String::Cast(*result));
  CHECK(string->IsSlicedString());
  CHECK(SlicedString::cast(*string).parent().IsSeqString());
  CHECK_EQ(0, strcmp("bcdefghijklmnopqrstuvwxy", string->ToCString().get()));

  // Second level: a slice of that slice.
  result = CompileRun("slice.slice(1,-1);");
  CHECK(result->IsString());
  string = v8::Utils::OpenHandle(v8::String::Cast(*result));
  CHECK(string->IsSlicedString());
  CHECK(SlicedString::cast(*string).parent().IsSeqString());
  CHECK_EQ(0, strcmp("cdefghijklmnopqrstuvwx", string->ToCString().get()));
}
1475

1476 1477
UNINITIALIZED_TEST(OneByteArrayJoin) {
  // Joining an array whose strings add up to more than 2^31 characters must
  // throw instead of producing a result.
  v8::Isolate::CreateParams create_params;
  create_params.array_buffer_allocator = CcTest::array_buffer_allocator();
  // Set heap limits.
  create_params.constraints.set_max_young_generation_size_in_bytes(3 * MB);
#ifdef DEBUG
  create_params.constraints.set_max_old_generation_size_in_bytes(20 * MB);
#else
  create_params.constraints.set_max_old_generation_size_in_bytes(7 * MB);
#endif
  v8::Isolate* isolate = v8::Isolate::New(create_params);
  isolate->Enter();

  {
    // String s is made of 2^17 = 131072 'c' characters and a is an array
    // starting with 'bad', followed by 2^14 times the string s. That means the
    // total length of the concatenated strings is 2^31 + 3. So on 32bit systems
    // summing the lengths of the strings (as Smis) overflows and wraps.
    LocalContext context(isolate);
    v8::HandleScope scope(isolate);
    v8::TryCatch try_catch(isolate);
    static const char* kScript =
        "var two_14 = Math.pow(2, 14);"
        "var two_17 = Math.pow(2, 17);"
        "var s = Array(two_17 + 1).join('c');"
        "var a = ['bad'];"
        "for (var i = 1; i <= two_14; i++) a.push(s);"
        "a.join();";
    // The script must yield no value and leave an exception in |try_catch|.
    CHECK(CompileRun(kScript).IsEmpty());
    CHECK(try_catch.HasCaught());
  }
  isolate->Exit();
  isolate->Dispose();
}
1510 1511
namespace {

1512
int* global_use_counts = nullptr;
1513 1514 1515 1516 1517

void MockUseCounterCallback(v8::Isolate* isolate,
                            v8::Isolate::UseCounterFeature feature) {
  ++global_use_counts[feature];
}
1518
}  // namespace
1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536

TEST(CountBreakIterator) {
  // Using Intl.v8BreakIterator must bump the kBreakIterator use counter
  // exactly once; when Intl is not available the counter must stay at zero.
  CcTest::InitializeVM();
  v8::HandleScope scope(CcTest::isolate());
  LocalContext context;

  // Route use counter notifications into a local, zero-initialized table.
  int use_counts[v8::Isolate::kUseCounterFeatureCount] = {};
  global_use_counts = use_counts;
  CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback);
  CHECK_EQ(0, use_counts[v8::Isolate::kBreakIterator]);

  // The script returns 0 if there is no Intl object, otherwise the result of
  // the second next() call on a break iterator.
  v8::Local<v8::Value> result = CompileRun(
      "(function() {"
      "  if (!this.Intl) return 0;"
      "  var iterator = Intl.v8BreakIterator(['en']);"
      "  iterator.adoptText('Now is the time');"
      "  iterator.next();"
      "  return iterator.next();"
      "})();");
  CHECK(result->IsNumber());

  // Any non-zero result means the iterator was actually used.
  const int32_t returned =
      result->ToInt32(context.local()).ToLocalChecked()->Value();
  const int uses = (returned != 0) ? 1 : 0;
  CHECK_EQ(uses, use_counts[v8::Isolate::kBreakIterator]);

  // Make sure GC cleans up the break iterator, so we don't get a memory leak
  // reported by ASAN.
  CcTest::isolate()->LowMemoryNotification();
}

1545
TEST(StringReplaceAtomTwoByteResult) {
  // Replacing an atom pattern in a one-byte subject with the replacement
  // string used below must give a result in two-byte representation that
  // equals the expected string.
  CcTest::InitializeVM();
  v8::HandleScope scope(CcTest::isolate());
  LocalContext context;

  v8::Local<v8::Value> result = CompileRun(
      "var subject = 'one_byte~only~string~'; "
      "var replace = '\x80';            "
      "subject.replace(/~/g, replace);  ");
  CHECK(result->IsString());

  Handle<String> replaced = v8::Utils::OpenHandle(v8::String::Cast(*result));
  CHECK(replaced->IsTwoByteRepresentation());

  v8::Local<v8::String> expected = v8_str("one_byte\x80only\x80string\x80");
  const bool matches = expected->Equals(context.local(), result).FromJust();
  CHECK(matches);
}
1560 1561

TEST(IsAscii) {
  // Both predicates must accept a zero-length range, even when it is given
  // as a null pointer.
  char* no_chars = nullptr;
  uc16* no_code_units = nullptr;
  CHECK(String::IsAscii(no_chars, 0));
  CHECK(String::IsOneByte(no_code_units, 0));
}
1565

1566
template <typename Op, bool return_first>
1567
static uint16_t ConvertLatin1(uint16_t c) {
1568
  uint32_t result[Op::kMaxWidth];
1569
  int chars;
1570
  chars = Op::Convert(c, 0, result, nullptr);
1571 1572 1573 1574
  if (chars == 0) return 0;
  CHECK_LE(chars, static_cast<int>(sizeof(result)));
  if (!return_first && chars > 1) {
    return 0;
1575
  }
1576
  return result[0];
1577 1578
}

1579
#ifndef V8_INTL_SUPPORT
1580 1581
static void CheckCanonicalEquivalence(uint16_t c, uint16_t test) {
  uint16_t expect = ConvertLatin1<unibrow::Ecma262UnCanonicalize, true>(c);
1582
  if (expect > unibrow::Latin1::kMaxChar || expect == 0) expect = c;
1583 1584 1585 1586
  CHECK_EQ(expect, test);
}

// For every 16-bit code unit above unibrow::Latin1::kMaxChar, compares the
// value returned by unibrow::Latin1::TryConvertToLatin1 with what the
// ToLowercase / ToUppercase / Ecma262UnCanonicalize conversions imply.
TEST(Latin1IgnoreCase) {
  const auto both_above_latin1 = [](uint16_t a, uint16_t b) {
    return a > unibrow::Latin1::kMaxChar && b > unibrow::Latin1::kMaxChar;
  };

  // |c| is 16 bits wide, so incrementing past 0xFFFF wraps around to 0, which
  // terminates the loop.
  for (uint16_t c = unibrow::Latin1::kMaxChar + 1; c != 0; c++) {
    uint16_t lower = ConvertLatin1<unibrow::ToLowercase, false>(c);
    uint16_t upper = ConvertLatin1<unibrow::ToUppercase, false>(c);
    const uint16_t test = unibrow::Latin1::TryConvertToLatin1(c);

    // Neither conversion produced a usable value, or both landed above Latin1:
    // fall back to the canonical-equivalence expectation.
    if ((lower == 0 && upper == 0) || both_above_latin1(lower, upper)) {
      CheckCanonicalEquivalence(c, test);
      continue;
    }

    // Fill in a missing lower/upper value by converting the other one.
    if (lower == 0 && upper != 0) {
      lower = ConvertLatin1<unibrow::ToLowercase, false>(upper);
    }
    if (upper == 0 && lower != c) {
      upper = ConvertLatin1<unibrow::ToUppercase, false>(lower);
    }

    // Re-check after the fix-up; also filter out characters that are neither
    // their own upper nor their own lower value.
    if (both_above_latin1(lower, upper) || (upper != c && lower != c)) {
      CheckCanonicalEquivalence(c, test);
      continue;
    }

    CHECK_EQ(std::min(upper, lower), test);
  }
}
1619
#endif
1620

1621
class DummyResource : public v8::String::ExternalStringResource {
1622
 public:
1623 1624
  const uint16_t* data() const override { return nullptr; }
  size_t length() const override { return 1 << 30; }
1625 1626
};

1627
class DummyOneByteResource : public v8::String::ExternalOneByteStringResource {
1628
 public:
1629 1630
  const char* data() const override { return nullptr; }
  size_t length() const override { return 1 << 30; }
1631 1632 1633 1634 1635 1636
};

// Creating an external string from DummyOneByteResource or DummyResource must
// return a null handle and leave a pending exception on the isolate.
TEST(InvalidExternalString) {
  CcTest::InitializeVM();
  LocalContext context;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();

  // One-byte resource.
  {
    HandleScope scope(isolate);
    DummyOneByteResource one_byte_resource;
    const bool rejected =
        factory->NewExternalStringFromOneByte(&one_byte_resource).is_null();
    CHECK(rejected);
    CHECK(isolate->has_pending_exception());
    isolate->clear_pending_exception();
  }

  // Two-byte resource.
  {
    HandleScope scope(isolate);
    DummyResource two_byte_resource;
    const bool rejected =
        factory->NewExternalStringFromTwoByte(&two_byte_resource).is_null();
    CHECK(rejected);
    CHECK(isolate->has_pending_exception());
    isolate->clear_pending_exception();
  }
}

// Defines TEST(StringOOM<FUN>): hands the factory function FUN a vector of
// String::kMaxLength + 1 elements of TYPE and expects a null result together
// with a pending exception, which is then cleared.
#define INVALID_STRING_TEST(FUN, TYPE)                                         \
  TEST(StringOOM##FUN) {                                                       \
    CcTest::InitializeVM();                                                    \
    LocalContext context;                                                      \
    Isolate* isolate = CcTest::i_isolate();                                    \
    /* kMaxLength + 1 must still fit in an int. */                             \
    STATIC_ASSERT(String::kMaxLength < kMaxInt);                               \
    static const int kTooLong = String::kMaxLength + 1;                        \
    HandleScope scope(isolate);                                                \
    Vector<TYPE> chars = Vector<TYPE>::New(kTooLong);                          \
    const size_t byte_count = chars.length() * sizeof(TYPE);                   \
    memset(chars.begin(), 0x0, byte_count);                                    \
    CHECK(isolate->factory()->FUN(Vector<const TYPE>::cast(chars)).is_null()); \
    /* NOTE(review): purpose of this refill is unclear; confirm. */            \
    memset(chars.begin(), 0x20, byte_count);                                   \
    CHECK(isolate->has_pending_exception());                                   \
    isolate->clear_pending_exception();                                        \
    chars.Dispose();                                                           \
  }

// Instantiate the test for the UTF-8 and the one-byte factory entry points.
INVALID_STRING_TEST(NewStringFromUtf8, char)
INVALID_STRING_TEST(NewStringFromOneByte, uint8_t)

#undef INVALID_STRING_TEST
1675 1676 1677 1678 1679 1680 1681 1682 1683 1684

// Formats MessageTemplate::kPropertyNotFunction with three arguments and
// compares the output with the fully substituted message text.
TEST(FormatMessage) {
  CcTest::InitializeVM();
  LocalContext context;
  Isolate* isolate = CcTest::i_isolate();
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  Handle<String> first = factory->NewStringFromAsciiChecked("arg0");
  Handle<String> second = factory->NewStringFromAsciiChecked("arg1");
  Handle<String> third = factory->NewStringFromAsciiChecked("arg2");

  Handle<String> formatted =
      MessageFormatter::Format(isolate, MessageTemplate::kPropertyNotFunction,
                               first, second, third)
          .ToHandleChecked();
  Handle<String> want = factory->NewStringFromAsciiChecked(
      "'arg0' returned for property 'arg1' of object 'arg2' is not a function");

  CHECK(String::Equals(isolate, formatted, want));
}
1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713

// String.fromCharCode called with many arguments must return a sequential
// string: one-byte when every code is 32, two-byte when every code is 432.
TEST(Regress609831) {
  CcTest::InitializeVM();
  LocalContext context;
  Isolate* isolate = CcTest::i_isolate();

  // All char codes are 32.
  {
    HandleScope scope(isolate);
    v8::Local<v8::Value> narrow = CompileRun(
        "String.fromCharCode(32, 32, 32, 32, 32, "
        "32, 32, 32, 32, 32, 32, 32, 32, 32, 32, "
        "32, 32, 32, 32, 32, 32, 32, 32, 32, 32)");
    auto narrow_object = v8::Utils::OpenHandle(*narrow);
    CHECK(narrow_object->IsSeqOneByteString());
  }

  // All char codes are 432.
  {
    HandleScope scope(isolate);
    v8::Local<v8::Value> wide = CompileRun(
        "String.fromCharCode(432, 432, 432, 432, 432, "
        "432, 432, 432, 432, 432, 432, 432, 432, 432, "
        "432, 432, 432, 432, 432, 432, 432, 432, 432)");
    auto wide_object = v8::Utils::OpenHandle(*wide);
    CHECK(wide_object->IsSeqTwoByteString());
  }
}
1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748

// Exposes an external one-byte string to script as the global "external" and
// checks the results of several indexOf calls on it.
TEST(ExternalStringIndexOf) {
  CcTest::InitializeVM();
  LocalContext context;
  v8::HandleScope scope(CcTest::isolate());

  const char* raw_string = "abcdefghijklmnopqrstuvwxyz";
  v8::Local<v8::String> external_string =
      v8::String::NewExternalOneByte(CcTest::isolate(),
                                     new StaticOneByteResource(raw_string))
          .ToLocalChecked();
  context->Global()
      ->Set(context.local(), v8_str("external"), external_string)
      .FromJust();

  // Runs |script| and converts its completion value to an int32.
  const auto run_to_int32 = [&context](const char* script) {
    return CompileRun(script)->Int32Value(context.local()).FromJust();
  };

  // Offset 18 of the template is the '%' placeholder between the quotes; it is
  // overwritten with each character of |raw_string| in turn, and the reported
  // index must be that character's position.
  char source[] = "external.indexOf('%')";
  const size_t raw_length = strlen(raw_string);
  for (size_t i = 0; i < raw_length; i++) {
    source[18] = raw_string[i];
    CHECK_EQ(static_cast<int>(i), run_to_int32(source));
  }

  // Needle longer than the subject.
  CHECK_EQ(-1, run_to_int32(
                   "external.indexOf('abcdefghijklmnopqrstuvwxyz%%%%%%')"));
  // Empty needle with a start position.
  CHECK_EQ(1, run_to_int32("external.indexOf('', 1)"));
  // Needle that only occurs before the start position.
  CHECK_EQ(-1, run_to_int32("external.indexOf('a', 1)"));
  // Needle that does not occur at all.
  CHECK_EQ(-1, run_to_int32("external.indexOf('$')"));
}
1749

1750 1751
// Defines TEST(GCInsideNewStringFromUtf8SubStringWith<NAME>): copies STRING
// into a one-byte heap string, simulates a full space for the generation that
// string lives in, then decodes the substring starting at offset 2 with
// NewStringFromUtf8SubString and compares it with NewStringFromUtf8 applied to
// the same bytes of the C string.
#define GC_INSIDE_NEW_STRING_FROM_UTF8_SUB_STRING(NAME, STRING)                \
  TEST(GCInsideNewStringFromUtf8SubStringWith##NAME) {                         \
    FLAG_stress_concurrent_allocation = false; /* For SimulateFullSpace. */    \
    CcTest::InitializeVM();                                                    \
    LocalContext context;                                                      \
    v8::HandleScope scope(CcTest::isolate());                                  \
    Isolate* isolate = CcTest::i_isolate();                                    \
    Factory* factory = isolate->factory();                                     \
    /* Length must be bigger than the buffer size of the Utf8Decoder. */       \
    const char* raw = STRING;                                                  \
    const size_t raw_length = strlen(raw);                                     \
    Handle<String> full_string =                                               \
        factory                                                                \
            ->NewStringFromOneByte(Vector<const uint8_t>(                      \
                reinterpret_cast<const uint8_t*>(raw), raw_length))            \
            .ToHandleChecked();                                                \
    /* Fill up whichever space the freshly allocated string ended up in. */    \
    if (!FLAG_single_generation) {                                             \
      CHECK(Heap::InYoungGeneration(*full_string));                            \
      heap::SimulateFullSpace(isolate->heap()->new_space());                   \
    } else {                                                                   \
      CHECK(!Heap::InYoungGeneration(*full_string));                           \
      heap::SimulateFullSpace(isolate->heap()->old_space());                   \
    }                                                                          \
    /* Offset by two to check substring-ing. */                                \
    Handle<String> decoded =                                                   \
        factory                                                                \
            ->NewStringFromUtf8SubString(                                      \
                Handle<SeqOneByteString>::cast(full_string), 2,                \
                static_cast<int>(raw_length - 2))                              \
            .ToHandleChecked();                                                \
    Handle<String> reference =                                                 \
        factory->NewStringFromUtf8(Vector<const char>(raw + 2, raw_length - 2)) \
            .ToHandleChecked();                                                \
    CHECK(decoded->Equals(*reference));                                        \
  }

// Instantiation with an input made up solely of the ASCII letter 'Q'.
GC_INSIDE_NEW_STRING_FROM_UTF8_SUB_STRING(
    OneByte,
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ")
// Instantiation whose input additionally starts and ends with "QQ" followed by
// two copies of the four-byte sequence \xF0\x9F\x98\x8D.
GC_INSIDE_NEW_STRING_FROM_UTF8_SUB_STRING(
    TwoByte,
    "QQ\xF0\x9F\x98\x8D\xF0\x9F\x98\x8D"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    "QQ\xF0\x9F\x98\x8D\xF0\x9F\x98\x8D")

// The helper macro is only needed for the two instantiations above.
#undef GC_INSIDE_NEW_STRING_FROM_UTF8_SUB_STRING

1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831
namespace {

// One test case for TestString: a string plus the index-related results that
// are expected for it.
struct IndexData {
  // The string to test.
  const char* string;
  // When true, String::AsArrayIndex must succeed and yield |array_index|.
  bool is_array_index;
  uint32_t array_index;
  // When true, String::AsIntegerIndex must succeed and yield |integer_index|;
  // when false, the hash field must have a kIsNotIntegerIndexMask bit set.
  bool is_integer_index;
  size_t integer_index;
};

// Builds a heap string from |data.string| and verifies the array-index,
// integer-index and hash-field expectations recorded in |data|.
void TestString(i::Isolate* isolate, const IndexData& data) {
  Handle<String> str =
      isolate->factory()->NewStringFromAsciiChecked(data.string);

  if (data.is_array_index) {
    uint32_t array_index;
    CHECK(str->AsArrayIndex(&array_index));
    CHECK_EQ(data.array_index, array_index);
  }

  if (data.is_integer_index) {
    size_t integer_index;
    CHECK(str->AsIntegerIndex(&integer_index));
    CHECK_EQ(data.integer_index, integer_index);
    // Once hashed, the "not an integer index" bits must be clear.
    str->EnsureHash();
    CHECK_EQ(0, str->raw_hash_field() & String::kIsNotIntegerIndexMask);
    CHECK(str->HasHashCode());
  }

  // Make sure a hash has been computed whichever branches ran above.
  if (!str->HasHashCode()) str->EnsureHash();
  CHECK(str->HasHashCode());

  if (!data.is_integer_index) {
    CHECK_NE(0, str->raw_hash_field() & String::kIsNotIntegerIndexMask);
  }
}

}  // namespace

1845 1846 1847 1848 1849 1850 1851 1852
// Checks the hashes of the factory's zero_string and one_string against
// StringHasher::MakeArrayIndexHash, then runs TestString over a table of
// strings at and around the array-index and integer-index limits.
TEST(HashArrayIndexStrings) {
  CcTest::InitializeVM();
  LocalContext context;
  v8::HandleScope scope(CcTest::isolate());
  i::Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();

  const auto zero_hash =
      StringHasher::MakeArrayIndexHash(0 /* value */, 1 /* length */) >>
      Name::kHashShift;
  CHECK_EQ(zero_hash, factory->zero_string()->hash());

  const auto one_hash =
      StringHasher::MakeArrayIndexHash(1 /* value */, 1 /* length */) >>
      Name::kHashShift;
  CHECK_EQ(one_hash, factory->one_string()->hash());

  // Columns: string, is_array_index, array_index, is_integer_index,
  // integer_index.
  IndexData tests[] = {
    {"", false, 0, false, 0},
    {"123no", false, 0, false, 0},
    {"12345", true, 12345, true, 12345},
    {"12345678", true, 12345678, true, 12345678},
    {"4294967294", true, 4294967294u, true, 4294967294u},
#if V8_TARGET_ARCH_32_BIT
    {"4294967295", false, 0, false, 0},  // Valid length but not index.
    {"4294967296", false, 0, false, 0},
    {"9007199254740991", false, 0, false, 0},
#else
    {"4294967295", false, 0, true, 4294967295u},
    {"4294967296", false, 0, true, 4294967296ull},
    {"9007199254740991", false, 0, true, 9007199254740991ull},
#endif
    {"9007199254740992", false, 0, false, 0},
    {"18446744073709551615", false, 0, false, 0},
    {"18446744073709551616", false, 0, false, 0}
  };
  for (const IndexData& entry : tests) {
    TestString(isolate, entry);
  }
}

1883 1884 1885 1886 1887
// v8::String::StringEquals must compare by content: it holds for the same
// handle, for a separately created string with the same text, and for a
// string with the same text created from two-byte data.
TEST(StringEquals) {
  v8::V8::Initialize();
  v8::Isolate* isolate = CcTest::isolate();
  v8::HandleScope scope(isolate);

  auto foo = v8::String::NewFromUtf8Literal(isolate, "foo");
  auto bar = v8::String::NewFromUtf8Literal(isolate, "bar");
  auto foo_again = v8::String::NewFromUtf8Literal(isolate, "foo");

  // The temporary two-byte buffer is released right after the string has been
  // created from it.
  uint16_t* two_byte_chars = AsciiToTwoByteString("foo");
  auto foo_two_byte =
      v8::String::NewFromTwoByte(isolate, two_byte_chars).ToLocalChecked();
  i::DeleteArray(two_byte_chars);

  CHECK(foo->StringEquals(foo));
  CHECK(!foo->StringEquals(bar));
  CHECK(foo->StringEquals(foo_again));
  CHECK(foo->StringEquals(foo_two_byte));
  CHECK(!bar->StringEquals(foo_again));
}

1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917
// External one-byte string resource that owns its character buffer and frees
// it with delete[] when destroyed.
class OneByteStringResource : public v8::String::ExternalOneByteStringResource {
 public:
  // Takes ownership of |data|.
  OneByteStringResource(char* data, size_t length)
      : data_(data), length_(length) {}

  ~OneByteStringResource() override { delete[] data_; }

  size_t length() const override { return length_; }
  const char* data() const override { return data_; }

 private:
  char* data_;
  size_t length_;
};

1918
// Builds a sliced string on top of a two-byte parent, internalizes the parent
// so that it becomes a thin string, externalizes the thin string's actual
// string with a one-byte resource, and then checks which representation each
// string in the chain reports.
TEST(Regress876759) {
  // Thin strings are used in conjunction with young gen
  if (FLAG_single_generation) return;
  v8::V8::Initialize();
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();

  HandleScope handle_scope(isolate);

  // Fill a two-byte and a one-byte buffer with the same repeating digits.
  constexpr int kLength = 30;
  uc16 two_byte_chars[kLength];
  char* one_byte_chars = new char[kLength];
  for (int i = 0; i < kLength; i++) {
    const char digit = '0' + (i % 10);
    two_byte_chars[i] = digit;
    one_byte_chars[i] = digit;
  }

  Handle<String> parent;
  {
    Handle<SeqTwoByteString> raw =
        factory->NewRawTwoByteString(kLength).ToHandleChecked();
    DisallowGarbageCollection no_gc;
    CopyChars(raw->GetChars(no_gc), two_byte_chars, kLength);
    parent = raw;
  }
  CHECK(parent->IsTwoByteRepresentation());

  Handle<String> sliced = factory->NewSubString(parent, 1, 20);
  CHECK(sliced->IsSlicedString());

  // After internalization |parent| is a thin string; the string it refers to
  // is called |grandparent| here.
  factory->InternalizeString(parent);
  CHECK(parent->IsThinString());
  Handle<String> grandparent =
      handle(ThinString::cast(*parent).actual(), isolate);
  CHECK_EQ(*parent, SlicedString::cast(*sliced).parent());

  // |external| takes ownership of |one_byte_chars|.
  OneByteStringResource* external =
      new OneByteStringResource(one_byte_chars, kLength);
  grandparent->MakeExternal(external);

  // The grandparent string becomes one-byte, but the child strings are still
  // two-byte.
  CHECK(grandparent->IsOneByteRepresentation());
  CHECK(parent->IsTwoByteRepresentation());
  CHECK(sliced->IsTwoByteRepresentation());
  // The *Underneath version returns the correct representation.
  CHECK(String::IsOneByteRepresentationUnderneath(*sliced));
}

1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
// Demonstrates that an external one-byte string which is not uncached can be
// internalized in place, i.e. without making a copy.
TEST(InternalizeExternalString) {
  CcTest::InitializeVM();
  Factory* factory = CcTest::i_isolate()->factory();
  v8::HandleScope scope(CcTest::isolate());

  // Set up an external string backed by a duplicate of the literal.
  const char* raw_string = "external";
  OneByteResource* one_byte_resource =
      new OneByteResource(i::StrDup(raw_string), strlen(raw_string));
  Handle<String> string =
      factory->NewExternalStringFromOneByte(one_byte_resource)
          .ToHandleChecked();
  CHECK(string->IsExternalString());

  // Precondition: the external string is not uncached.
  Handle<ExternalString> as_external = Handle<ExternalString>::cast(string);
  CHECK(!as_external->is_uncached());

  // Internalizing must succeed and return the very same string.
  Handle<String> internalized = factory->InternalizeString(as_external);
  CHECK(string->IsInternalizedString());
  CHECK(string.equals(internalized));
}

// Demonstrates that an external two-byte string which is not uncached can be
// internalized in place, i.e. without making a copy.
TEST(InternalizeExternalStringTwoByte) {
  CcTest::InitializeVM();
  Factory* factory = CcTest::i_isolate()->factory();
  v8::HandleScope scope(CcTest::isolate());

  // Set up an external string backed by a two-byte copy of the literal.
  const char* raw_string = "external";
  Resource* two_byte_resource =
      new Resource(AsciiToTwoByteString(raw_string), strlen(raw_string));
  Handle<String> string =
      factory->NewExternalStringFromTwoByte(two_byte_resource)
          .ToHandleChecked();
  CHECK(string->IsExternalString());

  // Precondition: the external string is not uncached.
  Handle<ExternalString> as_external = Handle<ExternalString>::cast(string);
  CHECK(!as_external->is_uncached());

  // Internalizing must succeed and return the very same string.
  Handle<String> internalized = factory->InternalizeString(as_external);
  CHECK(string->IsInternalizedString());
  CHECK(string.equals(internalized));
}

2014
}  // namespace test_strings
2015 2016
}  // namespace internal
}  // namespace v8