builtins-string.cc 17.7 KB
Newer Older
1 2 3 4
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5
#include "src/builtins/builtins-utils-inl.h"
6
#include "src/builtins/builtins.h"
7
#include "src/heap/heap-inl.h"  // For ToBoolean. TODO(jkummerow): Drop.
8
#include "src/logging/counters.h"
9
#include "src/numbers/conversions.h"
10
#include "src/objects/objects-inl.h"
11 12 13
#ifdef V8_INTL_SUPPORT
#include "src/objects/intl-objects.h"
#endif
14
#include "src/regexp/regexp-utils.h"
15 16 17 18
#include "src/strings/string-builder-inl.h"
#include "src/strings/string-case.h"
#include "src/strings/unicode-inl.h"
#include "src/strings/unicode.h"
19 20 21 22 23 24 25

namespace v8 {
namespace internal {

namespace {  // for String.fromCodePoint

bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
26 27
  if (!value->IsNumber() &&
      !Object::ToNumber(isolate, value).ToHandle(&value)) {
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
    return false;
  }

  if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() !=
      value->Number()) {
    return false;
  }

  if (value->Number() < 0 || value->Number() > 0x10FFFF) {
    return false;
  }

  return true;
}

43 44
static constexpr uc32 kInvalidCodePoint = static_cast<uc32>(-1);

45
uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
46
  Handle<Object> value = args.at(1 + index);
47 48
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, value, Object::ToNumber(isolate, value), kInvalidCodePoint);
49 50 51
  if (!IsValidCodePoint(isolate, value)) {
    isolate->Throw(*isolate->factory()->NewRangeError(
        MessageTemplate::kInvalidCodePoint, value));
52
    return kInvalidCodePoint;
53 54 55 56 57 58 59 60 61 62
  }
  return DoubleToUint32(value->Number());
}

}  // namespace

// ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
BUILTIN(StringFromCodePoint) {
  HandleScope scope(isolate);
  int const length = args.length() - 1;
63
  if (length == 0) return ReadOnlyRoots(isolate).empty_string();
64 65 66 67
  DCHECK_LT(0, length);

  // Optimistically assume that the resulting String contains only one byte
  // characters.
68 69
  std::vector<uint8_t> one_byte_buffer;
  one_byte_buffer.reserve(length);
70 71 72 73
  uc32 code = 0;
  int index;
  for (index = 0; index < length; index++) {
    code = NextCodePoint(isolate, args, index);
74
    if (code == kInvalidCodePoint) {
75
      return ReadOnlyRoots(isolate).exception();
76 77 78 79
    }
    if (code > String::kMaxOneByteCharCode) {
      break;
    }
80
    one_byte_buffer.push_back(code);
81 82 83
  }

  if (index == length) {
84 85 86
    RETURN_RESULT_OR_FAILURE(
        isolate, isolate->factory()->NewStringFromOneByte(Vector<uint8_t>(
                     one_byte_buffer.data(), one_byte_buffer.size())));
87 88
  }

89 90
  std::vector<uc16> two_byte_buffer;
  two_byte_buffer.reserve(length - index);
91 92

  while (true) {
93
    if (code <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
94
      two_byte_buffer.push_back(code);
95
    } else {
96 97
      two_byte_buffer.push_back(unibrow::Utf16::LeadSurrogate(code));
      two_byte_buffer.push_back(unibrow::Utf16::TrailSurrogate(code));
98 99 100 101 102 103
    }

    if (++index == length) {
      break;
    }
    code = NextCodePoint(isolate, args, index);
104
    if (code == kInvalidCodePoint) {
105
      return ReadOnlyRoots(isolate).exception();
106 107 108 109 110 111
    }
  }

  Handle<SeqTwoByteString> result;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result,
112 113
      isolate->factory()->NewRawTwoByteString(
          static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size())));
114

115
  DisallowGarbageCollection no_gc;
116 117 118 119
  CopyChars(result->GetChars(no_gc), one_byte_buffer.data(),
            one_byte_buffer.size());
  CopyChars(result->GetChars(no_gc) + one_byte_buffer.size(),
            two_byte_buffer.data(), two_byte_buffer.size());
120 121 122 123

  return *result;
}

124 125 126 127 128 129 130 131 132
// ES6 section 21.1.3.9
// String.prototype.lastIndexOf ( searchString [ , position ] )
BUILTIN(StringPrototypeLastIndexOf) {
  HandleScope handle_scope(isolate);
  return String::LastIndexOf(isolate, args.receiver(),
                             args.atOrUndefined(isolate, 1),
                             args.atOrUndefined(isolate, 2));
}

133 134 135 136 137 138
// ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
//
// This function is implementation specific.  For now, we do not
// do anything locale specific.
BUILTIN(StringPrototypeLocaleCompare) {
  HandleScope handle_scope(isolate);
139 140

  isolate->CountUsage(v8::Isolate::UseCounterFeature::kStringLocaleCompare);
141
  const char* method = "String.prototype.localeCompare";
142

143
#ifdef V8_INTL_SUPPORT
144
  TO_THIS_STRING(str1, method);
145 146 147 148
  Handle<String> str2;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, str2, Object::ToString(isolate, args.atOrUndefined(isolate, 1)));
  RETURN_RESULT_OR_FAILURE(
149 150 151
      isolate, Intl::StringLocaleCompare(
                   isolate, str1, str2, args.atOrUndefined(isolate, 2),
                   args.atOrUndefined(isolate, 3), method));
152
#else
153
  DCHECK_LE(2, args.length());
154

155
  TO_THIS_STRING(str1, method);
156
  Handle<String> str2;
157 158
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2,
                                     Object::ToString(isolate, args.at(1)));
159

160
  if (str1.is_identical_to(str2)) return Smi::zero();  // Equal.
161 162 163 164 165
  int str1_length = str1->length();
  int str2_length = str2->length();

  // Decide trivial cases without flattening.
  if (str1_length == 0) {
166
    if (str2_length == 0) return Smi::zero();  // Equal.
167 168 169 170 171 172 173 174 175 176 177 178 179
    return Smi::FromInt(-str2_length);
  } else {
    if (str2_length == 0) return Smi::FromInt(str1_length);
  }

  int end = str1_length < str2_length ? str1_length : str2_length;

  // No need to flatten if we are going to find the answer on the first
  // character. At this point we know there is at least one character
  // in each string, due to the trivial case handling above.
  int d = str1->Get(0) - str2->Get(0);
  if (d != 0) return Smi::FromInt(d);

180 181
  str1 = String::Flatten(isolate, str1);
  str2 = String::Flatten(isolate, str2);
182

183
  DisallowGarbageCollection no_gc;
184 185
  String::FlatContent flat1 = str1->GetFlatContent(no_gc);
  String::FlatContent flat2 = str2->GetFlatContent(no_gc);
186 187 188 189 190 191 192 193

  for (int i = 0; i < end; i++) {
    if (flat1.Get(i) != flat2.Get(i)) {
      return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
    }
  }

  return Smi::FromInt(str1_length - str2_length);
194
#endif  // !V8_INTL_SUPPORT
195 196
}

197
#ifndef V8_INTL_SUPPORT
198 199 200
// ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
//
// Simply checks the argument is valid and returns the string itself.
201
// If internationalization is enabled, then intl.js will override this function
202 203 204 205 206 207 208 209 210 211 212 213
// and provide the proper functionality, so this is just a fallback.
BUILTIN(StringPrototypeNormalize) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(string, "String.prototype.normalize");

  Handle<Object> form_input = args.atOrUndefined(isolate, 1);
  if (form_input->IsUndefined(isolate)) return *string;

  Handle<String> form;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
                                     Object::ToString(isolate, form_input));

214 215 216 217
  if (!(String::Equals(isolate, form, isolate->factory()->NFC_string()) ||
        String::Equals(isolate, form, isolate->factory()->NFD_string()) ||
        String::Equals(isolate, form, isolate->factory()->NFKC_string()) ||
        String::Equals(isolate, form, isolate->factory()->NFKD_string()))) {
218 219 220 221 222 223 224 225 226
    Handle<String> valid_forms =
        isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate,
        NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
  }

  return *string;
}
227
#endif  // !V8_INTL_SUPPORT
228

229

230
#ifndef V8_INTL_SUPPORT
231 232 233 234 235
namespace {

inline bool ToUpperOverflows(uc32 character) {
  // y with umlauts and the micro sign are the only characters that stop
  // fitting into one-byte when converting to uppercase.
236 237
  static const uc32 yuml_code = 0xFF;
  static const uc32 micro_code = 0xB5;
238 239 240 241
  return (character == yuml_code || character == micro_code);
}

template <class Converter>
242
V8_WARN_UNUSED_RESULT static Object ConvertCaseHelper(
243
    Isolate* isolate, String string, SeqString result, int result_length,
244
    unibrow::Mapping<Converter, 128>* mapping) {
245
  DisallowGarbageCollection no_gc;
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
  // We try this twice, once with the assumption that the result is no longer
  // than the input and, if that assumption breaks, again with the exact
  // length.  This may not be pretty, but it is nicer than what was here before
  // and I hereby claim my vaffel-is.
  //
  // NOTE: This assumes that the upper/lower case of an ASCII
  // character is also ASCII.  This is currently the case, but it
  // might break in the future if we implement more context and locale
  // dependent upper/lower conversions.
  bool has_changed_character = false;

  // Convert all characters to upper case, assuming that they will fit
  // in the buffer
  StringCharacterStream stream(string);
  unibrow::uchar chars[Converter::kMaxWidth];
  // We can assume that the string is not empty
  uc32 current = stream.GetNext();
263
  bool ignore_overflow = Converter::kIsToLower || result.IsSeqTwoByteString();
264 265 266 267 268 269
  for (int i = 0; i < result_length;) {
    bool has_next = stream.HasMore();
    uc32 next = has_next ? stream.GetNext() : 0;
    int char_length = mapping->get(current, next, chars);
    if (char_length == 0) {
      // The case conversion of this character is the character itself.
270
      result.Set(i, current);
271 272 273 274 275
      i++;
    } else if (char_length == 1 &&
               (ignore_overflow || !ToUpperOverflows(current))) {
      // Common case: converting the letter resulted in one character.
      DCHECK(static_cast<uc32>(chars[0]) != current);
276
      result.Set(i, chars[0]);
277 278
      has_changed_character = true;
      i++;
279
    } else if (result_length == string.length()) {
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308
      bool overflows = ToUpperOverflows(current);
      // We've assumed that the result would be as long as the
      // input but here is a character that converts to several
      // characters.  No matter, we calculate the exact length
      // of the result and try the whole thing again.
      //
      // Note that this leaves room for optimization.  We could just
      // memcpy what we already have to the result string.  Also,
      // the result string is the last object allocated we could
      // "realloc" it and probably, in the vast majority of cases,
      // extend the existing string to be able to hold the full
      // result.
      int next_length = 0;
      if (has_next) {
        next_length = mapping->get(next, 0, chars);
        if (next_length == 0) next_length = 1;
      }
      int current_length = i + char_length + next_length;
      while (stream.HasMore()) {
        current = stream.GetNext();
        overflows |= ToUpperOverflows(current);
        // NOTE: we use 0 as the next character here because, while
        // the next character may affect what a character converts to,
        // it does not in any case affect the length of what it convert
        // to.
        int char_length = mapping->get(current, 0, chars);
        if (char_length == 0) char_length = 1;
        current_length += char_length;
        if (current_length > String::kMaxLength) {
309
          AllowGarbageCollection allocate_error_and_return;
310 311 312 313 314 315 316 317 318 319
          THROW_NEW_ERROR_RETURN_FAILURE(isolate,
                                         NewInvalidStringLengthError());
        }
      }
      // Try again with the real length.  Return signed if we need
      // to allocate a two-byte string for to uppercase.
      return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
                                             : Smi::FromInt(current_length);
    } else {
      for (int j = 0; j < char_length; j++) {
320
        result.Set(i, chars[j]);
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338
        i++;
      }
      has_changed_character = true;
    }
    current = next;
  }
  if (has_changed_character) {
    return result;
  } else {
    // If we didn't actually change anything in doing the conversion
    // we simple return the result and let the converted string
    // become garbage; there is no reason to keep two identical strings
    // alive.
    return string;
  }
}

template <class Converter>
339
V8_WARN_UNUSED_RESULT static Object ConvertCase(
340 341
    Handle<String> s, Isolate* isolate,
    unibrow::Mapping<Converter, 128>* mapping) {
342
  s = String::Flatten(isolate, s);
343 344 345 346 347 348 349 350 351 352
  int length = s->length();
  // Assume that the string is not empty; we need this assumption later
  if (length == 0) return *s;

  // Simpler handling of ASCII strings.
  //
  // NOTE: This assumes that the upper/lower case of an ASCII
  // character is also ASCII.  This is currently the case, but it
  // might break in the future if we implement more context and locale
  // dependent upper/lower conversions.
353
  if (String::IsOneByteRepresentationUnderneath(*s)) {
354 355 356
    // Same length as input.
    Handle<SeqOneByteString> result =
        isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
357
    DisallowGarbageCollection no_gc;
358
    String::FlatContent flat_content = s->GetFlatContent(no_gc);
359 360 361
    DCHECK(flat_content.IsFlat());
    bool has_changed_character = false;
    int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
362
        reinterpret_cast<char*>(result->GetChars(no_gc)),
363
        reinterpret_cast<const char*>(flat_content.ToOneByteVector().begin()),
364 365 366 367 368 369 370 371 372 373 374 375 376
        length, &has_changed_character);
    // If not ASCII, we discard the result and take the 2 byte path.
    if (index_to_first_unprocessed == length)
      return has_changed_character ? *result : *s;
  }

  Handle<SeqString> result;  // Same length as input.
  if (s->IsOneByteRepresentation()) {
    result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
  } else {
    result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
  }

377
  Object answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
378
  if (answer.IsException(isolate) || answer.IsString()) return answer;
379

380
  DCHECK(answer.IsSmi());
jgruber's avatar
jgruber committed
381
  length = Smi::ToInt(answer);
382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421
  if (s->IsOneByteRepresentation() && length > 0) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, isolate->factory()->NewRawOneByteString(length));
  } else {
    if (length < 0) length = -length;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, isolate->factory()->NewRawTwoByteString(length));
  }
  return ConvertCaseHelper(isolate, *s, *result, length, mapping);
}

}  // namespace

BUILTIN(StringPrototypeToLocaleLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toLocaleLowerCase");
  return ConvertCase(string, isolate,
                     isolate->runtime_state()->to_lower_mapping());
}

BUILTIN(StringPrototypeToLocaleUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toLocaleUpperCase");
  return ConvertCase(string, isolate,
                     isolate->runtime_state()->to_upper_mapping());
}

BUILTIN(StringPrototypeToLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toLowerCase");
  return ConvertCase(string, isolate,
                     isolate->runtime_state()->to_lower_mapping());
}

BUILTIN(StringPrototypeToUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toUpperCase");
  return ConvertCase(string, isolate,
                     isolate->runtime_state()->to_upper_mapping());
}
422
#endif  // !V8_INTL_SUPPORT
423

424 425 426 427 428 429 430 431 432 433 434 435 436
// ES6 #sec-string.prototype.raw
BUILTIN(StringRaw) {
  HandleScope scope(isolate);
  Handle<Object> templ = args.atOrUndefined(isolate, 1);
  const uint32_t argc = args.length();
  Handle<String> raw_string =
      isolate->factory()->NewStringFromAsciiChecked("raw");

  Handle<Object> cooked;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, cooked,
                                     Object::ToObject(isolate, templ));

  Handle<Object> raw;
437 438
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, raw, Object::GetProperty(isolate, cooked, raw_string));
439 440 441 442 443
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw,
                                     Object::ToObject(isolate, raw));
  Handle<Object> raw_len;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, raw_len,
444
      Object::GetProperty(isolate, raw, isolate->factory()->length_string()));
445 446 447 448 449

  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw_len,
                                     Object::ToLength(isolate, raw_len));

  IncrementalStringBuilder result_builder(isolate);
450 451 452 453 454 455
  // Intentional spec violation: we ignore {length} values >= 2^32, because
  // assuming non-empty chunks they would generate too-long strings anyway.
  const double raw_len_number = raw_len->Number();
  const uint32_t length = raw_len_number > std::numeric_limits<uint32_t>::max()
                              ? std::numeric_limits<uint32_t>::max()
                              : static_cast<uint32_t>(raw_len_number);
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488
  if (length > 0) {
    Handle<Object> first_element;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, first_element,
                                       Object::GetElement(isolate, raw, 0));

    Handle<String> first_string;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, first_string, Object::ToString(isolate, first_element));
    result_builder.AppendString(first_string);

    for (uint32_t i = 1, arg_i = 2; i < length; i++, arg_i++) {
      if (arg_i < argc) {
        Handle<String> argument_string;
        ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
            isolate, argument_string,
            Object::ToString(isolate, args.at(arg_i)));
        result_builder.AppendString(argument_string);
      }

      Handle<Object> element;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element,
                                         Object::GetElement(isolate, raw, i));

      Handle<String> element_string;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element_string,
                                         Object::ToString(isolate, element));
      result_builder.AppendString(element_string);
    }
  }

  RETURN_RESULT_OR_FAILURE(isolate, result_builder.Finish());
}

489 490
}  // namespace internal
}  // namespace v8