// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5 6 7 8 9
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif  // V8_INTL_SUPPORT

#include "src/objects/intl-objects.h"
10

11
#include <algorithm>
12
#include <memory>
13
#include <string>
14
#include <vector>
15

16
#include "src/api/api-inl.h"
17
#include "src/execution/isolate.h"
18
#include "src/handles/global-handles.h"
19
#include "src/heap/factory.h"
20
#include "src/objects/js-collator-inl.h"
21
#include "src/objects/js-date-time-format-inl.h"
22
#include "src/objects/js-locale-inl.h"
23
#include "src/objects/js-locale.h"
24
#include "src/objects/js-number-format-inl.h"
25
#include "src/objects/objects-inl.h"
26
#include "src/objects/property-descriptor.h"
27
#include "src/objects/string.h"
28
#include "src/strings/string-case.h"
Frank Tang's avatar
Frank Tang committed
29
#include "unicode/basictz.h"
30
#include "unicode/brkiter.h"
31
#include "unicode/calendar.h"
32
#include "unicode/coll.h"
33
#include "unicode/datefmt.h"
34
#include "unicode/decimfmt.h"
35
#include "unicode/formattedvalue.h"
36
#include "unicode/localebuilder.h"
37
#include "unicode/localematcher.h"
38
#include "unicode/locid.h"
39
#include "unicode/normalizer2.h"
40
#include "unicode/numberformatter.h"
41
#include "unicode/numfmt.h"
42 43
#include "unicode/numsys.h"
#include "unicode/timezone.h"
44
#include "unicode/ures.h"
Frank Tang's avatar
Frank Tang committed
45
#include "unicode/ustring.h"
46 47 48 49 50 51 52 53 54
#include "unicode/uvernum.h"  // U_ICU_VERSION_MAJOR_NUM

// Compile-time guard: refuse to build when the ICU major version is lower
// than V8_MINIMUM_ICU_VERSION.
// XSTR/STR are a two-level stringification pair: XSTR lets its argument be
// macro-expanded before STR turns it into a string literal, so that (assuming
// V8_MINIMUM_ICU_VERSION is a macro) the message shows its expansion.
#define XSTR(s) STR(s)
#define STR(s) #s
static_assert(
    V8_MINIMUM_ICU_VERSION <= U_ICU_VERSION_MAJOR_NUM,
    "v8 is required to build with ICU " XSTR(V8_MINIMUM_ICU_VERSION) " and up");
// The helpers are only needed for the message above; remove them again.
#undef STR
#undef XSTR
55 56 57 58

namespace v8 {
namespace internal {

Frank Tang's avatar
Frank Tang committed
59 60
namespace {

61
// Lower-casing table for the Latin-1 range, indexed by code unit (0x00-0xFF).
// Every entry maps to itself except:
//   * 0x41-0x5A ('A'-'Z')           -> 0x61-0x7A
//   * 0xC0-0xDE, excluding 0xD7     -> 0xE0-0xFE
// 0xD7 and 0xDF are deliberately left unchanged.
constexpr uint8_t kToLower[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  // 0x00
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,  // 0x08
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,  // 0x10
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,  // 0x18
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,  // 0x20
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,  // 0x28
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,  // 0x30
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,  // 0x38
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,  // 0x40
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,  // 0x48
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,  // 0x50
    0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,  // 0x58
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,  // 0x60
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,  // 0x68
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,  // 0x70
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,  // 0x78
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,  // 0x80
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,  // 0x88
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,  // 0x90
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,  // 0x98
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,  // 0xA0
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,  // 0xA8
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,  // 0xB0
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,  // 0xB8
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,  // 0xC0
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,  // 0xC8
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,  // 0xD0
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,  // 0xD8
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,  // 0xE0
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,  // 0xE8
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,  // 0xF0
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,  // 0xF8
};

86
inline constexpr uint16_t ToLatin1Lower(uint16_t ch) {
Frank Tang's avatar
Frank Tang committed
87 88 89 90
  return static_cast<uint16_t>(kToLower[ch]);
}

// Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
91
inline constexpr uint16_t ToLatin1Upper(uint16_t ch) {
92
  CONSTEXPR_DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
Frank Tang's avatar
Frank Tang committed
93
  return ch &
94
         ~((IsAsciiLower(ch) || (((ch & 0xE0) == 0xE0) && ch != 0xF7)) << 5);
Frank Tang's avatar
Frank Tang committed
95 96 97 98 99 100 101 102 103 104 105
}

template <typename Char>
bool ToUpperFastASCII(const Vector<const Char>& src,
                      Handle<SeqOneByteString> result) {
  // Do a faster loop for the case where all the characters are ASCII.
  uint16_t ored = 0;
  int32_t index = 0;
  for (auto it = src.begin(); it != src.end(); ++it) {
    uint16_t ch = static_cast<uint16_t>(*it);
    ored |= ch;
106
    result->SeqOneByteStringSet(index++, ToAsciiUpper(ch));
Frank Tang's avatar
Frank Tang committed
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
  }
  return !(ored & ~0x7F);
}

// U+00DF LATIN SMALL LETTER SHARP S; the upper-casing helpers below expand it
// to "SS".
constexpr uint16_t sharp_s = 0xDF;

// Upper-cases |src| into |dest| for input that may hold non-ASCII Latin-1
// characters.
//
// Two kinds of characters need special treatment:
//  1. U+00B5 and U+00FF upper-case to something beyond U+00FF, so the result
//     cannot be stored in one byte per character: the function stops and
//     returns false.
//  2. Sharp-s (U+00DF) upper-cases to the two characters "SS": it is only
//     counted in |*sharp_s_count| and nothing is written for it.
//
// Returns true when the whole input was processed. |*sharp_s_count| is reset
// to zero on entry and holds the number of sharp-s seen so far on any return.
template <typename Char>
bool ToUpperOneByte(const Vector<const Char>& src, uint8_t* dest,
                    int* sharp_s_count) {
  *sharp_s_count = 0;
  uint8_t* out = dest;
  for (const Char unit : src) {
    const uint16_t ch = static_cast<uint16_t>(unit);
    if (V8_UNLIKELY(ch == sharp_s)) {
      // Counted only; the output cursor is intentionally not advanced.
      *sharp_s_count += 1;
    } else if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
      // The upper-cased character does not fit in an 8-bit string, so the
      // caller has to take the 16-bit path.
      return false;
    } else {
      *out = ToLatin1Upper(ch);
      ++out;
    }
  }
  return true;
}

// Upper-cases |src| into |result|, writing "SS" for every sharp-s (U+00DF)
// and ToLatin1Upper() of every other code unit. |result| therefore receives
// one extra character per sharp-s in |src|.
template <typename Char>
void ToUpperWithSharpS(const Vector<const Char>& src,
                       Handle<SeqOneByteString> result) {
  int32_t out = 0;
  for (const Char unit : src) {
    const uint16_t ch = static_cast<uint16_t>(unit);
    if (ch != sharp_s) {
      result->SeqOneByteStringSet(out++, ToLatin1Upper(ch));
      continue;
    }
    result->SeqOneByteStringSet(out++, 'S');
    result->SeqOneByteStringSet(out++, 'S');
  }
}

154
// Returns the index of the first character among the first |length|
// characters of |s| that is an ASCII upper-case letter (per IsAsciiUpper) or
// has a bit above 0x7F set. Returns |length| when there is no such character.
inline int FindFirstUpperOrNonAscii(String s, int length) {
  for (int index = 0; index < length; ++index) {
    const uint16_t ch = s.Get(index);
    if (V8_UNLIKELY(IsAsciiUpper(ch) || ch & ~0x7F)) {
      return index;
    }
  }
  return length;
}

// Returns a UChar pointer to the characters described by |flat|.
//
// Two-byte content is returned in place. One-byte content is widened into a
// uc16 array of |length| elements owned by |*dest|; that array is only
// allocated and filled when |*dest| is still empty, so a second call with the
// same |dest| reuses the earlier copy.
const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
                                    std::unique_ptr<uc16[]>* dest,
                                    int32_t length) {
  DCHECK(flat.IsFlat());
  if (!flat.IsOneByte()) {
    return reinterpret_cast<const UChar*>(flat.ToUC16Vector().begin());
  }
  if (!*dest) {
    dest->reset(NewArray<uc16>(length));
    CopyChars(dest->get(), flat.ToOneByteVector().begin(), length);
  }
  return reinterpret_cast<const UChar*>(dest->get());
}

179 180
// Generic construction helper: obtains the derived map for |constructor|
// (used as both the constructor and the new target) and forwards to
// T::New(isolate, map, locales, options, method).
// Returns an empty MaybeHandle if computing the map fails.
template <typename T>
MaybeHandle<T> New(Isolate* isolate, Handle<JSFunction> constructor,
                   Handle<Object> locales, Handle<Object> options,
                   const char* method) {
  Handle<Map> map;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, map,
      JSFunction::GetDerivedMap(isolate, constructor, constructor), T);
  return T::New(isolate, map, locales, options, method);
}
Frank Tang's avatar
Frank Tang committed
189 190 191 192 193 194
}  // namespace

// Exposes the Latin-1 lower-casing table as a pointer to its first entry.
const uint8_t* Intl::ToLatin1LowerTable() { return kToLower; }

// Builds an icu::UnicodeString from the characters of the flat string
// |string|. One-byte content is widened to UChar first: on the stack when the
// string has at most kShortStringSize characters, otherwise through a
// temporary array obtained from GetUCharBufferFromFlat().
icu::UnicodeString Intl::ToICUUnicodeString(Isolate* isolate,
                                            Handle<String> string) {
  DCHECK(string->IsFlat());
  DisallowHeapAllocation no_gc;
  std::unique_ptr<uc16[]> sap;
  // Short one-byte strings can be expanded on the stack to avoid allocating a
  // temporary buffer.
  constexpr int kShortStringSize = 80;
  UChar short_string_buffer[kShortStringSize];
  const UChar* uchar_buffer = nullptr;
  const String::FlatContent& flat = string->GetFlatContent(no_gc);
  const int32_t length = string->length();
  if (flat.IsOneByte() && length <= kShortStringSize) {
    CopyChars(short_string_buffer, flat.ToOneByteVector().begin(), length);
    uchar_buffer = short_string_buffer;
  } else {
    uchar_buffer = GetUCharBufferFromFlat(flat, &sap, length);
  }
  return icu::UnicodeString(uchar_buffer, length);
}

214 215
namespace {
// Returns an icu::StringPiece over the characters of the flat string
// |string| when its content is one-byte and passes String::IsAscii();
// otherwise returns an empty piece (nullptr, 0).
// The piece points at the string's own character data, not at a copy.
// NOTE(review): that pointer is handed out after |no_gc| goes out of scope;
// presumably callers must not allocate while they use it -- confirm.
icu::StringPiece ToICUStringPiece(Isolate* isolate, Handle<String> string) {
  DCHECK(string->IsFlat());
  DisallowHeapAllocation no_gc;

  const String::FlatContent& flat = string->GetFlatContent(no_gc);
  if (!flat.IsOneByte()) return icu::StringPiece(nullptr, 0);

  const int32_t length = string->length();
  const char* char_buffer =
      reinterpret_cast<const char*>(flat.ToOneByteVector().begin());
  if (!String::IsAscii(char_buffer, length)) {
    return icu::StringPiece(nullptr, 0);
  }

  return icu::StringPiece(char_buffer, length);
}

Frank Tang's avatar
Frank Tang committed
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
// Case-converts |s| with ICU (u_strToUpper when |is_to_upper| is true,
// u_strToLower otherwise) for the language |lang| and returns the result as a
// two-byte string. An empty input yields the canonical empty string.
// Returns an empty MaybeHandle if allocating the result string fails.
MaybeHandle<String> LocaleConvertCase(Isolate* isolate, Handle<String> s,
                                      bool is_to_upper, const char* lang) {
  auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
  int32_t src_length = s->length();
  int32_t dest_length = src_length;
  // Reset before every ICU call below; initialized here as well so that it
  // never holds an indeterminate value.
  UErrorCode status = U_ZERO_ERROR;
  Handle<SeqTwoByteString> result;
  std::unique_ptr<uc16[]> sap;

  if (dest_length == 0) return ReadOnlyRoots(isolate).empty_string_handle();

  // This is not a real loop. It'll be executed only once (no overflow) or
  // twice (overflow). On overflow ICU reports the required length, which is
  // used to size the buffer for the second attempt.
  for (int i = 0; i < 2; ++i) {
    // Case conversion can increase the string length (e.g. sharp-S => SS) so
    // that we have to handle RangeError exceptions here.
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, result, isolate->factory()->NewRawTwoByteString(dest_length),
        String);
    DisallowHeapAllocation no_gc;
    DCHECK(s->IsFlat());
    String::FlatContent flat = s->GetFlatContent(no_gc);
    const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
    status = U_ZERO_ERROR;
    dest_length =
        case_converter(reinterpret_cast<UChar*>(result->GetChars(no_gc)),
                       dest_length, src, src_length, lang, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) break;
  }

  // In most cases, the output will fill the destination buffer completely
  // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
  // Only in rare cases, it'll be shorter than the destination buffer and
  // |result| has to be truncated.
  DCHECK(U_SUCCESS(status));
  if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
    DCHECK(dest_length == result->length());
    return result;
  }
  DCHECK(dest_length < result->length());
  return SeqString::Truncate(result, dest_length);
}

}  // namespace

// A stripped-down version of ConvertToLower that can only handle flat one-byte
// strings and does not allocate. Note that {src} could still be, e.g., a
// one-byte sliced string with a two-byte parent string.
// Called from TF builtins.
281
String Intl::ConvertOneByteToLower(String src, String dst) {
282 283 284 285
  DCHECK_EQ(src.length(), dst.length());
  DCHECK(src.IsOneByteRepresentation());
  DCHECK(src.IsFlat());
  DCHECK(dst.IsSeqOneByteString());
Frank Tang's avatar
Frank Tang committed
286 287 288

  DisallowHeapAllocation no_gc;

289 290 291
  const int length = src.length();
  String::FlatContent src_flat = src.GetFlatContent(no_gc);
  uint8_t* dst_data = SeqOneByteString::cast(dst).GetChars(no_gc);
Frank Tang's avatar
Frank Tang committed
292 293

  if (src_flat.IsOneByte()) {
294
    const uint8_t* src_data = src_flat.ToOneByteVector().begin();
Frank Tang's avatar
Frank Tang committed
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315

    bool has_changed_character = false;
    int index_to_first_unprocessed =
        FastAsciiConvert<true>(reinterpret_cast<char*>(dst_data),
                               reinterpret_cast<const char*>(src_data), length,
                               &has_changed_character);

    if (index_to_first_unprocessed == length) {
      return has_changed_character ? dst : src;
    }

    // If not ASCII, we keep the result up to index_to_first_unprocessed and
    // process the rest.
    for (int index = index_to_first_unprocessed; index < length; ++index) {
      dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
    }
  } else {
    DCHECK(src_flat.IsTwoByte());
    int index_to_first_unprocessed = FindFirstUpperOrNonAscii(src, length);
    if (index_to_first_unprocessed == length) return src;

316
    const uint16_t* src_data = src_flat.ToUC16Vector().begin();
Frank Tang's avatar
Frank Tang committed
317 318 319 320 321 322 323 324 325 326
    CopyChars(dst_data, src_data, index_to_first_unprocessed);
    for (int index = index_to_first_unprocessed; index < length; ++index) {
      dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
    }
  }

  return dst;
}

// Lower-cases |s| in the root locale.
// Strings that are not one-byte go through LocaleConvertCase(). One-byte
// strings are converted by ConvertOneByteToLower() into a freshly allocated
// one-byte string of the same length; very short strings that need no change
// are returned as-is without allocating.
MaybeHandle<String> Intl::ConvertToLower(Isolate* isolate, Handle<String> s) {
  if (!s->IsOneByteRepresentation()) {
    // Use a slower implementation for strings with characters beyond U+00FF.
    return LocaleConvertCase(isolate, s, false, "");
  }

  const int length = s->length();

  // We depend here on the invariant that the length of a Latin1
  // string is invariant under ToLowerCase, and the result always
  // fits in the Latin1 range in the *root locale*. It does not hold
  // for ToUpperCase even in the root locale.

  // Scan the string for uppercase and non-ASCII characters for strings
  // shorter than a machine-word without any memory allocation overhead.
  // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
  // to two parts, one for scanning the prefix with no change and the other for
  // handling ASCII-only characters.

  const bool is_short = length < static_cast<int>(sizeof(uintptr_t));
  if (is_short) {
    const bool is_lower_ascii = FindFirstUpperOrNonAscii(*s, length) == length;
    if (is_lower_ascii) return s;
  }

  Handle<SeqOneByteString> result =
      isolate->factory()->NewRawOneByteString(length).ToHandleChecked();

  return Handle<String>(Intl::ConvertOneByteToLower(*s, *result), isolate);
}

// Upper-cases |s| in the root locale.
//
// Non-empty one-byte strings are tried on progressively slower paths:
//   1. pure ASCII (FastAsciiConvert / ToUpperFastASCII);
//   2. Latin-1 whose upper case stays in Latin-1 (ToUpperOneByte);
//   3. as 2., but containing sharp-s, which needs a longer result string
//      (ToUpperWithSharpS).
// Everything else, including input containing U+00B5 or U+00FF, goes through
// LocaleConvertCase().
MaybeHandle<String> Intl::ConvertToUpper(Isolate* isolate, Handle<String> s) {
  int32_t length = s->length();
  if (s->IsOneByteRepresentation() && length > 0) {
    Handle<SeqOneByteString> result =
        isolate->factory()->NewRawOneByteString(length).ToHandleChecked();

    DCHECK(s->IsFlat());
    int sharp_s_count;
    bool is_result_single_byte;
    {
      DisallowHeapAllocation no_gc;
      String::FlatContent flat = s->GetFlatContent(no_gc);
      uint8_t* dest = result->GetChars(no_gc);
      if (flat.IsOneByte()) {
        Vector<const uint8_t> src = flat.ToOneByteVector();
        bool has_changed_character = false;
        int index_to_first_unprocessed = FastAsciiConvert<false>(
            reinterpret_cast<char*>(dest),
            reinterpret_cast<const char*>(src.begin()), length,
            &has_changed_character);
        if (index_to_first_unprocessed == length) {
          return has_changed_character ? result : s;
        }
        // If not ASCII, we keep the result up to index_to_first_unprocessed and
        // process the rest.
        is_result_single_byte =
            ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
                           dest + index_to_first_unprocessed, &sharp_s_count);
      } else {
        DCHECK(flat.IsTwoByte());
        Vector<const uint16_t> src = flat.ToUC16Vector();
        if (ToUpperFastASCII(src, result)) return result;
        is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
      }
    }

    // Go to the full Unicode path if there are characters whose uppercase
    // is beyond the Latin-1 range (cannot be represented in OneByteString).
    if (V8_UNLIKELY(!is_result_single_byte)) {
      return LocaleConvertCase(isolate, s, true, "");
    }

    if (sharp_s_count == 0) return result;

    // We have sharp_s_count sharp-s characters, but the result is still
    // in the Latin-1 range. Each sharp-s becomes "SS", so the result needs
    // one extra character per occurrence and is rebuilt from scratch.
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, result,
        isolate->factory()->NewRawOneByteString(length + sharp_s_count),
        String);
    DisallowHeapAllocation no_gc;
    String::FlatContent flat = s->GetFlatContent(no_gc);
    if (flat.IsOneByte()) {
      ToUpperWithSharpS(flat.ToOneByteVector(), result);
    } else {
      ToUpperWithSharpS(flat.ToUC16Vector(), result);
    }

    return result;
  }

  return LocaleConvertCase(isolate, s, true, "");
}

421 422 423 424 425 426 427
// Returns the name of the numbering system ICU creates for |icu_locale|, or
// "latn" when ICU reports a failure or hands back no object.
std::string Intl::GetNumberingSystem(const icu::Locale& icu_locale) {
  // Ugly hack. ICU doesn't expose numbering system in any way, so we have
  // to assume that for given locale NumberingSystem constructor produces the
  // same digits as NumberFormat/Calendar would.
  UErrorCode status = U_ZERO_ERROR;
  std::unique_ptr<icu::NumberingSystem> numbering_system(
      icu::NumberingSystem::createInstance(icu_locale, status));
  // Guard the dereference as well as the status code.
  if (U_SUCCESS(status) && numbering_system != nullptr) {
    return numbering_system->getName();
  }
  return "latn";
}
431

432 433
namespace {

434
// Converts the BCP 47 language tag |bcp47_locale| into an icu::Locale using
// icu::Locale::forLanguageTag(). Returns Nothing when the resulting locale is
// bogus.
Maybe<icu::Locale> CreateICULocale(const std::string& bcp47_locale) {
  DisallowHeapAllocation no_gc;

  // Convert BCP47 into ICU locale format.
  UErrorCode status = U_ZERO_ERROR;

  icu::Locale icu_locale = icu::Locale::forLanguageTag(bcp47_locale, status);
  DCHECK(U_SUCCESS(status));
  if (icu_locale.isBogus()) {
    return Nothing<icu::Locale>();
  }

  return Just(icu_locale);
}

449 450
}  // anonymous namespace

451
// static
452

453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
// Creates a V8 string from the UTF-16 contents of |string|; the code units
// are handed to the factory's NewStringFromTwoByte().
MaybeHandle<String> Intl::ToString(Isolate* isolate,
                                   const icu::UnicodeString& string) {
  const uint16_t* chars =
      reinterpret_cast<const uint16_t*>(string.getBuffer());
  return isolate->factory()->NewStringFromTwoByte(
      Vector<const uint16_t>(chars, string.length()));
}

// Creates a V8 string from the part of |string| selected by
// tempSubStringBetween(begin, end), delegating to the two-argument overload.
MaybeHandle<String> Intl::ToString(Isolate* isolate,
                                   const icu::UnicodeString& string,
                                   int32_t begin, int32_t end) {
  const icu::UnicodeString& slice = string.tempSubStringBetween(begin, end);
  return Intl::ToString(isolate, slice);
}

namespace {

// Creates the object {type: field_type_string, value: value}, stores it at
// array[index] and returns it so that callers can attach further properties.
Handle<JSObject> InnerAddElement(Isolate* isolate, Handle<JSArray> array,
                                 int index, Handle<String> field_type_string,
                                 Handle<String> value) {
  Factory* const heap_factory = isolate->factory();
  Handle<JSObject> entry =
      heap_factory->NewJSObject(isolate->object_function());

  // entry.type = field_type_string
  JSObject::AddProperty(isolate, entry, heap_factory->type_string(),
                        field_type_string, NONE);
  // entry.value = value
  JSObject::AddProperty(isolate, entry, heap_factory->value_string(), value,
                        NONE);
  // array[index] = entry
  JSObject::AddDataElement(array, index, entry, NONE);
  return entry;
}

}  // namespace

// Stores {type: field_type_string, value: value} at array[index].
void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
                      Handle<String> field_type_string, Handle<String> value) {
  // The created element is not needed here, so the return value is dropped.
  InnerAddElement(isolate, array, index, field_type_string, value);
}

// Stores at array[index] the object
//   {type: field_type_string, value: value,
//    [additional_property_name]: additional_property_value}.
void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
                      Handle<String> field_type_string, Handle<String> value,
                      Handle<String> additional_property_name,
                      Handle<String> additional_property_value) {
  // Build the common {type, value} part first, then add the extra property.
  Handle<JSObject> entry =
      InnerAddElement(isolate, array, index, field_type_string, value);
  JSObject::AddProperty(isolate, entry, additional_property_name,
                        additional_property_value, NONE);
}
506 507 508

namespace {

509
// Build the shortened locale; eg, convert xx_Yyyy_ZZ  to xx_ZZ.
510 511 512 513 514
//
// If locale has a script tag then return true and the locale without the
// script else return false and an empty string.
bool RemoveLocaleScriptTag(const std::string& icu_locale,
                           std::string* locale_less_script) {
515 516
  icu::Locale new_locale = icu::Locale::createCanonical(icu_locale.c_str());
  const char* icu_script = new_locale.getScript();
517
  if (icu_script == nullptr || strlen(icu_script) == 0) {
518 519 520 521 522 523 524
    *locale_less_script = std::string();
    return false;
  }

  const char* icu_language = new_locale.getLanguage();
  const char* icu_country = new_locale.getCountry();
  icu::Locale short_locale = icu::Locale(icu_language, icu_country);
525
  *locale_less_script = short_locale.getName();
526 527 528
  return true;
}

529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560
// Checks whether ICU can open the resource bundle |path| for |locale| with
// status exactly U_ZERO_ERROR and, when |key| is non-null, whether that
// bundle has an entry for |key|.
// If the check fails, it is retried with a less specific locale:
// language-script when the locale has both a country and a script, or the
// bare language when it has only one of the two.
bool ValidateResource(const icu::Locale locale, const char* path,
                      const char* key) {
  UErrorCode status = U_ZERO_ERROR;
  UResourceBundle* bundle = ures_open(path, locale.getName(), &status);
  bool found = false;
  if (bundle != nullptr && status == U_ZERO_ERROR) {
    found = true;
    if (key != nullptr) {
      UResourceBundle* key_bundle =
          ures_getByKey(bundle, key, nullptr, &status);
      found = key_bundle != nullptr && (status == U_ZERO_ERROR);
      ures_close(key_bundle);
    }
  }
  ures_close(bundle);
  if (found) return true;

  const bool has_country = locale.getCountry()[0] != '\0';
  const bool has_script = locale.getScript()[0] != '\0';
  if (has_country && has_script) {
    // Fallback to try without country.
    std::string without_country(locale.getLanguage());
    without_country.append("-").append(locale.getScript());
    return ValidateResource(without_country.c_str(), path, key);
  }
  if (has_country || has_script) {
    // Fallback to try with only language.
    const std::string language_only(locale.getLanguage());
    return ValidateResource(language_only.c_str(), path, key);
  }
  return false;
}

561 562
}  // namespace

563
std::set<std::string> Intl::BuildLocaleSet(
564
    const std::vector<std::string>& icu_available_locales, const char* path,
565
    const char* validate_key) {
566
  std::set<std::string> locales;
567
  for (const std::string& locale : icu_available_locales) {
568
    if (path != nullptr || validate_key != nullptr) {
569
      if (!ValidateResource(icu::Locale(locale.c_str()), path, validate_key)) {
570 571 572
        continue;
      }
    }
573 574
    locales.insert(locale);
    std::string shortened_locale;
575
    if (RemoveLocaleScriptTag(locale, &shortened_locale)) {
576
      std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-');
577 578 579 580 581 582
      locales.insert(shortened_locale);
    }
  }
  return locales;
}

583
// Converts |locale| to a BCP 47 language tag with
// icu::Locale::toLanguageTag(). Returns Nothing when ICU reports a failure.
// Within the Unicode extension ("-u-...") the subtags "-true" and "-yes" are
// stripped from the result.
Maybe<std::string> Intl::ToLanguageTag(const icu::Locale& locale) {
  UErrorCode status = U_ZERO_ERROR;
  std::string res = locale.toLanguageTag<std::string>(status);
  if (U_FAILURE(status)) {
    return Nothing<std::string>();
  }

  // Hack to remove -true and -yes from unicode extensions
  // Address https://crbug.com/v8/8565
  // TODO(ftang): Move the following "remove true" logic into ICU toLanguageTag
  // by fixing ICU-20310.
  const size_t u_ext_start = res.find("-u-");
  if (u_ext_start != std::string::npos) {
    // remove "-true" and "-yes" after -u-
    const std::vector<std::string> remove_items({"-true", "-yes"});
    for (const std::string& item : remove_items) {
      size_t pos = res.find(item, u_ext_start + 5 /* strlen("-u-xx") == 5 */);
      while (pos != std::string::npos) {
        const size_t end_of_item = pos + item.length();
        if (end_of_item == res.length() || res[end_of_item] == '-') {
          // A complete subtag: drop it and look again from the same spot.
          res.erase(pos, item.length());
          pos = res.find(item, pos);
        } else {
          // Only the prefix of a longer subtag (e.g. "-truex"). Step past it;
          // searching from the same place again would never terminate.
          pos = res.find(item, pos + 1);
        }
      }
    }
  }
  return Just(res);
}

614 615
namespace {
// Returns the default locale of |isolate| as a language tag. The value is
// computed from ICU's default locale the first time it is needed (i.e. while
// the isolate's stored default locale is still empty) and stored on the
// isolate.
std::string DefaultLocale(Isolate* isolate) {
  if (isolate->default_locale().empty()) {
    icu::Locale default_locale;
    const char* icu_name = default_locale.getName();
    // Translate ICU's fallback locale to a well-known locale.
    if (strcmp(icu_name, "en_US_POSIX") == 0 || strcmp(icu_name, "c") == 0) {
      isolate->set_default_locale("en-US");
    } else {
      // Set the locale
      isolate->set_default_locale(
          default_locale.isBogus()
              ? "und"
              : Intl::ToLanguageTag(default_locale).FromJust());
    }
    DCHECK(!isolate->default_locale().empty());
  }
  return isolate->default_locale();
}
633
}  // namespace
634

635
// See ecma402/#legacy-constructor.
636 637 638 639
MaybeHandle<Object> Intl::LegacyUnwrapReceiver(Isolate* isolate,
                                               Handle<JSReceiver> receiver,
                                               Handle<JSFunction> constructor,
                                               bool has_initialized_slot) {
640 641 642 643 644 645 646 647 648 649 650 651 652
  Handle<Object> obj_is_instance_of;
  ASSIGN_RETURN_ON_EXCEPTION(isolate, obj_is_instance_of,
                             Object::InstanceOf(isolate, receiver, constructor),
                             Object);
  bool is_instance_of = obj_is_instance_of->BooleanValue(isolate);

  // 2. If receiver does not have an [[Initialized...]] internal slot
  //    and ? InstanceofOperator(receiver, constructor) is true, then
  if (!has_initialized_slot && is_instance_of) {
    // 2. a. Let new_receiver be ? Get(receiver, %Intl%.[[FallbackSymbol]]).
    Handle<Object> new_receiver;
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, new_receiver,
653 654 655
        JSReceiver::GetProperty(isolate, receiver,
                                isolate->factory()->intl_fallback_symbol()),
        Object);
656 657 658 659 660 661
    return new_receiver;
  }

  return receiver;
}

662 663 664 665 666 667 668 669 670 671 672
// Reads the string-valued option |property| from |options|.
//
// Returns Just(false) when the property is undefined (|*result| untouched),
// Just(true) with the value stored in |*result| when it is present and
// allowed, and Nothing (with an exception pending) when reading or converting
// the value throws, or when |values| is non-empty and does not contain the
// value. In the last case a RangeError naming the value, |service| and
// |property| is thrown.
Maybe<bool> Intl::GetStringOption(Isolate* isolate, Handle<JSReceiver> options,
                                  const char* property,
                                  std::vector<const char*> values,
                                  const char* service,
                                  std::unique_ptr<char[]>* result) {
  Handle<String> property_str =
      isolate->factory()->NewStringFromAsciiChecked(property);

  // 1. Let value be ? Get(options, property).
  Handle<Object> value;
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, value,
      Object::GetPropertyOrElement(isolate, options, property_str),
      Nothing<bool>());

  if (value->IsUndefined(isolate)) {
    return Just(false);
  }

  // 2. c. Let value be ? ToString(value).
  Handle<String> value_str;
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, value_str, Object::ToString(isolate, value), Nothing<bool>());
  std::unique_ptr<char[]> value_cstr = value_str->ToCString();

  // 2. d. if values is not undefined, then
  if (!values.empty()) {
    // 2. d. i. If values does not contain an element equal to value,
    // throw a RangeError exception.
    for (const char* allowed : values) {
      if (strcmp(allowed, value_cstr.get()) == 0) {
        // 2. e. return value
        *result = std::move(value_cstr);
        return Just(true);
      }
    }

    Handle<String> service_str =
        isolate->factory()->NewStringFromAsciiChecked(service);
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate,
        NewRangeError(MessageTemplate::kValueOutOfRange, value, service_str,
                      property_str),
        Nothing<bool>());
  }

  // 2. e. return value
  *result = std::move(value_cstr);
  return Just(true);
}

// Reads the boolean-valued option |property| from |options|.
//
// Returns Just(true) with the ToBoolean-converted value in |*result| when the
// property is not undefined, Just(false) (|*result| untouched) when it is
// undefined, and Nothing when reading the property throws.
// |service| is not used by this function.
V8_WARN_UNUSED_RESULT Maybe<bool> Intl::GetBoolOption(
    Isolate* isolate, Handle<JSReceiver> options, const char* property,
    const char* service, bool* result) {
  Handle<String> property_str =
      isolate->factory()->NewStringFromAsciiChecked(property);

  // 1. Let value be ? Get(options, property).
  Handle<Object> value;
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, value,
      Object::GetPropertyOrElement(isolate, options, property_str),
      Nothing<bool>());

  // 2. If value is not undefined, then
  if (!value->IsUndefined(isolate)) {
    // 2. b. i. Let value be ToBoolean(value).
    *result = value->BooleanValue(isolate);

    // 2. e. return value
    return Just(true);
  }

  return Just(false);
}

738
namespace {
739

740 741
bool IsTwoLetterLanguage(const std::string& locale) {
  // Two letters, both in range 'a'-'z'...
742 743
  return locale.length() == 2 && IsAsciiLower(locale[0]) &&
         IsAsciiLower(locale[1]);
744 745 746
}

// Returns true iff |locale| is exactly one of the deprecated two-letter
// language tags "in", "iw", "ji", "jw" or "mo". The comparison is
// case-sensitive.
bool IsDeprecatedLanguage(const std::string& locale) {
  static constexpr const char* kDeprecatedLanguages[] = {"in", "iw", "ji",
                                                         "jw", "mo"};
  for (const char* deprecated : kDeprecatedLanguages) {
    if (locale == deprecated) return true;
  }
  return false;
}

752 753 754 755 756 757 758 759 760 761 762 763
// Returns true iff |locale| is exactly one of "zh-min", "cel-gaulish",
// "i-default", "i-enochian" or "i-mingo".
// Reference:
// https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
bool IsGrandfatheredTagWithoutPreferredVaule(const std::string& locale) {
  if (V8_UNLIKELY(locale == "zh-min" || locale == "cel-gaulish")) return true;
  if (locale.length() > 6 /* i-mingo is 7 chars long */ &&
      V8_UNLIKELY(locale[0] == 'i' && locale[1] == '-')) {
    // Compare everything after the "i-" prefix in place.
    const auto rest_is = [&locale](const char* subtag) {
      return locale.compare(2, std::string::npos, subtag) == 0;
    };
    return rest_is("default") || rest_is("enochian") || rest_is("mingo");
  }
  return false;
}

764 765
// Structural validity check for a language tag. Delegates entirely to
// JSLocale::StartsWithUnicodeLanguageId.
bool IsStructurallyValidLanguageTag(const std::string& tag) {
  return JSLocale::StartsWithUnicodeLanguageId(tag);
}

768 769 770 771 772
// Canonicalize the locale.
// https://tc39.github.io/ecma402/#sec-canonicalizelanguagetag
//
// Returns the canonical form of |locale_in|, or throws a RangeError
// (kInvalidLanguageTag) and returns Nothing when the tag is empty, is not
// ASCII, is rejected by ICU, or cannot be converted back to a language tag.
// Note that every error after the lowercasing step reports the lowercased
// tag, not the original spelling.
Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
                                           const std::string& locale_in) {
  std::string locale = locale_in;

  if (locale.length() == 0 ||
      !String::IsAscii(locale.data(), static_cast<int>(locale.length()))) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate,
        NewRangeError(
            MessageTemplate::kInvalidLanguageTag,
            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
        Nothing<std::string>());
  }

  // Optimize for the most common case: a 2-letter language code in the
  // canonical form/lowercase that is not one of the deprecated codes
  // (in, iw, ji, jw, mo). Don't check for ~70 of 3-letter deprecated language
  // codes. Instead, let them be handled by ICU in the slow path. However,
  // fast-track 'fil' (3-letter canonical code).
  if ((IsTwoLetterLanguage(locale) && !IsDeprecatedLanguage(locale)) ||
      locale == "fil") {
    return Just(locale);
  }

  // Because per BCP 47 2.1.1 language tags are case-insensitive, lowercase
  // the input before any more check.
  std::transform(locale.begin(), locale.end(), locale.begin(), ToAsciiLower);

  // ICU maps a few grandfathered tags to what looks like a regular language
  // tag even though IANA language tag registry does not have a preferred
  // entry map for them. Return them as they are, lowercased.
  if (IsGrandfatheredTagWithoutPreferredVaule(locale)) {
    return Just(locale);
  }

  // ECMA 402 6.2.3
  // TODO(jshin): uloc_{for,to}LanguageTag can fail even for a structurally
  // valid language tag if it's too long (much longer than 100 chars). Even if
  // we allocate a longer buffer, ICU will still fail if it's too long. Either
  // propose to Ecma 402 to put a limit on the locale length or change ICU to
  // handle long locale names better. See
  // https://unicode-org.atlassian.net/browse/ICU-13417
  UErrorCode error = U_ZERO_ERROR;
  // uloc_forLanguageTag checks the structural validity. If the input BCP47
  // language tag is parsed all the way to the end, it indicates that the input
  // is structurally valid. Due to a couple of bugs, we can't use it
  // without Chromium patches or ICU 62 or earlier.
  icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error);
  if (U_FAILURE(error) || icu_locale.isBogus()) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate,
        NewRangeError(
            MessageTemplate::kInvalidLanguageTag,
            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
        Nothing<std::string>());
  }

  // Convert the ICU locale back to a BCP47 tag; a failure here is reported as
  // an invalid tag as well.
  Maybe<std::string> maybe_to_language_tag = Intl::ToLanguageTag(icu_locale);
  if (maybe_to_language_tag.IsNothing()) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate,
        NewRangeError(
            MessageTemplate::kInvalidLanguageTag,
            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
        Nothing<std::string>());
  }

  return maybe_to_language_tag;
}

840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872
// Overload taking an arbitrary JS value. Performs the per-element validity
// checks spec'ed in CanonicalizeLocaleList before canonicalizing:
// 7c ii. If Type(kValue) is not String or Object, throw a TypeError
// exception.
// 7c iii. Let tag be ? ToString(kValue).
// 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
// RangeError exception.
Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
                                           Handle<Object> locale_in) {
  const bool is_string = locale_in->IsString();

  // Anything that is neither a String nor an Object is a TypeError.
  if (!is_string && !locale_in->IsJSReceiver()) {
    THROW_NEW_ERROR_RETURN_VALUE(isolate,
                                 NewTypeError(MessageTemplate::kLanguageID),
                                 Nothing<std::string>());
  }

  Handle<String> tag_handle;
  if (is_string) {
    tag_handle = Handle<String>::cast(locale_in);
  } else {
    // Objects are converted with ToString, which may throw.
    ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, tag_handle,
                                     Object::ToString(isolate, locale_in),
                                     Nothing<std::string>());
  }

  std::string tag(tag_handle->ToCString().get());
  if (IsStructurallyValidLanguageTag(tag)) {
    return CanonicalizeLanguageTag(isolate, tag);
  }

  THROW_NEW_ERROR_RETURN_VALUE(
      isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
      Nothing<std::string>());
}

}  // anonymous namespace

873 874 875 876 877 878 879 880 881
// ecma402 CanonicalizeLocaleList.
//
// Converts |locales| into a de-duplicated list of canonicalized language
// tags, in order of first appearance. Returns Nothing if any step throws.
// When |only_return_one_result| is true, iteration over an array-like stops
// after the first present element has been processed.
Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
    Isolate* isolate, Handle<Object> locales, bool only_return_one_result) {
  // 1. If locales is undefined, then
  if (locales->IsUndefined(isolate)) {
    // 1a. Return a new empty List.
    return Just(std::vector<std::string>());
  }
  // 2. Let seen be a new empty List.
  std::vector<std::string> seen;
  // 3. If Type(locales) is String or locales has an [[InitializedLocale]]
  // internal slot,  then
  if (locales->IsJSLocale()) {
    // This value came from a JSLocale, which has already gone through the
    // CanonicalizeLanguageTag process once, so there is no need to call
    // CanonicalizeLanguageTag again.
    seen.push_back(JSLocale::ToString(Handle<JSLocale>::cast(locales)));
    return Just(seen);
  }
  if (locales->IsString()) {
    // 3a. Let O be CreateArrayFromList(« locales »).
    // Instead of creating a one-element array and then iterating over it,
    // we inline the body of the iteration:
    std::string canonicalized_tag;
    if (!CanonicalizeLanguageTag(isolate, locales).To(&canonicalized_tag)) {
      return Nothing<std::vector<std::string>>();
    }
    seen.push_back(std::move(canonicalized_tag));
    return Just(seen);
  }
  // 4. Else,
  // 4a. Let O be ? ToObject(locales).
  Handle<JSReceiver> o;
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, o,
                                   Object::ToObject(isolate, locales),
                                   Nothing<std::vector<std::string>>());
  // 5. Let len be ? ToLength(? Get(O, "length")).
  Handle<Object> length_obj;
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, length_obj,
                                   Object::GetLengthFromArrayLike(isolate, o),
                                   Nothing<std::vector<std::string>>());
  // TODO(jkummerow): Spec violation: strictly speaking, we have to iterate
  // up to 2^53-1 if {length_obj} says so. Since cases above 2^32 probably
  // don't happen in practice (and would be very slow if they do), we'll keep
  // the code simple for now by using a saturating to-uint32 conversion.
  double raw_length = length_obj->Number();
  uint32_t len =
      raw_length >= kMaxUInt32 ? kMaxUInt32 : static_cast<uint32_t>(raw_length);
  // 6. Let k be 0.
  // 7. Repeat, while k < len
  for (uint32_t k = 0; k < len; k++) {
    // 7a. Let Pk be ToString(k).
    // 7b. Let kPresent be ? HasProperty(O, Pk).
    LookupIterator it(isolate, o, k);
    Maybe<bool> maybe_found = JSReceiver::HasProperty(&it);
    MAYBE_RETURN(maybe_found, Nothing<std::vector<std::string>>());
    // 7c. If kPresent is true, then
    if (!maybe_found.FromJust()) continue;
    // 7c i. Let kValue be ? Get(O, Pk).
    Handle<Object> k_value;
    ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, k_value, Object::GetProperty(&it),
                                     Nothing<std::vector<std::string>>());
    // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
    // exception.
    // 7c iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]]
    // internal slot, then
    std::string canonicalized_tag;
    if (k_value->IsJSLocale()) {
      // 7c iii. 1. Let tag be kValue.[[Locale]].
      canonicalized_tag = JSLocale::ToString(Handle<JSLocale>::cast(k_value));
      // 7c iv. Else,
    } else {
      // 7c iv 1. Let tag be ? ToString(kValue).
      // 7c v. If IsStructurallyValidLanguageTag(tag) is false, throw a
      // RangeError exception.
      // 7c vi. Let canonicalizedTag be CanonicalizeLanguageTag(tag).
      if (!CanonicalizeLanguageTag(isolate, k_value).To(&canonicalized_tag)) {
        return Nothing<std::vector<std::string>>();
      }
    }
    // 7c vii. If canonicalizedTag is not an element of seen, append
    // canonicalizedTag as the last element of seen.
    if (std::find(seen.begin(), seen.end(), canonicalized_tag) == seen.end()) {
      seen.push_back(std::move(canonicalized_tag));
    }
    // 7d. Increase k by 1. (See loop header.)
    // Optimization: some callers only need one result.
    if (only_return_one_result) return Just(seen);
  }
  // 8. Return seen.
  return Just(seen);
}
964

965 966 967 968 969 970
// ecma402 #sup-string.prototype.tolocalelowercase
// ecma402 #sup-string.prototype.tolocaleuppercase
//
// Converts |s| to upper case (|to_upper| == true) or lower case using the
// first locale requested in |locales|, or the default locale when none is
// requested. Returns an empty MaybeHandle if canonicalizing |locales| threw.
MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
                                                  Handle<String> s,
                                                  bool to_upper,
                                                  Handle<Object> locales) {
  std::vector<std::string> requested_locales;
  if (!CanonicalizeLocaleList(isolate, locales, true).To(&requested_locales)) {
    return MaybeHandle<String>();
  }
  std::string requested_locale = requested_locales.empty()
                                     ? DefaultLocale(isolate)
                                     : requested_locales[0];

  // Only the primary language subtag matters for case mapping.
  size_t dash = requested_locale.find('-');
  if (dash != std::string::npos) {
    requested_locale = requested_locale.substr(0, dash);
  }

  // Primary language tag can be up to 8 characters long in theory.
  // https://tools.ietf.org/html/bcp47#section-2.2.1
  DCHECK_LE(requested_locale.length(), 8);
  s = String::Flatten(isolate, s);

  // All the languages requiring special-handling have two-letter codes, so
  // anything else (including private-use tags (x-foo) or grandfathered
  // irregular tags (e.g. i-enochian), which are only 'x' or 'i' when they get
  // here) cannot match below and takes the locale-independent path.
  // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
  // in the root locale needs to be adjusted for az, lt and tr because even case
  // mapping of ASCII range characters are different in those locales.
  // Greek (el) does not require any adjustment.
  const bool needs_locale_specific_mapping =
      requested_locale.length() == 2 &&
      ((requested_locale == "tr") || (requested_locale == "el") ||
       (requested_locale == "lt") || (requested_locale == "az"));
  if (V8_UNLIKELY(needs_locale_specific_mapping)) {
    return LocaleConvertCase(isolate, s, to_upper, requested_locale.c_str());
  }

  if (to_upper) {
    return ConvertToUpper(isolate, s);
  }
  return ConvertToLower(isolate, s);
}

1013 1014 1015
// Compares |string1| and |string2| with a collator built from |locales| and
// |options|. Returns an empty MaybeHandle if constructing the JSCollator
// threw; otherwise returns the result of Intl::CompareStrings.
MaybeHandle<Object> Intl::StringLocaleCompare(
    Isolate* isolate, Handle<String> string1, Handle<String> string2,
    Handle<Object> locales, Handle<Object> options, const char* method) {
  // We only cache the instance when both locales and options are undefined,
  // as that is the only case when the specified side-effects of examining
  // those arguments are unobservable.
  bool can_cache =
      locales->IsUndefined(isolate) && options->IsUndefined(isolate);
  if (can_cache) {
    // Both locales and options are undefined, check the cache.
    icu::Collator* cached_icu_collator =
        static_cast<icu::Collator*>(isolate->get_cached_icu_object(
            Isolate::ICUObjectCacheType::kDefaultCollator));
    // We may use the cached icu::Collator for a fast path.
    if (cached_icu_collator != nullptr) {
      return Intl::CompareStrings(isolate, *cached_icu_collator, string1,
                                  string2);
    }
  }

  // Slow path: construct a fresh Intl.Collator instance.
  Handle<JSFunction> constructor = Handle<JSFunction>(
      JSFunction::cast(
          isolate->context().native_context().intl_collator_function()),
      isolate);

  Handle<JSCollator> collator;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, collator,
      New<JSCollator>(isolate, constructor, locales, options, method), Object);
  if (can_cache) {
    // Remember the default collator so later default-argument calls can take
    // the fast path above.
    isolate->set_icu_object_in_cache(
        Isolate::ICUObjectCacheType::kDefaultCollator,
        std::static_pointer_cast<icu::UMemory>(collator->icu_collator().get()));
  }
  icu::Collator* icu_collator = collator->icu_collator().raw();
  return Intl::CompareStrings(isolate, *icu_collator, string1, string2);
}

1051 1052
// ecma402/#sec-collator-comparestrings
//
// Returns a Number holding the UCollationResult of comparing |string1| with
// |string2| using |icu_collator|.
Handle<Object> Intl::CompareStrings(Isolate* isolate,
                                    const icu::Collator& icu_collator,
                                    Handle<String> string1,
                                    Handle<String> string2) {
  Factory* factory = isolate->factory();

  // Early return for identical strings.
  if (string1.is_identical_to(string2)) {
    return factory->NewNumberFromInt(UCollationResult::UCOL_EQUAL);
  }

  // Early return for empty strings.
  if (string1->length() == 0) {
    return factory->NewNumberFromInt(string2->length() == 0
                                         ? UCollationResult::UCOL_EQUAL
                                         : UCollationResult::UCOL_LESS);
  }
  if (string2->length() == 0) {
    return factory->NewNumberFromInt(UCollationResult::UCOL_GREATER);
  }

  string1 = String::Flatten(isolate, string1);
  string2 = String::Flatten(isolate, string2);

  UCollationResult result;
  UErrorCode status = U_ZERO_ERROR;
  // Prefer compareUTF8 when both strings yield a non-empty StringPiece. Both
  // strings are known to be non-empty here, so an empty piece means
  // ToICUStringPiece could not provide one (presumably because the string is
  // not representable that way -- confirm against ToICUStringPiece).
  icu::StringPiece string_piece1 = ToICUStringPiece(isolate, string1);
  if (!string_piece1.empty()) {
    icu::StringPiece string_piece2 = ToICUStringPiece(isolate, string2);
    if (!string_piece2.empty()) {
      result = icu_collator.compareUTF8(string_piece1, string_piece2, status);
      DCHECK(U_SUCCESS(status));
      return factory->NewNumberFromInt(result);
    }
  }

  // Fallback: compare through icu::UnicodeString.
  icu::UnicodeString string_val1 = Intl::ToICUUnicodeString(isolate, string1);
  icu::UnicodeString string_val2 = Intl::ToICUUnicodeString(isolate, string2);
  result = icu_collator.compare(string_val1, string_val2, status);
  DCHECK(U_SUCCESS(status));

  return factory->NewNumberFromInt(result);
}
1095 1096 1097 1098 1099

// ecma402/#sup-properties-of-the-number-prototype-object
//
// Formats |num| (after ToNumeric) with a number formatter built from
// |locales| and |options|. Returns an empty MaybeHandle if ToNumeric or the
// JSNumberFormat construction threw.
MaybeHandle<String> Intl::NumberToLocaleString(Isolate* isolate,
                                               Handle<Object> num,
                                               Handle<Object> locales,
                                               Handle<Object> options,
                                               const char* method) {
  Handle<Object> numeric_obj;
  ASSIGN_RETURN_ON_EXCEPTION(isolate, numeric_obj,
                             Object::ToNumeric(isolate, num), String);

  // We only cache the instance when both locales and options are undefined,
  // as that is the only case when the specified side-effects of examining
  // those arguments are unobservable.
  bool can_cache =
      locales->IsUndefined(isolate) && options->IsUndefined(isolate);
  if (can_cache) {
    icu::number::LocalizedNumberFormatter* cached_number_format =
        static_cast<icu::number::LocalizedNumberFormatter*>(
            isolate->get_cached_icu_object(
                Isolate::ICUObjectCacheType::kDefaultNumberFormat));
    // We may use the cached LocalizedNumberFormatter for a fast path.
    if (cached_number_format != nullptr) {
      return JSNumberFormat::FormatNumeric(isolate, *cached_number_format,
                                           numeric_obj);
    }
  }

  Handle<JSFunction> constructor = Handle<JSFunction>(
      JSFunction::cast(
          isolate->context().native_context().intl_number_format_function()),
      isolate);
  Handle<JSNumberFormat> number_format;
  // 2. Let numberFormat be ? Construct(%NumberFormat%, « locales, options »).
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, number_format,
      New<JSNumberFormat>(isolate, constructor, locales, options, method),
      String);

  if (can_cache) {
    // Remember the default formatter so later default-argument calls can take
    // the fast path above.
    isolate->set_icu_object_in_cache(
        Isolate::ICUObjectCacheType::kDefaultNumberFormat,
        std::static_pointer_cast<icu::UMemory>(
            number_format->icu_number_formatter().get()));
  }

  // Return FormatNumber(numberFormat, x).
  icu::number::LocalizedNumberFormatter* icu_number_format =
      number_format->icu_number_formatter().raw();
  return JSNumberFormat::FormatNumeric(isolate, *icu_number_format,
                                       numeric_obj);
}

1148 1149
namespace {

1150
// ecma402/#sec-defaultnumberoption
1151 1152
Maybe<int> DefaultNumberOption(Isolate* isolate, Handle<Object> value, int min,
                               int max, int fallback, Handle<String> property) {
1153
  // 2. Else, return fallback.
1154 1155
  if (value->IsUndefined()) return Just(fallback);

1156 1157 1158
  // 1. If value is not undefined, then
  // a. Let value be ? ToNumber(value).
  Handle<Object> value_num;
1159 1160
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, value_num, Object::ToNumber(isolate, value), Nothing<int>());
1161
  DCHECK(value_num->IsNumber());
1162

1163 1164 1165 1166
  // b. If value is NaN or less than minimum or greater than maximum, throw a
  // RangeError exception.
  if (value_num->IsNaN() || value_num->Number() < min ||
      value_num->Number() > max) {
1167
    THROW_NEW_ERROR_RETURN_VALUE(
1168 1169
        isolate,
        NewRangeError(MessageTemplate::kPropertyValueOutOfRange, property),
1170
        Nothing<int>());
1171
  }
1172 1173 1174 1175 1176

  // The max and min arguments are integers and the above check makes
  // sure that we are within the integer range making this double to
  // int conversion safe.
  //
1177
  // c. Return floor(value).
1178
  return Just(FastD2I(floor(value_num->Number())));
1179 1180
}

1181 1182
}  // namespace

1183
// ecma402/#sec-getnumberoption
1184 1185 1186
Maybe<int> Intl::GetNumberOption(Isolate* isolate, Handle<JSReceiver> options,
                                 Handle<String> property, int min, int max,
                                 int fallback) {
1187
  // 1. Let value be ? Get(options, property).
1188 1189 1190 1191 1192
  Handle<Object> value;
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, value, JSReceiver::GetProperty(isolate, options, property),
      Nothing<int>());

1193 1194 1195 1196
  // Return ? DefaultNumberOption(value, minimum, maximum, fallback).
  return DefaultNumberOption(isolate, value, min, max, fallback, property);
}

1197 1198
// Reads the digit-related options (minimumIntegerDigits,
// minimum/maximumFractionDigits, minimum/maximumSignificantDigits) from
// |options| and returns them as a NumberFormatDigitOptions record.
//
// |mnfd_default| and |mxfd_default| are the fraction-digit defaults used when
// the corresponding options are absent. |notation_is_compact| selects
// "compact-rounding", which is encoded as minimum_significant_digits == -1,
// when neither significant-digit nor fraction-digit options are given.
//
// All four fraction/significant properties are read up front, before any of
// them is validated, so the order of observable property accesses is fixed.
// Returns Nothing if any property read or range check throws.
Maybe<Intl::NumberFormatDigitOptions> Intl::SetNumberFormatDigitOptions(
    Isolate* isolate, Handle<JSReceiver> options, int mnfd_default,
    int mxfd_default, bool notation_is_compact) {
  Factory* factory = isolate->factory();
  Intl::NumberFormatDigitOptions digit_options;

  // 5. Let mnid be ? GetNumberOption(options, "minimumIntegerDigits", 1, 21,
  // 1).
  int mnid = 1;
  if (!Intl::GetNumberOption(isolate, options,
                             factory->minimumIntegerDigits_string(), 1, 21, 1)
           .To(&mnid)) {
    return Nothing<NumberFormatDigitOptions>();
  }

  int mnfd = 0;
  int mxfd = 0;
  Handle<Object> mnfd_obj;
  Handle<Object> mxfd_obj;

  // 6. Let mnfd be ? Get(options, "minimumFractionDigits").
  Handle<String> mnfd_str = factory->minimumFractionDigits_string();
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, mnfd_obj, JSReceiver::GetProperty(isolate, options, mnfd_str),
      Nothing<NumberFormatDigitOptions>());

  // 8. Let mxfd be ? Get(options, "maximumFractionDigits").
  Handle<String> mxfd_str = factory->maximumFractionDigits_string();
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, mxfd_obj, JSReceiver::GetProperty(isolate, options, mxfd_str),
      Nothing<NumberFormatDigitOptions>());

  // 9.  Let mnsd be ? Get(options, "minimumSignificantDigits").
  Handle<Object> mnsd_obj;
  Handle<String> mnsd_str = factory->minimumSignificantDigits_string();
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, mnsd_obj, JSReceiver::GetProperty(isolate, options, mnsd_str),
      Nothing<NumberFormatDigitOptions>());

  // 10. Let mxsd be ? Get(options, "maximumSignificantDigits").
  Handle<Object> mxsd_obj;
  Handle<String> mxsd_str = factory->maximumSignificantDigits_string();
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, mxsd_obj, JSReceiver::GetProperty(isolate, options, mxsd_str),
      Nothing<NumberFormatDigitOptions>());

  // 11. Set intlObj.[[MinimumIntegerDigits]] to mnid.
  digit_options.minimum_integer_digits = mnid;

  // 12. Set intlObj.[[MinimumFractionDigits]] to mnfd.
  digit_options.minimum_fraction_digits = mnfd;

  // 13. Set intlObj.[[MaximumFractionDigits]] to mxfd.
  digit_options.maximum_fraction_digits = mxfd;

  // 14. If mnsd is not undefined or mxsd is not undefined, then
  if (!mnsd_obj->IsUndefined(isolate) || !mxsd_obj->IsUndefined(isolate)) {
    // 14. a. Let mnsd be ? DefaultNumberOption(mnsd, 1, 21, 1).
    int mnsd;
    if (!DefaultNumberOption(isolate, mnsd_obj, 1, 21, 1, mnsd_str).To(&mnsd)) {
      return Nothing<NumberFormatDigitOptions>();
    }

    // 14. b. Let mxsd be ? DefaultNumberOption(mxsd, mnsd, 21, 21).
    int mxsd;
    if (!DefaultNumberOption(isolate, mxsd_obj, mnsd, 21, 21, mxsd_str)
             .To(&mxsd)) {
      return Nothing<NumberFormatDigitOptions>();
    }

    // 14. c. Set intlObj.[[MinimumSignificantDigits]] to mnsd.
    digit_options.minimum_significant_digits = mnsd;

    // 14. d. Set intlObj.[[MaximumSignificantDigits]] to mxsd.
    digit_options.maximum_significant_digits = mxsd;
  } else {
    digit_options.minimum_significant_digits = 0;
    digit_options.maximum_significant_digits = 0;

    // 15. Else If mnfd is not undefined or mxfd is not undefined, then
    if (!mnfd_obj->IsUndefined(isolate) || !mxfd_obj->IsUndefined(isolate)) {
      // 15. b. Let mnfd be ? DefaultNumberOption(mnfd, 0, 20, mnfdDefault).
      if (!DefaultNumberOption(isolate, mnfd_obj, 0, 20, mnfd_default, mnfd_str)
               .To(&mnfd)) {
        return Nothing<NumberFormatDigitOptions>();
      }

      // 15. c. Let mxfdActualDefault be max( mnfd, mxfdDefault ).
      int mxfd_actual_default = std::max(mnfd, mxfd_default);

      // 15. d. Let mxfd be ? DefaultNumberOption(mxfd, mnfd, 20,
      // mxfdActualDefault).
      if (!DefaultNumberOption(isolate, mxfd_obj, mnfd, 20, mxfd_actual_default,
                               mxfd_str)
               .To(&mxfd)) {
        return Nothing<NumberFormatDigitOptions>();
      }
      // 15. e. Set intlObj.[[MinimumFractionDigits]] to mnfd.
      digit_options.minimum_fraction_digits = mnfd;

      // 15. f. Set intlObj.[[MaximumFractionDigits]] to mxfd.
      digit_options.maximum_fraction_digits = mxfd;
      // Else If intlObj.[[Notation]] is "compact", then
    } else if (notation_is_compact) {
      // a. Set intlObj.[[RoundingType]] to "compact-rounding".
      // Set minimum_significant_digits to -1 to represent roundingtype is
      // "compact-rounding".
      digit_options.minimum_significant_digits = -1;
      // 17. Else,
    } else {
      // 17. b. Set intlObj.[[MinimumFractionDigits]] to mnfdDefault.
      digit_options.minimum_fraction_digits = mnfd_default;

      // 17. c. Set intlObj.[[MaximumFractionDigits]] to mxfdDefault.
      digit_options.maximum_fraction_digits = mxfd_default;
    }
  }
  return Just(digit_options);
}

1319 1320
namespace {

1321 1322 1323
// ecma402/#sec-bestavailablelocale
//
// Returns the longest prefix of |locale|, truncated at subtag boundaries,
// that is an element of |available_locales|. Returns the empty string (the
// stand-in for the spec's "undefined") when no prefix matches.
std::string BestAvailableLocale(const std::set<std::string>& available_locales,
                                const std::string& locale) {
  // 1. Let candidate be locale.
  std::string candidate = locale;

  // 2. Repeat,
  while (true) {
    // 2.a. If availableLocales contains an element equal to candidate, return
    //      candidate.
    if (available_locales.find(candidate) != available_locales.end()) {
      return candidate;
    }

    // 2.b. Let pos be the character index of the last occurrence of "-"
    //      (U+002D) within candidate. If that character does not occur, return
    //      undefined.
    size_t pos = candidate.rfind('-');
    if (pos == std::string::npos) {
      return std::string();
    }

    // 2.c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate,
    //      decrease pos by 2.
    // This drops a single-character subtag (a singleton such as "x" or "u")
    // together with the subtag that follows it.
    if (pos >= 2 && candidate[pos - 2] == '-') {
      pos -= 2;
    }

    // 2.d. Let candidate be the substring of candidate from position 0,
    //      inclusive, to position pos, exclusive.
    candidate = candidate.substr(0, pos);
  }
}

1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365
// Result of splitting a BCP47 tag into its Unicode extension sequence and
// everything else.
struct ParsedLocale {
  // The tag with the Unicode extension sequence removed.
  std::string no_extensions_locale;
  // The removed Unicode extension sequence; left empty when there is none.
  std::string extension;
};

// Returns a struct containing a bcp47 tag without unicode extensions
// and the removed unicode extensions.
//
// For example, given 'en-US-u-co-emoji' returns 'en-US' and
// '-u-co-emoji' (the extension keeps its leading hyphen).
//
// Tags whose second character is '-' (privateuse or grandfathered), tags
// without "-u-", and tags where "-x-" precedes "-u-" are returned unchanged
// with an empty extension.
ParsedLocale ParseBCP47Locale(const std::string& locale) {
  size_t length = locale.length();
  ParsedLocale parsed_locale;

  // Privateuse or grandfathered locales have no extension sequences.
  if ((length > 1) && (locale[1] == '-')) {
    // Check to make sure that this really is a grandfathered or
    // privateuse extension. ICU can sometimes mess up the
    // canonicalization.
    DCHECK(locale[0] == 'x' || locale[0] == 'i');
    parsed_locale.no_extensions_locale = locale;
    return parsed_locale;
  }

  size_t unicode_extension_start = locale.find("-u-");

  // No unicode extensions found.
  if (unicode_extension_start == std::string::npos) {
    parsed_locale.no_extensions_locale = locale;
    return parsed_locale;
  }

  size_t private_extension_start = locale.find("-x-");

  // Unicode extensions found within privateuse subtags don't count.
  if (private_extension_start != std::string::npos &&
      private_extension_start < unicode_extension_start) {
    parsed_locale.no_extensions_locale = locale;
    return parsed_locale;
  }

  const std::string beginning = locale.substr(0, unicode_extension_start);
  size_t unicode_extension_end = length;
  DCHECK_GT(length, 2);

  // Find the end of the extension production as per the bcp47 grammar
  // by looking for '-' followed by a single character and then another '-',
  // i.e. the start of the next singleton (such as "-x-").
  for (size_t i = unicode_extension_start + 1; i < length - 2; i++) {
    if (locale[i] != '-') continue;

    if (locale[i + 2] == '-') {
      unicode_extension_end = i;
      break;
    }

    // The subtag after this '-' has at least two characters, so the next
    // '-' cannot appear before i + 3.
    i += 2;
  }

  const std::string end = locale.substr(unicode_extension_end);
  parsed_locale.no_extensions_locale = beginning + end;
  parsed_locale.extension = locale.substr(
      unicode_extension_start, unicode_extension_end - unicode_extension_start);
  return parsed_locale;
}

// ecma402/#sec-lookupsupportedlocales
1421
std::vector<std::string> LookupSupportedLocales(
1422 1423
    const std::set<std::string>& available_locales,
    const std::vector<std::string>& requested_locales) {
1424 1425 1426 1427
  // 1. Let subset be a new empty List.
  std::vector<std::string> subset;

  // 2. For each element locale of requestedLocales in List order, do
1428 1429 1430
  for (const std::string& locale : requested_locales) {
    // 2. a. Let noExtensionsLocale be the String value that is locale
    //       with all Unicode locale extension sequences removed.
1431 1432
    std::string no_extension_locale =
        ParseBCP47Locale(locale).no_extensions_locale;
1433 1434 1435

    // 2. b. Let availableLocale be
    //       BestAvailableLocale(availableLocales, noExtensionsLocale).
1436
    std::string available_locale =
1437 1438 1439 1440
        BestAvailableLocale(available_locales, no_extension_locale);

    // 2. c. If availableLocale is not undefined, append locale to the
    //       end of subset.
1441 1442 1443 1444 1445 1446 1447 1448 1449
    if (!available_locale.empty()) {
      subset.push_back(locale);
    }
  }

  // 3. Return subset.
  return subset;
}

1450 1451 1452 1453 1454
// Builds an icu::LocaleMatcher whose default locale is the isolate's default
// locale and whose supported locales are |available_locales|. ICU errors are
// reported through |status|; the caller is expected to inspect it.
icu::LocaleMatcher BuildLocaleMatcher(
    Isolate* isolate, const std::set<std::string>& available_locales,
    UErrorCode* status) {
  icu::Locale default_locale =
      icu::Locale::forLanguageTag(DefaultLocale(isolate), *status);
  DCHECK(U_SUCCESS(*status));
  icu::LocaleMatcher::Builder builder;
  builder.setDefaultLocale(&default_locale);
  for (const std::string& available_locale : available_locales) {
    builder.addSupportedLocale(
        icu::Locale::forLanguageTag(available_locale.c_str(), *status));
  }

  return builder.build(*status);
}

class Iterator : public icu::Locale::Iterator {
 public:
  Iterator(std::vector<std::string>::const_iterator begin,
           std::vector<std::string>::const_iterator end)
      : iter_(begin), end_(end) {}
1472
  ~Iterator() override = default;
1473 1474 1475 1476 1477 1478

  UBool hasNext() const override { return iter_ != end_; }

  const icu::Locale& next() override {
    UErrorCode status = U_ZERO_ERROR;
    locale_ = icu::Locale::forLanguageTag(iter_->c_str(), status);
Frank Tang's avatar
Frank Tang committed
1479
    DCHECK(U_SUCCESS(status));
1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511
    ++iter_;
    return locale_;
  }

 private:
  std::vector<std::string>::const_iterator iter_;
  std::vector<std::string>::const_iterator end_;
  icu::Locale locale_;
};

// ecma402/#sec-bestfitmatcher
// The BestFitMatcher abstract operation compares requestedLocales, which must
// be a List as returned by CanonicalizeLocaleList, against the locales in
// availableLocales and determines the best available language to meet the
// request. The algorithm is implementation dependent, but should produce
// results that a typical user of the requested locales would perceive
// as at least as good as those produced by the LookupMatcher abstract
// operation. Options specified through Unicode locale extension sequences must
// be ignored by the algorithm. Information about such subsequences is returned
// separately. The abstract operation returns a record with a [[locale]] field,
// whose value is the language tag of the selected locale, which must be an
// element of availableLocales. If the language tag of the request locale that
// led to the selected locale contained a Unicode locale extension sequence,
// then the returned record also contains an [[extension]] field whose value is
// the first Unicode locale extension sequence within the request locale
// language tag.
//
// In this implementation the record is a single string: the first requested
// tag that starts with the best-fit tag (so its extensions ride along), or the
// best-fit tag itself if no requested tag has that prefix. Any ICU failure
// falls back to DefaultLocale(isolate).
std::string BestFitMatcher(Isolate* isolate,
                           const std::set<std::string>& available_locales,
                           const std::vector<std::string>& requested_locales) {
  UErrorCode status = U_ZERO_ERROR;
  icu::LocaleMatcher matcher =
      BuildLocaleMatcher(isolate, available_locales, &status);
  DCHECK(U_SUCCESS(status));

  Iterator iter(requested_locales.cbegin(), requested_locales.cend());
  // ICU may return nullptr from getBestMatch() (for example when |status|
  // already holds a failure, which the DCHECK above does not catch in release
  // builds), so the result must be checked before it is dereferenced.
  const icu::Locale* best_match = matcher.getBestMatch(iter, status);
  if (U_FAILURE(status) || best_match == nullptr) {
    return DefaultLocale(isolate);
  }
  std::string bestfit = best_match->toLanguageTag<std::string>(status);
  if (U_FAILURE(status)) {
    return DefaultLocale(isolate);
  }
  // We need to return the extensions with it. compare() checks only the
  // prefix instead of searching the whole tag as find() would.
  for (const std::string& requested : requested_locales) {
    if (requested.compare(0, bestfit.length(), bestfit) == 0) {
      return requested;
    }
  }
  return bestfit;
}

1530 1531 1532
// ECMA 402 9.2.8 BestFitSupportedLocales(availableLocales, requestedLocales)
// https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
std::vector<std::string> BestFitSupportedLocales(
1533
    Isolate* isolate, const std::set<std::string>& available_locales,
1534
    const std::vector<std::string>& requested_locales) {
1535 1536 1537
  UErrorCode status = U_ZERO_ERROR;
  icu::LocaleMatcher matcher =
      BuildLocaleMatcher(isolate, available_locales, &status);
Frank Tang's avatar
Frank Tang committed
1538
  DCHECK(U_SUCCESS(status));
1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557

  std::string default_locale = DefaultLocale(isolate);
  std::vector<std::string> result;
  for (auto it = requested_locales.cbegin(); it != requested_locales.cend();
       it++) {
    if (*it == default_locale) {
      result.push_back(*it);
    } else {
      status = U_ZERO_ERROR;
      icu::Locale desired = icu::Locale::forLanguageTag(it->c_str(), status);
      std::string bestfit = matcher.getBestMatch(desired, status)
                                ->toLanguageTag<std::string>(status);
      // We need to return the extensions with it.
      if (U_SUCCESS(status) && it->find(bestfit) == 0) {
        result.push_back(*it);
      }
    }
  }
  return result;
1558 1559
}

1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581
// ecma262 #sec-createarrayfromlist
// Creates a JSArray holding one String per entry of |elements|, in order.
// Each entry is converted from UTF-8 and added as a data element with the
// property attributes |attr|.
Handle<JSArray> CreateArrayFromList(Isolate* isolate,
                                    std::vector<std::string> elements,
                                    PropertyAttributes attr) {
  Factory* factory = isolate->factory();
  // Let array be ! ArrayCreate(0).
  Handle<JSArray> array = factory->NewJSArray(0);

  // 3. Let n be 0.
  // 4. For each element e of elements, do
  const uint32_t count = static_cast<uint32_t>(elements.size());
  for (uint32_t n = 0; n != count; ++n) {
    // a. Let status be CreateDataProperty(array, ! ToString(n), e).
    Handle<String> value =
        factory->NewStringFromUtf8(CStrVector(elements[n].c_str()))
            .ToHandleChecked();
    JSObject::AddDataElement(array, n, value, attr);
  }

  // 5. Return array.
  return array;
}

1582 1583 1584 1585
// To mitigate the risk of the best-fit locale matcher, its implementation is
// checked in first without turning it on. While this flag is false, the
// callers in this file that test it fall back to the lookup matcher even when
// "best fit" is requested.
static bool implement_bestfit = false;

1586 1587 1588
// ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options)
// https://tc39.github.io/ecma402/#sec-supportedlocales
//
// Reads the "localeMatcher" option (if |options| is not undefined), filters
// |requested_locales| with the corresponding matcher and returns the result
// as a JS array. |method| is forwarded to Intl::GetLocaleMatcher.
MaybeHandle<JSObject> SupportedLocales(
    Isolate* isolate, const char* method,
    const std::set<std::string>& available_locales,
    const std::vector<std::string>& requested_locales, Handle<Object> options) {
  // 2. Else, let matcher be "best fit".
  Intl::MatcherOption matcher = Intl::MatcherOption::kBestFit;

  // 1. If options is not undefined, then
  if (!options->IsUndefined(isolate)) {
    // 1. a. Let options be ? ToObject(options).
    Handle<JSReceiver> options_obj;
    ASSIGN_RETURN_ON_EXCEPTION(isolate, options_obj,
                               Object::ToObject(isolate, options), JSObject);

    // 1. b. Let matcher be ? GetOption(options, "localeMatcher", "string",
    //       « "lookup", "best fit" », "best fit").
    Maybe<Intl::MatcherOption> maybe_locale_matcher =
        Intl::GetLocaleMatcher(isolate, options_obj, method);
    MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSObject>());
    matcher = maybe_locale_matcher.FromJust();
  }

  // 3. If matcher is "best fit", then
  //    a. Let supportedLocales be BestFitSupportedLocales(availableLocales,
  //       requestedLocales).
  // 4. Else,
  //    a. Let supportedLocales be LookupSupportedLocales(availableLocales,
  //       requestedLocales).
  // Best fit is only used when it is both requested and switched on.
  const bool use_bestfit =
      matcher == Intl::MatcherOption::kBestFit && implement_bestfit;
  std::vector<std::string> supported_locales =
      use_bestfit
          ? BestFitSupportedLocales(isolate, available_locales,
                                    requested_locales)
          : LookupSupportedLocales(available_locales, requested_locales);

  // 5. Return CreateArrayFromList(supportedLocales).
  return CreateArrayFromList(isolate, supported_locales,
                             static_cast<PropertyAttributes>(NONE));
}
1630

1631 1632
}  // namespace

1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645
// ecma-402 #sec-intl.getcanonicallocales
// Canonicalizes |locales| and returns the resulting tags as a JS array.
// Returns an empty MaybeHandle if canonicalization yields Nothing.
MaybeHandle<JSArray> Intl::GetCanonicalLocales(Isolate* isolate,
                                               Handle<Object> locales) {
  // 1. Let ll be ? CanonicalizeLocaleList(locales).
  Maybe<std::vector<std::string>> canonicalized =
      CanonicalizeLocaleList(isolate, locales, false);
  MAYBE_RETURN(canonicalized, MaybeHandle<JSArray>());

  // 2. Return CreateArrayFromList(ll).
  return CreateArrayFromList(isolate, canonicalized.FromJust(),
                             static_cast<PropertyAttributes>(NONE));
}

1646
// ECMA 402 Intl.*.supportedLocalesOf
1647 1648 1649 1650
MaybeHandle<JSObject> Intl::SupportedLocalesOf(
    Isolate* isolate, const char* method,
    const std::set<std::string>& available_locales, Handle<Object> locales,
    Handle<Object> options) {
1651
  // Let availableLocales be %Collator%.[[AvailableLocales]].
1652

1653
  // Let requestedLocales be ? CanonicalizeLocaleList(locales).
1654 1655 1656
  Maybe<std::vector<std::string>> requested_locales =
      CanonicalizeLocaleList(isolate, locales, false);
  MAYBE_RETURN(requested_locales, MaybeHandle<JSObject>());
1657 1658

  // Return ? SupportedLocales(availableLocales, requestedLocales, options).
1659
  return SupportedLocales(isolate, method, available_locales,
1660
                          requested_locales.FromJust(), options);
1661 1662
}

1663
namespace {
1664

1665 1666 1667
// Returns true if |value| is an accepted value of the locale keyword |key|
// for |locale|. |value| is first mapped to its legacy type via
// uloc_toLegacyType and then looked up among the values enumerated by
// T::getKeywordValuesForLocale for the base name of |locale|. Any ICU failure
// results in false.
template <typename T>
bool IsValidExtension(const icu::Locale& locale, const char* key,
                      const std::string& value) {
  const char* legacy_type = uloc_toLegacyType(key, value.c_str());
  if (legacy_type == nullptr) return false;

  UErrorCode status = U_ZERO_ERROR;
  std::unique_ptr<icu::StringEnumeration> candidates(
      T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()),
                                   false, status));
  if (U_FAILURE(status)) return false;

  int32_t candidate_length;
  while (true) {
    const char* candidate = candidates->next(&candidate_length, status);
    // Stop on an ICU error or once the enumeration is exhausted.
    if (U_FAILURE(status) || candidate == nullptr) return false;
    if (strcmp(legacy_type, candidate) == 0) return true;
  }
}

1690 1691 1692 1693
}  // namespace

// Returns true if |value| is a collation type accepted for |locale|. The
// values "standard" and "search" are always rejected; everything else is
// checked against ICU's "collation" keyword values for the locale.
bool Intl::IsValidCollation(const icu::Locale& locale,
                            const std::string& value) {
  if (value == "standard" || value == "search") return false;
  return IsValidExtension<icu::Collator>(locale, "collation", value);
}

1699 1700 1701 1702
// Syntactic check of a calendar identifier: returns whatever
// JSLocale::Is38AlphaNumList reports for |value|. This does not consult ICU
// for whether the calendar exists (see Intl::IsValidCalendar for that).
bool Intl::IsWellFormedCalendar(const std::string& value) {
  return JSLocale::Is38AlphaNumList(value);
}

1703 1704 1705 1706 1707
// ecma402/#sec-iswellformedcurrencycode
// Syntactic check of a currency code: returns whatever JSLocale::Is3Alpha
// reports for |currency|.
bool Intl::IsWellFormedCurrency(const std::string& currency) {
  return JSLocale::Is3Alpha(currency);
}

1708 1709 1710 1711 1712
// Returns true if |value| is accepted by IsValidExtension as a value of the
// "calendar" keyword for |locale|, using icu::Calendar as the source of the
// keyword values.
bool Intl::IsValidCalendar(const icu::Locale& locale,
                           const std::string& value) {
  return IsValidExtension<icu::Calendar>(locale, "calendar", value);
}

1713
bool Intl::IsValidNumberingSystem(const std::string& value) {
1714 1715 1716 1717 1718 1719 1720
  std::set<std::string> invalid_values = {"native", "traditio", "finance"};
  if (invalid_values.find(value) != invalid_values.end()) return false;
  UErrorCode status = U_ZERO_ERROR;
  std::unique_ptr<icu::NumberingSystem> numbering_system(
      icu::NumberingSystem::createInstanceByName(value.c_str(), status));
  return U_SUCCESS(status) && numbering_system.get() != nullptr;
}
1721

1722 1723 1724 1725 1726 1727
namespace {

// Syntactic check of a numbering system identifier: returns whatever
// JSLocale::Is38AlphaNumList reports for |value|. Whether ICU actually knows
// the numbering system is a separate question (Intl::IsValidNumberingSystem).
bool IsWellFormedNumberingSystem(const std::string& value) {
  return JSLocale::Is38AlphaNumList(value);
}

1728 1729
// Extracts from |icu_locale| the Unicode extension keywords whose BCP 47 key
// is listed in |relevant_keys| and whose value passes the per-key validation
// below, and returns them as a map from BCP 47 key to BCP 47 type.
//
// Side effect: |icu_locale| is rebuilt with all extensions cleared and only
// the accepted keywords re-added. If the keyword enumeration cannot be
// created (ICU error or no keywords), an empty map is returned and
// |icu_locale| is left unchanged.
std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
    icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) {
  std::map<std::string, std::string> extensions;

  UErrorCode status = U_ZERO_ERROR;
  icu::LocaleBuilder builder;
  builder.setLocale(*icu_locale).clearExtensions();
  std::unique_ptr<icu::StringEnumeration> keywords(
      icu_locale->createKeywords(status));
  if (U_FAILURE(status)) return extensions;

  if (!keywords) return extensions;
  char value[ULOC_FULLNAME_CAPACITY];

  int32_t length;
  status = U_ZERO_ERROR;
  for (const char* keyword = keywords->next(&length, status);
       keyword != nullptr; keyword = keywords->next(&length, status)) {
    // Ignore failures in ICU and skip to the next keyword.
    //
    // This is fine.™
    if (U_FAILURE(status)) {
      status = U_ZERO_ERROR;
      continue;
    }

    icu_locale->getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status);

    // Ignore failures in ICU and skip to the next keyword.
    //
    // This is fine.™
    if (U_FAILURE(status)) {
      status = U_ZERO_ERROR;
      continue;
    }

    const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword);

    if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
      const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
      // uloc_toUnicodeLocaleType() may return nullptr when the value has no
      // well-formed BCP 47 form. Such a value can never be valid, and the
      // code below would otherwise construct std::string from nullptr.
      if (bcp47_value == nullptr) continue;

      bool is_valid_value = false;
      // 8.h.ii.1.a If keyLocaleData contains requestedValue, then
      if (strcmp("ca", bcp47_key) == 0) {
        is_valid_value = Intl::IsValidCalendar(*icu_locale, bcp47_value);
      } else if (strcmp("co", bcp47_key) == 0) {
        is_valid_value = Intl::IsValidCollation(*icu_locale, bcp47_value);
      } else if (strcmp("hc", bcp47_key) == 0) {
        // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
        std::set<std::string> valid_values = {"h11", "h12", "h23", "h24"};
        is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
      } else if (strcmp("lb", bcp47_key) == 0) {
        // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml
        std::set<std::string> valid_values = {"strict", "normal", "loose"};
        is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
      } else if (strcmp("kn", bcp47_key) == 0) {
        // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
        std::set<std::string> valid_values = {"true", "false"};
        is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
      } else if (strcmp("kf", bcp47_key) == 0) {
        // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
        std::set<std::string> valid_values = {"upper", "lower", "false"};
        is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
      } else if (strcmp("nu", bcp47_key) == 0) {
        is_valid_value = Intl::IsValidNumberingSystem(bcp47_value);
      }
      if (is_valid_value) {
        extensions.insert(
            std::pair<std::string, std::string>(bcp47_key, bcp47_value));
        builder.setUnicodeLocaleKeyword(bcp47_key, bcp47_value);
      }
    }
  }

  // Replace the caller's locale with the filtered one.
  status = U_ZERO_ERROR;
  *icu_locale = builder.build(status);

  return extensions;
}

1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851
// ecma402/#sec-lookupmatcher
// Returns the best available locale for the first requested locale that has
// one, with that request's extension string appended. If no requested locale
// matches, DefaultLocale(isolate) is returned.
std::string LookupMatcher(Isolate* isolate,
                          const std::set<std::string>& available_locales,
                          const std::vector<std::string>& requested_locales) {
  // 1. Let result be a new Record.
  //    (The record is represented by the returned string, so no separate
  //    object is needed.)

  // 2. For each element locale of requestedLocales in List order, do
  for (const std::string& locale : requested_locales) {
    // 2. a. Let noExtensionsLocale be the String value that is locale
    //       with all Unicode locale extension sequences removed.
    ParsedLocale parsed_locale = ParseBCP47Locale(locale);
    std::string no_extensions_locale = parsed_locale.no_extensions_locale;

    // 2. b. Let availableLocale be
    //       BestAvailableLocale(availableLocales, noExtensionsLocale).
    std::string available_locale =
        BestAvailableLocale(available_locales, no_extensions_locale);

    // 2. c. If availableLocale is not undefined, then
    if (!available_locale.empty()) {
      // Note: The following steps are not performed here because we
      // can use ICU to parse the unicode locale extension sequence
      // as part of Intl::ResolveLocale.
      //
      // There's no need to separate the unicode locale extensions
      // right here. Instead just return the available locale with the
      // extensions.
      //
      // 2. c. i. Set result.[[locale]] to availableLocale.
      // 2. c. ii. If locale and noExtensionsLocale are not the same
      // String value, then
      // 2. c. ii. 1. Let extension be the String value consisting of
      // the first substring of locale that is a Unicode locale
      // extension sequence.
      // 2. c. ii. 2. Set result.[[extension]] to extension.
      // 2. c. iii. Return result.
      return available_locale + parsed_locale.extension;
    }
  }

  // 3. Let defLocale be DefaultLocale();
  // 4. Set result.[[locale]] to defLocale.
  // 5. Return result.
  return DefaultLocale(isolate);
}

}  // namespace

// This function doesn't correspond exactly with the spec. Instead
// we use ICU to do all the string manipulations that the spec
// peforms.
//
// The spec uses this function to normalize values for various
// relevant extension keys (such as disallowing "search" for
// collation). Instead of doing this here, we let the callers of
// this method perform such normalization.
//
// ecma402/#sec-resolvelocale
1867
Maybe<Intl::ResolvedLocale> Intl::ResolveLocale(
1868 1869 1870 1871
    Isolate* isolate, const std::set<std::string>& available_locales,
    const std::vector<std::string>& requested_locales, MatcherOption matcher,
    const std::set<std::string>& relevant_extension_keys) {
  std::string locale;
1872 1873 1874
  if (matcher == Intl::MatcherOption::kBestFit && implement_bestfit) {
    locale = BestFitMatcher(isolate, available_locales, requested_locales);
  } else {
1875 1876 1877
    locale = LookupMatcher(isolate, available_locales, requested_locales);
  }

1878 1879 1880
  Maybe<icu::Locale> maybe_icu_locale = CreateICULocale(locale);
  MAYBE_RETURN(maybe_icu_locale, Nothing<Intl::ResolvedLocale>());
  icu::Locale icu_locale = maybe_icu_locale.FromJust();
1881
  std::map<std::string, std::string> extensions =
1882 1883
      LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys);

1884
  std::string canonicalized_locale = Intl::ToLanguageTag(icu_locale).FromJust();
1885 1886 1887

  // TODO(gsathya): Remove privateuse subtags from extensions.

1888 1889
  return Just(
      Intl::ResolvedLocale{canonicalized_locale, icu_locale, extensions});
1890 1891
}

1892
// Flattens |text|, copies it into a heap-allocated icu::UnicodeString and sets
// that copy as the text of |break_iterator|. The copy is wrapped in a Managed
// object which is returned to the caller.
// NOTE(review): presumably the caller must keep the returned handle alive for
// as long as |break_iterator| uses the text -- confirm against call sites.
Handle<Managed<icu::UnicodeString>> Intl::SetTextToBreakIterator(
    Isolate* isolate, Handle<String> text, icu::BreakIterator* break_iterator) {
  Handle<String> flat_text = String::Flatten(isolate, text);
  icu::UnicodeString* text_copy = static_cast<icu::UnicodeString*>(
      Intl::ToICUUnicodeString(isolate, flat_text).clone());

  Handle<Managed<icu::UnicodeString>> managed_text =
      Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, text_copy);

  break_iterator->setText(*text_copy);
  return managed_text;
}

1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953
// ecma262 #sec-string.prototype.normalize
// Normalizes |string| to the Unicode normalization form named by
// |form_input| ("NFC", "NFD", "NFKC" or "NFKD"; undefined selects NFC).
// Throws a RangeError for any other form name and a TypeError if ICU reports
// a failure. If the whole input is already normalized, the flattened input
// string itself is returned.
MaybeHandle<String> Intl::Normalize(Isolate* isolate, Handle<String> string,
                                    Handle<Object> form_input) {
  const char* form_name;
  UNormalization2Mode form_mode;
  if (form_input->IsUndefined(isolate)) {
    // default is NFC
    form_name = "nfc";
    form_mode = UNORM2_COMPOSE;
  } else {
    Handle<String> form;
    ASSIGN_RETURN_ON_EXCEPTION(isolate, form,
                               Object::ToString(isolate, form_input), String);

    // ICU selects a normalizer by data name plus mode, so NFD/NFKD use the
    // "nfc"/"nfkc" data in decompose mode.
    if (String::Equals(isolate, form, isolate->factory()->NFC_string())) {
      form_name = "nfc";
      form_mode = UNORM2_COMPOSE;
    } else if (String::Equals(isolate, form,
                              isolate->factory()->NFD_string())) {
      form_name = "nfc";
      form_mode = UNORM2_DECOMPOSE;
    } else if (String::Equals(isolate, form,
                              isolate->factory()->NFKC_string())) {
      form_name = "nfkc";
      form_mode = UNORM2_COMPOSE;
    } else if (String::Equals(isolate, form,
                              isolate->factory()->NFKD_string())) {
      form_name = "nfkc";
      form_mode = UNORM2_DECOMPOSE;
    } else {
      Handle<String> valid_forms =
          isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
      THROW_NEW_ERROR(
          isolate,
          NewRangeError(MessageTemplate::kNormalizationForm, valid_forms),
          String);
    }
  }

  int length = string->length();
  string = String::Flatten(isolate, string);
  icu::UnicodeString result;
  UErrorCode status = U_ZERO_ERROR;
  icu::UnicodeString input = ToICUUnicodeString(isolate, string);
  // Getting a singleton. Should not free it.
  const icu::Normalizer2* normalizer =
      icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
  DCHECK(U_SUCCESS(status));
  DCHECK_NOT_NULL(normalizer);
  int32_t normalized_prefix_length =
      normalizer->spanQuickCheckYes(input, status);
  // Quick return if the input is already normalized.
  if (length == normalized_prefix_length) return string;
  icu::UnicodeString unnormalized =
      input.tempSubString(normalized_prefix_length);
  // Read-only alias of the normalized prefix.
  result.setTo(false, input.getBuffer(), normalized_prefix_length);
  // copy-on-write; normalize the suffix and append to |result|.
  normalizer->normalizeSecondAndAppend(result, unnormalized, status);

  if (U_FAILURE(status)) {
    THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
  }

  return Intl::ToString(isolate, result);
}

Frank Tang's avatar
Frank Tang committed
1973 1974 1975 1976
// ICUTimezoneCache calls out to ICU for TimezoneCache
// functionality in a straightforward way.
class ICUTimezoneCache : public base::TimezoneCache {
 public:
1977
  ICUTimezoneCache() : timezone_(nullptr) { Clear(TimeZoneDetection::kSkip); }
Frank Tang's avatar
Frank Tang committed
1978

1979
  ~ICUTimezoneCache() override { Clear(TimeZoneDetection::kSkip); }
Frank Tang's avatar
Frank Tang committed
1980 1981 1982 1983 1984 1985 1986

  const char* LocalTimezone(double time_ms) override;

  double DaylightSavingsOffset(double time_ms) override;

  double LocalTimeOffset(double time_ms, bool is_utc) override;

1987
  void Clear(TimeZoneDetection time_zone_detection) override;
Frank Tang's avatar
Frank Tang committed
1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059

 private:
  icu::TimeZone* GetTimeZone();

  bool GetOffsets(double time_ms, bool is_utc, int32_t* raw_offset,
                  int32_t* dst_offset);

  icu::TimeZone* timezone_;

  std::string timezone_name_;
  std::string dst_timezone_name_;
};

const char* ICUTimezoneCache::LocalTimezone(double time_ms) {
  bool is_dst = DaylightSavingsOffset(time_ms) != 0;
  std::string* name = is_dst ? &dst_timezone_name_ : &timezone_name_;
  if (name->empty()) {
    icu::UnicodeString result;
    GetTimeZone()->getDisplayName(is_dst, icu::TimeZone::LONG, result);
    result += '\0';

    icu::StringByteSink<std::string> byte_sink(name);
    result.toUTF8(byte_sink);
  }
  DCHECK(!name->empty());
  return name->c_str();
}

// Returns the cached icu::TimeZone, creating it from ICU's default time zone
// on first use. The object stays owned by this cache.
icu::TimeZone* ICUTimezoneCache::GetTimeZone() {
  if (timezone_ != nullptr) return timezone_;
  timezone_ = icu::TimeZone::createDefault();
  return timezone_;
}

bool ICUTimezoneCache::GetOffsets(double time_ms, bool is_utc,
                                  int32_t* raw_offset, int32_t* dst_offset) {
  UErrorCode status = U_ZERO_ERROR;
  // TODO(jshin): ICU TimeZone class handles skipped time differently from
  // Ecma 262 (https://github.com/tc39/ecma262/pull/778) and icu::TimeZone
  // class does not expose the necessary API. Fixing
  // http://bugs.icu-project.org/trac/ticket/13268 would make it easy to
  // implement the proposed spec change. A proposed fix for ICU is
  //    https://chromium-review.googlesource.com/851265 .
  // In the meantime, use an internal (still public) API of icu::BasicTimeZone.
  // Once it's accepted by the upstream, get rid of cast. Note that casting
  // TimeZone to BasicTimeZone is safe because we know that icu::TimeZone used
  // here is a BasicTimeZone.
  if (is_utc) {
    GetTimeZone()->getOffset(time_ms, false, *raw_offset, *dst_offset, status);
  } else {
    static_cast<const icu::BasicTimeZone*>(GetTimeZone())
        ->getOffsetFromLocal(time_ms, icu::BasicTimeZone::kFormer,
                             icu::BasicTimeZone::kFormer, *raw_offset,
                             *dst_offset, status);
  }

  return U_SUCCESS(status);
}

// Returns the DST offset for |time_ms| (interpreted as UTC), or 0 if the
// offsets cannot be obtained from ICU.
double ICUTimezoneCache::DaylightSavingsOffset(double time_ms) {
  int32_t raw_offset, dst_offset;
  const bool ok = GetOffsets(time_ms, true, &raw_offset, &dst_offset);
  if (ok) return dst_offset;
  return 0;
}

// Returns the sum of the raw and DST offsets for |time_ms|, or 0 if the
// offsets cannot be obtained from ICU. |is_utc| is forwarded to GetOffsets.
double ICUTimezoneCache::LocalTimeOffset(double time_ms, bool is_utc) {
  int32_t raw_offset, dst_offset;
  const bool ok = GetOffsets(time_ms, is_utc, &raw_offset, &dst_offset);
  if (ok) return raw_offset + dst_offset;
  return 0;
}

2060
// Releases the cached icu::TimeZone and both cached display names. When
// |time_zone_detection| is kRedetect, ICU's default time zone is additionally
// replaced by the freshly detected host time zone.
void ICUTimezoneCache::Clear(TimeZoneDetection time_zone_detection) {
  delete timezone_;
  timezone_ = nullptr;
  timezone_name_.clear();
  dst_timezone_name_.clear();

  if (time_zone_detection != TimeZoneDetection::kRedetect) return;
  icu::TimeZone::adoptDefault(icu::TimeZone::detectHostTimeZone());
}

// Creates a new time zone cache: the ICU-backed implementation when
// FLAG_icu_timezone_data is set, otherwise the one provided by base::OS.
base::TimezoneCache* Intl::CreateTimeZoneCache() {
  if (FLAG_icu_timezone_data) {
    return new ICUTimezoneCache();
  }
  return base::OS::CreateTimezoneCache();
}

2075 2076 2077
// Reads the "localeMatcher" option from |options|. The accepted strings
// "best fit" and "lookup" map to MatcherOption::kBestFit and
// MatcherOption::kLookup; kBestFit is passed as the default value. |method|
// is forwarded to Intl::GetStringOption.
Maybe<Intl::MatcherOption> Intl::GetLocaleMatcher(Isolate* isolate,
                                                  Handle<JSReceiver> options,
                                                  const char* method) {
  Maybe<Intl::MatcherOption> maybe_matcher =
      Intl::GetStringOption<Intl::MatcherOption>(
          isolate, options, "localeMatcher", method, {"best fit", "lookup"},
          {Intl::MatcherOption::kBestFit, Intl::MatcherOption::kLookup},
          Intl::MatcherOption::kBestFit);
  return maybe_matcher;
}
2083

2084 2085 2086 2087 2088 2089 2090 2091 2092
// Reads the "numberingSystem" option from |options| into |result|.
// Returns Just(true) if the option was found and is well formed, Just(false)
// if it was not found (or |result| holds no string), and Nothing after
// throwing a RangeError when the value is not a well-formed numbering system
// identifier. A Nothing from Intl::GetStringOption is passed through.
Maybe<bool> Intl::GetNumberingSystem(Isolate* isolate,
                                     Handle<JSReceiver> options,
                                     const char* method,
                                     std::unique_ptr<char[]>* result) {
  const std::vector<const char*> empty_values = {};
  Maybe<bool> maybe_found = Intl::GetStringOption(
      isolate, options, "numberingSystem", empty_values, method, result);
  MAYBE_RETURN(maybe_found, Nothing<bool>());

  const bool has_value = maybe_found.FromJust() && *result != nullptr;
  if (!has_value) return Just(false);

  if (!IsWellFormedNumberingSystem(result->get())) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate,
        NewRangeError(
            MessageTemplate::kInvalid,
            isolate->factory()->numberingSystem_string(),
            isolate->factory()->NewStringFromAsciiChecked(result->get())),
        Nothing<bool>());
  }
  return Just(true);
}

2107 2108 2109
const std::set<std::string>& Intl::GetAvailableLocales() {
  static base::LazyInstance<Intl::AvailableLocales<>>::type available_locales =
      LAZY_INSTANCE_INITIALIZER;
2110 2111 2112
  return available_locales.Pointer()->Get();
}

2113 2114 2115 2116 2117 2118 2119 2120 2121
namespace {

// Policy type passed as the template argument of Intl::AvailableLocales by
// GetAvailableLocalesForDateFormat(). It supplies the resource key
// "calendar" and no path.
// NOTE(review): presumably AvailableLocales keeps only locales whose ICU
// resource bundle contains this key -- confirm against its definition.
struct CheckCalendar {
  static const char* key() { return "calendar"; }
  static const char* path() { return nullptr; }
};

}  // namespace

2122
const std::set<std::string>& Intl::GetAvailableLocalesForDateFormat() {
2123
  static base::LazyInstance<Intl::AvailableLocales<CheckCalendar>>::type
2124 2125 2126 2127
      available_locales = LAZY_INSTANCE_INITIALIZER;
  return available_locales.Pointer()->Get();
}

2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160
// Maps an ICU number format field id (|field_id|, a UNumberFormatFields
// value) to the string naming that part. |numeric_obj| is the formatted
// value; it decides between "integer"/"nan"/"infinity" for the integer field
// and between the minus and plus sign strings for the sign field.
Handle<String> Intl::NumberFieldToType(Isolate* isolate,
                                       Handle<Object> numeric_obj,
                                       int32_t field_id) {
  DCHECK(numeric_obj->IsNumeric());
  Factory* factory = isolate->factory();
  switch (static_cast<UNumberFormatFields>(field_id)) {
    case UNUM_INTEGER_FIELD: {
      // Neither NaN nor Infinite could be stored into BigInt
      // so just return integer.
      if (numeric_obj->IsBigInt()) return factory->integer_string();
      const double number = numeric_obj->Number();
      if (std::isnan(number)) return factory->nan_string();
      if (std::isfinite(number)) return factory->integer_string();
      return factory->infinity_string();
    }
    case UNUM_FRACTION_FIELD:
      return factory->fraction_string();
    case UNUM_DECIMAL_SEPARATOR_FIELD:
      return factory->decimal_string();
    case UNUM_GROUPING_SEPARATOR_FIELD:
      return factory->group_string();
    case UNUM_CURRENCY_FIELD:
      return factory->currency_string();
    case UNUM_PERCENT_FIELD:
      return factory->percentSign_string();
    case UNUM_SIGN_FIELD: {
      const bool is_negative =
          numeric_obj->IsBigInt()
              ? Handle<BigInt>::cast(numeric_obj)->IsNegative()
              : std::signbit(numeric_obj->Number());
      if (is_negative) return factory->minusSign_string();
      return factory->plusSign_string();
    }
    case UNUM_EXPONENT_SYMBOL_FIELD:
      return factory->exponentSeparator_string();

    case UNUM_EXPONENT_SIGN_FIELD:
      return factory->exponentMinusSign_string();

    case UNUM_EXPONENT_FIELD:
      return factory->exponentInteger_string();

    case UNUM_PERMILL_FIELD:
      // We're not creating any permill formatter, and it's not even clear how
      // that would be possible with the ICU API.
      UNREACHABLE();
      return Handle<String>();

    case UNUM_COMPACT_FIELD:
      return factory->compact_string();
    case UNUM_MEASURE_UNIT_FIELD:
      return factory->unit_string();

    default:
      UNREACHABLE();
      return Handle<String>();
  }
}

2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200
// A helper function to convert the FormattedValue for several Intl objects.
// Converts |formatted| to a JS string; throws a TypeError (kIcuError) if ICU
// fails to produce the string.
MaybeHandle<String> Intl::FormattedToString(
    Isolate* isolate, const icu::FormattedValue& formatted) {
  UErrorCode status = U_ZERO_ERROR;
  icu::UnicodeString formatted_text = formatted.toString(status);
  if (U_SUCCESS(status)) {
    return Intl::ToString(isolate, formatted_text);
  }
  THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
}

2201

2202 2203
}  // namespace internal
}  // namespace v8