js-locale.cc 17 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif  // V8_INTL_SUPPORT

#include "src/objects/js-locale.h"

#include <map>
#include <memory>
#include <string>
14
#include <vector>
15

16
#include "src/api/api.h"
17
#include "src/execution/isolate.h"
18
#include "src/handles/global-handles.h"
19
#include "src/heap/factory.h"
20
#include "src/objects/intl-objects.h"
21
#include "src/objects/js-locale-inl.h"
22
#include "src/objects/objects-inl.h"
23
#include "unicode/char16ptr.h"
24
#include "unicode/localebuilder.h"
25
#include "unicode/locid.h"
26
#include "unicode/uloc.h"
27 28 29 30 31 32
#include "unicode/unistr.h"

namespace v8 {
namespace internal {

namespace {
33 34 35 36 37 38 39 40

// Describes one option that InsertOptionsIntoLocale reads from the options
// object and stores as a Unicode extension keyword.
struct OptionData {
  // Property name looked up on the options object (e.g. "hourCycle").
  const char* name;
  // Unicode extension key the value is stored under (e.g. "hc").
  const char* key;
  // Values handed to Intl::GetStringOption for this option; unused when
  // |is_bool_value| is true.
  const std::vector<const char*>* possible_values;
  // True when the option is read with Intl::GetBoolOption rather than
  // Intl::GetStringOption.
  bool is_bool_value;
};

41 42 43
// Inserts tags from options into the locale being built.
//
// Reads "calendar", "collation", "hourCycle", "caseFirst", "numeric" and
// "numberingSystem" from |options| and records every option that is present
// as a Unicode locale keyword on |builder|.
//
// Returns Nothing when an option lookup yields Nothing, Just(false) when
// uloc_toLegacyType rejects a value for its key, and Just(true) otherwise.
Maybe<bool> InsertOptionsIntoLocale(Isolate* isolate,
                                    Handle<JSReceiver> options,
                                    icu::LocaleBuilder* builder) {
  DCHECK(isolate);

  const std::vector<const char*> hour_cycle_values = {"h11", "h12", "h23",
                                                      "h24"};
  const std::vector<const char*> case_first_values = {"upper", "lower",
                                                      "false"};
  const std::vector<const char*> empty_values = {};
  const std::array<OptionData, 6> kOptionToUnicodeTagMap = {
      {{"calendar", "ca", &empty_values, false},
       {"collation", "co", &empty_values, false},
       {"hourCycle", "hc", &hour_cycle_values, false},
       {"caseFirst", "kf", &case_first_values, false},
       {"numeric", "kn", &empty_values, true},
       {"numberingSystem", "nu", &empty_values, false}}};

  // TODO(cira): Pass in values as per the spec to make this spec compliant.

  for (const auto& option_to_bcp47 : kOptionToUnicodeTagMap) {
    std::unique_ptr<char[]> value_str = nullptr;
    bool value_bool = false;
    Maybe<bool> maybe_found =
        option_to_bcp47.is_bool_value
            ? Intl::GetBoolOption(isolate, options, option_to_bcp47.name,
                                  "locale", &value_bool)
            : Intl::GetStringOption(isolate, options, option_to_bcp47.name,
                                    *(option_to_bcp47.possible_values),
                                    "locale", &value_str);
    MAYBE_RETURN(maybe_found, Nothing<bool>());

    // TODO(cira): Use fallback value if value is not found to make
    // this spec compliant.
    if (!maybe_found.FromJust()) continue;

    // Boolean options are stored as the strings "true" / "false".
    if (option_to_bcp47.is_bool_value) {
      value_str = value_bool ? isolate->factory()->true_string()->ToCString()
                             : isolate->factory()->false_string()->ToCString();
    }
    DCHECK_NOT_NULL(value_str.get());

    // Reject values ICU cannot map to a legacy type for this key.
    if (!uloc_toLegacyType(uloc_toLegacyKey(option_to_bcp47.key),
                           value_str.get())) {
      return Just(false);
    }
    // Overwrite existing, or insert new key-value to the locale string.
    builder->setUnicodeLocaleKeyword(option_to_bcp47.key, value_str.get());
  }
  return Just(true);
}

95 96
// Returns the value of the Unicode extension keyword |key| of |locale| as a
// string. Returns undefined when ICU reports U_ILLEGAL_ARGUMENT_ERROR or the
// value is empty. The value "yes" is reported as "true".
Handle<Object> UnicodeKeywordValue(Isolate* isolate, Handle<JSLocale> locale,
                                   const char* key) {
  icu::Locale* icu_locale = locale->icu_locale().raw();
  UErrorCode status = U_ZERO_ERROR;
  std::string value =
      icu_locale->getUnicodeKeywordValue<std::string>(key, status);
  if (status == U_ILLEGAL_ARGUMENT_ERROR || value.empty()) {
    return isolate->factory()->undefined_value();
  }
  if (value == "yes") {
    value = "true";
  }
  return isolate->factory()->NewStringFromAsciiChecked(value.c_str());
}
109

110 111 112
// Returns true when |value| lies in the closed interval [start, end].
bool InRange(size_t value, size_t start, size_t end) {
  if (value < start) return false;
  return !(end < value);
}
113

114 115 116
// Returns true when the character |value| lies in the closed interval
// [start, end].
bool InRange(char value, char start, char end) {
  if (value < start) return false;
  return !(end < value);
}
117

118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
// Returns true when the length of |str| is within [min, max] and
// |range_check_func| accepts every character of |str|.
bool IsCheckRange(const std::string& str, size_t min, size_t max,
                  bool(range_check_func)(char)) {
  const size_t length = str.length();
  if (!InRange(length, min, max)) return false;
  for (const char ch : str) {
    if (!range_check_func(ch)) return false;
  }
  return true;
}
// Returns true when |str| has between |min| and |max| characters (inclusive)
// and every one of them is an ASCII letter.
bool IsAlpha(const std::string& str, size_t min, size_t max) {
  const auto is_ascii_letter = [](char c) -> bool {
    if (InRange(c, 'a', 'z')) return true;
    return InRange(c, 'A', 'Z');
  };
  return IsCheckRange(str, min, max, is_ascii_letter);
}

// Returns true when |str| has between |min| and |max| characters (inclusive)
// and every one of them is an ASCII digit.
bool IsDigit(const std::string& str, size_t min, size_t max) {
  const auto is_ascii_digit = [](char c) -> bool {
    return InRange(c, '0', '9');
  };
  return IsCheckRange(str, min, max, is_ascii_digit);
}

137 138 139 140 141 142 143 144 145
// Returns true when |str| has between |min| and |max| characters (inclusive)
// and every one of them is an ASCII letter or digit.
bool IsAlphanum(const std::string& str, size_t min, size_t max) {
  const auto is_ascii_alphanum = [](char c) -> bool {
    if (InRange(c, 'a', 'z')) return true;
    if (InRange(c, 'A', 'Z')) return true;
    return InRange(c, '0', '9');
  };
  return IsCheckRange(str, min, max, is_ascii_alphanum);
}

// Returns true when |value| consists of 2-3 or 5-8 ASCII letters.
bool IsUnicodeLanguageSubtag(const std::string& value) {
  // unicode_language_subtag = alpha{2,3} | alpha{5,8};
  return IsAlpha(value, 2, 3) || IsAlpha(value, 5, 8);
}

148 149
bool IsUnicodeScriptSubtag(const std::string& value) {
  // unicode_script_subtag = alpha{4} ;
150 151 152
  return IsAlpha(value, 4, 4);
}

153 154
bool IsUnicodeRegionSubtag(const std::string& value) {
  // unicode_region_subtag = (alpha{2} | digit{3});
155 156 157
  return IsAlpha(value, 2, 2) || IsDigit(value, 3, 3);
}

158 159 160 161 162 163 164 165 166 167 168 169 170
// Returns true when |value| is one ASCII digit followed by exactly three
// ASCII alphanumeric characters.
bool IsDigitAlphanum3(const std::string& value) {
  if (value.length() != 4) return false;
  if (!InRange(value[0], '0', '9')) return false;
  const std::string tail = value.substr(1);
  return IsAlphanum(tail, 3, 3);
}

// Returns true when |value| is 5-8 ASCII alphanumeric characters, or a digit
// followed by three ASCII alphanumeric characters.
bool IsUnicodeVariantSubtag(const std::string& value) {
  // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
  if (IsAlphanum(value, 5, 8)) return true;
  return IsDigitAlphanum3(value);
}

// Returns true when |value| is a single ASCII alphanumeric character.
bool IsExtensionSingleton(const std::string& value) {
  constexpr size_t kSingletonLength = 1;
  return IsAlphanum(value, kSingletonLength, kSingletonLength);
}
171
}  // namespace
172

173 174 175 176 177 178 179 180 181
// Returns true when |value| is a '-'-separated list in which every item
// consists of 3 to 8 ASCII alphanumeric characters. An empty item (including
// an empty |value| or a trailing '-') makes the list invalid.
//
// The items are scanned iteratively so that stack depth and the amount of
// copying do not grow with the number of items in the input.
bool JSLocale::Is38AlphaNumList(const std::string& value) {
  std::size_t start = 0;
  while (true) {
    const std::size_t dash = value.find('-', start);
    if (dash == std::string::npos) {
      // Last (or only) item.
      return IsAlphanum(value.substr(start), 3, 8);
    }
    if (!IsAlphanum(value.substr(start, dash - start), 3, 8)) return false;
    start = dash + 1;
  }
}

182 183 184 185
// Returns true when |value| consists of exactly three ASCII letters.
bool JSLocale::Is3Alpha(const std::string& value) {
  constexpr size_t kLength = 3;
  return IsAlpha(value, kLength, kLength);
}

186 187
// TODO(ftang): Replace the following check with icu::LocaleBuilder
// once ICU 64 lands (March 2019).
188
// Returns true when |value|, split on '-', begins with subtags matching the
// unicode_language_id production. Checking stops with success at the first
// extension singleton; the subtags after it are not validated here.
//
// Note: std::getline produces no token for a trailing '-', so "en-" is split
// into the single token "en".
bool JSLocale::StartsWithUnicodeLanguageId(const std::string& value) {
  // unicode_language_id =
  // unicode_language_subtag (sep unicode_script_subtag)?
  //   (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
  std::vector<std::string> tokens;
  std::string token;
  std::istringstream token_stream(value);
  while (std::getline(token_stream, token, '-')) {
    tokens.push_back(token);
  }
  if (tokens.empty()) return false;

  // length >= 1
  if (!IsUnicodeLanguageSubtag(tokens[0])) return false;

  if (tokens.size() == 1) return true;

  // length >= 2
  if (IsExtensionSingleton(tokens[1])) return true;

  size_t index = 1;
  // Optional script subtag.
  if (IsUnicodeScriptSubtag(tokens[index])) {
    index++;
    if (index == tokens.size()) return true;
  }
  // Optional region subtag; |index| is still in bounds here.
  if (IsUnicodeRegionSubtag(tokens[index])) {
    index++;
  }
  // Everything up to the first extension singleton must be a variant subtag.
  while (index < tokens.size()) {
    if (IsExtensionSingleton(tokens[index])) return true;
    if (!IsUnicodeVariantSubtag(tokens[index])) return false;
    index++;
  }
  return true;
}

224
namespace {
225 226 227
// Seeds |builder| with the language tag |tag| and then overrides its
// language, script and region with the "language", "script" and "region"
// properties of |options| when they are present.
//
// Throws a RangeError (kLocaleNotEmpty) and returns Nothing when |tag| is
// empty; returns Nothing when an option lookup yields Nothing. Returns
// Just(false) when |tag| does not start with a unicode_language_id, when
// |builder| fails to build, or when a supplied language, script or region is
// empty or rejected (a four-letter language is rejected explicitly).
// Returns Just(true) otherwise.
Maybe<bool> ApplyOptionsToTag(Isolate* isolate, Handle<String> tag,
                              Handle<JSReceiver> options,
                              icu::LocaleBuilder* builder) {
  v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
  if (tag->length() == 0) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate, NewRangeError(MessageTemplate::kLocaleNotEmpty),
        Nothing<bool>());
  }

  v8::String::Utf8Value bcp47_tag(v8_isolate, v8::Utils::ToLocal(tag));
  // Validate the converted buffer before it is handed to ICU.
  DCHECK_LT(0, bcp47_tag.length());
  DCHECK_NOT_NULL(*bcp47_tag);
  builder->setLanguageTag({*bcp47_tag, bcp47_tag.length()});
  // 2. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError
  // exception.
  if (!JSLocale::StartsWithUnicodeLanguageId(*bcp47_tag)) {
    return Just(false);
  }
  UErrorCode status = U_ZERO_ERROR;
  builder->build(status);
  if (U_FAILURE(status)) {
    return Just(false);
  }

  // 3. Let language be ? GetOption(options, "language", "string", undefined,
  // undefined).
  const std::vector<const char*> empty_values = {};
  std::unique_ptr<char[]> language_str = nullptr;
  Maybe<bool> maybe_language =
      Intl::GetStringOption(isolate, options, "language", empty_values,
                            "ApplyOptionsToTag", &language_str);
  MAYBE_RETURN(maybe_language, Nothing<bool>());
  // 4. If language is not undefined, then
  if (maybe_language.FromJust()) {
    builder->setLanguage(language_str.get());
    builder->build(status);
    // a. If language does not match the unicode_language_subtag production,
    //    throw a RangeError exception.
    if (U_FAILURE(status) || language_str[0] == '\0' ||
        IsAlpha(language_str.get(), 4, 4)) {
      return Just(false);
    }
  }
  // 5. Let script be ? GetOption(options, "script", "string", undefined,
  // undefined).
  std::unique_ptr<char[]> script_str = nullptr;
  Maybe<bool> maybe_script =
      Intl::GetStringOption(isolate, options, "script", empty_values,
                            "ApplyOptionsToTag", &script_str);
  MAYBE_RETURN(maybe_script, Nothing<bool>());
  // 6. If script is not undefined, then
  if (maybe_script.FromJust()) {
    builder->setScript(script_str.get());
    builder->build(status);
    // a. If script does not match the unicode_script_subtag production, throw
    //    a RangeError exception.
    if (U_FAILURE(status) || script_str[0] == '\0') {
      return Just(false);
    }
  }
  // 7. Let region be ? GetOption(options, "region", "string", undefined,
  // undefined).
  std::unique_ptr<char[]> region_str = nullptr;
  Maybe<bool> maybe_region =
      Intl::GetStringOption(isolate, options, "region", empty_values,
                            "ApplyOptionsToTag", &region_str);
  MAYBE_RETURN(maybe_region, Nothing<bool>());
  // 8. If region is not undefined, then
  if (maybe_region.FromJust()) {
    // a. If region does not match the region production, throw a RangeError
    // exception.
    builder->setRegion(region_str.get());
    builder->build(status);
    if (U_FAILURE(status) || region_str[0] == '\0') {
      return Just(false);
    }
  }

  // 9. Set tag to CanonicalizeLanguageTag(tag).
  // 10.  If language is not undefined,
  // a. Assert: tag matches the unicode_locale_id production.
  // b. Set tag to tag with the substring corresponding to the
  //    unicode_language_subtag production replaced by the string language.
  // 11. If script is not undefined, then
  // a. If tag does not contain a unicode_script_subtag production, then
  //   i. Set tag to the concatenation of the unicode_language_subtag
  //      production of tag, "-", script, and the rest of tag.
  // b. Else,
  //   i. Set tag to tag with the substring corresponding to the
  //      unicode_script_subtag production replaced by the string script.
  // 12. If region is not undefined, then
  // a. If tag does not contain a unicode_region_subtag production, then
  //   i. Set tag to the concatenation of the unicode_language_subtag
  //      production of tag, the substring corresponding to the  "-"
  //      unicode_script_subtag production if present, "-", region, and
  //      the rest of tag.
  // b. Else,
  // i. Set tag to tag with the substring corresponding to the
  //    unicode_region_subtag production replaced by the string region.
  // 13.  Return CanonicalizeLanguageTag(tag).
  return Just(true);
}

}  // namespace

331 332 333
// Creates a JSLocale with map |map| from the language tag |locale_str| and
// the |options| object.
//
// Throws a RangeError (kLocaleBadParameters) when ApplyOptionsToTag or
// InsertOptionsIntoLocale reports Just(false), or when the final ICU build
// fails. Returns an empty handle when either helper yields Nothing.
MaybeHandle<JSLocale> JSLocale::New(Isolate* isolate, Handle<Map> map,
                                    Handle<String> locale_str,
                                    Handle<JSReceiver> options) {
  icu::LocaleBuilder builder;
  Maybe<bool> maybe_apply =
      ApplyOptionsToTag(isolate, locale_str, options, &builder);
  MAYBE_RETURN(maybe_apply, MaybeHandle<JSLocale>());
  if (!maybe_apply.FromJust()) {
    THROW_NEW_ERROR(isolate,
                    NewRangeError(MessageTemplate::kLocaleBadParameters),
                    JSLocale);
  }

  Maybe<bool> maybe_insert =
      InsertOptionsIntoLocale(isolate, options, &builder);
  MAYBE_RETURN(maybe_insert, MaybeHandle<JSLocale>());
  UErrorCode status = U_ZERO_ERROR;
  icu::Locale icu_locale = builder.build(status);
  if (!maybe_insert.FromJust() || U_FAILURE(status)) {
    THROW_NEW_ERROR(isolate,
                    NewRangeError(MessageTemplate::kLocaleBadParameters),
                    JSLocale);
  }

  // 31. Set locale.[[Locale]] to r.[[locale]].
  Handle<Managed<icu::Locale>> managed_locale =
      Managed<icu::Locale>::FromRawPtr(isolate, 0, icu_locale.clone());

  // Now all properties are ready, so we can allocate the result object.
  Handle<JSLocale> locale = Handle<JSLocale>::cast(
      isolate->factory()->NewFastOrSlowJSObjectFromMap(map));
  DisallowHeapAllocation no_gc;
  locale->set_icu_locale(*managed_locale);
  return locale;
}

367
namespace {
368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386

// Allocates a new JSLocale holding a clone of |icu_locale|, using the map
// derived from the native context's intl_locale_function. Returns an empty
// handle if deriving the map fails.
MaybeHandle<JSLocale> Construct(Isolate* isolate,
                                const icu::Locale& icu_locale) {
  Handle<Managed<icu::Locale>> managed_locale =
      Managed<icu::Locale>::FromRawPtr(isolate, 0, icu_locale.clone());

  Handle<JSFunction> constructor(
      isolate->native_context()->intl_locale_function(), isolate);

  Handle<Map> map;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, map,
      JSFunction::GetDerivedMap(isolate, constructor, constructor), JSLocale);

  Handle<JSLocale> locale = Handle<JSLocale>::cast(
      isolate->factory()->NewFastOrSlowJSObjectFromMap(map));
  DisallowHeapAllocation no_gc;
  locale->set_icu_locale(*managed_locale);
  return locale;
}
388

389 390
}  // namespace

391 392 393 394 395 396 397 398
// Returns a new JSLocale built from a copy of |locale|'s ICU locale after
// addLikelySubtags has been applied to it. |locale| itself is not modified.
MaybeHandle<JSLocale> JSLocale::Maximize(Isolate* isolate,
                                         Handle<JSLocale> locale) {
  icu::Locale icu_locale(*(locale->icu_locale().raw()));
  UErrorCode status = U_ZERO_ERROR;
  icu_locale.addLikelySubtags(status);
  // An ICU failure here is only checked via DCHECK.
  DCHECK(U_SUCCESS(status));
  DCHECK(!icu_locale.isBogus());
  return Construct(isolate, icu_locale);
}

401 402 403 404 405 406 407 408
// Returns a new JSLocale built from a copy of |locale|'s ICU locale after
// minimizeSubtags has been applied to it. |locale| itself is not modified.
MaybeHandle<JSLocale> JSLocale::Minimize(Isolate* isolate,
                                         Handle<JSLocale> locale) {
  icu::Locale icu_locale(*(locale->icu_locale().raw()));
  UErrorCode status = U_ZERO_ERROR;
  icu_locale.minimizeSubtags(status);
  // An ICU failure here is only checked via DCHECK.
  DCHECK(U_SUCCESS(status));
  DCHECK(!icu_locale.isBogus());
  return Construct(isolate, icu_locale);
}

411 412
// Returns the language of |locale| as a string, or undefined when ICU
// reports an empty language.
Handle<Object> JSLocale::Language(Isolate* isolate, Handle<JSLocale> locale) {
  Factory* factory = isolate->factory();
  const char* language = locale->icu_locale().raw()->getLanguage();
  if (strlen(language) == 0) return factory->undefined_value();
  return factory->NewStringFromAsciiChecked(language);
}

418 419
// Returns the script of |locale| as a string, or undefined when ICU reports
// an empty script.
Handle<Object> JSLocale::Script(Isolate* isolate, Handle<JSLocale> locale) {
  Factory* factory = isolate->factory();
  const char* script = locale->icu_locale().raw()->getScript();
  if (strlen(script) == 0) return factory->undefined_value();
  return factory->NewStringFromAsciiChecked(script);
}

425 426
// Returns the region of |locale| (ICU's "country") as a string, or undefined
// when ICU reports an empty region.
Handle<Object> JSLocale::Region(Isolate* isolate, Handle<JSLocale> locale) {
  Factory* factory = isolate->factory();
  const char* region = locale->icu_locale().raw()->getCountry();
  if (strlen(region) == 0) return factory->undefined_value();
  return factory->NewStringFromAsciiChecked(region);
}

// Returns the language tag of |locale|'s ICU base name: a locale is created
// from getBaseName() and converted with Intl::ToLanguageTag.
Handle<String> JSLocale::BaseName(Isolate* isolate, Handle<JSLocale> locale) {
  icu::Locale icu_locale =
      icu::Locale::createFromName(locale->icu_locale().raw()->getBaseName());
  std::string base_name = Intl::ToLanguageTag(icu_locale).FromJust();
  return isolate->factory()->NewStringFromAsciiChecked(base_name.c_str());
}

// Returns the value of |locale|'s "ca" Unicode keyword via
// UnicodeKeywordValue (a string, or undefined when there is none).
Handle<Object> JSLocale::Calendar(Isolate* isolate, Handle<JSLocale> locale) {
  const char* const unicode_key = "ca";
  return UnicodeKeywordValue(isolate, locale, unicode_key);
}

// Returns the value of |locale|'s "kf" Unicode keyword via
// UnicodeKeywordValue (a string, or undefined when there is none).
Handle<Object> JSLocale::CaseFirst(Isolate* isolate, Handle<JSLocale> locale) {
  const char* const unicode_key = "kf";
  return UnicodeKeywordValue(isolate, locale, unicode_key);
}

// Returns the value of |locale|'s "co" Unicode keyword via
// UnicodeKeywordValue (a string, or undefined when there is none).
Handle<Object> JSLocale::Collation(Isolate* isolate, Handle<JSLocale> locale) {
  const char* const unicode_key = "co";
  return UnicodeKeywordValue(isolate, locale, unicode_key);
}

// Returns the value of |locale|'s "hc" Unicode keyword via
// UnicodeKeywordValue (a string, or undefined when there is none).
Handle<Object> JSLocale::HourCycle(Isolate* isolate, Handle<JSLocale> locale) {
  const char* const unicode_key = "hc";
  return UnicodeKeywordValue(isolate, locale, unicode_key);
}

// Returns the true value when |locale|'s "kn" Unicode keyword value is
// exactly "true", and the false value otherwise (including when the keyword
// is absent). The ICU status is not inspected.
Handle<Object> JSLocale::Numeric(Isolate* isolate, Handle<JSLocale> locale) {
  Factory* factory = isolate->factory();
  icu::Locale* icu_locale = locale->icu_locale().raw();
  UErrorCode status = U_ZERO_ERROR;
  std::string numeric =
      icu_locale->getUnicodeKeywordValue<std::string>("kn", status);
  return (numeric == "true") ? factory->true_value() : factory->false_value();
}

// Returns the value of |locale|'s "nu" Unicode keyword via
// UnicodeKeywordValue (a string, or undefined when there is none).
Handle<Object> JSLocale::NumberingSystem(Isolate* isolate,
                                         Handle<JSLocale> locale) {
  const char* const unicode_key = "nu";
  return UnicodeKeywordValue(isolate, locale, unicode_key);
}

469
// Returns the language tag that Intl::ToLanguageTag produces for |locale|'s
// ICU locale. FromJust() is applied to the result, so the conversion is
// expected to succeed.
std::string JSLocale::ToString(Handle<JSLocale> locale) {
  icu::Locale* icu_locale = locale->icu_locale().raw();
  return Intl::ToLanguageTag(*icu_locale).FromJust();
}

// Returns the language tag of |locale| (see the std::string overload) as a
// newly allocated heap string.
Handle<String> JSLocale::ToString(Isolate* isolate, Handle<JSLocale> locale) {
  std::string locale_str = JSLocale::ToString(locale);
  return isolate->factory()->NewStringFromAsciiChecked(locale_str.c_str());
}

479 480
}  // namespace internal
}  // namespace v8