js-locale.cc 18 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif  // V8_INTL_SUPPORT

#include "src/objects/js-locale.h"

#include <array>
#include <cstring>
#include <map>
#include <memory>
#include <string>
#include <vector>

#include "src/api.h"
#include "src/global-handles.h"
#include "src/heap/factory.h"
#include "src/isolate.h"
#include "src/objects-inl.h"
#include "src/objects/intl-objects.h"
#include "src/objects/js-locale-inl.h"
#include "unicode/char16ptr.h"
#include "unicode/locid.h"
#include "unicode/uloc.h"
#include "unicode/unistr.h"

namespace v8 {
namespace internal {

namespace {
32

33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
// Helper function to check a language tag is valid. It will return false if
// the parsing is not the same as the tag. For example, it will return false if
// the tag is too long.
bool IsValidLanguageTag(const char* tag, int length) {
  // icu::Locale::forLanguageTag won't return U_STRING_NOT_TERMINATED_WARNING
  // for incorrect locale yet. So we still need the following
  // uloc_forLanguageTag
  // TODO(ftang): Remove once icu::Locale::forLanguageTag indicate error.
  char result[ULOC_FULLNAME_CAPACITY];
  UErrorCode status = U_ZERO_ERROR;
  int parsed_length = 0;
  int icu_length = uloc_forLanguageTag(tag, result, ULOC_FULLNAME_CAPACITY,
                                       &parsed_length, &status);
  return U_SUCCESS(status) && parsed_length == length &&
         status != U_STRING_NOT_TERMINATED_WARNING && icu_length != 0;
48 49
}

50 51 52 53 54 55 56 57 58 59 60 61 62
// Checks that |locale| can be converted to a language tag by ICU in strict
// mode. Returns false if the lengths of the extension fields are incorrect;
// for example en-u-a or en-u-co-b are rejected.
bool IsValidLocale(const icu::Locale& locale) {
  // icu::Locale::toLanguageTag won't return U_STRING_NOT_TERMINATED_WARNING for
  // incorrect locale yet, so uloc_toLanguageTag is still needed here.
  // TODO(ftang): Change to use icu::Locale::toLanguageTag once it indicates
  // errors.
  char result[ULOC_FULLNAME_CAPACITY];
  UErrorCode status = U_ZERO_ERROR;
  uloc_toLanguageTag(locale.getName(), result, ULOC_FULLNAME_CAPACITY,
                     /* strict */ true, &status);
  // Only the status matters; the converted tag in |result| is discarded.
  return U_SUCCESS(status) && status != U_STRING_NOT_TERMINATED_WARNING;
}

65 66 67 68 69 70 71
// Describes how one option property maps onto a Unicode extension keyword.
struct OptionData {
  // Property name that is looked up on the options object.
  const char* name;
  // BCP 47 Unicode extension key the option value is stored under.
  const char* key;
  // Accepted string values handed to the string option getter; an empty list
  // is used by options that have no enumerated value set.
  const std::vector<const char*>* possible_values;
  // True when the option is read as a boolean instead of a string.
  bool is_bool_value;
};

72 73 74
// Reads the Unicode extension options (calendar, collation, hourCycle,
// caseFirst, numeric, numberingSystem) from |options| and stores every option
// that is present as a keyword on |icu_locale|, overwriting an existing value
// for the same key.
//
// Returns Nothing<bool>() if reading an option fails or if the resulting
// locale has malformed extension fields (a RangeError is thrown in the latter
// case), Just(false) if a value has no ICU legacy equivalent or ICU refuses to
// set it, and Just(true) otherwise.
Maybe<bool> InsertOptionsIntoLocale(Isolate* isolate,
                                    Handle<JSReceiver> options,
                                    icu::Locale* icu_locale) {
  CHECK(isolate);
  CHECK(!icu_locale->isBogus());

  const std::vector<const char*> hour_cycle_values = {"h11", "h12", "h23",
                                                      "h24"};
  const std::vector<const char*> case_first_values = {"upper", "lower",
                                                      "false"};
  const std::vector<const char*> empty_values = {};
  const std::array<OptionData, 6> kOptionToUnicodeTagMap = {
      {{"calendar", "ca", &empty_values, false},
       {"collation", "co", &empty_values, false},
       {"hourCycle", "hc", &hour_cycle_values, false},
       {"caseFirst", "kf", &case_first_values, false},
       {"numeric", "kn", &empty_values, true},
       {"numberingSystem", "nu", &empty_values, false}}};

  // TODO(cira): Pass in values as per the spec to make this to be
  // spec compliant.

  UErrorCode status = U_ZERO_ERROR;
  for (const auto& option_to_bcp47 : kOptionToUnicodeTagMap) {
    std::unique_ptr<char[]> value_str = nullptr;
    bool value_bool = false;
    Maybe<bool> maybe_found =
        option_to_bcp47.is_bool_value
            ? Intl::GetBoolOption(isolate, options, option_to_bcp47.name,
                                  "locale", &value_bool)
            : Intl::GetStringOption(isolate, options, option_to_bcp47.name,
                                    *(option_to_bcp47.possible_values),
                                    "locale", &value_str);
    MAYBE_RETURN(maybe_found, Nothing<bool>());

    // TODO(cira): Use fallback value if value is not found to make
    // this spec compliant.
    if (!maybe_found.FromJust()) continue;

    // Boolean options are stored through their "true"/"false" string form.
    if (option_to_bcp47.is_bool_value) {
      value_str = value_bool ? isolate->factory()->true_string()->ToCString()
                             : isolate->factory()->false_string()->ToCString();
    }
    DCHECK_NOT_NULL(value_str.get());

    // Convert bcp47 key and value into legacy ICU format so we can use
    // setKeywordValue.
    const char* key = uloc_toLegacyKey(option_to_bcp47.key);
    DCHECK_NOT_NULL(key);

    // Overwrite existing, or insert new key-value to the locale string.
    const char* value = uloc_toLegacyType(key, value_str.get());
    if (value == nullptr) return Just(false);
    icu_locale->setKeywordValue(key, value, status);
    if (U_FAILURE(status)) return Just(false);
  }

  // Check all the unicode extension fields are in the right length.
  if (!IsValidLocale(*icu_locale)) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
        Nothing<bool>());
  }

  return Just(true);
}

145 146 147
// Returns the value of the Unicode extension keyword |key| stored on |locale|
// as a string, or undefined when ICU yields an empty value for that key.
// CHECK-fails if ICU reports an error for the lookup.
Handle<Object> UnicodeKeywordValue(Isolate* isolate, Handle<JSLocale> locale,
                                   const char* key) {
  icu::Locale* icu_locale = locale->icu_locale()->raw();
  UErrorCode status = U_ZERO_ERROR;
  const std::string value =
      icu_locale->getUnicodeKeywordValue<std::string>(key, status);
  CHECK(U_SUCCESS(status));
  if (value.empty()) {
    return isolate->factory()->undefined_value();
  }
  return isolate->factory()->NewStringFromAsciiChecked(value.c_str());
}
157

158 159 160 161 162 163
// Returns true if |value| lies in the closed interval [start, end].
bool InRange(size_t value, size_t start, size_t end) {
  return start <= value && value <= end;
}

// Character overload of the closed-interval check.
bool InRange(char value, char start, char end) {
  return start <= value && value <= end;
}

// Returns true if the length of |str| is within [min, max] and every character
// of |str| satisfies |range_check_func|.
bool IsCheckRange(const std::string& str, size_t min, size_t max,
                  bool(range_check_func)(char)) {
  if (!InRange(str.length(), min, max)) return false;
  for (const char c : str) {
    if (!range_check_func(c)) return false;
  }
  return true;
}

// Returns true if |str| consists of between |min| and |max| ASCII letters.
bool IsAlpha(const std::string& str, size_t min, size_t max) {
  return IsCheckRange(str, min, max, [](char c) -> bool {
    return InRange(c, 'a', 'z') || InRange(c, 'A', 'Z');
  });
}

// Returns true if |str| consists of between |min| and |max| ASCII digits.
bool IsDigit(const std::string& str, size_t min, size_t max) {
  return IsCheckRange(str, min, max,
                      [](char c) -> bool { return InRange(c, '0', '9'); });
}

// Returns true if |value| matches the supported part of the language
// production, i.e. two to eight letters.
bool ValidateLanguageProduction(const std::string& value) {
  // language      = 2*3ALPHA            ; shortest ISO 639 code
  //                 ["-" extlang]       ; sometimes followed by
  //                                     ; extended language subtags
  //               / 4ALPHA              ; or reserved for future use
  //               / 5*8ALPHA            ; or registered language subtag
  //
  // extlang       = 3ALPHA              ; selected ISO 639 codes
  //                 *2("-" 3ALPHA)      ; permanently reserved
  // TODO(ftang) not handling the [extlang] yet
  return IsAlpha(value, 2, 8);
}

// Returns true if |value| matches the script production (four letters).
bool ValidateScriptProduction(const std::string& value) {
  // script        = 4ALPHA              ; ISO 15924 code
  return IsAlpha(value, 4, 4);
}

// Returns true if |value| matches the region production (two letters or three
// digits).
bool ValidateRegionProduction(const std::string& value) {
  // region        = 2ALPHA              ; ISO 3166-1 code
  //               / 3DIGIT              ; UN M.49 code
  return IsAlpha(value, 2, 2) || IsDigit(value, 3, 3);
}

// Validates |tag|, reads the "language", "script" and "region" options from
// |options|, substitutes every option that is present into the parsed tag and
// returns the canonicalized result as an icu::Locale. The numbered comments
// refer to the steps of the ApplyOptionsToTag abstract operation.
//
// Returns Nothing<icu::Locale>() when the tag is empty or not a valid language
// tag, when reading an option fails, or when an option does not match its
// grammar production; a RangeError is thrown for the validation failures.
Maybe<icu::Locale> ApplyOptionsToTag(Isolate* isolate, Handle<String> tag,
                                     Handle<JSReceiver> options) {
  v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
  if (tag->length() == 0) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate, NewRangeError(MessageTemplate::kLocaleNotEmpty),
        Nothing<icu::Locale>());
  }

  v8::String::Utf8Value bcp47_tag(v8_isolate, v8::Utils::ToLocal(tag));
  CHECK_LT(0, bcp47_tag.length());
  CHECK_NOT_NULL(*bcp47_tag);
  // 2. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError
  // exception.
  if (!IsValidLanguageTag(*bcp47_tag, bcp47_tag.length())) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
        Nothing<icu::Locale>());
  }
  UErrorCode status = U_ZERO_ERROR;
  icu::Locale icu_locale = icu::Locale::forLanguageTag(*bcp47_tag, status);
  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
        Nothing<icu::Locale>());
  }

  // 3. Let language be ? GetOption(options, "language", "string", undefined,
  // undefined).
  const std::vector<const char*> empty_values = {};
  std::unique_ptr<char[]> language_str = nullptr;
  Maybe<bool> maybe_language =
      Intl::GetStringOption(isolate, options, "language", empty_values,
                            "ApplyOptionsToTag", &language_str);
  MAYBE_RETURN(maybe_language, Nothing<icu::Locale>());
  // 4. If language is not undefined, then
  if (maybe_language.FromJust()) {
    // a. If language does not match the language production, throw a RangeError
    // exception.
    // b. If language matches the grandfathered production, throw a RangeError
    // exception.
    // Currently ValidateLanguageProduction only take 2*3ALPHA / 4ALPHA /
    // 5*8ALPHA and won't take 2*3ALPHA "-" extlang so none of the grandfathered
    // will be matched.
    if (!ValidateLanguageProduction(language_str.get())) {
      THROW_NEW_ERROR_RETURN_VALUE(
          isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
          Nothing<icu::Locale>());
    }
  }
  // 5. Let script be ? GetOption(options, "script", "string", undefined,
  // undefined).
  std::unique_ptr<char[]> script_str = nullptr;
  Maybe<bool> maybe_script =
      Intl::GetStringOption(isolate, options, "script", empty_values,
                            "ApplyOptionsToTag", &script_str);
  MAYBE_RETURN(maybe_script, Nothing<icu::Locale>());
  // 6. If script is not undefined, then
  if (maybe_script.FromJust()) {
    // a. If script does not match the script production, throw a RangeError
    // exception.
    if (!ValidateScriptProduction(script_str.get())) {
      THROW_NEW_ERROR_RETURN_VALUE(
          isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
          Nothing<icu::Locale>());
    }
  }
  // 7. Let region be ? GetOption(options, "region", "string", undefined,
  // undefined).
  std::unique_ptr<char[]> region_str = nullptr;
  Maybe<bool> maybe_region =
      Intl::GetStringOption(isolate, options, "region", empty_values,
                            "ApplyOptionsToTag", &region_str);
  MAYBE_RETURN(maybe_region, Nothing<icu::Locale>());
  // 8. If region is not undefined, then
  if (maybe_region.FromJust()) {
    // a. If region does not match the region production, throw a RangeError
    // exception.
    if (!ValidateRegionProduction(region_str.get())) {
      THROW_NEW_ERROR_RETURN_VALUE(
          isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
          Nothing<icu::Locale>());
    }
  }
  // 9. Set tag to CanonicalizeLanguageTag(tag).

  // 10.  If language is not undefined,
  std::string locale_str;
  if (maybe_language.FromJust()) {
    // a. Assert: tag matches the langtag production.
    // b. Set tag to tag with the substring corresponding to the language
    // production replaced by the string language.
    locale_str = language_str.get();
  } else {
    locale_str = icu_locale.getLanguage();
  }
  // 11. If script is not undefined, then
  const char* script_ptr = nullptr;
  if (maybe_script.FromJust()) {
    // a. If tag does not contain a script production, then
    //
    // i. Set tag to the concatenation of the language production of tag, "-",
    // script, and the rest of tag.
    //
    // b. Else,
    //
    // i. Set tag to tag with the substring corresponding to the script
    // production replaced by the string script.
    script_ptr = script_str.get();
  } else {
    script_ptr = icu_locale.getScript();
  }
  // The script travels inside the language argument ("lang-Script") that is
  // handed to the icu::Locale constructor below.
  if (script_ptr != nullptr && script_ptr[0] != '\0') {
    locale_str.append("-");
    locale_str.append(script_ptr);
  }
  // 12. If region is not undefined, then
  const char* region_ptr = nullptr;
  if (maybe_region.FromJust()) {
    // a. If tag does not contain a region production, then
    //
    // i. Set tag to the concatenation of the language production of tag, the
    // substring corresponding to the "-" script production if present,  "-",
    // region, and the rest of tag.
    //
    // b. Else,
    //
    // i. Set tag to tag with the substring corresponding to the region
    // production replaced by the string region.
    region_ptr = region_str.get();
  } else {
    region_ptr = icu_locale.getCountry();
  }

  // Remember the full locale ID of the parsed tag; its keyword section (from
  // '@' on) is re-attached after the rebuild below.
  std::string without_options(icu_locale.getName());

  // Replace with values from options.
  icu_locale =
      icu::Locale(locale_str.c_str(), region_ptr, icu_locale.getVariant());
  locale_str = icu_locale.getName();

  // Append extensions from tag.
  const size_t others = without_options.find('@');
  if (others != std::string::npos) {
    locale_str += without_options.substr(others);
  }

  // 13.  Return CanonicalizeLanguageTag(tag).
  icu_locale = icu::Locale::createCanonical(locale_str.c_str());
  return Just(icu_locale);
}

}  // namespace

// Initializes |locale| from the language tag |locale_str| and |options|: the
// language/script/region options are applied to the tag first, then the
// Unicode extension options are inserted, and the resulting icu::Locale is
// stored on |locale|.
//
// Returns |locale| on success and an empty MaybeHandle when either step
// fails; if the extension options cannot be inserted a RangeError is thrown.
MaybeHandle<JSLocale> JSLocale::Initialize(Isolate* isolate,
                                           Handle<JSLocale> locale,
                                           Handle<String> locale_str,
                                           Handle<JSReceiver> options) {
  Maybe<icu::Locale> maybe_locale =
      ApplyOptionsToTag(isolate, locale_str, options);
  MAYBE_RETURN(maybe_locale, MaybeHandle<JSLocale>());
  icu::Locale icu_locale = maybe_locale.FromJust();

  // Just(false) means an option value was rejected without an exception
  // having been thrown yet, so the RangeError is raised here.
  Maybe<bool> maybe_inserted =
      InsertOptionsIntoLocale(isolate, options, &icu_locale);
  MAYBE_RETURN(maybe_inserted, MaybeHandle<JSLocale>());
  if (!maybe_inserted.FromJust()) {
    THROW_NEW_ERROR(isolate,
                    NewRangeError(MessageTemplate::kLocaleBadParameters),
                    JSLocale);
  }

  // 31. Set locale.[[Locale]] to r.[[locale]].
  Handle<Managed<icu::Locale>> managed_locale =
      Managed<icu::Locale>::FromRawPtr(isolate, 0, icu_locale.clone());
  locale->set_icu_locale(*managed_locale);

  return locale;
}

383
namespace {
384
// Parses |locale| as a language tag, lets |morph_func| modify the resulting
// icu::Locale in place and returns the modified locale as a language tag
// string. CHECK-fails if ICU reports an error or produces a bogus locale
// either while parsing or after |morph_func| has run.
Handle<String> MorphLocale(Isolate* isolate, String locale,
                           void (*morph_func)(icu::Locale*, UErrorCode*)) {
  UErrorCode status = U_ZERO_ERROR;
  icu::Locale icu_locale =
      icu::Locale::forLanguageTag(locale.ToCString().get(), status);
  CHECK(U_SUCCESS(status));
  CHECK(!icu_locale.isBogus());
  (*morph_func)(&icu_locale, &status);
  CHECK(U_SUCCESS(status));
  CHECK(!icu_locale.isBogus());
  std::string locale_str = Intl::ToLanguageTag(icu_locale).FromJust();
  return isolate->factory()->NewStringFromAsciiChecked(locale_str.c_str());
}
397

398 399
}  // namespace

400
// Returns the language tag obtained by applying ICU's addLikelySubtags to
// |locale|.
Handle<String> JSLocale::Maximize(Isolate* isolate, String locale) {
  return MorphLocale(isolate, locale,
                     [](icu::Locale* icu_locale, UErrorCode* status) {
                       icu_locale->addLikelySubtags(*status);
                     });
}

407
// Returns the language tag obtained by applying ICU's minimizeSubtags to
// |locale|.
Handle<String> JSLocale::Minimize(Isolate* isolate, String locale) {
  return MorphLocale(isolate, locale,
                     [](icu::Locale* icu_locale, UErrorCode* status) {
                       icu_locale->minimizeSubtags(*status);
                     });
}

414 415 416 417 418
// Returns the language subtag of |locale| as a string, or undefined when the
// underlying icu::Locale reports an empty language.
Handle<Object> JSLocale::Language(Isolate* isolate, Handle<JSLocale> locale) {
  Factory* factory = isolate->factory();
  const char* language = locale->icu_locale()->raw()->getLanguage();
  // Only emptiness matters, so inspect the first character instead of
  // measuring the whole string.
  if (language[0] == '\0') return factory->undefined_value();
  return factory->NewStringFromAsciiChecked(language);
}

421 422 423 424 425
// Returns the script subtag of |locale| as a string, or undefined when the
// underlying icu::Locale reports an empty script.
Handle<Object> JSLocale::Script(Isolate* isolate, Handle<JSLocale> locale) {
  Factory* factory = isolate->factory();
  const char* script = locale->icu_locale()->raw()->getScript();
  // Only emptiness matters, so inspect the first character instead of
  // measuring the whole string.
  if (script[0] == '\0') return factory->undefined_value();
  return factory->NewStringFromAsciiChecked(script);
}

428 429 430 431 432 433 434 435 436 437
// Returns the region subtag of |locale| as a string, or undefined when the
// underlying icu::Locale reports an empty country code.
Handle<Object> JSLocale::Region(Isolate* isolate, Handle<JSLocale> locale) {
  const char* country = locale->icu_locale()->raw()->getCountry();
  if (country[0] != '\0') {
    return isolate->factory()->NewStringFromAsciiChecked(country);
  }
  return isolate->factory()->undefined_value();
}

// Returns the language tag of |locale| reduced to its base name, i.e. built
// from icu::Locale::getBaseName() of the stored locale.
Handle<String> JSLocale::BaseName(Isolate* isolate, Handle<JSLocale> locale) {
  icu::Locale icu_locale =
      icu::Locale::createFromName(locale->icu_locale()->raw()->getBaseName());
  std::string base_name = Intl::ToLanguageTag(icu_locale).FromJust();
  return isolate->factory()->NewStringFromAsciiChecked(base_name.c_str());
}

// Returns the value of the "ca" Unicode extension keyword of |locale|, or
// undefined when it is not set.
Handle<Object> JSLocale::Calendar(Isolate* isolate, Handle<JSLocale> locale) {
  return UnicodeKeywordValue(isolate, locale, "ca");
}

// Returns the value of the "kf" Unicode extension keyword of |locale|, or
// undefined when it is not set.
Handle<Object> JSLocale::CaseFirst(Isolate* isolate, Handle<JSLocale> locale) {
  return UnicodeKeywordValue(isolate, locale, "kf");
}

// Returns the value of the "co" Unicode extension keyword of |locale|, or
// undefined when it is not set.
Handle<Object> JSLocale::Collation(Isolate* isolate, Handle<JSLocale> locale) {
  return UnicodeKeywordValue(isolate, locale, "co");
}

// Returns the value of the "hc" Unicode extension keyword of |locale|, or
// undefined when it is not set.
Handle<Object> JSLocale::HourCycle(Isolate* isolate, Handle<JSLocale> locale) {
  return UnicodeKeywordValue(isolate, locale, "hc");
}

// Returns the true value when the "kn" Unicode extension keyword of |locale|
// reads "true", and the false value for every other result.
// NOTE(review): |status| is never inspected here, unlike in
// UnicodeKeywordValue - confirm that ignoring ICU errors is intended.
Handle<Object> JSLocale::Numeric(Isolate* isolate, Handle<JSLocale> locale) {
  UErrorCode status = U_ZERO_ERROR;
  const std::string kn_value =
      locale->icu_locale()->raw()->getUnicodeKeywordValue<std::string>(
          "kn", status);
  Factory* factory = isolate->factory();
  if (kn_value == "true") return factory->true_value();
  return factory->false_value();
}

// Returns the value of the "nu" Unicode extension keyword of |locale|, or
// undefined when it is not set.
Handle<Object> JSLocale::NumberingSystem(Isolate* isolate,
                                         Handle<JSLocale> locale) {
  return UnicodeKeywordValue(isolate, locale, "nu");
}

// Returns the language tag string for the icu::Locale stored on |locale|.
Handle<String> JSLocale::ToString(Isolate* isolate, Handle<JSLocale> locale) {
  icu::Locale* icu_locale = locale->icu_locale()->raw();
  std::string locale_str = Intl::ToLanguageTag(*icu_locale).FromJust();
  return isolate->factory()->NewStringFromAsciiChecked(locale_str.c_str());
}

478 479
}  // namespace internal
}  // namespace v8