// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5 6 7 8 9 10
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif  // V8_INTL_SUPPORT

#ifndef V8_OBJECTS_INTL_OBJECTS_H_
#define V8_OBJECTS_INTL_OBJECTS_H_
11

12
#include <map>
13
#include <memory>
14 15 16
#include <set>
#include <string>

Frank Tang's avatar
Frank Tang committed
17
#include "src/base/timezone-cache.h"
18
#include "src/objects/contexts.h"
19
#include "src/objects/managed.h"
20
#include "src/objects/objects.h"
21
#include "unicode/locid.h"
22
#include "unicode/uversion.h"
23

24
#define V8_MINIMUM_ICU_VERSION 71
25

26
// Forward declarations of the ICU classes named in the declarations below.
// Only the class names are introduced here; no ICU class definitions are
// pulled in by this block.
namespace U_ICU_NAMESPACE {
class BreakIterator;
class Collator;
class FormattedValue;
class StringEnumeration;
class TimeZone;
class UnicodeString;
}  // namespace U_ICU_NAMESPACE
34

35 36
namespace v8 {
namespace internal {
37

38 39 40 41 42 43 44 45 46 47 48 49 50
// A span of formatted text tagged with a field id: the half-open or closed
// nature of [begin_pos, end_pos] is not established by this header.
// NOTE(review): presumably begin_pos is inclusive and end_pos exclusive, as in
// ICU field positions — confirm against the users of this struct.
struct NumberFormatSpan {
  // Default member initializers guarantee that a default-constructed span has
  // well-defined (zero) contents instead of indeterminate values.
  int32_t field_id = 0;
  int32_t begin_pos = 0;
  int32_t end_pos = 0;

  // Produces the all-zero span described by the initializers above.
  NumberFormatSpan() = default;

  // Builds a span for `field_id` covering `begin_pos` .. `end_pos`.
  NumberFormatSpan(int32_t field_id, int32_t begin_pos, int32_t end_pos)
      : field_id(field_id), begin_pos(begin_pos), end_pos(end_pos) {}
};

// Declaration only; the definition is not part of this header.
// Takes a pointer to a mutable vector of NumberFormatSpan regions and returns
// a vector of NumberFormatSpan by value.
// NOTE(review): presumably flattens overlapping/nested regions into
// non-overlapping parts and may modify `*regions` in place (it is passed as a
// non-const pointer) — confirm against the definition.
V8_EXPORT_PRIVATE std::vector<NumberFormatSpan> FlattenRegionsToParts(
    std::vector<NumberFormatSpan>* regions);

51 52
// Forward declarations of V8 types that the declarations below refer to.
template <typename T>
class Handle;
class JSCollator;
marja's avatar
marja committed
54

55
class Intl {
56
 public:
57 58 59 60 61
  enum class BoundFunctionContextSlot {
    kBoundFunction = Context::MIN_CONTEXT_SLOTS,
    kLength
  };

62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
  enum class FormatRangeSource { kShared, kStartRange, kEndRange };

  class FormatRangeSourceTracker {
   public:
    FormatRangeSourceTracker();
    void Add(int32_t field, int32_t start, int32_t limit);
    FormatRangeSource GetSource(int32_t start, int32_t limit) const;

   private:
    int32_t start_[2];
    int32_t limit_[2];

    bool FieldContains(int32_t field, int32_t start, int32_t limit) const;
  };

  static Handle<String> SourceString(Isolate* isolate,
                                     FormatRangeSource source);

80 81 82 83 84
  // Build a set of ICU locales from a list of Locales. If there is a locale
  // with a script tag then the locales also include a locale without the
  // script; eg, pa_Guru_IN (language=Panjabi, script=Gurmukhi, country-India)
  // would include pa_IN.
  static std::set<std::string> BuildLocaleSet(
85
      const std::vector<std::string>& locales, const char* path,
86
      const char* validate_key);
87

88
  static Maybe<std::string> ToLanguageTag(const icu::Locale& locale);
89

90 91 92 93 94 95
  // Get the name of the numbering system from locale.
  // ICU doesn't expose numbering system in any way, so we have to assume that
  // for given locale NumberingSystem constructor produces the same digits as
  // NumberFormat/Calendar would.
  static std::string GetNumberingSystem(const icu::Locale& icu_locale);

96
  static V8_WARN_UNUSED_RESULT MaybeHandle<JSObject> SupportedLocalesOf(
97
      Isolate* isolate, const char* method_name,
98
      const std::set<std::string>& available_locales, Handle<Object> locales_in,
99
      Handle<Object> options_in);
100

101 102 103 104 105 106
  // https://tc39.github.io/ecma402/#sec-canonicalizelocalelist
  // {only_return_one_result} is an optimization for callers that only
  // care about the first result.
  static Maybe<std::vector<std::string>> CanonicalizeLocaleList(
      Isolate* isolate, Handle<Object> locales,
      bool only_return_one_result = false);
107

108 109 110 111
  // ecma-402 #sec-intl.getcanonicallocales
  V8_WARN_UNUSED_RESULT static MaybeHandle<JSArray> GetCanonicalLocales(
      Isolate* isolate, Handle<Object> locales);

112 113 114 115
  // ecma-402 #sec-intl.supportedvaluesof
  V8_WARN_UNUSED_RESULT static MaybeHandle<JSArray> SupportedValuesOf(
      Isolate* isolate, Handle<Object> key);

116 117 118 119
  // For locale sensitive functions
  V8_WARN_UNUSED_RESULT static MaybeHandle<String> StringLocaleConvertCase(
      Isolate* isolate, Handle<String> s, bool is_upper,
      Handle<Object> locales);
120

Frank Tang's avatar
Frank Tang committed
121 122 123 124 125 126
  V8_WARN_UNUSED_RESULT static MaybeHandle<String> ConvertToUpper(
      Isolate* isolate, Handle<String> s);

  V8_WARN_UNUSED_RESULT static MaybeHandle<String> ConvertToLower(
      Isolate* isolate, Handle<String> s);

127
  V8_WARN_UNUSED_RESULT static base::Optional<int> StringLocaleCompare(
128
      Isolate* isolate, Handle<String> s1, Handle<String> s2,
129
      Handle<Object> locales, Handle<Object> options, const char* method_name);
130

131 132 133 134
  enum class CompareStringsOptions {
    kNone,
    kTryFastPath,
  };
135
  template <class IsolateT>
136
  V8_EXPORT_PRIVATE static CompareStringsOptions CompareStringsOptionsFor(
137
      IsolateT* isolate, Handle<Object> locales, Handle<Object> options);
138 139 140 141 142
  V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static int CompareStrings(
      Isolate* isolate, const icu::Collator& collator, Handle<String> s1,
      Handle<String> s2,
      CompareStringsOptions compare_strings_options =
          CompareStringsOptions::kNone);
143 144 145 146

  // ecma402/#sup-properties-of-the-number-prototype-object
  V8_WARN_UNUSED_RESULT static MaybeHandle<String> NumberToLocaleString(
      Isolate* isolate, Handle<Object> num, Handle<Object> locales,
147
      Handle<Object> options, const char* method_name);
148

149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
  // [[RoundingPriority]] is one of the String values "auto", "morePrecision",
  // or "lessPrecision", specifying the rounding priority for the number.
  enum class RoundingPriority {
    kAuto,
    kMorePrecision,
    kLessPrecision,
  };

  enum class RoundingType {
    kFractionDigits,
    kSignificantDigits,
    kMorePrecision,
    kLessPrecision,
  };

164
  // ecma402/#sec-setnfdigitoptions
165 166 167 168 169 170
  struct NumberFormatDigitOptions {
    int minimum_integer_digits;
    int minimum_fraction_digits;
    int maximum_fraction_digits;
    int minimum_significant_digits;
    int maximum_significant_digits;
171 172
    RoundingPriority rounding_priority;
    RoundingType rounding_type;
173 174 175
  };
  V8_WARN_UNUSED_RESULT static Maybe<NumberFormatDigitOptions>
  SetNumberFormatDigitOptions(Isolate* isolate, Handle<JSReceiver> options,
Frank Tang's avatar
Frank Tang committed
176 177
                              int mnfd_default, int mxfd_default,
                              bool notation_is_compact);
178

179
  // Helper function to convert a UnicodeString to a Handle<String>
180 181 182 183 184 185 186 187
  V8_WARN_UNUSED_RESULT static MaybeHandle<String> ToString(
      Isolate* isolate, const icu::UnicodeString& string);

  // Helper function to convert a substring of UnicodeString to a Handle<String>
  V8_WARN_UNUSED_RESULT static MaybeHandle<String> ToString(
      Isolate* isolate, const icu::UnicodeString& string, int32_t begin,
      int32_t end);

188 189 190 191
  // Helper function to convert a FormattedValue to String
  V8_WARN_UNUSED_RESULT static MaybeHandle<String> FormattedToString(
      Isolate* isolate, const icu::FormattedValue& formatted);

192 193
  // Helper function to convert number field id to type string.
  static Handle<String> NumberFieldToType(Isolate* isolate,
194 195 196
                                          const NumberFormatSpan& part,
                                          const icu::UnicodeString& text,
                                          bool is_nan);
197

198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
  // A helper function to implement formatToParts which add element to array as
  // $array[$index] = { type: $field_type_string, value: $value }
  static void AddElement(Isolate* isolate, Handle<JSArray> array, int index,
                         Handle<String> field_type_string,
                         Handle<String> value);

  // A helper function to implement formatToParts which add element to array as
  // $array[$index] = {
  //   type: $field_type_string, value: $value,
  //   $additional_property_name: $additional_property_value
  // }
  static void AddElement(Isolate* isolate, Handle<JSArray> array, int index,
                         Handle<String> field_type_string, Handle<String> value,
                         Handle<String> additional_property_name,
                         Handle<String> additional_property_value);
213

214 215 216 217 218 219 220 221 222 223 224 225 226
  // In ECMA 402 v1, Intl constructors supported a mode of operation
  // where calling them with an existing object as a receiver would
  // transform the receiver into the relevant Intl instance with all
  // internal slots. In ECMA 402 v2, this capability was removed, to
  // avoid adding internal slots on existing objects. In ECMA 402 v3,
  // the capability was re-added as "normative optional" in a mode
  // which chains the underlying Intl instance on any object, when the
  // constructor is called
  //
  // See ecma402/#legacy-constructor.
  V8_WARN_UNUSED_RESULT static MaybeHandle<Object> LegacyUnwrapReceiver(
      Isolate* isolate, Handle<JSReceiver> receiver,
      Handle<JSFunction> constructor, bool has_initialized_slot);
227

228 229 230 231 232
  // enum for "localeMatcher" option: shared by many Intl objects.
  enum class MatcherOption { kBestFit, kLookup };

  // Shared function to read the "localeMatcher" option.
  V8_WARN_UNUSED_RESULT static Maybe<MatcherOption> GetLocaleMatcher(
233
      Isolate* isolate, Handle<JSReceiver> options, const char* method_name);
234

235 236
  // Shared function to read the "numberingSystem" option.
  V8_WARN_UNUSED_RESULT static Maybe<bool> GetNumberingSystem(
237
      Isolate* isolate, Handle<JSReceiver> options, const char* method_name,
238 239
      std::unique_ptr<char[]>* result);

240 241 242 243
  // Check the calendar is valid or not for that locale.
  static bool IsValidCalendar(const icu::Locale& locale,
                              const std::string& value);

244 245 246 247
  // Check the collation is valid or not for that locale.
  static bool IsValidCollation(const icu::Locale& locale,
                               const std::string& value);

248 249 250 251 252 253
  // Check the numberingSystem is valid.
  static bool IsValidNumberingSystem(const std::string& value);

  // Check the calendar is well formed.
  static bool IsWellFormedCalendar(const std::string& value);

254 255 256
  // Check the currency is well formed.
  static bool IsWellFormedCurrency(const std::string& value);

257 258 259 260 261 262
  struct ResolvedLocale {
    std::string locale;
    icu::Locale icu_locale;
    std::map<std::string, std::string> extensions;
  };

263
  static Maybe<ResolvedLocale> ResolveLocale(
264 265 266
      Isolate* isolate, const std::set<std::string>& available_locales,
      const std::vector<std::string>& requested_locales, MatcherOption options,
      const std::set<std::string>& relevant_extension_keys);
267

268 269 270 271 272 273 274
  // A helper template to implement the GetAvailableLocales
  // Usage in src/objects/js-XXX.cc
  // const std::set<std::string>& JSXxx::GetAvailableLocales() {
  //   static base::LazyInstance<Intl::AvailableLocales<icu::YYY>>::type
  //       available_locales = LAZY_INSTANCE_INITIALIZER;
  //   return available_locales.Pointer()->Get();
  // }
275 276 277 278 279 280

  struct SkipResourceCheck {
    static const char* key() { return nullptr; }
    static const char* path() { return nullptr; }
  };

281
  template <typename C = SkipResourceCheck>
282 283 284
  class AvailableLocales {
   public:
    AvailableLocales() {
285 286 287 288 289 290 291
      UErrorCode status = U_ZERO_ERROR;
      UEnumeration* uenum =
          uloc_openAvailableByType(ULOC_AVAILABLE_WITH_LEGACY_ALIASES, &status);
      DCHECK(U_SUCCESS(status));

      std::vector<std::string> all_locales;
      const char* loc;
292
      while ((loc = uenum_next(uenum, nullptr, &status)) != nullptr) {
293 294 295 296 297 298 299 300 301 302
        DCHECK(U_SUCCESS(status));
        std::string locstr(loc);
        std::replace(locstr.begin(), locstr.end(), '_', '-');
        // Handle special case
        if (locstr == "en-US-POSIX") locstr = "en-US-u-va-posix";
        all_locales.push_back(locstr);
      }
      uenum_close(uenum);

      set_ = Intl::BuildLocaleSet(all_locales, C::path(), C::key());
303
    }
304
    const std::set<std::string>& Get() const { return set_; }
305 306

   private:
307
    std::set<std::string> set_;
308 309
  };

310
  // Utility function to set text to BreakIterator.
311
  static Handle<Managed<icu::UnicodeString>> SetTextToBreakIterator(
312 313
      Isolate* isolate, Handle<String> text,
      icu::BreakIterator* break_iterator);
Frank Tang's avatar
Frank Tang committed
314

315 316 317
  // ecma262 #sec-string.prototype.normalize
  V8_WARN_UNUSED_RESULT static MaybeHandle<String> Normalize(
      Isolate* isolate, Handle<String> string, Handle<Object> form_input);
Frank Tang's avatar
Frank Tang committed
318 319 320 321
  static base::TimezoneCache* CreateTimeZoneCache();

  // Convert a Handle<String> to icu::UnicodeString
  static icu::UnicodeString ToICUUnicodeString(Isolate* isolate,
322 323
                                               Handle<String> string,
                                               int offset = 0);
Frank Tang's avatar
Frank Tang committed
324 325 326

  static const uint8_t* ToLatin1LowerTable();

327 328 329 330
  static const uint8_t* AsciiCollationWeightsL1();
  static const uint8_t* AsciiCollationWeightsL3();
  static const int kAsciiCollationWeightsLength;

331
  static String ConvertOneByteToLower(String src, String dst);
332

333
  static const std::set<std::string>& GetAvailableLocales();
334 335

  static const std::set<std::string>& GetAvailableLocalesForDateFormat();
336

337 338 339 340 341 342 343 344 345 346 347
  V8_WARN_UNUSED_RESULT static MaybeHandle<JSArray> ToJSArray(
      Isolate* isolate, const char* unicode_key,
      icu::StringEnumeration* enumeration,
      const std::function<bool(const char*)>& removes, bool sort);

  static bool RemoveCollation(const char* collation);

  static std::set<std::string> SanctionedSimpleUnits();

  V8_WARN_UNUSED_RESULT static MaybeHandle<JSArray> AvailableCalendars(
      Isolate* isolate);
348 349 350 351 352 353 354 355 356 357 358

  V8_WARN_UNUSED_RESULT static bool IsValidTimeZoneName(
      const icu::TimeZone& tz);
  V8_WARN_UNUSED_RESULT static bool IsValidTimeZoneName(Isolate* isolate,
                                                        const std::string& id);
  V8_WARN_UNUSED_RESULT static bool IsValidTimeZoneName(Isolate* isolate,
                                                        Handle<String> id);

  // Function to support Temporal
  V8_WARN_UNUSED_RESULT static std::string TimeZoneIdFromIndex(int32_t index);

359 360 361 362
  // Return the index of timezone which later could be used with
  // TimeZoneIdFromIndex. Returns -1 while the identifier is not a built-in
  // TimeZone name.
  static int32_t GetTimeZoneIndex(Isolate* isolate, Handle<String> identifier);
363

364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392
  enum class Transition { kNext, kPrevious };

  // Functions to support Temporal

  V8_WARN_UNUSED_RESULT static Maybe<int64_t>
  GetTimeZoneOffsetTransitionMilliseconds(Isolate* isolate,
                                          int32_t time_zone_index,
                                          int64_t time_ms,
                                          Transition transition);

  static Handle<String> DefaultTimeZone(Isolate* isolate);

  V8_WARN_UNUSED_RESULT static Maybe<int64_t> GetTimeZoneOffsetMilliseconds(
      Isolate* isolate, int32_t time_zone_index, int64_t millisecond);

  // This function may return the result, the std::vector<int64_t> in one of
  // the following three condictions:
  // 1. While time_in_millisecond fall into the daylight saving time change
  // moment that skipped one (or two or even six, in some Time Zone) hours
  // later in local time:
  //    [],
  // 2. In other moment not during daylight saving time change:
  //    [offset_former], and
  // 3. when time_in_millisecond fall into they daylight saving time change hour
  // which the clock time roll back one (or two or six, in some Time Zone) hour:
  //    [offset_former, offset_later]
  static std::vector<int64_t> GetTimeZonePossibleOffsetMilliseconds(
      Isolate* isolate, int32_t time_zone_index, int64_t time_ms);

393 394
  V8_WARN_UNUSED_RESULT static MaybeHandle<String> CanonicalizeTimeZoneName(
      Isolate* isolate, Handle<String> identifier);
395 396 397 398

  // ecma402/#sec-coerceoptionstoobject
  V8_WARN_UNUSED_RESULT static MaybeHandle<JSReceiver> CoerceOptionsToObject(
      Isolate* isolate, Handle<Object> options, const char* service);
399 400
};

401 402
}  // namespace internal
}  // namespace v8
403

404
#endif  // V8_OBJECTS_INTL_OBJECTS_H_