// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif  // V8_INTL_SUPPORT

#include "src/objects/js-collator.h"

#include "src/execution/isolate.h"
#include "src/objects/js-collator-inl.h"
#include "src/objects/js-locale.h"
#include "src/objects/objects-inl.h"
#include "unicode/coll.h"
#include "unicode/locid.h"
#include "unicode/strenum.h"
#include "unicode/ucol.h"
#include "unicode/udata.h"
#include "unicode/uloc.h"
#include "unicode/utypes.h"

namespace v8 {
namespace internal {

namespace {

enum class Usage {
  SORT,
  SEARCH,
};

enum class Sensitivity {
  kBase,
  kAccent,
  kCase,
  kVariant,
  kUndefined,
};

// enum for "caseFirst" option.
enum class CaseFirst { kUndefined, kUpper, kLower, kFalse };

Maybe<CaseFirst> GetCaseFirst(Isolate* isolate, Handle<JSReceiver> options,
                              const char* method) {
  return Intl::GetStringOption<CaseFirst>(
      isolate, options, "caseFirst", method, {"upper", "lower", "false"},
      {CaseFirst::kUpper, CaseFirst::kLower, CaseFirst::kFalse},
      CaseFirst::kUndefined);
}

// TODO(gsathya): Consider internalizing the value strings.
void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
                                  Handle<String> key, const char* value) {
  DCHECK_NOT_NULL(value);
  Handle<String> value_str =
      isolate->factory()->NewStringFromAsciiChecked(value);

  // This is a brand new JSObject that shouldn't already have the same
  // key so this shouldn't fail.
  Maybe<bool> maybe = JSReceiver::CreateDataProperty(
      isolate, options, key, value_str, Just(kDontThrow));
  DCHECK(maybe.FromJust());
  USE(maybe);
}

void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
                                  Handle<String> key, bool value) {
  Handle<Object> value_obj = isolate->factory()->ToBoolean(value);

  // This is a brand new JSObject that shouldn't already have the same
  // key so this shouldn't fail.
  Maybe<bool> maybe = JSReceiver::CreateDataProperty(
      isolate, options, key, value_obj, Just(kDontThrow));
  DCHECK(maybe.FromJust());
  USE(maybe);
}

}  // anonymous namespace

// static
Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
                                             Handle<JSCollator> collator) {
  Handle<JSObject> options =
      isolate->factory()->NewJSObject(isolate->object_function());

  icu::Collator* icu_collator = collator->icu_collator().raw();
  DCHECK_NOT_NULL(icu_collator);

  UErrorCode status = U_ZERO_ERROR;
  bool numeric =
      icu_collator->getAttribute(UCOL_NUMERIC_COLLATION, status) == UCOL_ON;
  DCHECK(U_SUCCESS(status));

  const char* case_first = nullptr;
  status = U_ZERO_ERROR;
  switch (icu_collator->getAttribute(UCOL_CASE_FIRST, status)) {
    case UCOL_LOWER_FIRST:
      case_first = "lower";
      break;
    case UCOL_UPPER_FIRST:
      case_first = "upper";
      break;
    default:
      case_first = "false";
  }
  DCHECK(U_SUCCESS(status));

  const char* sensitivity = nullptr;
  status = U_ZERO_ERROR;
  switch (icu_collator->getAttribute(UCOL_STRENGTH, status)) {
    case UCOL_PRIMARY: {
      DCHECK(U_SUCCESS(status));
      status = U_ZERO_ERROR;
      // case level: true + s1 -> case, s1 -> base.
      if (UCOL_ON == icu_collator->getAttribute(UCOL_CASE_LEVEL, status)) {
        sensitivity = "case";
      } else {
        sensitivity = "base";
      }
      DCHECK(U_SUCCESS(status));
      break;
    }
    case UCOL_SECONDARY:
      sensitivity = "accent";
      break;
    case UCOL_TERTIARY:
      sensitivity = "variant";
      break;
    case UCOL_QUATERNARY:
      // We shouldn't get quaternary and identical from ICU, but if we do
      // put them into variant.
      sensitivity = "variant";
      break;
    default:
      sensitivity = "variant";
  }
  DCHECK(U_SUCCESS(status));

  status = U_ZERO_ERROR;
  bool ignore_punctuation = icu_collator->getAttribute(UCOL_ALTERNATE_HANDLING,
                                                       status) == UCOL_SHIFTED;
  DCHECK(U_SUCCESS(status));

  status = U_ZERO_ERROR;

  icu::Locale icu_locale(icu_collator->getLocale(ULOC_VALID_LOCALE, status));
  DCHECK(U_SUCCESS(status));

  const char* collation = "default";
  const char* usage = "sort";
  const char* collation_key = "co";
  status = U_ZERO_ERROR;
  std::string collation_value =
      icu_locale.getUnicodeKeywordValue<std::string>(collation_key, status);

  std::string locale;
  if (U_SUCCESS(status)) {
    if (collation_value == "search") {
      usage = "search";

      // Search is disallowed as a collation value per spec. Let's
      // use `default`, instead.
      //
      // https://tc39.github.io/ecma402/#sec-properties-of-intl-collator-instances
      collation = "default";

      // We clone the icu::Locale because we don't want the
      // icu_collator to be affected when we remove the collation key
      // below.
      icu::Locale new_icu_locale = icu_locale;

      // The spec forbids the search as a collation value in the
      // locale tag, so let's filter it out.
      status = U_ZERO_ERROR;
      new_icu_locale.setUnicodeKeywordValue(collation_key, nullptr, status);
      DCHECK(U_SUCCESS(status));

      locale = Intl::ToLanguageTag(new_icu_locale).FromJust();
    } else {
      collation = collation_value.c_str();
      locale = Intl::ToLanguageTag(icu_locale).FromJust();
    }
  } else {
    locale = Intl::ToLanguageTag(icu_locale).FromJust();
  }

  // 5. For each row of Table 2, except the header row, in table order, do
  //    ...
  // Table 2: Resolved Options of Collator Instances
  //  Internal Slot            Property               Extension Key
  //    [[Locale]                "locale"
  //    [[Usage]                 "usage"
  //    [[Sensitivity]]          "sensitivity"
  //    [[IgnorePunctuation]]    "ignorePunctuation"
  //    [[Collation]]            "collation"
  //    [[Numeric]]              "numeric"              kn
  //    [[CaseFirst]]            "caseFirst"            kf

  // If the collator return the locale differ from what got requested, we stored
  // it in the collator->locale. Otherwise, we just use the one from the
  // collator.
  if (collator->locale().length() != 0) {
    // Get the locale from collator->locale() since we know in some cases
    // collator won't be able to return the requested one, such as zh_CN.
    Handle<String> locale_from_collator(collator->locale(), isolate);
    Maybe<bool> maybe = JSReceiver::CreateDataProperty(
        isolate, options, isolate->factory()->locale_string(),
        locale_from_collator, Just(kDontThrow));
    DCHECK(maybe.FromJust());
    USE(maybe);
  } else {
    // Just return from the collator for most of the cases that we can recover
    // from the collator.
    CreateDataPropertyForOptions(
        isolate, options, isolate->factory()->locale_string(), locale.c_str());
  }

  CreateDataPropertyForOptions(isolate, options,
                               isolate->factory()->usage_string(), usage);
  CreateDataPropertyForOptions(
      isolate, options, isolate->factory()->sensitivity_string(), sensitivity);
  CreateDataPropertyForOptions(isolate, options,
                               isolate->factory()->ignorePunctuation_string(),
                               ignore_punctuation);
  CreateDataPropertyForOptions(
      isolate, options, isolate->factory()->collation_string(), collation);
  CreateDataPropertyForOptions(isolate, options,
                               isolate->factory()->numeric_string(), numeric);
  CreateDataPropertyForOptions(
      isolate, options, isolate->factory()->caseFirst_string(), case_first);
  return options;
}

namespace {

CaseFirst ToCaseFirst(const char* str) {
  if (strcmp(str, "upper") == 0) return CaseFirst::kUpper;
  if (strcmp(str, "lower") == 0) return CaseFirst::kLower;
  if (strcmp(str, "false") == 0) return CaseFirst::kFalse;
  return CaseFirst::kUndefined;
}

UColAttributeValue ToUColAttributeValue(CaseFirst case_first) {
  switch (case_first) {
    case CaseFirst::kUpper:
      return UCOL_UPPER_FIRST;
    case CaseFirst::kLower:
      return UCOL_LOWER_FIRST;
    case CaseFirst::kFalse:
    case CaseFirst::kUndefined:
      return UCOL_OFF;
  }
}

void SetNumericOption(icu::Collator* icu_collator, bool numeric) {
  DCHECK_NOT_NULL(icu_collator);
  UErrorCode status = U_ZERO_ERROR;
  icu_collator->setAttribute(UCOL_NUMERIC_COLLATION,
                             numeric ? UCOL_ON : UCOL_OFF, status);
  DCHECK(U_SUCCESS(status));
}

void SetCaseFirstOption(icu::Collator* icu_collator, CaseFirst case_first) {
  DCHECK_NOT_NULL(icu_collator);
  UErrorCode status = U_ZERO_ERROR;
  icu_collator->setAttribute(UCOL_CASE_FIRST, ToUColAttributeValue(case_first),
                             status);
  DCHECK(U_SUCCESS(status));
}

}  // anonymous namespace

// static
MaybeHandle<JSCollator> JSCollator::New(Isolate* isolate, Handle<Map> map,
                                        Handle<Object> locales,
                                        Handle<Object> options_obj,
                                        const char* service) {
  // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
  Maybe<std::vector<std::string>> maybe_requested_locales =
      Intl::CanonicalizeLocaleList(isolate, locales);
  MAYBE_RETURN(maybe_requested_locales, Handle<JSCollator>());
  std::vector<std::string> requested_locales =
      maybe_requested_locales.FromJust();

  // 2. If options is undefined, then
  if (options_obj->IsUndefined(isolate)) {
    // 2. a. Let options be ObjectCreate(null).
    options_obj = isolate->factory()->NewJSObjectWithNullProto();
  } else {
    // 3. Else
    // 3. a. Let options be ? ToObject(options).
    ASSIGN_RETURN_ON_EXCEPTION(isolate, options_obj,
                               Object::ToObject(isolate, options_obj, service),
                               JSCollator);
  }

  // At this point, options_obj can either be a JSObject or a JSProxy only.
  Handle<JSReceiver> options = Handle<JSReceiver>::cast(options_obj);

  // 4. Let usage be ? GetOption(options, "usage", "string", « "sort",
  // "search" », "sort").
  Maybe<Usage> maybe_usage = Intl::GetStringOption<Usage>(
      isolate, options, "usage", service, {"sort", "search"},
      {Usage::SORT, Usage::SEARCH}, Usage::SORT);
  MAYBE_RETURN(maybe_usage, MaybeHandle<JSCollator>());
  Usage usage = maybe_usage.FromJust();

  // 9. Let matcher be ? GetOption(options, "localeMatcher", "string",
  // « "lookup", "best fit" », "best fit").
  // 10. Set opt.[[localeMatcher]] to matcher.
  Maybe<Intl::MatcherOption> maybe_locale_matcher =
      Intl::GetLocaleMatcher(isolate, options, service);
  MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSCollator>());
  Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();

  // x. Let _collation_ be ? GetOption(_options_, *"collation"*, *"string"*,
  // *undefined*, *undefined*).
  std::unique_ptr<char[]> collation_str = nullptr;
  const std::vector<const char*> empty_values = {};
  Maybe<bool> maybe_collation = Intl::GetStringOption(
      isolate, options, "collation", empty_values, service, &collation_str);
  MAYBE_RETURN(maybe_collation, MaybeHandle<JSCollator>());
  // x. If _collation_ is not *undefined*, then
  if (maybe_collation.FromJust() && collation_str != nullptr) {
    // 1. If _collation_ does not match the Unicode Locale Identifier `type`
    // nonterminal, throw a *RangeError* exception.
    if (!JSLocale::Is38AlphaNumList(collation_str.get())) {
      THROW_NEW_ERROR_RETURN_VALUE(
          isolate,
          NewRangeError(MessageTemplate::kInvalid,
                        isolate->factory()->collation_string(),
                        isolate->factory()->NewStringFromAsciiChecked(
                            collation_str.get())),
          MaybeHandle<JSCollator>());
    }
  }
  // x. Set _opt_.[[co]] to _collation_.

  // 11. Let numeric be ? GetOption(options, "numeric", "boolean",
  // undefined, undefined).
  // 12. If numeric is not undefined, then
  //    a. Let numeric be ! ToString(numeric).
  //
  // Note: We omit the ToString(numeric) operation as it's not
  // observable. Intl::GetBoolOption returns a Boolean and
  // ToString(Boolean) is not side-effecting.
  //
  // 13. Set opt.[[kn]] to numeric.
  bool numeric;
  Maybe<bool> found_numeric =
      Intl::GetBoolOption(isolate, options, "numeric", service, &numeric);
  MAYBE_RETURN(found_numeric, MaybeHandle<JSCollator>());

  // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string",
  //     « "upper", "lower", "false" », undefined).
  Maybe<CaseFirst> maybe_case_first = GetCaseFirst(isolate, options, service);
  MAYBE_RETURN(maybe_case_first, MaybeHandle<JSCollator>());
  CaseFirst case_first = maybe_case_first.FromJust();

  // The relevant unicode extensions accepted by Collator as specified here:
  // https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots
  //
  // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
  std::set<std::string> relevant_extension_keys{"co", "kn", "kf"};

  // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]],
  // requestedLocales, opt, %Collator%.[[RelevantExtensionKeys]],
  // localeData).
  Maybe<Intl::ResolvedLocale> maybe_resolve_locale =
      Intl::ResolveLocale(isolate, JSCollator::GetAvailableLocales(),
                          requested_locales, matcher, relevant_extension_keys);
  if (maybe_resolve_locale.IsNothing()) {
    THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
                    JSCollator);
  }
  Intl::ResolvedLocale r = maybe_resolve_locale.FromJust();

  // 18. Set collator.[[Locale]] to r.[[locale]].
  icu::Locale icu_locale = r.icu_locale;
  DCHECK(!icu_locale.isBogus());

  // 19. Let collation be r.[[co]].
  UErrorCode status = U_ZERO_ERROR;
  if (collation_str != nullptr) {
    auto co_extension_it = r.extensions.find("co");
    if (co_extension_it != r.extensions.end() &&
        co_extension_it->second != collation_str.get()) {
      icu_locale.setUnicodeKeywordValue("co", nullptr, status);
      DCHECK(U_SUCCESS(status));
    }
  }

  // 5. Set collator.[[Usage]] to usage.
  //
  // 6. If usage is "sort", then
  //    a. Let localeData be %Collator%.[[SortLocaleData]].
  // 7. Else,
  //    a. Let localeData be %Collator%.[[SearchLocaleData]].
  //
  // The Intl spec doesn't allow us to use "search" as an extension
  // value for collation as per:
  // https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots
  //
  // But the only way to pass the value "search" for collation from
  // the options object to ICU is to use the 'co' extension keyword.
  //
  // This will need to be filtered out when creating the
  // resolvedOptions object.
  if (usage == Usage::SEARCH) {
    UErrorCode status = U_ZERO_ERROR;
    icu_locale.setUnicodeKeywordValue("co", "search", status);
    DCHECK(U_SUCCESS(status));
  } else {
    if (collation_str != nullptr &&
        Intl::IsValidCollation(icu_locale, collation_str.get())) {
      icu_locale.setUnicodeKeywordValue("co", collation_str.get(), status);
      DCHECK(U_SUCCESS(status));
    }
  }

  // 20. If collation is null, let collation be "default".
  // 21. Set collator.[[Collation]] to collation.
  //
  // We don't store the collation value as per the above two steps
  // here. The collation value can be looked up from icu::Collator on
  // demand, as part of Intl.Collator.prototype.resolvedOptions.

  std::unique_ptr<icu::Collator> icu_collator(
      icu::Collator::createInstance(icu_locale, status));
  if (U_FAILURE(status) || icu_collator.get() == nullptr) {
    status = U_ZERO_ERROR;
    // Remove extensions and try again.
    icu::Locale no_extension_locale(icu_locale.getBaseName());
    icu_collator.reset(
        icu::Collator::createInstance(no_extension_locale, status));

    if (U_FAILURE(status) || icu_collator.get() == nullptr) {
      THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
                      JSCollator);
    }
  }
  DCHECK(U_SUCCESS(status));

  icu::Locale collator_locale(
      icu_collator->getLocale(ULOC_VALID_LOCALE, status));

  // 22. If relevantExtensionKeys contains "kn", then
  //     a. Set collator.[[Numeric]] to ! SameValue(r.[[kn]], "true").
  //
  // If the numeric value is passed in through the options object,
  // then we use it. Otherwise, we check if the numeric value is
  // passed in through the unicode extensions.
  status = U_ZERO_ERROR;
  if (found_numeric.FromJust()) {
    SetNumericOption(icu_collator.get(), numeric);
  } else {
    auto kn_extension_it = r.extensions.find("kn");
    if (kn_extension_it != r.extensions.end()) {
      SetNumericOption(icu_collator.get(), (kn_extension_it->second == "true"));
    }
  }

  // 23. If relevantExtensionKeys contains "kf", then
  //     a. Set collator.[[CaseFirst]] to r.[[kf]].
  //
  // If the caseFirst value is passed in through the options object,
  // then we use it. Otherwise, we check if the caseFirst value is
  // passed in through the unicode extensions.
  if (case_first != CaseFirst::kUndefined) {
    SetCaseFirstOption(icu_collator.get(), case_first);
  } else {
    auto kf_extension_it = r.extensions.find("kf");
    if (kf_extension_it != r.extensions.end()) {
      SetCaseFirstOption(icu_collator.get(),
                         ToCaseFirst(kf_extension_it->second.c_str()));
    }
  }

  // Normalization is always on, by the spec. We are free to optimize
  // if the strings are already normalized (but we don't have a way to tell
  // that right now).
  status = U_ZERO_ERROR;
  icu_collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
  DCHECK(U_SUCCESS(status));

  // 24. Let sensitivity be ? GetOption(options, "sensitivity",
  // "string", « "base", "accent", "case", "variant" », undefined).
  Maybe<Sensitivity> maybe_sensitivity = Intl::GetStringOption<Sensitivity>(
      isolate, options, "sensitivity", service,
      {"base", "accent", "case", "variant"},
      {Sensitivity::kBase, Sensitivity::kAccent, Sensitivity::kCase,
       Sensitivity::kVariant},
      Sensitivity::kUndefined);
  MAYBE_RETURN(maybe_sensitivity, MaybeHandle<JSCollator>());
  Sensitivity sensitivity = maybe_sensitivity.FromJust();

  // 25. If sensitivity is undefined, then
  if (sensitivity == Sensitivity::kUndefined) {
    // 25. a. If usage is "sort", then
    if (usage == Usage::SORT) {
      // 25. a. i. Let sensitivity be "variant".
      sensitivity = Sensitivity::kVariant;
    }
  }
  // 26. Set collator.[[Sensitivity]] to sensitivity.
  switch (sensitivity) {
    case Sensitivity::kBase:
      icu_collator->setStrength(icu::Collator::PRIMARY);
      break;
    case Sensitivity::kAccent:
      icu_collator->setStrength(icu::Collator::SECONDARY);
      break;
    case Sensitivity::kCase:
      icu_collator->setStrength(icu::Collator::PRIMARY);
      status = U_ZERO_ERROR;
      icu_collator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
      DCHECK(U_SUCCESS(status));
      break;
    case Sensitivity::kVariant:
      icu_collator->setStrength(icu::Collator::TERTIARY);
      break;
    case Sensitivity::kUndefined:
      break;
  }

  // 27.Let ignorePunctuation be ? GetOption(options,
  // "ignorePunctuation", "boolean", undefined, false).
  bool ignore_punctuation;
  Maybe<bool> found_ignore_punctuation = Intl::GetBoolOption(
      isolate, options, "ignorePunctuation", service, &ignore_punctuation);
  MAYBE_RETURN(found_ignore_punctuation, MaybeHandle<JSCollator>());

  // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
  if (found_ignore_punctuation.FromJust() && ignore_punctuation) {
    status = U_ZERO_ERROR;
    icu_collator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
    DCHECK(U_SUCCESS(status));
  }

  Handle<Managed<icu::Collator>> managed_collator =
      Managed<icu::Collator>::FromUniquePtr(isolate, 0,
                                            std::move(icu_collator));

  // We only need to do so if it is different from the collator would return.
  Handle<String> locale_str = isolate->factory()->NewStringFromAsciiChecked(
      (collator_locale != icu_locale) ? r.locale.c_str() : "");
  // Now all properties are ready, so we can allocate the result object.
  Handle<JSCollator> collator = Handle<JSCollator>::cast(
      isolate->factory()->NewFastOrSlowJSObjectFromMap(map));
  DisallowHeapAllocation no_gc;
  collator->set_icu_collator(*managed_collator);
  collator->set_locale(*locale_str);

  // 29. Return collator.
  return collator;
}

namespace {

class CollatorAvailableLocales {
 public:
  CollatorAvailableLocales() {
    int32_t num_locales = 0;
    const icu::Locale* icu_available_locales =
        icu::Collator::getAvailableLocales(num_locales);
    std::vector<std::string> locales;
    for (int32_t i = 0; i < num_locales; ++i) {
      locales.push_back(
          Intl::ToLanguageTag(icu_available_locales[i]).FromJust());
    }
#define U_ICUDATA_COLL U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll"
    set_ = Intl::BuildLocaleSet(locales, U_ICUDATA_COLL, nullptr);
#undef U_ICUDATA_COLL
  }
  virtual ~CollatorAvailableLocales() = default;
  const std::set<std::string>& Get() const { return set_; }

 private:
  std::set<std::string> set_;
};

}  // namespace

const std::set<std::string>& JSCollator::GetAvailableLocales() {
  static base::LazyInstance<CollatorAvailableLocales>::type available_locales =
      LAZY_INSTANCE_INITIALIZER;
  return available_locales.Pointer()->Get();
}

}  // namespace internal
}  // namespace v8