js-break-iterator.cc 9.01 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif  // V8_INTL_SUPPORT

#include "src/objects/js-break-iterator.h"

#include "src/objects/intl-objects.h"
#include "src/objects/js-break-iterator-inl.h"
#include "unicode/brkiter.h"

namespace v8 {
namespace internal {

18 19 20 21
namespace {
enum class Type { CHARACTER, WORD, SENTENCE, LINE };
}  // anonymous namespace

22 23
MaybeHandle<JSV8BreakIterator> JSV8BreakIterator::New(
    Isolate* isolate, Handle<Map> map, Handle<Object> locales,
24
    Handle<Object> options_obj, const char* service) {
25 26
  Factory* factory = isolate->factory();

27 28 29 30 31 32 33
  // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
  Maybe<std::vector<std::string>> maybe_requested_locales =
      Intl::CanonicalizeLocaleList(isolate, locales);
  MAYBE_RETURN(maybe_requested_locales, MaybeHandle<JSV8BreakIterator>());
  std::vector<std::string> requested_locales =
      maybe_requested_locales.FromJust();

34 35 36 37
  Handle<JSReceiver> options;
  if (options_obj->IsUndefined(isolate)) {
    options = factory->NewJSObjectWithNullProto();
  } else {
38 39 40
    ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
                               Object::ToObject(isolate, options_obj, service),
                               JSV8BreakIterator);
41 42 43
  }

  // Extract locale string
44
  Maybe<Intl::MatcherOption> maybe_locale_matcher =
45
      Intl::GetLocaleMatcher(isolate, options, service);
46 47 48
  MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSV8BreakIterator>());
  Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();

49
  Maybe<Intl::ResolvedLocale> maybe_resolve_locale =
50 51
      Intl::ResolveLocale(isolate, JSV8BreakIterator::GetAvailableLocales(),
                          requested_locales, matcher, {});
52 53 54 55 56
  if (maybe_resolve_locale.IsNothing()) {
    THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
                    JSV8BreakIterator);
  }
  Intl::ResolvedLocale r = maybe_resolve_locale.FromJust();
57 58

  // Extract type from options
59
  Maybe<Type> maybe_type = Intl::GetStringOption<Type>(
60
      isolate, options, "type", service,
61 62 63 64
      {"word", "character", "sentence", "line"},
      {Type::WORD, Type::CHARACTER, Type::SENTENCE, Type::LINE}, Type::WORD);
  MAYBE_RETURN(maybe_type, MaybeHandle<JSV8BreakIterator>());
  Type type_enum = maybe_type.FromJust();
65

66
  icu::Locale icu_locale = r.icu_locale;
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
  DCHECK(!icu_locale.isBogus());

  // Construct break_iterator using icu_locale and type
  UErrorCode status = U_ZERO_ERROR;
  std::unique_ptr<icu::BreakIterator> break_iterator = nullptr;
  switch (type_enum) {
    case Type::CHARACTER:
      break_iterator.reset(
          icu::BreakIterator::createCharacterInstance(icu_locale, status));
      break;
    case Type::SENTENCE:
      break_iterator.reset(
          icu::BreakIterator::createSentenceInstance(icu_locale, status));
      break;
    case Type::LINE:
82 83
      isolate->CountUsage(
          v8::Isolate::UseCounterFeature::kBreakIteratorTypeLine);
84 85 86 87
      break_iterator.reset(
          icu::BreakIterator::createLineInstance(icu_locale, status));
      break;
    default:
88 89
      isolate->CountUsage(
          v8::Isolate::UseCounterFeature::kBreakIteratorTypeWord);
90 91 92 93 94 95
      break_iterator.reset(
          icu::BreakIterator::createWordInstance(icu_locale, status));
      break;
  }

  // Error handling for break_iterator
96 97 98
  if (U_FAILURE(status) || break_iterator.get() == nullptr) {
    THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
                    JSV8BreakIterator);
99 100 101 102 103 104 105 106 107 108
  }
  isolate->CountUsage(v8::Isolate::UseCounterFeature::kBreakIterator);

  // Construct managed objects from pointers
  Handle<Managed<icu::BreakIterator>> managed_break_iterator =
      Managed<icu::BreakIterator>::FromUniquePtr(isolate, 0,
                                                 std::move(break_iterator));
  Handle<Managed<icu::UnicodeString>> managed_unicode_string =
      Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, nullptr);

109 110 111
  Handle<String> locale_str =
      isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());

112 113 114 115 116 117
  // Now all properties are ready, so we can allocate the result object.
  Handle<JSV8BreakIterator> break_iterator_holder =
      Handle<JSV8BreakIterator>::cast(
          isolate->factory()->NewFastOrSlowJSObjectFromMap(map));
  DisallowHeapAllocation no_gc;
  break_iterator_holder->set_locale(*locale_str);
118 119 120 121 122 123 124
  break_iterator_holder->set_break_iterator(*managed_break_iterator);
  break_iterator_holder->set_unicode_string(*managed_unicode_string);

  // Return break_iterator_holder
  return break_iterator_holder;
}

125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
namespace {

Type GetType(icu::BreakIterator* break_iterator) {
  // Since the developer calling the Intl.v8BreakIterator already know the type,
  // we usually do not need to know the type unless the resolvedOptions() is
  // called, we use the following trick to figure out the type instead of
  // storing it with the JSV8BreakIterator object to save memory.
  // This routine is not fast but should be seldomly used only.

  // We need to clone a copy of break iteator because we need to setText to it.
  std::unique_ptr<icu::BreakIterator> cloned_break_iterator(
      break_iterator->clone());
  // Use a magic string "He is." to call next().
  //  character type: will return 1 for "H"
  //  word type: will return 2 for "He"
  //  line type: will return 3 for "He "
  //  sentence type: will return 6 for "He is."
  icu::UnicodeString data("He is.");
  cloned_break_iterator->setText(data);
  switch (cloned_break_iterator->next()) {
    case 1:  // After "H"
      return Type::CHARACTER;
    case 2:  // After "He"
      return Type::WORD;
    case 3:  // After "He "
      return Type::LINE;
    case 6:  // After "He is."
      return Type::SENTENCE;
    default:
      UNREACHABLE();
  }
}

Handle<String> TypeAsString(Isolate* isolate, Type type) {
  switch (type) {
    case Type::CHARACTER:
      return ReadOnlyRoots(isolate).character_string_handle();
    case Type::WORD:
      return ReadOnlyRoots(isolate).word_string_handle();
    case Type::SENTENCE:
      return ReadOnlyRoots(isolate).sentence_string_handle();
    case Type::LINE:
      return ReadOnlyRoots(isolate).line_string_handle();
  }
  UNREACHABLE();
}

}  // anonymous namespace

174 175 176 177
Handle<JSObject> JSV8BreakIterator::ResolvedOptions(
    Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
  Factory* factory = isolate->factory();

178 179
  Type type = GetType(break_iterator->break_iterator().raw());

180 181 182 183 184 185
  Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
  Handle<String> locale(break_iterator->locale(), isolate);

  JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
                        NONE);
  JSObject::AddProperty(isolate, result, factory->type_string(),
186
                        TypeAsString(isolate, type), NONE);
187 188 189 190 191 192 193
  return result;
}

void JSV8BreakIterator::AdoptText(
    Isolate* isolate, Handle<JSV8BreakIterator> break_iterator_holder,
    Handle<String> text) {
  icu::BreakIterator* break_iterator =
194
      break_iterator_holder->break_iterator().raw();
195
  CHECK_NOT_NULL(break_iterator);
196
  Handle<Managed<icu::UnicodeString>> unicode_string =
197
      Intl::SetTextToBreakIterator(isolate, text, break_iterator);
198
  break_iterator_holder->set_unicode_string(*unicode_string);
199 200
}

201 202 203
Handle<Object> JSV8BreakIterator::Current(
    Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
  return isolate->factory()->NewNumberFromInt(
204
      break_iterator->break_iterator().raw()->current());
205 206 207 208 209
}

Handle<Object> JSV8BreakIterator::First(
    Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
  return isolate->factory()->NewNumberFromInt(
210
      break_iterator->break_iterator().raw()->first());
211 212 213 214 215
}

Handle<Object> JSV8BreakIterator::Next(
    Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
  return isolate->factory()->NewNumberFromInt(
216
      break_iterator->break_iterator().raw()->next());
217 218
}

219 220
String JSV8BreakIterator::BreakType(Isolate* isolate,
                                    Handle<JSV8BreakIterator> break_iterator) {
221
  int32_t status = break_iterator->break_iterator().raw()->getRuleStatus();
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
  // Keep return values in sync with JavaScript BreakType enum.
  if (status >= UBRK_WORD_NONE && status < UBRK_WORD_NONE_LIMIT) {
    return ReadOnlyRoots(isolate).none_string();
  }
  if (status >= UBRK_WORD_NUMBER && status < UBRK_WORD_NUMBER_LIMIT) {
    return ReadOnlyRoots(isolate).number_string();
  }
  if (status >= UBRK_WORD_LETTER && status < UBRK_WORD_LETTER_LIMIT) {
    return ReadOnlyRoots(isolate).letter_string();
  }
  if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) {
    return ReadOnlyRoots(isolate).kana_string();
  }
  if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) {
    return ReadOnlyRoots(isolate).ideo_string();
  }
  return ReadOnlyRoots(isolate).unknown_string();
}

241
const std::set<std::string>& JSV8BreakIterator::GetAvailableLocales() {
242
  return Intl::GetAvailableLocales();
243 244
}

245 246
}  // namespace internal
}  // namespace v8