Commit 1b4436e7 authored by Frank Tang, committed by Commit Bot

[Intl] Move code from builtins-intl.cc

Move Normalize into intl-objects.*
Move V8BreakIterator code to js-break-iterator*
Add heap-symbol for breakType of JSBreakIterator

Bug: v8:5751
Cq-Include-Trybots: luci.v8.try:v8_linux_noi18n_rel_ng
Change-Id: Id25af28770ae3c0b7716f4e3b602d4b040194a7d
Reviewed-on: https://chromium-review.googlesource.com/c/1293110
Commit-Queue: Frank Tang <ftang@chromium.org>
Reviewed-by: Sathya Gunasekaran <gsathya@chromium.org>
Cr-Commit-Position: refs/heads/master@{#56875}
parent f03b3297
This diff is collapsed.
...@@ -29,9 +29,12 @@ ...@@ -29,9 +29,12 @@
V(_, collation_string, "collation") \ V(_, collation_string, "collation") \
V(_, currency_string, "currency") \ V(_, currency_string, "currency") \
V(_, currencyDisplay_string, "currencyDisplay") \ V(_, currencyDisplay_string, "currencyDisplay") \
V(_, ideo_string, "ideo") \
V(_, ignorePunctuation_string, "ignorePunctuation") \ V(_, ignorePunctuation_string, "ignorePunctuation") \
V(_, Invalid_Date_string, "Invalid Date") \ V(_, Invalid_Date_string, "Invalid Date") \
V(_, integer_string, "integer") \ V(_, integer_string, "integer") \
V(_, kana_string, "kana") \
V(_, letter_string, "letter") \
V(_, lineBreakStyle_string, "lineBreakStyle") \ V(_, lineBreakStyle_string, "lineBreakStyle") \
V(_, literal_string, "literal") \ V(_, literal_string, "literal") \
V(_, locale_string, "locale") \ V(_, locale_string, "locale") \
...@@ -66,6 +69,7 @@ ...@@ -66,6 +69,7 @@
V(_, timeZone_string, "timeZone") \ V(_, timeZone_string, "timeZone") \
V(_, timeZoneName_string, "timeZoneName") \ V(_, timeZoneName_string, "timeZoneName") \
V(_, type_string, "type") \ V(_, type_string, "type") \
V(_, unknown_string, "unknown") \
V(_, upper_string, "upper") \ V(_, upper_string, "upper") \
V(_, usage_string, "usage") \ V(_, usage_string, "usage") \
V(_, useGrouping_string, "useGrouping") \ V(_, useGrouping_string, "useGrouping") \
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "unicode/coll.h" #include "unicode/coll.h"
#include "unicode/decimfmt.h" #include "unicode/decimfmt.h"
#include "unicode/locid.h" #include "unicode/locid.h"
#include "unicode/normalizer2.h"
#include "unicode/numfmt.h" #include "unicode/numfmt.h"
#include "unicode/numsys.h" #include "unicode/numsys.h"
#include "unicode/regex.h" #include "unicode/regex.h"
...@@ -1856,6 +1857,74 @@ Managed<icu::UnicodeString>* Intl::SetTextToBreakIterator( ...@@ -1856,6 +1857,74 @@ Managed<icu::UnicodeString>* Intl::SetTextToBreakIterator(
return *new_u_text; return *new_u_text;
} }
// ecma262 #sec-string.prototype.normalize
//
// Normalizes |string| according to the normalization form named by
// |form_input|.
//
// |form_input| may be undefined, in which case NFC is used. Otherwise it is
// converted with Object::ToString (a pending exception from that conversion is
// propagated) and must equal one of "NFC", "NFD", "NFKC" or "NFKD"; any other
// value throws a RangeError (MessageTemplate::kNormalizationForm).
//
// Returns the flattened input string itself when ICU reports that the whole
// input already passes the quick check, otherwise a new string built from the
// normalized result. Throws a TypeError (MessageTemplate::kIcuError) when ICU
// reports a failure.
MaybeHandle<String> Intl::Normalize(Isolate* isolate, Handle<String> string,
                                    Handle<Object> form_input) {
  // ICU identifies a normalizer by a data name plus a mode: the decomposed
  // forms (NFD, NFKD) reuse the "nfc" / "nfkc" names with UNORM2_DECOMPOSE.
  const char* form_name;
  UNormalization2Mode form_mode;
  if (form_input->IsUndefined(isolate)) {
    // The default form is NFC.
    form_name = "nfc";
    form_mode = UNORM2_COMPOSE;
  } else {
    Handle<String> form;
    ASSIGN_RETURN_ON_EXCEPTION(isolate, form,
                               Object::ToString(isolate, form_input), String);

    if (String::Equals(isolate, form, isolate->factory()->NFC_string())) {
      form_name = "nfc";
      form_mode = UNORM2_COMPOSE;
    } else if (String::Equals(isolate, form,
                              isolate->factory()->NFD_string())) {
      form_name = "nfc";
      form_mode = UNORM2_DECOMPOSE;
    } else if (String::Equals(isolate, form,
                              isolate->factory()->NFKC_string())) {
      form_name = "nfkc";
      form_mode = UNORM2_COMPOSE;
    } else if (String::Equals(isolate, form,
                              isolate->factory()->NFKD_string())) {
      form_name = "nfkc";
      form_mode = UNORM2_DECOMPOSE;
    } else {
      Handle<String> valid_forms =
          isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
      THROW_NEW_ERROR(
          isolate,
          NewRangeError(MessageTemplate::kNormalizationForm, valid_forms),
          String);
    }
  }

  int length = string->length();
  string = String::Flatten(isolate, string);
  icu::UnicodeString result;
  UErrorCode status = U_ZERO_ERROR;
  icu::UnicodeString input = ToICUUnicodeString(isolate, string);
  // Getting a singleton. Should not free it.
  const icu::Normalizer2* normalizer =
      icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
  DCHECK(U_SUCCESS(status));
  CHECK_NOT_NULL(normalizer);
  int32_t normalized_prefix_length =
      normalizer->spanQuickCheckYes(input, status);
  // Quick return if the input is already normalized.
  if (length == normalized_prefix_length) return string;
  icu::UnicodeString unnormalized =
      input.tempSubString(normalized_prefix_length);
  // Read-only alias of the normalized prefix.
  result.setTo(false, input.getBuffer(), normalized_prefix_length);
  // copy-on-write; normalize the suffix and append to |result|.
  normalizer->normalizeSecondAndAppend(result, unnormalized, status);

  // |status| accumulates across the ICU calls above, so one check suffices.
  if (U_FAILURE(status)) {
    THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
  }

  return Intl::ToString(isolate, result);
}
// ICUTimezoneCache calls out to ICU for TimezoneCache // ICUTimezoneCache calls out to ICU for TimezoneCache
// functionality in a straightforward way. // functionality in a straightforward way.
class ICUTimezoneCache : public base::TimezoneCache { class ICUTimezoneCache : public base::TimezoneCache {
......
...@@ -216,6 +216,9 @@ class Intl { ...@@ -216,6 +216,9 @@ class Intl {
Isolate* isolate, Handle<String> text, Isolate* isolate, Handle<String> text,
icu::BreakIterator* break_iterator); icu::BreakIterator* break_iterator);
// ecma262 #sec-string.prototype.normalize
V8_WARN_UNUSED_RESULT static MaybeHandle<String> Normalize(
Isolate* isolate, Handle<String> string, Handle<Object> form_input);
static base::TimezoneCache* CreateTimeZoneCache(); static base::TimezoneCache* CreateTimeZoneCache();
// Convert a Handle<String> to icu::UnicodeString // Convert a Handle<String> to icu::UnicodeString
......
...@@ -168,5 +168,45 @@ Handle<String> JSV8BreakIterator::TypeAsString() const { ...@@ -168,5 +168,45 @@ Handle<String> JSV8BreakIterator::TypeAsString() const {
} }
} }
// Returns the value of current() on the wrapped ICU break iterator as a
// Number.
Handle<Object> JSV8BreakIterator::Current(
    Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
  const int32_t position =
      break_iterator->break_iterator()->raw()->current();
  return isolate->factory()->NewNumberFromInt(position);
}
// Calls first() on the wrapped ICU break iterator and returns the resulting
// value as a Number.
Handle<Object> JSV8BreakIterator::First(
    Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
  const int32_t position = break_iterator->break_iterator()->raw()->first();
  return isolate->factory()->NewNumberFromInt(position);
}
// Calls next() on the wrapped ICU break iterator and returns the resulting
// value as a Number.
Handle<Object> JSV8BreakIterator::Next(
    Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
  const int32_t position = break_iterator->break_iterator()->raw()->next();
  return isolate->factory()->NewNumberFromInt(position);
}
// Translates the value of getRuleStatus() on the wrapped ICU break iterator
// into one of the strings "none", "number", "letter", "kana" or "ideo".
// Statuses outside every UBRK_WORD_* range yield "unknown".
String* JSV8BreakIterator::BreakType(Isolate* isolate,
                                     Handle<JSV8BreakIterator> break_iterator) {
  const int32_t rule_status =
      break_iterator->break_iterator()->raw()->getRuleStatus();
  ReadOnlyRoots roots(isolate);
  // True when |rule_status| lies in the half-open interval [low, limit).
  const auto in_range = [rule_status](int32_t low, int32_t limit) {
    return low <= rule_status && rule_status < limit;
  };

  // Keep return values in sync with JavaScript BreakType enum.
  if (in_range(UBRK_WORD_NONE, UBRK_WORD_NONE_LIMIT)) {
    return roots.none_string();
  } else if (in_range(UBRK_WORD_NUMBER, UBRK_WORD_NUMBER_LIMIT)) {
    return roots.number_string();
  } else if (in_range(UBRK_WORD_LETTER, UBRK_WORD_LETTER_LIMIT)) {
    return roots.letter_string();
  } else if (in_range(UBRK_WORD_KANA, UBRK_WORD_KANA_LIMIT)) {
    return roots.kana_string();
  } else if (in_range(UBRK_WORD_IDEO, UBRK_WORD_IDEO_LIMIT)) {
    return roots.ideo_string();
  }
  return roots.unknown_string();
}
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8
...@@ -26,16 +26,25 @@ namespace internal { ...@@ -26,16 +26,25 @@ namespace internal {
class JSV8BreakIterator : public JSObject { class JSV8BreakIterator : public JSObject {
public: public:
V8_WARN_UNUSED_RESULT static MaybeHandle<JSV8BreakIterator> Initialize( V8_WARN_UNUSED_RESULT static MaybeHandle<JSV8BreakIterator> Initialize(
Isolate* isolate, Handle<JSV8BreakIterator> break_iterator_holder, Isolate* isolate, Handle<JSV8BreakIterator> break_iterator,
Handle<Object> input_locales, Handle<Object> input_options); Handle<Object> input_locales, Handle<Object> input_options);
static Handle<JSObject> ResolvedOptions( static Handle<JSObject> ResolvedOptions(
Isolate* isolate, Handle<JSV8BreakIterator> break_iterator); Isolate* isolate, Handle<JSV8BreakIterator> break_iterator);
static void AdoptText(Isolate* isolate, static void AdoptText(Isolate* isolate,
Handle<JSV8BreakIterator> break_iterator_holder, Handle<JSV8BreakIterator> break_iterator,
Handle<String> text); Handle<String> text);
static Handle<Object> Current(Isolate* isolate,
Handle<JSV8BreakIterator> break_iterator);
static Handle<Object> First(Isolate* isolate,
Handle<JSV8BreakIterator> break_iterator);
static Handle<Object> Next(Isolate* isolate,
Handle<JSV8BreakIterator> break_iterator);
static String* BreakType(Isolate* isolate,
Handle<JSV8BreakIterator> break_iterator);
enum class Type { CHARACTER, WORD, SENTENCE, LINE, COUNT }; enum class Type { CHARACTER, WORD, SENTENCE, LINE, COUNT };
inline void set_type(Type type); inline void set_type(Type type);
inline Type type() const; inline Type type() const;
......
...@@ -296,41 +296,41 @@ KNOWN_MAPS = { ...@@ -296,41 +296,41 @@ KNOWN_MAPS = {
("RO_SPACE", 0x02699): (171, "Tuple2Map"), ("RO_SPACE", 0x02699): (171, "Tuple2Map"),
("RO_SPACE", 0x02739): (173, "ArrayBoilerplateDescriptionMap"), ("RO_SPACE", 0x02739): (173, "ArrayBoilerplateDescriptionMap"),
("RO_SPACE", 0x02a79): (161, "InterceptorInfoMap"), ("RO_SPACE", 0x02a79): (161, "InterceptorInfoMap"),
("RO_SPACE", 0x05001): (153, "AccessCheckInfoMap"), ("RO_SPACE", 0x05061): (153, "AccessCheckInfoMap"),
("RO_SPACE", 0x05051): (154, "AccessorInfoMap"), ("RO_SPACE", 0x050b1): (154, "AccessorInfoMap"),
("RO_SPACE", 0x050a1): (155, "AccessorPairMap"), ("RO_SPACE", 0x05101): (155, "AccessorPairMap"),
("RO_SPACE", 0x050f1): (156, "AliasedArgumentsEntryMap"), ("RO_SPACE", 0x05151): (156, "AliasedArgumentsEntryMap"),
("RO_SPACE", 0x05141): (157, "AllocationMementoMap"), ("RO_SPACE", 0x051a1): (157, "AllocationMementoMap"),
("RO_SPACE", 0x05191): (158, "AsyncGeneratorRequestMap"), ("RO_SPACE", 0x051f1): (158, "AsyncGeneratorRequestMap"),
("RO_SPACE", 0x051e1): (159, "DebugInfoMap"), ("RO_SPACE", 0x05241): (159, "DebugInfoMap"),
("RO_SPACE", 0x05231): (160, "FunctionTemplateInfoMap"), ("RO_SPACE", 0x05291): (160, "FunctionTemplateInfoMap"),
("RO_SPACE", 0x05281): (162, "InterpreterDataMap"), ("RO_SPACE", 0x052e1): (162, "InterpreterDataMap"),
("RO_SPACE", 0x052d1): (163, "ModuleInfoEntryMap"), ("RO_SPACE", 0x05331): (163, "ModuleInfoEntryMap"),
("RO_SPACE", 0x05321): (164, "ModuleMap"), ("RO_SPACE", 0x05381): (164, "ModuleMap"),
("RO_SPACE", 0x05371): (165, "ObjectTemplateInfoMap"), ("RO_SPACE", 0x053d1): (165, "ObjectTemplateInfoMap"),
("RO_SPACE", 0x053c1): (166, "PromiseCapabilityMap"), ("RO_SPACE", 0x05421): (166, "PromiseCapabilityMap"),
("RO_SPACE", 0x05411): (167, "PromiseReactionMap"), ("RO_SPACE", 0x05471): (167, "PromiseReactionMap"),
("RO_SPACE", 0x05461): (168, "PrototypeInfoMap"), ("RO_SPACE", 0x054c1): (168, "PrototypeInfoMap"),
("RO_SPACE", 0x054b1): (169, "ScriptMap"), ("RO_SPACE", 0x05511): (169, "ScriptMap"),
("RO_SPACE", 0x05501): (170, "StackFrameInfoMap"), ("RO_SPACE", 0x05561): (170, "StackFrameInfoMap"),
("RO_SPACE", 0x05551): (172, "Tuple3Map"), ("RO_SPACE", 0x055b1): (172, "Tuple3Map"),
("RO_SPACE", 0x055a1): (174, "WasmDebugInfoMap"), ("RO_SPACE", 0x05601): (174, "WasmDebugInfoMap"),
("RO_SPACE", 0x055f1): (175, "WasmExportedFunctionDataMap"), ("RO_SPACE", 0x05651): (175, "WasmExportedFunctionDataMap"),
("RO_SPACE", 0x05641): (176, "CallableTaskMap"), ("RO_SPACE", 0x056a1): (176, "CallableTaskMap"),
("RO_SPACE", 0x05691): (177, "CallbackTaskMap"), ("RO_SPACE", 0x056f1): (177, "CallbackTaskMap"),
("RO_SPACE", 0x056e1): (178, "PromiseFulfillReactionJobTaskMap"), ("RO_SPACE", 0x05741): (178, "PromiseFulfillReactionJobTaskMap"),
("RO_SPACE", 0x05731): (179, "PromiseRejectReactionJobTaskMap"), ("RO_SPACE", 0x05791): (179, "PromiseRejectReactionJobTaskMap"),
("RO_SPACE", 0x05781): (180, "PromiseResolveThenableJobTaskMap"), ("RO_SPACE", 0x057e1): (180, "PromiseResolveThenableJobTaskMap"),
("RO_SPACE", 0x057d1): (181, "MicrotaskQueueMap"), ("RO_SPACE", 0x05831): (181, "MicrotaskQueueMap"),
("RO_SPACE", 0x05821): (182, "AllocationSiteWithWeakNextMap"), ("RO_SPACE", 0x05881): (182, "AllocationSiteWithWeakNextMap"),
("RO_SPACE", 0x05871): (182, "AllocationSiteWithoutWeakNextMap"), ("RO_SPACE", 0x058d1): (182, "AllocationSiteWithoutWeakNextMap"),
("RO_SPACE", 0x058c1): (214, "LoadHandler1Map"), ("RO_SPACE", 0x05921): (214, "LoadHandler1Map"),
("RO_SPACE", 0x05911): (214, "LoadHandler2Map"), ("RO_SPACE", 0x05971): (214, "LoadHandler2Map"),
("RO_SPACE", 0x05961): (214, "LoadHandler3Map"), ("RO_SPACE", 0x059c1): (214, "LoadHandler3Map"),
("RO_SPACE", 0x059b1): (221, "StoreHandler0Map"), ("RO_SPACE", 0x05a11): (221, "StoreHandler0Map"),
("RO_SPACE", 0x05a01): (221, "StoreHandler1Map"), ("RO_SPACE", 0x05a61): (221, "StoreHandler1Map"),
("RO_SPACE", 0x05a51): (221, "StoreHandler2Map"), ("RO_SPACE", 0x05ab1): (221, "StoreHandler2Map"),
("RO_SPACE", 0x05aa1): (221, "StoreHandler3Map"), ("RO_SPACE", 0x05b01): (221, "StoreHandler3Map"),
("MAP_SPACE", 0x00139): (1057, "ExternalMap"), ("MAP_SPACE", 0x00139): (1057, "ExternalMap"),
("MAP_SPACE", 0x00189): (1073, "JSMessageObjectMap"), ("MAP_SPACE", 0x00189): (1073, "JSMessageObjectMap"),
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment