Commit 2474b734, authored by Josh Wolfe, committed by Commit Bot

Convert String.prototype.normalize from JS to CPP builtin

* When V8_I18N_SUPPORT, completely omit the Unibrow no-op placeholder,
  and instead use the CPP builtin that uses ICU.
* Remove %StringNormalize() runtime function.

Bug: v8:5751
CQ_INCLUDE_TRYBOTS=master.tryserver.v8:v8_linux_noi18n_rel_ng
Change-Id: I3499fa4305d421859253a226f4f09794abe94f4c

Change-Id: I3499fa4305d421859253a226f4f09794abe94f4c
Reviewed-on: https://chromium-review.googlesource.com/462405
Reviewed-by: Caitlin Potter <caitp@igalia.com>
Reviewed-by: Daniel Ehrenberg <littledan@chromium.org>
Commit-Queue: Daniel Ehrenberg <littledan@chromium.org>
Cr-Commit-Position: refs/heads/master@{#44328}
parent 8af394d6
...@@ -1830,8 +1830,13 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object, ...@@ -1830,8 +1830,13 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
Builtins::kStringPrototypeLastIndexOf, 1, false); Builtins::kStringPrototypeLastIndexOf, 1, false);
SimpleInstallFunction(prototype, "localeCompare", SimpleInstallFunction(prototype, "localeCompare",
Builtins::kStringPrototypeLocaleCompare, 1, true); Builtins::kStringPrototypeLocaleCompare, 1, true);
#ifdef V8_I18N_SUPPORT
SimpleInstallFunction(prototype, "normalize",
Builtins::kStringPrototypeNormalizeI18N, 0, false);
#else
SimpleInstallFunction(prototype, "normalize", SimpleInstallFunction(prototype, "normalize",
Builtins::kStringPrototypeNormalize, 0, false); Builtins::kStringPrototypeNormalize, 0, false);
#endif // V8_I18N_SUPPORT
SimpleInstallFunction(prototype, "replace", SimpleInstallFunction(prototype, "replace",
Builtins::kStringPrototypeReplace, 2, true); Builtins::kStringPrototypeReplace, 2, true);
SimpleInstallFunction(prototype, "split", Builtins::kStringPrototypeSplit, SimpleInstallFunction(prototype, "split", Builtins::kStringPrototypeSplit,
......
...@@ -809,8 +809,6 @@ namespace internal { ...@@ -809,8 +809,6 @@ namespace internal {
CPP(StringPrototypeLastIndexOf) \ CPP(StringPrototypeLastIndexOf) \
/* ES6 #sec-string.prototype.localecompare */ \ /* ES6 #sec-string.prototype.localecompare */ \
CPP(StringPrototypeLocaleCompare) \ CPP(StringPrototypeLocaleCompare) \
/* ES6 #sec-string.prototype.normalize */ \
CPP(StringPrototypeNormalize) \
/* ES6 #sec-string.prototype.replace */ \ /* ES6 #sec-string.prototype.replace */ \
TFJ(StringPrototypeReplace, 2, kSearch, kReplace) \ TFJ(StringPrototypeReplace, 2, kSearch, kReplace) \
/* ES6 #sec-string.prototype.split */ \ /* ES6 #sec-string.prototype.split */ \
...@@ -963,10 +961,15 @@ namespace internal { ...@@ -963,10 +961,15 @@ namespace internal {
/* ES #sec-string.prototype.tolowercase */ \ /* ES #sec-string.prototype.tolowercase */ \
CPP(StringPrototypeToLowerCaseI18N) \ CPP(StringPrototypeToLowerCaseI18N) \
/* ES #sec-string.prototype.touppercase */ \ /* ES #sec-string.prototype.touppercase */ \
CPP(StringPrototypeToUpperCaseI18N) CPP(StringPrototypeToUpperCaseI18N) \
/* ES #sec-string.prototype.normalize */ \
CPP(StringPrototypeNormalizeI18N)
#else #else
#define BUILTIN_LIST(CPP, API, TFJ, TFS, TFH, ASM, DBG) \ #define BUILTIN_LIST(CPP, API, TFJ, TFS, TFH, ASM, DBG) \
BUILTIN_LIST_BASE(CPP, API, TFJ, TFS, TFH, ASM, DBG) BUILTIN_LIST_BASE(CPP, API, TFJ, TFS, TFH, ASM, DBG) \
\
/* (obsolete) Unibrow version */ \
CPP(StringPrototypeNormalize)
#endif // V8_I18N_SUPPORT #endif // V8_I18N_SUPPORT
#define BUILTIN_PROMISE_REJECTION_PREDICTION_LIST(V) \ #define BUILTIN_PROMISE_REJECTION_PREDICTION_LIST(V) \
......
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
#include "src/i18n.h" #include "src/i18n.h"
#include "src/objects-inl.h" #include "src/objects-inl.h"
#include "unicode/normalizer2.h"
namespace v8 { namespace v8 {
namespace internal { namespace internal {
...@@ -26,6 +28,80 @@ BUILTIN(StringPrototypeToUpperCaseI18N) { ...@@ -26,6 +28,80 @@ BUILTIN(StringPrototypeToUpperCaseI18N) {
return ConvertCase(string, true, isolate); return ConvertCase(string, true, isolate);
} }
// ES #sec-string.prototype.normalize
// String.prototype.normalize ( [ form ] ), implemented with ICU's Normalizer2.
//
// The optional |form| argument defaults to NFC when undefined. Otherwise it is
// converted to a string and must equal "NFC", "NFD", "NFKC" or "NFKD"; any
// other value throws a RangeError (kNormalizationForm). The (flattened) input
// string is returned as-is when ICU reports that all of it is already
// normalized; otherwise a new two-byte string is created from ICU's output.
// If ICU reports a failure, undefined is returned.
BUILTIN(StringPrototypeNormalizeI18N) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(string, "String.prototype.normalize");

  // ICU selects a normalizer through a (name, mode) pair. Begin with the NFC
  // pair, which is also the default, and override only what differs below.
  const char* form_name = "nfc";
  UNormalization2Mode form_mode = UNORM2_COMPOSE;

  Handle<Object> form_input = args.atOrUndefined(isolate, 1);
  if (!form_input->IsUndefined(isolate)) {
    Handle<String> form;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
                                       Object::ToString(isolate, form_input));
    if (String::Equals(form, isolate->factory()->NFC_string())) {
      // Nothing to change: the NFC pair is already selected.
    } else if (String::Equals(form, isolate->factory()->NFD_string())) {
      form_mode = UNORM2_DECOMPOSE;
    } else if (String::Equals(form, isolate->factory()->NFKC_string())) {
      form_name = "nfkc";
    } else if (String::Equals(form, isolate->factory()->NFKD_string())) {
      form_name = "nfkc";
      form_mode = UNORM2_DECOMPOSE;
    } else {
      // Unknown form: report the accepted spellings in the error message.
      Handle<String> valid_forms =
          isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
      THROW_NEW_ERROR_RETURN_FAILURE(
          isolate,
          NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
    }
  }

  int length = string->length();
  string = String::Flatten(string);

  icu::UnicodeString result;
  // Passed to GetUCharBufferFromFlat below; presumably owns a converted copy
  // of the characters when one is needed -- confirm against that helper.
  std::unique_ptr<uc16[]> sap;
  UErrorCode status = U_ZERO_ERROR;
  {
    // Raw character pointers are used in this scope, so heap allocation is
    // disallowed until it ends.
    DisallowHeapAllocation no_gc;
    String::FlatContent content = string->GetFlatContent();
    const UChar* chars = GetUCharBufferFromFlat(content, &sap, length);
    icu::UnicodeString input(false, chars, length);

    // The returned normalizer is a singleton and must not be freed here.
    const icu::Normalizer2* normalizer =
        icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
    DCHECK(U_SUCCESS(status));
    CHECK(normalizer != nullptr);

    // Length of the leading part of |input| that is already normalized.
    int32_t prefix_length = normalizer->spanQuickCheckYes(input, status);
    // Fast path: the whole input is normalized, so hand back the input.
    if (prefix_length == length) return *string;

    icu::UnicodeString suffix = input.tempSubString(prefix_length);
    // Let |result| alias the normalized prefix read-only ...
    result.setTo(false, input.getBuffer(), prefix_length);
    // ... then normalize the remaining suffix and append it to |result|.
    normalizer->normalizeSecondAndAppend(result, suffix, status);
  }

  if (U_FAILURE(status)) {
    return isolate->heap()->undefined_value();
  }

  Vector<const uint16_t> normalized(
      reinterpret_cast<const uint16_t*>(result.getBuffer()), result.length());
  RETURN_RESULT_OR_FAILURE(
      isolate, isolate->factory()->NewStringFromTwoByte(normalized));
}
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8
......
...@@ -264,6 +264,7 @@ BUILTIN(StringPrototypeLocaleCompare) { ...@@ -264,6 +264,7 @@ BUILTIN(StringPrototypeLocaleCompare) {
return Smi::FromInt(str1_length - str2_length); return Smi::FromInt(str1_length - str2_length);
} }
#ifndef V8_I18N_SUPPORT
// ES6 section 21.1.3.12 String.prototype.normalize ( [form] ) // ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
// //
// Simply checks the argument is valid and returns the string itself. // Simply checks the argument is valid and returns the string itself.
...@@ -297,6 +298,7 @@ BUILTIN(StringPrototypeNormalize) { ...@@ -297,6 +298,7 @@ BUILTIN(StringPrototypeNormalize) {
return *string; return *string;
} }
#endif // !V8_I18N_SUPPORT
BUILTIN(StringPrototypeStartsWith) { BUILTIN(StringPrototypeStartsWith) {
HandleScope handle_scope(isolate); HandleScope handle_scope(isolate);
......
...@@ -113,6 +113,10 @@ ...@@ -113,6 +113,10 @@
V(nan_string, "NaN") \ V(nan_string, "NaN") \
V(new_target_string, ".new.target") \ V(new_target_string, ".new.target") \
V(next_string, "next") \ V(next_string, "next") \
V(NFC_string, "NFC") \
V(NFD_string, "NFD") \
V(NFKC_string, "NFKC") \
V(NFKD_string, "NFKD") \
V(not_equal, "not-equal") \ V(not_equal, "not-equal") \
V(null_string, "null") \ V(null_string, "null") \
V(null_to_string, "[object Null]") \ V(null_to_string, "[object Null]") \
......
...@@ -2028,33 +2028,6 @@ OverrideFunction(GlobalString.prototype, 'localeCompare', function(that) { ...@@ -2028,33 +2028,6 @@ OverrideFunction(GlobalString.prototype, 'localeCompare', function(that) {
); );
/**
* Unicode normalization. This method is called with one argument that
* specifies the normalization form.
* If none is specified, "NFC" is assumed.
* If the form is not one of "NFC", "NFD", "NFKC", or "NFKD", then throw
* a RangeError Exception.
*
* The receiver must be object-coercible and is converted to a string. The
* form is passed to %StringNormalize as its index in NORMALIZATION_FORMS,
* not as a string.
*/
OverrideFunction(GlobalString.prototype, 'normalize', function() {
CHECK_OBJECT_COERCIBLE(this, "String.prototype.normalize");
var s = TO_STRING(this);
// Only the first argument is consulted; undefined selects the default form.
var formArg = arguments[0];
var form = IS_UNDEFINED(formArg) ? 'NFC' : TO_STRING(formArg);
// The position of each entry is the form id handed to %StringNormalize.
var NORMALIZATION_FORMS = ['NFC', 'NFD', 'NFKC', 'NFKD'];
var normalizationForm = %ArrayIndexOf(NORMALIZATION_FORMS, form, 0);
if (normalizationForm === -1) {
// Unknown form: the error message lists the accepted forms.
throw %make_range_error(kNormalizationForm,
%_Call(ArrayJoin, NORMALIZATION_FORMS, ', '));
}
return %StringNormalize(s, normalizationForm);
}
);
// TODO(littledan): Rewrite these two functions as C++ builtins // TODO(littledan): Rewrite these two functions as C++ builtins
function ToLowerCaseI18N() { function ToLowerCaseI18N() {
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLowerCase"); CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLowerCase");
......
...@@ -29,7 +29,6 @@ ...@@ -29,7 +29,6 @@
#include "unicode/fieldpos.h" #include "unicode/fieldpos.h"
#include "unicode/fpositer.h" #include "unicode/fpositer.h"
#include "unicode/locid.h" #include "unicode/locid.h"
#include "unicode/normalizer2.h"
#include "unicode/numfmt.h" #include "unicode/numfmt.h"
#include "unicode/numsys.h" #include "unicode/numsys.h"
#include "unicode/rbbi.h" #include "unicode/rbbi.h"
...@@ -603,64 +602,6 @@ RUNTIME_FUNCTION(Runtime_InternalCompare) { ...@@ -603,64 +602,6 @@ RUNTIME_FUNCTION(Runtime_InternalCompare) {
} }
// Runtime function behind %StringNormalize(s, form_id).
//
// args[0] is the string to normalize and args[1] is an integer index into
// |normalizationForms|. Returns |s| (flattened) when ICU reports the whole
// string is already normalized, a new two-byte string built from ICU's output
// otherwise, and undefined if ICU reports a failure.
RUNTIME_FUNCTION(Runtime_StringNormalize) {
HandleScope scope(isolate);
// (ICU name, mode) pairs indexed by form id. The JS caller passes the index
// of the form in ['NFC', 'NFD', 'NFKC', 'NFKD'], so the entries here follow
// that same order.
static const struct {
const char* name;
UNormalization2Mode mode;
} normalizationForms[] = {
{"nfc", UNORM2_COMPOSE},
{"nfc", UNORM2_DECOMPOSE},
{"nfkc", UNORM2_COMPOSE},
{"nfkc", UNORM2_DECOMPOSE},
};
DCHECK_EQ(2, args.length());
CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]);
// Reject out-of-range form ids before indexing the table.
CHECK(form_id >= 0 &&
static_cast<size_t>(form_id) < arraysize(normalizationForms));
int length = s->length();
s = String::Flatten(s);
icu::UnicodeString result;
// Passed to GetUCharBufferFromFlat below; presumably owns a converted copy of
// the characters when one is needed -- confirm against that helper.
std::unique_ptr<uc16[]> sap;
UErrorCode status = U_ZERO_ERROR;
{
// Raw character pointers are used in this scope, so heap allocation is
// disallowed until it ends.
DisallowHeapAllocation no_gc;
String::FlatContent flat = s->GetFlatContent();
const UChar* src = GetUCharBufferFromFlat(flat, &sap, length);
icu::UnicodeString input(false, src, length);
// Getting a singleton. Should not free it.
const icu::Normalizer2* normalizer =
icu::Normalizer2::getInstance(nullptr, normalizationForms[form_id].name,
normalizationForms[form_id].mode, status);
DCHECK(U_SUCCESS(status));
CHECK(normalizer != nullptr);
// Length of the leading part of |input| that is already normalized.
int32_t normalized_prefix_length =
normalizer->spanQuickCheckYes(input, status);
// Quick return if the input is already normalized.
if (length == normalized_prefix_length) return *s;
icu::UnicodeString unnormalized =
input.tempSubString(normalized_prefix_length);
// Read-only alias of the normalized prefix.
result.setTo(false, input.getBuffer(), normalized_prefix_length);
// copy-on-write; normalize the suffix and append to |result|.
normalizer->normalizeSecondAndAppend(result, unnormalized, status);
}
// Any ICU failure recorded in |status| yields undefined instead of a string.
if (U_FAILURE(status)) {
return isolate->heap()->undefined_value();
}
RETURN_RESULT_OR_FAILURE(
isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
reinterpret_cast<const uint16_t*>(result.getBuffer()),
result.length())));
}
RUNTIME_FUNCTION(Runtime_CreateBreakIterator) { RUNTIME_FUNCTION(Runtime_CreateBreakIterator) {
HandleScope scope(isolate); HandleScope scope(isolate);
......
...@@ -262,7 +262,6 @@ namespace internal { ...@@ -262,7 +262,6 @@ namespace internal {
F(InternalNumberFormat, 2, 1) \ F(InternalNumberFormat, 2, 1) \
F(CreateCollator, 3, 1) \ F(CreateCollator, 3, 1) \
F(InternalCompare, 3, 1) \ F(InternalCompare, 3, 1) \
F(StringNormalize, 2, 1) \
F(CreateBreakIterator, 3, 1) \ F(CreateBreakIterator, 3, 1) \
F(BreakIteratorAdoptText, 2, 1) \ F(BreakIteratorAdoptText, 2, 1) \
F(BreakIteratorFirst, 1, 1) \ F(BreakIteratorFirst, 1, 1) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment