Commit aeeacdda authored by littledan's avatar littledan Committed by Commit bot

[builtins] Move non-i18n String case conversion functions to C++

BUG=v8:5880
CQ_INCLUDE_TRYBOTS=master.tryserver.v8:v8_linux_noi18n_rel_ng

Review-Url: https://codereview.chromium.org/2689283008
Cr-Commit-Position: refs/heads/master@{#43246}
parent 5f1661aa
......@@ -1589,10 +1589,10 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
Builtins::kStringPrototypeLocaleCompare, 1, true);
SimpleInstallFunction(prototype, "normalize",
Builtins::kStringPrototypeNormalize, 0, false);
SimpleInstallFunction(prototype, "split", Builtins::kStringPrototypeSplit,
2, true);
SimpleInstallFunction(prototype, "replace",
Builtins::kStringPrototypeReplace, 2, true);
SimpleInstallFunction(prototype, "split", Builtins::kStringPrototypeSplit,
2, true);
SimpleInstallFunction(prototype, "substr", Builtins::kStringPrototypeSubstr,
2, true);
SimpleInstallFunction(prototype, "substring",
......@@ -1607,6 +1607,16 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
Builtins::kStringPrototypeTrimLeft, 0, false);
SimpleInstallFunction(prototype, "trimRight",
Builtins::kStringPrototypeTrimRight, 0, false);
SimpleInstallFunction(prototype, "toLocaleLowerCase",
Builtins::kStringPrototypeToLocaleLowerCase, 0,
false);
SimpleInstallFunction(prototype, "toLocaleUpperCase",
Builtins::kStringPrototypeToLocaleUpperCase, 0,
false);
SimpleInstallFunction(prototype, "toLowerCase",
Builtins::kStringPrototypeToLowerCase, 0, false);
SimpleInstallFunction(prototype, "toUpperCase",
Builtins::kStringPrototypeToUpperCase, 0, false);
SimpleInstallFunction(prototype, "valueOf",
Builtins::kStringPrototypeValueOf, 0, true);
......
......@@ -8,6 +8,9 @@
#include "src/code-factory.h"
#include "src/code-stub-assembler.h"
#include "src/regexp/regexp-utils.h"
#include "src/string-case.h"
#include "src/unicode-inl.h"
#include "src/unicode.h"
namespace v8 {
namespace internal {
......@@ -1912,5 +1915,197 @@ TF_BUILTIN(StringIteratorPrototypeNext, StringBuiltinsAssembler) {
}
}
namespace {
// Tells whether upper-casing |character| produces something that no longer
// fits into a one-byte string. Only two one-byte characters behave that way.
inline bool ToUpperOverflows(uc32 character) {
  switch (character) {
    case 0xb5:  // micro sign
    case 0xff:  // y with umlauts
      return true;
    default:
      return false;
  }
}
// Converts |string| through |mapping| and writes the converted characters
// into the pre-allocated |result|, which has room for |result_length|
// characters.
//
// Returns one of:
//  - |result|, if at least one character changed;
//  - |string|, if the conversion changed nothing;
//  - a Smi holding the exact length to retry with, if |result| (sized like
//    the input) turned out to be unsuitable. The Smi is negated when a
//    to-upper conversion into a one-byte |result| needs a two-byte string;
//  - the failure value from THROW_NEW_ERROR_RETURN_FAILURE, if the exact
//    length exceeds String::kMaxLength.
//
// |string| must not be empty: the first character is read unconditionally.
template <class Converter>
MUST_USE_RESULT static Object* ConvertCaseHelper(
    Isolate* isolate, String* string, SeqString* result, int result_length,
    unibrow::Mapping<Converter, 128>* mapping) {
  DisallowHeapAllocation no_gc;
  // We try this twice, once with the assumption that the result is no longer
  // than the input and, if that assumption breaks, again with the exact
  // length.  This may not be pretty, but it is nicer than what was here before
  // and I hereby claim my vaffel-is.
  //
  // NOTE: This assumes that the upper/lower case of an ASCII
  // character is also ASCII.  This is currently the case, but it
  // might break in the future if we implement more context and locale
  // dependent upper/lower conversions.
  bool has_changed_character = false;

  // Convert all characters, assuming that they will fit in the buffer.
  StringCharacterStream stream(string);
  unibrow::uchar chars[Converter::kMaxWidth];
  // We can assume that the string is not empty.
  uc32 current = stream.GetNext();
  // Overflow out of the one-byte range can only happen for to-upper
  // conversions that write into a one-byte result.
  bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString();
  for (int i = 0; i < result_length;) {
    // The mapping wants one character of lookahead, so |next| is pulled off
    // the stream before |current| is converted.
    bool has_next = stream.HasMore();
    uc32 next = has_next ? stream.GetNext() : 0;
    int char_length = mapping->get(current, next, chars);
    if (char_length == 0) {
      // The case conversion of this character is the character itself.
      result->Set(i, current);
      i++;
    } else if (char_length == 1 &&
               (ignore_overflow || !ToUpperOverflows(current))) {
      // Common case: converting the letter resulted in one character.
      DCHECK(static_cast<uc32>(chars[0]) != current);
      result->Set(i, chars[0]);
      has_changed_character = true;
      i++;
    } else if (result_length == string->length()) {
      bool overflows = ToUpperOverflows(current);
      // We've assumed that the result would be as long as the
      // input but here is a character that converts to several
      // characters (or that no longer fits into one byte).  No matter,
      // we calculate the exact length of the result and try the whole
      // thing again.
      //
      // Note that this leaves room for optimization.  We could just
      // memcpy what we already have to the result string.  Also,
      // the result string is the last object allocated we could
      // "realloc" it and probably, in the vast majority of cases,
      // extend the existing string to be able to hold the full
      // result.
      int next_length = 0;
      if (has_next) {
        // |next| has already been consumed from the stream, so the loop
        // below never sees it.  It has to be checked here; otherwise an
        // overflowing character right after |current| would be written
        // into a one-byte result on the retry.
        overflows |= ToUpperOverflows(next);
        next_length = mapping->get(next, 0, chars);
        if (next_length == 0) next_length = 1;
      }
      int current_length = i + char_length + next_length;
      while (stream.HasMore()) {
        current = stream.GetNext();
        overflows |= ToUpperOverflows(current);
        // NOTE: we use 0 as the next character here because, while
        // the next character may affect what a character converts to,
        // it does not in any case affect the length of what it converts
        // to.
        int converted_length = mapping->get(current, 0, chars);
        if (converted_length == 0) converted_length = 1;
        current_length += converted_length;
        if (current_length > String::kMaxLength) {
          AllowHeapAllocation allocate_error_and_return;
          THROW_NEW_ERROR_RETURN_FAILURE(isolate,
                                         NewInvalidStringLengthError());
        }
      }
      // Try again with the real length.  Return signed if we need
      // to allocate a two-byte string for to uppercase.
      return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
                                             : Smi::FromInt(current_length);
    } else {
      // Second attempt: |result| has the exact length, so every converted
      // character can be written out.
      for (int j = 0; j < char_length; j++) {
        result->Set(i, chars[j]);
        i++;
      }
      has_changed_character = true;
    }
    current = next;
  }
  if (has_changed_character) {
    return result;
  } else {
    // If we didn't actually change anything in doing the conversion
    // we simply return the original string and let the converted copy
    // become garbage; there is no reason to keep two identical strings
    // alive.
    return string;
  }
}
// Case-converts |s| with |mapping| and returns the converted string, |s|
// itself when nothing changed, or a failure value when an allocation or the
// helper raised an exception.
//
// Strategy: flatten, try the ASCII-only fast path for one-byte content, and
// otherwise run ConvertCaseHelper, retrying once with an exactly sized buffer
// when the helper answers with a Smi length.
template <class Converter>
MUST_USE_RESULT static Object* ConvertCase(
    Handle<String> s, Isolate* isolate,
    unibrow::Mapping<Converter, 128>* mapping) {
  s = String::Flatten(s);
  int length = s->length();
  // The helper reads the first character unconditionally, so the empty
  // string has to be dealt with here.
  if (length == 0) return *s;

  // ASCII fast path.
  //
  // NOTE: This relies on the upper/lower case of an ASCII character being
  // ASCII as well. That holds today, but more context- or locale-dependent
  // conversions could break it.
  if (s->IsOneByteRepresentationUnderneath()) {
    // Output buffer of the same length as the input.
    Handle<SeqOneByteString> ascii_result =
        isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
    DisallowHeapAllocation no_gc;
    String::FlatContent content = s->GetFlatContent();
    DCHECK(content.IsFlat());
    char* destination = reinterpret_cast<char*>(ascii_result->GetChars());
    const char* source =
        reinterpret_cast<const char*>(content.ToOneByteVector().start());
    bool changed = false;
    int processed = FastAsciiConvert<Converter::kIsToLower>(
        destination, source, length, &changed);
    // Anything short of |length| means a non-ASCII character was hit; the
    // partial output is dropped and the general path below takes over.
    if (processed == length) {
      if (changed) return *ascii_result;
      return *s;
    }
  }

  // First attempt: a buffer as long as the input.
  Handle<SeqString> buffer;
  if (s->IsOneByteRepresentation()) {
    buffer = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
  } else {
    buffer = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
  }

  Object* outcome = ConvertCaseHelper(isolate, *s, *buffer, length, mapping);
  if (outcome->IsException(isolate)) return outcome;
  if (outcome->IsString()) return outcome;

  // A Smi carries the exact length; a non-positive value asks for a
  // two-byte buffer.
  DCHECK(outcome->IsSmi());
  length = Smi::cast(outcome)->value();
  const bool fits_one_byte = length > 0 && s->IsOneByteRepresentation();
  if (!fits_one_byte) {
    if (length < 0) length = -length;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, buffer, isolate->factory()->NewRawTwoByteString(length));
  } else {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, buffer, isolate->factory()->NewRawOneByteString(length));
  }
  return ConvertCaseHelper(isolate, *s, *buffer, length, mapping);
}
} // namespace
// ES #sec-string.prototype.tolocalelowercase
// Converts the receiver with the isolate's to-lower mapping.
BUILTIN(StringPrototypeToLocaleLowerCase) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toLocaleLowerCase");
  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(string, isolate, lower_mapping);
}
// ES #sec-string.prototype.tolocaleuppercase
// Converts the receiver with the isolate's to-upper mapping.
BUILTIN(StringPrototypeToLocaleUpperCase) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toLocaleUpperCase");
  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(string, isolate, upper_mapping);
}
// ES #sec-string.prototype.tolowercase
// Converts the receiver with the isolate's to-lower mapping.
BUILTIN(StringPrototypeToLowerCase) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toLowerCase");
  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(string, isolate, lower_mapping);
}
// ES #sec-string.prototype.touppercase
// Converts the receiver with the isolate's to-upper mapping.
BUILTIN(StringPrototypeToUpperCase) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toUpperCase");
  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(string, isolate, upper_mapping);
}
} // namespace internal
} // namespace v8
......@@ -768,6 +768,14 @@ class Isolate;
CPP(StringPrototypeStartsWith) \
/* ES6 section 21.1.3.25 String.prototype.toString () */ \
TFJ(StringPrototypeToString, 0) \
/* ES #sec-string.prototype.tolocalelowercase */ \
CPP(StringPrototypeToLocaleLowerCase) \
/* ES #sec-string.prototype.tolocaleuppercase */ \
CPP(StringPrototypeToLocaleUpperCase) \
/* ES #sec-string.prototype.tolowercase */ \
CPP(StringPrototypeToLowerCase) \
/* ES #sec-string.prototype.touppercase */ \
CPP(StringPrototypeToUpperCase) \
CPP(StringPrototypeTrim) \
CPP(StringPrototypeTrimLeft) \
CPP(StringPrototypeTrimRight) \
......
......@@ -297,8 +297,6 @@ bool IntrinsicHasNoSideEffect(Runtime::FunctionId id) {
case Runtime::kStringReplaceOneCharWithString:
case Runtime::kSubString:
case Runtime::kInlineSubString:
case Runtime::kStringToLowerCase:
case Runtime::kStringToUpperCase:
case Runtime::kRegExpInternalReplace:
// Literals.
case Runtime::kCreateArrayLiteral:
......@@ -463,6 +461,8 @@ bool BuiltinHasNoSideEffect(Builtins::Name id) {
case Builtins::kStringPrototypeSubstr:
case Builtins::kStringPrototypeSubstring:
case Builtins::kStringPrototypeToString:
case Builtins::kStringPrototypeToLowerCase:
case Builtins::kStringPrototypeToUpperCase:
case Builtins::kStringPrototypeTrim:
case Builtins::kStringPrototypeTrimLeft:
case Builtins::kStringPrototypeTrimRight:
......
......@@ -723,8 +723,8 @@ function addWECPropertyIfDefined(object, property, value) {
* Returns titlecased word, aMeRicA -> America.
*/
function toTitleCaseWord(word) {
return %StringToUpperCase(%_Call(StringSubstr, word, 0, 1)) +
%StringToLowerCase(%_Call(StringSubstr, word, 1));
return %StringToUpperCaseI18N(%_Call(StringSubstr, word, 0, 1)) +
%StringToLowerCaseI18N(%_Call(StringSubstr, word, 1));
}
/**
......@@ -745,7 +745,7 @@ function toTitleCaseTimezoneLocation(location) {
var parts = %StringSplit(match[2], separator, kMaxUint32);
for (var i = 1; i < parts.length; i++) {
var part = parts[i]
var lowercasedPart = %StringToLowerCase(part);
var lowercasedPart = %StringToLowerCaseI18N(part);
result = result + separator +
((lowercasedPart !== 'es' &&
lowercasedPart !== 'of' && lowercasedPart !== 'au') ?
......@@ -851,7 +851,7 @@ function isStructuallyValidLanguageTag(locale) {
return false;
}
locale = %StringToLowerCase(locale);
locale = %StringToLowerCaseI18N(locale);
// Just return if it's a x- form. It's all private.
if (%StringIndexOf(locale, 'x-', 0) === 0) {
......@@ -1179,7 +1179,7 @@ function CreateNumberFormat(locales, options) {
var currencyDisplay = getOption(
'currencyDisplay', 'string', ['code', 'symbol', 'name'], 'symbol');
if (internalOptions.style === 'currency') {
defineWEProperty(internalOptions, 'currency', %StringToUpperCase(currency));
defineWEProperty(internalOptions, 'currency', %StringToUpperCaseI18N(currency));
defineWEProperty(internalOptions, 'currencyDisplay', currencyDisplay);
}
......@@ -1768,7 +1768,7 @@ function canonicalizeTimeZoneID(tzID) {
tzID = TO_STRING(tzID);
// Special case handling (UTC, GMT).
var upperID = %StringToUpperCase(tzID);
var upperID = %StringToUpperCaseI18N(tzID);
if (upperID === 'UTC' || upperID === 'GMT' ||
upperID === 'ETC/UTC' || upperID === 'ETC/GMT') {
return 'UTC';
......
......@@ -106,38 +106,6 @@ function StringSlice(start, end) {
return %_SubString(s, start_i, end_i);
}
// TODO(littledan): Rewrite these four functions as C++ builtins
// ECMA-262, 15.5.4.16
function StringToLowerCaseJS() {
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLowerCase");
return %StringToLowerCase(TO_STRING(this));
}
// ECMA-262, 15.5.4.17
function StringToLocaleLowerCase() {
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLocaleLowerCase");
return %StringToLowerCase(TO_STRING(this));
}
// ECMA-262, 15.5.4.18
function StringToUpperCaseJS() {
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toUpperCase");
return %StringToUpperCase(TO_STRING(this));
}
// ECMA-262, 15.5.4.19
function StringToLocaleUpperCase() {
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLocaleUpperCase");
return %StringToUpperCase(TO_STRING(this));
}
// ES6 draft, revision 26 (2014-07-18), section B.2.3.2.1
function HtmlEscape(str) {
......@@ -325,10 +293,6 @@ utils.InstallFunctions(GlobalString.prototype, DONT_ENUM, [
"repeat", StringRepeat,
"search", StringSearch,
"slice", StringSlice,
"toLowerCase", StringToLowerCaseJS,
"toLocaleLowerCase", StringToLocaleLowerCase,
"toUpperCase", StringToUpperCaseJS,
"toLocaleUpperCase", StringToLocaleUpperCase,
"link", StringLink,
"anchor", StringAnchor,
......
......@@ -10,7 +10,6 @@
#include "src/objects-inl.h"
#include "src/regexp/jsregexp-inl.h"
#include "src/string-builder.h"
#include "src/string-case.h"
#include "src/string-search.h"
namespace v8 {
......@@ -633,182 +632,6 @@ RUNTIME_FUNCTION(Runtime_StringToArray) {
}
// Reports whether upper-casing |character| yields a character that does not
// fit into one byte any more. Exactly two one-byte characters do this.
static inline bool ToUpperOverflows(uc32 character) {
  const bool is_yuml = (character == 0xff);   // y with umlauts
  const bool is_micro = (character == 0xb5);  // micro sign
  return is_yuml || is_micro;
}
// Converts |string| through |mapping| into the pre-allocated |result|, which
// has room for |result_length| characters.
//
// Returns |result| if at least one character changed, |string| if nothing
// changed, a Smi holding the exact length to retry with (negated when a
// to-upper conversion into a one-byte |result| needs a two-byte string), or
// the failure value produced by THROW_NEW_ERROR_RETURN_FAILURE when the
// computed length exceeds String::kMaxLength.
//
// |string| must not be empty: the first character is read unconditionally.
template <class Converter>
MUST_USE_RESULT static Object* ConvertCaseHelper(
Isolate* isolate, String* string, SeqString* result, int result_length,
unibrow::Mapping<Converter, 128>* mapping) {
DisallowHeapAllocation no_gc;
// We try this twice, once with the assumption that the result is no longer
// than the input and, if that assumption breaks, again with the exact
// length. This may not be pretty, but it is nicer than what was here before
// and I hereby claim my vaffel-is.
//
// NOTE: This assumes that the upper/lower case of an ASCII
// character is also ASCII. This is currently the case, but it
// might break in the future if we implement more context and locale
// dependent upper/lower conversions.
bool has_changed_character = false;
// Convert all characters, assuming that they will fit in the buffer.
StringCharacterStream stream(string);
unibrow::uchar chars[Converter::kMaxWidth];
// We can assume that the string is not empty
uc32 current = stream.GetNext();
// Overflow out of the one-byte range only matters for to-upper conversions
// that write into a one-byte result.
bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString();
for (int i = 0; i < result_length;) {
// The mapping takes one character of lookahead, so |next| is pulled off the
// stream before |current| is converted.
bool has_next = stream.HasMore();
uc32 next = has_next ? stream.GetNext() : 0;
int char_length = mapping->get(current, next, chars);
if (char_length == 0) {
// The case conversion of this character is the character itself.
result->Set(i, current);
i++;
} else if (char_length == 1 &&
(ignore_overflow || !ToUpperOverflows(current))) {
// Common case: converting the letter resulted in one character.
DCHECK(static_cast<uc32>(chars[0]) != current);
result->Set(i, chars[0]);
has_changed_character = true;
i++;
} else if (result_length == string->length()) {
bool overflows = ToUpperOverflows(current);
// We've assumed that the result would be as long as the
// input but here is a character that converts to several
// characters. No matter, we calculate the exact length
// of the result and try the whole thing again.
//
// Note that this leaves room for optimization. We could just
// memcpy what we already have to the result string. Also,
// the result string is the last object allocated we could
// "realloc" it and probably, in the vast majority of cases,
// extend the existing string to be able to hold the full
// result.
// NOTE(review): |next| was already consumed from the stream, and only its
// length is accounted for below; it is never passed to ToUpperOverflows, so
// |overflows| can miss an overflowing character right after |current|.
int next_length = 0;
if (has_next) {
next_length = mapping->get(next, 0, chars);
if (next_length == 0) next_length = 1;
}
int current_length = i + char_length + next_length;
while (stream.HasMore()) {
current = stream.GetNext();
overflows |= ToUpperOverflows(current);
// NOTE: we use 0 as the next character here because, while
// the next character may affect what a character converts to,
// it does not in any case affect the length of what it converts
// to.
int char_length = mapping->get(current, 0, chars);
if (char_length == 0) char_length = 1;
current_length += char_length;
if (current_length > String::kMaxLength) {
AllowHeapAllocation allocate_error_and_return;
THROW_NEW_ERROR_RETURN_FAILURE(isolate,
NewInvalidStringLengthError());
}
}
// Try again with the real length. Return signed if we need
// to allocate a two-byte string for to uppercase.
return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
: Smi::FromInt(current_length);
} else {
// |result| does not have the input's length here (the retry with the
// computed length), so every converted character is written out.
for (int j = 0; j < char_length; j++) {
result->Set(i, chars[j]);
i++;
}
has_changed_character = true;
}
current = next;
}
if (has_changed_character) {
return result;
} else {
// If we didn't actually change anything in doing the conversion
// we simply return the original string and let the converted copy
// become garbage; there is no reason to keep two identical strings
// alive.
return string;
}
}
// Case-converts |s| with |mapping|. Returns the converted string, |s| itself
// when nothing changed, or a failure value when the helper or the retry
// allocation raised an exception.
//
// The string is flattened, an ASCII-only fast path is tried for one-byte
// content, and otherwise ConvertCaseHelper runs, once with an input-sized
// buffer and, if it answers with a Smi length, again with an exact-sized one.
template <class Converter>
MUST_USE_RESULT static Object* ConvertCase(
Handle<String> s, Isolate* isolate,
unibrow::Mapping<Converter, 128>* mapping) {
s = String::Flatten(s);
int length = s->length();
// Assume that the string is not empty; we need this assumption later
if (length == 0) return *s;
// Simpler handling of ASCII strings.
//
// NOTE: This assumes that the upper/lower case of an ASCII
// character is also ASCII. This is currently the case, but it
// might break in the future if we implement more context and locale
// dependent upper/lower conversions.
if (s->IsOneByteRepresentationUnderneath()) {
// Same length as input.
Handle<SeqOneByteString> result =
isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
DisallowHeapAllocation no_gc;
String::FlatContent flat_content = s->GetFlatContent();
DCHECK(flat_content.IsFlat());
bool has_changed_character = false;
int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
reinterpret_cast<char*>(result->GetChars()),
reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
length, &has_changed_character);
// If not ASCII, we discard the result and take the 2 byte path.
// (The fast path succeeded only if it processed all |length| characters.)
if (index_to_first_unprocessed == length)
return has_changed_character ? *result : *s;
}
// General path, first attempt with a buffer of the input's length and width.
Handle<SeqString> result; // Same length as input.
if (s->IsOneByteRepresentation()) {
result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
} else {
result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
}
Object* answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
// An exception or a finished string is returned as is.
if (answer->IsException(isolate) || answer->IsString()) return answer;
// Otherwise the helper reported the exact length as a Smi; a negative value
// asks for a two-byte buffer.
DCHECK(answer->IsSmi());
length = Smi::cast(answer)->value();
if (s->IsOneByteRepresentation() && length > 0) {
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, isolate->factory()->NewRawOneByteString(length));
} else {
if (length < 0) length = -length;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, isolate->factory()->NewRawTwoByteString(length));
}
// Second attempt with the exactly sized buffer.
return ConvertCaseHelper(isolate, *s, *result, length, mapping);
}
// Runtime entry: lower-cases its single String argument with the isolate's
// to-lower mapping.
RUNTIME_FUNCTION(Runtime_StringToLowerCase) {
  HandleScope handle_scope(isolate);
  DCHECK_EQ(args.length(), 1);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(s, isolate, lower_mapping);
}
// Runtime entry: upper-cases its single String argument with the isolate's
// to-upper mapping.
RUNTIME_FUNCTION(Runtime_StringToUpperCase) {
  HandleScope handle_scope(isolate);
  DCHECK_EQ(args.length(), 1);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(s, isolate, upper_mapping);
}
RUNTIME_FUNCTION(Runtime_StringLessThan) {
HandleScope handle_scope(isolate);
DCHECK_EQ(2, args.length());
......
......@@ -528,8 +528,6 @@ namespace internal {
F(StringBuilderJoin, 3, 1) \
F(SparseJoinWithSeparator, 3, 1) \
F(StringToArray, 2, 1) \
F(StringToLowerCase, 1, 1) \
F(StringToUpperCase, 1, 1) \
F(StringLessThan, 2, 1) \
F(StringLessThanOrEqual, 2, 1) \
F(StringGreaterThan, 2, 1) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment