Commit eb550c6d authored by yangguo@chromium.org

Fix y-umlaut to uppercase.

R=dcarney@chromium.org
BUG=v8:2984

Review URL: https://codereview.chromium.org/59853006

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@17545 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent c44a4d38
...@@ -4111,13 +4111,12 @@ MaybeObject* Heap::LookupSingleCharacterStringFromCode(uint16_t code) { ...@@ -4111,13 +4111,12 @@ MaybeObject* Heap::LookupSingleCharacterStringFromCode(uint16_t code) {
return result; return result;
} }
Object* result; SeqTwoByteString* result;
{ MaybeObject* maybe_result = AllocateRawTwoByteString(1); { MaybeObject* maybe_result = AllocateRawTwoByteString(1);
if (!maybe_result->ToObject(&result)) return maybe_result; if (!maybe_result->To<SeqTwoByteString>(&result)) return maybe_result;
} }
String* answer = String::cast(result); result->SeqTwoByteStringSet(0, code);
answer->Set(0, code); return result;
return answer;
} }
......
...@@ -6192,6 +6192,7 @@ template <class Converter> ...@@ -6192,6 +6192,7 @@ template <class Converter>
MUST_USE_RESULT static MaybeObject* ConvertCaseHelper( MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
Isolate* isolate, Isolate* isolate,
String* s, String* s,
String::Encoding result_encoding,
int length, int length,
int input_string_length, int input_string_length,
unibrow::Mapping<Converter, 128>* mapping) { unibrow::Mapping<Converter, 128>* mapping) {
...@@ -6207,7 +6208,7 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper( ...@@ -6207,7 +6208,7 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
// might break in the future if we implement more context and locale // might break in the future if we implement more context and locale
// dependent upper/lower conversions. // dependent upper/lower conversions.
Object* o; Object* o;
{ MaybeObject* maybe_o = s->IsOneByteRepresentation() { MaybeObject* maybe_o = result_encoding == String::ONE_BYTE_ENCODING
? isolate->heap()->AllocateRawOneByteString(length) ? isolate->heap()->AllocateRawOneByteString(length)
: isolate->heap()->AllocateRawTwoByteString(length); : isolate->heap()->AllocateRawTwoByteString(length);
if (!maybe_o->ToObject(&o)) return maybe_o; if (!maybe_o->ToObject(&o)) return maybe_o;
...@@ -6215,6 +6216,8 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper( ...@@ -6215,6 +6216,8 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
String* result = String::cast(o); String* result = String::cast(o);
bool has_changed_character = false; bool has_changed_character = false;
DisallowHeapAllocation no_gc;
// Convert all characters to upper case, assuming that they will fit // Convert all characters to upper case, assuming that they will fit
// in the buffer // in the buffer
Access<ConsStringIteratorOp> op( Access<ConsStringIteratorOp> op(
...@@ -6223,6 +6226,10 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper( ...@@ -6223,6 +6226,10 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
unibrow::uchar chars[Converter::kMaxWidth]; unibrow::uchar chars[Converter::kMaxWidth];
// We can assume that the string is not empty // We can assume that the string is not empty
uc32 current = stream.GetNext(); uc32 current = stream.GetNext();
// y with umlauts is the only character that stops fitting into one-byte
// when converting to uppercase.
static const uc32 yuml_code = 0xff;
bool ignore_yuml = result->IsSeqTwoByteString() || Converter::kIsToLower;
for (int i = 0; i < length;) { for (int i = 0; i < length;) {
bool has_next = stream.HasMore(); bool has_next = stream.HasMore();
uc32 next = has_next ? stream.GetNext() : 0; uc32 next = has_next ? stream.GetNext() : 0;
...@@ -6231,13 +6238,14 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper( ...@@ -6231,13 +6238,14 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
// The case conversion of this character is the character itself. // The case conversion of this character is the character itself.
result->Set(i, current); result->Set(i, current);
i++; i++;
} else if (char_length == 1) { } else if (char_length == 1 && (ignore_yuml || current != yuml_code)) {
// Common case: converting the letter resulted in one character. // Common case: converting the letter resulted in one character.
ASSERT(static_cast<uc32>(chars[0]) != current); ASSERT(static_cast<uc32>(chars[0]) != current);
result->Set(i, chars[0]); result->Set(i, chars[0]);
has_changed_character = true; has_changed_character = true;
i++; i++;
} else if (length == input_string_length) { } else if (length == input_string_length) {
bool found_yuml = (current == yuml_code);
// We've assumed that the result would be as long as the // We've assumed that the result would be as long as the
// input but here is a character that converts to several // input but here is a character that converts to several
// characters. No matter, we calculate the exact length // characters. No matter, we calculate the exact length
...@@ -6257,6 +6265,7 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper( ...@@ -6257,6 +6265,7 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
int current_length = i + char_length + next_length; int current_length = i + char_length + next_length;
while (stream.HasMore()) { while (stream.HasMore()) {
current = stream.GetNext(); current = stream.GetNext();
found_yuml |= (current == yuml_code);
// NOTE: we use 0 as the next character here because, while // NOTE: we use 0 as the next character here because, while
// the next character may affect what a character converts to, // the next character may affect what a character converts to,
// it does not in any case affect the length of what it convert // it does not in any case affect the length of what it convert
...@@ -6269,8 +6278,10 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper( ...@@ -6269,8 +6278,10 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
return Failure::OutOfMemoryException(0x13); return Failure::OutOfMemoryException(0x13);
} }
} }
// Try again with the real length. // Try again with the real length. Return signed if we need
return Smi::FromInt(current_length); // to allocate a two-byte string for y-umlaut to uppercase.
return (found_yuml && !ignore_yuml) ? Smi::FromInt(-current_length)
: Smi::FromInt(current_length);
} else { } else {
for (int j = 0; j < char_length; j++) { for (int j = 0; j < char_length; j++) {
result->Set(i, chars[j]); result->Set(i, chars[j]);
...@@ -6316,25 +6327,22 @@ static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) { ...@@ -6316,25 +6327,22 @@ static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) {
} }
enum AsciiCaseConversion { template<class Converter>
ASCII_TO_LOWER, static bool FastAsciiConvert(char* dst,
ASCII_TO_UPPER char* src,
}; int length,
bool* changed_out) {
template <AsciiCaseConversion dir>
struct FastAsciiConverter {
static bool Convert(char* dst, char* src, int length, bool* changed_out) {
#ifdef DEBUG #ifdef DEBUG
char* saved_dst = dst; char* saved_dst = dst;
char* saved_src = src; char* saved_src = src;
#endif #endif
DisallowHeapAllocation no_gc;
// We rely on the distance between upper and lower case letters // We rely on the distance between upper and lower case letters
// being a known power of 2. // being a known power of 2.
ASSERT('a' - 'A' == (1 << 5)); ASSERT('a' - 'A' == (1 << 5));
// Boundaries for the range of input characters that require conversion.
const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1; static const char lo = Converter::kIsToLower ? 'A' - 1 : 'a' - 1;
const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1; static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1;
bool changed = false; bool changed = false;
uintptr_t or_acc = 0; uintptr_t or_acc = 0;
char* const limit = src + length; char* const limit = src + length;
...@@ -6382,55 +6390,44 @@ struct FastAsciiConverter { ...@@ -6382,55 +6390,44 @@ struct FastAsciiConverter {
if ((or_acc & kAsciiMask) != 0) { if ((or_acc & kAsciiMask) != 0) {
return false; return false;
} }
#ifdef DEBUG
CheckConvert(saved_dst, saved_src, length, changed); ASSERT(CheckFastAsciiConvert(
#endif saved_dst, saved_src, length, changed, Converter::kIsToLower));
*changed_out = changed; *changed_out = changed;
return true; return true;
} }
#ifdef DEBUG #ifdef DEBUG
static void CheckConvert(char* dst, char* src, int length, bool changed) { static bool CheckFastAsciiConvert(char* dst,
char* src,
int length,
bool changed,
bool is_to_lower) {
bool expected_changed = false; bool expected_changed = false;
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
if (dst[i] == src[i]) continue; if (dst[i] == src[i]) continue;
expected_changed = true; expected_changed = true;
if (dir == ASCII_TO_LOWER) { if (is_to_lower) {
ASSERT('A' <= src[i] && src[i] <= 'Z'); ASSERT('A' <= src[i] && src[i] <= 'Z');
ASSERT(dst[i] == src[i] + ('a' - 'A')); ASSERT(dst[i] == src[i] + ('a' - 'A'));
} else { } else {
ASSERT(dir == ASCII_TO_UPPER);
ASSERT('a' <= src[i] && src[i] <= 'z'); ASSERT('a' <= src[i] && src[i] <= 'z');
ASSERT(dst[i] == src[i] - ('a' - 'A')); ASSERT(dst[i] == src[i] - ('a' - 'A'));
} }
} }
ASSERT(expected_changed == changed); return (expected_changed == changed);
} }
#endif #endif
};
struct ToLowerTraits {
typedef unibrow::ToLowercase UnibrowConverter;
typedef FastAsciiConverter<ASCII_TO_LOWER> AsciiConverter;
};
struct ToUpperTraits {
typedef unibrow::ToUppercase UnibrowConverter;
typedef FastAsciiConverter<ASCII_TO_UPPER> AsciiConverter;
};
} // namespace } // namespace
template <typename ConvertTraits> template <class Converter>
MUST_USE_RESULT static MaybeObject* ConvertCase( MUST_USE_RESULT static MaybeObject* ConvertCase(
Arguments args, Arguments args,
Isolate* isolate, Isolate* isolate,
unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) { unibrow::Mapping<Converter, 128>* mapping) {
SealHandleScope shs(isolate); SealHandleScope shs(isolate);
CONVERT_ARG_CHECKED(String, s, 0); CONVERT_ARG_CHECKED(String, s, 0);
s = s->TryFlattenGetString(); s = s->TryFlattenGetString();
...@@ -6452,7 +6449,7 @@ MUST_USE_RESULT static MaybeObject* ConvertCase( ...@@ -6452,7 +6449,7 @@ MUST_USE_RESULT static MaybeObject* ConvertCase(
} }
SeqOneByteString* result = SeqOneByteString::cast(o); SeqOneByteString* result = SeqOneByteString::cast(o);
bool has_changed_character; bool has_changed_character;
bool is_ascii = ConvertTraits::AsciiConverter::Convert( bool is_ascii = FastAsciiConvert<Converter>(
reinterpret_cast<char*>(result->GetChars()), reinterpret_cast<char*>(result->GetChars()),
reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()),
length, length,
...@@ -6463,31 +6460,35 @@ MUST_USE_RESULT static MaybeObject* ConvertCase( ...@@ -6463,31 +6460,35 @@ MUST_USE_RESULT static MaybeObject* ConvertCase(
} }
} }
String::Encoding result_encoding = s->IsOneByteRepresentationUnderneath()
? String::ONE_BYTE_ENCODING : String::TWO_BYTE_ENCODING;
Object* answer; Object* answer;
{ MaybeObject* maybe_answer = { MaybeObject* maybe_answer = ConvertCaseHelper(
ConvertCaseHelper(isolate, s, length, length, mapping); isolate, s, result_encoding, length, length, mapping);
if (!maybe_answer->ToObject(&answer)) return maybe_answer; if (!maybe_answer->ToObject(&answer)) return maybe_answer;
} }
if (answer->IsSmi()) { if (answer->IsSmi()) {
// Retry with correct length. int new_length = Smi::cast(answer)->value();
{ MaybeObject* maybe_answer = if (new_length < 0) {
ConvertCaseHelper(isolate, result_encoding = String::TWO_BYTE_ENCODING;
s, Smi::cast(answer)->value(), length, mapping); new_length = -new_length;
if (!maybe_answer->ToObject(&answer)) return maybe_answer;
} }
MaybeObject* maybe_answer = ConvertCaseHelper(
isolate, s, result_encoding, new_length, length, mapping);
if (!maybe_answer->ToObject(&answer)) return maybe_answer;
} }
return answer; return answer;
} }
RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToLowerCase) { RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToLowerCase) {
return ConvertCase<ToLowerTraits>( return ConvertCase(
args, isolate, isolate->runtime_state()->to_lower_mapping()); args, isolate, isolate->runtime_state()->to_lower_mapping());
} }
RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToUpperCase) { RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToUpperCase) {
return ConvertCase<ToUpperTraits>( return ConvertCase(
args, isolate, isolate->runtime_state()->to_upper_mapping()); args, isolate, isolate->runtime_state()->to_upper_mapping());
} }
......
...@@ -235,6 +235,7 @@ struct ConnectorPunctuation { ...@@ -235,6 +235,7 @@ struct ConnectorPunctuation {
}; };
struct ToLowercase { struct ToLowercase {
static const int kMaxWidth = 3; static const int kMaxWidth = 3;
static const bool kIsToLower = true;
static int Convert(uchar c, static int Convert(uchar c,
uchar n, uchar n,
uchar* result, uchar* result,
...@@ -242,6 +243,7 @@ struct ToLowercase { ...@@ -242,6 +243,7 @@ struct ToLowercase {
}; };
struct ToUppercase { struct ToUppercase {
static const int kMaxWidth = 3; static const int kMaxWidth = 3;
static const bool kIsToLower = false;
static int Convert(uchar c, static int Convert(uchar c,
uchar n, uchar n,
uchar* result, uchar* result,
......
// Copyright 2013 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Regression test for v8:2984 ("Fix y-umlaut to uppercase").
// "\xff" is the small letter y with diaeresis, which fits in a single byte;
// "\u0178" is its capital form, which does not.

// Shortest case: a single-character string must uppercase to U+0178.
assertEquals("\u0178", "\xff".toUpperCase());
// Lowercasing a longer string that contains U+0178. The input is built by
// concatenating two literals -- presumably so that it is not a flat string
// when the conversion runs (TODO confirm against the runtime's cons-string
// threshold).
assertEquals("abcdefghijklmn\xffopq",
("ABCDEFGHIJKL" + "MN\u0178OPQ").toLowerCase());
// Round trip in the other direction for the single-character case.
assertEquals("\xff", "\u0178".toLowerCase());
// Uppercasing a longer concatenated string whose only non-ASCII character is
// "\xff"; the expected result contains U+0178 in the same position.
assertEquals("ABCDEFGHIJKLMN\u0178OPQ",
("abcdefghijk" + "lmn\xffopq").toUpperCase());
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment