Commit a609cf61 authored by ager@chromium.org

Avoid decoding overhead when allocating ascii strings.

The assumption is that most UTF-8 strings we allocate are actually pure
ASCII, and that when they are not, we will encounter a non-ASCII character
early in the scan, so little work is wasted before falling back to decoding.

Review URL: http://codereview.chromium.org/6072004

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@6099 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent bc5fb1a1
......@@ -40,6 +40,21 @@ int Heap::MaxObjectSizeInPagedSpace() {
}
// Fast path for building a heap string from UTF-8 encoded bytes.
//
// The input is scanned byte by byte. The first byte found above
// String::kMaxAsciiCharCodeU means real decoding is required, so the whole
// request is forwarded to AllocateStringFromUtf8Slow. If the scan completes
// without finding such a byte, the bytes are passed to
// AllocateStringFromAscii unchanged.
MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
                                          PretenureFlag pretenure) {
  int index = 0;
  while (index < str.length()) {
    // Reinterpret as unsigned so bytes with the high bit set compare as
    // values above the ASCII limit rather than as negative chars.
    const uint8_t byte = static_cast<uint8_t>(str[index]);
    if (byte > String::kMaxAsciiCharCodeU) {
      // Found a non-ASCII byte: bail out to the decoding implementation.
      return AllocateStringFromUtf8Slow(str, pretenure);
    }
    ++index;
  }
  // Every byte was ASCII; UTF-8 encodes ASCII characters as themselves, so
  // no conversion is needed.
  return AllocateStringFromAscii(str, pretenure);
}
MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
int chars,
uint32_t hash_field) {
......
......@@ -3307,8 +3307,8 @@ MaybeObject* Heap::AllocateStringFromAscii(Vector<const char> string,
}
MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> string,
PretenureFlag pretenure) {
MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
PretenureFlag pretenure) {
// V8 only supports characters in the Basic Multilingual Plane.
const uc32 kMaxSupportedChar = 0xFFFF;
// Count the number of characters in the UTF-8 string and check if
......@@ -3317,17 +3317,11 @@ MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> string,
decoder(ScannerConstants::utf8_decoder());
decoder->Reset(string.start(), string.length());
int chars = 0;
bool is_ascii = true;
while (decoder->has_more()) {
uc32 r = decoder->GetNext();
if (r > String::kMaxAsciiCharCode) is_ascii = false;
decoder->GetNext();
chars++;
}
// If the string is ascii, we do not need to convert the characters
// since UTF8 is backwards compatible with ascii.
if (is_ascii) return AllocateStringFromAscii(string, pretenure);
Object* result;
{ MaybeObject* maybe_result = AllocateRawTwoByteString(chars, pretenure);
if (!maybe_result->ToObject(&result)) return maybe_result;
......
......@@ -412,7 +412,10 @@ class Heap : public AllStatic {
MUST_USE_RESULT static MaybeObject* AllocateStringFromAscii(
Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED);
MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8(
MUST_USE_RESULT static inline MaybeObject* AllocateStringFromUtf8(
Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED);
// Decoding path for UTF-8 input. AllocateStringFromUtf8 forwards to this
// function as soon as it sees a byte above String::kMaxAsciiCharCodeU; the
// implementation counts the decoded characters and allocates a two-byte
// string for them.
MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8Slow(
Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED);
MUST_USE_RESULT static MaybeObject* AllocateStringFromTwoByte(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment