Commit 25f84a48 authored by erik.corry@gmail.com

Speed up WriteUtf8 in the case where the output buffer is large enough.

Review URL: https://chromiumcodereview.appspot.com/9696032

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11104 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 2e9b5092
......@@ -3694,6 +3694,94 @@ int String::Utf8Length() const {
}
// Will fail with a negative answer if the recursion depth is too high.
static int RecursivelySerializeToUtf8(i::String* string,
char* buffer,
int start,
int end,
int recursion_budget,
int32_t previous_character,
int32_t* last_character) {
int utf8_bytes = 0;
while (true) {
if (string->IsAsciiRepresentation()) {
i::String::WriteToFlat(string, buffer, start, end);
*last_character = unibrow::Utf16::kNoPreviousCharacter;
return utf8_bytes + end - start;
}
switch (i::StringShape(string).representation_tag()) {
case i::kExternalStringTag: {
const uint16_t* data = i::ExternalTwoByteString::cast(string)->
ExternalTwoByteStringGetData(0);
char* current = buffer;
for (int i = start; i < end; i++) {
uint16_t character = data[i];
current +=
unibrow::Utf8::Encode(current, character, previous_character);
previous_character = character;
}
*last_character = previous_character;
return utf8_bytes + current - buffer;
}
case i::kSeqStringTag: {
const uint16_t* data =
i::SeqTwoByteString::cast(string)->SeqTwoByteStringGetData(0);
char* current = buffer;
for (int i = start; i < end; i++) {
uint16_t character = data[i];
current +=
unibrow::Utf8::Encode(current, character, previous_character);
previous_character = character;
}
*last_character = previous_character;
return utf8_bytes + current - buffer;
}
case i::kSlicedStringTag: {
i::SlicedString* slice = i::SlicedString::cast(string);
unsigned offset = slice->offset();
string = slice->parent();
start += offset;
end += offset;
continue;
}
case i::kConsStringTag: {
i::ConsString* cons_string = i::ConsString::cast(string);
i::String* first = cons_string->first();
int boundary = first->length();
if (start >= boundary) {
// Only need RHS.
string = cons_string->second();
start -= boundary;
end -= boundary;
continue;
} else if (end <= boundary) {
// Only need LHS.
string = first;
} else {
if (recursion_budget == 0) return -1;
int extra_utf8_bytes =
RecursivelySerializeToUtf8(first,
buffer,
start,
boundary,
recursion_budget - 1,
previous_character,
&previous_character);
if (extra_utf8_bytes < 0) return extra_utf8_bytes;
buffer += extra_utf8_bytes;
utf8_bytes += extra_utf8_bytes;
string = cons_string->second();
start = 0;
end -= boundary;
}
}
}
}
UNREACHABLE();
return 0;
}
bool String::MayContainNonAscii() const {
i::Handle<i::String> str = Utils::OpenHandle(this);
if (IsDeadCheck(str->GetIsolate(), "v8::String::MayContainNonAscii()")) {
......@@ -3712,11 +3800,12 @@ int String::WriteUtf8(char* buffer,
LOG_API(isolate, "String::WriteUtf8");
ENTER_V8(isolate);
i::Handle<i::String> str = Utils::OpenHandle(this);
int string_length = str->length();
if (str->IsAsciiRepresentation()) {
int len;
if (capacity == -1) {
capacity = str->length() + 1;
len = str->length();
len = string_length;
} else {
len = i::Min(capacity, str->length());
}
......@@ -3729,6 +3818,42 @@ int String::WriteUtf8(char* buffer,
return len;
}
if (capacity == -1 || capacity >= string_length * 3) {
int32_t previous = unibrow::Utf16::kNoPreviousCharacter;
const int kMaxRecursion = 100;
int utf8_bytes =
RecursivelySerializeToUtf8(*str,
buffer,
0,
string_length,
kMaxRecursion,
previous,
&previous);
if (utf8_bytes >= 0) {
// Success serializing with recursion.
if ((options & NO_NULL_TERMINATION) == 0 &&
(capacity > utf8_bytes || capacity == -1)) {
buffer[utf8_bytes++] = '\0';
}
if (nchars_ref != NULL) *nchars_ref = string_length;
return utf8_bytes;
}
FlattenString(str);
// Recurse once. This time around the string is flat and the serializing
// with recursion will certainly succeed.
return WriteUtf8(buffer, capacity, nchars_ref, options);
} else if (capacity >= string_length) {
// First check that the buffer is large enough. If it is, then recurse
// once without a capacity limit, which will get into the other branch of
// this 'if'.
int utf8_bytes = i::Utf8Length(str);
if ((options & NO_NULL_TERMINATION) == 0) utf8_bytes++;
if (utf8_bytes <= capacity) {
return WriteUtf8(buffer, -1, nchars_ref, options);
}
}
// Slow case.
i::StringInputBuffer& write_input_buffer = *isolate->write_input_buffer();
isolate->string_tracker()->RecordWrite(str);
if (options & HINT_MANY_WRITES_EXPECTED) {
......
......@@ -5870,6 +5870,7 @@ THREADED_TEST(Utf16) {
"p.push(String.fromCharCode(0xdc00));"
"var a = [];"
"var b = [];"
"var c = [];"
"var alens = [];"
"for (var i = 0; i < 3; i++) {"
" p[1] = String.fromCharCode(lead++);"
......@@ -5877,17 +5878,21 @@ THREADED_TEST(Utf16) {
" p[2] = String.fromCharCode(trail++);"
" a.push(p[i] + p[j]);"
" b.push(p[i] + p[j]);"
" c.push(p[i] + p[j]);"
" alens.push(plens[i] + plens[j]);"
" }"
"}"
"alens[5] -= 2;" // Here the surrogate pairs match up.
"var a2 = [];"
"var b2 = [];"
"var c2 = [];"
"var a2lens = [];"
"for (var m = 0; m < 9; m++) {"
" for (var n = 0; n < 9; n++) {"
" a2.push(a[m] + a[n]);"
" b2.push(b[m] + b[n]);"
" var newc = 'x' + c[m] + c[n] + 'y';"
" c2.push(newc.substring(1, newc.length - 1));"
" var utf = alens[m] + alens[n];" // And here.
// The 'n's that start with 0xdc.. are 6-8
// The 'm's that end with 0xd8.. are 1, 4 and 7
......@@ -5899,6 +5904,7 @@ THREADED_TEST(Utf16) {
Utf16Helper(context, "a2", "a2lens", 81);
WriteUtf8Helper(context, "b", "alens", 9);
WriteUtf8Helper(context, "b2", "a2lens", 81);
WriteUtf8Helper(context, "c2", "a2lens", 81);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment