Commit 699badd5, authored by Toon Verwaest, committed by Commit Bot

[scanner] Add fast path for converting utf8 that's ascii to utf16

Change-Id: I386261bc012d0d4556b0f9417b2880cc7f44bf2c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1647697
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62029}
parent 72b9d704
...@@ -590,7 +590,8 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() { ...@@ -590,7 +590,8 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() {
} }
} }
while (cursor < end && output_cursor + 1 < buffer_start_ + kBufferSize) { const uint16_t* max_buffer_end = buffer_start_ + kBufferSize;
while (cursor < end && output_cursor + 1 < max_buffer_end) {
unibrow::uchar t = unibrow::uchar t =
unibrow::Utf8::ValueOfIncremental(&cursor, &state, &incomplete_char); unibrow::Utf8::ValueOfIncremental(&cursor, &state, &incomplete_char);
if (V8_LIKELY(t <= unibrow::Utf16::kMaxNonSurrogateCharCode)) { if (V8_LIKELY(t <= unibrow::Utf16::kMaxNonSurrogateCharCode)) {
...@@ -601,6 +602,18 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() { ...@@ -601,6 +602,18 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() {
*(output_cursor++) = unibrow::Utf16::LeadSurrogate(t); *(output_cursor++) = unibrow::Utf16::LeadSurrogate(t);
*(output_cursor++) = unibrow::Utf16::TrailSurrogate(t); *(output_cursor++) = unibrow::Utf16::TrailSurrogate(t);
} }
// Fast path for ascii sequences.
size_t remaining = end - cursor;
size_t max_buffer = max_buffer_end - output_cursor;
int max_length = static_cast<int>(Min(remaining, max_buffer));
DCHECK_EQ(state, unibrow::Utf8::State::kAccept);
const uint8_t* read_end = cursor + max_length;
for (; cursor < read_end; cursor++) {
uint8_t c = *cursor;
DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F);
if (c > unibrow::Utf8::kMaxOneByteChar) break;
*(output_cursor++) = c;
}
} }
current_.pos.bytes = chunk.start.bytes + (cursor - chunk.data); current_.pos.bytes = chunk.start.bytes + (cursor - chunk.data);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment