Commit c295905c authored by Andy Wingo, committed by V8 LUCI CQ

[strings] Re-introduce fast path for Utf8Decoder

Utf8Decoder used to use unibrow::Utf8::ValueOfIncremental, which had a
fast path to avoid the decoder for bytes less than 0x80 in the start
state.  We had to switch away from ValueOfIncremental but it's probably
a good idea to keep the fast path.

Bug: v8:12868
Change-Id: I7d83d67f2c13a1c4f026dde04ef0a69b7de47dc3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3723498
Commit-Queue: Andy Wingo <wingo@igalia.com>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81377}
parent 0257b0a3
......@@ -73,6 +73,16 @@ Utf8DecoderBase<Decoder>::Utf8DecoderBase(
const uint8_t* end = data.begin() + data.length();
while (cursor < end) {
if (V8_LIKELY(*cursor <= unibrow::Utf8::kMaxOneByteChar &&
state == Traits::DfaDecoder::kAccept)) {
DCHECK_EQ(0u, current);
DCHECK(!Traits::IsInvalidSurrogatePair(previous, *cursor));
previous = *cursor;
utf16_length_++;
cursor++;
continue;
}
auto previous_state = state;
Traits::DfaDecoder::Decode(*cursor, &state, &current);
if (state < Traits::DfaDecoder::kAccept) {
......@@ -132,6 +142,14 @@ void Utf8DecoderBase<Decoder>::Decode(Char* out,
const uint8_t* end = data.begin() + data.length();
while (cursor < end) {
if (V8_LIKELY(*cursor <= unibrow::Utf8::kMaxOneByteChar &&
state == Traits::DfaDecoder::kAccept)) {
DCHECK_EQ(0u, current);
*(out++) = static_cast<Char>(*cursor);
cursor++;
continue;
}
auto previous_state = state;
Traits::DfaDecoder::Decode(*cursor, &state, &current);
if (Traits::kAllowIncompleteSequences &&
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment