Commit 4f5dcd4b, authored by Wiktor Garbacz, committed by Commit Bot

[parser] Fix chunked utf8 stream handling.

BUG=v8:6377

Change-Id: I5bdd41bdda83d7efe4b37d24d44e2e8c2339a30a
Reviewed-on: https://chromium-review.googlesource.com/500168
Commit-Queue: Wiktor Garbacz <wiktorg@google.com>
Reviewed-by: Daniel Vogelheim <vogelheim@chromium.org>
Cr-Commit-Position: refs/heads/master@{#45204}
parent 63c5dd5d
......@@ -387,8 +387,10 @@ void Utf8ExternalStreamingStream::SearchPosition(size_t position) {
// checking whether the # bytes in a chunk are equal to the # chars, and if
// so avoid the expensive SkipToPosition.)
bool ascii_only_chunk =
chunks_[chunk_no].start.incomplete_char ==
unibrow::Utf8::Utf8IncrementalBuffer(0) &&
(chunks_[chunk_no + 1].start.bytes - chunks_[chunk_no].start.bytes) ==
(chunks_[chunk_no + 1].start.chars - chunks_[chunk_no].start.chars);
(chunks_[chunk_no + 1].start.chars - chunks_[chunk_no].start.chars);
if (ascii_only_chunk) {
size_t skip = position - chunks_[chunk_no].start.chars;
current_ = {chunk_no,
......
......@@ -22,6 +22,13 @@ class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream {
chunks++;
} while (chunks_.back().len > 0);
}
// Builds the chunk list from a buffer of back-to-back NUL-terminated strings.
// Every string becomes one chunk. The first empty string (a lone NUL) is
// stored as a zero-length chunk and ends the list.
explicit ChunkSource(const char* chunks) : current_(0) {
  for (;;) {
    // Measure the current string once; the length is both the chunk size
    // and the distance to the next string.
    const size_t length = strlen(chunks);
    chunks_.push_back({reinterpret_cast<const uint8_t*>(chunks), length});
    if (length == 0) break;
    // Step over the chunk's bytes and its NUL terminator.
    chunks += length + 1;
  }
}
ChunkSource(const uint8_t* data, size_t len, bool extra_chunky)
: current_(0) {
// If extra_chunky, we'll use increasingly large chunk sizes.
......@@ -450,3 +457,42 @@ TEST(Regress651333) {
CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
}
}
// Regression test for v8:6377: multi-byte UTF-8 sequences that are split
// across chunk boundaries must decode to the same UTF-16 code units, both on
// the first sequential read and again after seeking back to position 0.
TEST(Regress6377) {
  // Each input is a series of NUL-separated chunks; the literal's implicit
  // trailing NUL supplies the empty chunk that ends the series.
  const char* inputs[] = {
      "\xf0\x90\0"      // chunk 1: first two bytes of a 4-byte sequence
      "\x80\x80"        // chunk 2: last two bytes of that sequence ...
      "a\0",            //          ... followed by 'a'
      "\xe0\xbf\0"      // chunk 1: first two bytes of a 3-byte sequence
      "\xbf"            // chunk 2: last byte of that sequence ...
      "a\0",            //          ... followed by 'a'
      "\xc3\0"          // chunk 1: first byte of a 2-byte sequence
      "\xbf"            // chunk 2: last byte of that sequence ...
      "a\0",            //          ... followed by 'a'
      "\xf0\x90\x80\0"  // chunk 1: first three bytes of a 4-byte sequence
      "\x80"            // chunk 2: last byte of that sequence ...
      "a\xc3\0"         //          ... then 'a' and the start of a 2-byte seq
      "\xbf\0",         // chunk 3: last byte of the 2-byte sequence
  };
  // Expected UTF-16 code units, one row per entry of |inputs|.
  const std::vector<std::vector<uint16_t>> expected_units = {
      {0xd800, 0xdc00, 97},
      {0xfff, 97},
      {0xff, 97},
      {0xd800, 0xdc00, 97, 0xff},
  };
  CHECK_EQ(expected_units.size(), sizeof(inputs) / sizeof(inputs[0]));

  for (size_t index = 0; index < expected_units.size(); ++index) {
    ChunkSource chunk_source(inputs[index]);
    std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
        &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));

    // Pass 0 reads the stream from its initial position; pass 1 rewinds to
    // the start first and checks that the identical sequence comes back.
    for (int pass = 0; pass < 2; ++pass) {
      if (pass > 0) stream->Seek(0);
      for (const uint16_t& unit : expected_units[index]) {
        CHECK_EQ(unit, stream->Advance());
      }
      CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
    }
  }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment