Commit de9269f3 authored by Wiktor Garbacz, committed by Commit Bot

Reland#2 [parser] Refactor streaming scanner streams.

Unify and simplify the logic, and reduce UTF-8-specific handling.

The intent of this is also to introduce stream views.
Stream views can be used concurrently by multiple threads, but
only one thread may fetch new data from the underlying source.
This, together with unified stream view creation, is intended to be
used for parse tasks.
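
To make the intended use concrete, the split of responsibilities can be sketched roughly as follows. This is only an illustration of the constraint stated above (many concurrent readers, a single fetching thread); SharedCharSource and StreamView are invented names, not the classes introduced by this change:

// Illustrative sketch only -- not the design in this change.
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <mutex>
#include <vector>

// Append-only buffer of source bytes. Exactly one thread (the fetcher) calls
// Append(); any number of view threads call Read() on already-fetched data.
class SharedCharSource {
 public:
  void Append(const uint8_t* data, size_t length) {  // fetcher thread only
    std::lock_guard<std::mutex> lock(mutex_);
    buffer_.insert(buffer_.end(), data, data + length);
  }

  // Copies at most `length` bytes starting at `pos` into `out`; returns the
  // number of bytes copied (0 if `pos` is beyond the fetched data).
  size_t Read(size_t pos, uint8_t* out, size_t length) {
    std::lock_guard<std::mutex> lock(mutex_);
    if (pos >= buffer_.size()) return 0;
    size_t n = std::min(length, buffer_.size() - pos);
    std::memcpy(out, buffer_.data() + pos, n);
    return n;
  }

 private:
  std::mutex mutex_;
  std::vector<uint8_t> buffer_;
};

// A cheap per-task cursor over the shared source. Creating one per parse task
// lets tasks scan concurrently without ever touching the embedder's stream.
class StreamView {
 public:
  explicit StreamView(SharedCharSource* source) : source_(source) {}

  size_t ReadSome(uint8_t* out, size_t length) {
    size_t n = source_->Read(pos_, out, length);
    pos_ += n;
    return n;
  }

 private:
  SharedCharSource* source_;
  size_t pos_ = 0;
};

The point of the split is that a view holds nothing but a position, so handing one to each parse task is cheap, while all interaction with the embedder's stream stays on a single thread.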

BUG=v8:6093

Change-Id: I83c6f1e6ad280c28da690da41c466dfcbb7915e6
Reviewed-on: https://chromium-review.googlesource.com/535474
Reviewed-by: Daniel Vogelheim <vogelheim@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Commit-Queue: Marja Hölttä <marja@chromium.org>
Cr-Commit-Position: refs/heads/master@{#45994}
parent d6c9e534
@@ -1281,11 +1281,6 @@ class V8_EXPORT ScriptCompiler {
      * length of the data returned. When the data ends, GetMoreData should
      * return 0. Caller takes ownership of the data.
      *
-     * When streaming UTF-8 data, V8 handles multi-byte characters split between
-     * two data chunks, but doesn't handle multi-byte characters split between
-     * more than two data chunks. The embedder can avoid this problem by always
-     * returning at least 2 bytes of data.
-     *
      * If the embedder wants to cancel the streaming, they should make the next
      * GetMoreData call return 0. V8 will interpret it as end of data (and most
      * probably, parsing will fail). The streaming task will return as soon as
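
For reference, an embedder stream that follows the contract described in this comment could look roughly like the sketch below. Only the GetMoreData(const uint8_t** src) override is the actual v8::ScriptCompiler::ExternalSourceStream interface; FileSourceStream, its chunk size, and the FILE*-based reading are illustrative assumptions:

#include <atomic>
#include <cstdint>
#include <cstdio>

#include "v8.h"  // v8::ScriptCompiler::ExternalSourceStream

// Hypothetical embedder stream: reads a FILE* in fixed-size chunks.
class FileSourceStream : public v8::ScriptCompiler::ExternalSourceStream {
 public:
  explicit FileSourceStream(FILE* file) : file_(file) {}

  // Called repeatedly by V8's streaming task on a background thread. Each
  // call hands V8 a heap-allocated chunk that V8 takes ownership of (this
  // sketch assumes it is released with delete[]). Returning 0 means "no more
  // data", which is also the cancellation signal described above.
  size_t GetMoreData(const uint8_t** src) override {
    *src = nullptr;
    if (cancelled_.load()) return 0;
    uint8_t* chunk = new uint8_t[kChunkSize];
    size_t n = fread(chunk, 1, kChunkSize, file_);
    if (n == 0) {
      delete[] chunk;
      return 0;  // End of data.
    }
    *src = chunk;
    return n;
  }

  // May be called from another thread; the next GetMoreData() returns 0.
  void Cancel() { cancelled_.store(true); }

 private:
  static constexpr size_t kChunkSize = 4096;
  FILE* file_;
  std::atomic<bool> cancelled_{false};
};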
@@ -39,6 +39,13 @@ class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream {
     }
     chunks_.push_back({nullptr, 0});
   }
+  ChunkSource(const uint8_t* data, size_t len, size_t chunk_size)
+      : current_(0) {
+    for (size_t i = 0; i < len; i += chunk_size) {
+      chunks_.push_back({data + i, i::Min(chunk_size, len - i)});
+    }
+    chunks_.push_back({nullptr, 0});
+  }
   ~ChunkSource() {}
   bool SetBookmark() override { return false; }
   void ResetToBookmark() override {}
@@ -438,6 +445,18 @@ TEST(CharacterStreams) {
   TestCharacterStreams(buffer, arraysize(buffer) - 1, 576, 3298);
 }
 
+TEST(Utf8MultipleBOMChunks) {
+  const char* chunks = "\xef\xbb\xbf\0\xef\xbb\xbf\0\xef\xbb\xbf\0a\0";
+  const uint16_t unicode[] = {0xFEFF, 0xFEFF, 97};
+  ChunkSource chunk_source(chunks);
+  std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
+      &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
+  for (size_t i = 0; i < arraysize(unicode); i++) {
+    CHECK_EQ(unicode[i], stream->Advance());
+  }
+  CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
+}
+
 // Regression test for crbug.com/651333. Read invalid utf-8.
 TEST(Regress651333) {
   const uint8_t bytes[] =
@@ -499,3 +518,36 @@ TEST(Regress6377) {
     CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
   }
 }
+
+TEST(Regress724166) {
+  // The chunk size has to be a multiple of kBufferCharacterSize.
+  constexpr size_t kBufferCharacterSize = 512;
+  constexpr size_t kChunkSize = kBufferCharacterSize * 8;
+  constexpr size_t kChunks = 4;
+  uint8_t buffer[kChunkSize * kChunks];
+  for (size_t j = 0; j < kChunks; ++j) {
+    for (size_t i = 0; i < kChunkSize; ++i) {
+      buffer[kChunkSize * j + i] = (i % 0x7e) + 1;
+    }
+  }
+  // Add a BOM at the beginning.
+  buffer[0] = '\xef';
+  buffer[1] = '\xbb';
+  buffer[2] = '\xbf';
+  ChunkSource chunk_source(buffer, arraysize(buffer), kChunkSize);
+  std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
+      &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
+  for (size_t i = 0; i < arraysize(buffer) - 3; ++i) {
+    CHECK_EQ(static_cast<i::uc32>(buffer[i + 3]), stream->Advance());
+  }
+  CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
+  for (int z = -8; z < 8; ++z) {
+    for (size_t j = kBufferCharacterSize + z; j < arraysize(buffer);
+         j += kBufferCharacterSize) {
+      stream->Seek(j);
+      for (size_t i = j; i < arraysize(buffer) - 3; ++i) {
+        CHECK_EQ(static_cast<i::uc32>(buffer[i + 3]), stream->Advance());
+      }
+    }
+  }
+}