Commit 9f21cab8 authored by Marja Hölttä's avatar Marja Hölttä Committed by Commit Bot

Revert "Reland#2 [parser] Refactor streaming scanner streams."

This reverts commit de9269f3.

Something's still wrong in the encoding handling (see bug).

Bug: chromium:763106
Cq-Include-Trybots: master.tryserver.chromium.linux:linux_chromium_rel_ng
Change-Id: Icd19dd42b84b9d090e191375a2942b9941110bcf
Reviewed-on: https://chromium-review.googlesource.com/657386
Commit-Queue: Marja Hölttä <marja@chromium.org>
Reviewed-by: 's avatarDaniel Vogelheim <vogelheim@chromium.org>
Cr-Commit-Position: refs/heads/master@{#47924}
parent 9b218658
......@@ -1358,6 +1358,11 @@ class V8_EXPORT ScriptCompiler {
* length of the data returned. When the data ends, GetMoreData should
* return 0. Caller takes ownership of the data.
*
* When streaming UTF-8 data, V8 handles multi-byte characters split between
* two data chunks, but doesn't handle multi-byte characters split between
* more than two data chunks. The embedder can avoid this problem by always
* returning at least 2 bytes of data.
*
* If the embedder wants to cancel the streaming, they should make the next
* GetMoreData call return 0. V8 will interpret it as end of data (and most
* probably, parsing will fail). The streaming task will return as soon as
......
This diff is collapsed.
......@@ -39,13 +39,6 @@ class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream {
}
chunks_.push_back({nullptr, 0});
}
ChunkSource(const uint8_t* data, size_t len, size_t chunk_size)
: current_(0) {
for (size_t i = 0; i < len; i += chunk_size) {
chunks_.push_back({data + i, i::Min(chunk_size, len - i)});
}
chunks_.push_back({nullptr, 0});
}
~ChunkSource() {}
bool SetBookmark() override { return false; }
void ResetToBookmark() override {}
......@@ -445,18 +438,6 @@ TEST(CharacterStreams) {
TestCharacterStreams(buffer, arraysize(buffer) - 1, 576, 3298);
}
TEST(Uft8MultipleBOMChunks) {
const char* chunks = "\xef\xbb\xbf\0\xef\xbb\xbf\0\xef\xbb\xbf\0a\0";
const uint16_t unicode[] = {0xFEFF, 0xFEFF, 97};
ChunkSource chunk_source(chunks);
std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
for (size_t i = 0; i < arraysize(unicode); i++) {
CHECK_EQ(unicode[i], stream->Advance());
}
CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
}
// Regression test for crbug.com/651333. Read invalid utf-8.
TEST(Regress651333) {
const uint8_t bytes[] =
......@@ -498,17 +479,10 @@ TEST(Regress6377) {
"\x80" // second chunk - one-byte end of 4-byte seq
"a\xc3\0" // and an 'a' + start of 2-byte seq
"\xbf\0", // third chunk - end of 2-byte seq
// Regression test for
// https://bugs.chromium.org/p/chromium/issues/detail?id=758508
"X\xef\xbb\xbfX\0", // first chunk - no BOM but U+feff in the middle
};
const std::vector<std::vector<uint16_t>> unicode = {
{0xd800, 0xdc00, 97},
{0xfff, 97},
{0xff, 97},
{0xd800, 0xdc00, 97, 0xff},
{88, 0xfeff, 88}};
{0xd800, 0xdc00, 97}, {0xfff, 97}, {0xff, 97}, {0xd800, 0xdc00, 97, 0xff},
};
CHECK_EQ(unicode.size(), sizeof(cases) / sizeof(cases[0]));
for (size_t c = 0; c < unicode.size(); ++c) {
ChunkSource chunk_source(cases[c]);
......@@ -525,36 +499,3 @@ TEST(Regress6377) {
CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
}
}
TEST(Regress724166) {
// Chunk size has to be multiple of kBufferCharacterSize
constexpr size_t kBufferCharacterSize = 512;
constexpr size_t kChunkSize = kBufferCharacterSize * 8;
constexpr size_t kChunks = 4;
uint8_t buffer[kChunkSize * kChunks];
for (size_t j = 0; j < kChunks; ++j) {
for (size_t i = 0; i < kChunkSize; ++i) {
buffer[kChunkSize * j + i] = (i % 0x7e) + 1;
}
}
// Add BOM at the beginning
buffer[0] = '\xef';
buffer[1] = '\xbb';
buffer[2] = '\xbf';
ChunkSource chunk_source(buffer, arraysize(buffer), kChunkSize);
std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
for (size_t i = 0; i < arraysize(buffer) - 3; ++i) {
CHECK_EQ(static_cast<i::uc32>(buffer[i + 3]), stream->Advance());
}
CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
for (int z = -8; z < 8; ++z) {
for (size_t j = kBufferCharacterSize + z; j < arraysize(buffer);
j += kBufferCharacterSize) {
stream->Seek(j);
for (size_t i = j; i < arraysize(buffer) - 3; ++i) {
CHECK_EQ(static_cast<i::uc32>(buffer[i + 3]), stream->Advance());
}
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment