Commit c7ad1ddd authored by Toon Verwaest, committed by Commit Bot

[scanner] Drop lonely byte support as it's unused by blink anyway.

The embedder should ultimately be responsible for handling this since they
anyway give us a copy of the data. They can easily make sure that the chunks we
get do not have lonely bytes.

Cq-Include-Trybots: luci.chromium.try:linux_chromium_rel_ng
Change-Id: Ie862107bbbdd00c4d904fbb457a206c2fd52e5d0
Reviewed-on: https://chromium-review.googlesource.com/1127044
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#54262}
parent bfeb78a7
...@@ -1480,6 +1480,10 @@ class V8_EXPORT ScriptCompiler { ...@@ -1480,6 +1480,10 @@ class V8_EXPORT ScriptCompiler {
* more than two data chunks. The embedder can avoid this problem by always * more than two data chunks. The embedder can avoid this problem by always
* returning at least 2 bytes of data. * returning at least 2 bytes of data.
* *
* When streaming UTF-16 data, V8 does not handle characters split between
* two data chunks. The embedder has to make sure that chunks have an even
* length.
*
* If the embedder wants to cancel the streaming, they should make the next * If the embedder wants to cancel the streaming, they should make the next
* GetMoreData call return 0. V8 will interpret it as end of data (and most * GetMoreData call return 0. V8 will interpret it as end of data (and most
* probably, parsing will fail). The streaming task will return as soon as * probably, parsing will fail). The streaming task will return as soon as
......
...@@ -38,9 +38,8 @@ struct Range { ...@@ -38,9 +38,8 @@ struct Range {
const Char* end; const Char* end;
size_t length() { return static_cast<size_t>(end - start); } size_t length() { return static_cast<size_t>(end - start); }
bool empty() const { return start == end; }
bool unaligned_start() const { bool unaligned_start() const {
return reinterpret_cast<intptr_t>(start) % 2 == 1; return reinterpret_cast<intptr_t>(start) % sizeof(Char) == 1;
} }
}; };
...@@ -95,102 +94,47 @@ class ChunkedStream { ...@@ -95,102 +94,47 @@ class ChunkedStream {
Range<Char> GetDataAt(size_t pos) { Range<Char> GetDataAt(size_t pos) {
Chunk chunk = FindChunk(pos); Chunk chunk = FindChunk(pos);
size_t buffer_end = chunk.length(); size_t buffer_end = chunk.length;
size_t buffer_pos = Min(buffer_end, pos - chunk.position); size_t buffer_pos = Min(buffer_end, pos - chunk.position);
return {&chunk.data()[buffer_pos], &chunk.data()[buffer_end]}; return {&chunk.data[buffer_pos], &chunk.data[buffer_end]};
} }
~ChunkedStream() { ~ChunkedStream() {
for (size_t i = 0; i < chunks_.size(); i++) { for (size_t i = 0; i < chunks_.size(); i++) {
delete[] chunks_[i].raw_data; delete[] chunks_[i].data;
} }
} }
static const bool kCanAccessHeap = false; static const bool kCanAccessHeap = false;
private: private:
// A single chunk of Chars. There may be a lonely bytes at the start and end
// in case sizeof(Char) > 1. They just need to be ignored since additional
// chunks are added by FetchChunk that contain the full character.
// TODO(verwaest): Make sure that those characters are added by blink instead
// so we can get rid of this complexity here.
struct Chunk { struct Chunk {
// A raw chunk of Chars possibly including a lonely start and/or a lonely const Char* const data;
// end byte. // The logical position of data.
const uint8_t* const raw_data;
// The logical position of data() (possibly skipping a lonely start byte).
const size_t position; const size_t position;
// The length of the raw_data. const size_t length;
const size_t raw_length : sizeof(size_t) * 8 - 1; size_t end_position() const { return position + length; }
// Tells us whether the first byte of raw_data is a lonely start byte and
// should be skipped because it's combined with a lonely end byte from the
// previous chunk.
const bool lonely_start : 1;
size_t end_position() const { return position + length(); }
// The chunk includes a lonely end byte if the chunk is 2-byte but has an
// uneven number of chars (possibly ignoring a lonely start byte that is
// merged with the lonely end byte of the previous chunk).
bool lonely_end() const {
return (raw_length - lonely_start) % sizeof(Char) == 1;
}
uint8_t lonely_end_byte() const {
DCHECK(lonely_end());
return raw_data[raw_length - 1];
}
size_t length() const {
return (raw_length - lonely_start) >> (sizeof(Char) - 1);
}
bool has_chars() const { return raw_length - lonely_start > 0; }
const Char* data() const {
return reinterpret_cast<const Char*>(raw_data + lonely_start);
}
}; };
Chunk FindChunk(size_t position) { Chunk FindChunk(size_t position) {
if (chunks_.empty()) FetchFirstChunk(); if (chunks_.empty()) FetchChunk(size_t{0});
// Walk forwards while the position is in front of the current chunk.. // Walk forwards while the position is in front of the current chunk.
if (chunks_.back().position <= position) { while (position >= chunks_.back().end_position() &&
while (position >= chunks_.back().end_position() && chunks_.back().length > 0) {
chunks_.back().has_chars()) { FetchChunk(chunks_.back().end_position());
FetchChunk();
}
// Return if the final chunk's starting position is before the position.
if (chunks_.back().position <= position) return chunks_.back();
// Otherwise walk backwards to find the intermediate chunk added to
// support lonely bytes.
// TODO(verwaest): Remove once we don't need to support lonely bytes here
// anymore.
} }
// Walk backwards. // Walk backwards.
for (auto reverse_it = chunks_.rbegin() + 1; reverse_it != chunks_.rend(); for (auto reverse_it = chunks_.rbegin(); reverse_it != chunks_.rend();
++reverse_it) { ++reverse_it) {
if (reverse_it->position <= position) return *reverse_it; if (reverse_it->position <= position) return *reverse_it;
} }
UNREACHABLE();
}
void FetchFirstChunk() { UNREACHABLE();
const uint8_t* data = nullptr;
size_t length;
{
RuntimeCallTimerScope scope(stats_,
RuntimeCallCounterId::kGetMoreDataCallback);
length = source_->GetMoreData(&data);
}
chunks_.push_back({data, 0, length, false});
} }
void FetchChunk() { void FetchChunk(size_t position) {
DCHECK(!chunks_.empty());
const uint8_t* data = nullptr; const uint8_t* data = nullptr;
size_t length; size_t length;
{ {
...@@ -198,22 +142,10 @@ class ChunkedStream { ...@@ -198,22 +142,10 @@ class ChunkedStream {
RuntimeCallCounterId::kGetMoreDataCallback); RuntimeCallCounterId::kGetMoreDataCallback);
length = source_->GetMoreData(&data); length = source_->GetMoreData(&data);
} }
// Incoming data has to be aligned to Char size.
const Chunk& last_chunk = chunks_.back(); DCHECK_EQ(0, length % sizeof(Char));
bool lonely_start = last_chunk.lonely_end(); chunks_.push_back(
DCHECK(last_chunk.has_chars()); {reinterpret_cast<const Char*>(data), position, length / sizeof(Char)});
size_t position = last_chunk.end_position();
if (lonely_start) {
uint8_t* intermediate = NewArray<uint8_t>(2);
intermediate[0] = last_chunk.lonely_end_byte();
intermediate[1] = length == 0 ? 0 : data[0];
chunks_.push_back({intermediate, position, 2, false});
position += 1;
}
chunks_.push_back({data, position, length, lonely_start});
} }
std::vector<struct Chunk> chunks_; std::vector<struct Chunk> chunks_;
...@@ -240,7 +172,7 @@ class BufferedCharacterStream : public Utf16CharacterStream { ...@@ -240,7 +172,7 @@ class BufferedCharacterStream : public Utf16CharacterStream {
buffer_cursor_ = buffer_start_; buffer_cursor_ = buffer_start_;
Range<Char> range = byte_stream_.GetDataAt(position); Range<Char> range = byte_stream_.GetDataAt(position);
if (range.empty()) { if (range.length() == 0) {
buffer_end_ = buffer_start_; buffer_end_ = buffer_start_;
return false; return false;
} }
...@@ -261,10 +193,8 @@ class BufferedCharacterStream : public Utf16CharacterStream { ...@@ -261,10 +193,8 @@ class BufferedCharacterStream : public Utf16CharacterStream {
ByteStream<Char> byte_stream_; ByteStream<Char> byte_stream_;
}; };
// Provides a (partially) unbuffered utf-16 view on the bytes from the // Provides a unbuffered utf-16 view on the bytes from the underlying
// underlying ByteStream. It is only partially unbuffered when running on MIPS // ByteStream.
// due to lonely start bytes making chunks unaligned. In that case, unaligned
// chars in a chunk (due to lonely start) are locally buffered.
template <template <typename T> class ByteStream> template <template <typename T> class ByteStream>
class UnbufferedCharacterStream : public Utf16CharacterStream { class UnbufferedCharacterStream : public Utf16CharacterStream {
public: public:
...@@ -282,20 +212,9 @@ class UnbufferedCharacterStream : public Utf16CharacterStream { ...@@ -282,20 +212,9 @@ class UnbufferedCharacterStream : public Utf16CharacterStream {
buffer_start_ = range.start; buffer_start_ = range.start;
buffer_end_ = range.end; buffer_end_ = range.end;
buffer_cursor_ = buffer_start_; buffer_cursor_ = buffer_start_;
if (range.empty()) return false; if (range.length() == 0) return false;
// TODO(verwaest): Make sure that this cannot happen by dealing with lonely DCHECK(!range.unaligned_start());
// bytes on the blink side.
#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
// Buffer anyway in case the chunk is unaligned due to a lonely start.
if (range.unaligned_start()) {
size_t length = Min(kBufferSize, range.length());
i::CopyCharsUnsigned(buffer_, buffer_start_, length);
buffer_start_ = &buffer_[0];
buffer_cursor_ = buffer_start_;
buffer_end_ = &buffer_[length];
}
#endif
DCHECK_LE(buffer_start_, buffer_end_); DCHECK_LE(buffer_start_, buffer_end_);
return true; return true;
} }
...@@ -303,10 +222,6 @@ class UnbufferedCharacterStream : public Utf16CharacterStream { ...@@ -303,10 +222,6 @@ class UnbufferedCharacterStream : public Utf16CharacterStream {
bool can_access_heap() override { return false; } bool can_access_heap() override { return false; }
private: private:
#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
static const size_t kBufferSize = 512;
uc16 buffer_[kBufferSize];
#endif
ByteStream<uint16_t> byte_stream_; ByteStream<uint16_t> byte_stream_;
}; };
......
...@@ -28,12 +28,14 @@ class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream { ...@@ -28,12 +28,14 @@ class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream {
chunks += strlen(chunks) + 1; chunks += strlen(chunks) + 1;
} while (chunks_.back().len > 0); } while (chunks_.back().len > 0);
} }
ChunkSource(const uint8_t* data, size_t len, bool extra_chunky) ChunkSource(const uint8_t* data, size_t char_size, size_t len,
bool extra_chunky)
: current_(0) { : current_(0) {
// If extra_chunky, we'll use increasingly large chunk sizes. // If extra_chunky, we'll use increasingly large chunk sizes. If not, we'll
// If not, we'll have a single chunk of full length. // have a single chunk of full length. Make sure that chunks are always
size_t chunk_size = extra_chunky ? 1 : len; // aligned to char-size though.
for (size_t i = 0; i < len; i += chunk_size, chunk_size++) { size_t chunk_size = extra_chunky ? char_size : len;
for (size_t i = 0; i < len; i += chunk_size, chunk_size += char_size) {
chunks_.push_back({data + i, i::Min(chunk_size, len - i)}); chunks_.push_back({data + i, i::Min(chunk_size, len - i)});
} }
chunks_.push_back({nullptr, 0}); chunks_.push_back({nullptr, 0});
...@@ -371,7 +373,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length, ...@@ -371,7 +373,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
const uint8_t* data = one_byte_vector.begin(); const uint8_t* data = one_byte_vector.begin();
const uint8_t* data_end = one_byte_vector.end(); const uint8_t* data_end = one_byte_vector.end();
ChunkSource single_chunk(data, data_end - data, false); ChunkSource single_chunk(data, 1, data_end - data, false);
std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream( std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream(
i::ScannerStream::For(&single_chunk, i::ScannerStream::For(&single_chunk,
v8::ScriptCompiler::StreamedSource::ONE_BYTE, v8::ScriptCompiler::StreamedSource::ONE_BYTE,
...@@ -379,7 +381,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length, ...@@ -379,7 +381,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(), TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
length, start, end); length, start, end);
ChunkSource many_chunks(data, data_end - data, true); ChunkSource many_chunks(data, 1, data_end - data, true);
one_byte_streaming_stream.reset(i::ScannerStream::For( one_byte_streaming_stream.reset(i::ScannerStream::For(
&many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE, nullptr)); &many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE, nullptr));
TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(), TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
...@@ -390,14 +392,14 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length, ...@@ -390,14 +392,14 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
{ {
const uint8_t* data = one_byte_vector.begin(); const uint8_t* data = one_byte_vector.begin();
const uint8_t* data_end = one_byte_vector.end(); const uint8_t* data_end = one_byte_vector.end();
ChunkSource chunks(data, data_end - data, false); ChunkSource chunks(data, 1, data_end - data, false);
std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream( std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream(
i::ScannerStream::For(&chunks, v8::ScriptCompiler::StreamedSource::UTF8, i::ScannerStream::For(&chunks, v8::ScriptCompiler::StreamedSource::UTF8,
nullptr)); nullptr));
TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length, TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
start, end); start, end);
ChunkSource many_chunks(data, data_end - data, true); ChunkSource many_chunks(data, 1, data_end - data, true);
utf8_streaming_stream.reset(i::ScannerStream::For( utf8_streaming_stream.reset(i::ScannerStream::For(
&many_chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr)); &many_chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length, TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
...@@ -410,14 +412,14 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length, ...@@ -410,14 +412,14 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
reinterpret_cast<const uint8_t*>(two_byte_vector.begin()); reinterpret_cast<const uint8_t*>(two_byte_vector.begin());
const uint8_t* data_end = const uint8_t* data_end =
reinterpret_cast<const uint8_t*>(two_byte_vector.end()); reinterpret_cast<const uint8_t*>(two_byte_vector.end());
ChunkSource chunks(data, data_end - data, false); ChunkSource chunks(data, 2, data_end - data, false);
std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream( std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream(
i::ScannerStream::For( i::ScannerStream::For(
&chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr)); &chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));
TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(), TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
length, start, end); length, start, end);
ChunkSource many_chunks(data, data_end - data, true); ChunkSource many_chunks(data, 2, data_end - data, true);
two_byte_streaming_stream.reset(i::ScannerStream::For( two_byte_streaming_stream.reset(i::ScannerStream::For(
&many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr)); &many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));
TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(), TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
...@@ -459,7 +461,7 @@ TEST(Regress651333) { ...@@ -459,7 +461,7 @@ TEST(Regress651333) {
// Read len bytes from bytes, and compare against the expected unicode // Read len bytes from bytes, and compare against the expected unicode
// characters. Expect kBadChar ( == Unicode replacement char == code point // characters. Expect kBadChar ( == Unicode replacement char == code point
// 65533) instead of the incorrectly coded Latin1 char. // 65533) instead of the incorrectly coded Latin1 char.
ChunkSource chunks(bytes, len, false); ChunkSource chunks(bytes, 1, len, false);
std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For( std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
&chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr)); &chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
for (size_t i = 0; i < len; i++) { for (size_t i = 0; i < len; i++) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment