Commit 3e9ba672 authored by Leszek Swirski's avatar Leszek Swirski Committed by V8 LUCI CQ

[scanner] Make position a parameter of ReadBlock

It's not obvious for Utf16CharacterStream::ReadBlock that the block it
has to read is implicitly the one at the current pos(), and it was
slightly odd how ReadBlockAt had to fiddle with buffer_* values to make
pos() return the desired value before ReadBlock is called -- especially
since ReadBlock would usually overwrite those changes.

Instead, we can just make ReadBlock take an explicit position, and get
rid of ReadBlockAt entirely.

As a drive-by, I was always confused by what the various buffer_*
actually mean (especially the difference between buffer_cursor_ and
buffer_pos_) so document them with some ASCII art.

Change-Id: I610019089920692f54e01ae979c0ba827779e414
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3217194
Auto-Submit: Leszek Swirski <leszeks@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: 's avatarToon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77315}
parent bc4ea5e0
......@@ -252,8 +252,7 @@ class BufferedCharacterStream : public Utf16CharacterStream {
}
protected:
bool ReadBlock() final {
size_t position = pos();
bool ReadBlock(size_t position) final {
buffer_pos_ = position;
buffer_start_ = &buffer_[0];
buffer_cursor_ = buffer_start_;
......@@ -309,8 +308,7 @@ class UnbufferedCharacterStream : public Utf16CharacterStream {
}
protected:
bool ReadBlock() final {
size_t position = pos();
bool ReadBlock(size_t position) final {
buffer_pos_ = position;
DisallowGarbageCollection no_gc;
Range<uint16_t> range =
......@@ -387,7 +385,7 @@ class BufferedUtf16CharacterStream : public Utf16CharacterStream {
protected:
static const size_t kBufferSize = 512;
bool ReadBlock() final;
bool ReadBlock(size_t position) final;
// FillBuffer should read up to kBufferSize characters at position and store
// them into buffer_[0..]. It returns the number of characters stored.
......@@ -401,10 +399,9 @@ class BufferedUtf16CharacterStream : public Utf16CharacterStream {
BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
: Utf16CharacterStream(buffer_, buffer_, buffer_, 0) {}
bool BufferedUtf16CharacterStream::ReadBlock() {
bool BufferedUtf16CharacterStream::ReadBlock(size_t position) {
DCHECK_EQ(buffer_start_, buffer_);
size_t position = pos();
buffer_pos_ = position;
buffer_cursor_ = buffer_;
buffer_end_ = buffer_ + FillBuffer(position);
......@@ -477,8 +474,7 @@ class Windows1252CharacterStream final : public Utf16CharacterStream {
}
protected:
bool ReadBlock() final {
size_t position = pos();
bool ReadBlock(size_t position) final {
buffer_pos_ = position;
buffer_start_ = &buffer_[0];
buffer_cursor_ = buffer_start_;
......
......@@ -54,7 +54,7 @@ class Utf16CharacterStream {
inline base::uc32 Peek() {
if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
return static_cast<base::uc32>(*buffer_cursor_);
} else if (ReadBlockChecked()) {
} else if (ReadBlockChecked(pos())) {
return static_cast<base::uc32>(*buffer_cursor_);
} else {
return kEndOfInput;
......@@ -83,7 +83,7 @@ class Utf16CharacterStream {
if (next_cursor_pos == buffer_end_) {
buffer_cursor_ = buffer_end_;
if (!ReadBlockChecked()) {
if (!ReadBlockChecked(pos())) {
buffer_cursor_++;
return kEndOfInput;
}
......@@ -103,7 +103,7 @@ class Utf16CharacterStream {
if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
buffer_cursor_--;
} else {
ReadBlockAt(pos() - 1);
ReadBlockChecked(pos() - 1);
}
}
......@@ -116,7 +116,7 @@ class Utf16CharacterStream {
pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
} else {
ReadBlockAt(pos);
ReadBlockChecked(pos);
}
}
......@@ -151,10 +151,15 @@ class Utf16CharacterStream {
buffer_pos_(buffer_pos) {}
Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}
bool ReadBlockChecked() {
size_t position = pos();
USE(position);
bool success = !has_parser_error() && ReadBlock();
bool ReadBlockChecked(size_t position) {
// The callers of this method (Back/Back2/Seek) should handle the easy
// case (seeking within the current buffer), and we should only get here
// if we actually require new data.
// (This is really an efficiency check, not a correctness invariant.)
DCHECK(position < buffer_pos_ ||
position >= buffer_pos_ + (buffer_end_ - buffer_start_));
bool success = !has_parser_error() && ReadBlock(position);
// Post-conditions: 1, We should always be at the right position.
// 2, Cursor should be inside the buffer.
......@@ -166,26 +171,11 @@ class Utf16CharacterStream {
return success;
}
void ReadBlockAt(size_t new_pos) {
// The callers of this method (Back/Back2/Seek) should handle the easy
// case (seeking within the current buffer), and we should only get here
// if we actually require new data.
// (This is really an efficiency check, not a correctness invariant.)
DCHECK(new_pos < buffer_pos_ ||
new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
// Change pos() to point to new_pos.
buffer_pos_ = new_pos;
buffer_cursor_ = buffer_start_;
DCHECK_EQ(pos(), new_pos);
ReadBlockChecked();
}
// Read more data, and update buffer_*_ to point to it.
// Returns true if more data was available.
//
// ReadBlock() may modify any of the buffer_*_ members, but must sure that
// the result of pos() remains unaffected.
// ReadBlock(position) may modify any of the buffer_*_ members, but must make
// sure that the result of pos() becomes |position|.
//
// Examples:
// - a stream could either fill a separate buffer. Then buffer_start_ and
......@@ -195,8 +185,21 @@ class Utf16CharacterStream {
// buffer_end_ to cover the full chunk, and then buffer_cursor_ would
// point into the middle of the buffer, while buffer_pos_ would describe
// the start of the buffer.
virtual bool ReadBlock() = 0;
virtual bool ReadBlock(size_t position) = 0;
// Fields describing the location of the current buffer physically in memory,
// and semantically within the source string.
//
// 0 buffer_pos_ pos()
// | | |
// v________________________v___v_____________
// | | | |
// Source string: | | Buffer | |
// |________________________|________|________|
// ^ ^ ^
// | | |
// Pointers: buffer_start_ | buffer_end_
// buffer_cursor_
const uint16_t* buffer_start_;
const uint16_t* buffer_cursor_;
const uint16_t* buffer_end_;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment