Commit 21e6831e, authored by vogelheim, committed by Commit bot

Fix DCHECK on SetBookmark.

The DCHECK was wrong. It assumed that, when a bookmark is set, the
scanner must be at a character boundary, and hence that the bookmark
never needs to save a 'partial' UTF-8 code point. The first part is
true - the Scanner is always at a character boundary - but the
'partial' UTF-8 code point lives at the end of a block, not at the
current character position of the Scanner.
Hence, the 'partial' character needs to be saved in the bookmark as well.

jkummerow: Thanks for noticing.

BUG=chromium:470930
R=jochen@chromium.org, jkummerow@chromium.org
LOG=N

Review URL: https://codereview.chromium.org/1154773004

Cr-Commit-Position: refs/heads/master@{#28661}
parent 2a058de8
......@@ -388,14 +388,13 @@ size_t ExternalStreamingStream::FillBuffer(size_t position) {
bool ExternalStreamingStream::SetBookmark() {
DCHECK(utf8_split_char_buffer_length_ == 0); // We can't be within a char.
// Bookmarking for this stream is a bit more complex than expected, since
// the stream state is distributed over several places:
// - pos_ (inherited from Utf16CharacterStream)
// - buffer_cursor_ and buffer_end_ (also from Utf16CharacterStream)
// - buffer_ (from BufferedUtf16CharacterStream)
// - current_data_ (+ .._offset_ and .._length) (this class)
// - utf8_split_char_buffer_* (a partial utf8 symbol at the block boundary)
//
// The underlying source_stream_ instance likely could re-construct this
// local data for us, but with the given interfaces we have no way of
......@@ -405,6 +404,7 @@ bool ExternalStreamingStream::SetBookmark() {
// - pos_ => bookmark_
// - buffer_[buffer_cursor_ .. buffer_end_] => bookmark_buffer_
// - current_data_[.._offset_ .. .._length_] => bookmark_data_
// - utf8_split_char_buffer_* => bookmark_utf8_split...
bookmark_ = pos_;
......@@ -419,6 +419,11 @@ bool ExternalStreamingStream::SetBookmark() {
CopyBytes(bookmark_data_.start(), current_data_ + current_data_offset_,
data_length);
bookmark_utf8_split_char_buffer_length_ = utf8_split_char_buffer_length_;
for (size_t i = 0; i < utf8_split_char_buffer_length_; i++) {
bookmark_utf8_split_char_buffer_[i] = utf8_split_char_buffer_[i];
}
return source_stream_->SetBookmark();
}
......@@ -439,6 +444,12 @@ void ExternalStreamingStream::ResetToBookmark() {
bookmark_buffer_.length());
buffer_cursor_ = buffer_;
buffer_end_ = buffer_ + bookmark_buffer_.length();
// utf8 split char buffer
utf8_split_char_buffer_length_ = bookmark_utf8_split_char_buffer_length_;
for (size_t i = 0; i < bookmark_utf8_split_char_buffer_length_; i++) {
utf8_split_char_buffer_[i] = bookmark_utf8_split_char_buffer_[i];
}
}
......
......@@ -93,7 +93,8 @@ class ExternalStreamingStream : public BufferedUtf16CharacterStream {
current_data_offset_(0),
current_data_length_(0),
utf8_split_char_buffer_length_(0),
bookmark_(0) {}
bookmark_(0),
bookmark_utf8_split_char_buffer_length_(0) {}
virtual ~ExternalStreamingStream() {
delete[] current_data_;
......@@ -133,6 +134,8 @@ class ExternalStreamingStream : public BufferedUtf16CharacterStream {
size_t bookmark_;
Vector<uint16_t> bookmark_buffer_;
Vector<uint8_t> bookmark_data_;
uint8_t bookmark_utf8_split_char_buffer_[4];
size_t bookmark_utf8_split_char_buffer_length_;
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment