Commit 12a63796, authored by Toon Verwaest, committed by Commit Bot

[scanner-streams] Add relocatable character stream for on-heap utf16 streams

Change-Id: I388f6a6c937b6897efe9e88b06ba4b56670fea4f
Reviewed-on: https://chromium-review.googlesource.com/1151191
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Michael Lippautz <mlippautz@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Cr-Commit-Position: refs/heads/master@{#54720}
parent 78e8ccff
...@@ -165,7 +165,7 @@ class BufferedCharacterStream : public Utf16CharacterStream { ...@@ -165,7 +165,7 @@ class BufferedCharacterStream : public Utf16CharacterStream {
} }
protected: protected:
bool ReadBlock() override { bool ReadBlock() final {
size_t position = pos(); size_t position = pos();
buffer_pos_ = position; buffer_pos_ = position;
buffer_start_ = &buffer_[0]; buffer_start_ = &buffer_[0];
...@@ -183,9 +183,7 @@ class BufferedCharacterStream : public Utf16CharacterStream { ...@@ -183,9 +183,7 @@ class BufferedCharacterStream : public Utf16CharacterStream {
return true; return true;
} }
bool can_access_heap() override { bool can_access_heap() final { return ByteStream<uint16_t>::kCanAccessHeap; }
return ByteStream<uint16_t>::kCanAccessHeap;
}
private: private:
static const size_t kBufferSize = 512; static const size_t kBufferSize = 512;
...@@ -200,12 +198,11 @@ class UnbufferedCharacterStream : public Utf16CharacterStream { ...@@ -200,12 +198,11 @@ class UnbufferedCharacterStream : public Utf16CharacterStream {
public: public:
template <class... TArgs> template <class... TArgs>
UnbufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) { UnbufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
DCHECK(!ByteStream<uint16_t>::kCanAccessHeap);
buffer_pos_ = pos; buffer_pos_ = pos;
} }
protected: protected:
bool ReadBlock() override { bool ReadBlock() final {
size_t position = pos(); size_t position = pos();
buffer_pos_ = position; buffer_pos_ = position;
Range<uint16_t> range = byte_stream_.GetDataAt(position); Range<uint16_t> range = byte_stream_.GetDataAt(position);
...@@ -219,12 +216,50 @@ class UnbufferedCharacterStream : public Utf16CharacterStream { ...@@ -219,12 +216,50 @@ class UnbufferedCharacterStream : public Utf16CharacterStream {
return true; return true;
} }
bool can_access_heap() override { return false; } bool can_access_heap() final { return ByteStream<uint16_t>::kCanAccessHeap; }
private:
ByteStream<uint16_t> byte_stream_; ByteStream<uint16_t> byte_stream_;
}; };
// An unbuffered utf-16 character stream over an OnHeapStream that keeps its
// buffer pointers valid when the garbage collector moves the backing string.
//
// The inherited buffer_start_ / buffer_cursor_ / buffer_end_ pointers are taken
// from the range handed out by byte_stream_. The stream therefore registers a
// GC epilogue callback that re-reads that range and rebases the pointers.
class RelocatingCharacterStream
    : public UnbufferedCharacterStream<OnHeapStream> {
 public:
  // Creates the stream at character position |pos|; |args| are passed on to
  // the UnbufferedCharacterStream base. Registers this object with |isolate|'s
  // heap so that UpdateBufferPointers() runs after every GC type.
  template <class... TArgs>
  RelocatingCharacterStream(Isolate* isolate, size_t pos, TArgs... args)
      : UnbufferedCharacterStream<OnHeapStream>(pos, args...),
        isolate_(isolate) {
    isolate->heap()->AddGCEpilogueCallback(UpdateBufferPointersCallback,
                                           v8::kGCTypeAll, this);
  }

 private:
  // Unregisters the callback added by the constructor so the heap never calls
  // back into a destroyed stream.
  ~RelocatingCharacterStream() final {
    isolate_->heap()->RemoveGCEpilogueCallback(UpdateBufferPointersCallback,
                                               this);
  }

  // Trampoline with the GC callback signature. |stream| is the |this| pointer
  // supplied at registration; the remaining arguments are unused.
  static void UpdateBufferPointersCallback(v8::Isolate* v8_isolate,
                                           v8::GCType type,
                                           v8::GCCallbackFlags flags,
                                           void* stream) {
    // |stream| started life as a RelocatingCharacterStream*, so static_cast is
    // the matching conversion back from void*.
    static_cast<RelocatingCharacterStream*>(stream)->UpdateBufferPointers();
  }

  // Re-reads the data range from byte_stream_ and, if it moved, rebases the
  // buffer pointers while keeping the cursor's offset into the buffer.
  void UpdateBufferPointers() {
    // ReadBlock() fetches its range with GetDataAt(buffer_pos_), so the
    // refreshed range has to be requested for that same position. Asking for
    // position 0 would rebase a stream positioned elsewhere onto the wrong
    // character and leave the pointers out of step with buffer_pos_.
    Range<uint16_t> range = byte_stream_.GetDataAt(buffer_pos_);
    if (range.start != buffer_start_) {
      buffer_cursor_ = (buffer_cursor_ - buffer_start_) + range.start;
      buffer_start_ = range.start;
      buffer_end_ = range.end;
    }
  }

  // Isolate whose heap holds the callback registration.
  Isolate* isolate_;
};
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// BufferedUtf16CharacterStreams // BufferedUtf16CharacterStreams
// //
...@@ -240,7 +275,7 @@ class BufferedUtf16CharacterStream : public Utf16CharacterStream { ...@@ -240,7 +275,7 @@ class BufferedUtf16CharacterStream : public Utf16CharacterStream {
protected: protected:
static const size_t kBufferSize = 512; static const size_t kBufferSize = 512;
bool ReadBlock() override; bool ReadBlock() final;
// FillBuffer should read up to kBufferSize characters at position and store // FillBuffer should read up to kBufferSize characters at position and store
// them into buffer_[0..]. It returns the number of characters stored. // them into buffer_[0..]. It returns the number of characters stored.
...@@ -285,14 +320,14 @@ class Utf8ExternalStreamingStream : public BufferedUtf16CharacterStream { ...@@ -285,14 +320,14 @@ class Utf8ExternalStreamingStream : public BufferedUtf16CharacterStream {
: current_({0, {0, 0, 0, unibrow::Utf8::State::kAccept}}), : current_({0, {0, 0, 0, unibrow::Utf8::State::kAccept}}),
source_stream_(source_stream), source_stream_(source_stream),
stats_(stats) {} stats_(stats) {}
~Utf8ExternalStreamingStream() override { ~Utf8ExternalStreamingStream() final {
for (size_t i = 0; i < chunks_.size(); i++) delete[] chunks_[i].data; for (size_t i = 0; i < chunks_.size(); i++) delete[] chunks_[i].data;
} }
bool can_access_heap() override { return false; } bool can_access_heap() final { return false; }
protected: protected:
size_t FillBuffer(size_t position) override; size_t FillBuffer(size_t position) final;
private: private:
// A position within the data stream. It stores: // A position within the data stream. It stores:
...@@ -585,9 +620,10 @@ Utf16CharacterStream* ScannerStream::For(Isolate* isolate, Handle<String> data, ...@@ -585,9 +620,10 @@ Utf16CharacterStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
static_cast<size_t>(start_pos), Handle<SeqOneByteString>::cast(data), static_cast<size_t>(start_pos), Handle<SeqOneByteString>::cast(data),
start_offset, static_cast<size_t>(end_pos)); start_offset, static_cast<size_t>(end_pos));
} else if (data->IsSeqTwoByteString()) { } else if (data->IsSeqTwoByteString()) {
return new BufferedCharacterStream<uint16_t, OnHeapStream>( return new RelocatingCharacterStream(
static_cast<size_t>(start_pos), Handle<SeqTwoByteString>::cast(data), isolate, static_cast<size_t>(start_pos),
start_offset, static_cast<size_t>(end_pos)); Handle<SeqTwoByteString>::cast(data), start_offset,
static_cast<size_t>(end_pos));
} else { } else {
UNREACHABLE(); UNREACHABLE();
} }
......
...@@ -560,3 +560,34 @@ TEST(TestOverlongAndInvalidSequences) { ...@@ -560,3 +560,34 @@ TEST(TestOverlongAndInvalidSequences) {
CHECK_EQ(unicode_expected.size(), arraysize(cases)); CHECK_EQ(unicode_expected.size(), arraysize(cases));
TestChunkStreamAgainstReference(cases, unicode_expected); TestChunkStreamAgainstReference(cases, unicode_expected);
} }
// Checks that a character stream created over an on-heap two-byte string keeps
// returning the right characters after a garbage collection moved the string.
TEST(RelocatingCharacterStream) {
  ManualGCScope manual_gc_scope;
  CcTest::InitializeVM();
  i::Isolate* internal_isolate = CcTest::i_isolate();
  v8::HandleScope scope(CcTest::isolate());

  // Widen the source text to uc16 so the factory builds a two-byte string.
  const char* source = "abcd";
  int char_count = static_cast<int>(strlen(source));
  std::unique_ptr<i::uc16[]> wide_chars(new i::uc16[char_count]);
  for (int index = 0; index < char_count; index++) {
    wide_chars[index] = source[index];
  }
  i::Vector<const i::uc16> wide_view(wide_chars.get(), char_count);

  // Allocated NOT_TENURED; the CHECK_NE below verifies that the GC really
  // moved it.
  i::Handle<i::String> heap_string =
      internal_isolate->factory()
          ->NewStringFromTwoByte(wide_view, i::NOT_TENURED)
          .ToHandleChecked();
  std::unique_ptr<i::Utf16CharacterStream> stream(
      i::ScannerStream::For(internal_isolate, heap_string, 0, char_count));

  // Consume the first half before the GC.
  CHECK_EQ('a', stream->Advance());
  CHECK_EQ('b', stream->Advance());
  CHECK_EQ(size_t{2}, stream->pos());

  // Remember the pre-GC address purely for comparison; it is never
  // dereferenced afterwards.
  i::String* address_before_gc = *heap_string;
  internal_isolate->heap()->CollectGarbage(
      i::NEW_SPACE, i::GarbageCollectionReason::kUnknown);
  // GC moved the string.
  CHECK_NE(address_before_gc, *heap_string);

  // The stream must continue from where it left off.
  CHECK_EQ('c', stream->Advance());
  CHECK_EQ('d', stream->Advance());
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment