Commit a661f611, authored by jochen, committed by Commit bot

Implement a character stream for external one byte streams

In contrast to the generic stream, this character stream works without
accessing the heap, and can be used on a background thread.

BUG=v8:5215
R=vogelheim@chromium.org,marja@chromium.org

Review-Url: https://codereview.chromium.org/2184393002
Cr-Commit-Position: refs/heads/master@{#38154}
parent 726520d6
...@@ -888,17 +888,19 @@ FunctionLiteral* Parser::ParseProgram(Isolate* isolate, ParseInfo* info) { ...@@ -888,17 +888,19 @@ FunctionLiteral* Parser::ParseProgram(Isolate* isolate, ParseInfo* info) {
source = String::Flatten(source); source = String::Flatten(source);
FunctionLiteral* result; FunctionLiteral* result;
{
std::unique_ptr<Utf16CharacterStream> stream;
if (source->IsExternalTwoByteString()) { if (source->IsExternalTwoByteString()) {
// Notice that the stream is destroyed at the end of the branch block. stream.reset(new ExternalTwoByteStringUtf16CharacterStream(
// The last line of the blocks can't be moved outside, even though they're Handle<ExternalTwoByteString>::cast(source), 0, source->length()));
// identical calls. } else if (source->IsExternalOneByteString()) {
ExternalTwoByteStringUtf16CharacterStream stream( stream.reset(new ExternalOneByteStringUtf16CharacterStream(
Handle<ExternalTwoByteString>::cast(source), 0, source->length()); Handle<ExternalOneByteString>::cast(source), 0, source->length()));
scanner_.Initialize(&stream);
result = DoParseProgram(info);
} else { } else {
GenericStringUtf16CharacterStream stream(source, 0, source->length()); stream.reset(
scanner_.Initialize(&stream); new GenericStringUtf16CharacterStream(source, 0, source->length()));
}
scanner_.Initialize(stream.get());
result = DoParseProgram(info); result = DoParseProgram(info);
} }
if (result != NULL) { if (result != NULL) {
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
#include "src/globals.h" #include "src/globals.h"
#include "src/handles.h" #include "src/handles.h"
#include "src/list-inl.h" // TODO(mstarzinger): Temporary cycle breaker! #include "src/list-inl.h" // TODO(mstarzinger): Temporary cycle breaker!
#include "src/objects.h" #include "src/objects-inl.h"
#include "src/unicode-inl.h" #include "src/unicode-inl.h"
namespace v8 { namespace v8 {
...@@ -559,15 +559,11 @@ void ExternalStreamingStream::HandleUtf8SplitCharacters( ...@@ -559,15 +559,11 @@ void ExternalStreamingStream::HandleUtf8SplitCharacters(
ExternalTwoByteStringUtf16CharacterStream:: ExternalTwoByteStringUtf16CharacterStream::
~ExternalTwoByteStringUtf16CharacterStream() { } ~ExternalTwoByteStringUtf16CharacterStream() { }
ExternalTwoByteStringUtf16CharacterStream:: ExternalTwoByteStringUtf16CharacterStream::
ExternalTwoByteStringUtf16CharacterStream( ExternalTwoByteStringUtf16CharacterStream(
Handle<ExternalTwoByteString> data, int start_position, Handle<ExternalTwoByteString> data, int start_position,
int end_position) int end_position)
: Utf16CharacterStream(), : raw_data_(data->GetTwoByteData(start_position)), bookmark_(kNoBookmark) {
source_(data),
raw_data_(data->GetTwoByteData(start_position)),
bookmark_(kNoBookmark) {
buffer_cursor_ = raw_data_, buffer_cursor_ = raw_data_,
buffer_end_ = raw_data_ + (end_position - start_position); buffer_end_ = raw_data_ + (end_position - start_position);
pos_ = start_position; pos_ = start_position;
...@@ -585,5 +581,52 @@ void ExternalTwoByteStringUtf16CharacterStream::ResetToBookmark() { ...@@ -585,5 +581,52 @@ void ExternalTwoByteStringUtf16CharacterStream::ResetToBookmark() {
pos_ = bookmark_; pos_ = bookmark_;
buffer_cursor_ = raw_data_ + bookmark_; buffer_cursor_ = raw_data_ + bookmark_;
} }
// ----------------------------------------------------------------------------
// ExternalOneByteStringUtf16CharacterStream
// Out-of-line destructor. It performs no work of its own; in particular
// nothing reachable through raw_data_ is released here.
ExternalOneByteStringUtf16CharacterStream::
    ~ExternalOneByteStringUtf16CharacterStream() = default;
// Builds a UTF-16 character stream over the one-byte characters of |data|.
//
// |start_position| becomes the initial stream position (pos_) and
// |end_position| is stored as the exclusive upper bound (length_) that
// FillBuffer() and BufferSeekForward() clamp against. Only the pointer
// returned by data->GetChars() is kept; the handle itself is not stored.
// The bookmark starts out unset (kNoBookmark).
ExternalOneByteStringUtf16CharacterStream::
    ExternalOneByteStringUtf16CharacterStream(
        Handle<ExternalOneByteString> data, int start_position,
        int end_position)
    : raw_data_(data->GetChars()),
      length_(static_cast<size_t>(end_position)),
      bookmark_(kNoBookmark) {
  // end_position is stored in a size_t (length_) and start_position seeds
  // pos_, which is later handed to FillBuffer() as a size_t. A negative value
  // would silently wrap to a huge offset, so reject it in debug builds.
  DCHECK(start_position >= 0);
  DCHECK(end_position >= start_position);
  pos_ = start_position;
}
// Records the current stream position (pos_) in bookmark_ so that
// ResetToBookmark() can later return to it. Always returns true.
bool ExternalOneByteStringUtf16CharacterStream::SetBookmark() {
bookmark_ = pos_;
return true;
}
void ExternalOneByteStringUtf16CharacterStream::ResetToBookmark() {
DCHECK(bookmark_ != kNoBookmark);
pos_ = bookmark_;
buffer_cursor_ = buffer_;
buffer_end_ = buffer_ + FillBuffer(pos_);
}
// Moves the stream position forward by up to |delta| characters, clamping the
// new position to length_, then calls ReadBlock(). Returns how far pos_ ended
// up from where it was on entry.
size_t ExternalOneByteStringUtf16CharacterStream::BufferSeekForward(
    size_t delta) {
  const size_t start = pos_;
  const size_t target = Min(pos_ + delta, length_);
  pos_ = target;
  ReadBlock();
  return pos_ - start;
}
// Copies up to kBufferSize characters, starting at |from_pos| in raw_data_,
// into buffer_, widening each one-byte character to uc16. Returns the number
// of characters written, which is 0 when |from_pos| is at or past length_.
size_t ExternalOneByteStringUtf16CharacterStream::FillBuffer(size_t from_pos) {
  if (from_pos < length_) {
    // Never read past length_, and never write more than the buffer holds.
    const size_t count = Min(kBufferSize, length_ - from_pos);
    const uint8_t* src = raw_data_ + from_pos;
    for (size_t i = 0; i != count; ++i) {
      buffer_[i] = static_cast<uc16>(src[i]);
    }
    return count;
  }
  return 0;
}
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8
...@@ -14,6 +14,7 @@ namespace internal { ...@@ -14,6 +14,7 @@ namespace internal {
// Forward declarations. // Forward declarations.
class ExternalTwoByteString; class ExternalTwoByteString;
class ExternalOneByteString;
// A buffered character stream based on a random access character // A buffered character stream based on a random access character
// source (ReadBlock can be called with pos_ pointing to any position, // source (ReadBlock can be called with pos_ pointing to any position,
...@@ -167,7 +168,7 @@ class ExternalTwoByteStringUtf16CharacterStream: public Utf16CharacterStream { ...@@ -167,7 +168,7 @@ class ExternalTwoByteStringUtf16CharacterStream: public Utf16CharacterStream {
bool SetBookmark() override; bool SetBookmark() override;
void ResetToBookmark() override; void ResetToBookmark() override;
protected: private:
size_t SlowSeekForward(size_t delta) override { size_t SlowSeekForward(size_t delta) override {
// Fast case always handles seeking. // Fast case always handles seeking.
return 0; return 0;
...@@ -176,12 +177,33 @@ class ExternalTwoByteStringUtf16CharacterStream: public Utf16CharacterStream { ...@@ -176,12 +177,33 @@ class ExternalTwoByteStringUtf16CharacterStream: public Utf16CharacterStream {
// Entire string is read at start. // Entire string is read at start.
return false; return false;
} }
Handle<ExternalTwoByteString> source_;
const uc16* raw_data_; // Pointer to the actual array of characters. const uc16* raw_data_; // Pointer to the actual array of characters.
static const size_t kNoBookmark = -1;
size_t bookmark_;
};
// UTF16 buffer to read characters from an external latin1 string.
class ExternalOneByteStringUtf16CharacterStream
: public BufferedUtf16CharacterStream {
public:
ExternalOneByteStringUtf16CharacterStream(Handle<ExternalOneByteString> data,
int start_position,
int end_position);
~ExternalOneByteStringUtf16CharacterStream() override;
bool SetBookmark() override;
void ResetToBookmark() override;
private: private:
static const size_t kNoBookmark = -1; static const size_t kNoBookmark = -1;
size_t BufferSeekForward(size_t delta) override;
size_t FillBuffer(size_t position) override;
const uint8_t* raw_data_; // Pointer to the actual array of characters.
size_t length_;
size_t bookmark_; size_t bookmark_;
}; };
......
...@@ -594,9 +594,16 @@ void TestCharacterStream(const char* one_byte_source, unsigned length, ...@@ -594,9 +594,16 @@ void TestCharacterStream(const char* one_byte_source, unsigned length,
TestExternalResource resource(uc16_buffer.get(), length); TestExternalResource resource(uc16_buffer.get(), length);
i::Handle<i::String> uc16_string( i::Handle<i::String> uc16_string(
factory->NewExternalStringFromTwoByte(&resource).ToHandleChecked()); factory->NewExternalStringFromTwoByte(&resource).ToHandleChecked());
ScriptResource one_byte_resource(one_byte_source, length);
i::Handle<i::String> ext_one_byte_string(
factory->NewExternalStringFromOneByte(&one_byte_resource)
.ToHandleChecked());
i::ExternalTwoByteStringUtf16CharacterStream uc16_stream( i::ExternalTwoByteStringUtf16CharacterStream uc16_stream(
i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end); i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
i::ExternalOneByteStringUtf16CharacterStream one_byte_stream(
i::Handle<i::ExternalOneByteString>::cast(ext_one_byte_string), start,
end);
i::GenericStringUtf16CharacterStream string_stream(one_byte_string, start, i::GenericStringUtf16CharacterStream string_stream(one_byte_string, start,
end); end);
i::Utf8ToUtf16CharacterStream utf8_stream( i::Utf8ToUtf16CharacterStream utf8_stream(
...@@ -609,17 +616,21 @@ void TestCharacterStream(const char* one_byte_source, unsigned length, ...@@ -609,17 +616,21 @@ void TestCharacterStream(const char* one_byte_source, unsigned length,
CHECK_EQU(i, uc16_stream.pos()); CHECK_EQU(i, uc16_stream.pos());
CHECK_EQU(i, string_stream.pos()); CHECK_EQU(i, string_stream.pos());
CHECK_EQU(i, utf8_stream.pos()); CHECK_EQU(i, utf8_stream.pos());
CHECK_EQU(i, one_byte_stream.pos());
int32_t c0 = one_byte_source[i]; int32_t c0 = one_byte_source[i];
int32_t c1 = uc16_stream.Advance(); int32_t c1 = uc16_stream.Advance();
int32_t c2 = string_stream.Advance(); int32_t c2 = string_stream.Advance();
int32_t c3 = utf8_stream.Advance(); int32_t c3 = utf8_stream.Advance();
int32_t c4 = one_byte_stream.Advance();
i++; i++;
CHECK_EQ(c0, c1); CHECK_EQ(c0, c1);
CHECK_EQ(c0, c2); CHECK_EQ(c0, c2);
CHECK_EQ(c0, c3); CHECK_EQ(c0, c3);
CHECK_EQ(c0, c4);
CHECK_EQU(i, uc16_stream.pos()); CHECK_EQU(i, uc16_stream.pos());
CHECK_EQU(i, string_stream.pos()); CHECK_EQU(i, string_stream.pos());
CHECK_EQU(i, utf8_stream.pos()); CHECK_EQU(i, utf8_stream.pos());
CHECK_EQU(i, one_byte_stream.pos());
} }
while (i > start + sub_length / 4) { while (i > start + sub_length / 4) {
// Pushback, re-read, pushback again. // Pushback, re-read, pushback again.
...@@ -627,64 +638,80 @@ void TestCharacterStream(const char* one_byte_source, unsigned length, ...@@ -627,64 +638,80 @@ void TestCharacterStream(const char* one_byte_source, unsigned length,
CHECK_EQU(i, uc16_stream.pos()); CHECK_EQU(i, uc16_stream.pos());
CHECK_EQU(i, string_stream.pos()); CHECK_EQU(i, string_stream.pos());
CHECK_EQU(i, utf8_stream.pos()); CHECK_EQU(i, utf8_stream.pos());
CHECK_EQU(i, one_byte_stream.pos());
uc16_stream.PushBack(c0); uc16_stream.PushBack(c0);
string_stream.PushBack(c0); string_stream.PushBack(c0);
utf8_stream.PushBack(c0); utf8_stream.PushBack(c0);
one_byte_stream.PushBack(c0);
i--; i--;
CHECK_EQU(i, uc16_stream.pos()); CHECK_EQU(i, uc16_stream.pos());
CHECK_EQU(i, string_stream.pos()); CHECK_EQU(i, string_stream.pos());
CHECK_EQU(i, utf8_stream.pos()); CHECK_EQU(i, utf8_stream.pos());
CHECK_EQU(i, one_byte_stream.pos());
int32_t c1 = uc16_stream.Advance(); int32_t c1 = uc16_stream.Advance();
int32_t c2 = string_stream.Advance(); int32_t c2 = string_stream.Advance();
int32_t c3 = utf8_stream.Advance(); int32_t c3 = utf8_stream.Advance();
int32_t c4 = one_byte_stream.Advance();
i++; i++;
CHECK_EQU(i, uc16_stream.pos()); CHECK_EQU(i, uc16_stream.pos());
CHECK_EQU(i, string_stream.pos()); CHECK_EQU(i, string_stream.pos());
CHECK_EQU(i, utf8_stream.pos()); CHECK_EQU(i, utf8_stream.pos());
CHECK_EQU(i, one_byte_stream.pos());
CHECK_EQ(c0, c1); CHECK_EQ(c0, c1);
CHECK_EQ(c0, c2); CHECK_EQ(c0, c2);
CHECK_EQ(c0, c3); CHECK_EQ(c0, c3);
CHECK_EQ(c0, c4);
uc16_stream.PushBack(c0); uc16_stream.PushBack(c0);
string_stream.PushBack(c0); string_stream.PushBack(c0);
utf8_stream.PushBack(c0); utf8_stream.PushBack(c0);
one_byte_stream.PushBack(c0);
i--; i--;
CHECK_EQU(i, uc16_stream.pos()); CHECK_EQU(i, uc16_stream.pos());
CHECK_EQU(i, string_stream.pos()); CHECK_EQU(i, string_stream.pos());
CHECK_EQU(i, utf8_stream.pos()); CHECK_EQU(i, utf8_stream.pos());
CHECK_EQU(i, one_byte_stream.pos());
} }
unsigned halfway = start + sub_length / 2; unsigned halfway = start + sub_length / 2;
uc16_stream.SeekForward(halfway - i); uc16_stream.SeekForward(halfway - i);
string_stream.SeekForward(halfway - i); string_stream.SeekForward(halfway - i);
utf8_stream.SeekForward(halfway - i); utf8_stream.SeekForward(halfway - i);
one_byte_stream.SeekForward(halfway - i);
i = halfway; i = halfway;
CHECK_EQU(i, uc16_stream.pos()); CHECK_EQU(i, uc16_stream.pos());
CHECK_EQU(i, string_stream.pos()); CHECK_EQU(i, string_stream.pos());
CHECK_EQU(i, utf8_stream.pos()); CHECK_EQU(i, utf8_stream.pos());
CHECK_EQU(i, one_byte_stream.pos());
while (i < end) { while (i < end) {
// Read streams one char at a time // Read streams one char at a time
CHECK_EQU(i, uc16_stream.pos()); CHECK_EQU(i, uc16_stream.pos());
CHECK_EQU(i, string_stream.pos()); CHECK_EQU(i, string_stream.pos());
CHECK_EQU(i, utf8_stream.pos()); CHECK_EQU(i, utf8_stream.pos());
CHECK_EQU(i, one_byte_stream.pos());
int32_t c0 = one_byte_source[i]; int32_t c0 = one_byte_source[i];
int32_t c1 = uc16_stream.Advance(); int32_t c1 = uc16_stream.Advance();
int32_t c2 = string_stream.Advance(); int32_t c2 = string_stream.Advance();
int32_t c3 = utf8_stream.Advance(); int32_t c3 = utf8_stream.Advance();
int32_t c4 = one_byte_stream.Advance();
i++; i++;
CHECK_EQ(c0, c1); CHECK_EQ(c0, c1);
CHECK_EQ(c0, c2); CHECK_EQ(c0, c2);
CHECK_EQ(c0, c3); CHECK_EQ(c0, c3);
CHECK_EQ(c0, c4);
CHECK_EQU(i, uc16_stream.pos()); CHECK_EQU(i, uc16_stream.pos());
CHECK_EQU(i, string_stream.pos()); CHECK_EQU(i, string_stream.pos());
CHECK_EQU(i, utf8_stream.pos()); CHECK_EQU(i, utf8_stream.pos());
CHECK_EQU(i, one_byte_stream.pos());
} }
int32_t c1 = uc16_stream.Advance(); int32_t c1 = uc16_stream.Advance();
int32_t c2 = string_stream.Advance(); int32_t c2 = string_stream.Advance();
int32_t c3 = utf8_stream.Advance(); int32_t c3 = utf8_stream.Advance();
int32_t c4 = one_byte_stream.Advance();
CHECK_LT(c1, 0); CHECK_LT(c1, 0);
CHECK_LT(c2, 0); CHECK_LT(c2, 0);
CHECK_LT(c3, 0); CHECK_LT(c3, 0);
CHECK_LT(c4, 0);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment