Revert "[scanner] Rewrite character streams by separating underlying bytestreams from buffering."

This reverts commit 5f2f418d.

Reason for revert: Speculative revert for LayoutTest timeouts

https://ci.chromium.org/buildbot/client.v8.fyi/V8-Blink%20Linux%2064/24596
https://ci.chromium.org/p/v8/builders/luci.v8.ci/V8-Blink%20Linux%2064%20-%20future/4707
https://ci.chromium.org/buildbot/client.v8.fyi/V8-Blink%20Linux%2064%20(dbg)/12467

Original change's description:
> [scanner] Rewrite character streams by separating underlying bytestreams from buffering.
>
> Additionally now we only scan over flat heap strings.
>
> Change-Id: Ic449b19aecd7fc3f283a04a3df6a39772d471565
> Reviewed-on: https://chromium-review.googlesource.com/1125854
> Reviewed-by: Marja Hölttä <marja@chromium.org>
> Commit-Queue: Toon Verwaest <verwaest@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#54224}

TBR=marja@chromium.org,verwaest@chromium.org

Change-Id: Ica3026f318a85ec6bb24a38a8cd998f12c146d7e
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://chromium-review.googlesource.com/1126819
Reviewed-by: Yang Guo <yangguo@chromium.org>
Commit-Queue: Yang Guo <yangguo@chromium.org>
Cr-Commit-Position: refs/heads/master@{#54231}

Revert "[scanner] Rewrite character streams by separating underlying bytestreams from buffering."
This reverts commit 5f2f418d.

Reason for revert: Speculative revert for LayoutTest timeouts

https://ci.chromium.org/buildbot/client.v8.fyi/V8-Blink%20Linux%2064/24596
https://ci.chromium.org/p/v8/builders/luci.v8.ci/V8-Blink%20Linux%2064%20-%20future/4707
https://ci.chromium.org/buildbot/client.v8.fyi/V8-Blink%20Linux%2064%20(dbg)/12467

Original change's description:
> [scanner] Rewrite character streams by separating underlying bytestreams from buffering.
>
> Additionally now we only scan over flat heap strings.
>
> Change-Id: Ic449b19aecd7fc3f283a04a3df6a39772d471565
> Reviewed-on: https://chromium-review.googlesource.com/1125854
> Reviewed-by: Marja Hölttä <marja@chromium.org>
> Commit-Queue: Toon Verwaest <verwaest@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#54224}

TBR=marja@chromium.org,verwaest@chromium.org

Change-Id: Ica3026f318a85ec6bb24a38a8cd998f12c146d7e
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://chromium-review.googlesource.com/1126819
Reviewed-by: Yang Guo <yangguo@chromium.org>
Commit-Queue: Yang Guo <yangguo@chromium.org>
Cr-Commit-Position: refs/heads/master@{#54231}
0973a408 · Yang Guo · Commit Bot · 064a3b18 · 0973a408 · 0973a408
Commit 0973a408 authored Jul 05, 2018 by Yang Guo Committed by Commit Bot Jul 05, 2018
6 changed files
--- a/src/compiler-dispatcher/unoptimized-compile-job.cc
+++ b/src/compiler-dispatcher/unoptimized-compile-job.cc
@@ -134,7 +134,7 @@ void UnoptimizedCompileJob::PrepareOnMainThread(Isolate* isolate) {
  Handle<String> source(String::cast(script->source()), isolate);
  if (source->IsExternalTwoByteString() || source->IsExternalOneByteString()) {
    std::unique_ptr<Utf16CharacterStream> stream(ScannerStream::For(
-        isolate, source, shared_->StartPosition(), shared_->EndPosition()));
+        source, shared_->StartPosition(), shared_->EndPosition()));
    parse_info_->set_character_stream(std::move(stream));
  } else {
    source = String::Flatten(isolate, source);
@@ -192,7 +192,7 @@ void UnoptimizedCompileJob::PrepareOnMainThread(Isolate* isolate) {
    }
    wrapper_ = isolate->global_handles()->Create(*wrapper);
    std::unique_ptr<Utf16CharacterStream> stream(
-        ScannerStream::For(isolate, wrapper_, shared_->StartPosition() - offset,
+        ScannerStream::For(wrapper_, shared_->StartPosition() - offset,
                           shared_->EndPosition() - offset));
    parse_info_->set_character_stream(std::move(stream));
  }

--- a/src/parsing/parsing.cc
+++ b/src/parsing/parsing.cc
@@ -25,9 +25,9 @@ bool ParseProgram(ParseInfo* info, Isolate* isolate) {

  // Create a character stream for the parser.
  Handle<String> source(String::cast(info->script()->source()), isolate);
+  source = String::Flatten(isolate, source);
  isolate->counters()->total_parse_size()->Increment(source->length());
-  std::unique_ptr<Utf16CharacterStream> stream(
-      ScannerStream::For(isolate, source));
+  std::unique_ptr<Utf16CharacterStream> stream(ScannerStream::For(source));
  info->set_character_stream(std::move(stream));

  Parser parser(info);
@@ -60,10 +60,10 @@ bool ParseFunction(ParseInfo* info, Handle<SharedFunctionInfo> shared_info,

  // Create a character stream for the parser.
  Handle<String> source(String::cast(info->script()->source()), isolate);
+  source = String::Flatten(isolate, source);
  isolate->counters()->total_parse_size()->Increment(source->length());
-  std::unique_ptr<Utf16CharacterStream> stream(
-      ScannerStream::For(isolate, source, shared_info->StartPosition(),
-                         shared_info->EndPosition()));
+  std::unique_ptr<Utf16CharacterStream> stream(ScannerStream::For(
+      source, shared_info->StartPosition(), shared_info->EndPosition()));
  info->set_character_stream(std::move(stream));

  VMState<PARSER> state(isolate);

--- a/src/parsing/scanner-character-streams.cc
+++ b/src/parsing/scanner-character-streams.cc
@@ -19,335 +19,175 @@ namespace {
 const unibrow::uchar kUtf8Bom = 0xFEFF;
 }  // namespace

-template <typename Char>
-struct HeapStringType;
-
-template <>
-struct HeapStringType<uint8_t> {
-  typedef SeqOneByteString String;
-};
-
-template <>
-struct HeapStringType<uint16_t> {
-  typedef SeqTwoByteString String;
-};
-
-template <typename Char>
-struct Range {
-  const Char* start;
-  const Char* end;
-
-  size_t length() { return static_cast<size_t>(end - start); }
-  bool empty() const { return start == end; }
-  bool unaligned_start() const {
-    return reinterpret_cast<intptr_t>(start) % 2 == 1;
-  }
-};
-
-// A Char stream backed by an on-heap SeqOneByteString or SeqTwoByteString.
-template <typename Char>
-class OnHeapStream {
+// ----------------------------------------------------------------------------
+// BufferedUtf16CharacterStreams
+//
+// A buffered character stream based on a random access character
+// source (ReadBlock can be called with pos() pointing to any position,
+// even positions before the current).
+class BufferedUtf16CharacterStream : public Utf16CharacterStream {
 public:
-  typedef typename HeapStringType<Char>::String String;
+  BufferedUtf16CharacterStream();

-  OnHeapStream(Handle<String> string, size_t start_offset, size_t end)
-      : string_(string), start_offset_(start_offset), length_(end) {}
+ protected:
+  static const size_t kBufferSize = 512;

-  Range<Char> GetDataAt(size_t pos) {
-    return {&string_->GetChars()[start_offset_ + Min(length_, pos)],
-            &string_->GetChars()[start_offset_ + length_]};
-  }
+  bool ReadBlock() override;

-  static const bool kCanAccessHeap = true;
+  // FillBuffer should read up to kBufferSize characters at position and store
+  // them into buffer_[0..]. It returns the number of characters stored.
+  virtual size_t FillBuffer(size_t position) = 0;

- private:
-  Handle<String> string_;
-  const size_t start_offset_;
-  const size_t length_;
+  // Fixed sized buffer that this class reads from.
+  // The base class' buffer_start_ should always point to buffer_.
+  uc16 buffer_[kBufferSize];
 };

-// A Char stream backed by an off-heap ExternalOneByteString or
-// ExternalTwoByteString.
-template <typename Char>
-class ExternalStringStream {
- public:
-  ExternalStringStream(const Char* data, size_t end)
-      : data_(data), length_(end) {}
+BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
+    : Utf16CharacterStream(buffer_, buffer_, buffer_, 0) {}

-  Range<Char> GetDataAt(size_t pos) {
-    return {&data_[Min(length_, pos)], &data_[length_]};
-  }
+bool BufferedUtf16CharacterStream::ReadBlock() {
+  DCHECK_EQ(buffer_start_, buffer_);

-  static const bool kCanAccessHeap = false;
+  size_t position = pos();
+  buffer_pos_ = position;
+  buffer_cursor_ = buffer_;
+  buffer_end_ = buffer_ + FillBuffer(position);
+  DCHECK_EQ(pos(), position);
+  DCHECK_LE(buffer_end_, buffer_start_ + kBufferSize);
+  return buffer_cursor_ < buffer_end_;
+}

- private:
-  const Char* const data_;
-  const size_t length_;
-};
+// ----------------------------------------------------------------------------
+// GenericStringUtf16CharacterStream.
+//
+// A stream w/ a data source being a (flattened) Handle<String>.

-// A Char stream backed by multiple source-stream provided off-heap chunks.
-template <typename Char>
-class ChunkedStream {
+class GenericStringUtf16CharacterStream : public BufferedUtf16CharacterStream {
 public:
-  explicit ChunkedStream(ScriptCompiler::ExternalSourceStream* source,
-                         RuntimeCallStats* stats)
-      : source_(source), stats_(stats) {
-    // Immediately fetch a first chunk upon creation.
-    const uint8_t* data = nullptr;
-    size_t length;
-    {
-      RuntimeCallTimerScope scope(stats_,
-                                  RuntimeCallCounterId::kGetMoreDataCallback);
-      length = source_->GetMoreData(&data);
-    }
-    chunks_.push_back({data, 0, length, false});
-  }
+  GenericStringUtf16CharacterStream(Handle<String> data, size_t start_position,
+                                    size_t end_position);

-  Range<Char> GetDataAt(size_t pos) {
-    Chunk chunk = FindChunk(pos);
-    size_t buffer_end = chunk.length();
-    size_t buffer_pos = Min(buffer_end, pos - chunk.position);
-    return {&chunk.data()[buffer_pos], &chunk.data()[buffer_end]};
-  }
+  bool can_access_heap() override { return true; }

-  ~ChunkedStream() {
-    for (size_t i = 0; i < chunks_.size(); i++) {
-      delete[] chunks_[i].raw_data;
-    }
-  }
-
-  static const bool kCanAccessHeap = false;
-
- private:
-  // A single chunk of Chars. There may be a lonely bytes at the start and end
-  // in case sizeof(Char) > 1. They just need to be ignored since additional
-  // chunks are added by FetchChunk that contain the full character.
-  // TODO(verwaest): Make sure that those characters are added by blink instead
-  // so we can get rid of this complexity here.
-  struct Chunk {
-    // A raw chunk of Chars possibly including a lonely start and/or a lonely
-    // end byte.
-    const uint8_t* const raw_data;
-    // The logical position of data() (possibly skipping a lonely start byte).
-    const size_t position;
-    // The length of the raw_data.
-    const size_t raw_length : sizeof(size_t) * 8 - 1;
-    // Tells us whether the first byte of raw_data is a lonely start byte and
-    // should be skipped because it's combined with a lonely end byte from the
-    // previous chunk.
-    const bool lonely_start : 1;
-
-    size_t end_position() const { return position + length(); }
-
-    // The chunk includes a lonely end byte if the chunk is 2-byte but has an
-    // uneven number of chars (possibly ignoring a lonely start byte that is
-    // merged with the lonely end byte of the previous chunk).
-    bool lonely_end() const {
-      return (raw_length - lonely_start) % sizeof(Char) == 1;
-    }
-
-    uint8_t lonely_end_byte() const {
-      DCHECK(lonely_end());
-      return raw_data[raw_length - 1];
-    }
-
-    size_t length() const {
-      return (raw_length - lonely_start) >> (sizeof(Char) - 1);
-    }
-
-    bool has_chars() const { return raw_length - lonely_start > 0; }
-
-    const Char* data() const {
-      return reinterpret_cast<const Char*>(raw_data + lonely_start);
-    }
-  };
-
-  Chunk FindChunk(size_t position) {
-    // Walk forwards while the position is in front of the current chunk..
-    if (chunks_.back().position <= position) {
-      while (position >= chunks_.back().end_position() &&
-             chunks_.back().has_chars()) {
-        FetchChunk();
-      }
-      // Return if the final chunk's starting position is before the position.
-      if (chunks_.back().position <= position) return chunks_.back();
-      // Otherwise walk backwards to find the intermediate chunk added to
-      // support lonely bytes.
-      // TODO(verwaest): Remove once we don't need to support lonely bytes here
-      // anymore.
-    }
-
-    // Walk backwards.
-    for (auto reverse_it = chunks_.rbegin() + 1; reverse_it != chunks_.rend();
-         ++reverse_it) {
-      if (reverse_it->position <= position) return *reverse_it;
-    }
-    UNREACHABLE();
-  }
-
-  void FetchChunk() {
-    DCHECK(!chunks_.empty());
-
-    const uint8_t* data = nullptr;
-    size_t length;
-    {
-      RuntimeCallTimerScope scope(stats_,
-                                  RuntimeCallCounterId::kGetMoreDataCallback);
-      length = source_->GetMoreData(&data);
-    }
+ protected:
+  size_t FillBuffer(size_t position) override;

-    const Chunk& last_chunk = chunks_.back();
-    bool lonely_start = last_chunk.lonely_end();
-    DCHECK(last_chunk.has_chars());
+  Handle<String> string_;
+  size_t length_;
+};

-    size_t position = last_chunk.end_position();
+GenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream(
+    Handle<String> data, size_t start_position, size_t end_position)
+    : string_(data), length_(end_position) {
+  DCHECK_GE(end_position, start_position);
+  DCHECK_GE(static_cast<size_t>(string_->length()),
+            end_position - start_position);
+  buffer_pos_ = start_position;
+}

-    if (lonely_start) {
-      uint8_t* intermediate = NewArray<uint8_t>(2);
-      intermediate[0] = last_chunk.lonely_end_byte();
-      intermediate[1] = length == 0 ? 0 : data[0];
-      chunks_.push_back({intermediate, position, 2, false});
-      position += 1;
-    }
+size_t GenericStringUtf16CharacterStream::FillBuffer(size_t from_pos) {
+  if (from_pos >= length_) return 0;

-    chunks_.push_back({data, position, length, lonely_start});
-  }
+  size_t length = i::Min(kBufferSize, length_ - from_pos);
+  String::WriteToFlat<uc16>(*string_, buffer_, static_cast<int>(from_pos),
+                            static_cast<int>(from_pos + length));
+  return length;
+}

-  std::vector<struct Chunk> chunks_;
-  ScriptCompiler::ExternalSourceStream* source_;
-  RuntimeCallStats* stats_;
-};
+// ----------------------------------------------------------------------------
+// ExternalTwoByteStringUtf16CharacterStream.
+//
+// A stream whose data source is a Handle<ExternalTwoByteString>. It avoids
+// all data copying.

-// Provides a buffered utf-16 view on the bytes from the underlying ByteStream.
-// Chars are buffered if either the underlying stream isn't utf-16 or the
-// underlying utf-16 stream might move (is on-heap).
-template <typename Char, template <typename T> class ByteStream>
-class BufferedCharacterStream : public Utf16CharacterStream {
+class ExternalTwoByteStringUtf16CharacterStream : public Utf16CharacterStream {
 public:
-  template <class... TArgs>
-  BufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
-    buffer_pos_ = pos;
-    ReadBlock();
-  }
-
- protected:
-  bool ReadBlock() override {
-    size_t position = pos();
-    buffer_pos_ = position;
-    buffer_start_ = &buffer_[0];
-    buffer_cursor_ = buffer_start_;
+  ExternalTwoByteStringUtf16CharacterStream(Handle<ExternalTwoByteString> data,
+                                            size_t start_position,
+                                            size_t end_position);

-    Range<Char> range = byte_stream_.GetDataAt(position);
-    if (range.empty()) {
-      buffer_end_ = buffer_start_;
-      return false;
-    }
-
-    size_t length = Min(kBufferSize, range.length());
-    i::CopyCharsUnsigned(buffer_, range.start, length);
-    buffer_end_ = &buffer_[length];
-    return true;
-  }
-
-  bool can_access_heap() override {
-    return ByteStream<uint16_t>::kCanAccessHeap;
-  }
+  bool can_access_heap() override { return false; }

 private:
-  static const size_t kBufferSize = 512;
-  uc16 buffer_[kBufferSize];
-  ByteStream<Char> byte_stream_;
+  bool ReadBlock() override;
+
+  const uc16* raw_data_;  // Pointer to the actual array of characters.
+  size_t start_pos_;
+  size_t end_pos_;
 };

-// Provides a (partially) unbuffered utf-16 view on the bytes from the
-// underlying ByteStream. It is only partially unbuffered when running on MIPS
-// due to lonely start bytes making chunks unaligned. In that case, unaligned
-// chars in a chunk (due to lonely start) are locally buffered.
-template <template <typename T> class ByteStream>
-class UnbufferedCharacterStream : public Utf16CharacterStream {
- public:
-  template <class... TArgs>
-  UnbufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
-    DCHECK(!ByteStream<uint16_t>::kCanAccessHeap);
-    buffer_pos_ = pos;
-    ReadBlock();
-  }
+ExternalTwoByteStringUtf16CharacterStream::
+    ExternalTwoByteStringUtf16CharacterStream(
+        Handle<ExternalTwoByteString> data, size_t start_position,
+        size_t end_position)
+    : raw_data_(data->GetTwoByteData(static_cast<int>(start_position))),
+      start_pos_(start_position),
+      end_pos_(end_position) {
+  buffer_start_ = raw_data_;
+  buffer_cursor_ = raw_data_;
+  buffer_end_ = raw_data_ + (end_pos_ - start_pos_);
+  buffer_pos_ = start_pos_;
+}

- protected:
-  bool ReadBlock() override {
-    size_t position = pos();
+bool ExternalTwoByteStringUtf16CharacterStream::ReadBlock() {
+  size_t position = pos();
+  bool have_data = start_pos_ <= position && position < end_pos_;
+  if (have_data) {
+    buffer_pos_ = start_pos_;
+    buffer_cursor_ = raw_data_ + (position - start_pos_),
+    buffer_end_ = raw_data_ + (end_pos_ - start_pos_);
+  } else {
    buffer_pos_ = position;
-    Range<uint16_t> range = byte_stream_.GetDataAt(position);
-    buffer_start_ = range.start;
-    buffer_end_ = range.end;
-    buffer_cursor_ = buffer_start_;
-    if (range.empty()) return false;
-
-// TODO(verwaest): Make sure that this cannot happen by dealing with lonely
-// bytes on the blink side.
-#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
-    // Buffer anyway in case the chunk is unaligned due to a lonely start.
-    if (range.unaligned_start()) {
-      size_t length = Min(kBufferSize, range.length());
-      i::CopyCharsUnsigned(buffer_, buffer_start_, length);
-      buffer_start_ = &buffer_[0];
-      buffer_cursor_ = buffer_start_;
-      buffer_end_ = &buffer_[length];
-    }
-#endif
-    DCHECK_LE(buffer_start_, buffer_end_);
-    return true;
+    buffer_cursor_ = raw_data_;
+    buffer_end_ = raw_data_;
  }
-
-  bool can_access_heap() override { return false; }
-
- private:
-#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
-  static const size_t kBufferSize = 512;
-  uc16 buffer_[kBufferSize];
-#endif
-  ByteStream<uint16_t> byte_stream_;
-};
+  return have_data;
+}

 // ----------------------------------------------------------------------------
-// BufferedUtf16CharacterStreams
+// ExternalOneByteStringUtf16CharacterStream
 //
-// A buffered character stream based on a random access character
-// source (ReadBlock can be called with pos() pointing to any position,
-// even positions before the current).
-//
-// TODO(verwaest): Remove together with Utf8 external streaming streams.
-class BufferedUtf16CharacterStream : public Utf16CharacterStream {
+// A stream whose data source is a Handle<ExternalOneByteString>.
+
+class ExternalOneByteStringUtf16CharacterStream
+    : public BufferedUtf16CharacterStream {
 public:
-  BufferedUtf16CharacterStream();
+  ExternalOneByteStringUtf16CharacterStream(Handle<ExternalOneByteString> data,
+                                            size_t start_position,
+                                            size_t end_position);

- protected:
-  static const size_t kBufferSize = 512;
+  // For testing:
+  ExternalOneByteStringUtf16CharacterStream(const char* data, size_t length);

-  bool ReadBlock() override;
+  bool can_access_heap() override { return false; }

-  // FillBuffer should read up to kBufferSize characters at position and store
-  // them into buffer_[0..]. It returns the number of characters stored.
-  virtual size_t FillBuffer(size_t position) = 0;
+ protected:
+  size_t FillBuffer(size_t position) override;

-  // Fixed sized buffer that this class reads from.
-  // The base class' buffer_start_ should always point to buffer_.
-  uc16 buffer_[kBufferSize];
+  const uint8_t* raw_data_;  // Pointer to the actual array of characters.
+  size_t length_;
 };

-BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
-    : Utf16CharacterStream(buffer_, buffer_, buffer_, 0) {}
+ExternalOneByteStringUtf16CharacterStream::
+    ExternalOneByteStringUtf16CharacterStream(
+        Handle<ExternalOneByteString> data, size_t start_position,
+        size_t end_position)
+    : raw_data_(data->GetChars()), length_(end_position) {
+  DCHECK(end_position >= start_position);
+  buffer_pos_ = start_position;
+}

-bool BufferedUtf16CharacterStream::ReadBlock() {
-  DCHECK_EQ(buffer_start_, buffer_);
+ExternalOneByteStringUtf16CharacterStream::
+    ExternalOneByteStringUtf16CharacterStream(const char* data, size_t length)
+    : raw_data_(reinterpret_cast<const uint8_t*>(data)), length_(length) {}

-  size_t position = pos();
-  buffer_pos_ = position;
-  buffer_cursor_ = buffer_;
-  buffer_end_ = buffer_ + FillBuffer(position);
-  DCHECK_EQ(pos(), position);
-  DCHECK_LE(buffer_end_, buffer_start_ + kBufferSize);
-  return buffer_cursor_ < buffer_end_;
+size_t ExternalOneByteStringUtf16CharacterStream::FillBuffer(size_t from_pos) {
+  if (from_pos >= length_) return 0;
+
+  size_t length = Min(kBufferSize, length_ - from_pos);
+  i::CopyCharsUnsigned(buffer_, raw_data_ + from_pos, length);
+  return length;
 }

 // ----------------------------------------------------------------------------
@@ -357,9 +197,6 @@ bool BufferedUtf16CharacterStream::ReadBlock() {
 // may 'cut' arbitrarily into utf-8 characters. Also, seeking to a given
 // character position is tricky because the byte position cannot be derived
 // from the character position.
-//
-// TODO(verwaest): Decode utf8 chunks into utf16 chunks on the blink side
-// instead so we don't need to buffer.

 class Utf8ExternalStreamingStream : public BufferedUtf16CharacterStream {
 public:
@@ -631,47 +468,393 @@ size_t Utf8ExternalStreamingStream::FillBuffer(size_t position) {
  return buffer_end_ - buffer_cursor_;
 }

+// ----------------------------------------------------------------------------
+// Chunks - helper for One- + TwoByteExternalStreamingStream
+namespace {
+
+struct Chunk {
+  const uint8_t* data;
+  size_t byte_length;
+  size_t byte_pos;
+};
+
+typedef std::vector<struct Chunk> Chunks;
+
+void DeleteChunks(Chunks& chunks) {
+  for (size_t i = 0; i < chunks.size(); i++) delete[] chunks[i].data;
+}
+
+// Return the chunk index for the chunk containing position.
+// If position is behind the end of the stream, the index of the last,
+// zero-length chunk is returned.
+size_t FindChunk(Chunks& chunks, ScriptCompiler::ExternalSourceStream* source,
+                 size_t position, RuntimeCallStats* stats) {
+  size_t end_pos =
+      chunks.empty() ? 0 : (chunks.back().byte_pos + chunks.back().byte_length);
+
+  // Get more data if needed. We usually won't enter the loop body.
+  bool out_of_data = !chunks.empty() && chunks.back().byte_length == 0;
+  {
+    RuntimeCallTimerScope scope(stats,
+                                RuntimeCallCounterId::kGetMoreDataCallback);
+    while (!out_of_data && end_pos <= position + 1) {
+      const uint8_t* chunk = nullptr;
+      size_t len = source->GetMoreData(&chunk);
+
+      chunks.push_back({chunk, len, end_pos});
+      end_pos += len;
+      out_of_data = (len == 0);
+    }
+  }
+
+  // Here, we should always have at least one chunk, and we either have the
+  // chunk we were looking for, or we're out of data. Also, out_of_data and
+  // end_pos are current (and designate whether we have exhausted the stream,
+  // and the length of data received so far, respectively).
+  DCHECK(!chunks.empty());
+  DCHECK_EQ(end_pos, chunks.back().byte_pos + chunks.back().byte_length);
+  DCHECK_EQ(out_of_data, chunks.back().byte_length == 0);
+  DCHECK(position < end_pos || out_of_data);
+
+  // Edge case: position is behind the end of stream: Return the last (length 0)
+  // chunk to indicate the end of the stream.
+  if (position >= end_pos) {
+    DCHECK(out_of_data);
+    return chunks.size() - 1;
+  }
+
+  // We almost always 'stream', meaning we want data from the last chunk, so
+  // let's look at chunks back-to-front.
+  size_t chunk_no = chunks.size() - 1;
+  while (chunks[chunk_no].byte_pos > position) {
+    DCHECK_NE(chunk_no, 0u);
+    chunk_no--;
+  }
+  DCHECK_LE(chunks[chunk_no].byte_pos, position);
+  DCHECK_LT(position, chunks[chunk_no].byte_pos + chunks[chunk_no].byte_length);
+  return chunk_no;
+}
+
+}  // anonymous namespace
+
+// ----------------------------------------------------------------------------
+// OneByteExternalStreamingStream
+//
+// A stream of latin-1 encoded, chunked data.
+
+class OneByteExternalStreamingStream : public BufferedUtf16CharacterStream {
+ public:
+  explicit OneByteExternalStreamingStream(
+      ScriptCompiler::ExternalSourceStream* source, RuntimeCallStats* stats)
+      : source_(source), stats_(stats) {}
+  ~OneByteExternalStreamingStream() override { DeleteChunks(chunks_); }
+
+  bool can_access_heap() override { return false; }
+
+ protected:
+  size_t FillBuffer(size_t position) override;
+
+ private:
+  Chunks chunks_;
+  ScriptCompiler::ExternalSourceStream* source_;
+  RuntimeCallStats* stats_;
+};
+
+size_t OneByteExternalStreamingStream::FillBuffer(size_t position) {
+  const Chunk& chunk = chunks_[FindChunk(chunks_, source_, position, stats_)];
+  if (chunk.byte_length == 0) return 0;
+
+  size_t start_pos = position - chunk.byte_pos;
+  size_t len = i::Min(kBufferSize, chunk.byte_length - start_pos);
+  i::CopyCharsUnsigned(buffer_, chunk.data + start_pos, len);
+  return len;
+}
+
+#if !(V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64)
+// ----------------------------------------------------------------------------
+// TwoByteExternalStreamingStream
+//
+// A stream of ucs-2 data, delivered in chunks. Chunks may be 'cut' into the
+// middle of characters (or even contain only one byte), which adds a bit
+// of complexity. This stream avoids all data copying, except for characters
+// that cross chunk boundaries.
+
+class TwoByteExternalStreamingStream : public Utf16CharacterStream {
+ public:
+  explicit TwoByteExternalStreamingStream(
+      ScriptCompiler::ExternalSourceStream* source, RuntimeCallStats* stats);
+  ~TwoByteExternalStreamingStream() override;
+
+  bool can_access_heap() override { return false; }
+
+ protected:
+  bool ReadBlock() override;
+
+  Chunks chunks_;
+  ScriptCompiler::ExternalSourceStream* source_;
+  RuntimeCallStats* stats_;
+  uc16 one_char_buffer_;
+};
+
+TwoByteExternalStreamingStream::TwoByteExternalStreamingStream(
+    ScriptCompiler::ExternalSourceStream* source, RuntimeCallStats* stats)
+    : Utf16CharacterStream(&one_char_buffer_, &one_char_buffer_,
+                           &one_char_buffer_, 0),
+      source_(source),
+      stats_(stats),
+      one_char_buffer_(0) {}
+
+TwoByteExternalStreamingStream::~TwoByteExternalStreamingStream() {
+  DeleteChunks(chunks_);
+}
+
+bool TwoByteExternalStreamingStream::ReadBlock() {
+  size_t position = pos();
+
+  // We'll search for the 2nd byte of our character, to make sure we
+  // have enough data for at least one character.
+  size_t chunk_no = FindChunk(chunks_, source_, 2 * position + 1, stats_);
+
+  // Out of data? Return 0.
+  if (chunks_[chunk_no].byte_length == 0) {
+    buffer_pos_ = position;
+    buffer_cursor_ = buffer_start_;
+    buffer_end_ = buffer_start_;
+    return false;
+  }
+
+  Chunk& current = chunks_[chunk_no];
+
+  // Annoying edge case: Chunks may not be 2-byte aligned, meaning that a
+  // character may be split between the previous and the current chunk.
+  // If we find such a lonely byte at the beginning of the chunk, we'll use
+  // one_char_buffer_ to hold the full character.
+  bool lonely_byte = (chunks_[chunk_no].byte_pos == (2 * position + 1));
+  if (lonely_byte) {
+    DCHECK_NE(chunk_no, 0u);
+    Chunk& previous_chunk = chunks_[chunk_no - 1];
+#ifdef V8_TARGET_BIG_ENDIAN
+    uc16 character = current.data[0] |
+                     previous_chunk.data[previous_chunk.byte_length - 1] << 8;
+#else
+    uc16 character = previous_chunk.data[previous_chunk.byte_length - 1] |
+                     current.data[0] << 8;
+#endif
+
+    one_char_buffer_ = character;
+    buffer_pos_ = position;
+    buffer_start_ = &one_char_buffer_;
+    buffer_cursor_ = &one_char_buffer_;
+    buffer_end_ = &one_char_buffer_ + 1;
+    return true;
+  }
+
+  // Common case: character is in current chunk.
+  DCHECK_LE(current.byte_pos, 2 * position);
+  DCHECK_LT(2 * position + 1, current.byte_pos + current.byte_length);
+
+  // Determine # of full ucs-2 chars in stream, and whether we started on an odd
+  // byte boundary.
+  bool odd_start = (current.byte_pos % 2) == 1;
+  size_t number_chars = (current.byte_length - odd_start) / 2;
+
+  // Point the buffer_*_ members into the current chunk and set buffer_cursor_
+  // to point to position. Be careful when converting the byte positions (in
+  // Chunk) to the ucs-2 character positions (in buffer_*_ members).
+  buffer_start_ = reinterpret_cast<const uint16_t*>(current.data + odd_start);
+  buffer_end_ = buffer_start_ + number_chars;
+  buffer_pos_ = (current.byte_pos + odd_start) / 2;
+  buffer_cursor_ = buffer_start_ + (position - buffer_pos_);
+  DCHECK_EQ(position, pos());
+  return true;
+}
+
+#else
+
+// ----------------------------------------------------------------------------
+// TwoByteExternalBufferedStream
+//
+// This class is made specifically to address unaligned access to 16-bit data
+// in MIPS and ARM architectures. It replaces class
+// TwoByteExternalStreamingStream which in some cases does have unaligned
+// accesses to 16-bit data.
+
+class TwoByteExternalBufferedStream : public Utf16CharacterStream {
+ public:
+  explicit TwoByteExternalBufferedStream(
+      ScriptCompiler::ExternalSourceStream* source, RuntimeCallStats* stats);
+  ~TwoByteExternalBufferedStream();
+
+  bool can_access_heap() override { return false; }
+
+ protected:
+  static const size_t kBufferSize = 512;
+
+  bool ReadBlock() override;
+
+  // FillBuffer should read up to kBufferSize characters at position and store
+  // them into buffer_[0..]. It returns the number of characters stored.
+  size_t FillBuffer(size_t position, size_t chunk_no);
+
+  // Fixed sized buffer that this class reads from.
+  // The base class' buffer_start_ should always point to buffer_.
+  uc16 buffer_[kBufferSize];
+
+  Chunks chunks_;
+  ScriptCompiler::ExternalSourceStream* source_;
+  RuntimeCallStats* stats_;
+};
+
+TwoByteExternalBufferedStream::TwoByteExternalBufferedStream(
+    ScriptCompiler::ExternalSourceStream* source, RuntimeCallStats* stats)
+    : Utf16CharacterStream(buffer_, buffer_, buffer_, 0),
+      source_(source),
+      stats_(stats) {}
+
+TwoByteExternalBufferedStream::~TwoByteExternalBufferedStream() {
+  DeleteChunks(chunks_);
+}
+
+bool TwoByteExternalBufferedStream::ReadBlock() {
+  size_t position = pos();  // Character index (byte offset / 2).
+  // Find the chunk containing the second byte of the character at position.
+  size_t chunk_no = FindChunk(chunks_, source_, 2 * position + 1, stats_);
+
+  // Out of data? Leave the buffer empty and return false.
+  if (chunks_[chunk_no].byte_length == 0) {
+    buffer_pos_ = position;
+    buffer_cursor_ = buffer_start_;
+    buffer_end_ = buffer_start_;
+    return false;
+  }
+
+  Chunk& current = chunks_[chunk_no];
+
+  bool odd_start = current.byte_pos % 2;
+  // Common case: the character lies entirely within the current chunk.
+  DCHECK_LE(current.byte_pos, 2 * position + odd_start);
+  DCHECK_LT(2 * position + 1, current.byte_pos + current.byte_length);
+
+  // If the chunk starts at an odd stream byte position, copy the text into
+  // buffer_ so characters are always read aligned (matters on MIPS and ARM).
+  // Otherwise read characters directly from the chunk's data.
+  if (!odd_start) {
+    buffer_start_ = reinterpret_cast<const uint16_t*>(current.data);
+    size_t number_chars = current.byte_length / 2;
+    buffer_end_ = buffer_start_ + number_chars;
+    buffer_pos_ = current.byte_pos / 2;
+    buffer_cursor_ = buffer_start_ + (position - buffer_pos_);
+    DCHECK_EQ(position, pos());
+    return true;
+  } else {
+    buffer_start_ = buffer_;
+    buffer_pos_ = position;
+    buffer_cursor_ = buffer_;
+    buffer_end_ = buffer_ + FillBuffer(position, chunk_no);
+    DCHECK_EQ(pos(), position);
+    DCHECK_LE(buffer_end_, buffer_start_ + kBufferSize);
+    return buffer_cursor_ < buffer_end_;
+  }
+}
+
+size_t TwoByteExternalBufferedStream::FillBuffer(size_t position,
+                                                 size_t chunk_no) {
+  DCHECK_EQ(chunks_[chunk_no].byte_pos % 2, 1u);
+  bool odd_start = true;  // Holds for the caller's chunk (DCHECK above).
+  // Round position down to a multiple of kBufferSize and restart from there.
+  {
+    size_t new_pos = position / kBufferSize * kBufferSize;
+    if (new_pos != position) {
+      chunk_no = FindChunk(chunks_, source_, 2 * new_pos + 1, stats_);
+      buffer_pos_ = new_pos;
+      buffer_cursor_ = buffer_start_ + (position - buffer_pos_);
+      position = new_pos;
+      odd_start = chunks_[chunk_no].byte_pos % 2;
+    }
+  }
+
+  Chunk* current = &chunks_[chunk_no];
+
+  // Annoying edge case: Chunks may not be 2-byte aligned, meaning that a
+  // character may be split between the previous and the current chunk.
+  // If we find such a lonely byte at the beginning of the chunk, we'll copy
+  // the previous chunk's last byte to the first byte in buffer_.
+  size_t totalLength = 0;  // Bytes written to buffer_ so far.
+  bool lonely_byte = (current->byte_pos == (2 * position + 1));
+  if (lonely_byte) {
+    DCHECK_NE(chunk_no, 0u);
+    Chunk& previous_chunk = chunks_[chunk_no - 1];
+    *reinterpret_cast<uint8_t*>(buffer_) =
+        previous_chunk.data[previous_chunk.byte_length - 1];
+    totalLength++;
+  }
+
+  // Common case: character is in current chunk.
+  DCHECK_LE(current->byte_pos, 2 * position + odd_start);
+  DCHECK_LT(2 * position + 1, current->byte_pos + current->byte_length);
+
+  // Copy bytes from chunk_pos to the end of buffer or chunk; start_offset is
+  // 1 when the character begins at data offset 2 * chunk_pos - 1.
+  size_t chunk_pos = position - current->byte_pos / 2;
+  size_t start_offset = odd_start && chunk_pos != 0;
+  size_t bytes_to_move =
+      i::Min(2 * kBufferSize - lonely_byte,
+             current->byte_length - 2 * chunk_pos + start_offset);
+  i::MemMove(reinterpret_cast<uint8_t*>(buffer_) + lonely_byte,
+             current->data + 2 * chunk_pos - start_offset, bytes_to_move);
+
+  // Fill up the rest of the buffer if there is space and data left.
+  totalLength += bytes_to_move;
+  position = (current->byte_pos + current->byte_length) / 2;
+  if (position - buffer_pos_ < kBufferSize) {
+    chunk_no = FindChunk(chunks_, source_, 2 * position + 1, stats_);
+    current = &chunks_[chunk_no];
+    odd_start = current->byte_pos % 2;
+    bytes_to_move = i::Min(2 * kBufferSize - totalLength, current->byte_length);
+    while (bytes_to_move) {
+      // Common case: character is in current chunk.
+      DCHECK_LE(current->byte_pos, 2 * position + odd_start);
+      DCHECK_LT(2 * position + 1, current->byte_pos + current->byte_length);
+
+      i::MemMove(reinterpret_cast<uint8_t*>(buffer_) + totalLength,
+                 current->data, bytes_to_move);
+      totalLength += bytes_to_move;
+      position = (current->byte_pos + current->byte_length) / 2;
+      chunk_no = FindChunk(chunks_, source_, 2 * position + 1, stats_);
+      current = &chunks_[chunk_no];
+      odd_start = current->byte_pos % 2;
+      bytes_to_move =
+          i::Min(2 * kBufferSize - totalLength, current->byte_length);
+    }
+  }
+  return totalLength / 2;  // Bytes -> characters, rounding down.
+}
+#endif
+
 // ----------------------------------------------------------------------------
 // ScannerStream: Create stream instances.

-Utf16CharacterStream* ScannerStream::For(Isolate* isolate,
-                                         Handle<String> data) {
-  return ScannerStream::For(isolate, data, 0, data->length());
+Utf16CharacterStream* ScannerStream::For(Handle<String> data) {
+  return ScannerStream::For(data, 0, data->length());
 }

-Utf16CharacterStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
-                                         int start_pos, int end_pos) {
+Utf16CharacterStream* ScannerStream::For(Handle<String> data, int start_pos,
+                                         int end_pos) {
  DCHECK_GE(start_pos, 0);
  DCHECK_LE(start_pos, end_pos);
  DCHECK_LE(end_pos, data->length());
-  size_t start_offset = 0;
-  if (data->IsSlicedString()) {
-    SlicedString* string = SlicedString::cast(*data);
-    start_offset = string->offset();
-    data = handle(string->parent(), string->GetIsolate());
-  } else {
-    data = String::Flatten(data->GetIsolate(), data);
-  }
  if (data->IsExternalOneByteString()) {
-    return new BufferedCharacterStream<uint8_t, ExternalStringStream>(
-        static_cast<size_t>(start_pos),
-        ExternalOneByteString::cast(*data)->GetChars() + start_offset,
-        static_cast<size_t>(end_pos));
+    return new ExternalOneByteStringUtf16CharacterStream(
+        Handle<ExternalOneByteString>::cast(data),
+        static_cast<size_t>(start_pos), static_cast<size_t>(end_pos));
  } else if (data->IsExternalTwoByteString()) {
-    return new UnbufferedCharacterStream<ExternalStringStream>(
-        static_cast<size_t>(start_pos),
-        ExternalTwoByteString::cast(*data)->GetChars() + start_offset,
-        static_cast<size_t>(end_pos));
-  } else if (data->IsSeqOneByteString()) {
-    return new BufferedCharacterStream<uint8_t, OnHeapStream>(
-        static_cast<size_t>(start_pos), Handle<SeqOneByteString>::cast(data),
-        start_offset, static_cast<size_t>(end_pos));
-  } else if (data->IsSeqTwoByteString()) {
-    return new BufferedCharacterStream<uint16_t, OnHeapStream>(
-        static_cast<size_t>(start_pos), Handle<SeqTwoByteString>::cast(data),
-        start_offset, static_cast<size_t>(end_pos));
+    return new ExternalTwoByteStringUtf16CharacterStream(
+        Handle<ExternalTwoByteString>::cast(data),
+        static_cast<size_t>(start_pos), static_cast<size_t>(end_pos));
  } else {
-    UNREACHABLE();
+    // TODO(vogelheim): Maybe call data.Flatten() first?
+    return new GenericStringUtf16CharacterStream(
+        data, static_cast<size_t>(start_pos), static_cast<size_t>(end_pos));
  }
 }

@@ -683,9 +866,7 @@ std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
 std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
    const char* data, size_t length) {
  return std::unique_ptr<Utf16CharacterStream>(
-      new BufferedCharacterStream<uint8_t, ExternalStringStream>(
-          static_cast<size_t>(0), reinterpret_cast<const uint8_t*>(data),
-          static_cast<size_t>(length)));
+      new ExternalOneByteStringUtf16CharacterStream(data, length));
 }

 Utf16CharacterStream* ScannerStream::For(
@@ -694,15 +875,18 @@ Utf16CharacterStream* ScannerStream::For(
    RuntimeCallStats* stats) {
  switch (encoding) {
    case v8::ScriptCompiler::StreamedSource::TWO_BYTE:
-      return new UnbufferedCharacterStream<ChunkedStream>(
-          static_cast<size_t>(0), source_stream, stats);
+#if !(V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64)
+      return new TwoByteExternalStreamingStream(source_stream, stats);
+#else
+      return new TwoByteExternalBufferedStream(source_stream, stats);
+#endif
    case v8::ScriptCompiler::StreamedSource::ONE_BYTE:
-      return new BufferedCharacterStream<uint8_t, ChunkedStream>(
-          static_cast<size_t>(0), source_stream, stats);
+      return new OneByteExternalStreamingStream(source_stream, stats);
    case v8::ScriptCompiler::StreamedSource::UTF8:
      return new Utf8ExternalStreamingStream(source_stream, stats);
  }
  UNREACHABLE();
+  return nullptr;
 }

 }  // namespace internal

--- a/src/parsing/scanner-character-streams.h
+++ b/src/parsing/scanner-character-streams.h
@@ -19,9 +19,9 @@ class String;

 class V8_EXPORT_PRIVATE ScannerStream {
 public:
-  static Utf16CharacterStream* For(Isolate* isolate, Handle<String> data);
-  static Utf16CharacterStream* For(Isolate* isolate, Handle<String> data,
-                                   int start_pos, int end_pos);
+  static Utf16CharacterStream* For(Handle<String> data);
+  static Utf16CharacterStream* For(Handle<String> data, int start_pos,
+                                   int end_pos);
  static Utf16CharacterStream* For(
      ScriptCompiler::ExternalSourceStream* source_stream,
      ScriptCompiler::StreamedSource::Encoding encoding,

--- a/test/cctest/parsing/test-scanner-streams.cc
+++ b/test/cctest/parsing/test-scanner-streams.cc
@@ -315,7 +315,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
    i::Handle<i::String> uc16_string(
        factory->NewExternalStringFromTwoByte(&resource).ToHandleChecked());
    std::unique_ptr<i::Utf16CharacterStream> uc16_stream(
-        i::ScannerStream::For(isolate, uc16_string, start, end));
+        i::ScannerStream::For(uc16_string, start, end));
    TestCharacterStream(one_byte_source, uc16_stream.get(), length, start, end);

    // This avoids the GC from trying to free a stack allocated resource.
@@ -335,7 +335,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
        factory->NewExternalStringFromOneByte(&one_byte_resource)
            .ToHandleChecked());
    std::unique_ptr<i::Utf16CharacterStream> one_byte_stream(
-        i::ScannerStream::For(isolate, ext_one_byte_string, start, end));
+        i::ScannerStream::For(ext_one_byte_string, start, end));
    TestCharacterStream(one_byte_source, one_byte_stream.get(), length, start,
                        end);
    // This avoids the GC from trying to free a stack allocated resource.
@@ -347,7 +347,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
  // 1-byte generic i::String
  {
    std::unique_ptr<i::Utf16CharacterStream> string_stream(
-        i::ScannerStream::For(isolate, one_byte_string, start, end));
+        i::ScannerStream::For(one_byte_string, start, end));
    TestCharacterStream(one_byte_source, string_stream.get(), length, start,
                        end);
  }
@@ -357,7 +357,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
    i::Handle<i::String> two_byte_string =
        factory->NewStringFromTwoByte(two_byte_vector).ToHandleChecked();
    std::unique_ptr<i::Utf16CharacterStream> two_byte_string_stream(
-        i::ScannerStream::For(isolate, two_byte_string, start, end));
+        i::ScannerStream::For(two_byte_string, start, end));
    TestCharacterStream(one_byte_source, two_byte_string_stream.get(), length,
                        start, end);
  }

--- a/test/cctest/test-parsing.cc
+++ b/test/cctest/test-parsing.cc
@@ -1174,7 +1174,7 @@ void TestParserSyncWithFlags(i::Handle<i::String> source,
  if (test_preparser) {
    i::Scanner scanner(isolate->unicode_cache());
    std::unique_ptr<i::Utf16CharacterStream> stream(
-        i::ScannerStream::For(isolate, source));
+        i::ScannerStream::For(source));
    i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    i::AstValueFactory ast_value_factory(
        &zone, CcTest::i_isolate()->ast_string_constants(),