// Copyright 2014 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "src/unicode-inl.h" #include "src/unicode-decoder.h" #include <stdio.h> #include <stdlib.h> namespace unibrow { uint16_t Utf8Iterator::operator*() { if (V8_UNLIKELY(char_ > Utf16::kMaxNonSurrogateCharCode)) { return trailing_ ? Utf16::TrailSurrogate(char_) : Utf16::LeadSurrogate(char_); } DCHECK_EQ(trailing_, false); return char_; } Utf8Iterator& Utf8Iterator::operator++() { if (V8_UNLIKELY(this->Done())) { char_ = Utf8::kBufferEmpty; return *this; } if (V8_UNLIKELY(char_ > Utf16::kMaxNonSurrogateCharCode && !trailing_)) { trailing_ = true; return *this; } trailing_ = false; offset_ = cursor_; char_ = Utf8::ValueOf(reinterpret_cast<const uint8_t*>(stream_.begin()) + cursor_, stream_.length() - cursor_, &cursor_); return *this; } Utf8Iterator Utf8Iterator::operator++(int) { Utf8Iterator old(*this); ++*this; return old; } bool Utf8Iterator::Done() { return offset_ == static_cast<size_t>(stream_.length()); } void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length, const v8::internal::Vector<const char>& stream) { size_t utf16_length = 0; Utf8Iterator it = Utf8Iterator(stream); // Loop until stream is read, writing to buffer as long as buffer has space. while (utf16_length < buffer_length && !it.Done()) { *buffer++ = *it; ++it; utf16_length++; } bytes_read_ = it.Offset(); trailing_ = it.Trailing(); chars_written_ = utf16_length; // Now that writing to buffer is done, we just need to calculate utf16_length while (!it.Done()) { ++it; utf16_length++; } utf16_length_ = utf16_length; } void Utf8DecoderBase::WriteUtf16Slow( uint16_t* data, size_t length, const v8::internal::Vector<const char>& stream, size_t offset, bool trailing) { Utf8Iterator it = Utf8Iterator(stream, offset, trailing); while (!it.Done()) { DCHECK_GT(length--, 0); *data++ = *it; ++it; } } } // namespace unibrow