Commit 0e3b5386 authored by marja, committed by Commit bot

Scanner / Unicode decoding: use size_t instead of unsigned.

size_t is the correct data type for this purpose. Our APIs (in particular
ExternalSourceStream::GetMoreData) are already using it, and there were some
static_casts to convert between size_t and unsigned.

This CL doesn't intend to fix all of V8, just the smallest part that makes
sense on its own: the code around scanner character streams.

BUG=

Review URL: https://codereview.chromium.org/864273005

Cr-Commit-Position: refs/heads/master@{#26449}
parent ec42e002
......@@ -240,7 +240,7 @@ MaybeHandle<String> Factory::NewStringFromUtf8(Vector<const char> string,
decoder(isolate()->unicode_cache()->utf8_decoder());
decoder->Reset(string.start() + non_ascii_start,
length - non_ascii_start);
int utf16_length = decoder->Utf16Length();
int utf16_length = static_cast<int>(decoder->Utf16Length());
DCHECK(utf16_length > 0);
// Allocate string.
Handle<SeqTwoByteString> result;
......
......@@ -3102,7 +3102,7 @@ void HeapSnapshotJSONSerializer::SerializeString(const unsigned char* s) {
WriteUChar(writer_, *s);
} else {
// Convert UTF-8 into \u UTF-16 literal.
unsigned length = 1, cursor = 0;
size_t length = 1, cursor = 0;
for ( ; length <= 4 && *(s + length) != '\0'; ++length) { }
unibrow::uchar c = unibrow::Utf8::CalculateValue(s, length, &cursor);
if (c != unibrow::Utf8::kBadChar) {
......
......@@ -3999,9 +3999,9 @@ static inline void WriteOneByteData(Vector<const char> vector, uint8_t* chars,
static inline void WriteTwoByteData(Vector<const char> vector, uint16_t* chars,
int len) {
const uint8_t* stream = reinterpret_cast<const uint8_t*>(vector.start());
unsigned stream_length = vector.length();
size_t stream_length = vector.length();
while (stream_length != 0) {
unsigned consumed = 0;
size_t consumed = 0;
uint32_t c = unibrow::Utf8::ValueOf(stream, stream_length, &consumed);
DCHECK(c != unibrow::Utf8::kBadChar);
DCHECK(consumed <= stream_length);
......
......@@ -9115,10 +9115,10 @@ bool String::IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match) {
return false;
}
int i;
unsigned remaining_in_str = static_cast<unsigned>(str_len);
size_t remaining_in_str = static_cast<size_t>(str_len);
const uint8_t* utf8_data = reinterpret_cast<const uint8_t*>(str.start());
for (i = 0; i < slen && remaining_in_str > 0; i++) {
unsigned cursor = 0;
size_t cursor = 0;
uint32_t r = unibrow::Utf8::ValueOf(utf8_data, remaining_in_str, &cursor);
DCHECK(cursor > 0 && cursor <= remaining_in_str);
if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) {
......@@ -9292,13 +9292,13 @@ uint32_t StringHasher::ComputeUtf8Hash(Vector<const char> chars,
// Start with a fake length which won't affect computation.
// It will be updated later.
StringHasher hasher(String::kMaxArrayIndexSize, seed);
unsigned remaining = static_cast<unsigned>(vector_length);
size_t remaining = static_cast<size_t>(vector_length);
const uint8_t* stream = reinterpret_cast<const uint8_t*>(chars.start());
int utf16_length = 0;
bool is_index = true;
DCHECK(hasher.is_array_index_);
while (remaining > 0) {
unsigned consumed = 0;
size_t consumed = 0;
uint32_t c = unibrow::Utf8::ValueOf(stream, remaining, &consumed);
DCHECK(consumed > 0 && consumed <= remaining);
stream += consumed;
......
This diff is collapsed.
......@@ -21,15 +21,15 @@ class BufferedUtf16CharacterStream: public Utf16CharacterStream {
virtual void PushBack(uc32 character);
protected:
static const unsigned kBufferSize = 512;
static const unsigned kPushBackStepSize = 16;
static const size_t kBufferSize = 512;
static const size_t kPushBackStepSize = 16;
virtual unsigned SlowSeekForward(unsigned delta);
virtual size_t SlowSeekForward(size_t delta);
virtual bool ReadBlock();
virtual void SlowPushBack(uc16 character);
virtual unsigned BufferSeekForward(unsigned delta) = 0;
virtual unsigned FillBuffer(unsigned position) = 0;
virtual size_t BufferSeekForward(size_t delta) = 0;
virtual size_t FillBuffer(size_t position) = 0;
const uc16* pushback_limit_;
uc16 buffer_[kBufferSize];
......@@ -39,40 +39,39 @@ class BufferedUtf16CharacterStream: public Utf16CharacterStream {
// Generic string stream.
class GenericStringUtf16CharacterStream: public BufferedUtf16CharacterStream {
public:
GenericStringUtf16CharacterStream(Handle<String> data,
unsigned start_position,
unsigned end_position);
GenericStringUtf16CharacterStream(Handle<String> data, size_t start_position,
size_t end_position);
virtual ~GenericStringUtf16CharacterStream();
protected:
virtual unsigned BufferSeekForward(unsigned delta);
virtual unsigned FillBuffer(unsigned position);
virtual size_t BufferSeekForward(size_t delta);
virtual size_t FillBuffer(size_t position);
Handle<String> string_;
unsigned length_;
size_t length_;
};
// Utf16 stream based on a literal UTF-8 string.
class Utf8ToUtf16CharacterStream: public BufferedUtf16CharacterStream {
public:
Utf8ToUtf16CharacterStream(const byte* data, unsigned length);
Utf8ToUtf16CharacterStream(const byte* data, size_t length);
virtual ~Utf8ToUtf16CharacterStream();
static unsigned CopyChars(uint16_t* dest, unsigned length, const byte* src,
unsigned* src_pos, unsigned src_length);
static size_t CopyChars(uint16_t* dest, size_t length, const byte* src,
size_t* src_pos, size_t src_length);
protected:
virtual unsigned BufferSeekForward(unsigned delta);
virtual unsigned FillBuffer(unsigned char_position);
void SetRawPosition(unsigned char_position);
virtual size_t BufferSeekForward(size_t delta);
virtual size_t FillBuffer(size_t char_position);
void SetRawPosition(size_t char_position);
const byte* raw_data_;
unsigned raw_data_length_; // Measured in bytes, not characters.
unsigned raw_data_pos_;
size_t raw_data_length_; // Measured in bytes, not characters.
size_t raw_data_pos_;
// The character position of the character at raw_data[raw_data_pos_].
// Not necessarily the same as pos_.
unsigned raw_character_position_;
size_t raw_character_position_;
};
......@@ -91,7 +90,7 @@ class ExternalStreamingStream : public BufferedUtf16CharacterStream {
virtual ~ExternalStreamingStream() { delete[] current_data_; }
unsigned BufferSeekForward(unsigned delta) OVERRIDE {
size_t BufferSeekForward(size_t delta) OVERRIDE {
// We never need to seek forward when streaming scripts. We only seek
// forward when we want to parse a function whose location we already know,
// and when streaming, we don't know the locations of anything we haven't
......@@ -100,19 +99,19 @@ class ExternalStreamingStream : public BufferedUtf16CharacterStream {
return 0;
}
unsigned FillBuffer(unsigned position) OVERRIDE;
size_t FillBuffer(size_t position) OVERRIDE;
private:
void HandleUtf8SplitCharacters(unsigned* data_in_buffer);
void HandleUtf8SplitCharacters(size_t* data_in_buffer);
ScriptCompiler::ExternalSourceStream* source_stream_;
v8::ScriptCompiler::StreamedSource::Encoding encoding_;
const uint8_t* current_data_;
unsigned current_data_offset_;
unsigned current_data_length_;
size_t current_data_offset_;
size_t current_data_length_;
// For converting UTF-8 characters which are split across two data chunks.
uint8_t utf8_split_char_buffer_[4];
unsigned utf8_split_char_buffer_length_;
size_t utf8_split_char_buffer_length_;
};
......@@ -131,7 +130,7 @@ class ExternalTwoByteStringUtf16CharacterStream: public Utf16CharacterStream {
}
protected:
virtual unsigned SlowSeekForward(unsigned delta) {
virtual size_t SlowSeekForward(size_t delta) {
// Fast case always handles seeking.
return 0;
}
......
......@@ -67,15 +67,14 @@ class Utf16CharacterStream {
// Return the current position in the code unit stream.
// Starts at zero.
inline unsigned pos() const { return pos_; }
inline size_t pos() const { return pos_; }
// Skips forward past the next code_unit_count UTF-16 code units
// in the input, or until the end of input if that comes sooner.
// Returns the number of code units actually skipped. If less
// than code_unit_count, the end of the input was reached.
inline unsigned SeekForward(unsigned code_unit_count) {
unsigned buffered_chars =
static_cast<unsigned>(buffer_end_ - buffer_cursor_);
inline size_t SeekForward(size_t code_unit_count) {
size_t buffered_chars = buffer_end_ - buffer_cursor_;
if (code_unit_count <= buffered_chars) {
buffer_cursor_ += code_unit_count;
pos_ += code_unit_count;
......@@ -98,11 +97,11 @@ class Utf16CharacterStream {
// is at or after the end of the input, return false. If there
// are more code_units available, return true.
virtual bool ReadBlock() = 0;
virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;
virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
const uint16_t* buffer_cursor_;
const uint16_t* buffer_end_;
unsigned pos_;
size_t pos_;
};
......@@ -697,7 +696,7 @@ class Scanner {
// Return the current source position.
int source_pos() {
return source_->pos() - kCharacterLookaheadBufferSize;
return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
}
UnicodeCache* unicode_cache_;
......
......@@ -10,16 +10,16 @@
namespace unibrow {
void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length,
const uint8_t* stream, unsigned stream_length) {
void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length,
const uint8_t* stream, size_t stream_length) {
// Assume everything will fit in the buffer and stream won't be needed.
last_byte_of_buffer_unused_ = false;
unbuffered_start_ = NULL;
bool writing_to_buffer = true;
// Loop until stream is read, writing to buffer as long as buffer has space.
unsigned utf16_length = 0;
size_t utf16_length = 0;
while (stream_length != 0) {
unsigned cursor = 0;
size_t cursor = 0;
uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
DCHECK(cursor > 0 && cursor <= stream_length);
stream += cursor;
......@@ -56,9 +56,9 @@ void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length,
void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
unsigned data_length) {
size_t data_length) {
while (data_length != 0) {
unsigned cursor = 0;
size_t cursor = 0;
uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);
// There's a total lack of bounds checking for stream
// as it was already done in Reset.
......
......@@ -14,32 +14,32 @@ class Utf8DecoderBase {
public:
// Initialization done in subclass.
inline Utf8DecoderBase();
inline Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
const uint8_t* stream, unsigned stream_length);
inline unsigned Utf16Length() const { return utf16_length_; }
inline Utf8DecoderBase(uint16_t* buffer, size_t buffer_length,
const uint8_t* stream, size_t stream_length);
inline size_t Utf16Length() const { return utf16_length_; }
protected:
// This reads all characters and sets the utf16_length_.
// The first buffer_length utf16 chars are cached in the buffer.
void Reset(uint16_t* buffer, unsigned buffer_length, const uint8_t* stream,
unsigned stream_length);
void Reset(uint16_t* buffer, size_t buffer_length, const uint8_t* stream,
size_t stream_length);
static void WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
unsigned length);
size_t length);
const uint8_t* unbuffered_start_;
unsigned utf16_length_;
size_t utf16_length_;
bool last_byte_of_buffer_unused_;
private:
DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);
};
template <unsigned kBufferSize>
template <size_t kBufferSize>
class Utf8Decoder : public Utf8DecoderBase {
public:
inline Utf8Decoder() {}
inline Utf8Decoder(const char* stream, unsigned length);
inline void Reset(const char* stream, unsigned length);
inline unsigned WriteUtf16(uint16_t* data, unsigned length) const;
inline Utf8Decoder(const char* stream, size_t length);
inline void Reset(const char* stream, size_t length);
inline size_t WriteUtf16(uint16_t* data, size_t length) const;
private:
uint16_t buffer_[kBufferSize];
......@@ -52,35 +52,34 @@ Utf8DecoderBase::Utf8DecoderBase()
last_byte_of_buffer_unused_(false) {}
Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
const uint8_t* stream,
unsigned stream_length) {
Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, size_t buffer_length,
const uint8_t* stream, size_t stream_length) {
Reset(buffer, buffer_length, stream, stream_length);
}
template <unsigned kBufferSize>
Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, unsigned length)
template <size_t kBufferSize>
Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, size_t length)
: Utf8DecoderBase(buffer_, kBufferSize,
reinterpret_cast<const uint8_t*>(stream), length) {}
template <unsigned kBufferSize>
void Utf8Decoder<kBufferSize>::Reset(const char* stream, unsigned length) {
template <size_t kBufferSize>
void Utf8Decoder<kBufferSize>::Reset(const char* stream, size_t length) {
Utf8DecoderBase::Reset(buffer_, kBufferSize,
reinterpret_cast<const uint8_t*>(stream), length);
}
template <unsigned kBufferSize>
unsigned Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
unsigned length) const {
template <size_t kBufferSize>
size_t Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
size_t length) const {
DCHECK(length > 0);
if (length > utf16_length_) length = utf16_length_;
// memcpy everything in buffer.
unsigned buffer_length =
size_t buffer_length =
last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize;
unsigned memcpy_length = length <= buffer_length ? length : buffer_length;
size_t memcpy_length = length <= buffer_length ? length : buffer_length;
v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t));
if (length <= buffer_length) return length;
DCHECK(unbuffered_start_ != NULL);
......
......@@ -110,7 +110,7 @@ unsigned Utf8::Encode(char* str,
}
uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) {
uchar Utf8::ValueOf(const byte* bytes, size_t length, size_t* cursor) {
if (length <= 0) return kBadChar;
byte first = bytes[0];
// Characters between 0000 and 0007F are encoded as a single character
......
......@@ -190,9 +190,7 @@ static int LookupMapping(const int32_t* table,
}
uchar Utf8::CalculateValue(const byte* str,
unsigned length,
unsigned* cursor) {
uchar Utf8::CalculateValue(const byte* str, size_t length, size_t* cursor) {
// We only get called for non-ASCII characters.
if (length == 1) {
*cursor += 1;
......
......@@ -136,9 +136,7 @@ class Utf8 {
uchar c,
int previous,
bool replace_invalid = false);
static uchar CalculateValue(const byte* str,
unsigned length,
unsigned* cursor);
static uchar CalculateValue(const byte* str, size_t length, size_t* cursor);
// The unicode replacement character, used to signal invalid unicode
// sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding.
......@@ -156,9 +154,7 @@ class Utf8 {
// The maximum size a single UTF-16 code unit may take up when encoded as
// UTF-8.
static const unsigned kMax16BitCodeUnitSize = 3;
static inline uchar ValueOf(const byte* str,
unsigned length,
unsigned* cursor);
static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor);
};
struct Uppercase {
......
......@@ -733,9 +733,8 @@ class SequenceCollector : public Collector<T, growth_factor, max_growth> {
// Compare 8bit/16bit chars to 8bit/16bit chars.
template <typename lchar, typename rchar>
inline int CompareCharsUnsigned(const lchar* lhs,
const rchar* rhs,
int chars) {
inline int CompareCharsUnsigned(const lchar* lhs, const rchar* rhs,
size_t chars) {
const lchar* limit = lhs + chars;
if (sizeof(*lhs) == sizeof(char) && sizeof(*rhs) == sizeof(char)) {
// memcmp compares byte-by-byte, yielding wrong results for two-byte
......@@ -751,8 +750,8 @@ inline int CompareCharsUnsigned(const lchar* lhs,
return 0;
}
template<typename lchar, typename rchar>
inline int CompareChars(const lchar* lhs, const rchar* rhs, int chars) {
template <typename lchar, typename rchar>
inline int CompareChars(const lchar* lhs, const rchar* rhs, size_t chars) {
DCHECK(sizeof(lchar) <= 2);
DCHECK(sizeof(rchar) <= 2);
if (sizeof(lchar) == 1) {
......@@ -1317,27 +1316,30 @@ Vector<const char> ReadFile(FILE* file,
template <typename sourcechar, typename sinkchar>
INLINE(static void CopyCharsUnsigned(sinkchar* dest,
const sourcechar* src,
int chars));
INLINE(static void CopyCharsUnsigned(sinkchar* dest, const sourcechar* src,
size_t chars));
#if defined(V8_HOST_ARCH_ARM)
INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars));
INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, int chars));
INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars));
INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, size_t chars));
INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src,
size_t chars));
INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src,
size_t chars));
#elif defined(V8_HOST_ARCH_MIPS)
INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars));
INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars));
INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, size_t chars));
INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src,
size_t chars));
#elif defined(V8_HOST_ARCH_PPC)
INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars));
INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars));
INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, size_t chars));
INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src,
size_t chars));
#endif
// Copy from 8bit/16bit chars to 8bit/16bit chars.
template <typename sourcechar, typename sinkchar>
INLINE(void CopyChars(sinkchar* dest, const sourcechar* src, int chars));
INLINE(void CopyChars(sinkchar* dest, const sourcechar* src, size_t chars));
template<typename sourcechar, typename sinkchar>
void CopyChars(sinkchar* dest, const sourcechar* src, int chars) {
template <typename sourcechar, typename sinkchar>
void CopyChars(sinkchar* dest, const sourcechar* src, size_t chars) {
DCHECK(sizeof(sourcechar) <= 2);
DCHECK(sizeof(sinkchar) <= 2);
if (sizeof(sinkchar) == 1) {
......@@ -1364,7 +1366,7 @@ void CopyChars(sinkchar* dest, const sourcechar* src, int chars) {
}
template <typename sourcechar, typename sinkchar>
void CopyCharsUnsigned(sinkchar* dest, const sourcechar* src, int chars) {
void CopyCharsUnsigned(sinkchar* dest, const sourcechar* src, size_t chars) {
sinkchar* limit = dest + chars;
if ((sizeof(*dest) == sizeof(*src)) &&
(chars >= static_cast<int>(kMinComplexMemCopy / sizeof(*dest)))) {
......@@ -1376,7 +1378,7 @@ void CopyCharsUnsigned(sinkchar* dest, const sourcechar* src, int chars) {
#if defined(V8_HOST_ARCH_ARM)
void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, size_t chars) {
switch (static_cast<unsigned>(chars)) {
case 0:
break;
......@@ -1432,7 +1434,7 @@ void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
}
void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, int chars) {
void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, size_t chars) {
if (chars >= kMinComplexConvertMemCopy) {
MemCopyUint16Uint8(dest, src, chars);
} else {
......@@ -1441,7 +1443,7 @@ void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, int chars) {
}
void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars) {
void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, size_t chars) {
switch (static_cast<unsigned>(chars)) {
case 0:
break;
......@@ -1474,7 +1476,7 @@ void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars) {
#elif defined(V8_HOST_ARCH_MIPS)
void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, size_t chars) {
if (chars < kMinComplexMemCopy) {
memcpy(dest, src, chars);
} else {
......@@ -1482,7 +1484,7 @@ void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
}
}
void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars) {
void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, size_t chars) {
if (chars < kMinComplexMemCopy) {
memcpy(dest, src, chars * sizeof(*dest));
} else {
......@@ -1494,7 +1496,7 @@ void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars) {
case n: \
memcpy(dest, src, n); \
break
void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, size_t chars) {
switch (static_cast<unsigned>(chars)) {
case 0:
break;
......@@ -1575,7 +1577,7 @@ void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
case n: \
memcpy(dest, src, n * 2); \
break
void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars) {
void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, size_t chars) {
switch (static_cast<unsigned>(chars)) {
case 0:
break;
......
......@@ -696,7 +696,7 @@ TEST(Utf8CharacterStream) {
int i = 0;
while (stream.pos() < kMaxUC16CharU) {
CHECK_EQU(i, stream.pos());
unsigned progress = stream.SeekForward(12);
int progress = static_cast<int>(stream.SeekForward(12));
i += progress;
int32_t c = stream.Advance();
if (i <= kMaxUC16Char) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment