Commit 9d524bd3 authored by jbroman's avatar jbroman Committed by Commit bot

Fix out-of-range access in unibrow::Utf8::CalculateValue.

This code should not access bytes out of the permitted range in order to check
the range of a possible UTF-8 value. Instead, the length check should occur
before such checks.

BUG=chromium:667260, chromium:662822

Review-Url: https://codereview.chromium.org/2520053003
Cr-Commit-Position: refs/heads/master@{#41165}
parent 8c4988f7
...@@ -7,10 +7,11 @@ ...@@ -7,10 +7,11 @@
#include <sys/types.h> #include <sys/types.h>
#include "src/globals.h" #include "src/globals.h"
#include "src/utils.h"
namespace unibrow { namespace unibrow {
class Utf8DecoderBase { class V8_EXPORT_PRIVATE Utf8DecoderBase {
public: public:
// Initialization done in subclass. // Initialization done in subclass.
inline Utf8DecoderBase(); inline Utf8DecoderBase();
......
...@@ -235,35 +235,31 @@ uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) { ...@@ -235,35 +235,31 @@ uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {
while (count < max_count && IsContinuationCharacter(str[count])) { while (count < max_count && IsContinuationCharacter(str[count])) {
count++; count++;
} }
*cursor += count;
// Check overly long sequences & other conditions. Use length as error // There must be enough continuation characters.
// indicator. if (count != length) return kBadChar;
// Check overly long sequences & other conditions.
if (length == 3) { if (length == 3) {
if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) { if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) {
// Overlong three-byte sequence? // Overlong three-byte sequence?
length = 0; return kBadChar;
} else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) { } else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) {
// High and low surrogate halves? // High and low surrogate halves?
length = 0; return kBadChar;
} }
} else if (length == 4) { } else if (length == 4) {
if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) { if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) {
// Overlong four-byte sequence. // Overlong four-byte sequence.
length = 0; return kBadChar;
} else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) { } else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) {
// Code points outside of the unicode range. // Code points outside of the unicode range.
length = 0; return kBadChar;
} }
} }
if (count != length) {
// All invalid encodings should land here.
*cursor += count;
return kBadChar;
}
// All errors have been handled, so we only have to assemble the result. // All errors have been handled, so we only have to assemble the result.
*cursor += length;
switch (length) { switch (length) {
case 1: case 1:
return str[0]; return str[0];
......
...@@ -120,6 +120,7 @@ v8_executable("unittests") { ...@@ -120,6 +120,7 @@ v8_executable("unittests") {
"source-position-table-unittest.cc", "source-position-table-unittest.cc",
"test-utils.cc", "test-utils.cc",
"test-utils.h", "test-utils.h",
"unicode-unittest.cc",
"value-serializer-unittest.cc", "value-serializer-unittest.cc",
"wasm/asm-types-unittest.cc", "wasm/asm-types-unittest.cc",
"wasm/ast-decoder-unittest.cc", "wasm/ast-decoder-unittest.cc",
......
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <memory>
#include <string>
#include "src/unicode-decoder.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace v8 {
namespace internal {
namespace {
using Utf8Decoder = unibrow::Utf8Decoder<512>;
void Decode(Utf8Decoder* decoder, const std::string& str) {
// Put the string in its own buffer on the heap to make sure that
// AddressSanitizer's heap-buffer-overflow logic can see what's going on.
std::unique_ptr<char[]> buffer(new char[str.length()]);
memcpy(buffer.get(), str.data(), str.length());
decoder->Reset(buffer.get(), str.length());
}
} // namespace
TEST(UnicodeTest, ReadOffEndOfUtf8String) {
Utf8Decoder decoder;
// Not enough continuation bytes before string ends.
Decode(&decoder, "\xE0");
Decode(&decoder, "\xED");
Decode(&decoder, "\xF0");
Decode(&decoder, "\xF4");
}
} // namespace internal
} // namespace v8
...@@ -118,6 +118,7 @@ ...@@ -118,6 +118,7 @@
'source-position-table-unittest.cc', 'source-position-table-unittest.cc',
'test-utils.h', 'test-utils.h',
'test-utils.cc', 'test-utils.cc',
'unicode-unittest.cc',
'value-serializer-unittest.cc', 'value-serializer-unittest.cc',
'zone/segmentpool-unittest.cc', 'zone/segmentpool-unittest.cc',
'zone/zone-chunk-list-unittest.cc', 'zone/zone-chunk-list-unittest.cc',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment