Commit 5b9b44d1 authored by vogelheim's avatar vogelheim Committed by Commit bot

Separate DuplicateFinder from Scanner.

DuplicateFinder isn't actually used by the Scanner, except for one
convenience function, which we should probably remove as well.

BUG=

Review-Url: https://codereview.chromium.org/2281443002
Cr-Commit-Position: refs/heads/master@{#38904}
parent b550c077
...@@ -1443,6 +1443,8 @@ v8_source_set("v8_base") { ...@@ -1443,6 +1443,8 @@ v8_source_set("v8_base") {
"src/objects.h", "src/objects.h",
"src/ostreams.cc", "src/ostreams.cc",
"src/ostreams.h", "src/ostreams.h",
"src/parsing/duplicate-finder.cc",
"src/parsing/duplicate-finder.h",
"src/parsing/expression-classifier.h", "src/parsing/expression-classifier.h",
"src/parsing/func-name-inferrer.cc", "src/parsing/func-name-inferrer.cc",
"src/parsing/func-name-inferrer.h", "src/parsing/func-name-inferrer.h",
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#define V8_COLLECTOR_H_ #define V8_COLLECTOR_H_
#include "src/checks.h" #include "src/checks.h"
#include "src/list.h" #include "src/list-inl.h"
#include "src/vector.h" #include "src/vector.h"
namespace v8 { namespace v8 {
......
// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/parsing/duplicate-finder.h"
#include "src/conversions.h"
#include "src/unicode-cache.h"
namespace v8 {
namespace internal {
// Records |key|, flagged as a one-byte string, together with |value|.
// Forwards to AddSymbol and returns whatever AddSymbol returns: the value
// bits that were recorded for this key before the call.
int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {
  const bool is_one_byte = true;
  return AddSymbol(key, is_one_byte, value);
}
// Records |key|, flagged as a two-byte string, together with |value|.
// The 16-bit code units are reinterpreted as a plain byte sequence so that
// one-byte and two-byte keys share the same AddSymbol code path; the
// representation flag keeps the two kinds of key apart.
int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {
  Vector<const uint8_t> raw_bytes = Vector<const uint8_t>::cast(key);
  const bool is_one_byte = false;
  return AddSymbol(raw_bytes, is_one_byte, value);
}
// Records |key| (raw bytes plus its representation flag) with |value|.
//
// The key is hashed, copied into the backing store, and looked up in (or
// inserted into) the hash map. The entry's value slot is used as an integer:
// the new |value| is OR-ed into whatever was stored there before.
//
// Returns the integer that was stored for this key before the call.
// NOTE(review): presumably a freshly inserted entry starts with a null value,
// making the result 0 for a first occurrence - confirm against base::HashMap.
int DuplicateFinder::AddSymbol(Vector<const uint8_t> key, bool is_one_byte,
                               int value) {
  const uint32_t key_hash = Hash(key, is_one_byte);
  // The map stores a pointer to the encoded copy, not to the caller's bytes.
  byte* stored_key = BackupKey(key, is_one_byte);
  base::HashMap::Entry* slot = map_.LookupOrInsert(stored_key, key_hash);

  const intptr_t previous_bits = reinterpret_cast<intptr_t>(slot->value);
  const int previous = static_cast<int>(previous_bits);
  const int merged = value | previous;
  slot->value = reinterpret_cast<void*>(static_cast<intptr_t>(merged));
  return previous;
}
// Records a number literal with |value|, keyed by its canonical string form.
//
// Literals that IsNumberCanonical accepts are added unchanged as one-byte
// symbols. Everything else is parsed to a double (hex, octal, implicit octal
// and binary forms allowed) and printed back; the printed form is the key.
// A non-finite parse result is keyed as "Infinity".
//
// |key| must be non-empty (checked with DCHECK).
// Returns what the underlying symbol insertion returns.
int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {
  DCHECK(key.length() > 0);

  // Fast path: the literal text already is its own canonical form.
  if (IsNumberCanonical(key)) return AddOneByteSymbol(key, value);

  const int conversion_flags =
      ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;
  const double numeric_value =
      StringToDouble(unicode_constants_, key, conversion_flags, 0.0);

  // Start from the non-finite spelling and overwrite it for finite values.
  const char* canonical = "Infinity";
  int canonical_length = 8;  // strlen("Infinity");
  if (std::isfinite(numeric_value)) {
    canonical = DoubleToCString(numeric_value,
                                Vector<char>(number_buffer_, kBufferSize));
    canonical_length = StrLength(canonical);
  }

  Vector<const byte> canonical_key(reinterpret_cast<const byte*>(canonical),
                                   canonical_length);
  return AddSymbol(canonical_key, true, value);
}
// Conservatively tests whether the number literal |number| is already spelled
// the way ToString(ToNumber(literal)) would print it, so that the literal
// text itself can be used as the lookup key.
//
// Returns true only when all of the following hold:
//  - the literal is non-empty and at most 15 characters long;
//  - the integer part is non-empty and is either a single '0' or a digit run
//    that does not start with '0';
//  - if there is a '.', at least one digit follows it, all following
//    characters are digits, and the last one is not '0';
//  - a literal with integer part '0' has fewer than six zeros directly after
//    the '.', i.e. its value is at least 1e-6.
//
// Returning false is always safe: the caller then takes the slow path that
// actually converts the literal.
bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {
  const int length = number.length();
  // An empty literal must not be indexed below; let the slow path handle it.
  if (length == 0 || length > 15) return false;

  int pos = 0;
  const bool zero_integer_part = (number[pos] == '0');
  if (zero_integer_part) {
    pos++;
  } else {
    while (pos < length &&
           static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) {
      pos++;
    }
    // A literal such as ".5" has no integer part at all. Its printed form is
    // "0.5", so the literal text is not canonical and must not be used as-is.
    if (pos == 0) return false;
  }
  if (length == pos) return true;
  if (number[pos] != '.') return false;
  pos++;

  // Scan the fraction. It must consist of digits only and end in a non-zero
  // digit; an empty fraction ("1.") is rejected by the initial flag value.
  bool invalid_last_digit = true;
  bool seen_nonzero_digit = false;
  int leading_fraction_zeros = 0;
  while (pos < length) {
    uint8_t digit = number[pos] - '0';
    if (digit > '9' - '0') return false;
    if (digit != 0) {
      seen_nonzero_digit = true;
    } else if (!seen_nonzero_digit) {
      leading_fraction_zeros++;
    }
    invalid_last_digit = (digit == 0);
    pos++;
  }
  if (invalid_last_digit) return false;

  // Values below 1e-6 are printed in exponent notation ("0.0000001" becomes
  // "1e-7"), so a plain decimal spelling of such a value is not canonical.
  if (zero_integer_part && leading_fraction_zeros >= 6) return false;
  return true;
}
// Computes a simple multiplicative hash over the bytes of |key|.
// The hash is seeded with the byte length shifted left by one, with the low
// bit carrying |is_one_byte|, so both the length and the representation
// influence the result. Arithmetic wraps modulo 2^32.
uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {
  const int byte_count = key.length();
  const int seed = (byte_count << 1) | (is_one_byte ? 1 : 0);

  uint32_t accumulator = seed;
  int index = 0;
  while (index < byte_count) {
    const uint32_t current = key[index];
    accumulator += current;
    accumulator *= 1025;
    accumulator ^= accumulator >> 6;
    ++index;
  }
  return accumulator;
}
bool DuplicateFinder::Match(void* first, void* second) {
// Decode lengths.
// Length + representation is encoded as base 128, most significant heptet
// first, with a 8th bit being non-zero while there are more heptets.
// The value encodes the number of bytes following, and whether the original
// was Latin1.
byte* s1 = reinterpret_cast<byte*>(first);
byte* s2 = reinterpret_cast<byte*>(second);
uint32_t length_one_byte_field = 0;
byte c1;
do {
c1 = *s1;
if (c1 != *s2) return false;
length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);
s1++;
s2++;
} while ((c1 & 0x80) != 0);
int length = static_cast<int>(length_one_byte_field >> 1);
return memcmp(s1, s2, length) == 0;
}
// Copies |bytes| into the backing store, prefixed by an encoded header, and
// returns a pointer to the start of the stored sequence.
//
// The header value is the byte length shifted left by one with the low bit
// set for one-byte strings. It is written in base 128, most significant
// group first; every header byte except the last has its top bit (0x80) set.
byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,
                                 bool is_one_byte) {
  const uint32_t header = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
  backing_store_.StartSequence();

  // Walk the 7-bit groups from the highest down to the second lowest and emit
  // a continuation byte for each group the header is large enough to reach.
  // Once one threshold is met all lower ones are met too, so no group is
  // skipped in the middle.
  for (int shift = 28; shift >= 7; shift -= 7) {
    if (header >= (1u << shift)) {
      backing_store_.Add(static_cast<uint8_t>((header >> shift) | 0x80u));
    }
  }
  // The final group is written without the continuation bit.
  backing_store_.Add(static_cast<uint8_t>(header & 0x7f));

  backing_store_.AddBlock(bytes);
  return backing_store_.EndSequence().start();
}
} // namespace internal
} // namespace v8
// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_PARSING_DUPLICATE_FINDER_H_
#define V8_PARSING_DUPLICATE_FINDER_H_
#include "src/base/hashmap.h"
#include "src/collector.h"
namespace v8 {
namespace internal {
class UnicodeCache;
// DuplicateFinder discovers duplicate symbols.
// Every Add* call records a key together with an integer value and returns
// the integer previously recorded for the same key; the new value is OR-ed
// into the recorded one.
class DuplicateFinder {
public:
// |constants| is kept and passed to StringToDouble when AddNumber has to
// convert a literal.
explicit DuplicateFinder(UnicodeCache* constants)
: unicode_constants_(constants), backing_store_(16), map_(&Match) {}
// Adds a symbol given as one-byte characters.
int AddOneByteSymbol(Vector<const uint8_t> key, int value);
// Adds a symbol given as two-byte characters. One-byte and two-byte keys are
// kept apart by a representation flag stored with the key.
int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
// Adds a number literal by converting it (if necessary)
// to the string that ToString(ToNumber(literal)) would generate,
// and then adding that string as a one-byte symbol.
// This string is the actual value used as key in an object literal,
// and the one that must be different from the other keys.
int AddNumber(Vector<const uint8_t> key, int value);
private:
// Shared implementation of the Add* functions: hashes the key, backs it up,
// and merges |value| into the map entry for it.
int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
// Backs up the key and its length in the backing store.
// The backup is stored with a base-128 encoding of the
// length (plus a bit saying whether the string is one byte),
// followed by the bytes of the key.
uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
// Compares two encoded keys (both pointing into the backing store)
// for having the same base-128 encoded lengths and representation,
// and then having the same 'length' bytes following.
static bool Match(void* first, void* second);
// Creates a hash from a sequence of bytes.
static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
// Checks whether a string containing a JS number is in its canonical
// form.
static bool IsNumberCanonical(Vector<const uint8_t> key);
// Size of number_buffer_. Sufficient for using it to call DoubleToCString
// from conversions.h.
static const int kBufferSize = 100;
// Passed through to StringToDouble; this class does not use it otherwise.
UnicodeCache* unicode_constants_;
// Backing store used to store strings used as hashmap keys.
SequenceCollector<unsigned char> backing_store_;
// Maps encoded keys (pointers into backing_store_) to their recorded values.
base::HashMap map_;
// Buffer used for string->number->canonical string conversions.
char number_buffer_[kBufferSize];
};
} // namespace internal
} // namespace v8
#endif // V8_PARSING_DUPLICATE_FINDER_H_
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
namespace v8 { namespace v8 {
namespace internal { namespace internal {
class DuplicateFinder;
#define ERROR_CODES(T) \ #define ERROR_CODES(T) \
T(ExpressionProduction, 0) \ T(ExpressionProduction, 0) \
T(FormalParameterInitializerProduction, 1) \ T(FormalParameterInitializerProduction, 1) \
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "src/base/platform/platform.h" #include "src/base/platform/platform.h"
#include "src/char-predicates-inl.h" #include "src/char-predicates-inl.h"
#include "src/messages.h" #include "src/messages.h"
#include "src/parsing/duplicate-finder.h"
#include "src/parsing/parameter-initializer-rewriter.h" #include "src/parsing/parameter-initializer-rewriter.h"
#include "src/parsing/parse-info.h" #include "src/parsing/parse-info.h"
#include "src/parsing/rewriter.h" #include "src/parsing/rewriter.h"
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "src/conversions.h" #include "src/conversions.h"
#include "src/globals.h" #include "src/globals.h"
#include "src/list.h" #include "src/list.h"
#include "src/parsing/duplicate-finder.h"
#include "src/parsing/parser-base.h" #include "src/parsing/parser-base.h"
#include "src/parsing/preparse-data-format.h" #include "src/parsing/preparse-data-format.h"
#include "src/parsing/preparse-data.h" #include "src/parsing/preparse-data.h"
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "src/char-predicates-inl.h" #include "src/char-predicates-inl.h"
#include "src/conversions-inl.h" #include "src/conversions-inl.h"
#include "src/list-inl.h" #include "src/list-inl.h"
#include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol
namespace v8 { namespace v8 {
namespace internal { namespace internal {
...@@ -1573,6 +1574,8 @@ bool Scanner::ContainsDot() { ...@@ -1573,6 +1574,8 @@ bool Scanner::ContainsDot() {
int Scanner::FindSymbol(DuplicateFinder* finder, int value) { int Scanner::FindSymbol(DuplicateFinder* finder, int value) {
// TODO(vogelheim): Move this logic into the calling class; this can be fully
// implemented using the public interface.
if (is_literal_one_byte()) { if (is_literal_one_byte()) {
return finder->AddOneByteSymbol(literal_one_byte_string(), value); return finder->AddOneByteSymbol(literal_one_byte_string(), value);
} }
...@@ -1632,144 +1635,6 @@ void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) { ...@@ -1632,144 +1635,6 @@ void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) {
} }
int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {
return AddSymbol(key, true, value);
}
int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {
return AddSymbol(Vector<const uint8_t>::cast(key), false, value);
}
int DuplicateFinder::AddSymbol(Vector<const uint8_t> key,
bool is_one_byte,
int value) {
uint32_t hash = Hash(key, is_one_byte);
byte* encoding = BackupKey(key, is_one_byte);
base::HashMap::Entry* entry = map_.LookupOrInsert(encoding, hash);
int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
entry->value =
reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value));
return old_value;
}
int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {
DCHECK(key.length() > 0);
// Quick check for already being in canonical form.
if (IsNumberCanonical(key)) {
return AddOneByteSymbol(key, value);
}
int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;
double double_value = StringToDouble(
unicode_constants_, key, flags, 0.0);
int length;
const char* string;
if (!std::isfinite(double_value)) {
string = "Infinity";
length = 8; // strlen("Infinity");
} else {
string = DoubleToCString(double_value,
Vector<char>(number_buffer_, kBufferSize));
length = StrLength(string);
}
return AddSymbol(Vector<const byte>(reinterpret_cast<const byte*>(string),
length), true, value);
}
bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {
// Test for a safe approximation of number literals that are already
// in canonical form: max 15 digits, no leading zeroes, except an
// integer part that is a single zero, and no trailing zeros below
// the decimal point.
int pos = 0;
int length = number.length();
if (number.length() > 15) return false;
if (number[pos] == '0') {
pos++;
} else {
while (pos < length &&
static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++;
}
if (length == pos) return true;
if (number[pos] != '.') return false;
pos++;
bool invalid_last_digit = true;
while (pos < length) {
uint8_t digit = number[pos] - '0';
if (digit > '9' - '0') return false;
invalid_last_digit = (digit == 0);
pos++;
}
return !invalid_last_digit;
}
uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {
// Primitive hash function, almost identical to the one used
// for strings (except that it's seeded by the length and representation).
int length = key.length();
uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0);
for (int i = 0; i < length; i++) {
uint32_t c = key[i];
hash = (hash + c) * 1025;
hash ^= (hash >> 6);
}
return hash;
}
bool DuplicateFinder::Match(void* first, void* second) {
// Decode lengths.
// Length + representation is encoded as base 128, most significant heptet
// first, with a 8th bit being non-zero while there are more heptets.
// The value encodes the number of bytes following, and whether the original
// was Latin1.
byte* s1 = reinterpret_cast<byte*>(first);
byte* s2 = reinterpret_cast<byte*>(second);
uint32_t length_one_byte_field = 0;
byte c1;
do {
c1 = *s1;
if (c1 != *s2) return false;
length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);
s1++;
s2++;
} while ((c1 & 0x80) != 0);
int length = static_cast<int>(length_one_byte_field >> 1);
return memcmp(s1, s2, length) == 0;
}
byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,
bool is_one_byte) {
uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
backing_store_.StartSequence();
// Emit one_byte_length as base-128 encoded number, with the 7th bit set
// on the byte of every heptet except the last, least significant, one.
if (one_byte_length >= (1 << 7)) {
if (one_byte_length >= (1 << 14)) {
if (one_byte_length >= (1 << 21)) {
if (one_byte_length >= (1 << 28)) {
backing_store_.Add(
static_cast<uint8_t>((one_byte_length >> 28) | 0x80));
}
backing_store_.Add(
static_cast<uint8_t>((one_byte_length >> 21) | 0x80u));
}
backing_store_.Add(
static_cast<uint8_t>((one_byte_length >> 14) | 0x80u));
}
backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
}
backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
backing_store_.AddBlock(bytes);
return backing_store_.EndSequence().start();
}
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8
...@@ -8,12 +8,9 @@ ...@@ -8,12 +8,9 @@
#define V8_PARSING_SCANNER_H_ #define V8_PARSING_SCANNER_H_
#include "src/allocation.h" #include "src/allocation.h"
#include "src/base/hashmap.h"
#include "src/base/logging.h" #include "src/base/logging.h"
#include "src/char-predicates.h" #include "src/char-predicates.h"
#include "src/collector.h"
#include "src/globals.h" #include "src/globals.h"
#include "src/list.h"
#include "src/messages.h" #include "src/messages.h"
#include "src/parsing/token.h" #include "src/parsing/token.h"
#include "src/unicode-decoder.h" #include "src/unicode-decoder.h"
...@@ -25,6 +22,7 @@ namespace internal { ...@@ -25,6 +22,7 @@ namespace internal {
class AstRawString; class AstRawString;
class AstValueFactory; class AstValueFactory;
class DuplicateFinder;
class ParserRecorder; class ParserRecorder;
class UnicodeCache; class UnicodeCache;
...@@ -99,56 +97,6 @@ class Utf16CharacterStream { ...@@ -99,56 +97,6 @@ class Utf16CharacterStream {
}; };
// ---------------------------------------------------------------------
// DuplicateFinder discovers duplicate symbols.
class DuplicateFinder {
public:
explicit DuplicateFinder(UnicodeCache* constants)
: unicode_constants_(constants),
backing_store_(16),
map_(&Match) { }
int AddOneByteSymbol(Vector<const uint8_t> key, int value);
int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
// Add a a number literal by converting it (if necessary)
// to the string that ToString(ToNumber(literal)) would generate.
// and then adding that string with AddOneByteSymbol.
// This string is the actual value used as key in an object literal,
// and the one that must be different from the other keys.
int AddNumber(Vector<const uint8_t> key, int value);
private:
int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
// Backs up the key and its length in the backing store.
// The backup is stored with a base 127 encoding of the
// length (plus a bit saying whether the string is one byte),
// followed by the bytes of the key.
uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
// Compare two encoded keys (both pointing into the backing store)
// for having the same base-127 encoded lengths and representation.
// and then having the same 'length' bytes following.
static bool Match(void* first, void* second);
// Creates a hash from a sequence of bytes.
static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
// Checks whether a string containing a JS number is its canonical
// form.
static bool IsNumberCanonical(Vector<const uint8_t> key);
// Size of buffer. Sufficient for using it to call DoubleToCString in
// from conversions.h.
static const int kBufferSize = 100;
UnicodeCache* unicode_constants_;
// Backing store used to store strings used as hashmap keys.
SequenceCollector<unsigned char> backing_store_;
base::HashMap map_;
// Buffer used for string->number->canonical string conversions.
char number_buffer_[kBufferSize];
};
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// JavaScript Scanner. // JavaScript Scanner.
......
...@@ -1039,6 +1039,8 @@ ...@@ -1039,6 +1039,8 @@
'objects.h', 'objects.h',
'ostreams.cc', 'ostreams.cc',
'ostreams.h', 'ostreams.h',
'parsing/duplicate-finder.cc',
'parsing/duplicate-finder.h',
'parsing/expression-classifier.h', 'parsing/expression-classifier.h',
'parsing/func-name-inferrer.cc', 'parsing/func-name-inferrer.cc',
'parsing/func-name-inferrer.h', 'parsing/func-name-inferrer.h',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment