Commit dee16f86 authored by Daniel Vogelheim, committed by Commit Bot

[parser] Simplify DuplicateFinder.

The current incarnation of DuplicateFinder does work that
AstValueFactory already does. All that remains is that
DuplicateFinder wraps a container.

Adding const-ness changes were necessary to have IsDuplicateSymbol
be const.

BUG=v8:6092

Change-Id: I8081cfeef363717405d5b6325e290fe7725390dc
Reviewed-on: https://chromium-review.googlesource.com/456317
Commit-Queue: Marja Hölttä <marja@chromium.org>
Commit-Queue: Daniel Vogelheim <vogelheim@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Cr-Commit-Position: refs/heads/master@{#43872}
parent cd640d22
......@@ -1637,7 +1637,6 @@ v8_source_set("v8_base") {
"src/objects/scope-info.h",
"src/ostreams.cc",
"src/ostreams.h",
"src/parsing/duplicate-finder.cc",
"src/parsing/duplicate-finder.h",
"src/parsing/expression-classifier.h",
"src/parsing/func-name-inferrer.cc",
......
// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/parsing/duplicate-finder.h"
namespace v8 {
namespace internal {
// Registers a symbol given as one-byte characters. Forwards to AddSymbol with
// the one-byte flag set and returns AddSymbol's result unchanged.
bool DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key) {
  const bool is_one_byte = true;
  return AddSymbol(key, is_one_byte);
}
// Registers a symbol given as two-byte characters. The key is reinterpreted
// as a byte vector and forwarded to AddSymbol with the one-byte flag cleared.
bool DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key) {
  Vector<const uint8_t> raw_bytes = Vector<const uint8_t>::cast(key);
  return AddSymbol(raw_bytes, false);
}
// Adds a symbol (as raw bytes plus a representation flag) to the map and
// reports whether its map entry already carried a non-zero value.
//
// The key is hashed, copied into the backing store via BackupKey, and then
// looked up (or inserted) in map_. The entry's value is always set to 1
// afterwards, so a later call with a matching key returns true.
// NOTE(review): presumably a freshly inserted entry starts with a null value,
// making the first insertion return false - confirm against base::HashMap.
bool DuplicateFinder::AddSymbol(Vector<const uint8_t> key, bool is_one_byte) {
  const uint32_t key_hash = Hash(key, is_one_byte);
  byte* const stored_key = BackupKey(key, is_one_byte);
  base::HashMap::Entry* const slot = map_.LookupOrInsert(stored_key, key_hash);

  // Read the previous marker before overwriting it.
  const int previous_marker =
      static_cast<int>(reinterpret_cast<intptr_t>(slot->value));
  slot->value = reinterpret_cast<void*>(1);
  return previous_marker != 0;
}
// Computes a simple 32-bit hash over the bytes of |key|.
//
// The accumulator is seeded with (length << 1) | is_one_byte, so keys that
// differ only in length or representation start from different seeds. Each
// byte is then mixed in with an add, a multiply by 1025 and an xor-shift by 6.
uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {
  const int byte_count = key.length();
  uint32_t acc = (byte_count << 1) | (is_one_byte ? 1 : 0);
  int index = 0;
  while (index < byte_count) {
    const uint32_t unit = key[index];
    acc += unit;
    acc *= 1025;
    acc ^= (acc >> 6);
    ++index;
  }
  return acc;
}
bool DuplicateFinder::Match(void* first, void* second) {
// Decode lengths.
// Length + representation is encoded as base 128, most significant heptet
// first, with a 8th bit being non-zero while there are more heptets.
// The value encodes the number of bytes following, and whether the original
// was Latin1.
byte* s1 = reinterpret_cast<byte*>(first);
byte* s2 = reinterpret_cast<byte*>(second);
uint32_t length_one_byte_field = 0;
byte c1;
do {
c1 = *s1;
if (c1 != *s2) return false;
length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);
s1++;
s2++;
} while ((c1 & 0x80) != 0);
int length = static_cast<int>(length_one_byte_field >> 1);
return memcmp(s1, s2, length) == 0;
}
// Appends an encoded copy of |bytes| to the backing store and returns a
// pointer to the start of the sequence that was written.
//
// Layout: a variable-length header followed by the key bytes. The header
// value is (bytes.length() << 1) | is_one_byte, emitted in 7-bit groups,
// most significant group first. Every group except the last has the high bit
// (0x80) set; leading all-zero groups are omitted.
byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,
                                 bool is_one_byte) {
  const uint32_t header = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
  backing_store_.StartSequence();

  // Emit the continuation groups from the highest shift down to 7. A group is
  // written only when the header is large enough to need it.
  for (int shift = 28; shift >= 7; shift -= 7) {
    if (header >= (1u << shift)) {
      backing_store_.Add(static_cast<uint8_t>((header >> shift) | 0x80u));
    }
  }

  // Final group: low seven bits, high bit clear.
  backing_store_.Add(static_cast<uint8_t>(header & 0x7f));
  backing_store_.AddBlock(bytes);
  return backing_store_.EndSequence().start();
}
} // namespace internal
} // namespace v8
......@@ -5,39 +5,29 @@
#ifndef V8_PARSING_DUPLICATE_FINDER_H_
#define V8_PARSING_DUPLICATE_FINDER_H_
#include "src/base/hashmap.h"
#include "src/collector.h"
#include <set>
namespace v8 {
namespace internal {
// DuplicateFinder discovers duplicate symbols.
class Scanner;
// DuplicateFinder : Helper class to discover duplicate symbols.
//
// Allocate a DuplicateFinder for each set of symbols you want to check
// for duplicates and then pass this instance into
// Scanner::IsDuplicateSymbol(..).
//
// This class only holds the data; all actual logic is in
// Scanner::IsDuplicateSymbol.
class DuplicateFinder {
public:
DuplicateFinder() : backing_store_(16), map_(&Match) {}
bool AddOneByteSymbol(Vector<const uint8_t> key);
bool AddTwoByteSymbol(Vector<const uint16_t> key);
DuplicateFinder() {}
private:
bool AddSymbol(Vector<const uint8_t> key, bool is_one_byte);
// Backs up the key and its length in the backing store.
// The backup is stored with a base-128 encoding of the
// length (plus a bit saying whether the string is one byte),
// followed by the bytes of the key.
uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
// Compare two encoded keys (both pointing into the backing store)
// for having the same base-128 encoded lengths and representation,
// and then having the same 'length' bytes following.
static bool Match(void* first, void* second);
// Creates a hash from a sequence of bytes.
static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
// Backing store used to store strings used as hashmap keys.
SequenceCollector<unsigned char> backing_store_;
base::CustomMatcherHashMap map_;
friend class Scanner;
std::set<const void*> known_symbols_;
};
} // namespace internal
......
......@@ -1573,7 +1573,8 @@ ParserBase<Impl>::ParseAndClassifyIdentifier(bool* ok) {
}
if (classifier()->duplicate_finder() != nullptr &&
scanner()->FindSymbol(classifier()->duplicate_finder())) {
scanner()->IsDuplicateSymbol(classifier()->duplicate_finder(),
ast_value_factory())) {
classifier()->RecordDuplicateFormalParameterError(scanner()->location());
}
return name;
......@@ -2380,7 +2381,8 @@ ParserBase<Impl>::ParseObjectPropertyDefinition(ObjectLiteralChecker* checker,
DCHECK(!*is_computed_name);
if (classifier()->duplicate_finder() != nullptr &&
scanner()->FindSymbol(classifier()->duplicate_finder())) {
scanner()->IsDuplicateSymbol(classifier()->duplicate_finder(),
ast_value_factory())) {
classifier()->RecordDuplicateFormalParameterError(
scanner()->location());
}
......
......@@ -1678,25 +1678,24 @@ Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
return Just(RegExp::Flags(flags));
}
const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {
const AstRawString* Scanner::CurrentSymbol(
AstValueFactory* ast_value_factory) const {
if (is_literal_one_byte()) {
return ast_value_factory->GetOneByteString(literal_one_byte_string());
}
return ast_value_factory->GetTwoByteString(literal_two_byte_string());
}
const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {
const AstRawString* Scanner::NextSymbol(
AstValueFactory* ast_value_factory) const {
if (is_next_literal_one_byte()) {
return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
}
return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
}
const AstRawString* Scanner::CurrentRawSymbol(
AstValueFactory* ast_value_factory) {
AstValueFactory* ast_value_factory) const {
if (is_raw_literal_one_byte()) {
return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());
}
......@@ -1719,13 +1718,12 @@ bool Scanner::ContainsDot() {
return std::find(str.begin(), str.end(), '.') != str.end();
}
bool Scanner::FindSymbol(DuplicateFinder* finder) {
// TODO(vogelheim): Move this logic into the calling class; this can be fully
// implemented using the public interface.
if (is_literal_one_byte()) {
return finder->AddOneByteSymbol(literal_one_byte_string());
}
return finder->AddTwoByteSymbol(literal_two_byte_string());
bool Scanner::IsDuplicateSymbol(DuplicateFinder* duplicate_finder,
AstValueFactory* ast_value_factory) const {
DCHECK_NOT_NULL(duplicate_finder);
DCHECK_NOT_NULL(ast_value_factory);
const AstRawString* string = CurrentSymbol(ast_value_factory);
return !duplicate_finder->known_symbols_.insert(string).second;
}
void Scanner::SeekNext(size_t position) {
......
......@@ -247,9 +247,10 @@ class Scanner {
return next_.literal_chars->is_contextual_keyword(keyword);
}
const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);
const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);
const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);
const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory) const;
const AstRawString* NextSymbol(AstValueFactory* ast_value_factory) const;
const AstRawString* CurrentRawSymbol(
AstValueFactory* ast_value_factory) const;
double DoubleValue();
bool ContainsDot();
......@@ -281,7 +282,11 @@ class Scanner {
return false;
}
bool FindSymbol(DuplicateFinder* finder);
// Check whether the CurrentSymbol() has already been seen.
// The DuplicateFinder holds the data, so different instances can be used
// for different sets of duplicates to check for.
bool IsDuplicateSymbol(DuplicateFinder* duplicate_finder,
AstValueFactory* ast_value_factory) const;
UnicodeCache* unicode_cache() { return unicode_cache_; }
......@@ -628,7 +633,7 @@ class Scanner {
// requested for tokens that do not have a literal. Hence, we treat any
// token as a one-byte literal. E.g. Token::FUNCTION pretends to have a
// literal "function".
Vector<const uint8_t> literal_one_byte_string() {
Vector<const uint8_t> literal_one_byte_string() const {
if (current_.literal_chars)
return current_.literal_chars->one_byte_literal();
const char* str = Token::String(current_.token);
......@@ -636,11 +641,11 @@ class Scanner {
return Vector<const uint8_t>(str_as_uint8,
Token::StringLength(current_.token));
}
Vector<const uint16_t> literal_two_byte_string() {
Vector<const uint16_t> literal_two_byte_string() const {
DCHECK_NOT_NULL(current_.literal_chars);
return current_.literal_chars->two_byte_literal();
}
bool is_literal_one_byte() {
bool is_literal_one_byte() const {
return !current_.literal_chars || current_.literal_chars->is_one_byte();
}
int literal_length() const {
......@@ -649,27 +654,27 @@ class Scanner {
}
// Returns the literal string for the next token (the token that
// would be returned if Next() were called).
Vector<const uint8_t> next_literal_one_byte_string() {
Vector<const uint8_t> next_literal_one_byte_string() const {
DCHECK_NOT_NULL(next_.literal_chars);
return next_.literal_chars->one_byte_literal();
}
Vector<const uint16_t> next_literal_two_byte_string() {
Vector<const uint16_t> next_literal_two_byte_string() const {
DCHECK_NOT_NULL(next_.literal_chars);
return next_.literal_chars->two_byte_literal();
}
bool is_next_literal_one_byte() {
bool is_next_literal_one_byte() const {
DCHECK_NOT_NULL(next_.literal_chars);
return next_.literal_chars->is_one_byte();
}
Vector<const uint8_t> raw_literal_one_byte_string() {
Vector<const uint8_t> raw_literal_one_byte_string() const {
DCHECK_NOT_NULL(current_.raw_literal_chars);
return current_.raw_literal_chars->one_byte_literal();
}
Vector<const uint16_t> raw_literal_two_byte_string() {
Vector<const uint16_t> raw_literal_two_byte_string() const {
DCHECK_NOT_NULL(current_.raw_literal_chars);
return current_.raw_literal_chars->two_byte_literal();
}
bool is_raw_literal_one_byte() {
bool is_raw_literal_one_byte() const {
DCHECK_NOT_NULL(current_.raw_literal_chars);
return current_.raw_literal_chars->is_one_byte();
}
......
......@@ -1112,7 +1112,6 @@
'objects/scope-info.h',
'ostreams.cc',
'ostreams.h',
'parsing/duplicate-finder.cc',
'parsing/duplicate-finder.h',
'parsing/expression-classifier.h',
'parsing/func-name-inferrer.cc',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment