Commit 9b8a56d4 authored by erikcorry's avatar erikcorry Committed by Commit bot

Revert of Reland of 'Optimize trivial regexp disjunctions' CL 1176453002...

Revert of Reland of 'Optimize trivial regexp disjunctions' CL 1176453002 (patchset #2 id:20001 of https://codereview.chromium.org/1174713002/)

Reason for revert:
Tree looks like a Christmas tree and this isn't helping

Original issue's description:
> Reland of 'Optimize trivial regexp disjunctions' CL 1176453002
>
> Original code review: https://codereview.chromium.org/1176453002/
>
> TBR=yangguo@chromium.org
> BUG=chromium:482998
> LOG=n
>
> Committed: https://crrev.com/85fab0fa092e8d979413f6a61baec3abe26e568d
> Cr-Commit-Position: refs/heads/master@{#28884}

TBR=yangguo@chromium.org,erikcorry@chromium.org
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=chromium:482998

Review URL: https://codereview.chromium.org/1172893002

Cr-Commit-Position: refs/heads/master@{#28885}
parent 85fab0fa
......@@ -2872,9 +2872,6 @@ class RegExpDisjunction final : public RegExpTree {
int max_match() override { return max_match_; }
ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
private:
bool SortConsecutiveAtoms(RegExpCompiler* compiler);
void RationalizeConsecutiveAtoms(RegExpCompiler* compiler);
void FixSingleCharacterDisjunctions(RegExpCompiler* compiler);
ZoneList<RegExpTree*>* alternatives_;
int min_match_;
int max_match_;
......
......@@ -4817,200 +4817,10 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
}
int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
RegExpAtom* atom1 = (*a)->AsAtom();
RegExpAtom* atom2 = (*b)->AsAtom();
uc16 character1 = atom1->data().at(0);
uc16 character2 = atom2->data().at(0);
if (character1 < character2) return -1;
if (character1 > character2) return 1;
return 0;
}
// We can stable sort runs of atoms, since the order does not matter if they
// start with different characters.
// Returns true if any consecutive atoms were found.
bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
ZoneList<RegExpTree*>* alternatives = this->alternatives();
int length = alternatives->length();
bool found_consecutive_atoms = false;
for (int i = 0; i < length; i++) {
while (i < length) {
RegExpTree* alternative = alternatives->at(i);
if (alternative->IsAtom()) break;
i++;
}
// i is length or it is the index of an atom.
if (i == length) break;
int first_atom = i;
i++;
while (i < length) {
RegExpTree* alternative = alternatives->at(i);
if (!alternative->IsAtom()) break;
i++;
}
// Sort atoms to get ones with common prefixes together.
// This step is not valid if we are in a case-independent regexp,
// because it would change /is|I/ to /I|is/, and order matters when
// the regexp parts don't match only disjoint starting points. To fix
// this would need a version of CompareFirstChar that uses case-
// independent character classes for comparison.
if (!compiler->ignore_case()) {
DCHECK_LT(first_atom, alternatives->length());
DCHECK_LE(i, alternatives->length());
DCHECK_LE(first_atom, i);
alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom);
}
if (i - first_atom > 1) found_consecutive_atoms = true;
}
return found_consecutive_atoms;
}
// Optimizes ab|ac|az to a(?:b|c|d).
void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
Zone* zone = compiler->zone();
ZoneList<RegExpTree*>* alternatives = this->alternatives();
int length = alternatives->length();
int write_posn = 0;
int i = 0;
while (i < length) {
RegExpTree* alternative = alternatives->at(i);
if (!alternative->IsAtom()) {
alternatives->at(write_posn++) = alternatives->at(i);
i++;
continue;
}
RegExpAtom* atom = alternative->AsAtom();
uc16 common_prefix = atom->data().at(0);
int first_with_prefix = i;
int prefix_length = atom->length();
i++;
while (i < length) {
alternative = alternatives->at(i);
if (!alternative->IsAtom()) break;
atom = alternative->AsAtom();
if (atom->data().at(0) != common_prefix) break;
prefix_length = Min(prefix_length, atom->length());
i++;
}
if (i > first_with_prefix + 2) {
// Found worthwhile run of alternatives with common prefix of at least one
// character. The sorting function above did not sort on more than one
// character for reasons of correctness, but there may still be a longer
// common prefix if the terms were similar or presorted in the input.
// Find out how long the common prefix is.
int run_length = i - first_with_prefix;
atom = alternatives->at(first_with_prefix)->AsAtom();
for (int j = 1; j < run_length && prefix_length > 1; j++) {
RegExpAtom* old_atom =
alternatives->at(j + first_with_prefix)->AsAtom();
for (int k = 1; k < prefix_length; k++) {
if (atom->data().at(k) != old_atom->data().at(k)) prefix_length = k;
}
}
RegExpAtom* prefix =
new (zone) RegExpAtom(atom->data().SubVector(0, prefix_length));
ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone);
pair->Add(prefix, zone);
ZoneList<RegExpTree*>* suffixes =
new (zone) ZoneList<RegExpTree*>(run_length, zone);
for (int j = 0; j < run_length; j++) {
RegExpAtom* old_atom =
alternatives->at(j + first_with_prefix)->AsAtom();
int len = old_atom->length();
if (len == prefix_length) {
suffixes->Add(new (zone) RegExpEmpty(), zone);
} else {
RegExpTree* suffix = new (zone) RegExpAtom(
old_atom->data().SubVector(prefix_length, old_atom->length()));
suffixes->Add(suffix, zone);
}
}
pair->Add(new (zone) RegExpDisjunction(suffixes), zone);
alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair);
} else {
// Just copy any non-worthwhile alternatives.
for (int j = first_with_prefix; j < i; j++) {
alternatives->at(write_posn++) = alternatives->at(j);
}
}
}
alternatives->Rewind(write_posn); // Trim end of array.
}
// Optimizes b|c|z to [bcz].
void RegExpDisjunction::FixSingleCharacterDisjunctions(
RegExpCompiler* compiler) {
Zone* zone = compiler->zone();
ZoneList<RegExpTree*>* alternatives = this->alternatives();
int length = alternatives->length();
int write_posn = 0;
int i = 0;
while (i < length) {
RegExpTree* alternative = alternatives->at(i);
if (!alternative->IsAtom()) {
alternatives->at(write_posn++) = alternatives->at(i);
i++;
continue;
}
RegExpAtom* atom = alternative->AsAtom();
if (atom->length() != 1) {
alternatives->at(write_posn++) = alternatives->at(i);
i++;
continue;
}
int first_in_run = i;
i++;
while (i < length) {
alternative = alternatives->at(i);
if (!alternative->IsAtom()) break;
atom = alternative->AsAtom();
if (atom->length() != 1) break;
i++;
}
if (i > first_in_run + 1) {
// Found non-trivial run of single-character alternatives.
int run_length = i - first_in_run;
ZoneList<CharacterRange>* ranges =
new (zone) ZoneList<CharacterRange>(2, zone);
for (int j = 0; j < run_length; j++) {
RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom();
DCHECK_EQ(old_atom->length(), 1);
ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
}
alternatives->at(write_posn++) =
new (zone) RegExpCharacterClass(ranges, false);
} else {
// Just copy any trivial alternatives.
for (int j = first_in_run; j < i; j++) {
alternatives->at(write_posn++) = alternatives->at(j);
}
}
}
alternatives->Rewind(write_posn); // Trim end of array.
}
RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
ZoneList<RegExpTree*>* alternatives = this->alternatives();
if (alternatives->length() > 2) {
bool found_consecutive_atoms = SortConsecutiveAtoms(compiler);
if (found_consecutive_atoms) RationalizeConsecutiveAtoms(compiler);
FixSingleCharacterDisjunctions(compiler);
if (alternatives->length() == 1) {
return alternatives->at(0)->ToNode(compiler, on_success);
}
}
int length = alternatives->length();
ChoiceNode* result =
new(compiler->zone()) ChoiceNode(length, compiler->zone());
for (int i = 0; i < length; i++) {
......
......@@ -212,7 +212,7 @@ class RegExpImpl {
// and the total executable memory at any point.
static const int kRegExpExecutableMemoryLimit = 16 * MB;
static const int kRegExpCompiledLimit = 1 * MB;
static const int kRegExpTooLargeToOptimize = 20 * KB;
static const int kRegExpTooLargeToOptimize = 10 * KB;
private:
static bool CompileIrregexp(Handle<JSRegExp> re,
......
......@@ -195,15 +195,10 @@ int List<T, P>::CountOccurrences(const T& elm, int start, int end) const {
template<typename T, class P>
void List<T, P>::Sort(int (*cmp)(const T* x, const T* y)) {
Sort(cmp, 0, length_);
}
template <typename T, class P>
void List<T, P>::Sort(int (*cmp)(const T* x, const T* y), size_t s, size_t l) {
ToVector().Sort(cmp, s, l);
ToVector().Sort(cmp);
#ifdef DEBUG
for (size_t i = s + 1; i < l; i++) DCHECK(cmp(&data_[i - 1], &data_[i]) <= 0);
for (int i = 1; i < length_; i++)
DCHECK(cmp(&data_[i - 1], &data_[i]) <= 0);
#endif
}
......@@ -214,29 +209,7 @@ void List<T, P>::Sort() {
}
template <typename T, class P>
void List<T, P>::StableSort(int (*cmp)(const T* x, const T* y)) {
StableSort(cmp, 0, length_);
}
template <typename T, class P>
void List<T, P>::StableSort(int (*cmp)(const T* x, const T* y), size_t s,
size_t l) {
ToVector().StableSort(cmp, s, l);
#ifdef DEBUG
for (size_t i = s + 1; i < l; i++) DCHECK(cmp(&data_[i - 1], &data_[i]) <= 0);
#endif
}
template <typename T, class P>
void List<T, P>::StableSort() {
ToVector().StableSort();
}
template <typename T, class P>
template<typename T, class P>
void List<T, P>::Initialize(int capacity, P allocator) {
DCHECK(capacity >= 0);
data_ = (capacity > 0) ? NewData(capacity, allocator) : NULL;
......
......@@ -149,13 +149,8 @@ class List {
void Iterate(Visitor* visitor);
// Sort all list entries (using QuickSort)
void Sort(int (*cmp)(const T* x, const T* y), size_t start, size_t length);
void Sort(int (*cmp)(const T* x, const T* y));
void Sort();
void StableSort(int (*cmp)(const T* x, const T* y), size_t start,
size_t length);
void StableSort(int (*cmp)(const T* x, const T* y));
void StableSort();
INLINE(void Initialize(int capacity,
AllocationPolicy allocator = AllocationPolicy()));
......
......@@ -69,10 +69,6 @@ class Vector {
return Vector<T>(result, length_);
}
void Sort(int (*cmp)(const T*, const T*), size_t s, size_t l) {
std::sort(start() + s, start() + s + l, RawComparer(cmp));
}
void Sort(int (*cmp)(const T*, const T*)) {
std::sort(start(), start() + length(), RawComparer(cmp));
}
......@@ -81,16 +77,6 @@ class Vector {
std::sort(start(), start() + length());
}
void StableSort(int (*cmp)(const T*, const T*), size_t s, size_t l) {
std::stable_sort(start() + s, start() + s + l, RawComparer(cmp));
}
void StableSort(int (*cmp)(const T*, const T*)) {
std::stable_sort(start(), start() + length(), RawComparer(cmp));
}
void StableSort() { std::stable_sort(start(), start() + length()); }
void Truncate(int length) {
DCHECK(length <= length_);
length_ = length;
......
......@@ -1747,13 +1747,13 @@ TEST(TestSizeOfRegExpCode) {
// Adjust source below and this check to match
// RegExpImple::kRegExpTooLargeToOptimize.
DCHECK_EQ(i::RegExpImpl::kRegExpTooLargeToOptimize, 20 * KB);
DCHECK_EQ(i::RegExpImpl::kRegExpTooLargeToOptimize, 10 * KB);
// Compile a regexp that is much larger if we are using regexp optimizations.
CompileRun(
"var reg_exp_source = '(?:a|bc|def|ghij|klmno|pqrstu)';"
"var half_size_reg_exp;"
"while (reg_exp_source.length < 20 * 1024) {"
"while (reg_exp_source.length < 10 * 1024) {"
" half_size_reg_exp = reg_exp_source;"
" reg_exp_source = reg_exp_source + reg_exp_source;"
"}"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment