Commit bfcbc10c authored by lrn@chromium.org's avatar lrn@chromium.org

Fix bug in character-set merging. Add test case.

See Chromium bug 32637.

Review URL: http://codereview.chromium.org/553067

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3689 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent cd51fcd3
......@@ -4462,10 +4462,13 @@ void CharacterRange::Merge(ZoneList<CharacterRange>* first_set,
while (i1 < n1 || i2 < n2) {
CharacterRange next_range;
int range_source;
if (i2 == n2 || first_set->at(i1).from() < second_set->at(i2).from()) {
if (i2 == n2 ||
(i1 < n1 && first_set->at(i1).from() < second_set->at(i2).from())) {
// Next smallest element is in first set.
next_range = first_set->at(i1++);
range_source = kInsideFirst;
} else {
// Next smallest element is in second set.
next_range = second_set->at(i2++);
range_source = kInsideSecond;
}
......
......@@ -1650,6 +1650,163 @@ TEST(CanonicalizeCharacterSets) {
ASSERT_EQ(30, list->at(0).to());
}
// Checks whether a character is in the set represented by a list of ranges.
static bool CharacterInSet(ZoneList<CharacterRange>* set, uc16 value) {
for (int i = 0; i < set->length(); i++) {
CharacterRange range = set->at(i);
if (range.from() <= value && value <= range.to()) {
return true;
}
}
return false;
}
TEST(CharacterRangeMerge) {
ZoneScope zone_scope(DELETE_ON_EXIT);
ZoneList<CharacterRange> l1(4);
ZoneList<CharacterRange> l2(4);
// Create all combinations of intersections of ranges, both singletons and
// longer.
int offset = 0;
// The five kinds of singleton intersections:
// X
// Y - outside before
// Y - outside touching start
// Y - overlap
// Y - outside touching end
// Y - outside after
for (int i = 0; i < 5; i++) {
l1.Add(CharacterRange::Singleton(offset + 2));
l2.Add(CharacterRange::Singleton(offset + i));
offset += 6;
}
// The seven kinds of singleton/non-singleton intersections:
// XXX
// Y - outside before
// Y - outside touching start
// Y - inside touching start
// Y - entirely inside
// Y - inside touching end
// Y - outside touching end
// Y - disjoint after
for (int i = 0; i < 7; i++) {
l1.Add(CharacterRange::Range(offset + 2, offset + 4));
l2.Add(CharacterRange::Singleton(offset + i));
offset += 8;
}
// The eleven kinds of non-singleton intersections:
//
// XXXXXXXX
// YYYY - outside before.
// YYYY - outside touching start.
// YYYY - overlapping start
// YYYY - inside touching start
// YYYY - entirely inside
// YYYY - inside touching end
// YYYY - overlapping end
// YYYY - outside touching end
// YYYY - outside after
// YYYYYYYY - identical
// YYYYYYYYYYYY - containing entirely.
for (int i = 0; i < 9; i++) {
l1.Add(CharacterRange::Range(offset + 6, offset + 15)); // Length 8.
l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3));
offset += 22;
}
l1.Add(CharacterRange::Range(offset + 6, offset + 15));
l2.Add(CharacterRange::Range(offset + 6, offset + 15));
offset += 22;
l1.Add(CharacterRange::Range(offset + 6, offset + 15));
l2.Add(CharacterRange::Range(offset + 4, offset + 17));
offset += 22;
// Different kinds of multi-range overlap:
// XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX
// YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y
l1.Add(CharacterRange::Range(offset, offset + 21));
l1.Add(CharacterRange::Range(offset + 31, offset + 52));
for (int i = 0; i < 6; i++) {
l2.Add(CharacterRange::Range(offset + 2, offset + 5));
l2.Add(CharacterRange::Singleton(offset + 8));
offset += 9;
}
ASSERT(CharacterRange::IsCanonical(&l1));
ASSERT(CharacterRange::IsCanonical(&l2));
ZoneList<CharacterRange> first_only(4);
ZoneList<CharacterRange> second_only(4);
ZoneList<CharacterRange> both(4);
// Merge one direction.
CharacterRange::Merge(&l1, &l2, &first_only, &second_only, &both);
CHECK(CharacterRange::IsCanonical(&first_only));
CHECK(CharacterRange::IsCanonical(&second_only));
CHECK(CharacterRange::IsCanonical(&both));
for (uc16 i = 0; i < offset; i++) {
bool in_first = CharacterInSet(&l1, i);
bool in_second = CharacterInSet(&l2, i);
CHECK((in_first && !in_second) == CharacterInSet(&first_only, i));
CHECK((!in_first && in_second) == CharacterInSet(&second_only, i));
CHECK((in_first && in_second) == CharacterInSet(&both, i));
}
first_only.Clear();
second_only.Clear();
both.Clear();
// Merge other direction.
CharacterRange::Merge(&l2, &l1, &second_only, &first_only, &both);
CHECK(CharacterRange::IsCanonical(&first_only));
CHECK(CharacterRange::IsCanonical(&second_only));
CHECK(CharacterRange::IsCanonical(&both));
for (uc16 i = 0; i < offset; i++) {
bool in_first = CharacterInSet(&l1, i);
bool in_second = CharacterInSet(&l2, i);
CHECK((in_first && !in_second) == CharacterInSet(&first_only, i));
CHECK((!in_first && in_second) == CharacterInSet(&second_only, i));
CHECK((in_first && in_second) == CharacterInSet(&both, i));
}
first_only.Clear();
second_only.Clear();
both.Clear();
// Merge but don't record all combinations.
CharacterRange::Merge(&l1, &l2, NULL, NULL, &both);
CHECK(CharacterRange::IsCanonical(&both));
for (uc16 i = 0; i < offset; i++) {
bool in_first = CharacterInSet(&l1, i);
bool in_second = CharacterInSet(&l2, i);
CHECK((in_first && in_second) == CharacterInSet(&both, i));
}
// Merge into same set.
ZoneList<CharacterRange> all(4);
CharacterRange::Merge(&l1, &l2, &all, &all, &all);
CHECK(CharacterRange::IsCanonical(&all));
for (uc16 i = 0; i < offset; i++) {
bool in_first = CharacterInSet(&l1, i);
bool in_second = CharacterInSet(&l2, i);
CHECK((in_first || in_second) == CharacterInSet(&all, i));
}
}
TEST(Graph) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment