Commit f905e3f4 authored by Iain Ireland, committed by Commit Bot

[regexp] Change rangeBoundaries to use uc32

Some of the DCHECK_LT assertions in GenerateBranches were generating
signed-vs-unsigned comparisons in SM. While I was looking at this code,
it seemed reasonable to just fix the whole thing to use uc32/uint32_t
where appropriate.

Bug: v8:11380
Change-Id: I7e27fb7e34ce962349d7204d6306217292746e33
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2666986
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72557}
parent 13b7167d
...@@ -954,17 +954,18 @@ static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, int first, ...@@ -954,17 +954,18 @@ static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, int first,
// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
static void EmitUseLookupTable(RegExpMacroAssembler* masm, static void EmitUseLookupTable(RegExpMacroAssembler* masm,
ZoneList<int>* ranges, int start_index, ZoneList<uc32>* ranges, uint32_t start_index,
int end_index, int min_char, Label* fall_through, uint32_t end_index, uc32 min_char,
Label* even_label, Label* odd_label) { Label* fall_through, Label* even_label,
static const int kSize = RegExpMacroAssembler::kTableSize; Label* odd_label) {
static const int kMask = RegExpMacroAssembler::kTableMask; static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
int base = (min_char & ~kMask);
uc32 base = (min_char & ~kMask);
USE(base); USE(base);
// Assert that everything is on one kTableSize page. // Assert that everything is on one kTableSize page.
for (int i = start_index; i <= end_index; i++) { for (uint32_t i = start_index; i <= end_index; i++) {
DCHECK_EQ(ranges->at(i) & ~kMask, base); DCHECK_EQ(ranges->at(i) & ~kMask, base);
} }
DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base); DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base);
...@@ -982,33 +983,35 @@ static void EmitUseLookupTable(RegExpMacroAssembler* masm, ...@@ -982,33 +983,35 @@ static void EmitUseLookupTable(RegExpMacroAssembler* masm,
on_bit_clear = odd_label; on_bit_clear = odd_label;
bit = 0; bit = 0;
} }
for (int i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; i++) { for (uint32_t i = 0; i < (ranges->at(start_index) & kMask) && i < kSize;
i++) {
templ[i] = bit; templ[i] = bit;
} }
int j = 0; uint32_t j = 0;
bit ^= 1; bit ^= 1;
for (int i = start_index; i < end_index; i++) { for (uint32_t i = start_index; i < end_index; i++) {
for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) { for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) {
templ[j] = bit; templ[j] = bit;
} }
bit ^= 1; bit ^= 1;
} }
for (int i = j; i < kSize; i++) { for (uint32_t i = j; i < kSize; i++) {
templ[i] = bit; templ[i] = bit;
} }
Factory* factory = masm->isolate()->factory(); Factory* factory = masm->isolate()->factory();
// TODO(erikcorry): Cache these. // TODO(erikcorry): Cache these.
Handle<ByteArray> ba = factory->NewByteArray(kSize, AllocationType::kOld); Handle<ByteArray> ba = factory->NewByteArray(kSize, AllocationType::kOld);
for (int i = 0; i < kSize; i++) { for (uint32_t i = 0; i < kSize; i++) {
ba->set(i, templ[i]); ba->set(i, templ[i]);
} }
masm->CheckBitInTable(ba, on_bit_set); masm->CheckBitInTable(ba, on_bit_set);
if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
} }
static void CutOutRange(RegExpMacroAssembler* masm, ZoneList<int>* ranges, static void CutOutRange(RegExpMacroAssembler* masm, ZoneList<uc32>* ranges,
int start_index, int end_index, int cut_index, uint32_t start_index, uint32_t end_index,
Label* even_label, Label* odd_label) { uint32_t cut_index, Label* even_label,
Label* odd_label) {
bool odd = (((cut_index - start_index) & 1) == 1); bool odd = (((cut_index - start_index) & 1) == 1);
Label* in_range_label = odd ? odd_label : even_label; Label* in_range_label = odd ? odd_label : even_label;
Label dummy; Label dummy;
...@@ -1019,24 +1022,24 @@ static void CutOutRange(RegExpMacroAssembler* masm, ZoneList<int>* ranges, ...@@ -1019,24 +1022,24 @@ static void CutOutRange(RegExpMacroAssembler* masm, ZoneList<int>* ranges,
// Cut out the single range by rewriting the array. This creates a new // Cut out the single range by rewriting the array. This creates a new
// range that is a merger of the two ranges on either side of the one we // range that is a merger of the two ranges on either side of the one we
// are cutting out. The oddity of the labels is preserved. // are cutting out. The oddity of the labels is preserved.
for (int j = cut_index; j > start_index; j--) { for (uint32_t j = cut_index; j > start_index; j--) {
ranges->at(j) = ranges->at(j - 1); ranges->at(j) = ranges->at(j - 1);
} }
for (int j = cut_index + 1; j < end_index; j++) { for (uint32_t j = cut_index + 1; j < end_index; j++) {
ranges->at(j) = ranges->at(j + 1); ranges->at(j) = ranges->at(j + 1);
} }
} }
// Unicode case. Split the search space into kSize spaces that are handled // Unicode case. Split the search space into kSize spaces that are handled
// with recursion. // with recursion.
static void SplitSearchSpace(ZoneList<int>* ranges, int start_index, static void SplitSearchSpace(ZoneList<uc32>* ranges, uint32_t start_index,
int end_index, int* new_start_index, uint32_t end_index, uint32_t* new_start_index,
int* new_end_index, int* border) { uint32_t* new_end_index, uc32* border) {
static const int kSize = RegExpMacroAssembler::kTableSize; static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
static const int kMask = RegExpMacroAssembler::kTableMask; static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
int first = ranges->at(start_index); uc32 first = ranges->at(start_index);
int last = ranges->at(end_index) - 1; uc32 last = ranges->at(end_index) - 1;
*new_start_index = start_index; *new_start_index = start_index;
*border = (ranges->at(start_index) & ~kMask) + kSize; *border = (ranges->at(start_index) & ~kMask) + kSize;
...@@ -1055,7 +1058,7 @@ static void SplitSearchSpace(ZoneList<int>* ranges, int start_index, ...@@ -1055,7 +1058,7 @@ static void SplitSearchSpace(ZoneList<int>* ranges, int start_index,
// 128-character space can take up a lot of space in the ranges array if, // 128-character space can take up a lot of space in the ranges array if,
// for example, we only want to match every second character (eg. the lower // for example, we only want to match every second character (eg. the lower
// case characters on some Unicode pages). // case characters on some Unicode pages).
int binary_chop_index = (end_index + start_index) / 2; uint32_t binary_chop_index = (end_index + start_index) / 2;
// The first test ensures that we get to the code that handles the Latin1 // The first test ensures that we get to the code that handles the Latin1
// range with a single not-taken branch, speeding up this important // range with a single not-taken branch, speeding up this important
// character range (even non-Latin1 charset-based text has spaces and // character range (even non-Latin1 charset-based text has spaces and
...@@ -1064,8 +1067,8 @@ static void SplitSearchSpace(ZoneList<int>* ranges, int start_index, ...@@ -1064,8 +1067,8 @@ static void SplitSearchSpace(ZoneList<int>* ranges, int start_index,
end_index - start_index > (*new_start_index - start_index) * 2 && end_index - start_index > (*new_start_index - start_index) * 2 &&
last - first > kSize * 2 && binary_chop_index > *new_start_index && last - first > kSize * 2 && binary_chop_index > *new_start_index &&
ranges->at(binary_chop_index) >= first + 2 * kSize) { ranges->at(binary_chop_index) >= first + 2 * kSize) {
int scan_forward_for_section_border = binary_chop_index; uint32_t scan_forward_for_section_border = binary_chop_index;
int new_border = (ranges->at(binary_chop_index) | kMask) + 1; uint32_t new_border = (ranges->at(binary_chop_index) | kMask) + 1;
while (scan_forward_for_section_border < end_index) { while (scan_forward_for_section_border < end_index) {
if (ranges->at(scan_forward_for_section_border) > new_border) { if (ranges->at(scan_forward_for_section_border) > new_border) {
...@@ -1095,15 +1098,15 @@ static void SplitSearchSpace(ZoneList<int>* ranges, int start_index, ...@@ -1095,15 +1098,15 @@ static void SplitSearchSpace(ZoneList<int>* ranges, int start_index,
// know that the character is in the range of min_char to max_char inclusive. // know that the character is in the range of min_char to max_char inclusive.
// Either label can be nullptr indicating backtracking. Either label can also // Either label can be nullptr indicating backtracking. Either label can also
// be equal to the fall_through label. // be equal to the fall_through label.
static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges, static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<uc32>* ranges,
int start_index, int end_index, uc32 min_char, uint32_t start_index, uint32_t end_index,
uc32 max_char, Label* fall_through, uc32 min_char, uc32 max_char, Label* fall_through,
Label* even_label, Label* odd_label) { Label* even_label, Label* odd_label) {
DCHECK_LE(min_char, String::kMaxUtf16CodeUnit); DCHECK_LE(min_char, String::kMaxUtf16CodeUnit);
DCHECK_LE(max_char, String::kMaxUtf16CodeUnit); DCHECK_LE(max_char, String::kMaxUtf16CodeUnit);
int first = ranges->at(start_index); uc32 first = ranges->at(start_index);
int last = ranges->at(end_index) - 1; uc32 last = ranges->at(end_index) - 1;
DCHECK_LT(min_char, first); DCHECK_LT(min_char, first);
...@@ -1127,9 +1130,9 @@ static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges, ...@@ -1127,9 +1130,9 @@ static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges,
if (end_index - start_index <= 6) { if (end_index - start_index <= 6) {
// It is faster to test for individual characters, so we look for those // It is faster to test for individual characters, so we look for those
// first, then try arbitrary ranges in the second round. // first, then try arbitrary ranges in the second round.
static int kNoCutIndex = -1; static uint32_t kNoCutIndex = -1;
int cut = kNoCutIndex; uint32_t cut = kNoCutIndex;
for (int i = start_index; i < end_index; i++) { for (uint32_t i = start_index; i < end_index; i++) {
if (ranges->at(i) == ranges->at(i + 1) - 1) { if (ranges->at(i) == ranges->at(i + 1) - 1) {
cut = i; cut = i;
break; break;
...@@ -1154,16 +1157,16 @@ static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges, ...@@ -1154,16 +1157,16 @@ static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges,
return; return;
} }
if ((min_char >> kBits) != static_cast<uc32>(first >> kBits)) { if ((min_char >> kBits) != first >> kBits) {
masm->CheckCharacterLT(first, odd_label); masm->CheckCharacterLT(first, odd_label);
GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char, GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char,
fall_through, odd_label, even_label); fall_through, odd_label, even_label);
return; return;
} }
int new_start_index = 0; uint32_t new_start_index = 0;
int new_end_index = 0; uint32_t new_end_index = 0;
int border = 0; uc32 border = 0;
SplitSearchSpace(ranges, start_index, end_index, &new_start_index, SplitSearchSpace(ranges, start_index, end_index, &new_start_index,
&new_end_index, &border); &new_end_index, &border);
...@@ -1260,9 +1263,8 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler, ...@@ -1260,9 +1263,8 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
// entry at zero which goes to the failure label, but if there // entry at zero which goes to the failure label, but if there
// was already one there we fall through for success on that entry. // was already one there we fall through for success on that entry.
// Subsequent entries have alternating meaning (success/failure). // Subsequent entries have alternating meaning (success/failure).
// TODO(jgruber,v8:10568): Change `range_boundaries` to a ZoneList<uc32>. ZoneList<uc32>* range_boundaries =
ZoneList<int>* range_boundaries = zone->New<ZoneList<uc32>>(last_valid_range, zone);
zone->New<ZoneList<int>>(last_valid_range, zone);
bool zeroth_entry_is_failure = !cc->is_negated(); bool zeroth_entry_is_failure = !cc->is_negated();
...@@ -1277,7 +1279,7 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler, ...@@ -1277,7 +1279,7 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
range_boundaries->Add(range.to() + 1, zone); range_boundaries->Add(range.to() + 1, zone);
} }
int end_index = range_boundaries->length() - 1; int end_index = range_boundaries->length() - 1;
if (static_cast<uc32>(range_boundaries->at(end_index)) > max_char) { if (range_boundaries->at(end_index) > max_char) {
end_index--; end_index--;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment