Commit ab2d4bc9 authored by erik.corry@gmail.com

* Generate quick checks based on mask and compare for
  the alternatives in a choice node.  The quick checks
  are conservative in the sense that they only detect
  failure with certainty.  Checks can do 2 or 4 characters
  at a time.
* Inline the quick checks to allow the alternatives to
  be checked without branching in the common case where
  they fail.
Review URL: http://codereview.chromium.org/14194

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1005 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 00b0b67c
......@@ -51,23 +51,28 @@ V(ADVANCE_CP, 15, 5) /* advance_cp offset32 */ \
V(GOTO, 16, 5) /* goto addr32 */ \
V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32 addr32 */ \
V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 5) /* load offset32 */ \
V(CHECK_CHAR, 19, 7) /* check_char uc16 addr32 */ \
V(CHECK_NOT_CHAR, 20, 7) /* check_not_char uc16 addr32 */ \
V(OR_CHECK_NOT_CHAR, 21, 9) /* or_check_not_char uc16 uc16 addr32 */ \
V(MINUS_OR_CHECK_NOT_CHAR, 22, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \
V(CHECK_LT, 23, 7) /* check_lt uc16 addr32 */ \
V(CHECK_GT, 24, 7) /* check_gr uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 25, 6) /* check_not_back_ref capture_idx addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 26, 6) /* check_not_back_ref_no_case captu... */ \
V(CHECK_NOT_REGS_EQUAL, 27, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \
V(LOOKUP_MAP1, 28, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2, 29, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 30, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 31, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 32, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 33, 8) /* check_reg_ge register_index value16 addr32 */ \
V(CHECK_NOT_AT_START, 34, 5) /* check_not_at_start addr32 */ \
V(CHECK_GREEDY, 35, 5) /* check_greedy addr32 */
V(LOAD_2_CURRENT_CHARS, 19, 9) /* load offset32 addr32 */ \
V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 5) /* load offset32 */ \
V(LOAD_4_CURRENT_CHARS, 21, 9) /* load offset32 addr32 */ \
V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 5) /* load offset32 */ \
V(CHECK_CHAR, 23, 9) /* check_char uint32 addr32 */ \
V(CHECK_NOT_CHAR, 24, 9) /* check_not_char uint32 addr32 */ \
V(AND_CHECK_CHAR, 25, 13) /* and_check_char uint32 uint32 addr32 */ \
V(AND_CHECK_NOT_CHAR, 26, 13) /* and_check_not_char uint32 uint32 addr32 */ \
V(MINUS_AND_CHECK_NOT_CHAR, 27, 11) /* minus_and_check_not_char uc16 uc16...*/ \
V(CHECK_LT, 28, 7) /* check_lt uc16 addr32 */ \
V(CHECK_GT, 29, 7) /* check_gr uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 30, 6) /* check_not_back_ref capture_idx addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 31, 6) /* check_not_back_ref_no_case captu... */ \
V(CHECK_NOT_REGS_EQUAL, 32, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \
V(LOOKUP_MAP1, 33, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2, 34, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 35, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 36, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 37, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 38, 8) /* check_reg_ge register_index value16 addr32 */ \
V(CHECK_NOT_AT_START, 39, 5) /* check_not_at_start addr32 */ \
V(CHECK_GREEDY, 40, 5) /* check_greedy addr32 */
// Expands one (name, code, length) bytecode entry into an integer constant
// named BC_<name> holding the opcode value.  The length argument is not used
// by this expansion.
// NOTE(review): presumably applied to every V(...) entry of the bytecode
// table above - confirm against the full header.
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
......
......@@ -81,17 +81,34 @@ static void TraceInterpreter(const byte* code_base,
const byte* pc,
int stack_depth,
int current_position,
uint32_t current_char,
int bytecode_length,
const char* bytecode_name) {
if (FLAG_trace_regexp_bytecodes) {
PrintF("pc = %02x, sp = %d, current = %d, bc = %s",
bool printable = (current_char < 127 && current_char >= 32);
const char* format =
printable ?
"pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
"pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
PrintF(format,
pc - code_base,
stack_depth,
current_position,
current_char,
printable ? current_char : '.',
bytecode_name);
for (int i = 1; i < bytecode_length; i++) {
printf(", %02x", pc[i]);
}
printf(" ");
for (int i = 1; i < bytecode_length; i++) {
unsigned char b = pc[i];
if (b < 127 && b >= 32) {
printf("%c", b);
} else {
printf(".");
}
}
printf("\n");
}
}
......@@ -103,6 +120,7 @@ static void TraceInterpreter(const byte* code_base,
pc, \
backtrack_sp - backtrack_stack, \
current, \
current_char, \
BC_##name##_LENGTH, \
#name);
#else
......@@ -117,7 +135,7 @@ static bool RawMatch(const byte* code_base,
Vector<const Char> subject,
int* registers,
int current,
int current_char) {
uint32_t current_char) {
const byte* pc = code_base;
static const int kBacktrackStackSize = 10000;
int backtrack_stack[kBacktrackStackSize];
......@@ -233,45 +251,104 @@ static bool RawMatch(const byte* code_base,
pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
break;
}
BYTECODE(LOAD_2_CURRENT_CHARS) {
  // Operands: offset32 at pc + 1, failure address at pc + 5.
  // Packs two consecutive subject characters into current_char, the first
  // one in the low bits.  Jumps to the failure address when fewer than two
  // characters remain at the requested position.
  int start = current + Load32(pc + 1);
  if (start + 2 <= subject.length()) {
    Char low = subject[start];
    Char high = subject[start + 1];
    current_char = (low | (high << (kBitsPerByte * sizeof(Char))));
    pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
  } else {
    pc = code_base + Load32(pc + 5);
  }
  break;
}
BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
  // Operand: offset32 at pc + 1.  Same packing as LOAD_2_CURRENT_CHARS but
  // without any end-of-input test in this handler.
  int first = current + Load32(pc + 1);
  Char low = subject[first];
  Char high = subject[first + 1];
  current_char = (low | (high << (kBitsPerByte * sizeof(Char))));
  pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
  break;
}
BYTECODE(LOAD_4_CURRENT_CHARS) {
  // Operands: offset32 at pc + 1, failure address at pc + 5.
  // Only meaningful for one-byte characters: four of them are packed into
  // current_char, the first one in the lowest byte.
  ASSERT(sizeof(Char) == 1);
  int start = current + Load32(pc + 1);
  if (start + 4 <= subject.length()) {
    Char byte0 = subject[start];
    Char byte1 = subject[start + 1];
    Char byte2 = subject[start + 2];
    Char byte3 = subject[start + 3];
    current_char = (byte0 | (byte1 << 8) | (byte2 << 16) | (byte3 << 24));
    pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
  } else {
    // Fewer than four characters remain: take the failure branch.
    pc = code_base + Load32(pc + 5);
  }
  break;
}
BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
  // Operand: offset32 at pc + 1.  Packs four one-byte characters into
  // current_char (first character in the lowest byte) with no end-of-input
  // test in this handler.
  ASSERT(sizeof(Char) == 1);
  int first = current + Load32(pc + 1);
  Char byte0 = subject[first];
  Char byte1 = subject[first + 1];
  Char byte2 = subject[first + 2];
  Char byte3 = subject[first + 3];
  current_char = (byte0 | (byte1 << 8) | (byte2 << 16) | (byte3 << 24));
  pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
  break;
}
BYTECODE(CHECK_CHAR) {
int c = Load16(pc + 1);
uint32_t c = Load32(pc + 1);
if (c == current_char) {
pc = code_base + Load32(pc + 3);
pc = code_base + Load32(pc + 5);
} else {
pc += BC_CHECK_CHAR_LENGTH;
}
break;
}
BYTECODE(CHECK_NOT_CHAR) {
int c = Load16(pc + 1);
uint32_t c = Load32(pc + 1);
if (c != current_char) {
pc = code_base + Load32(pc + 3);
pc = code_base + Load32(pc + 5);
} else {
pc += BC_CHECK_NOT_CHAR_LENGTH;
}
break;
}
BYTECODE(OR_CHECK_NOT_CHAR) {
int c = Load16(pc + 1);
if (c != (current_char | Load16(pc + 3))) {
pc = code_base + Load32(pc + 5);
BYTECODE(AND_CHECK_CHAR) {
uint32_t c = Load32(pc + 1);
if (c == (current_char & Load32(pc + 5))) {
pc = code_base + Load32(pc + 9);
} else {
pc += BC_OR_CHECK_NOT_CHAR_LENGTH;
pc += BC_AND_CHECK_CHAR_LENGTH;
}
break;
}
BYTECODE(MINUS_OR_CHECK_NOT_CHAR) {
int c = Load16(pc + 1);
int m = Load16(pc + 3);
if (c != ((current_char - m) | m)) {
pc = code_base + Load32(pc + 5);
BYTECODE(AND_CHECK_NOT_CHAR) {
  // Operands: expected value at pc + 1, mask at pc + 5, target at pc + 9.
  // Branches to the target when the masked current character differs from
  // the expected value; otherwise falls through to the next bytecode.
  uint32_t expected = Load32(pc + 1);
  uint32_t masked = current_char & Load32(pc + 5);
  if (masked == expected) {
    pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
  } else {
    pc = code_base + Load32(pc + 9);
  }
  break;
}
BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
uint32_t c = Load16(pc + 1);
uint32_t minus = Load16(pc + 3);
uint32_t mask = Load16(pc + 5);
if (c != ((current_char - minus) & mask)) {
pc = code_base + Load32(pc + 7);
} else {
pc += BC_MINUS_OR_CHECK_NOT_CHAR_LENGTH;
pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
}
break;
}
BYTECODE(CHECK_LT) {
int limit = Load16(pc + 1);
uint32_t limit = Load16(pc + 1);
if (current_char < limit) {
pc = code_base + Load32(pc + 3);
} else {
......@@ -280,7 +357,7 @@ static bool RawMatch(const byte* code_base,
break;
}
BYTECODE(CHECK_GT) {
int limit = Load16(pc + 1);
uint32_t limit = Load16(pc + 1);
if (current_char > limit) {
pc = code_base + Load32(pc + 3);
} else {
......
This diff is collapsed.
This diff is collapsed.
......@@ -154,7 +154,7 @@ void RegExpMacroAssemblerIA32::CheckBitmap(uc16 start,
}
void RegExpMacroAssemblerIA32::CheckCharacter(uc16 c, Label* on_equal) {
void RegExpMacroAssemblerIA32::CheckCharacter(uint32_t c, Label* on_equal) {
__ cmp(current_character(), c);
BranchOrBacktrack(equal, on_equal);
}
......@@ -365,28 +365,41 @@ void RegExpMacroAssemblerIA32::CheckNotRegistersEqual(int reg1,
}
void RegExpMacroAssemblerIA32::CheckNotCharacter(uc16 c, Label* on_not_equal) {
void RegExpMacroAssemblerIA32::CheckNotCharacter(uint32_t c,
Label* on_not_equal) {
__ cmp(current_character(), c);
BranchOrBacktrack(not_equal, on_not_equal);
}
void RegExpMacroAssemblerIA32::CheckNotCharacterAfterOr(uc16 c,
uc16 mask,
Label* on_not_equal) {
void RegExpMacroAssemblerIA32::CheckCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_equal) {
__ mov(eax, current_character());
__ or_(eax, mask);
__ and_(eax, mask);
__ cmp(eax, c);
BranchOrBacktrack(equal, on_equal);
}
// Emits code that masks the current character with |mask| and compares the
// result with |c|, passing the not_equal condition and |on_not_equal| to
// BranchOrBacktrack.
// NOTE(review): presumably BranchOrBacktrack backtracks when the label is
// NULL, as described for the macro assembler interface - confirm.
void RegExpMacroAssemblerIA32::CheckNotCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_not_equal) {
// Work on a copy in eax so the current character register is left intact.
__ mov(eax, current_character());
__ and_(eax, mask);
__ cmp(eax, c);
BranchOrBacktrack(not_equal, on_not_equal);
}
void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusOr(
void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusAnd(
uc16 c,
uc16 minus,
uc16 mask,
Label* on_not_equal) {
__ lea(eax, Operand(current_character(), -mask));
__ or_(eax, mask);
ASSERT(minus < String::kMaxUC16CharCode);
__ lea(eax, Operand(current_character(), -minus));
__ and_(eax, mask);
__ cmp(eax, c);
BranchOrBacktrack(not_equal, on_not_equal);
}
......@@ -516,7 +529,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
Label at_start;
__ cmp(Operand(ebp, kAtStart), Immediate(0));
__ j(not_equal, &at_start);
LoadCurrentCharacterUnchecked(-1); // Load previous char.
LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
__ jmp(&start_label_);
__ bind(&at_start);
__ mov(current_character(), '\n');
......@@ -631,12 +644,16 @@ RegExpMacroAssembler::IrregexpImplementation
void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input) {
Label* on_end_of_input,
bool check_bounds,
int characters) {
ASSERT(cp_offset >= 0);
ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
__ cmp(edi, -cp_offset * char_size());
BranchOrBacktrack(greater_equal, on_end_of_input);
LoadCurrentCharacterUnchecked(cp_offset);
if (check_bounds) {
__ cmp(edi, -(cp_offset + characters) * char_size());
BranchOrBacktrack(greater, on_end_of_input);
}
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
......@@ -871,13 +888,27 @@ void RegExpMacroAssemblerIA32::CallCFunction(Address function_address,
}
void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset) {
void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset,
int characters) {
if (mode_ == ASCII) {
__ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
if (characters == 4) {
__ mov(current_character(), Operand(esi, edi, times_1, cp_offset));
} else if (characters == 2) {
__ movzx_w(current_character(), Operand(esi, edi, times_1, cp_offset));
} else {
ASSERT(characters == 1);
__ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
}
} else {
ASSERT(mode_ == UC16);
__ movzx_w(current_character(),
Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
if (characters == 2) {
__ mov(current_character(),
Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
} else {
ASSERT(characters == 1);
__ movzx_w(current_character(),
Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
}
}
}
......
......@@ -43,7 +43,10 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void Backtrack();
virtual void Bind(Label* label);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void CheckCharacter(uc16 c, Label* on_equal);
virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(Vector<const uc16> str,
......@@ -56,11 +59,14 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
uc16 mask,
Label* on_not_equal);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
uc16 minus,
uc16 mask,
Label* on_not_equal);
virtual void DispatchByteMap(uc16 start,
Label* byte_map,
const Vector<Label*>& destinations);
......@@ -77,9 +83,10 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
bool check_bounds = true,
int characters = 1);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
......@@ -135,6 +142,8 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
int byte_offset2,
size_t byte_length);
void LoadCurrentCharacterUnchecked(int cp_offset, int characters);
// Called from RegExp if the stack-guard is triggered.
// If the code object is relocated, the return address is fixed before
// returning.
......
......@@ -44,6 +44,7 @@ RegExpMacroAssemblerIrregexp::RegExpMacroAssemblerIrregexp(Vector<byte> buffer)
RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() {
if (backtrack_.is_linked()) backtrack_.Unuse();
}
......@@ -196,17 +197,32 @@ void RegExpMacroAssemblerIrregexp::CheckGreedyLoop(
void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset,
Label* on_failure) {
Emit(BC_LOAD_CURRENT_CHAR);
Emit32(cp_offset);
EmitOrLink(on_failure);
}
void RegExpMacroAssemblerIrregexp::LoadCurrentCharacterUnchecked(
int cp_offset) {
Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED);
Label* on_failure,
bool check_bounds,
int characters) {
int bytecode;
if (check_bounds) {
if (characters == 4) {
bytecode = BC_LOAD_4_CURRENT_CHARS;
} else if (characters == 2) {
bytecode = BC_LOAD_2_CURRENT_CHARS;
} else {
ASSERT(characters == 1);
bytecode = BC_LOAD_CURRENT_CHAR;
}
} else {
if (characters == 4) {
bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED;
} else if (characters == 2) {
bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
} else {
ASSERT(characters == 1);
bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
}
}
Emit(bytecode);
Emit32(cp_offset);
if (check_bounds) EmitOrLink(on_failure);
}
......@@ -226,9 +242,9 @@ void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit,
}
void RegExpMacroAssemblerIrregexp::CheckCharacter(uc16 c, Label* on_equal) {
void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) {
Emit(BC_CHECK_CHAR);
Emit16(c);
Emit32(c);
EmitOrLink(on_equal);
}
......@@ -239,31 +255,44 @@ void RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) {
}
void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uc16 c,
void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uint32_t c,
Label* on_not_equal) {
Emit(BC_CHECK_NOT_CHAR);
Emit16(c);
Emit32(c);
EmitOrLink(on_not_equal);
}
void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterOr(
uc16 c,
uc16 mask,
// Emits an AND_CHECK_CHAR bytecode: the opcode followed by the 32-bit value
// to compare against, the 32-bit mask, and the branch target.  The operand
// order matches what the interpreter reads for this bytecode (value at
// pc + 1, mask at pc + 5, target at pc + 9).
void RegExpMacroAssemblerIrregexp::CheckCharacterAfterAnd(
uint32_t c,
uint32_t mask,
Label* on_equal) {
Emit(BC_AND_CHECK_CHAR);
Emit32(c);
Emit32(mask);
// Writes the label's address, or links the label if it is not yet bound
// (NOTE(review): inferred from the helper's name - confirm).
EmitOrLink(on_equal);
}
void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterAnd(
uint32_t c,
uint32_t mask,
Label* on_not_equal) {
Emit(BC_OR_CHECK_NOT_CHAR);
Emit16(c);
Emit16(mask);
Emit(BC_AND_CHECK_NOT_CHAR);
Emit32(c);
Emit32(mask);
EmitOrLink(on_not_equal);
}
void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusOr(
void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd(
uc16 c,
uc16 minus,
uc16 mask,
Label* on_not_equal) {
Emit(BC_MINUS_OR_CHECK_NOT_CHAR);
Emit(BC_MINUS_AND_CHECK_NOT_CHAR);
Emit16(c);
Emit16(minus);
Emit16(mask);
EmitOrLink(on_not_equal);
}
......@@ -344,7 +373,7 @@ void RegExpMacroAssemblerIrregexp::CheckCharacters(
Emit32(cp_offset + i);
}
Emit(BC_CHECK_NOT_CHAR);
Emit16(str[i]);
Emit32(str[i]);
EmitOrLink(on_failure);
}
}
......
......@@ -66,18 +66,26 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void WriteStackPointerToRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
bool check_bounds = true,
int characters = 1);
virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacter(uc16 c, Label* on_equal);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
uc16 mask,
Label* on_not_equal);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
uc16 minus,
uc16 mask,
Label* on_not_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
......
......@@ -164,18 +164,19 @@ void RegExpMacroAssemblerTracer::ReadStackPointerFromRegister(int reg) {
void RegExpMacroAssemblerTracer::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input) {
PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]);\n",
Label* on_end_of_input,
bool check_bounds,
int characters) {
const char* check_msg = check_bounds ? "" : " (unchecked)";
PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]%s (%d chars));\n",
cp_offset,
on_end_of_input);
assembler_->LoadCurrentCharacter(cp_offset, on_end_of_input);
}
void RegExpMacroAssemblerTracer::LoadCurrentCharacterUnchecked(int cp_offset) {
PrintF(" LoadCurrentCharacterUnchecked(cp_offset=%d);\n",
cp_offset);
assembler_->LoadCurrentCharacterUnchecked(cp_offset);
on_end_of_input,
check_msg,
characters);
assembler_->LoadCurrentCharacter(cp_offset,
on_end_of_input,
check_bounds,
characters);
}
......@@ -192,7 +193,7 @@ void RegExpMacroAssemblerTracer::CheckCharacterGT(uc16 limit,
}
void RegExpMacroAssemblerTracer::CheckCharacter(uc16 c, Label* on_equal) {
void RegExpMacroAssemblerTracer::CheckCharacter(uint32_t c, Label* on_equal) {
PrintF(" CheckCharacter(c='u%04x', label[%08x]);\n", c, on_equal);
assembler_->CheckCharacter(c, on_equal);
}
......@@ -204,28 +205,49 @@ void RegExpMacroAssemblerTracer::CheckNotAtStart(Label* on_not_at_start) {
}
void RegExpMacroAssemblerTracer::CheckNotCharacter(uc16 c,
void RegExpMacroAssemblerTracer::CheckNotCharacter(uint32_t c,
Label* on_not_equal) {
PrintF(" CheckNotCharacter(c='u%04x', label[%08x]);\n", c, on_not_equal);
assembler_->CheckNotCharacter(c, on_not_equal);
}
void RegExpMacroAssemblerTracer::CheckNotCharacterAfterOr(uc16 c, uc16 mask,
Label* on_not_equal) {
PrintF(" CheckNotCharacterAfterOr(c='u%04x', mask=0x%04x, label[%08x]);\n", c,
mask, on_not_equal);
assembler_->CheckNotCharacterAfterOr(c, mask, on_not_equal);
void RegExpMacroAssemblerTracer::CheckCharacterAfterAnd(uint32_t c,
                                                        uint32_t mask,
                                                        Label* on_equal) {
  // Log the request first, then hand it to the wrapped assembler unchanged.
  PrintF(" CheckCharacterAfterAnd(c='u%04x', mask=0x%04x, label[%08x]);\n",
         c, mask, on_equal);
  assembler_->CheckCharacterAfterAnd(c, mask, on_equal);
}
void RegExpMacroAssemblerTracer::CheckNotCharacterAfterAnd(uint32_t c,
                                                           uint32_t mask,
                                                           Label* on_not_equal) {
  // Log the request first, then hand it to the wrapped assembler unchanged.
  PrintF(" CheckNotCharacterAfterAnd(c='u%04x', mask=0x%04x, label[%08x]);\n",
         c, mask, on_not_equal);
  assembler_->CheckNotCharacterAfterAnd(c, mask, on_not_equal);
}
void RegExpMacroAssemblerTracer::CheckNotCharacterAfterMinusOr(
void RegExpMacroAssemblerTracer::CheckNotCharacterAfterMinusAnd(
uc16 c,
uc16 minus,
uc16 mask,
Label* on_not_equal) {
PrintF(" CheckNotCharacterAfterMinusOr(c='u%04x', mask=0x%04x, "
"label[%08x]);\n", c, mask, on_not_equal);
assembler_->CheckNotCharacterAfterMinusOr(c, mask, on_not_equal);
PrintF(" CheckNotCharacterAfterMinusAnd(c='u%04x', minus=%04x, mask=0x%04x, "
"label[%08x]);\n",
c,
minus,
mask,
on_not_equal);
assembler_->CheckNotCharacterAfterMinusAnd(c, minus, mask, on_not_equal);
}
......
......@@ -41,7 +41,10 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void Backtrack();
virtual void Bind(Label* label);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void CheckCharacter(uc16 c, Label* on_equal);
virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t and_with,
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(
......@@ -55,13 +58,14 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c,
uc16 or_with,
Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
uc16 minus_then_or_with,
Label* on_not_equal);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
uint32_t and_with,
Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
uc16 minus,
uc16 and_with,
Label* on_not_equal);
virtual void DispatchByteMap(
uc16 start,
Label* byte_map,
......@@ -81,8 +85,10 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
bool check_bounds = true,
int characters = 1);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
......
......@@ -58,7 +58,12 @@ class RegExpMacroAssembler {
Label* on_zero) = 0; // Where to go if the bit is 0. Fall through on 1.
// Dispatch after looking the current character up in a 2-bits-per-entry
// map. The destinations vector has up to 4 labels.
virtual void CheckCharacter(uc16 c, Label* on_equal) = 0;
virtual void CheckCharacter(uint32_t c, Label* on_equal) = 0;
// Bitwise and the current character with the given constant and then
// check for a match with c.
virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t and_with,
Label* on_equal) = 0;
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
// Check the current character for a match with a literal string. If we
......@@ -81,17 +86,16 @@ class RegExpMacroAssembler {
// fail to match then goto the on_failure label. End of input always
// matches. If the label is NULL then we should pop a backtrack address off
// the stack and go to that.
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal) = 0;
// Bitwise or the current character with the given constant and then
// check for a match with c.
virtual void CheckNotCharacterAfterOr(uc16 c,
uc16 or_with,
Label* on_not_equal) = 0;
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal) = 0;
virtual void CheckNotCharacterAfterAnd(uint32_t c,
uint32_t and_with,
Label* on_not_equal) = 0;
// Subtract a constant from the current character, then bitwise and the
// result with the given constant and then check for a match with c.
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
uc16 minus_then_or_with,
Label* on_not_equal) = 0;
virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
uc16 minus,
uc16 and_with,
Label* on_not_equal) = 0;
virtual void CheckNotRegistersEqual(int reg1,
int reg2,
Label* on_not_equal) = 0;
......@@ -122,8 +126,10 @@ class RegExpMacroAssembler {
// Backtracks instead if the label is NULL.
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
virtual IrregexpImplementation Implementation() = 0;
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input) = 0;
virtual void LoadCurrentCharacterUnchecked(int cp_offset) = 0;
virtual void LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
bool check_bounds = true,
int characters = 1) = 0;
virtual void PopCurrentPosition() = 0;
virtual void PopRegister(int register_index) = 0;
virtual void PushBacktrack(Label* label) = 0;
......
......@@ -306,3 +306,13 @@ assertFalse(/f[abc]/i.test('x'));
assertFalse(/f[abc]/i.test('xa'));
assertFalse(/<[abc]/i.test('x'));
assertFalse(/<[abc]/i.test('xa'));
// Test that merging of quick test masks gets it right.
assertFalse(/x([0-7]%%x|[0-6]%%y)/.test('x7%%y'), 'qt');
assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy7%%%y'), 'qt2');
assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy%%%y'), 'qt3');
assertFalse(/()x\1y([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt4');
assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy%%%y'), 'qt5');
assertFalse(/()x\1y([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt6');
assertFalse(/xy([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt7');
assertFalse(/x([0-7]%%%x|[0-6]%%%y)/.test('x7%%%y'), 'qt8');
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment