Commit 356cf1ed authored by erik.corry@gmail.com's avatar erik.corry@gmail.com

RegExp: Add support for table-based character class

code generation. This is performance-neutral for
all our tests, but a factor of 6 faster for the
Unicode-based regexp in the new test (and yields
much more compact code).
Review URL: https://chromiumcodereview.appspot.com/9854020

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11189 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 05728062
......@@ -480,6 +480,44 @@ void RegExpMacroAssemblerARM::CheckNotCharacterAfterMinusAnd(
}
// Emits code that goes to on_in_range (via BranchOrBacktrack) when the
// current character lies in [from, to]. The lower bound is subtracted first
// so that a single unsigned comparison against (to - from) covers both ends:
// characters below `from` wrap around to large unsigned values and fail the
// lower-or-same test. r0 is used as scratch.
void RegExpMacroAssemblerARM::CheckCharacterInRange(
uc16 from,
uc16 to,
Label* on_in_range) {
__ sub(r0, current_character(), Operand(from));
__ cmp(r0, Operand(to - from));
BranchOrBacktrack(ls, on_in_range); // Unsigned lower-or-same condition.
}
// Emits code that goes to on_not_in_range (via BranchOrBacktrack) when the
// current character lies outside [from, to]. Uses the same subtract-then-
// unsigned-compare sequence as the in-range check, but with the opposite
// condition (unsigned higher). r0 is used as scratch.
void RegExpMacroAssemblerARM::CheckCharacterNotInRange(
uc16 from,
uc16 to,
Label* on_not_in_range) {
__ sub(r0, current_character(), Operand(from));
__ cmp(r0, Operand(to - from));
BranchOrBacktrack(hi, on_not_in_range); // Unsigned higher condition.
}
// Emits code that loads the byte of `table` selected by the current character
// and goes to on_bit_set (via BranchOrBacktrack) when that byte is non-zero.
// r0 and r1 are used as scratch.
void RegExpMacroAssemblerARM::CheckBitInTable(
    Handle<ByteArray> table,
    Label* on_bit_set) {
  __ mov(r0, Operand(table));
  // Distance from the table object pointer to its first payload byte.
  const int first_byte_offset = ByteArray::kHeaderSize - kHeapObjectTag;
  if (mode_ == ASCII && kTableMask == String::kMaxAsciiCharCode) {
    // The character can be used as the table index without masking.
    __ add(r1, current_character(), Operand(first_byte_offset));
  } else {
    // Reduce the character to a valid table index before adding the offset.
    __ and_(r1, current_character(), Operand(kTableMask));
    __ add(r1, r1, Operand(first_byte_offset));
  }
  __ ldrb(r0, MemOperand(r0, r1));
  __ cmp(r0, Operand(0));
  BranchOrBacktrack(ne, on_bit_set);
}
bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type,
Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned
......
......@@ -79,6 +79,14 @@ class RegExpMacroAssemblerARM: public NativeRegExpMacroAssembler {
uc16 minus,
uc16 mask,
Label* on_not_equal);
// Goes to on_in_range when the current character lies in [from, to].
virtual void CheckCharacterInRange(uc16 from,
uc16 to,
Label* on_in_range);
// Goes to on_not_in_range when the current character lies outside [from, to].
virtual void CheckCharacterNotInRange(uc16 from,
uc16 to,
Label* on_not_in_range);
// Looks up the current character in the byte table and goes to on_bit_set
// when the byte found there is non-zero.
virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
// Checks whether the given offset from the current position is before
// the end of the string.
virtual void CheckPosition(int cp_offset, Label* on_outside_input);
......
......@@ -72,24 +72,23 @@ V(AND_CHECK_4_CHARS, 27, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
V(AND_CHECK_NOT_4_CHARS, 29, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
V(AND_CHECK_NOT_CHAR, 30, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
V(MINUS_AND_CHECK_NOT_CHAR, 31, 12) /* bc8 pad8 uc16 uc16 addr32 */ \
V(CHECK_LT, 32, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_GT, 33, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 34, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 35, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_REGS_EQUAL, 36, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
V(LOOKUP_MAP1, 37, 12) /* bc8 pad8 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2, 38, 96) /* bc8 pad8 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 39, 96) /* bc8 pad8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 40, 96) /* bc8 start24 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 41, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_GE, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_EQ_POS, 43, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_AT_START, 44, 8) /* bc8 pad24 addr32 */ \
V(CHECK_NOT_AT_START, 45, 8) /* bc8 pad24 addr32 */ \
V(CHECK_GREEDY, 46, 8) /* bc8 pad24 addr32 */ \
V(ADVANCE_CP_AND_GOTO, 47, 8) /* bc8 offset24 addr32 */ \
V(SET_CURRENT_POSITION_FROM_END, 48, 4) /* bc8 idx24 */
V(MINUS_AND_CHECK_NOT_CHAR, 31, 12) /* bc8 pad8 uc16 uc16 uc16 addr32 */ \
V(CHECK_CHAR_IN_RANGE, 32, 12) /* bc8 pad24 uc16 uc16 addr32 */ \
V(CHECK_CHAR_NOT_IN_RANGE, 33, 12) /* bc8 pad24 uc16 uc16 addr32 */ \
V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128 */ \
V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_REGS_EQUAL, 39, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
V(CHECK_REGISTER_LT, 40, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_GE, 41, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_EQ_POS, 42, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_AT_START, 43, 8) /* bc8 pad24 addr32 */ \
V(CHECK_NOT_AT_START, 44, 8) /* bc8 pad24 addr32 */ \
V(CHECK_GREEDY, 45, 8) /* bc8 pad24 addr32 */ \
V(ADVANCE_CP_AND_GOTO, 46, 8) /* bc8 offset24 addr32 */ \
V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */
// Expands one (name, code, length) bytecode entry into an integer constant
// named BC_<name> whose value is the entry's opcode number. The length
// argument is not used by this expansion.
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
......
......@@ -501,9 +501,13 @@ void RegExpMacroAssemblerIA32::CheckNotCharacter(uint32_t c,
void RegExpMacroAssemblerIA32::CheckCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_equal) {
__ mov(eax, current_character());
__ and_(eax, mask);
__ cmp(eax, c);
if (c == 0) {
__ test(current_character(), Immediate(mask));
} else {
__ mov(eax, current_character());
__ and_(eax, mask);
__ cmp(eax, c);
}
BranchOrBacktrack(equal, on_equal);
}
......@@ -511,9 +515,13 @@ void RegExpMacroAssemblerIA32::CheckCharacterAfterAnd(uint32_t c,
void RegExpMacroAssemblerIA32::CheckNotCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_not_equal) {
__ mov(eax, current_character());
__ and_(eax, mask);
__ cmp(eax, c);
if (c == 0) {
__ test(current_character(), Immediate(mask));
} else {
__ mov(eax, current_character());
__ and_(eax, mask);
__ cmp(eax, c);
}
BranchOrBacktrack(not_equal, on_not_equal);
}
......@@ -525,12 +533,51 @@ void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusAnd(
Label* on_not_equal) {
ASSERT(minus < String::kMaxUtf16CodeUnit);
__ lea(eax, Operand(current_character(), -minus));
__ and_(eax, mask);
__ cmp(eax, c);
if (c == 0) {
__ test(eax, Immediate(mask));
} else {
__ and_(eax, mask);
__ cmp(eax, c);
}
BranchOrBacktrack(not_equal, on_not_equal);
}
// Emits code that goes to on_in_range (via BranchOrBacktrack) when the
// current character lies in [from, to]. lea computes (character - from) into
// eax; a single unsigned below-or-equal comparison against (to - from) then
// covers both bounds, because characters below `from` wrap to large unsigned
// values.
void RegExpMacroAssemblerIA32::CheckCharacterInRange(
uc16 from,
uc16 to,
Label* on_in_range) {
__ lea(eax, Operand(current_character(), -from));
__ cmp(eax, to - from);
BranchOrBacktrack(below_equal, on_in_range);
}
// Emits code that goes to on_not_in_range (via BranchOrBacktrack) when the
// current character lies outside [from, to]. Same lea/cmp sequence as the
// in-range check, but branching on the unsigned `above` condition. eax is
// used as scratch.
void RegExpMacroAssemblerIA32::CheckCharacterNotInRange(
uc16 from,
uc16 to,
Label* on_not_in_range) {
__ lea(eax, Operand(current_character(), -from));
__ cmp(eax, to - from);
BranchOrBacktrack(above, on_not_in_range);
}
// Emits code that compares the byte of `table` selected by the current
// character against zero and goes to on_bit_set (via BranchOrBacktrack) when
// it is non-zero. eax holds the table; ebx is used only when the character
// has to be masked down to a table index.
void RegExpMacroAssemblerIA32::CheckBitInTable(
    Handle<ByteArray> table,
    Label* on_bit_set) {
  __ mov(eax, Immediate(table));
  // Masking can be skipped only in ASCII mode, and only when the table mask
  // equals the largest ASCII character code.
  const bool index_without_masking =
      mode_ == ASCII && kTableMask == String::kMaxAsciiCharCode;
  Register index = current_character();
  if (!index_without_masking) {
    __ mov(ebx, current_character());
    __ and_(ebx, kTableMask);
    index = ebx;
  }
  __ cmpb(FieldOperand(eax, index, times_1, ByteArray::kHeaderSize), 0);
  BranchOrBacktrack(not_equal, on_bit_set);
}
bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned
......
......@@ -78,6 +78,14 @@ class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler {
uc16 minus,
uc16 mask,
Label* on_not_equal);
// Goes to on_in_range when the current character lies in [from, to].
virtual void CheckCharacterInRange(uc16 from,
uc16 to,
Label* on_in_range);
// Goes to on_not_in_range when the current character lies outside [from, to].
virtual void CheckCharacterNotInRange(uc16 from,
uc16 to,
Label* on_not_in_range);
// Looks up the current character in the byte table and goes to on_bit_set
// when the byte found there is non-zero.
virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
// Checks whether the given offset from the current position is before
// the end of the string.
virtual void CheckPosition(int cp_offset, Label* on_outside_input);
......
......@@ -33,8 +33,9 @@
#include "utils.h"
#include "ast.h"
#include "bytecodes-irregexp.h"
#include "jsregexp.h"
#include "interpreter-irregexp.h"
#include "jsregexp.h"
#include "regexp-macro-assembler.h"
namespace v8 {
namespace internal {
......@@ -449,6 +450,37 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
}
break;
}
BYTECODE(CHECK_CHAR_IN_RANGE) {
  // Operands: 16-bit lower bound at pc + 4, 16-bit upper bound at pc + 6,
  // 32-bit branch target (relative to code_base) at pc + 8.
  const uint32_t lower = Load16Aligned(pc + 4);
  const uint32_t upper = Load16Aligned(pc + 6);
  const bool in_range = lower <= current_char && current_char <= upper;
  if (!in_range) {
    // Not in [lower, upper]: fall through to the next instruction.
    pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
  } else {
    pc = code_base + Load32Aligned(pc + 8);
  }
  break;
}
BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
  // Operands: 16-bit lower bound at pc + 4, 16-bit upper bound at pc + 6,
  // 32-bit branch target (relative to code_base) at pc + 8.
  const uint32_t lower = Load16Aligned(pc + 4);
  const uint32_t upper = Load16Aligned(pc + 6);
  const bool out_of_range = current_char < lower || upper < current_char;
  if (!out_of_range) {
    // Inside [lower, upper]: fall through to the next instruction.
    pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
  } else {
    pc = code_base + Load32Aligned(pc + 8);
  }
  break;
}
BYTECODE(CHECK_BIT_IN_TABLE) {
  // Operands: 32-bit branch target (relative to code_base) at pc + 4,
  // followed by the bitmap starting at pc + 8.
  const int table_mask = RegExpMacroAssembler::kTableMask;
  // Select the bitmap byte holding this character's bit...
  const byte bitmap_byte =
      pc[8 + ((current_char & table_mask) >> kBitsPerByteLog2)];
  // ...and the position of the bit inside that byte.
  const int bit_in_byte = (current_char & (kBitsPerByte - 1));
  const bool bit_is_set = ((bitmap_byte >> bit_in_byte) & 1) != 0;
  if (!bit_is_set) {
    pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
  } else {
    pc = code_base + Load32Aligned(pc + 4);
  }
  break;
}
BYTECODE(CHECK_LT) {
uint32_t limit = (insn >> BYTECODE_SHIFT);
if (current_char < limit) {
......@@ -488,59 +520,6 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
}
break;
BYTECODE(LOOKUP_MAP1) {
// Look up character in a bitmap. If we find a 0, then jump to the
// location at pc + 8. Otherwise fall through!
int index = current_char - (insn >> BYTECODE_SHIFT);
byte map = code_base[Load32Aligned(pc + 4) + (index >> 3)];
map = ((map >> (index & 7)) & 1);
if (map == 0) {
pc = code_base + Load32Aligned(pc + 8);
} else {
pc += BC_LOOKUP_MAP1_LENGTH;
}
break;
}
BYTECODE(LOOKUP_MAP2) {
// Look up character in a half-nibble map. If we find 00, then jump to
// the location at pc + 8. If we find 01 then jump to location at
// pc + 11, etc.
int index = (current_char - (insn >> BYTECODE_SHIFT)) << 1;
byte map = code_base[Load32Aligned(pc + 3) + (index >> 3)];
map = ((map >> (index & 7)) & 3);
if (map < 2) {
if (map == 0) {
pc = code_base + Load32Aligned(pc + 8);
} else {
pc = code_base + Load32Aligned(pc + 12);
}
} else {
if (map == 2) {
pc = code_base + Load32Aligned(pc + 16);
} else {
pc = code_base + Load32Aligned(pc + 20);
}
}
break;
}
BYTECODE(LOOKUP_MAP8) {
// Look up character in a byte map. Use the byte as an index into a
// table that follows this instruction immediately.
int index = current_char - (insn >> BYTECODE_SHIFT);
byte map = code_base[Load32Aligned(pc + 4) + index];
const byte* new_pc = code_base + Load32Aligned(pc + 8) + (map << 2);
pc = code_base + Load32Aligned(new_pc);
break;
}
BYTECODE(LOOKUP_HI_MAP8) {
// Look up high byte of this character in a byte map. Use the byte as
// an index into a table that follows this instruction immediately.
int index = (current_char >> 8) - (insn >> BYTECODE_SHIFT);
byte map = code_base[Load32Aligned(pc + 4) + index];
const byte* new_pc = code_base + Load32Aligned(pc + 8) + (map << 2);
pc = code_base + Load32Aligned(new_pc);
break;
}
BYTECODE(CHECK_NOT_REGS_EQUAL)
if (registers[insn >> BYTECODE_SHIFT] ==
registers[Load32Aligned(pc + 4)]) {
......
This diff is collapsed.
......@@ -81,6 +81,14 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
uc16 minus,
uc16 mask,
Label* on_not_equal);
// Range check on the current character; expected to go to on_in_range when
// it lies in [from, to]. NOTE(review): the MIPS implementation is not
// visible here - confirm against it.
virtual void CheckCharacterInRange(uc16 from,
uc16 to,
Label* on_in_range);
// Counterpart of the check above for characters outside [from, to].
virtual void CheckCharacterNotInRange(uc16 from,
uc16 to,
Label* on_not_in_range);
// Table lookup on the current character; expected to go to on_bit_set when
// the table entry is non-zero (same review note applies).
virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
// Checks whether the given offset from the current position is before
// the end of the string.
virtual void CheckPosition(int cp_offset, Label* on_outside_input);
......
......@@ -62,6 +62,16 @@ void RegExpMacroAssemblerIrregexp::Emit16(uint32_t word) {
}
// Appends a single byte to the bytecode buffer at the current write position
// and advances the position by one. The value is stored through an
// unsigned char pointer, so only the low 8 bits of `word` are kept.
void RegExpMacroAssemblerIrregexp::Emit8(uint32_t word) {
  ASSERT(pc_ <= buffer_.length());
  // Expand() is called when the write position is exactly at the buffer end.
  const bool at_buffer_end = (pc_ == buffer_.length());
  if (at_buffer_end) Expand();
  // Compute the destination only after the possible Expand() call.
  unsigned char* destination =
      reinterpret_cast<unsigned char*>(buffer_.start() + pc_);
  *destination = word;
  pc_++;
}
void RegExpMacroAssemblerIrregexp::Emit32(uint32_t word) {
ASSERT(pc_ <= buffer_.length());
if (pc_ + 3 >= buffer_.length()) {
......
......@@ -352,6 +352,42 @@ void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd(
}
// Emits a CHECK_CHAR_IN_RANGE bytecode. The emission order fixes the operand
// layout: the opcode word (with argument 0), then `from` and `to` as 16-bit
// values, then the target for on_in_range (emitted or linked by EmitOrLink).
void RegExpMacroAssemblerIrregexp::CheckCharacterInRange(
uc16 from,
uc16 to,
Label* on_in_range) {
Emit(BC_CHECK_CHAR_IN_RANGE, 0);
Emit16(from);
Emit16(to);
EmitOrLink(on_in_range);
}
// Emits a CHECK_CHAR_NOT_IN_RANGE bytecode. The emission order fixes the
// operand layout: the opcode word (with argument 0), then `from` and `to` as
// 16-bit values, then the target for on_not_in_range (emitted or linked by
// EmitOrLink).
void RegExpMacroAssemblerIrregexp::CheckCharacterNotInRange(
uc16 from,
uc16 to,
Label* on_not_in_range) {
Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0);
Emit16(from);
Emit16(to);
EmitOrLink(on_not_in_range);
}
// Emits a CHECK_BIT_IN_TABLE bytecode: the opcode word, the target for
// on_bit_set, and then the table packed into a bitmap. Each emitted byte
// covers kBitsPerByte consecutive table entries, with the lowest-numbered
// entry in the least significant bit; a bit is set when the corresponding
// table entry is non-zero.
void RegExpMacroAssemblerIrregexp::CheckBitInTable(
    Handle<ByteArray> table, Label* on_bit_set) {
  Emit(BC_CHECK_BIT_IN_TABLE, 0);
  EmitOrLink(on_bit_set);
  for (int group_start = 0; group_start < kTableSize;
       group_start += kBitsPerByte) {
    int packed_bits = 0;
    int bit = 0;
    while (bit < kBitsPerByte) {
      const bool entry_is_set = table->get(group_start + bit) != 0;
      if (entry_is_set) packed_bits |= 1 << bit;
      bit++;
    }
    Emit8(packed_bits);
  }
}
void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
Label* on_not_equal) {
ASSERT(start_reg >= 0);
......
......@@ -93,6 +93,13 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
uc16 minus,
uc16 mask,
Label* on_not_equal);
// Emits the CHECK_CHAR_IN_RANGE bytecode with on_in_range as its target.
virtual void CheckCharacterInRange(uc16 from,
uc16 to,
Label* on_in_range);
// Emits the CHECK_CHAR_NOT_IN_RANGE bytecode with on_not_in_range as its
// target.
virtual void CheckCharacterNotInRange(uc16 from,
uc16 to,
Label* on_not_in_range);
// Emits the CHECK_BIT_IN_TABLE bytecode with on_bit_set as its target,
// followed by the table packed into a bitmap.
virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
......@@ -114,6 +121,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
inline void EmitOrLink(Label* label);
inline void Emit32(uint32_t x);
inline void Emit16(uint32_t x);
inline void Emit8(uint32_t x);
inline void Emit(uint32_t bc, uint32_t arg);
// Bytecode buffer.
int length();
......
......@@ -198,24 +198,55 @@ void RegExpMacroAssemblerTracer::LoadCurrentCharacter(int cp_offset,
}
// Small formatting helper for trace output. Dereferencing an instance yields
// a C string: "(c)" when the stored character is in the range ' '..'~', and
// the empty string otherwise. The returned pointer refers to storage inside
// the object, so it is only usable while the object is alive.
class PrintablePrinter {
 public:
  explicit PrintablePrinter(uc16 character) : character_(character) { }

  const char* operator*() {
    const bool is_printable = character_ >= ' ' && character_ <= '~';
    if (!is_printable) {
      buffer_[0] = '\0';
      return buffer_;
    }
    buffer_[0] = '(';
    buffer_[1] = character_;
    buffer_[2] = ')';
    buffer_[3] = '\0';
    return buffer_;
  }

 private:
  uc16 character_;
  char buffer_[4];  // '(' + character + ')' + terminating NUL.
};
void RegExpMacroAssemblerTracer::CheckCharacterLT(uc16 limit, Label* on_less) {
PrintF(" CheckCharacterLT(c='u%04x', label[%08x]);\n",
limit, LabelToInt(on_less));
PrintablePrinter printable(limit);
PrintF(" CheckCharacterLT(c=0x%04x%s, label[%08x]);\n",
limit,
*printable,
LabelToInt(on_less));
assembler_->CheckCharacterLT(limit, on_less);
}
void RegExpMacroAssemblerTracer::CheckCharacterGT(uc16 limit,
Label* on_greater) {
PrintF(" CheckCharacterGT(c='u%04x', label[%08x]);\n",
limit, LabelToInt(on_greater));
PrintablePrinter printable(limit);
PrintF(" CheckCharacterGT(c=0x%04x%s, label[%08x]);\n",
limit,
*printable,
LabelToInt(on_greater));
assembler_->CheckCharacterGT(limit, on_greater);
}
void RegExpMacroAssemblerTracer::CheckCharacter(unsigned c, Label* on_equal) {
PrintF(" CheckCharacter(c='u%04x', label[%08x]);\n",
c, LabelToInt(on_equal));
PrintablePrinter printable(c);
PrintF(" CheckCharacter(c=0x%04x%s, label[%08x]);\n",
c,
*printable,
LabelToInt(on_equal));
assembler_->CheckCharacter(c, on_equal);
}
......@@ -234,8 +265,11 @@ void RegExpMacroAssemblerTracer::CheckNotAtStart(Label* on_not_at_start) {
void RegExpMacroAssemblerTracer::CheckNotCharacter(unsigned c,
Label* on_not_equal) {
PrintF(" CheckNotCharacter(c='u%04x', label[%08x]);\n",
c, LabelToInt(on_not_equal));
PrintablePrinter printable(c);
PrintF(" CheckNotCharacter(c=0x%04x%s, label[%08x]);\n",
c,
*printable,
LabelToInt(on_not_equal));
assembler_->CheckNotCharacter(c, on_not_equal);
}
......@@ -244,8 +278,10 @@ void RegExpMacroAssemblerTracer::CheckCharacterAfterAnd(
unsigned c,
unsigned mask,
Label* on_equal) {
PrintF(" CheckCharacterAfterAnd(c='u%04x', mask=0x%04x, label[%08x]);\n",
PrintablePrinter printable(c);
PrintF(" CheckCharacterAfterAnd(c=0x%04x%s, mask=0x%04x, label[%08x]);\n",
c,
*printable,
mask,
LabelToInt(on_equal));
assembler_->CheckCharacterAfterAnd(c, mask, on_equal);
......@@ -256,8 +292,10 @@ void RegExpMacroAssemblerTracer::CheckNotCharacterAfterAnd(
unsigned c,
unsigned mask,
Label* on_not_equal) {
PrintF(" CheckNotCharacterAfterAnd(c='u%04x', mask=0x%04x, label[%08x]);\n",
PrintablePrinter printable(c);
PrintF(" CheckNotCharacterAfterAnd(c=0x%04x%s, mask=0x%04x, label[%08x]);\n",
c,
*printable,
mask,
LabelToInt(on_not_equal));
assembler_->CheckNotCharacterAfterAnd(c, mask, on_not_equal);
......@@ -269,7 +307,7 @@ void RegExpMacroAssemblerTracer::CheckNotCharacterAfterMinusAnd(
uc16 minus,
uc16 mask,
Label* on_not_equal) {
PrintF(" CheckNotCharacterAfterMinusAnd(c='u%04x', minus=%04x, mask=0x%04x, "
PrintF(" CheckNotCharacterAfterMinusAnd(c=0x%04x, minus=%04x, mask=0x%04x, "
"label[%08x]);\n",
c,
minus,
......@@ -279,6 +317,53 @@ void RegExpMacroAssemblerTracer::CheckNotCharacterAfterMinusAnd(
}
void RegExpMacroAssemblerTracer::CheckCharacterInRange(
uc16 from,
uc16 to,
Label* on_not_in_range) {
PrintablePrinter printable_from(from);
PrintablePrinter printable_to(to);
PrintF(" CheckCharacterInRange(from=0x%04x%s, to=0x%04x%s, label[%08x]);\n",
from,
*printable_from,
to,
*printable_to,
LabelToInt(on_not_in_range));
assembler_->CheckCharacterInRange(from, to, on_not_in_range);
}
void RegExpMacroAssemblerTracer::CheckCharacterNotInRange(
uc16 from,
uc16 to,
Label* on_in_range) {
PrintablePrinter printable_from(from);
PrintablePrinter printable_to(to);
PrintF(
" CheckCharacterNotInRange(from=0x%04x%s," " to=%04x%s, label[%08x]);\n",
from,
*printable_from,
to,
*printable_to,
LabelToInt(on_in_range));
assembler_->CheckCharacterNotInRange(from, to, on_in_range);
}
// Prints the label and a picture of the table ('X' for a non-zero entry,
// '.' for a zero entry, broken into rows of 32 entries), then forwards the
// call to the wrapped assembler.
void RegExpMacroAssemblerTracer::CheckBitInTable(
    Handle<ByteArray> table, Label* on_bit_set) {
  PrintF(" CheckBitInTable(label[%08x] ", LabelToInt(on_bit_set));
  const int kEntriesPerRow = 32;
  for (int entry = 0; entry < kTableSize; entry++) {
    const char mark = (table->get(entry) == 0) ? '.' : 'X';
    PrintF("%c", mark);
    const bool row_complete = (entry % kEntriesPerRow) == kEntriesPerRow - 1;
    const bool is_last_entry = (entry == kTableMask);
    // No line break after the final row; the closing text follows directly.
    if (row_complete && !is_last_entry) {
      PrintF("\n ");
    }
  }
  PrintF(");\n");
  assembler_->CheckBitInTable(table, on_bit_set);
}
void RegExpMacroAssemblerTracer::CheckNotBackReference(int start_reg,
Label* on_no_match) {
PrintF(" CheckNotBackReference(register=%d, label[%08x]);\n", start_reg,
......@@ -314,7 +399,7 @@ void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str,
PrintF(" %s(str=\"",
check_end_of_string ? "CheckCharacters" : "CheckCharactersUnchecked");
for (int i = 0; i < str.length(); i++) {
PrintF("u%04x", str[i]);
PrintF("0x%04x", str[i]);
}
PrintF("\", cp_offset=%d, label[%08x])\n",
cp_offset, LabelToInt(on_failure));
......
......@@ -68,6 +68,13 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
uc16 minus,
uc16 and_with,
Label* on_not_equal);
// Tracing wrappers for the range and table checks: each prints a description
// of the call and then forwards it to the wrapped assembler.
virtual void CheckCharacterInRange(uc16 from,
uc16 to,
Label* on_in_range);
virtual void CheckCharacterNotInRange(uc16 from,
uc16 to,
Label* on_not_in_range);
virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
virtual bool CheckSpecialCharacterClass(uc16 type,
Label* on_no_match);
virtual void Fail();
......
......@@ -45,6 +45,11 @@ class RegExpMacroAssembler {
static const int kMaxRegister = (1 << 16) - 1;
static const int kMaxCPOffset = (1 << 15) - 1;
static const int kMinCPOffset = -(1 << 15);
// Dimensions of the lookup table used by CheckBitInTable.
// Number of character bits used to index the table.
static const int kTableSizeBits = 7;
// Number of table entries (2^kTableSizeBits = 128).
static const int kTableSize = 1 << kTableSizeBits;
// Mask that reduces a character to a valid table index.
static const int kTableMask = kTableSize - 1;
enum IrregexpImplementation {
kIA32Implementation,
kARMImplementation,
......@@ -106,12 +111,23 @@ class RegExpMacroAssembler {
virtual void CheckNotCharacterAfterAnd(unsigned c,
unsigned and_with,
Label* on_not_equal) = 0;
// Subtract a constant from the current character, then or with the given
// Subtract a constant from the current character, then and with the given
// constant and then check for a match with c.
virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
uc16 minus,
uc16 and_with,
Label* on_not_equal) = 0;
// Checks whether the current character lies in [from, to] and, if so, jumps
// to the on_in_range label.
virtual void CheckCharacterInRange(uc16 from,
uc16 to, // Both inclusive.
Label* on_in_range) = 0;
// Checks whether the current character lies outside [from, to] and, if so,
// jumps to the on_not_in_range label.
virtual void CheckCharacterNotInRange(uc16 from,
uc16 to, // Both inclusive.
Label* on_not_in_range) = 0;
// The current character (modulo kTableSize) is looked up in the byte
// array, and if the found byte is non-zero, we jump to the on_bit_set label.
virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) = 0;
virtual void CheckNotRegistersEqual(int reg1,
int reg2,
Label* on_not_equal) = 0;
......
......@@ -572,6 +572,42 @@ void RegExpMacroAssemblerX64::CheckNotCharacterAfterMinusAnd(
}
// Emits code that goes to on_in_range (via BranchOrBacktrack) when the
// current character lies in [from, to]. leal computes (character - from)
// into rax; a single unsigned below-or-equal comparison against (to - from)
// then covers both bounds, because characters below `from` wrap to large
// unsigned values.
void RegExpMacroAssemblerX64::CheckCharacterInRange(
uc16 from,
uc16 to,
Label* on_in_range) {
__ leal(rax, Operand(current_character(), -from));
__ cmpl(rax, Immediate(to - from));
BranchOrBacktrack(below_equal, on_in_range);
}
// Emits code that goes to on_not_in_range (via BranchOrBacktrack) when the
// current character lies outside [from, to]. Same leal/cmpl sequence as the
// in-range check, but branching on the unsigned `above` condition. rax is
// used as scratch.
void RegExpMacroAssemblerX64::CheckCharacterNotInRange(
uc16 from,
uc16 to,
Label* on_not_in_range) {
__ leal(rax, Operand(current_character(), -from));
__ cmpl(rax, Immediate(to - from));
BranchOrBacktrack(above, on_not_in_range);
}
// Emits code that compares the byte of `table` selected by the current
// character against zero and goes to on_bit_set (via BranchOrBacktrack) when
// it is non-zero. rax holds the table; rbx is used only when the character
// has to be masked down to a table index.
void RegExpMacroAssemblerX64::CheckBitInTable(
    Handle<ByteArray> table,
    Label* on_bit_set) {
  __ Move(rax, table);
  // Masking can be skipped only in ASCII mode, and only when the table mask
  // equals the largest ASCII character code.
  const bool index_without_masking =
      mode_ == ASCII && kTableMask == String::kMaxAsciiCharCode;
  Register index = current_character();
  if (!index_without_masking) {
    __ movq(rbx, current_character());
    __ and_(rbx, Immediate(kTableMask));
    index = rbx;
  }
  __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize),
          Immediate(0));
  BranchOrBacktrack(not_equal, on_bit_set);
}
bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned
......
......@@ -75,6 +75,14 @@ class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler {
uc16 minus,
uc16 mask,
Label* on_not_equal);
// Goes to on_in_range when the current character lies in [from, to].
virtual void CheckCharacterInRange(uc16 from,
uc16 to,
Label* on_in_range);
// Goes to on_not_in_range when the current character lies outside [from, to].
virtual void CheckCharacterNotInRange(uc16 from,
uc16 to,
Label* on_not_in_range);
// Looks up the current character in the byte table and goes to on_bit_set
// when the byte found there is non-zero.
virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
// Checks whether the given offset from the current position is before
// the end of the string.
virtual void CheckPosition(int cp_offset, Label* on_outside_input);
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment