Commit 01456e8b authored by erik.corry@gmail.com

Case independent back references.

Reviewed at http://codereview.chromium.org/12406


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@833 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent b57b4a15
...@@ -239,6 +239,14 @@ void IrregexpAssembler::CheckNotBackReference(int capture_index, ...@@ -239,6 +239,14 @@ void IrregexpAssembler::CheckNotBackReference(int capture_index,
} }
// Emits the case-independent back reference check. The instruction is laid
// out as the BC_CHECK_NOT_BACK_REF_NO_CASE opcode, followed by the capture
// index, followed by the on_mismatch label (written via EmitOrLink).
void IrregexpAssembler::CheckNotBackReferenceNoCase(int capture_index,
Label* on_mismatch) {
Emit(BC_CHECK_NOT_BACK_REF_NO_CASE);
Emit(capture_index);
EmitOrLink(on_mismatch);
}
void IrregexpAssembler::CheckRegister(int byte_code, void IrregexpAssembler::CheckRegister(int byte_code,
int reg_index, int reg_index,
uint16_t vs, uint16_t vs,
......
...@@ -75,6 +75,7 @@ class IrregexpAssembler { ...@@ -75,6 +75,7 @@ class IrregexpAssembler {
// the register after. If a register contains -1 then the other register // the register after. If a register contains -1 then the other register
// must always contain -1 and the on_mismatch label will never be called. // must always contain -1 and the on_mismatch label will never be called.
void CheckNotBackReference(int capture_index, Label* on_mismatch); void CheckNotBackReference(int capture_index, Label* on_mismatch);
// Case-independent counterpart of CheckNotBackReference; takes the same
// arguments but emits BC_CHECK_NOT_BACK_REF_NO_CASE.
void CheckNotBackReferenceNoCase(int capture_index, Label* on_mismatch);
// Checks a register for strictly-less-than or greater-than-or-equal. // Checks a register for strictly-less-than or greater-than-or-equal.
void CheckRegisterLT(int reg_index, uint16_t vs, Label* on_less_than); void CheckRegisterLT(int reg_index, uint16_t vs, Label* on_less_than);
......
...@@ -57,12 +57,13 @@ V(MINUS_OR_CHECK_NOT_CHAR, 21, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \ ...@@ -57,12 +57,13 @@ V(MINUS_OR_CHECK_NOT_CHAR, 21, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \
V(CHECK_LT, 22, 7) /* check_lt uc16 addr32 */ \ V(CHECK_LT, 22, 7) /* check_lt uc16 addr32 */ \
V(CHECK_GT, 23, 7) /* check_gr uc16 addr32 */ \ V(CHECK_GT, 23, 7) /* check_gr uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 24, 6) /* check_not_back_ref capture_idx addr32 */ \ V(CHECK_NOT_BACK_REF, 24, 6) /* check_not_back_ref capture_idx addr32 */ \
V(LOOKUP_MAP1, 25, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \ V(CHECK_NOT_BACK_REF_NO_CASE, 25, 6) /* check_not_back_ref_no_case captu... */ \
V(LOOKUP_MAP2, 26, 99) /* l_map2 start16 half_nibble_map_addr32* */ \ V(LOOKUP_MAP1, 26, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP8, 27, 99) /* l_map8 start16 byte_map addr32* */ \ V(LOOKUP_MAP2, 27, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_HI_MAP8, 28, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \ V(LOOKUP_MAP8, 28, 99) /* l_map8 start16 byte_map addr32* */ \
V(CHECK_REGISTER_LT, 29, 8) /* check_reg_lt register_index value16 addr32 */ \ V(LOOKUP_HI_MAP8, 29, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_GE, 30, 8) /* check_reg_ge register_index value16 addr32 */ \ V(CHECK_REGISTER_LT, 30, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 31, 8) /* check_reg_ge register_index value16 addr32 */ \
#define DECLARE_BYTECODES(name, code, length) \ #define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code; static const int BC_##name = code;
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "v8.h" #include "v8.h"
#include "unicode.h"
#include "utils.h" #include "utils.h"
#include "ast.h" #include "ast.h"
#include "bytecodes-irregexp.h" #include "bytecodes-irregexp.h"
...@@ -38,6 +39,27 @@ ...@@ -38,6 +39,27 @@
namespace v8 { namespace internal { namespace v8 { namespace internal {
static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
static bool BackRefMatchesNoCase(int from,
int current,
int len,
Vector<const uc16> subject) {
for (int i = 0; i < len; i++) {
unibrow::uchar old_char = subject[from++];
unibrow::uchar new_char = subject[current++];
if (old_char == new_char) continue;
canonicalize.get(old_char, '\0', &old_char);
canonicalize.get(new_char, '\0', &new_char);
if (old_char != new_char) {
return false;
}
}
return true;
}
#ifdef DEBUG #ifdef DEBUG
static void TraceInterpreter(const byte* code_base, static void TraceInterpreter(const byte* code_base,
const byte* pc, const byte* pc,
...@@ -319,6 +341,21 @@ static bool RawMatch(const byte* code_base, ...@@ -319,6 +341,21 @@ static bool RawMatch(const byte* code_base,
pc += BC_CHECK_NOT_BACK_REF_LENGTH; pc += BC_CHECK_NOT_BACK_REF_LENGTH;
break; break;
} }
// Case-independent back reference check.  pc[1] is the index of the register
// holding the capture start; the following register holds the capture end.
// Bytes pc+2.. hold the 32-bit address to jump to on mismatch.
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
  int from = registers[pc[1]];
  int len = registers[pc[1] + 1] - from;
  if (current + len <= subject.length() &&
      BackRefMatchesNoCase(from, current, len, subject)) {
    // A matched back reference consumes the characters it matched, so the
    // current position has to move past them before continuing; otherwise
    // the rest of the pattern would be matched against the same input again.
    current += len;
    pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
  } else {
    // Either the capture is longer than the remaining input or the text
    // differs even after case canonicalization: take the mismatch branch.
    pc = code_base + Load32(pc + 2);
  }
  break;
}
default: default:
UNREACHABLE(); UNREACHABLE();
break; break;
......
...@@ -1269,17 +1269,17 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler, ...@@ -1269,17 +1269,17 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
} }
} else { } else {
if (from != 0) { if (from != 0) {
if (!cc->is_negated()) { if (cc->is_negated()) {
macro_assembler->CheckCharacterLT(from, on_failure);
} else {
macro_assembler->CheckCharacterLT(from, &success); macro_assembler->CheckCharacterLT(from, &success);
} else {
macro_assembler->CheckCharacterLT(from, on_failure);
} }
} }
if (to != 0xffff) { if (to != 0xffff) {
if (!cc->is_negated()) { if (cc->is_negated()) {
macro_assembler->CheckCharacterGT(to, on_failure);
} else {
macro_assembler->CheckCharacterLT(to + 1, on_failure); macro_assembler->CheckCharacterLT(to + 1, on_failure);
} else {
macro_assembler->CheckCharacterGT(to, on_failure);
} }
} else { } else {
if (cc->is_negated()) { if (cc->is_negated()) {
...@@ -1302,16 +1302,16 @@ bool TextNode::Emit(RegExpCompiler* compiler) { ...@@ -1302,16 +1302,16 @@ bool TextNode::Emit(RegExpCompiler* compiler) {
TextElement elm = elms_->at(i); TextElement elm = elms_->at(i);
if (elm.type == TextElement::ATOM) { if (elm.type == TextElement::ATOM) {
Vector<const uc16> quarks = elm.data.u_atom->data(); Vector<const uc16> quarks = elm.data.u_atom->data();
if (!compiler->is_case_independent()) { if (compiler->is_case_independent()) {
macro_assembler->CheckCharacters(quarks,
cp_offset,
on_failure_->label());
} else {
EmitAtomNonLetters(macro_assembler, EmitAtomNonLetters(macro_assembler,
elm, elm,
quarks, quarks,
on_failure_->label(), on_failure_->label(),
cp_offset); cp_offset);
} else {
macro_assembler->CheckCharacters(quarks,
cp_offset,
on_failure_->label());
} }
cp_offset += quarks.length(); cp_offset += quarks.length();
} else { } else {
...@@ -1474,7 +1474,12 @@ bool BackReferenceNode::Emit(RegExpCompiler* compiler) { ...@@ -1474,7 +1474,12 @@ bool BackReferenceNode::Emit(RegExpCompiler* compiler) {
macro->IfRegisterLT(start_reg_, 0, on_success()->label()); macro->IfRegisterLT(start_reg_, 0, on_success()->label());
macro->IfRegisterLT(end_reg_, 0, on_success()->label()); macro->IfRegisterLT(end_reg_, 0, on_success()->label());
ASSERT_EQ(start_reg_ + 1, end_reg_); ASSERT_EQ(start_reg_ + 1, end_reg_);
macro->CheckNotBackReference(start_reg_, on_failure_->label()); if (compiler->is_case_independent()) {
macro->CheckNotBackReferenceCaseIndependent(start_reg_,
on_failure_->label());
} else {
macro->CheckNotBackReference(start_reg_, on_failure_->label());
}
return on_success()->GoTo(compiler); return on_success()->GoTo(compiler);
} }
...@@ -2140,8 +2145,9 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) { ...@@ -2140,8 +2145,9 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
uc32 c = range[i]; uc32 c = range[i];
uc16 range_from = c + (pos - start); uc16 range_from = c + (pos - start);
uc16 range_to = c + (end - start); uc16 range_to = c + (end - start);
if (!(from() <= range_from && range_to <= to())) if (!(from() <= range_from && range_to <= to())) {
ranges->Add(CharacterRange(range_from, range_to)); ranges->Add(CharacterRange(range_from, range_to));
}
} }
start = pos = block_end + 1; start = pos = block_end + 1;
} }
......
...@@ -213,11 +213,14 @@ void RegExpMacroAssemblerIA32::CheckCurrentPosition(int register_index, ...@@ -213,11 +213,14 @@ void RegExpMacroAssemblerIA32::CheckCurrentPosition(int register_index,
} }
// Case-independent back reference check.  Not implemented for the IA32 macro
// assembler yet: any call ends in UNIMPLEMENTED().
void RegExpMacroAssemblerIA32::CheckNotBackReferenceCaseIndependent(
int start_reg, Label* on_no_match) {
UNIMPLEMENTED();
}
void RegExpMacroAssemblerIA32::CheckNotBackReference( void RegExpMacroAssemblerIA32::CheckNotBackReference(
int start_reg, Label* on_no_match) { int start_reg, Label* on_no_match) {
if (ignore_case_) {
UNIMPLEMENTED();
}
Label fallthrough; Label fallthrough;
__ mov(eax, register_location(start_reg)); __ mov(eax, register_location(start_reg));
__ mov(ecx, register_location(start_reg + 1)); __ mov(ecx, register_location(start_reg + 1));
......
...@@ -53,6 +53,8 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -53,6 +53,8 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
Label* on_failure); Label* on_failure);
virtual void CheckCurrentPosition(int register_index, Label* on_equal); virtual void CheckCurrentPosition(int register_index, Label* on_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match); virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
// Case-independent variant of CheckNotBackReference.
virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
Label* on_no_match);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal); virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal); virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusOr(uc16 c, virtual void CheckNotCharacterAfterMinusOr(uc16 c,
......
...@@ -199,6 +199,13 @@ void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg, ...@@ -199,6 +199,13 @@ void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
} }
// Case-independent back reference check for the bytecode backend: forwards
// start_reg and the mismatch label to the assembler's
// CheckNotBackReferenceNoCase.
void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceCaseIndependent(
int start_reg,
Label* on_not_equal) {
assembler_->CheckNotBackReferenceNoCase(start_reg, on_not_equal);
}
void RegExpMacroAssemblerIrregexp::CheckBitmap(uc16 start, void RegExpMacroAssemblerIrregexp::CheckBitmap(uc16 start,
Label* bitmap, Label* bitmap,
Label* on_zero) { Label* on_zero) {
......
...@@ -65,6 +65,8 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { ...@@ -65,6 +65,8 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
uc16 mask, uc16 mask,
Label* on_not_equal); Label* on_not_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match); virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
// Case-independent variant of CheckNotBackReference.
virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
Label* on_no_match);
virtual void CheckCharacters(Vector<const uc16> str, virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure); Label* on_failure);
......
...@@ -76,6 +76,8 @@ class RegExpMacroAssembler { ...@@ -76,6 +76,8 @@ class RegExpMacroAssembler {
int register_index, int register_index,
Label* on_equal) = 0; Label* on_equal) = 0;
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0; virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
// Case-independent variant of CheckNotBackReference.  BackReferenceNode::Emit
// calls this instead of CheckNotBackReference when the compiler reports the
// regexp as case independent.
virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
Label* on_no_match) = 0;
// Check the current character for a match with a literal character. If we // Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always // fail to match then goto the on_failure label. End of input always
// matches. If the label is NULL then we should pop a backtrack address off // matches. If the label is NULL then we should pop a backtrack address off
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment