Commit 01456e8b authored by erik.corry@gmail.com

Case independent back references.

Reviewed at http://codereview.chromium.org/12406


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@833 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent b57b4a15
......@@ -239,6 +239,14 @@ void IrregexpAssembler::CheckNotBackReference(int capture_index,
}
// Emits a BC_CHECK_NOT_BACK_REF_NO_CASE instruction: the opcode, followed by
// capture_index, followed by the on_mismatch label operand.
void IrregexpAssembler::CheckNotBackReferenceNoCase(int capture_index,
Label* on_mismatch) {
Emit(BC_CHECK_NOT_BACK_REF_NO_CASE);
// The interpreter uses this operand as the index of the first of the two
// consecutive registers that delimit the capture.
Emit(capture_index);
// Jump target the interpreter takes when the back reference does not match.
EmitOrLink(on_mismatch);
}
void IrregexpAssembler::CheckRegister(int byte_code,
int reg_index,
uint16_t vs,
......
......@@ -75,6 +75,7 @@ class IrregexpAssembler {
// the register after. If a register contains -1 then the other register
// must always contain -1 and the on_mismatch label will never be called.
void CheckNotBackReference(int capture_index, Label* on_mismatch);
// Counterpart of CheckNotBackReference that emits the
// BC_CHECK_NOT_BACK_REF_NO_CASE bytecode, which the interpreter matches
// case-independently.
void CheckNotBackReferenceNoCase(int capture_index, Label* on_mismatch);
// Checks a register for strictly-less-than or greater-than-or-equal.
void CheckRegisterLT(int reg_index, uint16_t vs, Label* on_less_than);
......
......@@ -57,12 +57,13 @@ V(MINUS_OR_CHECK_NOT_CHAR, 21, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \
V(CHECK_LT, 22, 7) /* check_lt uc16 addr32 */ \
V(CHECK_GT, 23, 7) /* check_gr uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 24, 6) /* check_not_back_ref capture_idx addr32 */ \
V(LOOKUP_MAP1, 25, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2, 26, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 27, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 28, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 29, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 30, 8) /* check_reg_ge register_index value16 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 25, 6) /* check_not_back_ref_no_case captu... */ \
V(LOOKUP_MAP1, 26, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2, 27, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 28, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 29, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 30, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 31, 8) /* check_reg_ge register_index value16 addr32 */ \
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
......
......@@ -29,6 +29,7 @@
#include "v8.h"
#include "unicode.h"
#include "utils.h"
#include "ast.h"
#include "bytecodes-irregexp.h"
......@@ -38,6 +39,27 @@
namespace v8 { namespace internal {
static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
static bool BackRefMatchesNoCase(int from,
int current,
int len,
Vector<const uc16> subject) {
for (int i = 0; i < len; i++) {
unibrow::uchar old_char = subject[from++];
unibrow::uchar new_char = subject[current++];
if (old_char == new_char) continue;
canonicalize.get(old_char, '\0', &old_char);
canonicalize.get(new_char, '\0', &new_char);
if (old_char != new_char) {
return false;
}
}
return true;
}
#ifdef DEBUG
static void TraceInterpreter(const byte* code_base,
const byte* pc,
......@@ -319,6 +341,21 @@ static bool RawMatch(const byte* code_base,
pc += BC_CHECK_NOT_BACK_REF_LENGTH;
break;
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
  // pc[1] is the index of the register holding the capture start; the
  // register right after it holds the capture end.
  int from = registers[pc[1]];
  int len = registers[pc[1] + 1] - from;
  if (current + len <= subject.length() &&
      BackRefMatchesNoCase(from, current, len, subject)) {
    // A successful back reference consumes the text it matched. The code
    // generator emits no separate advance after this instruction, so the
    // input position has to be moved here.
    current += len;
    pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
  } else {
    // Either too little input is left to hold the capture or the text
    // differs: continue at the mismatch target stored at pc + 2.
    pc = code_base + Load32(pc + 2);
  }
  break;
}
default:
UNREACHABLE();
break;
......
......@@ -1269,17 +1269,17 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
}
} else {
if (from != 0) {
if (!cc->is_negated()) {
macro_assembler->CheckCharacterLT(from, on_failure);
} else {
if (cc->is_negated()) {
macro_assembler->CheckCharacterLT(from, &success);
} else {
macro_assembler->CheckCharacterLT(from, on_failure);
}
}
if (to != 0xffff) {
if (!cc->is_negated()) {
macro_assembler->CheckCharacterGT(to, on_failure);
} else {
if (cc->is_negated()) {
macro_assembler->CheckCharacterLT(to + 1, on_failure);
} else {
macro_assembler->CheckCharacterGT(to, on_failure);
}
} else {
if (cc->is_negated()) {
......@@ -1302,16 +1302,16 @@ bool TextNode::Emit(RegExpCompiler* compiler) {
TextElement elm = elms_->at(i);
if (elm.type == TextElement::ATOM) {
Vector<const uc16> quarks = elm.data.u_atom->data();
if (!compiler->is_case_independent()) {
macro_assembler->CheckCharacters(quarks,
cp_offset,
on_failure_->label());
} else {
if (compiler->is_case_independent()) {
EmitAtomNonLetters(macro_assembler,
elm,
quarks,
on_failure_->label(),
cp_offset);
} else {
macro_assembler->CheckCharacters(quarks,
cp_offset,
on_failure_->label());
}
cp_offset += quarks.length();
} else {
......@@ -1474,7 +1474,12 @@ bool BackReferenceNode::Emit(RegExpCompiler* compiler) {
macro->IfRegisterLT(start_reg_, 0, on_success()->label());
macro->IfRegisterLT(end_reg_, 0, on_success()->label());
ASSERT_EQ(start_reg_ + 1, end_reg_);
macro->CheckNotBackReference(start_reg_, on_failure_->label());
if (compiler->is_case_independent()) {
macro->CheckNotBackReferenceCaseIndependent(start_reg_,
on_failure_->label());
} else {
macro->CheckNotBackReference(start_reg_, on_failure_->label());
}
return on_success()->GoTo(compiler);
}
......@@ -2140,8 +2145,9 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
uc32 c = range[i];
uc16 range_from = c + (pos - start);
uc16 range_to = c + (end - start);
if (!(from() <= range_from && range_to <= to()))
if (!(from() <= range_from && range_to <= to())) {
ranges->Add(CharacterRange(range_from, range_to));
}
}
start = pos = block_end + 1;
}
......
......@@ -213,11 +213,14 @@ void RegExpMacroAssemblerIA32::CheckCurrentPosition(int register_index,
}
// Case-independent back reference check. Not implemented for the IA32
// backend yet: calling it hits UNIMPLEMENTED() and both arguments are ignored.
void RegExpMacroAssemblerIA32::CheckNotBackReferenceCaseIndependent(
int start_reg, Label* on_no_match) {
UNIMPLEMENTED();
}
void RegExpMacroAssemblerIA32::CheckNotBackReference(
int start_reg, Label* on_no_match) {
if (ignore_case_) {
UNIMPLEMENTED();
}
Label fallthrough;
__ mov(eax, register_location(start_reg));
__ mov(ecx, register_location(start_reg + 1));
......
......@@ -53,6 +53,8 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
Label* on_failure);
virtual void CheckCurrentPosition(int register_index, Label* on_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
Label* on_no_match);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
......
......@@ -199,6 +199,13 @@ void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
}
// Case-independent back reference check for the bytecode backend. Forwards
// start_reg and the mismatch label unchanged to
// IrregexpAssembler::CheckNotBackReferenceNoCase.
void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceCaseIndependent(
int start_reg,
Label* on_not_equal) {
assembler_->CheckNotBackReferenceNoCase(start_reg, on_not_equal);
}
void RegExpMacroAssemblerIrregexp::CheckBitmap(uc16 start,
Label* bitmap,
Label* on_zero) {
......
......@@ -65,6 +65,8 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
uc16 mask,
Label* on_not_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
Label* on_no_match);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure);
......
......@@ -76,6 +76,8 @@ class RegExpMacroAssembler {
int register_index,
Label* on_equal) = 0;
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
// Case-independent variant of CheckNotBackReference. BackReferenceNode::Emit
// calls this instead of CheckNotBackReference when the compiler is case
// independent. NOTE(review): presumably on_no_match is taken when the input
// does not match the capture whose start register is start_reg - confirm
// against the backend implementations.
virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
Label* on_no_match) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
// matches. If the label is NULL then we should pop a backtrack address off
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment