Commit 75eda476 authored by erik.corry@gmail.com

* Complete case independent support in Irregexp.

Review URL: http://codereview.chromium.org/12473

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@853 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent be6f6d0d
......@@ -203,8 +203,8 @@ DEFINE_bool(preemption, false,
DEFINE_bool(irregexp, false, "new regular expression code")
DEFINE_bool(trace_regexps, false, "trace Irregexp execution")
DEFINE_bool(trace_regexp_bytecodes, false, "trace Irregexp bytecode execution")
DEFINE_bool(attempt_case_independent, false, "attempt to run Irregexp case independent")
DEFINE_bool(irregexp_native, false, "use native code Irregexp implementation (IA32 only)")
DEFINE_bool(disable_jscre, false, "abort if JSCRE is used. Only useful with --irregexp")
// Testing flags test/cctest/test-{flags,api,serialization}.cc
DEFINE_bool(testing_bool_flag, true, "testing_bool_flag")
......
......@@ -243,6 +243,9 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
&node,
flags.is_ignore_case());
if (irregexp_data.is_null()) {
if (FLAG_disable_jscre) {
UNIMPLEMENTED();
}
result = JscrePrepare(re, pattern, flags);
} else {
result = IrregexpPrepare(re, pattern, flags, irregexp_data);
......@@ -267,6 +270,9 @@ Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
Handle<Object> index) {
switch (regexp->TypeTag()) {
case JSRegExp::JSCRE:
if (FLAG_disable_jscre) {
UNIMPLEMENTED();
}
return JscreExec(regexp, subject, index);
case JSRegExp::ATOM:
return AtomExec(regexp, subject, index);
......@@ -283,6 +289,9 @@ Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject) {
switch (regexp->TypeTag()) {
case JSRegExp::JSCRE:
if (FLAG_disable_jscre) {
UNIMPLEMENTED();
}
return JscreExecGlobal(regexp, subject);
case JSRegExp::ATOM:
return AtomExecGlobal(regexp, subject);
......@@ -906,7 +915,7 @@ class RegExpCompiler {
inline void IncrementRecursionDepth() { recursion_depth_++; }
inline void DecrementRecursionDepth() { recursion_depth_--; }
inline bool is_case_independent() { return is_case_independent_; }
inline bool ignore_case() { return ignore_case_; }
private:
EndNode* accept_;
......@@ -915,7 +924,7 @@ class RegExpCompiler {
List<RegExpNode*>* work_list_;
int recursion_depth_;
RegExpMacroAssembler* macro_assembler_;
bool is_case_independent_;
bool ignore_case_;
};
......@@ -925,7 +934,7 @@ RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case)
: next_register_(2 * (capture_count + 1)),
work_list_(NULL),
recursion_depth_(0),
is_case_independent_(ignore_case) {
ignore_case_(ignore_case) {
accept_ = new EndNode(EndNode::ACCEPT);
backtrack_ = new EndNode(EndNode::BACKTRACK);
}
......@@ -935,9 +944,6 @@ Handle<FixedArray> RegExpCompiler::Assemble(
RegExpMacroAssembler* macro_assembler,
RegExpNode* start,
int capture_count) {
if (!FLAG_attempt_case_independent && is_case_independent_) {
return Handle<FixedArray>::null();
}
macro_assembler_ = macro_assembler;
List <RegExpNode*> work_list(0);
work_list_ = &work_list;
......@@ -1306,7 +1312,7 @@ bool TextNode::Emit(RegExpCompiler* compiler) {
TextElement elm = elms_->at(i);
if (elm.type == TextElement::ATOM) {
Vector<const uc16> quarks = elm.data.u_atom->data();
if (compiler->is_case_independent()) {
if (compiler->ignore_case()) {
EmitAtomNonLetters(macro_assembler,
elm,
quarks,
......@@ -1324,7 +1330,7 @@ bool TextNode::Emit(RegExpCompiler* compiler) {
}
}
// Second, handle case independent letter matches if any.
if (compiler->is_case_independent()) {
if (compiler->ignore_case()) {
cp_offset = 0;
for (int i = 0; i < element_count; i++) {
TextElement elm = elms_->at(i);
......@@ -1360,6 +1366,22 @@ bool TextNode::Emit(RegExpCompiler* compiler) {
}
// Makes the character classes of this text node case independent.
//
// Every CHAR_CLASS element has AddCaseEquivalents() invoked on each range
// that is in its range list when the element is reached.  Elements of any
// other type (e.g. ATOM) are not modified by this function.
void TextNode::MakeCaseIndependent() {
  int element_count = elms_->length();
  for (int i = 0; i < element_count; i++) {
    TextElement elm = elms_->at(i);
    if (elm.type != TextElement::CHAR_CLASS) continue;
    RegExpCharacterClass* cc = elm.data.u_char_class;
    ZoneList<CharacterRange>* ranges = cc->ranges();
    // The length is sampled once before the loop, so only the ranges that
    // were present on entry are visited even if the callee (which is handed
    // the very same list, presumably to append to it) makes the list longer.
    int range_count = ranges->length();
    // Uses its own index name; the previous version reused `i` here and
    // shadowed the element index of the enclosing loop.
    for (int j = 0; j < range_count; j++) {
      // NOTE(review): the receiver is an element of the list that is also
      // passed as the argument.  If at() yields a reference into growable
      // storage, confirm that AddCaseEquivalents does not read its receiver
      // after growing the list.
      ranges->at(j).AddCaseEquivalents(ranges);
    }
  }
}
bool ChoiceNode::Emit(RegExpCompiler* compiler) {
int choice_count = alternatives_->length();
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
......@@ -1477,9 +1499,8 @@ bool BackReferenceNode::Emit(RegExpCompiler* compiler) {
macro->IfRegisterLT(start_reg_, 0, on_success()->label());
macro->IfRegisterLT(end_reg_, 0, on_success()->label());
ASSERT_EQ(start_reg_ + 1, end_reg_);
if (compiler->is_case_independent()) {
macro->CheckNotBackReferenceCaseIndependent(start_reg_,
on_failure_->label());
if (compiler->ignore_case()) {
macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label());
} else {
macro->CheckNotBackReference(start_reg_, on_failure_->label());
}
......@@ -2429,6 +2450,9 @@ void Analysis::VisitEnd(EndNode* that) {
// Analyzes a text node.  If this analysis was created with the ignore-case
// flag set, the node is first asked to make itself case independent; then
// its success successor and its failure successor are analyzed, in that
// order.
void Analysis::VisitText(TextNode* that) {
  if (ignore_case_) that->MakeCaseIndependent();

  RegExpNode* success_node = that->on_success();
  EnsureAnalyzed(success_node);

  RegExpNode* failure_node = that->on_failure();
  EnsureAnalyzed(failure_node);
}
......@@ -2604,7 +2628,7 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
captured_body,
compiler.backtrack());
if (node_return != NULL) *node_return = node;
Analysis analysis;
Analysis analysis(ignore_case);
analysis.EnsureAnalyzed(node);
if (!FLAG_irregexp) {
......
......@@ -596,6 +596,7 @@ class TextNode: public SeqRegExpNode {
RegExpNode* on_failure() { return on_failure_; }
virtual bool Emit(RegExpCompiler* compiler);
ZoneList<TextElement>* elements() { return elms_; }
void MakeCaseIndependent();
private:
RegExpNode* on_failure_;
ZoneList<TextElement>* elms_;
......@@ -741,12 +742,19 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
// NodeVisitor subclass that carries an ignore-case flag supplied at
// construction time.  Nodes are handed to it through EnsureAnalyzed().
class Analysis: public NodeVisitor {
 public:
  // Stores |ignore_case| for use by the Visit* methods.
  explicit Analysis(bool ignore_case) : ignore_case_(ignore_case) { }

  void EnsureAnalyzed(RegExpNode* node);

  // Declares one virtual Visit<Type>(<Type>Node* that) method for every
  // entry listed by FOR_EACH_NODE_TYPE.
#define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that);
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
#undef DECLARE_VISIT

 private:
  bool ignore_case_;

  DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
};
......
......@@ -212,7 +212,7 @@ void RegExpMacroAssemblerIA32::CheckCurrentPosition(int register_index,
}
void RegExpMacroAssemblerIA32::CheckNotBackReferenceCaseIndependent(
// IA32 version of the ignore-case back reference check.  It has no real
// implementation yet: the body consists solely of UNIMPLEMENTED(), and
// neither |start_reg| nor |on_no_match| is used.
void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
int start_reg, Label* on_no_match) {
UNIMPLEMENTED();
}
......
......@@ -50,8 +50,8 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
Label* on_failure);
virtual void CheckCurrentPosition(int register_index, Label* on_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
......
......@@ -199,7 +199,7 @@ void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
}
void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceCaseIndependent(
void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
int start_reg,
Label* on_not_equal) {
assembler_->CheckNotBackReferenceNoCase(start_reg, on_not_equal);
......
......@@ -65,8 +65,8 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
uc16 mask,
Label* on_not_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure);
......
......@@ -76,8 +76,8 @@ class RegExpMacroAssembler {
int register_index,
Label* on_equal) = 0;
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
Label* on_no_match) = 0;
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
// matches. If the label is NULL then we should pop a backtrack address off
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment