Commit c621bbbe authored by lrn@chromium.org

Issue 227 Fixed. Properly handles non-ASCII characters in quick-check on ASCII strings.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1248 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent e7150a8d
...@@ -2227,9 +2227,17 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, ...@@ -2227,9 +2227,17 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
for (int i = 0; i < characters && i < quarks.length(); i++) { for (int i = 0; i < characters && i < quarks.length(); i++) {
QuickCheckDetails::Position* pos = QuickCheckDetails::Position* pos =
details->positions(characters_filled_in); details->positions(characters_filled_in);
uc16 c = quarks[i];
if (c > char_mask) {
// If we expect a non-ASCII character from an ASCII string,
// there is no way we can match. Not even case independent
// matching can turn an ASCII character into non-ASCII or
// vice versa.
details->set_cannot_match();
return;
}
if (compiler->ignore_case()) { if (compiler->ignore_case()) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
uc16 c = quarks[i];
int length = uncanonicalize.get(c, '\0', chars); int length = uncanonicalize.get(c, '\0', chars);
if (length < 2) { if (length < 2) {
// This letter has no case equivalents, so it's nice and simple // This letter has no case equivalents, so it's nice and simple
...@@ -2262,7 +2270,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, ...@@ -2262,7 +2270,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
// determine definitely whether we have a match at this character // determine definitely whether we have a match at this character
// position. // position.
pos->mask = char_mask; pos->mask = char_mask;
pos->value = quarks[i]; pos->value = c;
pos->determines_perfectly = true; pos->determines_perfectly = true;
} }
characters_filled_in++; characters_filled_in++;
...@@ -2658,20 +2666,23 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, ...@@ -2658,20 +2666,23 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
bool bound_checked = true; // Most ops will check their bounds. bool bound_checked = true; // Most ops will check their bounds.
if (first_element_checked && i == 0 && j == 0) continue; if (first_element_checked && i == 0 && j == 0) continue;
if (quick_check != NULL &&
elm.cp_offset + j < quick_check->characters() &&
quick_check->positions(elm.cp_offset + j)->determines_perfectly) {
continue;
}
if (pass == NON_ASCII_MATCH) { if (pass == NON_ASCII_MATCH) {
ASSERT(ascii); ASSERT(ascii);
if (quarks[j] > String::kMaxAsciiCharCode) { if (quarks[j] > String::kMaxAsciiCharCode) {
assembler->GoTo(backtrack); assembler->GoTo(backtrack);
return; return;
} }
} else if (pass == CHARACTER_MATCH) { } else {
if (quick_check != NULL &&
elm.cp_offset + j < quick_check->characters() &&
quick_check->positions(elm.cp_offset + j)->
determines_perfectly) {
continue;
}
if (pass == CHARACTER_MATCH) {
if (compiler->ignore_case()) { if (compiler->ignore_case()) {
bound_checked = EmitAtomNonLetter(assembler, bound_checked = EmitAtomNonLetter(
assembler,
quarks[j], quarks[j],
backtrack, backtrack,
cp_offset + j, cp_offset + j,
...@@ -2679,7 +2690,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, ...@@ -2679,7 +2690,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
preloaded); preloaded);
} else { } else {
if (!preloaded) { if (!preloaded) {
assembler->LoadCurrentCharacter(cp_offset + j, assembler->LoadCurrentCharacter(
cp_offset + j,
backtrack, backtrack,
*checked_up_to < cp_offset + j); *checked_up_to < cp_offset + j);
} }
...@@ -2696,13 +2708,14 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, ...@@ -2696,13 +2708,14 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
*checked_up_to < cp_offset + j, *checked_up_to < cp_offset + j,
preloaded); preloaded);
} }
if (pass != NON_ASCII_MATCH && bound_checked) { if (bound_checked) {
if (cp_offset + j > *checked_up_to) { if (cp_offset + j > *checked_up_to) {
*checked_up_to = cp_offset + j; *checked_up_to = cp_offset + j;
} }
} }
} }
} }
}
} else { } else {
ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
if (first_element_checked && i == 0) continue; if (first_element_checked && i == 0) continue;
......
// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Regression test for issue 227: an alternation whose first branch is a
// non-ASCII character must not match an ASCII subject through that branch.
// Note that 0x23A1 & 0x7F == 0x21, the character code of "!", so "!" is the
// subject that would match if the high bits of \u23a1 were discarded with a
// 7-bit mask (NOTE(review): presumably the ASCII character mask -- confirm).
var re = /\u23a1|x/;
var res;

// Runs `re` against `subject`, stores the outcome in the global `res`, and
// checks it against `expected`.
function checkExec(subject, expected) {
  res = re.exec(subject);
  assertEquals(expected, res, "Throwing away high bits on ASCII string");
}

// Neither alternative can match a lone "!".
checkExec("!", null);
// The "x" alternative must still be found after the "!".
checkExec("!x", ["x"]);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment