Commit c621bbbe authored by lrn@chromium.org

Issue 227 Fixed. Properly handles non-ASCII characters in quick-check on ASCII strings.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1248 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent e7150a8d
......@@ -2227,9 +2227,17 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
for (int i = 0; i < characters && i < quarks.length(); i++) {
QuickCheckDetails::Position* pos =
details->positions(characters_filled_in);
uc16 c = quarks[i];
if (c > char_mask) {
// If we expect a non-ASCII character from an ASCII string,
// there is no way we can match. Not even case independent
// matching can turn an ASCII character into non-ASCII or
// vice versa.
details->set_cannot_match();
return;
}
if (compiler->ignore_case()) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
uc16 c = quarks[i];
int length = uncanonicalize.get(c, '\0', chars);
if (length < 2) {
// This letter has no case equivalents, so it's nice and simple
......@@ -2262,7 +2270,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
// determine definitely whether we have a match at this character
// position.
pos->mask = char_mask;
pos->value = quarks[i];
pos->value = c;
pos->determines_perfectly = true;
}
characters_filled_in++;
......@@ -2658,47 +2666,52 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
bool bound_checked = true; // Most ops will check their bounds.
if (first_element_checked && i == 0 && j == 0) continue;
if (quick_check != NULL &&
elm.cp_offset + j < quick_check->characters() &&
quick_check->positions(elm.cp_offset + j)->determines_perfectly) {
continue;
}
if (pass == NON_ASCII_MATCH) {
ASSERT(ascii);
if (quarks[j] > String::kMaxAsciiCharCode) {
assembler->GoTo(backtrack);
return;
}
} else if (pass == CHARACTER_MATCH) {
if (compiler->ignore_case()) {
bound_checked = EmitAtomNonLetter(assembler,
quarks[j],
backtrack,
cp_offset + j,
*checked_up_to < cp_offset + j,
preloaded);
} else {
if (!preloaded) {
assembler->LoadCurrentCharacter(cp_offset + j,
backtrack,
*checked_up_to < cp_offset + j);
} else {
if (quick_check != NULL &&
elm.cp_offset + j < quick_check->characters() &&
quick_check->positions(elm.cp_offset + j)->
determines_perfectly) {
continue;
}
if (pass == CHARACTER_MATCH) {
if (compiler->ignore_case()) {
bound_checked = EmitAtomNonLetter(
assembler,
quarks[j],
backtrack,
cp_offset + j,
*checked_up_to < cp_offset + j,
preloaded);
} else {
if (!preloaded) {
assembler->LoadCurrentCharacter(
cp_offset + j,
backtrack,
*checked_up_to < cp_offset + j);
}
assembler->CheckNotCharacter(quarks[j], backtrack);
}
assembler->CheckNotCharacter(quarks[j], backtrack);
} else {
ASSERT_EQ(pass, CASE_CHARACTER_MATCH);
ASSERT(compiler->ignore_case());
bound_checked = EmitAtomLetter(assembler,
compiler->ascii(),
quarks[j],
backtrack,
cp_offset + j,
*checked_up_to < cp_offset + j,
preloaded);
}
} else {
ASSERT_EQ(pass, CASE_CHARACTER_MATCH);
ASSERT(compiler->ignore_case());
bound_checked = EmitAtomLetter(assembler,
compiler->ascii(),
quarks[j],
backtrack,
cp_offset + j,
*checked_up_to < cp_offset + j,
preloaded);
}
if (pass != NON_ASCII_MATCH && bound_checked) {
if (cp_offset + j > *checked_up_to) {
*checked_up_to = cp_offset + j;
if (bound_checked) {
if (cp_offset + j > *checked_up_to) {
*checked_up_to = cp_offset + j;
}
}
}
}
......
// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Regression test: a pattern containing a non-ASCII character must not match
// an ASCII subject just because the character's high bits were discarded.
// U+23A1 truncated to 7 bits is 0x21, which is the code of "!"
// (0x23A1 & 0x7F == 0x21).
var pattern = /\u23a1|x/;

// Neither alternative is present in "!", so exec must report no match.
assertEquals(null,
             pattern.exec("!"),
             "Throwing away high bits on ASCII string");

// The second alternative ("x") is present, so it must still be found.
assertEquals(["x"],
             pattern.exec("!x"),
             "Throwing away high bits on ASCII string");
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment