Commit b65fcfe9 authored by Iain Ireland's avatar Iain Ireland Committed by Commit Bot

[regexp] Fix non-unicode ignore-case backreferences

https://crrev.com/c/2072858 rewrote the implementation of non-unicode
ignore-case matches to comply with the JS spec in some corner
cases. It fixed character matches and character class matches.

We missed a similar bug in the implementation of back references. This
CL fixes that bug.

The main change is in regexp-macro-assembler.cc, where
CaseInsensitiveCompareUC16 is split into CaseInsensitiveCompareUnicode
(which has the same semantics as before) and
CaseInsensitiveCompareNonUnicode (which has the semantics described
here: https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch).

Most of the rest of the patch undoes https://crrev.com/c/2081816 to
once again make the unicode flag available to the macroassembler, so
that we can decide which helper function to call.

The testcase is a version of test/intl/regress-10248.js, modified to
test backreferences.

Bug: v8:10573
Change-Id: I70ef7d134d37f99b1f75a5eba17020e82d59f1b9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2219284Reviewed-by: 's avatarJakob Gruber <jgruber@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68129}
parent 69bc22e7
......@@ -496,8 +496,12 @@ FUNCTION_REFERENCE_WITH_ISOLATE(re_match_for_call_from_js,
IrregexpInterpreter::MatchForCallFromJs)
FUNCTION_REFERENCE_WITH_ISOLATE(
re_case_insensitive_compare_uc16,
NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16)
re_case_insensitive_compare_unicode,
NativeRegExpMacroAssembler::CaseInsensitiveCompareUnicode)
FUNCTION_REFERENCE_WITH_ISOLATE(
re_case_insensitive_compare_non_unicode,
NativeRegExpMacroAssembler::CaseInsensitiveCompareNonUnicode)
ExternalReference ExternalReference::re_word_character_map(Isolate* isolate) {
return ExternalReference(
......
......@@ -77,8 +77,10 @@ class StatsCounter;
V(address_of_regexp_stack_memory_top_address, \
"RegExpStack::memory_top_address_address()") \
V(address_of_static_offsets_vector, "OffsetsVector::static_offsets_vector") \
V(re_case_insensitive_compare_uc16, \
"NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16()") \
V(re_case_insensitive_compare_unicode, \
"NativeRegExpMacroAssembler::CaseInsensitiveCompareUnicode()") \
V(re_case_insensitive_compare_non_unicode, \
"NativeRegExpMacroAssembler::CaseInsensitiveCompareNonUnicode()") \
V(re_check_stack_guard_state, \
"RegExpMacroAssembler*::CheckStackGuardState()") \
V(re_grow_stack, "NativeRegExpMacroAssembler::GrowStack()") \
......
......@@ -224,7 +224,7 @@ void RegExpMacroAssemblerARM::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, Label* on_no_match) {
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
__ ldr(r0, register_location(start_reg)); // Index of start of capture
__ ldr(r1, register_location(start_reg + 1)); // Index of end of capture
......@@ -335,7 +335,10 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference function =
ExternalReference::re_case_insensitive_compare_uc16(isolate());
unicode ? ExternalReference::re_case_insensitive_compare_unicode(
isolate())
: ExternalReference::re_case_insensitive_compare_non_unicode(
isolate());
__ CallCFunction(function, argument_count);
}
......
......@@ -37,7 +37,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c,
......
......@@ -294,7 +294,7 @@ void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, Label* on_no_match) {
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
Register capture_start_offset = w10;
......@@ -425,7 +425,10 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference function =
ExternalReference::re_case_insensitive_compare_uc16(isolate());
unicode ? ExternalReference::re_case_insensitive_compare_unicode(
isolate())
: ExternalReference::re_case_insensitive_compare_non_unicode(
isolate());
__ CallCFunction(function, argument_count);
}
......
......@@ -42,7 +42,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM64
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c,
......
......@@ -206,7 +206,7 @@ void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, Label* on_no_match) {
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
__ mov(edx, register_location(start_reg)); // Index of start of capture
__ mov(ebx, register_location(start_reg + 1)); // Index of end of capture
......@@ -336,7 +336,10 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference compare =
ExternalReference::re_case_insensitive_compare_uc16(isolate());
unicode ? ExternalReference::re_case_insensitive_compare_unicode(
isolate())
: ExternalReference::re_case_insensitive_compare_non_unicode(
isolate());
__ CallCFunction(compare, argument_count);
}
// Pop original values before reacting on result value.
......
......@@ -37,7 +37,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerIA32
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
......
......@@ -226,7 +226,7 @@ void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, Label* on_no_match) {
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
__ lw(a0, register_location(start_reg)); // Index of start of capture.
__ lw(a1, register_location(start_reg + 1)); // Index of end of capture.
......@@ -340,7 +340,10 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference function =
ExternalReference::re_case_insensitive_compare_uc16(masm_->isolate());
unicode ? ExternalReference::re_case_insensitive_compare_unicode(
isolate())
: ExternalReference::re_case_insensitive_compare_non_unicode(
isolate());
__ CallCFunction(function, argument_count);
}
......
......@@ -37,7 +37,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
......
......@@ -262,7 +262,7 @@ void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, Label* on_no_match) {
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
__ Ld(a0, register_location(start_reg)); // Index of start of capture.
__ Ld(a1, register_location(start_reg + 1)); // Index of end of capture.
......@@ -376,7 +376,10 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference function =
ExternalReference::re_case_insensitive_compare_uc16(masm_->isolate());
unicode ? ExternalReference::re_case_insensitive_compare_unicode(
isolate())
: ExternalReference::re_case_insensitive_compare_non_unicode(
isolate());
__ CallCFunction(function, argument_count);
}
......
......@@ -37,7 +37,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
......
......@@ -242,7 +242,7 @@ void RegExpMacroAssemblerPPC::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, Label* on_no_match) {
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
__ LoadP(r3, register_location(start_reg), r0); // Index of start of capture
__ LoadP(r4, register_location(start_reg + 1), r0); // Index of end
......@@ -356,7 +356,10 @@ void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference function =
ExternalReference::re_case_insensitive_compare_uc16(isolate());
unicode ? ExternalReference::re_case_insensitive_compare_unicode(
isolate())
: ExternalReference::re_case_insensitive_compare_non_unicode(
isolate());
__ CallCFunction(function, argument_count);
}
......
......@@ -36,7 +36,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned mask,
......
......@@ -329,11 +329,13 @@ void RegExpBytecodeGenerator::CheckNotBackReference(int start_reg,
}
void RegExpBytecodeGenerator::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, Label* on_not_equal) {
int start_reg, bool read_backward, bool unicode, Label* on_not_equal) {
DCHECK_LE(0, start_reg);
DCHECK_GE(kMaxRegister, start_reg);
Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD
: BC_CHECK_NOT_BACK_REF_NO_CASE,
Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD
: BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD)
: (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE
: BC_CHECK_NOT_BACK_REF_NO_CASE),
start_reg);
EmitOrLink(on_not_equal);
}
......
......@@ -69,6 +69,7 @@ class V8_EXPORT_PRIVATE RegExpBytecodeGenerator : public RegExpMacroAssembler {
void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match) override;
void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
bool unicode,
Label* on_no_match) override;
void IfRegisterLT(int register_index, int comparand, Label* if_lt) override;
void IfRegisterGE(int register_index, int comparand, Label* if_ge) override;
......
......@@ -102,12 +102,12 @@ STATIC_ASSERT(1 << BYTECODE_SHIFT > BYTECODE_MASK);
V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128 */ \
V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 39, 8) /* UNUSED */ \
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 39, 8) \
V(CHECK_NOT_BACK_REF_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 41, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, 42, 8) /* UNUSED */ \
V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, 42, 8) \
V(CHECK_NOT_REGS_EQUAL, 43, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
V(CHECK_REGISTER_LT, 44, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_GE, 45, 12) /* bc8 reg_idx24 value32 addr32 */ \
......
......@@ -3421,8 +3421,9 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
DCHECK_EQ(start_reg_ + 1, end_reg_);
if (IgnoreCase(flags_)) {
bool unicode = IsUnicode(flags_);
assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
trace->backtrack());
unicode, trace->backtrack());
} else {
assembler->CheckNotBackReference(start_reg_, read_backward(),
trace->backtrack());
......
......@@ -35,18 +35,23 @@ namespace internal {
namespace {
bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
Vector<const uc16> subject) {
Vector<const uc16> subject, bool unicode) {
Address offset_a =
reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
Address offset_b =
reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current)));
size_t length = len * kUC16Size;
return RegExpMacroAssembler::CaseInsensitiveCompareUC16(offset_a, offset_b,
length, isolate) == 1;
bool result = unicode
? RegExpMacroAssembler::CaseInsensitiveCompareUnicode(
offset_a, offset_b, length, isolate)
: RegExpMacroAssembler::CaseInsensitiveCompareNonUnicode(
offset_a, offset_b, length, isolate);
return result == 1;
}
bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
Vector<const uint8_t> subject) {
Vector<const uint8_t> subject, bool unicode) {
// For Latin1 characters the unicode flag makes no difference.
for (int i = 0; i < len; i++) {
unsigned int old_char = subject[from++];
......@@ -818,14 +823,26 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) {
UNREACHABLE(); // TODO(jgruber): Remove this unused bytecode.
int from = registers[LoadPacked24Unsigned(insn)];
int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
if (from >= 0 && len > 0) {
if (current + len > subject.length() ||
!BackRefMatchesNoCase(isolate, from, current, len, subject, true)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
DISPATCH();
}
ADVANCE_CURRENT_POSITION(len);
}
ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE);
DISPATCH();
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
int from = registers[LoadPacked24Unsigned(insn)];
int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
if (from >= 0 && len > 0) {
if (current + len > subject.length() ||
!BackRefMatchesNoCase(isolate, from, current, len, subject)) {
!BackRefMatchesNoCase(isolate, from, current, len, subject,
false)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
DISPATCH();
}
......@@ -835,14 +852,27 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) {
UNREACHABLE(); // TODO(jgruber): Remove this unused bytecode.
int from = registers[LoadPacked24Unsigned(insn)];
int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
if (from >= 0 && len > 0) {
if (current - len < 0 ||
!BackRefMatchesNoCase(isolate, from, current - len, len, subject,
true)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
DISPATCH();
}
SET_CURRENT_POSITION(current - len);
}
ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD);
DISPATCH();
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
int from = registers[LoadPacked24Unsigned(insn)];
int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
if (from >= 0 && len > 0) {
if (current - len < 0 ||
!BackRefMatchesNoCase(isolate, from, current - len, len, subject)) {
!BackRefMatchesNoCase(isolate, from, current - len, len, subject,
false)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
DISPATCH();
}
......
......@@ -352,11 +352,11 @@ void RegExpMacroAssemblerTracer::CheckNotBackReference(int start_reg,
}
void RegExpMacroAssemblerTracer::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, Label* on_no_match) {
PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, %s, label[%08x]);\n",
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, %s %s, label[%08x]);\n",
start_reg, read_backward ? "backward" : "forward",
LabelToInt(on_no_match));
assembler_->CheckNotBackReferenceIgnoreCase(start_reg, read_backward,
unicode ? "unicode" : "non-unicode", LabelToInt(on_no_match));
assembler_->CheckNotBackReferenceIgnoreCase(start_reg, read_backward, unicode,
on_no_match);
}
......
......@@ -33,6 +33,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match) override;
void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
bool unicode,
Label* on_no_match) override;
void CheckNotCharacter(unsigned c, Label* on_not_equal) override;
void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with,
......
......@@ -9,6 +9,7 @@
#include "src/execution/pointer-authentication.h"
#include "src/execution/simulator.h"
#include "src/regexp/regexp-stack.h"
#include "src/regexp/special-case.h"
#include "src/strings/unicode-inl.h"
#ifdef V8_INTL_SUPPORT
......@@ -27,13 +28,42 @@ RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone)
RegExpMacroAssembler::~RegExpMacroAssembler() = default;
int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
Address byte_offset2,
size_t byte_length,
Isolate* isolate) {
int RegExpMacroAssembler::CaseInsensitiveCompareNonUnicode(Address byte_offset1,
Address byte_offset2,
size_t byte_length,
Isolate* isolate) {
#ifdef V8_INTL_SUPPORT
// This function is not allowed to cause a garbage collection.
// A GC might move the calling generated code and invalidate the
// return address on the stack.
DisallowHeapAllocation no_gc;
DCHECK_EQ(0, byte_length % 2);
size_t length = byte_length / 2;
uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
for (size_t i = 0; i < length; i++) {
UChar32 c1 = RegExpCaseFolding::Canonicalize(substring1[i]);
UChar32 c2 = RegExpCaseFolding::Canonicalize(substring2[i]);
if (c1 != c2) {
return 0;
}
}
return 1;
#else
return CaseInsensitiveCompareUnicode(byte_offset1, byte_offset2, byte_length,
isolate);
#endif
}
int RegExpMacroAssembler::CaseInsensitiveCompareUnicode(Address byte_offset1,
Address byte_offset2,
size_t byte_length,
Isolate* isolate) {
// This function is not allowed to cause a garbage collection.
// A GC might move the calling generated code and invalidate the
// return address on the stack.
DisallowHeapAllocation no_gc;
DCHECK_EQ(0, byte_length % 2);
#ifdef V8_INTL_SUPPORT
......@@ -68,7 +98,6 @@ int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
#endif // V8_INTL_SUPPORT
}
void RegExpMacroAssembler::CheckNotInSurrogatePair(int cp_offset,
Label* on_failure) {
Label ok;
......
......@@ -88,7 +88,7 @@ class RegExpMacroAssembler {
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match) = 0;
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
bool read_backward, bool unicode,
Label* on_no_match) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
......@@ -165,11 +165,16 @@ class RegExpMacroAssembler {
virtual void ClearRegisters(int reg_from, int reg_to) = 0;
virtual void WriteStackPointerToRegister(int reg) = 0;
// Compares two-byte strings case insensitively.
// Compare two-byte strings case insensitively.
// Called from generated RegExp code.
static int CaseInsensitiveCompareUC16(Address byte_offset1,
Address byte_offset2,
size_t byte_length, Isolate* isolate);
static int CaseInsensitiveCompareNonUnicode(Address byte_offset1,
Address byte_offset2,
size_t byte_length,
Isolate* isolate);
static int CaseInsensitiveCompareUnicode(Address byte_offset1,
Address byte_offset2,
size_t byte_length,
Isolate* isolate);
// Check that we are not in the middle of a surrogate pair.
void CheckNotInSurrogatePair(int cp_offset, Label* on_failure);
......
......@@ -230,7 +230,7 @@ void RegExpMacroAssemblerS390::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerS390::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, Label* on_no_match) {
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
__ LoadP(r2, register_location(start_reg)); // Index of start of
// capture
......@@ -346,7 +346,10 @@ void RegExpMacroAssemblerS390::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference function =
ExternalReference::re_case_insensitive_compare_uc16(isolate());
unicode ? ExternalReference::re_case_insensitive_compare_unicode(
isolate())
: ExternalReference::re_case_insensitive_compare_non_unicode(
isolate());
__ CallCFunction(function, argument_count);
}
......
......@@ -36,7 +36,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned mask,
......
......@@ -215,7 +215,7 @@ void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, Label* on_no_match) {
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
ReadPositionFromRegister(rdx, start_reg); // Offset of start of capture
ReadPositionFromRegister(rbx, start_reg + 1); // Offset of end of capture
......@@ -354,7 +354,10 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
// linter.
AllowExternalCallThatCantCauseGC scope(&masm_);
ExternalReference compare =
ExternalReference::re_case_insensitive_compare_uc16(isolate());
unicode ? ExternalReference::re_case_insensitive_compare_unicode(
isolate())
: ExternalReference::re_case_insensitive_compare_non_unicode(
isolate());
__ CallCFunction(compare, num_arguments);
}
......
......@@ -37,6 +37,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerX64
void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match) override;
void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
bool unicode,
Label* on_no_match) override;
void CheckNotCharacter(uint32_t c, Label* on_not_equal) override;
void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask,
......
......@@ -1021,16 +1021,16 @@ TEST(MacroAssemblerNativeBackRefNoCase) {
m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(3, 0);
m.CheckNotBackReferenceIgnoreCase(2, false, &fail); // Match "AbC".
m.CheckNotBackReferenceIgnoreCase(2, false, &fail); // Match "ABC".
m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail); // Match "AbC".
m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail); // Match "ABC".
Label expected_fail;
m.CheckNotBackReferenceIgnoreCase(2, false, &expected_fail);
m.CheckNotBackReferenceIgnoreCase(2, false, false, &expected_fail);
m.BindJumpTarget(&fail);
m.Fail();
m.Bind(&expected_fail);
m.AdvanceCurrentPosition(3); // Skip "xYz"
m.CheckNotBackReferenceIgnoreCase(2, false, &succ);
m.CheckNotBackReferenceIgnoreCase(2, false, false, &succ);
m.Fail();
m.Bind(&succ);
......
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
function assertEquals(a,b) { if (a !== b) print("BLAH"); }
// See https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch
function Canonicalize(ch) {
var u = ch.toUpperCase();
if (u.length > 1) return ch;
var cu = u.charCodeAt(0);
if (ch.charCodeAt(0) >= 128 && cu < 128) return ch;
return cu;
}
function TestEquivalenceClass(eclass) {
var backref = /(.)\1/i;
var backrefUnicode = /(.)\1/iu;
for (var i = 0; i < eclass.length; i++) {
for (var j = 0; j < eclass.length; j++) {
if (i == j) continue;
var c1 = eclass[i];
var c2 = eclass[j];
var cc = c1 + c2;
var shouldMatch = Canonicalize(c1) === Canonicalize(c2);
assertEquals(backref.test(cc), shouldMatch);
assertEquals(backrefUnicode.test(cc), true);
}
}
}
function TestAll() {
for (var eclass of equivalence_classes) {
TestEquivalenceClass(eclass);
}
}
// Interesting case-folding equivalence classes (as determined by
// ICU's UnicodeSet::closeOver). A class is interesting if it contains
// more than two characters, or if it contains any characters in
// IgnoreSet or SpecialAddSet as defined in src/regexp/special-case.h.
var equivalence_classes = [
'\u0041\u0061', // Aa (sanity check)
'\u004b\u006b\u212a', // KkK
'\u0053\u0073\u017f', // Ssſ
'\u00b5\u039c\u03bc', // µΜμ
'\u00c5\u00e5\u212b', // ÅåÅ
'\u00df\u1e9e', // ßẞ
'\u03a9\u03c9\u2126', // ΩωΩ
'\u0390\u1fd3', // ΐΐ
'\u0398\u03b8\u03d1\u03f4', // Θθϑϴ
'\u03b0\u1fe3', // ΰΰ
'\u1f80\u1f88', // ᾀᾈ
'\u1fb3\u1fbc', // ᾳᾼ
'\u1fc3\u1fcc', // ῃῌ
'\u1ff3\u1ffc', // ῳῼ
'\ufb05\ufb06', // ſtst
// Everything below this line is a well-behaved case-folding
// equivalence class with more than two characters but only one
// canonical case-folded character
'\u01c4\u01c5\u01c6', '\u01c7\u01c8\u01c9', '\u01ca\u01cb\u01cc',
'\u01f1\u01f2\u01f3', '\u0345\u0399\u03b9\u1fbe', '\u0392\u03b2\u03d0',
'\u0395\u03b5\u03f5', '\u039a\u03ba\u03f0', '\u03a0\u03c0\u03d6',
'\u03a1\u03c1\u03f1', '\u03a3\u03c2\u03c3', '\u03a6\u03c6\u03d5',
'\u0412\u0432\u1c80', '\u0414\u0434\u1c81', '\u041e\u043e\u1c82',
'\u0421\u0441\u1c83', '\u0422\u0442\u1c84\u1c85', '\u042a\u044a\u1c86',
'\u0462\u0463\u1c87', '\u1c88\ua64a\ua64b', '\u1e60\u1e61\u1e9b'
];
TestAll();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment