Commit 841d33a5 authored by Milad Fa, committed by V8 LUCI CQ

PPC/s390: [regexp] Compact codegen for large character classes

Port 8bbb44e5

Original Commit Message:

    Large character classes may easily be created when unicode
    properties (e.g.: /\p{L}/u and /\P{L}/u) are used - these are
    expanded internally into character classes that consist of hundreds
    of character ranges. Prior to this CL, we'd emit branching code
    for each of these ranges, leading to very large regexp code objects.

    This CL adds a new codegen mode for large character classes (where
    'large' currently means > 16 ranges). Instead of emitting branching
    code inline, the ranges are written into a ByteArray and we call into
    the C function IsCharacterInRangeArray for the actual branching logic.
    The ByteArray is smaller than emitted code and is deduplicated if the
    same character class is matched repeatedly in the same pattern.

    Note this mode is *not* implemented for the interpreter, since we
    currently don't have a constant pool for irregexp bytecode, and thus
    cannot reference ByteArrays.

R=jgruber@chromium.org, joransiu@ca.ibm.com, junyan@redhat.com, midawson@redhat.com
BUG=
LOG=N

Change-Id: I2ded01fa2767e56e72be81b949eefb5fb85b7013
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3231981
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#77473}
parent e127f584
......@@ -512,6 +512,41 @@ void RegExpMacroAssemblerPPC::CheckCharacterNotInRange(base::uc16 from,
BranchOrBacktrack(gt, on_not_in_range); // Unsigned higher condition.
}
// Emits a call to the C function referenced by
// ExternalReference::re_is_character_in_range_array(), passing three
// arguments:
//   r3: the current character,
//   r4: the range array returned by GetOrAddRangeArray(ranges),
//   r5: the isolate address.
// The callers in this file test r3 after the call to decide where to branch.
void RegExpMacroAssemblerPPC::CallIsCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges) {
static const int kNumArguments = 3;
__ PrepareCallCFunction(kNumArguments, r0);
// Load the three C arguments into r3..r5.
__ mr(r3, current_character());
__ mov(r4, Operand(GetOrAddRangeArray(ranges)));
__ mov(r5, Operand(ExternalReference::isolate_address(isolate())));
{
// We have a frame (set up in GetCode), but the assembler doesn't know.
FrameScope scope(masm_.get(), StackFrame::MANUAL);
__ CallCFunction(ExternalReference::re_is_character_in_range_array(),
kNumArguments);
}
// Reload the code pointer register with the code object after the C call
// (presumably because the call may clobber it -- TODO confirm).
__ mov(code_pointer(), Operand(masm_->CodeObject()));
}
// Emits a call to the range-array C helper for the current character, then
// goes to on_in_range (via BranchOrBacktrack) when the value left in r3 is
// non-zero. This implementation always returns true.
bool RegExpMacroAssemblerPPC::CheckCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
CallIsCharacterInRangeArray(ranges);
// r3 is compared against zero; non-zero takes the on_in_range path.
__ cmpi(r3, Operand::Zero());
BranchOrBacktrack(ne, on_in_range);
return true;
}
// Emits a call to the range-array C helper for the current character, then
// goes to on_not_in_range (via BranchOrBacktrack) when the value left in r3
// is zero. This implementation always returns true.
bool RegExpMacroAssemblerPPC::CheckCharacterNotInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
CallIsCharacterInRangeArray(ranges);
// r3 is compared against zero; zero takes the on_not_in_range path.
__ cmpi(r3, Operand::Zero());
BranchOrBacktrack(eq, on_not_in_range);
return true;
}
void RegExpMacroAssemblerPPC::CheckBitInTable(Handle<ByteArray> table,
Label* on_bit_set) {
__ mov(r3, Operand(table));
......
......@@ -48,6 +48,10 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC
Label* on_in_range) override;
void CheckCharacterNotInRange(base::uc16 from, base::uc16 to,
Label* on_not_in_range) override;
// Like the single-range checks above, but takes a list of character ranges.
bool CheckCharacterInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_in_range) override;
// Negated form of the range-list check: targets on_not_in_range instead.
bool CheckCharacterNotInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_not_in_range) override;
void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) override;
// Checks whether the given offset from the current position is before
......@@ -130,9 +134,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC
// Check whether we are exceeding the stack limit on the backtrack stack.
void CheckStackLimit();
// Generate a call to CheckStackGuardState.
void CallCheckStackGuardState(Register scratch);
// Generate a call to the C function IsCharacterInRangeArray for |ranges|.
void CallIsCharacterInRangeArray(const ZoneList<CharacterRange>* ranges);
// The ebp-relative location of a regexp register.
MemOperand register_location(int register_index);
......
......@@ -490,6 +490,41 @@ void RegExpMacroAssemblerS390::CheckCharacterNotInRange(
BranchOrBacktrack(gt, on_not_in_range); // Unsigned higher condition.
}
// Emits a call to the C function referenced by
// ExternalReference::re_is_character_in_range_array(), passing three
// arguments:
//   r2: the current character,
//   r3: the range array returned by GetOrAddRangeArray(ranges),
//   r4: the isolate address.
// The callers in this file test r2 after the call to decide where to branch.
void RegExpMacroAssemblerS390::CallIsCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges) {
static const int kNumArguments = 3;
__ PrepareCallCFunction(kNumArguments, r0);
// Load the three C arguments into r2..r4.
__ mov(r2, current_character());
__ mov(r3, Operand(GetOrAddRangeArray(ranges)));
__ mov(r4, Operand(ExternalReference::isolate_address(isolate())));
{
// We have a frame (set up in GetCode), but the assembler doesn't know.
FrameScope scope(masm_.get(), StackFrame::MANUAL);
__ CallCFunction(ExternalReference::re_is_character_in_range_array(),
kNumArguments);
}
// Reload the code pointer register with the code object after the C call
// (presumably because the call may clobber it -- TODO confirm).
__ mov(code_pointer(), Operand(masm_->CodeObject()));
}
// Emits a call to the range-array C helper for the current character, then
// goes to on_in_range (via BranchOrBacktrack) when the value left in r2 is
// non-zero. This implementation always returns true.
bool RegExpMacroAssemblerS390::CheckCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
CallIsCharacterInRangeArray(ranges);
// r2 is compared against zero; non-zero takes the on_in_range path.
__ CmpS64(r2, Operand::Zero());
BranchOrBacktrack(ne, on_in_range);
return true;
}
// Emits a call to the range-array C helper for the current character, then
// goes to on_not_in_range (via BranchOrBacktrack) when the value left in r2
// is zero. This implementation always returns true.
bool RegExpMacroAssemblerS390::CheckCharacterNotInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
CallIsCharacterInRangeArray(ranges);
// r2 is compared against zero; zero takes the on_not_in_range path.
__ CmpS64(r2, Operand::Zero());
BranchOrBacktrack(eq, on_not_in_range);
return true;
}
void RegExpMacroAssemblerS390::CheckBitInTable(Handle<ByteArray> table,
Label* on_bit_set) {
__ mov(r2, Operand(table));
......
......@@ -48,6 +48,10 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390
Label* on_in_range) override;
void CheckCharacterNotInRange(base::uc16 from, base::uc16 to,
Label* on_not_in_range) override;
// Like the single-range checks above, but takes a list of character ranges.
bool CheckCharacterInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_in_range) override;
// Negated form of the range-list check: targets on_not_in_range instead.
bool CheckCharacterNotInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_not_in_range) override;
void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) override;
// Checks whether the given offset from the current position is before
......@@ -130,8 +134,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390
void CheckStackLimit();
void CallCFunctionUsingStub(ExternalReference function, int num_arguments);
// Generate a call to CheckStackGuardState.
void CallCheckStackGuardState(Register scratch);
// Generate a call to the C function IsCharacterInRangeArray for |ranges|.
void CallIsCharacterInRangeArray(const ZoneList<CharacterRange>* ranges);
// The ebp-relative location of a regexp register.
MemOperand register_location(int register_index);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment