Commit 5638b7db, authored by Milad Fa, committed by V8 LUCI CQ

PPC/s390: [regexp] Add dedicated enums for standard character sets

Port b4aa41d0

Original Commit Message:

    .. instead of referring to them through magic chars {s,S,w,W,d,D,n,.,*}.

R=jgruber@chromium.org, joransiu@ca.ibm.com, junyan@redhat.com, midawson@redhat.com
BUG=
LOG=N

Change-Id: Id1543bee0fe676876d1d7c7e49d3f4742c9959d9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3216038
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#77365}
parent e7ef566f
......@@ -527,19 +527,20 @@ void RegExpMacroAssemblerPPC::CheckBitInTable(Handle<ByteArray> table,
BranchOrBacktrack(ne, on_bit_set);
}
bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type,
Label* on_no_match) {
bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(
StandardCharacterSet type, Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned
// (c - min) <= (max - min) check
// TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
switch (type) {
case 's':
// Match space-characters
case StandardCharacterSet::kWhitespace:
// Match space-characters.
if (mode_ == LATIN1) {
// One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success;
__ cmpi(current_character(), Operand(' '));
__ beq(&success);
// Check range 0x09..0x0D
// Check range 0x09..0x0D.
__ subi(r3, current_character(), Operand('\t'));
__ cmpli(r3, Operand('\r' - '\t'));
__ ble(&success);
......@@ -550,22 +551,22 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type,
return true;
}
return false;
case 'S':
case StandardCharacterSet::kNotWhitespace:
// The emitted code for generic character classes is good enough.
return false;
case 'd':
case StandardCharacterSet::kDigit:
// Match ASCII digits ('0'..'9')
__ subi(r3, current_character(), Operand('0'));
__ cmpli(r3, Operand('9' - '0'));
BranchOrBacktrack(gt, on_no_match);
return true;
case 'D':
case StandardCharacterSet::kNotDigit:
// Match non ASCII-digits
__ subi(r3, current_character(), Operand('0'));
__ cmpli(r3, Operand('9' - '0'));
BranchOrBacktrack(le, on_no_match);
return true;
case '.': {
case StandardCharacterSet::kNotLineTerminator: {
// Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ xori(r3, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
......@@ -582,7 +583,7 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type,
}
return true;
}
case 'n': {
case StandardCharacterSet::kLineTerminator: {
// Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ xori(r3, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
......@@ -603,7 +604,7 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type,
}
return true;
}
case 'w': {
case StandardCharacterSet::kWord: {
if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested.
__ cmpi(current_character(), Operand('z'));
......@@ -616,7 +617,7 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type,
BranchOrBacktrack(eq, on_no_match);
return true;
}
case 'W': {
case StandardCharacterSet::kNotWord: {
Label done;
if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested.
......@@ -633,12 +634,9 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type,
}
return true;
}
case '*':
case StandardCharacterSet::kEverything:
// Match any character.
return true;
// No custom implementation (yet): s(UC16), S(UC16).
default:
return false;
}
}
......
......@@ -53,7 +53,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC
// Checks whether the given offset from the current position is before
// the end of the string.
void CheckPosition(int cp_offset, Label* on_outside_input) override;
bool CheckSpecialCharacterClass(base::uc16 type, Label* on_no_match) override;
bool CheckSpecialCharacterClass(StandardCharacterSet type,
Label* on_no_match) override;
void Fail() override;
Handle<HeapObject> GetCode(Handle<String> source) override;
void GoTo(Label* label) override;
......
......@@ -504,19 +504,20 @@ void RegExpMacroAssemblerS390::CheckBitInTable(Handle<ByteArray> table,
BranchOrBacktrack(ne, on_bit_set);
}
bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type,
Label* on_no_match) {
bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(
StandardCharacterSet type, Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned
// (c - min) <= (max - min) check
// TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
switch (type) {
case 's':
// Match space-characters
case StandardCharacterSet::kWhitespace:
// Match space-characters.
if (mode_ == LATIN1) {
// One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success;
__ CmpS64(current_character(), Operand(' '));
__ beq(&success);
// Check range 0x09..0x0D
// Check range 0x09..0x0D.
__ SubS64(r2, current_character(), Operand('\t'));
__ CmpU64(r2, Operand('\r' - '\t'));
__ ble(&success);
......@@ -527,22 +528,22 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type,
return true;
}
return false;
case 'S':
case StandardCharacterSet::kNotWhitespace:
// The emitted code for generic character classes is good enough.
return false;
case 'd':
case StandardCharacterSet::kDigit:
// Match ASCII digits ('0'..'9')
__ SubS64(r2, current_character(), Operand('0'));
__ CmpU64(r2, Operand('9' - '0'));
BranchOrBacktrack(gt, on_no_match);
return true;
case 'D':
case StandardCharacterSet::kNotDigit:
// Match non ASCII-digits
__ SubS64(r2, current_character(), Operand('0'));
__ CmpU64(r2, Operand('9' - '0'));
BranchOrBacktrack(le, on_no_match);
return true;
case '.': {
case StandardCharacterSet::kNotLineTerminator: {
// Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ XorP(r2, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
......@@ -559,7 +560,7 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type,
}
return true;
}
case 'n': {
case StandardCharacterSet::kLineTerminator: {
// Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ XorP(r2, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
......@@ -580,7 +581,7 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type,
}
return true;
}
case 'w': {
case StandardCharacterSet::kWord: {
if (mode_ != LATIN1) {
// Table is 256 entries, so all LATIN1 characters can be tested.
__ CmpS64(current_character(), Operand('z'));
......@@ -593,7 +594,7 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type,
BranchOrBacktrack(eq, on_no_match);
return true;
}
case 'W': {
case StandardCharacterSet::kNotWord: {
Label done;
if (mode_ != LATIN1) {
// Table is 256 entries, so all LATIN1 characters can be tested.
......@@ -610,12 +611,9 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type,
}
return true;
}
case '*':
case StandardCharacterSet::kEverything:
// Match any character.
return true;
// No custom implementation (yet): s(UC16), S(UC16).
default:
return false;
}
}
......
......@@ -53,7 +53,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390
// Checks whether the given offset from the current position is before
// the end of the string.
void CheckPosition(int cp_offset, Label* on_outside_input) override;
bool CheckSpecialCharacterClass(base::uc16 type, Label* on_no_match) override;
bool CheckSpecialCharacterClass(StandardCharacterSet type,
Label* on_no_match) override;
void Fail() override;
Handle<HeapObject> GetCode(Handle<String> source) override;
void GoTo(Label* label) override;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment