Commit 5638b7db authored by Milad Fa, committed by V8 LUCI CQ

PPC/s390: [regexp] Add dedicated enums for standard character sets

Port b4aa41d0

Original Commit Message:

    .. instead of referring to them through magic chars {s,S,w,W,d,D,n,.,*}.

R=jgruber@chromium.org, joransiu@ca.ibm.com, junyan@redhat.com, midawson@redhat.com
BUG=
LOG=N

Change-Id: Id1543bee0fe676876d1d7c7e49d3f4742c9959d9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3216038
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#77365}
parent e7ef566f
...@@ -527,19 +527,20 @@ void RegExpMacroAssemblerPPC::CheckBitInTable(Handle<ByteArray> table, ...@@ -527,19 +527,20 @@ void RegExpMacroAssemblerPPC::CheckBitInTable(Handle<ByteArray> table,
BranchOrBacktrack(ne, on_bit_set); BranchOrBacktrack(ne, on_bit_set);
} }
bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type, bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(
Label* on_no_match) { StandardCharacterSet type, Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned // Range checks (c in min..max) are generally implemented by an unsigned
// (c - min) <= (max - min) check // (c - min) <= (max - min) check
// TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
switch (type) { switch (type) {
case 's': case StandardCharacterSet::kWhitespace:
// Match space-characters // Match space-characters.
if (mode_ == LATIN1) { if (mode_ == LATIN1) {
// One byte space characters are '\t'..'\r', ' ' and \u00a0. // One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success; Label success;
__ cmpi(current_character(), Operand(' ')); __ cmpi(current_character(), Operand(' '));
__ beq(&success); __ beq(&success);
// Check range 0x09..0x0D // Check range 0x09..0x0D.
__ subi(r3, current_character(), Operand('\t')); __ subi(r3, current_character(), Operand('\t'));
__ cmpli(r3, Operand('\r' - '\t')); __ cmpli(r3, Operand('\r' - '\t'));
__ ble(&success); __ ble(&success);
...@@ -550,22 +551,22 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type, ...@@ -550,22 +551,22 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type,
return true; return true;
} }
return false; return false;
case 'S': case StandardCharacterSet::kNotWhitespace:
// The emitted code for generic character classes is good enough. // The emitted code for generic character classes is good enough.
return false; return false;
case 'd': case StandardCharacterSet::kDigit:
// Match ASCII digits ('0'..'9') // Match ASCII digits ('0'..'9')
__ subi(r3, current_character(), Operand('0')); __ subi(r3, current_character(), Operand('0'));
__ cmpli(r3, Operand('9' - '0')); __ cmpli(r3, Operand('9' - '0'));
BranchOrBacktrack(gt, on_no_match); BranchOrBacktrack(gt, on_no_match);
return true; return true;
case 'D': case StandardCharacterSet::kNotDigit:
// Match non ASCII-digits // Match non ASCII-digits
__ subi(r3, current_character(), Operand('0')); __ subi(r3, current_character(), Operand('0'));
__ cmpli(r3, Operand('9' - '0')); __ cmpli(r3, Operand('9' - '0'));
BranchOrBacktrack(le, on_no_match); BranchOrBacktrack(le, on_no_match);
return true; return true;
case '.': { case StandardCharacterSet::kNotLineTerminator: {
// Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ xori(r3, current_character(), Operand(0x01)); __ xori(r3, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
...@@ -582,7 +583,7 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type, ...@@ -582,7 +583,7 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type,
} }
return true; return true;
} }
case 'n': { case StandardCharacterSet::kLineTerminator: {
// Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ xori(r3, current_character(), Operand(0x01)); __ xori(r3, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
...@@ -603,7 +604,7 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type, ...@@ -603,7 +604,7 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type,
} }
return true; return true;
} }
case 'w': { case StandardCharacterSet::kWord: {
if (mode_ != LATIN1) { if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested. // Table is 256 entries, so all Latin1 characters can be tested.
__ cmpi(current_character(), Operand('z')); __ cmpi(current_character(), Operand('z'));
...@@ -616,7 +617,7 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type, ...@@ -616,7 +617,7 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type,
BranchOrBacktrack(eq, on_no_match); BranchOrBacktrack(eq, on_no_match);
return true; return true;
} }
case 'W': { case StandardCharacterSet::kNotWord: {
Label done; Label done;
if (mode_ != LATIN1) { if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested. // Table is 256 entries, so all Latin1 characters can be tested.
...@@ -633,12 +634,9 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type, ...@@ -633,12 +634,9 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(base::uc16 type,
} }
return true; return true;
} }
case '*': case StandardCharacterSet::kEverything:
// Match any character. // Match any character.
return true; return true;
// No custom implementation (yet): s(UC16), S(UC16).
default:
return false;
} }
} }
......
...@@ -53,7 +53,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC ...@@ -53,7 +53,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC
// Checks whether the given offset from the current position is before // Checks whether the given offset from the current position is before
// the end of the string. // the end of the string.
void CheckPosition(int cp_offset, Label* on_outside_input) override; void CheckPosition(int cp_offset, Label* on_outside_input) override;
bool CheckSpecialCharacterClass(base::uc16 type, Label* on_no_match) override; bool CheckSpecialCharacterClass(StandardCharacterSet type,
Label* on_no_match) override;
void Fail() override; void Fail() override;
Handle<HeapObject> GetCode(Handle<String> source) override; Handle<HeapObject> GetCode(Handle<String> source) override;
void GoTo(Label* label) override; void GoTo(Label* label) override;
......
...@@ -504,19 +504,20 @@ void RegExpMacroAssemblerS390::CheckBitInTable(Handle<ByteArray> table, ...@@ -504,19 +504,20 @@ void RegExpMacroAssemblerS390::CheckBitInTable(Handle<ByteArray> table,
BranchOrBacktrack(ne, on_bit_set); BranchOrBacktrack(ne, on_bit_set);
} }
bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type, bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(
Label* on_no_match) { StandardCharacterSet type, Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned // Range checks (c in min..max) are generally implemented by an unsigned
// (c - min) <= (max - min) check // (c - min) <= (max - min) check
// TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
switch (type) { switch (type) {
case 's': case StandardCharacterSet::kWhitespace:
// Match space-characters // Match space-characters.
if (mode_ == LATIN1) { if (mode_ == LATIN1) {
// One byte space characters are '\t'..'\r', ' ' and \u00a0. // One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success; Label success;
__ CmpS64(current_character(), Operand(' ')); __ CmpS64(current_character(), Operand(' '));
__ beq(&success); __ beq(&success);
// Check range 0x09..0x0D // Check range 0x09..0x0D.
__ SubS64(r2, current_character(), Operand('\t')); __ SubS64(r2, current_character(), Operand('\t'));
__ CmpU64(r2, Operand('\r' - '\t')); __ CmpU64(r2, Operand('\r' - '\t'));
__ ble(&success); __ ble(&success);
...@@ -527,22 +528,22 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type, ...@@ -527,22 +528,22 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type,
return true; return true;
} }
return false; return false;
case 'S': case StandardCharacterSet::kNotWhitespace:
// The emitted code for generic character classes is good enough. // The emitted code for generic character classes is good enough.
return false; return false;
case 'd': case StandardCharacterSet::kDigit:
// Match ASCII digits ('0'..'9') // Match ASCII digits ('0'..'9')
__ SubS64(r2, current_character(), Operand('0')); __ SubS64(r2, current_character(), Operand('0'));
__ CmpU64(r2, Operand('9' - '0')); __ CmpU64(r2, Operand('9' - '0'));
BranchOrBacktrack(gt, on_no_match); BranchOrBacktrack(gt, on_no_match);
return true; return true;
case 'D': case StandardCharacterSet::kNotDigit:
// Match non ASCII-digits // Match non ASCII-digits
__ SubS64(r2, current_character(), Operand('0')); __ SubS64(r2, current_character(), Operand('0'));
__ CmpU64(r2, Operand('9' - '0')); __ CmpU64(r2, Operand('9' - '0'));
BranchOrBacktrack(le, on_no_match); BranchOrBacktrack(le, on_no_match);
return true; return true;
case '.': { case StandardCharacterSet::kNotLineTerminator: {
// Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ XorP(r2, current_character(), Operand(0x01)); __ XorP(r2, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
...@@ -559,7 +560,7 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type, ...@@ -559,7 +560,7 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type,
} }
return true; return true;
} }
case 'n': { case StandardCharacterSet::kLineTerminator: {
// Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ XorP(r2, current_character(), Operand(0x01)); __ XorP(r2, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
...@@ -580,7 +581,7 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type, ...@@ -580,7 +581,7 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type,
} }
return true; return true;
} }
case 'w': { case StandardCharacterSet::kWord: {
if (mode_ != LATIN1) { if (mode_ != LATIN1) {
// Table is 256 entries, so all LATIN1 characters can be tested. // Table is 256 entries, so all LATIN1 characters can be tested.
__ CmpS64(current_character(), Operand('z')); __ CmpS64(current_character(), Operand('z'));
...@@ -593,7 +594,7 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type, ...@@ -593,7 +594,7 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type,
BranchOrBacktrack(eq, on_no_match); BranchOrBacktrack(eq, on_no_match);
return true; return true;
} }
case 'W': { case StandardCharacterSet::kNotWord: {
Label done; Label done;
if (mode_ != LATIN1) { if (mode_ != LATIN1) {
// Table is 256 entries, so all LATIN1 characters can be tested. // Table is 256 entries, so all LATIN1 characters can be tested.
...@@ -610,12 +611,9 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type, ...@@ -610,12 +611,9 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(base::uc16 type,
} }
return true; return true;
} }
case '*': case StandardCharacterSet::kEverything:
// Match any character. // Match any character.
return true; return true;
// No custom implementation (yet): s(UC16), S(UC16).
default:
return false;
} }
} }
......
...@@ -53,7 +53,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390 ...@@ -53,7 +53,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390
// Checks whether the given offset from the current position is before // Checks whether the given offset from the current position is before
// the end of the string. // the end of the string.
void CheckPosition(int cp_offset, Label* on_outside_input) override; void CheckPosition(int cp_offset, Label* on_outside_input) override;
bool CheckSpecialCharacterClass(base::uc16 type, Label* on_no_match) override; bool CheckSpecialCharacterClass(StandardCharacterSet type,
Label* on_no_match) override;
void Fail() override; void Fail() override;
Handle<HeapObject> GetCode(Handle<String> source) override; Handle<HeapObject> GetCode(Handle<String> source) override;
void GoTo(Label* label) override; void GoTo(Label* label) override;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment