[regexp] Add dedicated enums for standard character sets

.. instead of referring to them through magic chars {s,S,w,W,d,D,n,.,*}.

Change-Id: Ib50937a2a7d4229a021377586a54be3db9ed8c1d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3217196
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Patrick Thier <pthier@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77337}

[regexp] Add dedicated enums for standard character sets
.. instead of referring to them through magic chars {s,S,w,W,d,D,n,.,*}.

Change-Id: Ib50937a2a7d4229a021377586a54be3db9ed8c1d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3217196
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Patrick Thier <pthier@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77337}
b4aa41d0 · Jakob Gruber · V8 LUCI CQ · ad89fd9f · b4aa41d0 · b4aa41d0
Commit b4aa41d0, authored Oct 12, 2021 by Jakob Gruber; committed by V8 LUCI CQ on Oct 12, 2021
16 changed files
--- a/src/regexp/arm/regexp-macro-assembler-arm.cc
+++ b/src/regexp/arm/regexp-macro-assembler-arm.cc
@@ -494,118 +494,116 @@ void RegExpMacroAssemblerARM::CheckBitInTable(
  BranchOrBacktrack(ne, on_bit_set);
 }

-bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(base::uc16 type,
-                                                         Label* on_no_match) {
+bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(
+    StandardCharacterSet type, Label* on_no_match) {
  // Range checks (c in min..max) are generally implemented by an unsigned
  // (c - min) <= (max - min) check
+  // TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
  switch (type) {
-  case 's':
-    // Match space-characters
-    if (mode_ == LATIN1) {
-      // One byte space characters are '\t'..'\r', ' ' and \u00a0.
-      Label success;
-      __ cmp(current_character(), Operand(' '));
-      __ b(eq, &success);
-      // Check range 0x09..0x0D
-      __ sub(r0, current_character(), Operand('\t'));
-      __ cmp(r0, Operand('\r' - '\t'));
-      __ b(ls, &success);
-      // \u00a0 (NBSP).
-      __ cmp(r0, Operand(0x00A0 - '\t'));
-      BranchOrBacktrack(ne, on_no_match);
-      __ bind(&success);
+    case StandardCharacterSet::kWhitespace:
+      // Match space-characters.
+      if (mode_ == LATIN1) {
+        // One byte space characters are '\t'..'\r', ' ' and \u00a0.
+        Label success;
+        __ cmp(current_character(), Operand(' '));
+        __ b(eq, &success);
+        // Check range 0x09..0x0D.
+        __ sub(r0, current_character(), Operand('\t'));
+        __ cmp(r0, Operand('\r' - '\t'));
+        __ b(ls, &success);
+        // \u00a0 (NBSP).
+        __ cmp(r0, Operand(0x00A0 - '\t'));
+        BranchOrBacktrack(ne, on_no_match);
+        __ bind(&success);
+        return true;
+      }
+      return false;
+    case StandardCharacterSet::kNotWhitespace:
+      // The emitted code for generic character classes is good enough.
+      return false;
+    case StandardCharacterSet::kDigit:
+      // Match ASCII digits ('0'..'9')
+      __ sub(r0, current_character(), Operand('0'));
+      __ cmp(r0, Operand('9' - '0'));
+      BranchOrBacktrack(hi, on_no_match);
      return true;
-    }
-    return false;
-  case 'S':
-    // The emitted code for generic character classes is good enough.
-    return false;
-  case 'd':
-    // Match ASCII digits ('0'..'9')
-    __ sub(r0, current_character(), Operand('0'));
-    __ cmp(r0, Operand('9' - '0'));
-    BranchOrBacktrack(hi, on_no_match);
-    return true;
-  case 'D':
-    // Match non ASCII-digits
-    __ sub(r0, current_character(), Operand('0'));
-    __ cmp(r0, Operand('9' - '0'));
-    BranchOrBacktrack(ls, on_no_match);
-    return true;
-  case '.': {
-    // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
-    __ eor(r0, current_character(), Operand(0x01));
-    // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
-    __ sub(r0, r0, Operand(0x0B));
-    __ cmp(r0, Operand(0x0C - 0x0B));
-    BranchOrBacktrack(ls, on_no_match);
-    if (mode_ == UC16) {
-      // Compare original value to 0x2028 and 0x2029, using the already
-      // computed (current_char ^ 0x01 - 0x0B). I.e., check for
-      // 0x201D (0x2028 - 0x0B) or 0x201E.
-      __ sub(r0, r0, Operand(0x2028 - 0x0B));
-      __ cmp(r0, Operand(1));
+    case StandardCharacterSet::kNotDigit:
+      // Match non ASCII-digits
+      __ sub(r0, current_character(), Operand('0'));
+      __ cmp(r0, Operand('9' - '0'));
      BranchOrBacktrack(ls, on_no_match);
+      return true;
+    case StandardCharacterSet::kNotLineTerminator: {
+      // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
+      __ eor(r0, current_character(), Operand(0x01));
+      // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+      __ sub(r0, r0, Operand(0x0B));
+      __ cmp(r0, Operand(0x0C - 0x0B));
+      BranchOrBacktrack(ls, on_no_match);
+      if (mode_ == UC16) {
+        // Compare original value to 0x2028 and 0x2029, using the already
+        // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+        // 0x201D (0x2028 - 0x0B) or 0x201E.
+        __ sub(r0, r0, Operand(0x2028 - 0x0B));
+        __ cmp(r0, Operand(1));
+        BranchOrBacktrack(ls, on_no_match);
+      }
+      return true;
    }
-    return true;
-  }
-  case 'n': {
-    // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
-    __ eor(r0, current_character(), Operand(0x01));
-    // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
-    __ sub(r0, r0, Operand(0x0B));
-    __ cmp(r0, Operand(0x0C - 0x0B));
-    if (mode_ == LATIN1) {
-      BranchOrBacktrack(hi, on_no_match);
-    } else {
-      Label done;
-      __ b(ls, &done);
-      // Compare original value to 0x2028 and 0x2029, using the already
-      // computed (current_char ^ 0x01 - 0x0B). I.e., check for
-      // 0x201D (0x2028 - 0x0B) or 0x201E.
-      __ sub(r0, r0, Operand(0x2028 - 0x0B));
-      __ cmp(r0, Operand(1));
-      BranchOrBacktrack(hi, on_no_match);
-      __ bind(&done);
-    }
-    return true;
-  }
-  case 'w': {
-    if (mode_ != LATIN1) {
-      // Table is 256 entries, so all Latin1 characters can be tested.
-      __ cmp(current_character(), Operand('z'));
-      BranchOrBacktrack(hi, on_no_match);
+    case StandardCharacterSet::kLineTerminator: {
+      // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
+      __ eor(r0, current_character(), Operand(0x01));
+      // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+      __ sub(r0, r0, Operand(0x0B));
+      __ cmp(r0, Operand(0x0C - 0x0B));
+      if (mode_ == LATIN1) {
+        BranchOrBacktrack(hi, on_no_match);
+      } else {
+        Label done;
+        __ b(ls, &done);
+        // Compare original value to 0x2028 and 0x2029, using the already
+        // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+        // 0x201D (0x2028 - 0x0B) or 0x201E.
+        __ sub(r0, r0, Operand(0x2028 - 0x0B));
+        __ cmp(r0, Operand(1));
+        BranchOrBacktrack(hi, on_no_match);
+        __ bind(&done);
+      }
+      return true;
    }
-    ExternalReference map = ExternalReference::re_word_character_map();
-    __ mov(r0, Operand(map));
-    __ ldrb(r0, MemOperand(r0, current_character()));
-    __ cmp(r0, Operand::Zero());
-    BranchOrBacktrack(eq, on_no_match);
-    return true;
-  }
-  case 'W': {
-    Label done;
-    if (mode_ != LATIN1) {
-      // Table is 256 entries, so all Latin1 characters can be tested.
-      __ cmp(current_character(), Operand('z'));
-      __ b(hi, &done);
+    case StandardCharacterSet::kWord: {
+      if (mode_ != LATIN1) {
+        // Table is 256 entries, so all Latin1 characters can be tested.
+        __ cmp(current_character(), Operand('z'));
+        BranchOrBacktrack(hi, on_no_match);
+      }
+      ExternalReference map = ExternalReference::re_word_character_map();
+      __ mov(r0, Operand(map));
+      __ ldrb(r0, MemOperand(r0, current_character()));
+      __ cmp(r0, Operand::Zero());
+      BranchOrBacktrack(eq, on_no_match);
+      return true;
    }
-    ExternalReference map = ExternalReference::re_word_character_map();
-    __ mov(r0, Operand(map));
-    __ ldrb(r0, MemOperand(r0, current_character()));
-    __ cmp(r0, Operand::Zero());
-    BranchOrBacktrack(ne, on_no_match);
-    if (mode_ != LATIN1) {
-      __ bind(&done);
+    case StandardCharacterSet::kNotWord: {
+      Label done;
+      if (mode_ != LATIN1) {
+        // Table is 256 entries, so all Latin1 characters can be tested.
+        __ cmp(current_character(), Operand('z'));
+        __ b(hi, &done);
+      }
+      ExternalReference map = ExternalReference::re_word_character_map();
+      __ mov(r0, Operand(map));
+      __ ldrb(r0, MemOperand(r0, current_character()));
+      __ cmp(r0, Operand::Zero());
+      BranchOrBacktrack(ne, on_no_match);
+      if (mode_ != LATIN1) {
+        __ bind(&done);
+      }
+      return true;
    }
-    return true;
-  }
-  case '*':
-    // Match any character.
-    return true;
-  // No custom implementation (yet): s(UC16), S(UC16).
-  default:
-    return false;
+    case StandardCharacterSet::kEverything:
+      // Match any character.
+      return true;
  }
 }


--- a/src/regexp/arm/regexp-macro-assembler-arm.h
+++ b/src/regexp/arm/regexp-macro-assembler-arm.h
@@ -53,7 +53,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM
  // Checks whether the given offset from the current position is before
  // the end of the string.
  void CheckPosition(int cp_offset, Label* on_outside_input) override;
-  bool CheckSpecialCharacterClass(base::uc16 type, Label* on_no_match) override;
+  bool CheckSpecialCharacterClass(StandardCharacterSet type,
+                                  Label* on_no_match) override;
  void Fail() override;
  Handle<HeapObject> GetCode(Handle<String> source) override;
  void GoTo(Label* label) override;

--- a/src/regexp/arm64/regexp-macro-assembler-arm64.cc
+++ b/src/regexp/arm64/regexp-macro-assembler-arm64.cc
@@ -588,106 +588,104 @@ void RegExpMacroAssemblerARM64::CheckBitInTable(
  CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
 }

-bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(base::uc16 type,
-                                                           Label* on_no_match) {
+bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(
+    StandardCharacterSet type, Label* on_no_match) {
  // Range checks (c in min..max) are generally implemented by an unsigned
  // (c - min) <= (max - min) check
+  // TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
  switch (type) {
-  case 's':
-    // Match space-characters
-    if (mode_ == LATIN1) {
-      // One byte space characters are '\t'..'\r', ' ' and \u00a0.
-      Label success;
-      // Check for ' ' or 0x00A0.
-      __ Cmp(current_character(), ' ');
-      __ Ccmp(current_character(), 0x00A0, ZFlag, ne);
-      __ B(eq, &success);
-      // Check range 0x09..0x0D.
-      __ Sub(w10, current_character(), '\t');
-      CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
-      __ Bind(&success);
+    case StandardCharacterSet::kWhitespace:
+      // Match space-characters.
+      if (mode_ == LATIN1) {
+        // One byte space characters are '\t'..'\r', ' ' and \u00a0.
+        Label success;
+        // Check for ' ' or 0x00A0.
+        __ Cmp(current_character(), ' ');
+        __ Ccmp(current_character(), 0x00A0, ZFlag, ne);
+        __ B(eq, &success);
+        // Check range 0x09..0x0D.
+        __ Sub(w10, current_character(), '\t');
+        CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
+        __ Bind(&success);
+        return true;
+      }
+      return false;
+    case StandardCharacterSet::kNotWhitespace:
+      // The emitted code for generic character classes is good enough.
+      return false;
+    case StandardCharacterSet::kDigit:
+      // Match ASCII digits ('0'..'9').
+      __ Sub(w10, current_character(), '0');
+      CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
+      return true;
+    case StandardCharacterSet::kNotDigit:
+      // Match ASCII non-digits.
+      __ Sub(w10, current_character(), '0');
+      CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
+      return true;
+    case StandardCharacterSet::kNotLineTerminator: {
+      // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
+      // Here we emit the conditional branch only once at the end to make branch
+      // prediction more efficient, even though we could branch out of here
+      // as soon as a character matches.
+      __ Cmp(current_character(), 0x0A);
+      __ Ccmp(current_character(), 0x0D, ZFlag, ne);
+      if (mode_ == UC16) {
+        __ Sub(w10, current_character(), 0x2028);
+        // If the Z flag was set we clear the flags to force a branch.
+        __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
+        // ls -> !((C==1) && (Z==0))
+        BranchOrBacktrack(ls, on_no_match);
+      } else {
+        BranchOrBacktrack(eq, on_no_match);
+      }
      return true;
    }
-    return false;
-  case 'S':
-    // The emitted code for generic character classes is good enough.
-    return false;
-  case 'd':
-    // Match ASCII digits ('0'..'9').
-    __ Sub(w10, current_character(), '0');
-    CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
-    return true;
-  case 'D':
-    // Match ASCII non-digits.
-    __ Sub(w10, current_character(), '0');
-    CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
-    return true;
-  case '.': {
-    // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
-    // Here we emit the conditional branch only once at the end to make branch
-    // prediction more efficient, even though we could branch out of here
-    // as soon as a character matches.
-    __ Cmp(current_character(), 0x0A);
-    __ Ccmp(current_character(), 0x0D, ZFlag, ne);
-    if (mode_ == UC16) {
-      __ Sub(w10, current_character(), 0x2028);
-      // If the Z flag was set we clear the flags to force a branch.
-      __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
-      // ls -> !((C==1) && (Z==0))
-      BranchOrBacktrack(ls, on_no_match);
-    } else {
-      BranchOrBacktrack(eq, on_no_match);
-    }
-    return true;
-  }
-  case 'n': {
-    // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
-    // We have to check all 4 newline characters before emitting
-    // the conditional branch.
-    __ Cmp(current_character(), 0x0A);
-    __ Ccmp(current_character(), 0x0D, ZFlag, ne);
-    if (mode_ == UC16) {
-      __ Sub(w10, current_character(), 0x2028);
-      // If the Z flag was set we clear the flags to force a fall-through.
-      __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
-      // hi -> (C==1) && (Z==0)
-      BranchOrBacktrack(hi, on_no_match);
-    } else {
-      BranchOrBacktrack(ne, on_no_match);
+    case StandardCharacterSet::kLineTerminator: {
+      // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
+      // We have to check all 4 newline characters before emitting
+      // the conditional branch.
+      __ Cmp(current_character(), 0x0A);
+      __ Ccmp(current_character(), 0x0D, ZFlag, ne);
+      if (mode_ == UC16) {
+        __ Sub(w10, current_character(), 0x2028);
+        // If the Z flag was set we clear the flags to force a fall-through.
+        __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
+        // hi -> (C==1) && (Z==0)
+        BranchOrBacktrack(hi, on_no_match);
+      } else {
+        BranchOrBacktrack(ne, on_no_match);
+      }
+      return true;
    }
-    return true;
-  }
-  case 'w': {
-    if (mode_ != LATIN1) {
-      // Table is 256 entries, so all Latin1 characters can be tested.
-      CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
+    case StandardCharacterSet::kWord: {
+      if (mode_ != LATIN1) {
+        // Table is 256 entries, so all Latin1 characters can be tested.
+        CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
+      }
+      ExternalReference map = ExternalReference::re_word_character_map();
+      __ Mov(x10, map);
+      __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
+      CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
+      return true;
    }
-    ExternalReference map = ExternalReference::re_word_character_map();
-    __ Mov(x10, map);
-    __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
-    CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
-    return true;
-  }
-  case 'W': {
-    Label done;
-    if (mode_ != LATIN1) {
-      // Table is 256 entries, so all Latin1 characters can be tested.
-      __ Cmp(current_character(), 'z');
-      __ B(hi, &done);
+    case StandardCharacterSet::kNotWord: {
+      Label done;
+      if (mode_ != LATIN1) {
+        // Table is 256 entries, so all Latin1 characters can be tested.
+        __ Cmp(current_character(), 'z');
+        __ B(hi, &done);
+      }
+      ExternalReference map = ExternalReference::re_word_character_map();
+      __ Mov(x10, map);
+      __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
+      CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
+      __ Bind(&done);
+      return true;
    }
-    ExternalReference map = ExternalReference::re_word_character_map();
-    __ Mov(x10, map);
-    __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
-    CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
-    __ Bind(&done);
-    return true;
-  }
-  case '*':
-    // Match any character.
-    return true;
-  // No custom implementation (yet): s(UC16), S(UC16).
-  default:
-    return false;
+    case StandardCharacterSet::kEverything:
+      // Match any character.
+      return true;
  }
 }


--- a/src/regexp/arm64/regexp-macro-assembler-arm64.h
+++ b/src/regexp/arm64/regexp-macro-assembler-arm64.h
@@ -57,7 +57,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM64
  // Checks whether the given offset from the current position is before
  // the end of the string.
  void CheckPosition(int cp_offset, Label* on_outside_input) override;
-  bool CheckSpecialCharacterClass(base::uc16 type, Label* on_no_match) override;
+  bool CheckSpecialCharacterClass(StandardCharacterSet type,
+                                  Label* on_no_match) override;
  void BindJumpTarget(Label* label = nullptr) override;
  void Fail() override;
  Handle<HeapObject> GetCode(Handle<String> source) override;

--- a/src/regexp/ia32/regexp-macro-assembler-ia32.cc
+++ b/src/regexp/ia32/regexp-macro-assembler-ia32.cc
@@ -517,125 +517,124 @@ void RegExpMacroAssemblerIA32::CheckBitInTable(
  BranchOrBacktrack(not_equal, on_bit_set);
 }

-bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(base::uc16 type,
-                                                          Label* on_no_match) {
+bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(
+    StandardCharacterSet type, Label* on_no_match) {
  // Range checks (c in min..max) are generally implemented by an unsigned
  // (c - min) <= (max - min) check
+  // TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
  switch (type) {
-  case 's':
-    // Match space-characters
-    if (mode_ == LATIN1) {
-      // One byte space characters are '\t'..'\r', ' ' and \u00a0.
-      Label success;
-      __ cmp(current_character(), ' ');
-      __ j(equal, &success, Label::kNear);
-      // Check range 0x09..0x0D
-      __ lea(eax, Operand(current_character(), -'\t'));
-      __ cmp(eax, '\r' - '\t');
-      __ j(below_equal, &success, Label::kNear);
-      // \u00a0 (NBSP).
-      __ cmp(eax, 0x00A0 - '\t');
-      BranchOrBacktrack(not_equal, on_no_match);
-      __ bind(&success);
+    case StandardCharacterSet::kWhitespace:
+      // Match space-characters.
+      if (mode_ == LATIN1) {
+        // One byte space characters are '\t'..'\r', ' ' and \u00a0.
+        Label success;
+        __ cmp(current_character(), ' ');
+        __ j(equal, &success, Label::kNear);
+        // Check range 0x09..0x0D.
+        __ lea(eax, Operand(current_character(), -'\t'));
+        __ cmp(eax, '\r' - '\t');
+        __ j(below_equal, &success, Label::kNear);
+        // \u00a0 (NBSP).
+        __ cmp(eax, 0x00A0 - '\t');
+        BranchOrBacktrack(not_equal, on_no_match);
+        __ bind(&success);
+        return true;
+      }
+      return false;
+    case StandardCharacterSet::kNotWhitespace:
+      // The emitted code for generic character classes is good enough.
+      return false;
+    case StandardCharacterSet::kDigit:
+      // Match ASCII digits ('0'..'9').
+      __ lea(eax, Operand(current_character(), -'0'));
+      __ cmp(eax, '9' - '0');
+      BranchOrBacktrack(above, on_no_match);
      return true;
-    }
-    return false;
-  case 'S':
-    // The emitted code for generic character classes is good enough.
-    return false;
-  case 'd':
-    // Match ASCII digits ('0'..'9')
-    __ lea(eax, Operand(current_character(), -'0'));
-    __ cmp(eax, '9' - '0');
-    BranchOrBacktrack(above, on_no_match);
-    return true;
-  case 'D':
-    // Match non ASCII-digits
-    __ lea(eax, Operand(current_character(), -'0'));
-    __ cmp(eax, '9' - '0');
-    BranchOrBacktrack(below_equal, on_no_match);
-    return true;
-  case '.': {
-    // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
-    __ mov(eax, current_character());
-    __ xor_(eax, Immediate(0x01));
-    // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
-    __ sub(eax, Immediate(0x0B));
-    __ cmp(eax, 0x0C - 0x0B);
-    BranchOrBacktrack(below_equal, on_no_match);
-    if (mode_ == UC16) {
-      // Compare original value to 0x2028 and 0x2029, using the already
-      // computed (current_char ^ 0x01 - 0x0B). I.e., check for
-      // 0x201D (0x2028 - 0x0B) or 0x201E.
-      __ sub(eax, Immediate(0x2028 - 0x0B));
-      __ cmp(eax, 0x2029 - 0x2028);
+    case StandardCharacterSet::kNotDigit:
+      // Match non ASCII-digits.
+      __ lea(eax, Operand(current_character(), -'0'));
+      __ cmp(eax, '9' - '0');
      BranchOrBacktrack(below_equal, on_no_match);
+      return true;
+    case StandardCharacterSet::kLineTerminator:
+      // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 or 0x2029).
+      // The opposite of '.'.
+      __ mov(eax, current_character());
+      __ xor_(eax, Immediate(0x01));
+      // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
+      __ sub(eax, Immediate(0x0B));
+      __ cmp(eax, 0x0C - 0x0B);
+      if (mode_ == LATIN1) {
+        BranchOrBacktrack(above, on_no_match);
+      } else {
+        Label done;
+        BranchOrBacktrack(below_equal, &done);
+        DCHECK_EQ(UC16, mode_);
+        // Compare original value to 0x2028 and 0x2029, using the already
+        // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+        // 0x201D (0x2028 - 0x0B) or 0x201E.
+        __ sub(eax, Immediate(0x2028 - 0x0B));
+        __ cmp(eax, 1);
+        BranchOrBacktrack(above, on_no_match);
+        __ bind(&done);
+      }
+      return true;
+    case StandardCharacterSet::kNotLineTerminator: {
+      // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
+      __ mov(eax, current_character());
+      __ xor_(eax, Immediate(0x01));
+      // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
+      __ sub(eax, Immediate(0x0B));
+      __ cmp(eax, 0x0C - 0x0B);
+      BranchOrBacktrack(below_equal, on_no_match);
+      if (mode_ == UC16) {
+        // Compare original value to 0x2028 and 0x2029, using the already
+        // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+        // 0x201D (0x2028 - 0x0B) or 0x201E.
+        __ sub(eax, Immediate(0x2028 - 0x0B));
+        __ cmp(eax, 0x2029 - 0x2028);
+        BranchOrBacktrack(below_equal, on_no_match);
+      }
+      return true;
    }
-    return true;
-  }
-  case 'w': {
-    if (mode_ != LATIN1) {
-      // Table is 256 entries, so all Latin1 characters can be tested.
-      __ cmp(current_character(), Immediate('z'));
-      BranchOrBacktrack(above, on_no_match);
-    }
-    DCHECK_EQ(0, word_character_map[0]);  // Character '\0' is not a word char.
-    ExternalReference word_map = ExternalReference::re_word_character_map();
-    __ test_b(current_character(),
-              Operand(current_character(), times_1, word_map.address(),
-                      RelocInfo::EXTERNAL_REFERENCE));
-    BranchOrBacktrack(zero, on_no_match);
-    return true;
-  }
-  case 'W': {
-    Label done;
-    if (mode_ != LATIN1) {
-      // Table is 256 entries, so all Latin1 characters can be tested.
-      __ cmp(current_character(), Immediate('z'));
-      __ j(above, &done);
+    case StandardCharacterSet::kWord: {
+      if (mode_ != LATIN1) {
+        // Table is 256 entries, so all Latin1 characters can be tested.
+        __ cmp(current_character(), Immediate('z'));
+        BranchOrBacktrack(above, on_no_match);
+      }
+      DCHECK_EQ(0,
+                word_character_map[0]);  // Character '\0' is not a word char.
+      ExternalReference word_map = ExternalReference::re_word_character_map();
+      __ test_b(current_character(),
+                Operand(current_character(), times_1, word_map.address(),
+                        RelocInfo::EXTERNAL_REFERENCE));
+      BranchOrBacktrack(zero, on_no_match);
+      return true;
    }
-    DCHECK_EQ(0, word_character_map[0]);  // Character '\0' is not a word char.
-    ExternalReference word_map = ExternalReference::re_word_character_map();
-    __ test_b(current_character(),
-              Operand(current_character(), times_1, word_map.address(),
-                      RelocInfo::EXTERNAL_REFERENCE));
-    BranchOrBacktrack(not_zero, on_no_match);
-    if (mode_ != LATIN1) {
-      __ bind(&done);
+    case StandardCharacterSet::kNotWord: {
+      Label done;
+      if (mode_ != LATIN1) {
+        // Table is 256 entries, so all Latin1 characters can be tested.
+        __ cmp(current_character(), Immediate('z'));
+        __ j(above, &done);
+      }
+      DCHECK_EQ(0,
+                word_character_map[0]);  // Character '\0' is not a word char.
+      ExternalReference word_map = ExternalReference::re_word_character_map();
+      __ test_b(current_character(),
+                Operand(current_character(), times_1, word_map.address(),
+                        RelocInfo::EXTERNAL_REFERENCE));
+      BranchOrBacktrack(not_zero, on_no_match);
+      if (mode_ != LATIN1) {
+        __ bind(&done);
+      }
+      return true;
    }
-    return true;
-  }
  // Non-standard classes (with no syntactic shorthand) used internally.
-  case '*':
+  case StandardCharacterSet::kEverything:
    // Match any character.
    return true;
-  case 'n': {
-    // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 or 0x2029).
-    // The opposite of '.'.
-    __ mov(eax, current_character());
-    __ xor_(eax, Immediate(0x01));
-    // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
-    __ sub(eax, Immediate(0x0B));
-    __ cmp(eax, 0x0C - 0x0B);
-    if (mode_ == LATIN1) {
-      BranchOrBacktrack(above, on_no_match);
-    } else {
-      Label done;
-      BranchOrBacktrack(below_equal, &done);
-      DCHECK_EQ(UC16, mode_);
-      // Compare original value to 0x2028 and 0x2029, using the already
-      // computed (current_char ^ 0x01 - 0x0B). I.e., check for
-      // 0x201D (0x2028 - 0x0B) or 0x201E.
-      __ sub(eax, Immediate(0x2028 - 0x0B));
-      __ cmp(eax, 1);
-      BranchOrBacktrack(above, on_no_match);
-      __ bind(&done);
-    }
-    return true;
-  }
-  // No custom implementation (yet): s(UC16), S(UC16).
-  default:
-    return false;
  }
 }


--- a/src/regexp/ia32/regexp-macro-assembler-ia32.h
+++ b/src/regexp/ia32/regexp-macro-assembler-ia32.h
@@ -54,7 +54,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerIA32
  // Checks whether the given offset from the current position is before
  // the end of the string.
  void CheckPosition(int cp_offset, Label* on_outside_input) override;
-  bool CheckSpecialCharacterClass(base::uc16 type, Label* on_no_match) override;
+  bool CheckSpecialCharacterClass(StandardCharacterSet type,
+                                  Label* on_no_match) override;
  void Fail() override;
  Handle<HeapObject> GetCode(Handle<String> source) override;
  void GoTo(Label* label) override;

--- a/src/regexp/regexp-ast.h
+++ b/src/regexp/regexp-ast.h
@@ -71,6 +71,19 @@ class Interval {
  int to_;
 };

+// Named standard character sets.
+enum class StandardCharacterSet : char {
+  kWhitespace = 's',         // Like /\s/.
+  kNotWhitespace = 'S',      // Like /\S/.
+  kWord = 'w',               // Like /\w/.
+  kNotWord = 'W',            // Like /\W/.
+  kDigit = 'd',              // Like /\d/.
+  kNotDigit = 'D',           // Like /\D/.
+  kLineTerminator = 'n',     // The inverse of /./.
+  kNotLineTerminator = '.',  // Like /./.
+  kEverything = '*',         // Matches every character, like /./s.
+};
+
 // Represents code points (with values up to 0x10FFFF) in the range from from_
 // to to_, both ends are inclusive.
 class CharacterRange {
@@ -99,13 +112,14 @@ class CharacterRange {
    return list;
  }

-  V8_EXPORT_PRIVATE static void AddClassEscape(char type,
-                                               ZoneList<CharacterRange>* ranges,
-                                               Zone* zone);
+  V8_EXPORT_PRIVATE static void AddClassEscape(
+      StandardCharacterSet standard_character_set,
+      ZoneList<CharacterRange>* ranges, Zone* zone);
  // Add class escapes. Add case equivalent closure for \w and \W if necessary.
  V8_EXPORT_PRIVATE static void AddClassEscape(
-      char type, ZoneList<CharacterRange>* ranges,
-      bool add_unicode_case_equivalents, Zone* zone);
+      StandardCharacterSet standard_character_set,
+      ZoneList<CharacterRange>* ranges, bool add_unicode_case_equivalents,
+      Zone* zone);
  V8_EXPORT_PRIVATE static void AddCaseEquivalents(
      Isolate* isolate, Zone* zone, ZoneList<CharacterRange>* ranges,
      bool is_one_byte);
@@ -238,24 +252,23 @@ class RegExpAssertion final : public RegExpTree {

 class CharacterSet final {
 public:
-  explicit CharacterSet(base::uc16 standard_set_type)
+  explicit CharacterSet(StandardCharacterSet standard_set_type)
      : standard_set_type_(standard_set_type) {}
  explicit CharacterSet(ZoneList<CharacterRange>* ranges) : ranges_(ranges) {}

  ZoneList<CharacterRange>* ranges(Zone* zone);
-  base::uc16 standard_set_type() const { return standard_set_type_; }
-  void set_standard_set_type(base::uc16 special_set_type) {
-    standard_set_type_ = special_set_type;
+  StandardCharacterSet standard_set_type() const {
+    return standard_set_type_.value();
  }
-  bool is_standard() const { return standard_set_type_ != 0; }
+  void set_standard_set_type(StandardCharacterSet standard_set_type) {
+    standard_set_type_ = standard_set_type;
+  }
+  bool is_standard() const { return standard_set_type_.has_value(); }
  V8_EXPORT_PRIVATE void Canonicalize();

 private:
  ZoneList<CharacterRange>* ranges_ = nullptr;
-  // If non-zero, the value represents a standard set (e.g., all whitespace
-  // characters) without having to expand the ranges. See the comment on top of
-  // `standard_type` below.
-  base::uc16 standard_set_type_ = 0;
+  base::Optional<StandardCharacterSet> standard_set_type_;
 };

 class RegExpCharacterClass final : public RegExpTree {
@@ -280,8 +293,8 @@ class RegExpCharacterClass final : public RegExpTree {
      character_class_flags_ ^= NEGATED;
    }
  }
-  explicit RegExpCharacterClass(base::uc16 type)
-      : set_(type), character_class_flags_(CharacterClassFlags()) {}
+  explicit RegExpCharacterClass(StandardCharacterSet standard_set_type)
+      : set_(standard_set_type), character_class_flags_() {}

  DECL_BOILERPLATE(CharacterClass);

@@ -299,16 +312,9 @@ class RegExpCharacterClass final : public RegExpTree {
  bool is_standard(Zone* zone);
  // Returns a value representing the standard character set if is_standard()
  // returns true.
-  // Currently used values are:
-  // s : unicode whitespace
-  // S : unicode non-whitespace
-  // w : ASCII word character (digit, letter, underscore)
-  // W : non-ASCII word character
-  // d : ASCII digit
-  // D : non-ASCII digit
-  // . : non-newline
-  // * : All characters, for advancing unanchored regexp
-  base::uc16 standard_type() const { return set_.standard_set_type(); }
+  StandardCharacterSet standard_type() const {
+    return set_.standard_set_type();
+  }

  CharacterSet character_set() const { return set_; }
  ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
@@ -323,7 +329,6 @@ class RegExpCharacterClass final : public RegExpTree {
  CharacterClassFlags character_class_flags_;
 };

-
 class RegExpAtom final : public RegExpTree {
 public:
  explicit RegExpAtom(base::Vector<const base::uc16> data) : data_(data) {}

--- a/src/regexp/regexp-compiler-tonode.cc
+++ b/src/regexp/regexp-compiler-tonode.cc
@@ -101,29 +101,29 @@ bool RegExpCharacterClass::is_standard(Zone* zone) {
    return true;
  }
  if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
-    set_.set_standard_set_type('s');
+    set_.set_standard_set_type(StandardCharacterSet::kWhitespace);
    return true;
  }
  if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
-    set_.set_standard_set_type('S');
+    set_.set_standard_set_type(StandardCharacterSet::kNotWhitespace);
    return true;
  }
  if (CompareInverseRanges(set_.ranges(zone), kLineTerminatorRanges,
                           kLineTerminatorRangeCount)) {
-    set_.set_standard_set_type('.');
+    set_.set_standard_set_type(StandardCharacterSet::kNotLineTerminator);
    return true;
  }
  if (CompareRanges(set_.ranges(zone), kLineTerminatorRanges,
                    kLineTerminatorRangeCount)) {
-    set_.set_standard_set_type('n');
+    set_.set_standard_set_type(StandardCharacterSet::kLineTerminator);
    return true;
  }
  if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
-    set_.set_standard_set_type('w');
+    set_.set_standard_set_type(StandardCharacterSet::kWord);
    return true;
  }
  if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
-    set_.set_standard_set_type('W');
+    set_.set_standard_set_type(StandardCharacterSet::kNotWord);
    return true;
  }
  return false;
@@ -423,7 +423,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
          zone->New<RegExpCharacterClass>(zone, ranges);
      return zone->New<TextNode>(fail, compiler->read_backward(), on_success);
    }
-    if (standard_type() == '*') {
+    if (set_.is_standard() &&
+        standard_type() == StandardCharacterSet::kEverything) {
      return UnanchoredAdvance(compiler, on_success);
    } else {
      ChoiceNode* result = zone->New<ChoiceNode>(2, zone);
@@ -748,7 +749,8 @@ RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
  Zone* zone = compiler->zone();
  ZoneList<CharacterRange>* word_range =
      zone->New<ZoneList<CharacterRange>>(2, zone);
-  CharacterRange::AddClassEscape('w', word_range, true, zone);
+  CharacterRange::AddClassEscape(StandardCharacterSet::kWord, word_range, true,
+                                 zone);
  int stack_register = compiler->UnicodeLookaroundStackRegister();
  int position_register = compiler->UnicodeLookaroundPositionRegister();
  ChoiceNode* result = zone->New<ChoiceNode>(2, zone);
@@ -808,8 +810,10 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
      // Create a newline atom.
      ZoneList<CharacterRange>* newline_ranges =
          zone->New<ZoneList<CharacterRange>>(3, zone);
-      CharacterRange::AddClassEscape('n', newline_ranges, false, zone);
-      RegExpCharacterClass* newline_atom = zone->New<RegExpCharacterClass>('n');
+      CharacterRange::AddClassEscape(StandardCharacterSet::kLineTerminator,
+                                     newline_ranges, false, zone);
+      RegExpCharacterClass* newline_atom = zone->New<RegExpCharacterClass>(
+          StandardCharacterSet::kLineTerminator);
      TextNode* newline_matcher =
          zone->New<TextNode>(newline_atom, false,
                              ActionNode::PositiveSubmatchSuccess(
@@ -1057,10 +1061,13 @@ static void AddClassNegated(const int* elmv, int elmc,
  ranges->Add(CharacterRange::Range(last, kMaxCodePoint), zone);
 }

-void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
+void CharacterRange::AddClassEscape(StandardCharacterSet standard_character_set,
+                                    ZoneList<CharacterRange>* ranges,
                                    bool add_unicode_case_equivalents,
                                    Zone* zone) {
-  if (add_unicode_case_equivalents && (type == 'w' || type == 'W')) {
+  if (add_unicode_case_equivalents &&
+      (standard_character_set == StandardCharacterSet::kWord ||
+       standard_character_set == StandardCharacterSet::kNotWord)) {
    // See #sec-runtime-semantics-wordcharacters-abstract-operation
    // In case of unicode and ignore_case, we need to create the closure over
    // case equivalent characters before negating.
@@ -1068,7 +1075,7 @@ void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
        zone->New<ZoneList<CharacterRange>>(2, zone);
    AddClass(kWordRanges, kWordRangeCount, new_ranges, zone);
    AddUnicodeCaseEquivalents(new_ranges, zone);
-    if (type == 'W') {
+    if (standard_character_set == StandardCharacterSet::kNotWord) {
      ZoneList<CharacterRange>* negated =
          zone->New<ZoneList<CharacterRange>>(2, zone);
      CharacterRange::Negate(new_ranges, negated, zone);
@@ -1077,47 +1084,46 @@ void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
    ranges->AddAll(*new_ranges, zone);
    return;
  }
-  AddClassEscape(type, ranges, zone);
+  AddClassEscape(standard_character_set, ranges, zone);
 }

-void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
+void CharacterRange::AddClassEscape(StandardCharacterSet standard_character_set,
+                                    ZoneList<CharacterRange>* ranges,
                                    Zone* zone) {
-  switch (type) {
-    case 's':
+  switch (standard_character_set) {
+    case StandardCharacterSet::kWhitespace:
      AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
      break;
-    case 'S':
+    case StandardCharacterSet::kNotWhitespace:
      AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);
      break;
-    case 'w':
+    case StandardCharacterSet::kWord:
      AddClass(kWordRanges, kWordRangeCount, ranges, zone);
      break;
-    case 'W':
+    case StandardCharacterSet::kNotWord:
      AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);
      break;
-    case 'd':
+    case StandardCharacterSet::kDigit:
      AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);
      break;
-    case 'D':
+    case StandardCharacterSet::kNotDigit:
      AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);
      break;
-    case '.':
+    // This is the set of characters matched by the $ and ^ symbols
+    // in multiline mode.
+    case StandardCharacterSet::kLineTerminator:
+      AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges, zone);
+      break;
+    case StandardCharacterSet::kNotLineTerminator:
      AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges,
                      zone);
      break;
    // This is not a character range as defined by the spec but a
    // convenient shorthand for a character class that matches any
    // character.
-    case '*':
+    case StandardCharacterSet::kEverything:
      ranges->Add(CharacterRange::Everything(), zone);
      break;
-    // This is the set of characters matched by the $ and ^ symbols
-    // in multiline mode.
-    case 'n':
-      AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges, zone);
-      break;
-    default:
-      UNREACHABLE();
  }
 }

@@ -1256,7 +1262,8 @@ bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
 ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
  if (ranges_ == nullptr) {
    ranges_ = zone->New<ZoneList<CharacterRange>>(2, zone);
-    CharacterRange::AddClassEscape(standard_set_type_, ranges_, false, zone);
+    CharacterRange::AddClassEscape(standard_set_type_.value(), ranges_, false,
+                                   zone);
  }
  return ranges_;
 }

--- a/src/regexp/regexp-compiler.cc
+++ b/src/regexp/regexp-compiler.cc
@@ -2073,7 +2073,8 @@ namespace {
 void EmitWordCheck(RegExpMacroAssembler* assembler, Label* word,
                   Label* non_word, bool fall_through_on_word) {
  if (assembler->CheckSpecialCharacterClass(
-          fall_through_on_word ? 'w' : 'W',
+          fall_through_on_word ? StandardCharacterSet::kWord
+                               : StandardCharacterSet::kNotWord,
          fall_through_on_word ? non_word : word)) {
    // Optimized implementation available.
    return;
@@ -2119,7 +2120,8 @@ void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) {
  const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
  assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
                                  new_trace.backtrack(), can_skip_bounds_check);
-  if (!assembler->CheckSpecialCharacterClass('n', new_trace.backtrack())) {
+  if (!assembler->CheckSpecialCharacterClass(
+          StandardCharacterSet::kLineTerminator, new_trace.backtrack())) {
    // Newline means \n, \r, 0x2028 or 0x2029.
    if (!compiler->one_byte()) {
      assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok);
@@ -3882,7 +3884,8 @@ RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
    // Add a .*? at the beginning, outside the body capture, unless
    // this expression is anchored at the beginning or sticky.
    RegExpNode* loop_node = RegExpQuantifier::ToNode(
-        0, RegExpTree::kInfinity, false, zone()->New<RegExpCharacterClass>('*'),
+        0, RegExpTree::kInfinity, false,
+        zone()->New<RegExpCharacterClass>(StandardCharacterSet::kEverything),
        this, captured_body, data->contains_anchor);

    if (data->contains_anchor) {
@@ -3891,7 +3894,8 @@ RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
      ChoiceNode* first_step_node = zone()->New<ChoiceNode>(2, zone());
      first_step_node->AddAlternative(GuardedAlternative(captured_body));
      first_step_node->AddAlternative(GuardedAlternative(zone()->New<TextNode>(
-          zone()->New<RegExpCharacterClass>('*'), false, loop_node)));
+          zone()->New<RegExpCharacterClass>(StandardCharacterSet::kEverything),
+          false, loop_node)));
      node = first_step_node;
    } else {
      node = loop_node;

--- a/src/regexp/regexp-macro-assembler-tracer.cc
+++ b/src/regexp/regexp-macro-assembler-tracer.cc
@@ -354,12 +354,11 @@ void RegExpMacroAssemblerTracer::CheckPosition(int cp_offset,
 }

 bool RegExpMacroAssemblerTracer::CheckSpecialCharacterClass(
-    base::uc16 type, Label* on_no_match) {
+    StandardCharacterSet type, Label* on_no_match) {
  bool supported = assembler_->CheckSpecialCharacterClass(type,
                                                          on_no_match);
  PrintF(" CheckSpecialCharacterClass(type='%c', label[%08x]): %s;\n",
-         type,
-         LabelToInt(on_no_match),
+         static_cast<char>(type), LabelToInt(on_no_match),
         supported ? "true" : "false");
  return supported;
 }

--- a/src/regexp/regexp-macro-assembler-tracer.h
+++ b/src/regexp/regexp-macro-assembler-tracer.h
@@ -50,7 +50,8 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
                                Label* on_not_in_range) override;
  void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) override;
  void CheckPosition(int cp_offset, Label* on_outside_input) override;
-  bool CheckSpecialCharacterClass(base::uc16 type, Label* on_no_match) override;
+  bool CheckSpecialCharacterClass(StandardCharacterSet type,
+                                  Label* on_no_match) override;
  void Fail() override;
  Handle<HeapObject> GetCode(Handle<String> source) override;
  void GoTo(Label* label) override;

--- a/src/regexp/regexp-macro-assembler.h
+++ b/src/regexp/regexp-macro-assembler.h
@@ -107,7 +107,8 @@ class RegExpMacroAssembler {
  // character. Returns false if the type of special character class does
  // not have custom support.
  // May clobber the current loaded character.
-  virtual bool CheckSpecialCharacterClass(base::uc16 type, Label* on_no_match) {
+  virtual bool CheckSpecialCharacterClass(StandardCharacterSet type,
+                                          Label* on_no_match) {
    return false;
  }


--- a/src/regexp/regexp-parser.cc
+++ b/src/regexp/regexp-parser.cc
@@ -671,10 +671,12 @@ RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {

        if (builder->dotall()) {
          // Everything.
-          CharacterRange::AddClassEscape('*', ranges, false, zone());
+          CharacterRange::AddClassEscape(StandardCharacterSet::kEverything,
+                                         ranges, false, zone());
        } else {
-          // Everything except \x0A, \x0D, \u2028 and \u2029
-          CharacterRange::AddClassEscape('.', ranges, false, zone());
+          // Everything except \x0A, \x0D, \u2028 and \u2029.
+          CharacterRange::AddClassEscape(
+              StandardCharacterSet::kNotLineTerminator, ranges, false, zone());
        }

        RegExpCharacterClass* cc =
@@ -1950,8 +1952,9 @@ bool RegExpParserImpl<CharT>::TryParseCharacterClassEscape(
    case 'S':
    case 'w':
    case 'W':
-      CharacterRange::AddClassEscape(static_cast<char>(next), ranges,
-                                     add_unicode_case_equivalents, zone);
+      CharacterRange::AddClassEscape(static_cast<StandardCharacterSet>(next),
+                                     ranges, add_unicode_case_equivalents,
+                                     zone);
      Advance(2);
      return true;
    case 'p':

--- a/src/regexp/x64/regexp-macro-assembler-x64.cc
+++ b/src/regexp/x64/regexp-macro-assembler-x64.cc
@@ -532,123 +532,123 @@ void RegExpMacroAssemblerX64::CheckBitInTable(
  BranchOrBacktrack(not_equal, on_bit_set);
 }

-bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(base::uc16 type,
-                                                         Label* on_no_match) {
+bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(
+    StandardCharacterSet type, Label* on_no_match) {
  // Range checks (c in min..max) are generally implemented by an unsigned
  // (c - min) <= (max - min) check, using the sequence:
  //   leal(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min))
  //   cmpl(rax, Immediate(max - min))
+  // TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
  switch (type) {
-  case 's':
-    // Match space-characters
-    if (mode_ == LATIN1) {
-      // One byte space characters are '\t'..'\r', ' ' and \u00a0.
-      Label success;
-      __ cmpl(current_character(), Immediate(' '));
-      __ j(equal, &success, Label::kNear);
-      // Check range 0x09..0x0D
-      __ leal(rax, Operand(current_character(), -'\t'));
-      __ cmpl(rax, Immediate('\r' - '\t'));
-      __ j(below_equal, &success, Label::kNear);
-      // \u00a0 (NBSP).
-      __ cmpl(rax, Immediate(0x00A0 - '\t'));
-      BranchOrBacktrack(not_equal, on_no_match);
-      __ bind(&success);
+    case StandardCharacterSet::kWhitespace:
+      // Match space-characters.
+      if (mode_ == LATIN1) {
+        // One byte space characters are '\t'..'\r', ' ' and \u00a0.
+        Label success;
+        __ cmpl(current_character(), Immediate(' '));
+        __ j(equal, &success, Label::kNear);
+        // Check range 0x09..0x0D.
+        __ leal(rax, Operand(current_character(), -'\t'));
+        __ cmpl(rax, Immediate('\r' - '\t'));
+        __ j(below_equal, &success, Label::kNear);
+        // \u00a0 (NBSP).
+        __ cmpl(rax, Immediate(0x00A0 - '\t'));
+        BranchOrBacktrack(not_equal, on_no_match);
+        __ bind(&success);
+        return true;
+      }
+      return false;
+    case StandardCharacterSet::kNotWhitespace:
+      // The emitted code for generic character classes is good enough.
+      return false;
+    case StandardCharacterSet::kDigit:
+      // Match ASCII digits ('0'..'9').
+      __ leal(rax, Operand(current_character(), -'0'));
+      __ cmpl(rax, Immediate('9' - '0'));
+      BranchOrBacktrack(above, on_no_match);
      return true;
-    }
-    return false;
-  case 'S':
-    // The emitted code for generic character classes is good enough.
-    return false;
-  case 'd':
-    // Match ASCII digits ('0'..'9')
-    __ leal(rax, Operand(current_character(), -'0'));
-    __ cmpl(rax, Immediate('9' - '0'));
-    BranchOrBacktrack(above, on_no_match);
-    return true;
-  case 'D':
-    // Match non ASCII-digits
-    __ leal(rax, Operand(current_character(), -'0'));
-    __ cmpl(rax, Immediate('9' - '0'));
-    BranchOrBacktrack(below_equal, on_no_match);
-    return true;
-  case '.': {
-    // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
-    __ movl(rax, current_character());
-    __ xorl(rax, Immediate(0x01));
-    // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
-    __ subl(rax, Immediate(0x0B));
-    __ cmpl(rax, Immediate(0x0C - 0x0B));
-    BranchOrBacktrack(below_equal, on_no_match);
-    if (mode_ == UC16) {
-      // Compare original value to 0x2028 and 0x2029, using the already
-      // computed (current_char ^ 0x01 - 0x0B). I.e., check for
-      // 0x201D (0x2028 - 0x0B) or 0x201E.
-      __ subl(rax, Immediate(0x2028 - 0x0B));
-      __ cmpl(rax, Immediate(0x2029 - 0x2028));
+    case StandardCharacterSet::kNotDigit:
+      // Match non ASCII-digits.
+      __ leal(rax, Operand(current_character(), -'0'));
+      __ cmpl(rax, Immediate('9' - '0'));
      BranchOrBacktrack(below_equal, on_no_match);
+      return true;
+    case StandardCharacterSet::kNotLineTerminator: {
+      // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
+      __ movl(rax, current_character());
+      __ xorl(rax, Immediate(0x01));
+      // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
+      __ subl(rax, Immediate(0x0B));
+      __ cmpl(rax, Immediate(0x0C - 0x0B));
+      BranchOrBacktrack(below_equal, on_no_match);
+      if (mode_ == UC16) {
+        // Compare original value to 0x2028 and 0x2029, using the already
+        // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+        // 0x201D (0x2028 - 0x0B) or 0x201E.
+        __ subl(rax, Immediate(0x2028 - 0x0B));
+        __ cmpl(rax, Immediate(0x2029 - 0x2028));
+        BranchOrBacktrack(below_equal, on_no_match);
+      }
+      return true;
    }
-    return true;
-  }
-  case 'n': {
-    // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
-    __ movl(rax, current_character());
-    __ xorl(rax, Immediate(0x01));
-    // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
-    __ subl(rax, Immediate(0x0B));
-    __ cmpl(rax, Immediate(0x0C - 0x0B));
-    if (mode_ == LATIN1) {
-      BranchOrBacktrack(above, on_no_match);
-    } else {
-      Label done;
-      BranchOrBacktrack(below_equal, &done);
-      // Compare original value to 0x2028 and 0x2029, using the already
-      // computed (current_char ^ 0x01 - 0x0B). I.e., check for
-      // 0x201D (0x2028 - 0x0B) or 0x201E.
-      __ subl(rax, Immediate(0x2028 - 0x0B));
-      __ cmpl(rax, Immediate(0x2029 - 0x2028));
-      BranchOrBacktrack(above, on_no_match);
-      __ bind(&done);
-    }
-    return true;
-  }
-  case 'w': {
-    if (mode_ != LATIN1) {
-      // Table is 256 entries, so all Latin1 characters can be tested.
-      __ cmpl(current_character(), Immediate('z'));
-      BranchOrBacktrack(above, on_no_match);
+    case StandardCharacterSet::kLineTerminator: {
+      // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
+      __ movl(rax, current_character());
+      __ xorl(rax, Immediate(0x01));
+      // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
+      __ subl(rax, Immediate(0x0B));
+      __ cmpl(rax, Immediate(0x0C - 0x0B));
+      if (mode_ == LATIN1) {
+        BranchOrBacktrack(above, on_no_match);
+      } else {
+        Label done;
+        BranchOrBacktrack(below_equal, &done);
+        // Compare original value to 0x2028 and 0x2029, using the already
+        // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+        // 0x201D (0x2028 - 0x0B) or 0x201E.
+        __ subl(rax, Immediate(0x2028 - 0x0B));
+        __ cmpl(rax, Immediate(0x2029 - 0x2028));
+        BranchOrBacktrack(above, on_no_match);
+        __ bind(&done);
+      }
+      return true;
    }
-    __ Move(rbx, ExternalReference::re_word_character_map());
-    DCHECK_EQ(0, word_character_map[0]);  // Character '\0' is not a word char.
-    __ testb(Operand(rbx, current_character(), times_1, 0),
-             current_character());
-    BranchOrBacktrack(zero, on_no_match);
-    return true;
-  }
-  case 'W': {
-    Label done;
-    if (mode_ != LATIN1) {
-      // Table is 256 entries, so all Latin1 characters can be tested.
-      __ cmpl(current_character(), Immediate('z'));
-      __ j(above, &done);
+    case StandardCharacterSet::kWord: {
+      if (mode_ != LATIN1) {
+        // Table is 256 entries, so all Latin1 characters can be tested.
+        __ cmpl(current_character(), Immediate('z'));
+        BranchOrBacktrack(above, on_no_match);
+      }
+      __ Move(rbx, ExternalReference::re_word_character_map());
+      DCHECK_EQ(0,
+                word_character_map[0]);  // Character '\0' is not a word char.
+      __ testb(Operand(rbx, current_character(), times_1, 0),
+               current_character());
+      BranchOrBacktrack(zero, on_no_match);
+      return true;
    }
-    __ Move(rbx, ExternalReference::re_word_character_map());
-    DCHECK_EQ(0, word_character_map[0]);  // Character '\0' is not a word char.
-    __ testb(Operand(rbx, current_character(), times_1, 0),
-             current_character());
-    BranchOrBacktrack(not_zero, on_no_match);
-    if (mode_ != LATIN1) {
-      __ bind(&done);
+    case StandardCharacterSet::kNotWord: {
+      Label done;
+      if (mode_ != LATIN1) {
+        // Table is 256 entries, so all Latin1 characters can be tested.
+        __ cmpl(current_character(), Immediate('z'));
+        __ j(above, &done);
+      }
+      __ Move(rbx, ExternalReference::re_word_character_map());
+      DCHECK_EQ(0,
+                word_character_map[0]);  // Character '\0' is not a word char.
+      __ testb(Operand(rbx, current_character(), times_1, 0),
+               current_character());
+      BranchOrBacktrack(not_zero, on_no_match);
+      if (mode_ != LATIN1) {
+        __ bind(&done);
+      }
+      return true;
    }
-    return true;
-  }

-  case '*':
-    // Match any character.
-    return true;
-  // No custom implementation (yet): s(UC16), S(UC16).
-  default:
-    return false;
+    case StandardCharacterSet::kEverything:
+      // Match any character.
+      return true;
  }
 }


--- a/src/regexp/x64/regexp-macro-assembler-x64.h
+++ b/src/regexp/x64/regexp-macro-assembler-x64.h
@@ -53,7 +53,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerX64
  // Checks whether the given offset from the current position is before
  // the end of the string.
  void CheckPosition(int cp_offset, Label* on_outside_input) override;
-  bool CheckSpecialCharacterClass(base::uc16 type, Label* on_no_match) override;
+  bool CheckSpecialCharacterClass(StandardCharacterSet type,
+                                  Label* on_no_match) override;
  void Fail() override;
  Handle<HeapObject> GetCode(Handle<String> source) override;
  void GoTo(Label* label) override;

--- a/test/cctest/test-regexp.cc
+++ b/test/cctest/test-regexp.cc
@@ -505,7 +505,8 @@ static bool NotLineTerminator(base::uc32 c) {
  return !unibrow::IsLineTerminator(c);
 }

-static void TestCharacterClassEscapes(base::uc32 c, bool(pred)(base::uc32 c)) {
+static void TestCharacterClassEscapes(StandardCharacterSet c,
+                                      bool(pred)(base::uc32 c)) {
  Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  ZoneList<CharacterRange>* ranges =
      zone.New<ZoneList<CharacterRange>>(2, &zone);
@@ -521,13 +522,16 @@ static void TestCharacterClassEscapes(base::uc32 c, bool(pred)(base::uc32 c)) {
 }

 TEST(CharacterClassEscapes) {
-  TestCharacterClassEscapes('.', NotLineTerminator);
-  TestCharacterClassEscapes('d', IsDigit);
-  TestCharacterClassEscapes('D', NotDigit);
-  TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
-  TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);
-  TestCharacterClassEscapes('w', IsRegExpWord);
-  TestCharacterClassEscapes('W', NotWord);
+  TestCharacterClassEscapes(StandardCharacterSet::kNotLineTerminator,
+                            NotLineTerminator);
+  TestCharacterClassEscapes(StandardCharacterSet::kDigit, IsDigit);
+  TestCharacterClassEscapes(StandardCharacterSet::kNotDigit, NotDigit);
+  TestCharacterClassEscapes(StandardCharacterSet::kWhitespace,
+                            IsWhiteSpaceOrLineTerminator);
+  TestCharacterClassEscapes(StandardCharacterSet::kNotWhitespace,
+                            NotWhiteSpaceNorLineTermiantor);
+  TestCharacterClassEscapes(StandardCharacterSet::kWord, IsRegExpWord);
+  TestCharacterClassEscapes(StandardCharacterSet::kNotWord, NotWord);
 }

 static RegExpNode* Compile(const char* input, bool multiline, bool unicode,