char-predicates-inl.h 5.69 KB
Newer Older
1
// Copyright 2011 the V8 project authors. All rights reserved.
2 3
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4

5 6
#ifndef V8_STRINGS_CHAR_PREDICATES_INL_H_
#define V8_STRINGS_CHAR_PREDICATES_INL_H_
7

8
#include "src/base/bounds.h"
9
#include "src/strings/char-predicates.h"
10
#include "src/utils/utils.h"
11

12 13
namespace v8 {
namespace internal {
14

15 16 17
// If c is in 'A'-'Z' or 'a'-'z', return its lower-case.
// Else, return something outside of 'A'-'Z' and 'a'-'z'.
// Note: it ignores LOCALE.
18
inline constexpr int AsciiAlphaToLower(base::uc32 c) { return c | 0x20; }
19

20
// True iff c is the code point U+000D (CARRIAGE RETURN).
inline constexpr bool IsCarriageReturn(base::uc32 c) {
  constexpr base::uc32 kCarriageReturn = 0x000D;
  return c == kCarriageReturn;
}
21

22
// True iff c is the code point U+000A (LINE FEED).
inline constexpr bool IsLineFeed(base::uc32 c) {
  constexpr base::uc32 kLineFeed = 0x000A;
  return c == kLineFeed;
}
23

24
// True iff c is an ASCII letter or decimal digit (see IsAlphaNumeric), or one
// of the two extra characters '$' and '_'.
inline constexpr bool IsAsciiIdentifier(base::uc32 c) {
  return IsAlphaNumeric(c) || c == '$' || c == '_';
}

28
// True iff c is an ASCII letter ('A'-'Z' or 'a'-'z') or a decimal digit
// ('0'-'9').
inline constexpr bool IsAlphaNumeric(base::uc32 c) {
  // Case-fold first so that a single range check covers both letter cases.
  return base::IsInRange(AsciiAlphaToLower(c), 'a', 'z') || IsDecimalDigit(c);
}
31

32
// True iff c is one of the decimal digits '0'-'9'.
inline constexpr bool IsDecimalDigit(base::uc32 c) {
  // ECMA-262, 3rd, 7.8.3 (p 16)
  return base::IsInRange(c, '0', '9');
}

37
// True iff c is a hexadecimal digit: '0'-'9', 'a'-'f' or 'A'-'F'.
inline constexpr bool IsHexDigit(base::uc32 c) {
  // ECMA-262, 3rd, 7.6 (p 15)
  // The letter part is case-folded so one range check handles both cases.
  return IsDecimalDigit(c) || base::IsInRange(AsciiAlphaToLower(c), 'a', 'f');
}

42
// True iff c is one of the octal digits '0'-'7'.
inline constexpr bool IsOctalDigit(base::uc32 c) {
  // ECMA-262, 6th, 7.8.3
  return base::IsInRange(c, '0', '7');
}

47
// True iff c is a decimal digit that is not an octal digit, i.e. '8' or '9'.
inline constexpr bool IsNonOctalDecimalDigit(base::uc32 c) {
  return base::IsInRange(c, '8', '9');
}
50

51
// True iff c is one of the binary digits '0' or '1'.
inline constexpr bool IsBinaryDigit(base::uc32 c) {
  // ECMA-262, 6th, 7.8.3
  return c == '0' || c == '1';
}

56
// True iff c is an ASCII lower-case letter, 'a'-'z'.
inline constexpr bool IsAsciiLower(base::uc32 c) {
  return base::IsInRange(c, 'a', 'z');
}
59

60
// True iff c is an ASCII upper-case letter, 'A'-'Z'.
inline constexpr bool IsAsciiUpper(base::uc32 c) {
  return base::IsInRange(c, 'A', 'Z');
}
63

64
// Converts an ASCII lower-case letter to upper case; any other value is
// returned unchanged.
inline constexpr base::uc32 ToAsciiUpper(base::uc32 c) {
  // Branch-free: IsAsciiLower(c) is 0 or 1, so the shifted value is either 0
  // or 0x20, and bit 0x20 is cleared only when c is in 'a'-'z'.
  return c & ~(IsAsciiLower(c) << 5);
}

68
// Converts an ASCII upper-case letter to lower case; any other value is
// returned unchanged.
inline constexpr base::uc32 ToAsciiLower(base::uc32 c) {
  // Branch-free: IsAsciiUpper(c) is 0 or 1, so the shifted value is either 0
  // or 0x20, and bit 0x20 is set only when c is in 'A'-'Z'.
  return c | (IsAsciiUpper(c) << 5);
}

72
// True iff c is an ASCII letter, a decimal digit, or '_'.
inline constexpr bool IsRegExpWord(base::uc32 c) {
  return IsAlphaNumeric(c) || c == '_';
}

76
// Constexpr cache table for character flags.
// Each enumerator is a distinct bit, so several properties of one character
// can be OR-ed together into a single uint8_t table entry.
enum OneByteCharFlags {
  // The character may start an identifier.
  kIsIdentifierStart = 1 << 0,
  // The character may appear inside an identifier.
  kIsIdentifierPart = 1 << 1,
  // The character is white space (line terminators excluded).
  kIsWhiteSpace = 1 << 2,
  // The character is white space or a line terminator.
  kIsWhiteSpaceOrLineTerminator = 1 << 3,
  // The character is '\r' or '\n', or its value equals the low byte of
  // U+2028 / U+2029; callers must still check the full code point.
  kMaybeLineEnd = 1 << 4
};
84 85 86

// See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
// ID_Start. Additionally includes '_' and '$'.
87
constexpr bool IsOneByteIDStart(base::uc32 c) {
88 89 90 91 92 93 94 95
  return c == 0x0024 || (c >= 0x0041 && c <= 0x005A) || c == 0x005F ||
         (c >= 0x0061 && c <= 0x007A) || c == 0x00AA || c == 0x00B5 ||
         c == 0x00BA || (c >= 0x00C0 && c <= 0x00D6) ||
         (c >= 0x00D8 && c <= 0x00F6) || (c >= 0x00F8 && c <= 0x00FF);
}

// See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
// ID_Continue. Additionally includes '_' and '$'.
96
constexpr bool IsOneByteIDContinue(base::uc32 c) {
97 98 99 100 101 102 103
  return c == 0x0024 || (c >= 0x0030 && c <= 0x0039) || c == 0x005F ||
         (c >= 0x0041 && c <= 0x005A) || (c >= 0x0061 && c <= 0x007A) ||
         c == 0x00AA || c == 0x00B5 || c == 0x00B7 || c == 0x00BA ||
         (c >= 0x00C0 && c <= 0x00D6) || (c >= 0x00D8 && c <= 0x00F6) ||
         (c >= 0x00F8 && c <= 0x00FF);
}

104
// True iff c is one of the white-space characters representable in one byte:
// horizontal tab, vertical tab, form feed, space, or U+00A0 (NO-BREAK SPACE).
// Line terminators ('\r', '\n') are deliberately not matched here.
constexpr bool IsOneByteWhitespace(base::uc32 c) {
  return c == '\t' || c == '\v' || c == '\f' || c == ' ' || c == u'\xa0';
}

108
// Computes the OneByteCharFlags bit set for a single character value c.
// Used to populate kOneByteCharFlags at compile time.
constexpr uint8_t BuildOneByteCharFlags(base::uc32 c) {
  uint8_t result = 0;
  // A backslash is flagged as identifier start/part in addition to the
  // ID_Start / ID_Continue characters.
  if (IsOneByteIDStart(c) || c == '\\') result |= kIsIdentifierStart;
  if (IsOneByteIDContinue(c) || c == '\\') result |= kIsIdentifierPart;
  if (IsOneByteWhitespace(c)) {
    result |= kIsWhiteSpace | kIsWhiteSpaceOrLineTerminator;
  }
  if (c == '\r' || c == '\n') {
    result |= kIsWhiteSpaceOrLineTerminator | kMaybeLineEnd;
  }
  // Add markers to identify 0x2028 and 0x2029.
  // The casts keep only the low byte (0x28 / 0x29), matching how
  // IsLineTerminatorSequence indexes the table with static_cast<uint8_t>(c).
  if (c == static_cast<uint8_t>(0x2028) || c == static_cast<uint8_t>(0x2029)) {
    result |= kMaybeLineEnd;
  }
  return result;
}
const constexpr uint8_t kOneByteCharFlags[256] = {
#define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N),
126 127
    INT_0_TO_127_LIST(BUILD_CHAR_FLAGS)
#undef BUILD_CHAR_FLAGS
128 129 130
#define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N + 128),
        INT_0_TO_127_LIST(BUILD_CHAR_FLAGS)
#undef BUILD_CHAR_FLAGS
131 132
};

133
// Returns whether c may start an identifier. Values 0..255 are answered from
// the kOneByteCharFlags table; everything else is delegated to
// IsIdentifierStartSlow.
bool IsIdentifierStart(base::uc32 c) {
  if (!base::IsInRange(c, 0, 255)) return IsIdentifierStartSlow(c);
  // Cross-check the cached flag against the slow path.
  DCHECK_EQ(IsIdentifierStartSlow(c),
            static_cast<bool>(kOneByteCharFlags[c] & kIsIdentifierStart));
  return kOneByteCharFlags[c] & kIsIdentifierStart;
}

140
// Returns whether c may appear inside an identifier. Values 0..255 are
// answered from the kOneByteCharFlags table; everything else is delegated to
// IsIdentifierPartSlow.
bool IsIdentifierPart(base::uc32 c) {
  if (!base::IsInRange(c, 0, 255)) return IsIdentifierPartSlow(c);
  // Cross-check the cached flag against the slow path.
  DCHECK_EQ(IsIdentifierPartSlow(c),
            static_cast<bool>(kOneByteCharFlags[c] & kIsIdentifierPart));
  return kOneByteCharFlags[c] & kIsIdentifierPart;
}

147
// Returns whether c is white space. Values 0..255 are answered from the
// kOneByteCharFlags table; everything else is delegated to IsWhiteSpaceSlow.
bool IsWhiteSpace(base::uc32 c) {
  if (!base::IsInRange(c, 0, 255)) return IsWhiteSpaceSlow(c);
  // Cross-check the cached flag against the slow path.
  DCHECK_EQ(IsWhiteSpaceSlow(c),
            static_cast<bool>(kOneByteCharFlags[c] & kIsWhiteSpace));
  return kOneByteCharFlags[c] & kIsWhiteSpace;
}

154
// Returns whether c is white space or a line terminator. Values 0..255 are
// answered from the kOneByteCharFlags table; everything else is delegated to
// IsWhiteSpaceOrLineTerminatorSlow.
bool IsWhiteSpaceOrLineTerminator(base::uc32 c) {
  if (!base::IsInRange(c, 0, 255)) return IsWhiteSpaceOrLineTerminatorSlow(c);
  // Cross-check the cached flag against the slow path.
  DCHECK_EQ(
      IsWhiteSpaceOrLineTerminatorSlow(c),
      static_cast<bool>(kOneByteCharFlags[c] & kIsWhiteSpaceOrLineTerminator));
  return kOneByteCharFlags[c] & kIsWhiteSpaceOrLineTerminator;
}

162
// Returns whether c, followed by next, completes a line terminator sequence:
//   - '\n' always does;
//   - '\r' does unless next is '\n' (the sequence then ends at the '\n');
//   - U+2028 and U+2029 always do.
bool IsLineTerminatorSequence(base::uc32 c, base::uc32 next) {
  // Cheap pre-filter on the low byte of c. The flag is also set for the low
  // bytes of 0x2028 / 0x2029, so a hit still needs the exact checks below.
  if (kOneByteCharFlags[static_cast<uint8_t>(c)] & kMaybeLineEnd) {
    if (c == '\n') return true;
    if (c == '\r') return next != '\n';
    return base::IsInRange(static_cast<unsigned int>(c), 0x2028u, 0x2029u);
  }
  return false;
}
170

171
}  // namespace internal
172

173
}  // namespace v8
174

175
#endif  // V8_STRINGS_CHAR_PREDICATES_INL_H_