// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/char-predicates.h"
#include "src/unicode.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace v8 {
namespace internal {

12
TEST(CharPredicatesTest, WhiteSpace) {
13 14 15 16 17
  EXPECT_TRUE(WhiteSpace::Is(0x0009));
  EXPECT_TRUE(WhiteSpace::Is(0x000B));
  EXPECT_TRUE(WhiteSpace::Is(0x000C));
  EXPECT_TRUE(WhiteSpace::Is(' '));
  EXPECT_TRUE(WhiteSpace::Is(0x00A0));
18 19 20 21 22 23
  EXPECT_TRUE(WhiteSpace::Is(0x1680));
  EXPECT_TRUE(WhiteSpace::Is(0x2000));
  EXPECT_TRUE(WhiteSpace::Is(0x2007));
  EXPECT_TRUE(WhiteSpace::Is(0x202F));
  EXPECT_TRUE(WhiteSpace::Is(0x205F));
  EXPECT_TRUE(WhiteSpace::Is(0x3000));
24
  EXPECT_TRUE(WhiteSpace::Is(0xFEFF));
25
  EXPECT_FALSE(WhiteSpace::Is(0x180E));
26 27 28
}


29
TEST(CharPredicatesTest, WhiteSpaceOrLineTerminator) {
30 31 32 33 34
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x0009));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000B));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000C));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(' '));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x00A0));
35 36 37 38 39
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x1680));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2000));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2007));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x202F));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x205F));
40 41 42 43 44 45
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0xFEFF));
  // Line terminators
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000A));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000D));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2028));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2029));
46
  EXPECT_FALSE(WhiteSpaceOrLineTerminator::Is(0x180E));
47 48 49
}


50
TEST(CharPredicatesTest, IdentifierStart) {
51 52 53 54 55
  EXPECT_TRUE(IdentifierStart::Is('$'));
  EXPECT_TRUE(IdentifierStart::Is('_'));
  EXPECT_TRUE(IdentifierStart::Is('\\'));

  // http://www.unicode.org/reports/tr31/
56 57
  // curl http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt |
  // grep 'Other_ID_Start'
58
  // Other_ID_Start
59 60
  EXPECT_TRUE(IdentifierStart::Is(0x1885));
  EXPECT_TRUE(IdentifierStart::Is(0x1886));
61 62 63 64 65 66 67
  EXPECT_TRUE(IdentifierStart::Is(0x2118));
  EXPECT_TRUE(IdentifierStart::Is(0x212E));
  EXPECT_TRUE(IdentifierStart::Is(0x309B));
  EXPECT_TRUE(IdentifierStart::Is(0x309C));

  // Issue 2892:
  // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start.
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
  EXPECT_FALSE(IdentifierStart::Is(0x2E2F));

#ifdef V8_INTL_SUPPORT
  // New in Unicode 8.0 (6,847 code points)
  // [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]]
  EXPECT_TRUE(IdentifierStart::Is(0x08B3));
  EXPECT_TRUE(IdentifierStart::Is(0x0AF9));
  EXPECT_TRUE(IdentifierStart::Is(0x13F8));
  EXPECT_TRUE(IdentifierStart::Is(0x9FCD));
  EXPECT_TRUE(IdentifierStart::Is(0xAB60));
  EXPECT_TRUE(IdentifierStart::Is(0x10CC0));
  EXPECT_TRUE(IdentifierStart::Is(0x108E0));
  EXPECT_TRUE(IdentifierStart::Is(0x2B820));

  // New in Unicode 9.0 (7,177 code points)
  // [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]]

  EXPECT_TRUE(IdentifierStart::Is(0x1C80));
  EXPECT_TRUE(IdentifierStart::Is(0x104DB));
  EXPECT_TRUE(IdentifierStart::Is(0x1E922));
#endif
89 90 91
}


92
TEST(CharPredicatesTest, IdentifierPart) {
93 94 95 96 97 98
  EXPECT_TRUE(IdentifierPart::Is('$'));
  EXPECT_TRUE(IdentifierPart::Is('_'));
  EXPECT_TRUE(IdentifierPart::Is('\\'));
  EXPECT_TRUE(IdentifierPart::Is(0x200C));
  EXPECT_TRUE(IdentifierPart::Is(0x200D));

99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
#ifdef V8_INTL_SUPPORT
  // New in Unicode 8.0 (6,847 code points)
  // [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]]
  EXPECT_TRUE(IdentifierPart::Is(0x08B3));
  EXPECT_TRUE(IdentifierPart::Is(0x0AF9));
  EXPECT_TRUE(IdentifierPart::Is(0x13F8));
  EXPECT_TRUE(IdentifierPart::Is(0x9FCD));
  EXPECT_TRUE(IdentifierPart::Is(0xAB60));
  EXPECT_TRUE(IdentifierPart::Is(0x10CC0));
  EXPECT_TRUE(IdentifierPart::Is(0x108E0));
  EXPECT_TRUE(IdentifierPart::Is(0x2B820));

  // [[:ID_Continue:]-[:ID_Start:]] &  [[:Age=8.0:]-[:Age=7.0:]]
  // 162 code points
  EXPECT_TRUE(IdentifierPart::Is(0x08E3));
  EXPECT_TRUE(IdentifierPart::Is(0xA69E));
  EXPECT_TRUE(IdentifierPart::Is(0x11730));

  // New in Unicode 9.0 (7,177 code points)
  // [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]]
  EXPECT_TRUE(IdentifierPart::Is(0x1C80));
  EXPECT_TRUE(IdentifierPart::Is(0x104DB));
  EXPECT_TRUE(IdentifierPart::Is(0x1E922));

  // [[:ID_Continue:]-[:ID_Start:]] &  [[:Age=9.0:]-[:Age=8.0:]]
  // 162 code points
  EXPECT_TRUE(IdentifierPart::Is(0x08D4));
  EXPECT_TRUE(IdentifierPart::Is(0x1DFB));
  EXPECT_TRUE(IdentifierPart::Is(0xA8C5));
  EXPECT_TRUE(IdentifierPart::Is(0x11450));
#endif

131
  // http://www.unicode.org/reports/tr31/
132 133 134
  // curl http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt |
  // grep 'Other_ID_(Continue|Start)'

135
  // Other_ID_Start
136 137
  EXPECT_TRUE(IdentifierPart::Is(0x1885));
  EXPECT_TRUE(IdentifierPart::Is(0x1886));
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
  EXPECT_TRUE(IdentifierPart::Is(0x2118));
  EXPECT_TRUE(IdentifierPart::Is(0x212E));
  EXPECT_TRUE(IdentifierPart::Is(0x309B));
  EXPECT_TRUE(IdentifierPart::Is(0x309C));

  // Other_ID_Continue
  EXPECT_TRUE(IdentifierPart::Is(0x00B7));
  EXPECT_TRUE(IdentifierPart::Is(0x0387));
  EXPECT_TRUE(IdentifierPart::Is(0x1369));
  EXPECT_TRUE(IdentifierPart::Is(0x1370));
  EXPECT_TRUE(IdentifierPart::Is(0x1371));
  EXPECT_TRUE(IdentifierPart::Is(0x19DA));

  // Issue 2892:
  // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start.
  EXPECT_FALSE(IdentifierPart::Is(0x2E2F));
}

#ifdef V8_INTL_SUPPORT
// Checks IdentifierStart::Is() and IdentifierPart::Is() on code points above
// U+FFFF, grouped by whether each predicate is expected to accept them. Only
// compiled when V8_INTL_SUPPORT is defined.
TEST(CharPredicatesTest, SupplementaryPlaneIdentifiers) {
  // Both ID_Start and ID_Continue.
  EXPECT_TRUE(IdentifierStart::Is(0x10403));  // Category Lu
  EXPECT_TRUE(IdentifierPart::Is(0x10403));
  EXPECT_TRUE(IdentifierStart::Is(0x1043C));  // Category Ll
  EXPECT_TRUE(IdentifierPart::Is(0x1043C));
  EXPECT_TRUE(IdentifierStart::Is(0x16F9C));  // Category Lm
  EXPECT_TRUE(IdentifierPart::Is(0x16F9C));
  EXPECT_TRUE(IdentifierStart::Is(0x10048));  // Category Lo
  EXPECT_TRUE(IdentifierPart::Is(0x10048));
  EXPECT_TRUE(IdentifierStart::Is(0x1014D));  // Category Nl
  EXPECT_TRUE(IdentifierPart::Is(0x1014D));

  // New in Unicode 8.0
  // [ [:ID_Start=Yes:] & [:Age=8.0:]] - [:Age=7.0:]
  // ID_Start code points are also ID_Continue, so both predicates are
  // checked, matching the pairs above.
  EXPECT_TRUE(IdentifierStart::Is(0x108E0));
  EXPECT_TRUE(IdentifierPart::Is(0x108E0));
  EXPECT_TRUE(IdentifierStart::Is(0x10C80));
  EXPECT_TRUE(IdentifierPart::Is(0x10C80));

  // Only ID_Continue.
  EXPECT_FALSE(IdentifierStart::Is(0x101FD));  // Category Mn
  EXPECT_TRUE(IdentifierPart::Is(0x101FD));
  EXPECT_FALSE(IdentifierStart::Is(0x11002));  // Category Mc
  EXPECT_TRUE(IdentifierPart::Is(0x11002));
  EXPECT_FALSE(IdentifierStart::Is(0x104A9));  // Category Nd
  EXPECT_TRUE(IdentifierPart::Is(0x104A9));

  // Neither.
  EXPECT_FALSE(IdentifierStart::Is(0x10111));  // Category No
  EXPECT_FALSE(IdentifierPart::Is(0x10111));
  EXPECT_FALSE(IdentifierStart::Is(0x1F4A9));  // Category So
  EXPECT_FALSE(IdentifierPart::Is(0x1F4A9));
}
#endif  // V8_INTL_SUPPORT

}  // namespace internal
}  // namespace v8