// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4

5 6
#ifndef V8_STRINGS_CHAR_PREDICATES_H_
#define V8_STRINGS_CHAR_PREDICATES_H_
7

8
#include "src/common/globals.h"
9
#include "src/strings/unicode.h"
10

11 12
namespace v8 {
namespace internal {
13 14 15 16

// Unicode character predicates as defined by ECMA-262, 3rd edition,
// used for lexical analysis.

17 18 19 20 21 22 23 24 25 26 27
// Prototypes for the constexpr single-code-point helpers. Only declarations
// appear in this header; because each one is `inline constexpr`, a definition
// has to be visible in every translation unit that calls it (presumably
// supplied by a companion -inl.h header -- TODO confirm).
//
// NOTE(review): the behavioural notes below are inferred from the names only
// and must be checked against the definitions.

// Returns int, unlike ToAsciiLower() in this header, which returns uc32. What
// it yields for non-letter input cannot be determined from this declaration.
inline constexpr int AsciiAlphaToLower(uc32 c);
// Presumably single-character tests for CR and LF respectively.
inline constexpr bool IsCarriageReturn(uc32 c);
inline constexpr bool IsLineFeed(uc32 c);
// Presumably ASCII-only classification of identifier / alphanumeric
// characters; the exact accepted sets are fixed by the definitions.
inline constexpr bool IsAsciiIdentifier(uc32 c);
inline constexpr bool IsAlphaNumeric(uc32 c);
// Presumably digit tests for radix 10, 16, 8 and 2, in that order.
inline constexpr bool IsDecimalDigit(uc32 c);
inline constexpr bool IsHexDigit(uc32 c);
inline constexpr bool IsOctalDigit(uc32 c);
inline constexpr bool IsBinaryDigit(uc32 c);
// Presumably the regular-expression word-character and newline classes --
// confirm which code points each one accepts.
inline constexpr bool IsRegExpWord(uc32 c);
inline constexpr bool IsRegExpNewline(uc32 c);
28

29 30 31 32 33 34
// ASCII case helpers (declarations only; the bodies are not part of this
// header). Going by the names, the Is* pair tests for 'a'-'z' / 'A'-'Z'
// -- TODO confirm against the definitions.
inline constexpr bool IsAsciiLower(uc32 ch);
inline constexpr bool IsAsciiUpper(uc32 ch);

// Both conversions take and return a uc32 code point.
// NOTE(review): how non-letter input is treated is not visible from here.
inline constexpr uc32 ToAsciiUpper(uc32 ch);
inline constexpr uc32 ToAsciiLower(uc32 ch);

35
// ES#sec-names-and-keywords
// This includes '_', '$' and '\', and ID_Start according to
// http://www.unicode.org/reports/tr31/, which consists of categories
// 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties
// 'Pattern_Syntax' or 'Pattern_White_Space'.
// Declaration only; the body is not part of this header.
// NOTE(review): presumably a fast-path wrapper that defers to
// IsIdentifierStartSlow() -- confirm against the definition.
inline bool IsIdentifierStart(uc32 c);
41
#ifdef V8_INTL_SUPPORT
42
V8_EXPORT_PRIVATE bool IsIdentifierStartSlow(uc32 c);
43
#else
44
inline bool IsIdentifierStartSlow(uc32 c) {
45
  // Non-BMP characters are not supported without I18N.
46 47
  return (c <= 0xFFFF) ? unibrow::ID_Start::Is(c) : false;
}
48
#endif
49

50
// ES#sec-names-and-keywords
// This includes \u200c and \u200d, and ID_Continue according to
// http://www.unicode.org/reports/tr31/, which consists of ID_Start,
// the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties
// 'Pattern_Syntax' or 'Pattern_White_Space'.
// Declaration only; the body is not part of this header.
// NOTE(review): presumably a fast-path wrapper that defers to
// IsIdentifierPartSlow() -- confirm against the definition.
inline bool IsIdentifierPart(uc32 c);
56
#ifdef V8_INTL_SUPPORT
57
V8_EXPORT_PRIVATE bool IsIdentifierPartSlow(uc32 c);
58
#else
59 60 61 62
inline bool IsIdentifierPartSlow(uc32 c) {
  // Non-BMP charaacters are not supported without I18N.
  if (c <= 0xFFFF) {
    return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c);
63
  }
64 65
  return false;
}
66
#endif
67

68 69
// ES6 draft section 11.2
// This includes all code points of Unicode category 'Zs'.
// Further included are \u0009, \u000b, \u000c, and \ufeff.
// Declaration only; the body is not part of this header.
// NOTE(review): presumably a fast-path wrapper that defers to
// IsWhiteSpaceSlow() -- confirm against the definition.
inline bool IsWhiteSpace(uc32 c);
72
#ifdef V8_INTL_SUPPORT
73
V8_EXPORT_PRIVATE bool IsWhiteSpaceSlow(uc32 c);
74
#else
75
inline bool IsWhiteSpaceSlow(uc32 c) { return unibrow::WhiteSpace::Is(c); }
76
#endif
77

78
// WhiteSpace and LineTerminator according to ES6 draft sections 11.2 and 11.3.
// This includes all the characters with Unicode category 'Z' (= Zs+Zl+Zp)
// as well as \u0009 - \u000d and \ufeff.
// Declaration only; the body is not part of this header.
// NOTE(review): presumably a fast-path wrapper that defers to
// IsWhiteSpaceOrLineTerminatorSlow() -- confirm against the definition.
inline bool IsWhiteSpaceOrLineTerminator(uc32 c);
// Slow-path check: true when |c| satisfies IsWhiteSpaceSlow(); otherwise the
// answer is whatever unibrow::IsLineTerminator() reports for |c|.
inline bool IsWhiteSpaceOrLineTerminatorSlow(uc32 c) {
  if (IsWhiteSpaceSlow(c)) return true;
  return unibrow::IsLineTerminator(c);
}

// Declaration only; the body is not part of this header.
// NOTE(review): judging by the parameter names, |next| is the code point that
// follows |c|, and the result presumably says whether they begin a line
// terminator sequence (e.g. CR LF) -- confirm against the definition.
inline bool IsLineTerminatorSequence(uc32 c, uc32 next);
87

88 89
}  // namespace internal
}  // namespace v8
90

91
#endif  // V8_STRINGS_CHAR_PREDICATES_H_