char-predicates.h 2.57 KB
Newer Older
1
// Copyright 2011 the V8 project authors. All rights reserved.
2 3
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 5 6 7

#ifndef V8_CHAR_PREDICATES_H_
#define V8_CHAR_PREDICATES_H_

8
#include "src/unicode.h"
9

10 11
namespace v8 {
namespace internal {
12 13 14 15 16 17

// Unicode character predicates as defined by ECMA-262, 3rd edition,
// used for lexical analysis.

// Line-break predicates on a single code point. Only the declarations
// appear here; no definition is visible in this header.
// NOTE(review): presumably these match U+000D (CR) and U+000A (LF)
// respectively - confirm against the definitions.
inline bool IsCarriageReturn(uc32 c);
inline bool IsLineFeed(uc32 c);
18 19
// Identifier / alphanumeric predicates on a single code point. Only the
// declarations appear here; no definition is visible in this header.
// NOTE(review): the names suggest ASCII-only checks (no Unicode table
// lookup) - confirm the exact accepted sets against the definitions.
inline bool IsAsciiIdentifier(uc32 c);
inline bool IsAlphaNumeric(uc32 c);
20 21
// Digit predicates for numeric literals. Only the declarations appear here;
// no definition is visible in this header.
// NOTE(review): presumably '0'-'9' and '0'-'9' / 'a'-'f' / 'A'-'F'
// respectively - confirm against the definitions.
inline bool IsDecimalDigit(uc32 c);
inline bool IsHexDigit(uc32 c);
22 23
// Digit predicates for octal and binary literals. Only the declarations
// appear here; no definition is visible in this header.
// NOTE(review): presumably '0'-'7' and '0'-'1' respectively - confirm
// against the definitions.
inline bool IsOctalDigit(uc32 c);
inline bool IsBinaryDigit(uc32 c);
24 25
// Predicates used by regular-expression handling. Only the declarations
// appear here; no definition is visible in this header.
// NOTE(review): presumably these back the RegExp word-character and
// newline classes - confirm the exact sets against the definitions.
inline bool IsRegExpWord(uc32 c);
inline bool IsRegExpNewline(uc32 c);
26

27 28 29 30 31 32 33

// Identifier predicates for code points above U+FFFF. IdentifierStart::Is
// and IdentifierPart::Is in this header forward to these whenever
// c > 0xFFFF. Only the declarations appear here; no definition is visible
// in this header.
struct SupplementaryPlanes {
  // Identifier-start test; called by IdentifierStart::Is for c > 0xFFFF.
  static bool IsIDStart(uc32 c);
  // Identifier-part test; called by IdentifierPart::Is for c > 0xFFFF.
  static bool IsIDPart(uc32 c);
};


34 35 36 37 38
// ES6 draft section 11.6
// This includes '_', '$' and '\', and ID_Start according to
// http://www.unicode.org/reports/tr31/, which consists of categories
// 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties
// 'Pattern_Syntax' or 'Pattern_White_Space'.
39
// For code points in the supplementary planes (above U+FFFF), we can resort
// to ICU (if available).
40
struct IdentifierStart {
41 42 43 44
  static inline bool Is(uc32 c) {
    if (c > 0xFFFF) return SupplementaryPlanes::IsIDStart(c);
    return unibrow::ID_Start::Is(c);
  }
45 46 47
};


48 49 50 51 52
// ES6 draft section 11.6
// This includes \u200c and \u200d, and ID_Continue according to
// http://www.unicode.org/reports/tr31/, which consists of ID_Start,
// the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties
// 'Pattern_Syntax' or 'Pattern_White_Space'.
53
// For code points in the supplementary planes (above U+FFFF), we can resort
// to ICU (if available).
54 55
struct IdentifierPart {
  static inline bool Is(uc32 c) {
56
    if (c > 0xFFFF) return SupplementaryPlanes::IsIDPart(c);
57
    return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c);
58 59 60
  }
};

61

62 63 64 65 66
// ES6 draft section 11.2
// This includes all code points of Unicode category 'Zs'.
// \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1,
// so it is also included.
// Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff.
67
// There are no category 'Zs' code points in the supplementary planes.
68
struct WhiteSpace {
69
  static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); }
70 71 72
};


73 74
// WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3
// The LineTerminator code points are \u000a, \u000d, \u2028, and \u2029.
75 76 77 78 79 80
struct WhiteSpaceOrLineTerminator {
  // Accepts |c| when it is white space or a line terminator.
  // WhiteSpace::Is is asked first; unibrow::LineTerminator is consulted
  // only if that check fails.
  static inline bool Is(uc32 c) {
    if (WhiteSpace::Is(c)) return true;
    return unibrow::LineTerminator::Is(c);
  }
};

81 82 83
} }  // namespace v8::internal

#endif  // V8_CHAR_PREDICATES_H_