Commit 2dba18ed authored by Toon Verwaest's avatar Toon Verwaest Committed by Commit Bot

[intl] Cleanup ascii char predicates

Reuse helpers and move generic functions to char-predicates

Change-Id: I63bf704bdd8e8cd788e80ad2d42b689527865e00
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1653118Reviewed-by: 's avatarLeszek Swirski <leszeks@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62116}
parent e1f91dba
...@@ -53,9 +53,8 @@ namespace v8 { ...@@ -53,9 +53,8 @@ namespace v8 {
namespace internal { namespace internal {
namespace { namespace {
inline bool IsASCIIUpper(uint16_t ch) { return ch >= 'A' && ch <= 'Z'; }
const uint8_t kToLower[256] = { constexpr uint8_t kToLower[256] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23,
...@@ -80,20 +79,17 @@ const uint8_t kToLower[256] = { ...@@ -80,20 +79,17 @@ const uint8_t kToLower[256] = {
0xFC, 0xFD, 0xFE, 0xFF, 0xFC, 0xFD, 0xFE, 0xFF,
}; };
inline uint16_t ToLatin1Lower(uint16_t ch) { inline constexpr uint16_t ToLatin1Lower(uint16_t ch) {
return static_cast<uint16_t>(kToLower[ch]); return static_cast<uint16_t>(kToLower[ch]);
} }
inline uint16_t ToASCIIUpper(uint16_t ch) {
return ch & ~((ch >= 'a' && ch <= 'z') << 5);
}
// Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF. // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
inline uint16_t ToLatin1Upper(uint16_t ch) { inline constexpr uint16_t ToLatin1Upper(uint16_t ch) {
#if V8_CAN_HAVE_DCHECK_IN_CONSTEXPR
DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF); DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
#endif
return ch & return ch &
~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xF7)) ~((IsAsciiLower(ch) || (((ch & 0xE0) == 0xE0) && ch != 0xF7)) << 5);
<< 5);
} }
template <typename Char> template <typename Char>
...@@ -105,7 +101,7 @@ bool ToUpperFastASCII(const Vector<const Char>& src, ...@@ -105,7 +101,7 @@ bool ToUpperFastASCII(const Vector<const Char>& src,
for (auto it = src.begin(); it != src.end(); ++it) { for (auto it = src.begin(); it != src.end(); ++it) {
uint16_t ch = static_cast<uint16_t>(*it); uint16_t ch = static_cast<uint16_t>(*it);
ored |= ch; ored |= ch;
result->SeqOneByteStringSet(index++, ToASCIIUpper(ch)); result->SeqOneByteStringSet(index++, ToAsciiUpper(ch));
} }
return !(ored & ~0x7F); return !(ored & ~0x7F);
} }
...@@ -156,7 +152,7 @@ void ToUpperWithSharpS(const Vector<const Char>& src, ...@@ -156,7 +152,7 @@ void ToUpperWithSharpS(const Vector<const Char>& src,
inline int FindFirstUpperOrNonAscii(String s, int length) { inline int FindFirstUpperOrNonAscii(String s, int length) {
for (int index = 0; index < length; ++index) { for (int index = 0; index < length; ++index) {
uint16_t ch = s.Get(index); uint16_t ch = s.Get(index);
if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { if (V8_UNLIKELY(IsAsciiUpper(ch) || ch & ~0x7F)) {
return index; return index;
} }
} }
...@@ -684,19 +680,10 @@ V8_WARN_UNUSED_RESULT Maybe<bool> Intl::GetBoolOption( ...@@ -684,19 +680,10 @@ V8_WARN_UNUSED_RESULT Maybe<bool> Intl::GetBoolOption(
namespace { namespace {
char AsciiToLower(char c) {
if (c < 'A' || c > 'Z') {
return c;
}
return c | (1 << 5);
}
bool IsLowerAscii(char c) { return c >= 'a' && c < 'z'; }
bool IsTwoLetterLanguage(const std::string& locale) { bool IsTwoLetterLanguage(const std::string& locale) {
// Two letters, both in range 'a'-'z'... // Two letters, both in range 'a'-'z'...
return locale.length() == 2 && IsLowerAscii(locale[0]) && return locale.length() == 2 && IsAsciiLower(locale[0]) &&
IsLowerAscii(locale[1]); IsAsciiLower(locale[1]);
} }
bool IsDeprecatedLanguage(const std::string& locale) { bool IsDeprecatedLanguage(const std::string& locale) {
...@@ -771,7 +758,7 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate, ...@@ -771,7 +758,7 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
// Because per BCP 47 2.1.1 language tags are case-insensitive, lowercase // Because per BCP 47 2.1.1 language tags are case-insensitive, lowercase
// the input before any more check. // the input before any more check.
std::transform(locale.begin(), locale.end(), locale.begin(), AsciiToLower); std::transform(locale.begin(), locale.end(), locale.begin(), ToAsciiLower);
// ICU maps a few grandfathered tags to what looks like a regular language // ICU maps a few grandfathered tags to what looks like a regular language
// tag even though IANA language tag registry does not have a preferred // tag even though IANA language tag registry does not have a preferred
......
...@@ -51,6 +51,18 @@ inline constexpr bool IsBinaryDigit(uc32 c) { ...@@ -51,6 +51,18 @@ inline constexpr bool IsBinaryDigit(uc32 c) {
return c == '0' || c == '1'; return c == '0' || c == '1';
} }
inline constexpr bool IsAsciiLower(uc32 c) { return IsInRange(c, 'a', 'z'); }
inline constexpr bool IsAsciiUpper(uc32 c) { return IsInRange(c, 'A', 'Z'); }
inline constexpr uc32 ToAsciiUpper(uc32 c) {
return c & ~(IsAsciiLower(c) << 5);
}
inline constexpr uc32 ToAsciiLower(uc32 c) {
return c | (IsAsciiUpper(c) << 5);
}
inline constexpr bool IsRegExpWord(uc16 c) { inline constexpr bool IsRegExpWord(uc16 c) {
return IsInRange(AsciiAlphaToLower(c), 'a', 'z') || IsDecimalDigit(c) || return IsInRange(AsciiAlphaToLower(c), 'a', 'z') || IsDecimalDigit(c) ||
(c == '_'); (c == '_');
......
...@@ -26,6 +26,12 @@ inline constexpr bool IsBinaryDigit(uc32 c); ...@@ -26,6 +26,12 @@ inline constexpr bool IsBinaryDigit(uc32 c);
inline constexpr bool IsRegExpWord(uc32 c); inline constexpr bool IsRegExpWord(uc32 c);
inline constexpr bool IsRegExpNewline(uc32 c); inline constexpr bool IsRegExpNewline(uc32 c);
inline constexpr bool IsAsciiLower(uc32 ch);
inline constexpr bool IsAsciiUpper(uc32 ch);
inline constexpr uc32 ToAsciiUpper(uc32 ch);
inline constexpr uc32 ToAsciiLower(uc32 ch);
// ES#sec-names-and-keywords // ES#sec-names-and-keywords
// This includes '_', '$' and '\', and ID_Start according to // This includes '_', '$' and '\', and ID_Start according to
// http://www.unicode.org/reports/tr31/, which consists of categories // http://www.unicode.org/reports/tr31/, which consists of categories
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment