Commit e0d0c96a, authored by yangguo and committed by Commit bot

[regexp] experimental implementation for \p property class.

Currently we only support general categories for property classes.

R=erik.corry@gmail.com, erikcorry@chromium.org, littledan@chromium.org
BUG=v8:4743
LOG=N

Review URL: https://codereview.chromium.org/1685593002

Cr-Commit-Position: refs/heads/master@{#33872}
parent 99a58d30
......@@ -2317,6 +2317,7 @@ EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_regexps)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_unicode_regexps)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_do_expressions)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_regexp_lookbehind)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_regexp_property)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_function_name)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_function_sent)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(promise_extra)
......@@ -2961,6 +2962,7 @@ bool Genesis::InstallExperimentalNatives() {
static const char* harmony_do_expressions_natives[] = {nullptr};
static const char* harmony_regexp_subclass_natives[] = {nullptr};
static const char* harmony_regexp_lookbehind_natives[] = {nullptr};
static const char* harmony_regexp_property_natives[] = {nullptr};
static const char* harmony_function_name_natives[] = {nullptr};
static const char* harmony_function_sent_natives[] = {nullptr};
static const char* promise_extra_natives[] = {"native promise-extra.js",
......
......@@ -213,7 +213,8 @@ DEFINE_IMPLICATION(es_staging, move_object_start)
V(harmony_tailcalls, "harmony tail calls") \
V(harmony_object_values_entries, "harmony Object.values / Object.entries") \
V(harmony_object_own_property_descriptors, \
"harmony Object.getOwnPropertyDescriptors()")
"harmony Object.getOwnPropertyDescriptors()") \
V(harmony_regexp_property, "harmony unicode regexp property classes")
// Features that are complete (but still behind --harmony/es-staging flag).
#define HARMONY_STAGED(V) \
......
......@@ -352,6 +352,23 @@ RegExpTree* RegExpParser::ParseDisjunction() {
builder->AddCharacterClass(cc);
break;
}
case 'p':
case 'P': {
uc32 p = Next();
Advance(2);
if (unicode()) {
ZoneList<CharacterRange>* ranges = ParsePropertyClass();
if (ranges == nullptr) {
return ReportError(CStrVector("Invalid property name"));
}
RegExpCharacterClass* cc =
new (zone()) RegExpCharacterClass(ranges, p == 'P');
builder->AddCharacterClass(cc);
} else {
builder->AddCharacter(p);
}
break;
}
case '1':
case '2':
case '3':
......@@ -801,6 +818,55 @@ bool RegExpParser::ParseUnicodeEscape(uc32* value) {
return ParseHexEscape(4, value);
}
// Parses the property name that follows a "\p" or "\P" escape and expands it
// into the code point ranges of the named Unicode general category.
//
// Accepted forms, starting at current():
//   {X} or {Xx}  - one uppercase ASCII letter, optionally followed by one
//                  lowercase ASCII letter, in braces (e.g. {L}, {Lu}).
//   X            - a single uppercase ASCII letter without braces (e.g. L).
//
// On success the name (including the closing brace, if any) has been consumed
// and a zone-allocated list of ranges is returned. Returns nullptr if the name
// is malformed, if ICU does not recognize it as a general category, if ICU
// reports a failure, or if the resulting set is empty. Part of the input may
// already have been consumed when nullptr is returned. Without
// V8_I18N_SUPPORT this always returns nullptr.
ZoneList<CharacterRange>* RegExpParser::ParsePropertyClass() {
#ifdef V8_I18N_SUPPORT
  // Room for at most two name characters plus the terminating NUL.
  char property_name[3];
  memset(property_name, 0, sizeof(property_name));
  if (current() == '{') {
    Advance();
    if (current() < 'A' || current() > 'Z') return nullptr;
    property_name[0] = static_cast<char>(current());
    Advance();
    if (current() >= 'a' && current() <= 'z') {
      property_name[1] = static_cast<char>(current());
      Advance();
    }
    if (current() != '}') return nullptr;
  } else if (current() >= 'A' && current() <= 'Z') {
    property_name[0] = static_cast<char>(current());
  } else {
    return nullptr;
  }
  // Consume the closing brace, or the single letter of the short form.
  Advance();

  int32_t category =
      u_getPropertyValueEnum(UCHAR_GENERAL_CATEGORY_MASK, property_name);
  if (category == UCHAR_INVALID_CODE) return nullptr;

  USet* set = uset_openEmpty();
  UErrorCode ec = U_ZERO_ERROR;
  uset_applyIntPropertyValue(set, UCHAR_GENERAL_CATEGORY_MASK, category, &ec);
  ZoneList<CharacterRange>* ranges = nullptr;
  // ICU warning codes are negative and still indicate success, so test the
  // status with U_SUCCESS instead of comparing against U_ZERO_ERROR.
  if (U_SUCCESS(ec) && !uset_isEmpty(set)) {
    // Only code point ranges are wanted; drop any multi-character strings.
    uset_removeAllStrings(set);
    int item_count = uset_getItemCount(set);
    ranges = new (zone()) ZoneList<CharacterRange>(item_count, zone());
    // Accumulates the return values of uset_getItem; expected to stay 0
    // because every remaining item should be a range.
    int item_result = 0;
    for (int i = 0; i < item_count; i++) {
      uc32 start = 0;
      uc32 end = 0;
      item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
      ranges->Add(CharacterRange::Range(start, end), zone());
    }
    DCHECK(U_SUCCESS(ec));
    DCHECK_EQ(0, item_result);
  }
  // The ranges were copied into the zone, so the ICU set can be released.
  uset_close(set);
  return ranges;
#else   // V8_I18N_SUPPORT
  return nullptr;
#endif  // V8_I18N_SUPPORT
}
bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
uc32 x = 0;
......
......@@ -173,6 +173,7 @@ class RegExpParser BASE_EMBEDDED {
bool ParseHexEscape(int length, uc32* value);
bool ParseUnicodeEscape(uc32* value);
bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
ZoneList<CharacterRange>* ParsePropertyClass();
uc32 ParseOctalLiteral();
......
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-property --harmony-unicode-regexps
assertThrows("/\\p/u");
assertThrows("/\\p{garbage}/u");
assertThrows("/\\p{}/u");
assertThrows("/\\p{/u");
assertThrows("/\\p}/u");
assertThrows("/\p{Math}/u");
assertThrows("/\p{Bidi_M}/u");
assertThrows("/\p{Hex}/u");
assertTrue(/\p{Ll}/u.test("a"));
assertFalse(/\P{Ll}/u.test("a"));
assertTrue(/\P{Ll}/u.test("A"));
assertFalse(/\p{Ll}/u.test("A"));
assertTrue(/\p{Ll}/u.test("\u{1D7BE}"));
assertFalse(/\P{Ll}/u.test("\u{1D7BE}"));
assertFalse(/\p{Ll}/u.test("\u{1D5E3}"));
assertTrue(/\P{Ll}/u.test("\u{1D5E3}"));
assertTrue(/\p{Ll}/iu.test("a"));
assertTrue(/\p{Ll}/iu.test("\u{118D4}"));
assertTrue(/\p{Ll}/iu.test("A"));
assertTrue(/\p{Ll}/iu.test("\u{118B4}"));
assertFalse(/\P{Ll}/iu.test("a"));
assertFalse(/\P{Ll}/iu.test("\u{118D4}"));
assertFalse(/\P{Ll}/iu.test("A"));
assertFalse(/\P{Ll}/iu.test("\u{118B4}"));
assertTrue(/\p{Lu}/u.test("A"));
assertFalse(/\P{Lu}/u.test("A"));
assertTrue(/\P{Lu}/u.test("a"));
assertFalse(/\p{Lu}/u.test("a"));
assertTrue(/\p{Lu}/u.test("\u{1D5E3}"));
assertFalse(/\P{Lu}/u.test("\u{1D5E3}"));
assertFalse(/\p{Lu}/u.test("\u{1D7BE}"));
assertTrue(/\P{Lu}/u.test("\u{1D7BE}"));
assertTrue(/\p{Lu}/iu.test("a"));
assertTrue(/\p{Lu}/iu.test("\u{118D4}"));
assertTrue(/\p{Lu}/iu.test("A"));
assertTrue(/\p{Lu}/iu.test("\u{118B4}"));
assertFalse(/\P{Lu}/iu.test("a"));
assertFalse(/\P{Lu}/iu.test("\u{118D4}"));
assertFalse(/\P{Lu}/iu.test("A"));
assertFalse(/\P{Lu}/iu.test("\u{118B4}"));
assertTrue(/\p{Sm}/u.test("+"));
assertFalse(/\P{Sm}/u.test("+"));
assertTrue(/\p{Sm}/u.test("\u{1D6C1}"));
assertFalse(/\P{Sm}/u.test("\u{1D6C1}"));
assertTrue(/\pL/u.test("a"));
assertFalse(/\PL/u.test("a"));
assertFalse(/\pL/u.test("1"));
assertTrue(/\PL/u.test("1"));
assertTrue(/\pL/u.test("\u1FAB"));
assertFalse(/\PL/u.test("\u1FAB"));
assertFalse(/\p{L}/u.test("\uA6EE"));
assertTrue(/\P{L}/u.test("\uA6EE"));
......@@ -290,9 +290,11 @@
# TODO(titzer): SSE 4.1 required for asm-wasm test (floor).
'wasm/asm-wasm': [SKIP],
# case-insensitive unicode regexp relies on case mapping provided by ICU.
# case-insensitive unicode regexp relies on case mapping provided by ICU.
'harmony/unicode-regexp-ignore-case': [PASS, ['no_i18n == True', FAIL]],
'harmony/unicode-regexp-ignore-case-noi18n': [FAIL, ['no_i18n == True', PASS]],
# desugaring regexp property class relies on ICU.
'harmony/unicode-regexp-property-class': [PASS, ['no_i18n == True', FAIL]],
}], # ALWAYS
['novfp3 == True', {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment