Commit 5dedb164, authored by yangguo, committed by Commit bot

[regexp] require exact match for unicode property names.

R=littledan@chromium.org
BUG=v8:4810
LOG=N

Review URL: https://codereview.chromium.org/1824613002

Cr-Commit-Position: refs/heads/master@{#34961}
parent 5ff7901e
......@@ -845,11 +845,32 @@ bool RegExpParser::ParseUnicodeEscape(uc32* value) {
}
#ifdef V8_I18N_SUPPORT
// Returns true only when |property_name| is identical, per strcmp, to one of
// the names ICU reports for |property_value| of |property|: the short name,
// the long name, or any further name returned for the choices that follow
// U_LONG_PROPERTY_NAME. No normalization of case, spaces or underscores is
// applied to either side of the comparison.
bool IsExactPropertyValueAlias(const char* property_name, UProperty property,
                               int32_t property_value) {
  // Try the short name first; it is skipped when ICU returns NULL for it.
  const char* alias =
      u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME);
  if (alias != NULL && strcmp(property_name, alias) == 0) return true;

  // Walk the name choices upwards from U_LONG_PROPERTY_NAME. The first NULL
  // returned by ICU is treated as the end of the alias list.
  int choice = U_LONG_PROPERTY_NAME;
  while ((alias = u_getPropertyValueName(
              property, property_value,
              static_cast<UPropertyNameChoice>(choice))) != NULL) {
    if (strcmp(property_name, alias) == 0) return true;
    ++choice;
  }
  return false;
}
bool LookupPropertyClass(UProperty property, const char* property_name,
ZoneList<CharacterRange>* result, Zone* zone) {
int32_t property_value = u_getPropertyValueEnum(property, property_name);
if (property_value == UCHAR_INVALID_CODE) return false;
// We require the property name to match exactly to one of the property value
// aliases. However, u_getPropertyValueEnum uses loose matching.
if (!IsExactPropertyValueAlias(property_name, property, property_value)) {
return false;
}
USet* set = uset_openEmpty();
UErrorCode ec = U_ZERO_ERROR;
uset_applyIntPropertyValue(set, property, property_value, &ec);
......
......@@ -8,29 +8,26 @@ function t(re, s) { assertTrue(re.test(s)); }
// Asserts that the regular expression |re| finds no match in the string |s|.
function f(re, s) {
  var matched = re.test(s);
  assertFalse(matched);
}
t(/\p{InASCII}+/u, ".");
t(/\p{In ASCII}+/u, "supercalifragilisticexpialidocious");
t(/\p{In Basic Latin}+/u, ".");
t(/\p{InBasicLatin}+/u, "supercalifragilisticexpialidocious");
t(/\p{InASCII}+/u, "supercalifragilisticexpialidocious");
t(/\p{InBasic_Latin}+/u, ".");
t(/\p{InBasic_Latin}+/u, "supercalifragilisticexpialidocious");
t(/\p{InBasicLatin}+/u, ".");
t(/\p{InBasicLatin}+/u, "supercalifragilisticexpialidocious");
t(/\p{In CJK}+/u, "话说天下大势,分久必合,合久必分");
t(/\p{InCJK}+/u, "吾庄后有一桃园,花开正盛");
f(/\p{InCJKUnifiedIdeographs}+/u, "おはようございます");
f(/\p{In CJK unified ideographs}+/u,
t(/\p{InCJK}+/u, "话说天下大势,分久必合,合久必分");
t(/\p{InCJK_Unified_Ideographs}+/u, "吾庄后有一桃园,花开正盛");
f(/\p{InCJK}+/u, "おはようございます");
f(/\p{InCJK_Unified_Ideographs}+/u,
"Something is rotten in the state of Denmark");
t(/\p{InLatin_1}+/u, "Wie froh bin ich, daß ich weg bin!");
f(/\p{InASCII}+/u, "奔腾千里荡尘埃,渡水登山紫雾开");
f(/\p{In Latin 1}+/u, "いただきます");
f(/\p{InLatin_1_Supplement}+/u, "奔腾千里荡尘埃,渡水登山紫雾开");
f(/\p{InLatin_1_Sup}+/u, "いただきます");
t(/\p{InHiragana}/u, "いただきます");
t(/\p{Hiragana}/u, "\u{1b001}"); // This refers to the script "Hiragana".
f(/\p{InHiragana}/u, "\u{1b001}"); // This refers to the block "Hiragana".
t(/\p{InGreekAndCoptic}/u, "ἄνδρα μοι ἔννεπε, μοῦσα, πολύτροπον, ὃς μάλα πολλὰ");
t(/\p{InGreek_And_Coptic}/u,
"ἄνδρα μοι ἔννεπε, μοῦσα, πολύτροπον, ὃς μάλα πολλὰ");
t(/\p{InGreek}/u, "μῆνιν ἄειδε θεὰ Πηληϊάδεω Ἀχιλῆος");
assertThrows("/\\p{In}/u");
......
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-property --harmony-unicode-regexps
// Each pattern below is passed as source text, so a rejected property name
// shows up as an exception when the regexp literal is evaluated.

// "In"-prefixed names: a space inside the name or a missing underscore is
// rejected, while the short and underscore-separated spellings are accepted.
assertThrows("/\\p{In CJK}/u");
assertThrows("/\\p{InCJKUnifiedIdeographs}/u");
assertDoesNotThrow("/\\p{InCJK}/u");
assertDoesNotThrow("/\\p{InCJK_Unified_Ideographs}/u");

// Three spellings of the Cyrillic supplement name are accepted; dropping the
// underscore or lower-casing "Supplementary" is not.
assertDoesNotThrow("/\\p{InCyrillic_Sup}/u");
assertDoesNotThrow("/\\p{InCyrillic_Supplement}/u");
assertDoesNotThrow("/\\p{InCyrillic_Supplementary}/u");
assertThrows("/\\p{InCyrillicSupplementary}/u");
assertThrows("/\\p{InCyrillic_supplementary}/u");

// Single-letter, two-letter and spelled-out names without the "In" prefix
// (presumably general category values -- confirm against the Unicode tables).
// Replacing the underscore in "Combining_Mark" with a space is rejected.
assertDoesNotThrow("/\\pC/u");
assertDoesNotThrow("/\\p{Other}/u");
assertDoesNotThrow("/\\p{Cc}/u");
assertDoesNotThrow("/\\p{Control}/u");
assertDoesNotThrow("/\\p{cntrl}/u");
assertDoesNotThrow("/\\p{M}/u");
assertDoesNotThrow("/\\p{Mark}/u");
assertDoesNotThrow("/\\p{Combining_Mark}/u");
assertThrows("/\\p{Combining Mark}/u");

// Four-letter and spelled-out names (presumably script values -- confirm).
// The spelled-out name without its underscore is rejected.
assertDoesNotThrow("/\\p{Copt}/u");
assertDoesNotThrow("/\\p{Coptic}/u");
assertDoesNotThrow("/\\p{Qaac}/u");
assertDoesNotThrow("/\\p{Egyp}/u");
assertDoesNotThrow("/\\p{Egyptian_Hieroglyphs}/u");
assertThrows("/\\p{EgyptianHieroglyphs}/u");
......@@ -64,7 +64,4 @@ assertFalse(/\p{L}/u.test("\uA6EE"));
assertTrue(/\P{L}/u.test("\uA6EE"));
assertTrue(/\p{Lowercase_Letter}/u.test("a"));
assertTrue(/\p{LowercaseLetter}/u.test("a"));
assertTrue(/\p{Lowercaseletter}/u.test("a"));
assertTrue(/\p{lowercase letter}/u.test("a"));
assertTrue(/\p{lowercase letter}/u.test("a"));
assertTrue(/\p{Math_Symbol}/u.test("+"));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment