Commit be271454 authored by Leszek Swirski, committed by Commit Bot

Revert "[regexp] implement regexp property sequence proposal"

This reverts commit f4c14fd9.

Reason for revert: Breaks noi18n build

Original change's description:
> [regexp] implement regexp property sequence proposal
> 
> Also-By: mathias@chromium.org
> Bug: v8:7467
> Change-Id: I9fd6e61f4da1097c2375f671b4801e9730f792c4
> Reviewed-on: https://chromium-review.googlesource.com/1227974
> Commit-Queue: Yang Guo <yangguo@chromium.org>
> Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> Reviewed-by: Mathias Bynens <mathias@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#55991}

TBR=yangguo@chromium.org,jgruber@chromium.org,mathias@chromium.org

Change-Id: I10c67ad3ade35af920d32a7eea8ae0297677fa07
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: v8:7467
Reviewed-on: https://chromium-review.googlesource.com/1230137
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Commit-Queue: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/master@{#55996}
parent f2911319
......@@ -2360,8 +2360,6 @@ v8_source_set("v8_base") {
"src/regexp/jsregexp-inl.h",
"src/regexp/jsregexp.cc",
"src/regexp/jsregexp.h",
"src/regexp/property-sequences.cc",
"src/regexp/property-sequences.h",
"src/regexp/regexp-ast.cc",
"src/regexp/regexp-ast.h",
"src/regexp/regexp-macro-assembler-irregexp-inl.h",
......
......@@ -4767,8 +4767,6 @@ void Genesis::InitializeGlobal_harmony_intl_relative_time_format() {
#endif // V8_INTL_SUPPORT
// Empty body: this initializer performs no work.
void Genesis::InitializeGlobal_harmony_regexp_sequence() {}
Handle<JSFunction> Genesis::CreateArrayBuffer(
Handle<String> name, ArrayBufferKind array_buffer_kind) {
// Create the %ArrayBufferPrototype%
......
......@@ -212,8 +212,7 @@ DEFINE_IMPLICATION(harmony_class_fields, harmony_private_fields)
#define HARMONY_INPROGRESS_BASE(V) \
V(harmony_do_expressions, "harmony do-expressions") \
V(harmony_class_fields, "harmony fields in class literals") \
V(harmony_await_optimization, "harmony await taking 1 tick") \
V(harmony_regexp_sequence, "RegExp Unicode sequence properties")
V(harmony_await_optimization, "harmony await taking 1 tick")
#ifdef V8_INTL_SUPPORT
#define HARMONY_INPROGRESS(V) \
......
This diff is collapsed.
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_REGEXP_PROPERTY_SEQUENCES_H_
#define V8_REGEXP_PROPERTY_SEQUENCES_H_
#ifdef V8_INTL_SUPPORT
#include "src/globals.h"
namespace v8 {
namespace internal {
// Static-only holder for tables of emoji code point sequences. It is only
// declared when V8_INTL_SUPPORT is defined, and the arrays are defined outside
// this header.
// As read by RegExpParser::GetPropertySequence, each table is a run of
// 0-terminated code point sequences, with one additional 0 ending the table.
class UnicodePropertySequences : public AllStatic {
public:
// Table used for the "Emoji_Flag_Sequence" property name.
static const uc32 kEmojiFlagSequences[];
// Table used for the "Emoji_Tag_Sequence" property name.
static const uc32 kEmojiTagSequences[];
// Table used for the "Emoji_ZWJ_Sequence" property name.
static const uc32 kEmojiZWJSequences[];
};
} // namespace internal
} // namespace v8
#endif // V8_INTL_SUPPORT
#endif // V8_REGEXP_PROPERTY_SEQUENCES_H_
......@@ -12,7 +12,6 @@
#include "src/objects-inl.h"
#include "src/ostreams.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/property-sequences.h"
#include "src/utils.h"
#ifdef V8_INTL_SUPPORT
......@@ -345,23 +344,13 @@ RegExpTree* RegExpParser::ParseDisjunction() {
if (unicode()) {
ZoneList<CharacterRange>* ranges =
new (zone()) ZoneList<CharacterRange>(2, zone());
std::vector<char> name_1, name_2;
if (ParsePropertyClassName(&name_1, &name_2)) {
if (AddPropertyClassRange(ranges, p == 'P', name_1, name_2)) {
if (!ParsePropertyClass(ranges, p == 'P')) {
return ReportError(CStrVector("Invalid property name"));
}
RegExpCharacterClass* cc = new (zone())
RegExpCharacterClass(zone(), ranges, builder->flags());
builder->AddCharacterClass(cc);
break;
}
if (p == 'p' && name_2.empty()) {
RegExpTree* sequence = GetPropertySequence(name_1);
if (sequence != nullptr) {
builder->AddAtom(sequence);
break;
}
}
}
return ReportError(CStrVector("Invalid property name"));
} else {
builder->AddCharacter(p);
}
......@@ -1357,10 +1346,8 @@ bool IsUnicodePropertyValueCharacter(char c) {
} // anonymous namespace
bool RegExpParser::ParsePropertyClassName(std::vector<char>* name_1,
std::vector<char>* name_2) {
DCHECK(name_1->empty());
DCHECK(name_2->empty());
bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
bool negate) {
// Parse the property class as follows:
// - In \p{name}, 'name' is interpreted
// - either as a general category property value name.
......@@ -1369,58 +1356,55 @@ bool RegExpParser::ParsePropertyClassName(std::vector<char>* name_1,
// and 'value' is interpreted as one of the available property value names.
// - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used.
// - Loose matching is not applied.
std::vector<char> first_part;
std::vector<char> second_part;
if (current() == '{') {
// Parse \p{[PropertyName=]PropertyNameValue}
for (Advance(); current() != '}' && current() != '='; Advance()) {
if (!IsUnicodePropertyValueCharacter(current())) return false;
if (!has_next()) return false;
name_1->push_back(static_cast<char>(current()));
first_part.push_back(static_cast<char>(current()));
}
if (current() == '=') {
for (Advance(); current() != '}'; Advance()) {
if (!IsUnicodePropertyValueCharacter(current())) return false;
if (!has_next()) return false;
name_2->push_back(static_cast<char>(current()));
second_part.push_back(static_cast<char>(current()));
}
name_2->push_back(0); // null-terminate string.
second_part.push_back(0); // null-terminate string.
}
} else {
return false;
}
Advance();
name_1->push_back(0); // null-terminate string.
first_part.push_back(0); // null-terminate string.
DCHECK(name_1->size() - 1 == std::strlen(name_1->data()));
DCHECK(name_2->empty() || name_2->size() - 1 == std::strlen(name_2->data()));
return true;
}
DCHECK(first_part.size() - 1 == std::strlen(first_part.data()));
DCHECK(second_part.empty() ||
second_part.size() - 1 == std::strlen(second_part.data()));
bool RegExpParser::AddPropertyClassRange(ZoneList<CharacterRange>* add_to,
bool negate,
const std::vector<char>& name_1,
const std::vector<char>& name_2) {
if (name_2.empty()) {
if (second_part.empty()) {
// First attempt to interpret as general category property value name.
const char* name = name_1.data();
const char* name = first_part.data();
if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate,
add_to, zone())) {
result, zone())) {
return true;
}
// Interpret "Any", "ASCII", and "Assigned".
if (LookupSpecialPropertyValueName(name, add_to, negate, zone())) {
if (LookupSpecialPropertyValueName(name, result, negate, zone())) {
return true;
}
// Then attempt to interpret as binary property name with value name 'Y'.
UProperty property = u_getPropertyEnum(name);
if (!IsSupportedBinaryProperty(property)) return false;
if (!IsExactPropertyAlias(name, property)) return false;
return LookupPropertyValueName(property, negate ? "N" : "Y", false, add_to,
return LookupPropertyValueName(property, negate ? "N" : "Y", false, result,
zone());
} else {
// Both property name and value name are specified. Attempt to interpret
// the property name as enumerated property.
const char* property_name = name_1.data();
const char* value_name = name_2.data();
const char* property_name = first_part.data();
const char* value_name = second_part.data();
UProperty property = u_getPropertyEnum(property_name);
if (!IsExactPropertyAlias(property_name, property)) return false;
if (property == UCHAR_GENERAL_CATEGORY) {
......@@ -1430,88 +1414,18 @@ bool RegExpParser::AddPropertyClassRange(ZoneList<CharacterRange>* add_to,
property != UCHAR_SCRIPT_EXTENSIONS) {
return false;
}
return LookupPropertyValueName(property, value_name, negate, add_to,
return LookupPropertyValueName(property, value_name, negate, result,
zone());
}
}
// Builds the regexp tree for a sequence property name (e.g. the name inside
// \p{Emoji_Flag_Sequence}).
//
// |name_1| is read as a C string through data(); ParsePropertyClassName
// null-terminates the buffer it fills in.
//
// Returns nullptr when FLAG_harmony_regexp_sequence is off or when |name_1|
// is none of the five names handled below; otherwise returns the tree produced
// by a local RegExpBuilder.
RegExpTree* RegExpParser::GetPropertySequence(const std::vector<char>& name_1) {
if (!FLAG_harmony_regexp_sequence) return nullptr;
const char* name = name_1.data();
const uc32* sequence_list = nullptr;
// Every node built in this function uses only the unicode flag.
// NOTE(review): other flags of the enclosing pattern are not passed on here;
// confirm this is intended.
JSRegExp::Flags flags = JSRegExp::kUnicode;
// Names backed by a precomputed table of sequences.
if (NameEquals(name, "Emoji_Flag_Sequence")) {
sequence_list = UnicodePropertySequences::kEmojiFlagSequences;
} else if (NameEquals(name, "Emoji_Tag_Sequence")) {
sequence_list = UnicodePropertySequences::kEmojiTagSequences;
} else if (NameEquals(name, "Emoji_ZWJ_Sequence")) {
sequence_list = UnicodePropertySequences::kEmojiZWJSequences;
}
if (sequence_list != nullptr) {
// TODO(yangguo): this creates huge regexp code. Alternative to this is
// to create a new operator that checks for these sequences at runtime.
// Each table entry becomes one alternative made of its code points.
RegExpBuilder builder(zone(), flags);
while (true) { // Iterate through list of sequences.
while (*sequence_list != 0) { // Iterate through sequence.
builder.AddUnicodeCharacter(*sequence_list);
sequence_list++;
}
// Step over the 0 that ended this sequence; a second 0 right after it
// marks the end of the whole table.
sequence_list++;
if (*sequence_list == 0) break;
builder.NewAlternative();
}
return builder.ToRegExp();
}
// Names that are assembled from character classes instead of a table.
if (NameEquals(name, "Emoji_Keycap_Sequence")) {
// https://unicode.org/reports/tr51/#def_emoji_keycap_sequence
// emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3}
RegExpBuilder builder(zone(), flags);
// Character class for the leading [0-9#*].
ZoneList<CharacterRange>* prefix_ranges =
new (zone()) ZoneList<CharacterRange>(2, zone());
prefix_ranges->Add(CharacterRange::Range('0', '9'), zone());
prefix_ranges->Add(CharacterRange::Singleton('#'), zone());
prefix_ranges->Add(CharacterRange::Singleton('*'), zone());
builder.AddCharacterClass(
new (zone()) RegExpCharacterClass(zone(), prefix_ranges, flags));
// Fixed two-code-unit suffix U+FE0F U+20E3.
builder.AddCharacter(0xFE0F);
builder.AddCharacter(0x20E3);
return builder.ToRegExp();
} else if (NameEquals(name, "Emoji_Modifier_Sequence")) {
// https://unicode.org/reports/tr51/#def_emoji_modifier_sequence
// emoji_modifier_sequence := emoji_modifier_base emoji_modifier
RegExpBuilder builder(zone(), flags);
// First class: code points with the Emoji_Modifier_Base property. The
// return value of the lookup is not checked.
ZoneList<CharacterRange>* modifier_base_ranges =
new (zone()) ZoneList<CharacterRange>(2, zone());
LookupPropertyValueName(UCHAR_EMOJI_MODIFIER_BASE, "Y", false,
modifier_base_ranges, zone());
builder.AddCharacterClass(
new (zone()) RegExpCharacterClass(zone(), modifier_base_ranges, flags));
// Second class: code points with the Emoji_Modifier property. The return
// value of the lookup is not checked here either.
ZoneList<CharacterRange>* modifier_ranges =
new (zone()) ZoneList<CharacterRange>(2, zone());
LookupPropertyValueName(UCHAR_EMOJI_MODIFIER, "Y", false, modifier_ranges,
zone());
builder.AddCharacterClass(
new (zone()) RegExpCharacterClass(zone(), modifier_ranges, flags));
return builder.ToRegExp();
}
// Not a sequence property name handled here.
return nullptr;
}
#else // V8_INTL_SUPPORT
bool RegExpParser::AddPropertyClassRange(ZoneList<CharacterRange>* add_to,
bool negate,
const std::vector<char>& name_1,
const std::vector<char>& name_2) {
bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
bool negate) {
return false;
}
// Variant compiled when V8_INTL_SUPPORT is not defined: the argument is
// ignored and nullptr is always returned.
RegExpTree* RegExpParser::GetPropertySequence(const std::vector<char>& name) {
return nullptr;
}
#endif // V8_INTL_SUPPORT
bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
......@@ -1677,9 +1591,7 @@ void RegExpParser::ParseClassEscape(ZoneList<CharacterRange>* ranges,
if (unicode()) {
bool negate = Next() == 'P';
Advance(2);
std::vector<char> name_1, name_2;
if (!ParsePropertyClassName(&name_1, &name_2) ||
!AddPropertyClassRange(ranges, negate, name_1, name_2)) {
if (!ParsePropertyClass(ranges, negate)) {
ReportError(CStrVector("Invalid property name in character class"));
}
*is_class_escape = true;
......
......@@ -176,14 +176,7 @@ class RegExpParser {
bool ParseHexEscape(int length, uc32* value);
bool ParseUnicodeEscape(uc32* value);
bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
bool ParsePropertyClassName(std::vector<char>* name_1,
std::vector<char>* name_2);
bool AddPropertyClassRange(ZoneList<CharacterRange>* add_to, bool negate,
const std::vector<char>& name_1,
const std::vector<char>& name_2);
RegExpTree* GetPropertySequence(const std::vector<char>& name_1);
bool ParsePropertyClass(ZoneList<CharacterRange>* result, bool negate);
RegExpTree* ParseCharacterClass(const RegExpBuilder* state);
uc32 ParseOctalLiteral();
......
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-sequence
const re = /\p{Emoji_Keycap_Sequence}/u;
assertTrue(re.test('#\uFE0F\u20E3'));
assertTrue(re.test('9\uFE0F\u20E3'));
assertTrue(re.test('0\uFE0F\u20E3'));
assertTrue(re.test('1\uFE0F\u20E3'));
assertTrue(re.test('2\uFE0F\u20E3'));
assertTrue(re.test('3\uFE0F\u20E3'));
assertTrue(re.test('*\uFE0F\u20E3'));
assertTrue(re.test('5\uFE0F\u20E3'));
assertTrue(re.test('6\uFE0F\u20E3'));
assertTrue(re.test('7\uFE0F\u20E3'));
assertTrue(re.test('8\uFE0F\u20E3'));
assertTrue(re.test('4\uFE0F\u20E3'));
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-sequence
const re = /\p{Emoji_Tag_Sequence}/u;
assertTrue(re.test('\u{1F3F4}\u{E0067}\u{E0062}\u{E0065}\u{E006E}\u{E0067}\u{E007F}'));
assertTrue(re.test('\u{1F3F4}\u{E0067}\u{E0062}\u{E0073}\u{E0063}\u{E0074}\u{E007F}'));
assertTrue(re.test('\u{1F3F4}\u{E0067}\u{E0062}\u{E0077}\u{E006C}\u{E0073}\u{E007F}'));
......@@ -34,5 +34,3 @@ assertThrows("/\\p{In}/u");
assertThrows("/\\pI/u");
assertThrows("/\\p{I}/u");
assertThrows("/\\p{CJK}/u");
assertThrows("/\\p{}/u");
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-sequence
// Normal usage.
// With the u flag, the sequence properties below must parse, and each must
// match a sample string made of the listed code points.
assertDoesNotThrow("/\\p{Emoji_Flag_Sequence}/u");
assertTrue(/\p{Emoji_Flag_Sequence}/u.test("\u{1F1E9}\u{1F1EA}"));
assertDoesNotThrow("/\\p{Emoji_Keycap_Sequence}/u");
assertTrue(/\p{Emoji_Keycap_Sequence}/u.test("\u0023\ufe0f\u20e3"));
assertDoesNotThrow("/\\p{Emoji_Keycap_Sequence}/u");
// U+0022 is not one of the keycap base characters, so this must not match.
assertFalse(/\p{Emoji_Keycap_Sequence}/u.test("\u0022\ufe0f\u20e3"));
assertDoesNotThrow("/\\p{Emoji_Modifier_Sequence}/u");
assertTrue(/\p{Emoji_Modifier_Sequence}/u.test("\u26f9\u{1f3ff}"));
assertDoesNotThrow("/\\p{Emoji_ZWJ_Sequence}/u");
assertTrue(/\p{Emoji_ZWJ_Sequence}/u.test("\u{1F468}\u{200D}\u{1F467}"));
// Without unicode flag.
// The pattern still parses, but it does not match a flag; it matches the
// literal text of the escape instead.
assertDoesNotThrow("/\\p{Emoji_Flag_Sequence}/");
assertFalse(/\p{Emoji_Flag_Sequence}/.test("\u{1F1E9}\u{1F1EA}"));
assertTrue(/\p{Emoji_Flag_Sequence}/.test("\\p{Emoji_Flag_Sequence}"));
// Negated and/or inside a character class.
// Sequence properties are rejected with \P and anywhere inside [...].
assertThrows("/\\P{Emoji_Flag_Sequence}/u");
assertThrows("/[\\p{Emoji_Flag_Sequence}]/u");
assertThrows("/[\\P{Emoji_Flag_Sequence}]/u");
assertThrows("/[\\w\\p{Emoji_Flag_Sequence}]/u");
assertThrows("/[\\w\\P{Emoji_Flag_Sequence}]/u");
// Two regional indicators, but not a country.
assertFalse(/\p{Emoji_Flag_Sequence}/u.test("\u{1F1E6}\u{1F1E6}"));
// ZWJ sequence as in two ZWJ elements joined by a ZWJ, but not in the list.
assertFalse(/\p{Emoji_ZWJ_Sequence}/u.test("\u{1F467}\u{200D}\u{1F468}"));
// More complex regexp
// Sequence property embedded in a larger case-insensitive pattern.
assertEquals(
["country flag: \u{1F1E6}\u{1F1F9}"],
/Country Flag: \p{Emoji_Flag_Sequence}/iu.exec(
"this is an example of a country flag: \u{1F1E6}\u{1F1F9} is Austra"));
// Same pattern with the flag sequence wrapped in a capture group.
assertEquals(
["country flag: \u{1F1E6}\u{1F1F9}", "\u{1F1E6}\u{1F1F9}"],
/Country Flag: (\p{Emoji_Flag_Sequence})/iu.exec(
"this is an example of a country flag: \u{1F1E6}\u{1F1F9} is Austra"));
// Sequence property inside a lookbehind: the two code points consumed by ".."
// must themselves form a flag sequence.
assertEquals(
["country flag: \u{1F1E6}\u{1F1F9}"],
/Country Flag: ..(?<=\p{Emoji_Flag_Sequence})/iu.exec(
"this is an example of a country flag: \u{1F1E6}\u{1F1F9} is Austra"));
// Lookbehind with an alternation of two sequence properties; the capture
// group around the flag alternative is expected to hold the flag.
assertEquals(
["flag: \u{1F1E6}\u{1F1F9}", "\u{1F1E6}\u{1F1F9}"],
/Flag: ..(?<=(\p{Emoji_Flag_Sequence})|\p{Emoji_Keycap_Sequence})/iu.exec(
"this is an example of a country flag: \u{1F1E6}\u{1F1F9} is Austra"));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment