Commit 2305cfb4 authored by marja's avatar marja Committed by Commit bot

ES6 unicode escapes, part 2: Regexps.

Allows \u{xxxxx} in regexps. Behind the --harmony-unicode flag.

Part 1 is here: https://codereview.chromium.org/716423002

BUG=v8:3648
LOG=N

Review URL: https://codereview.chromium.org/788043005

Cr-Commit-Position: refs/heads/master@{#26018}
parent f9a22a5e
......@@ -1625,7 +1625,6 @@ EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_tostring)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_proxies)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_templates)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_sloppy)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_unicode)
void Genesis::InitializeGlobal_harmony_regexps() {
Handle<JSObject> builtins(native_context()->builtins());
......@@ -1639,6 +1638,18 @@ void Genesis::InitializeGlobal_harmony_regexps() {
}
void Genesis::InitializeGlobal_harmony_unicode() {
Handle<JSObject> builtins(native_context()->builtins());
Handle<HeapObject> flag(FLAG_harmony_unicode ? heap()->true_value()
: heap()->false_value());
PropertyAttributes attributes =
static_cast<PropertyAttributes>(DONT_DELETE | READ_ONLY);
Runtime::DefineObjectProperty(builtins, factory()->harmony_unicode_string(),
flag, attributes).Assert();
}
Handle<JSFunction> Genesis::InstallInternalArray(
Handle<JSBuiltinsObject> builtins,
const char* name,
......
......@@ -227,7 +227,9 @@ namespace internal {
V(ignore_case_string, "ignoreCase") \
V(multiline_string, "multiline") \
V(sticky_string, "sticky") \
V(unicode_string, "unicode") \
V(harmony_regexps_string, "harmony_regexps") \
V(harmony_unicode_string, "harmony_unicode") \
V(input_string, "input") \
V(index_string, "index") \
V(last_index_string, "lastIndex") \
......
......@@ -154,7 +154,7 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
RegExpCompileData parse_result;
FlatStringReader reader(isolate, pattern);
if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
&parse_result, &zone)) {
flags.is_unicode(), &parse_result, &zone)) {
// Throw an exception if we fail to parse the pattern.
return ThrowRegExpException(re,
pattern,
......@@ -401,8 +401,7 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
RegExpCompileData compile_data;
FlatStringReader reader(isolate, pattern);
if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
&compile_data,
&zone)) {
flags.is_unicode(), &compile_data, &zone)) {
// Throw an exception if we fail to parse the pattern.
// THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
USE(ThrowRegExpException(re,
......
......@@ -1274,6 +1274,15 @@ RegExpMirror.prototype.sticky = function() {
};
/**
 * Reports the state of the unicode (u) flag of the mirrored regular
 * expression by reading the 'unicode' property of the underlying value.
 * @return {boolean} Value of the unicode flag
 */
RegExpMirror.prototype.unicode = function() {
  var regexp = this.value_;
  return regexp.unicode;
};
RegExpMirror.prototype.toText = function() {
// Simple toText, used when there is no specialization in a subclass.
return "/" + this.source() + "/";
......
......@@ -7902,7 +7902,8 @@ class JSRegExp: public JSObject {
GLOBAL = 1,
IGNORE_CASE = 2,
MULTILINE = 4,
STICKY = 8
STICKY = 8,
UNICODE_ESCAPES = 16
};
class Flags {
......@@ -7912,6 +7913,7 @@ class JSRegExp: public JSObject {
bool is_ignore_case() { return (value_ & IGNORE_CASE) != 0; }
bool is_multiline() { return (value_ & MULTILINE) != 0; }
bool is_sticky() { return (value_ & STICKY) != 0; }
bool is_unicode() { return (value_ & UNICODE_ESCAPES) != 0; }
uint32_t value() { return value_; }
private:
uint32_t value_;
......
......@@ -4278,10 +4278,8 @@ void Parser::Internalize() {
// Regular expressions
RegExpParser::RegExpParser(FlatStringReader* in,
Handle<String>* error,
bool multiline,
Zone* zone)
RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
bool multiline, bool unicode, Zone* zone)
: isolate_(zone->isolate()),
zone_(zone),
error_(error),
......@@ -4292,6 +4290,7 @@ RegExpParser::RegExpParser(FlatStringReader* in,
capture_count_(0),
has_more_(true),
multiline_(multiline),
unicode_(unicode),
simple_(false),
contains_anchor_(false),
is_scanned_for_captures_(false),
......@@ -4348,6 +4347,13 @@ bool RegExpParser::simple() {
}
// Returns true if |c| is one of the regexp syntax characters
//   ^ $ \ . * + ? ( ) [ ] { } |
// and false for every other code point.
bool RegExpParser::IsSyntaxCharacter(uc32 c) {
  switch (c) {
    case '^':
    case '$':
    case '\\':
    case '.':
    case '*':
    case '+':
    case '?':
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
    case '|':
      return true;
    default:
      return false;
  }
}
RegExpTree* RegExpParser::ReportError(Vector<const char> message) {
failed_ = true;
*error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked();
......@@ -4564,9 +4570,15 @@ RegExpTree* RegExpParser::ParseDisjunction() {
}
uc32 first_digit = Next();
if (first_digit == '8' || first_digit == '9') {
// Treat as identity escape
builder->AddCharacter(first_digit);
Advance(2);
// If the 'u' flag is present, only syntax characters can be escaped,
// no other identity escapes are allowed. If the 'u' flag is not
// present, all identity escapes are allowed.
if (!FLAG_harmony_unicode || !unicode_) {
builder->AddCharacter(first_digit);
Advance(2);
} else {
return ReportError(CStrVector("Invalid escape"));
}
break;
}
}
......@@ -4622,25 +4634,41 @@ RegExpTree* RegExpParser::ParseDisjunction() {
uc32 value;
if (ParseHexEscape(2, &value)) {
builder->AddCharacter(value);
} else {
} else if (!FLAG_harmony_unicode || !unicode_) {
builder->AddCharacter('x');
} else {
// If the 'u' flag is present, invalid escapes are not treated as
// identity escapes.
return ReportError(CStrVector("Invalid escape"));
}
break;
}
case 'u': {
Advance(2);
uc32 value;
if (ParseHexEscape(4, &value)) {
if (ParseUnicodeEscape(&value)) {
builder->AddCharacter(value);
} else {
} else if (!FLAG_harmony_unicode || !unicode_) {
builder->AddCharacter('u');
} else {
// If the 'u' flag is present, invalid escapes are not treated as
// identity escapes.
return ReportError(CStrVector("Invalid unicode escape"));
}
break;
}
default:
// Identity escape.
builder->AddCharacter(Next());
Advance(2);
Advance();
// If the 'u' flag is present, only syntax characters can be escaped, no
// other identity escapes are allowed. If the 'u' flag is not present,
// all identity escapes are allowed.
if (!FLAG_harmony_unicode || !unicode_ ||
IsSyntaxCharacter(current())) {
builder->AddCharacter(current());
Advance();
} else {
return ReportError(CStrVector("Invalid escape"));
}
break;
}
break;
......@@ -4883,11 +4911,10 @@ uc32 RegExpParser::ParseOctalLiteral() {
}
bool RegExpParser::ParseHexEscape(int length, uc32 *value) {
bool RegExpParser::ParseHexEscape(int length, uc32* value) {
int start = position();
uc32 val = 0;
bool done = false;
for (int i = 0; !done; i++) {
for (int i = 0; i < length; ++i) {
uc32 c = current();
int d = HexValue(c);
if (d < 0) {
......@@ -4896,15 +4923,52 @@ bool RegExpParser::ParseHexEscape(int length, uc32 *value) {
}
val = val * 16 + d;
Advance();
if (i == length - 1) {
done = true;
}
}
*value = val;
return true;
}
// Parses the part of a unicode escape that follows "\u" (both characters have
// already been consumed by the caller).
//
// Two forms are recognized:
//   - xxxx     : exactly four hex digits, always accepted.
//   - {x...}   : any number of hex digits in braces with a value of at most
//                0x10ffff; only tried when FLAG_harmony_unicode is on and
//                this parser was created with the unicode flag.
//
// Returns true and stores the code point in |*value| on success. If the braced
// form fails, the position is reset to where it was on entry and false is
// returned.
bool RegExpParser::ParseUnicodeEscape(uc32* value) {
  const bool try_braced_form =
      current() == '{' && FLAG_harmony_unicode && unicode_;
  if (!try_braced_form) {
    // \u but no {, or \u{...} escapes not allowed.
    return ParseHexEscape(4, value);
  }

  const int entry_position = position();
  Advance();  // Skip the '{'.
  if (ParseUnlimitedLengthHexNumber(0x10ffff, value) && current() == '}') {
    Advance();  // Skip the '}'.
    return true;
  }
  Reset(entry_position);
  return false;
}
bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
uc32 x = 0;
int d = HexValue(current());
if (d < 0) {
return false;
}
while (d >= 0) {
x = x * 16 + d;
if (x > max_value) {
return false;
}
Advance();
d = HexValue(current());
}
*value = x;
return true;
}
uc32 RegExpParser::ParseClassCharacterEscape() {
DCHECK(current() == '\\');
DCHECK(has_next() && !IsSpecialClassEscape(Next()));
......@@ -4959,27 +5023,41 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
if (ParseHexEscape(2, &value)) {
return value;
}
// If \x is not followed by a two-digit hexadecimal, treat it
// as an identity escape.
return 'x';
if (!FLAG_harmony_unicode || !unicode_) {
// If \x is not followed by a two-digit hexadecimal, treat it
// as an identity escape.
return 'x';
}
// If the 'u' flag is present, invalid escapes are not treated as
// identity escapes.
ReportError(CStrVector("Invalid escape"));
return 0;
}
case 'u': {
Advance();
uc32 value;
if (ParseHexEscape(4, &value)) {
if (ParseUnicodeEscape(&value)) {
return value;
}
// If \u is not followed by a four-digit hexadecimal, treat it
// as an identity escape.
return 'u';
if (!FLAG_harmony_unicode || !unicode_) {
return 'u';
}
// If the 'u' flag is present, invalid escapes are not treated as
// identity escapes.
ReportError(CStrVector("Invalid unicode escape"));
return 0;
}
default: {
// Extended identity escape. We accept any character that hasn't
// been matched by a more specific case, not just the subset required
// by the ECMAScript specification.
uc32 result = current();
Advance();
return result;
// If the 'u' flag is present, only syntax characters can be escaped, no
// other identity escapes are allowed. If the 'u' flag is not present, all
// identity escapes are allowed.
if (!FLAG_harmony_unicode || !unicode_ || IsSyntaxCharacter(result)) {
Advance();
return result;
}
ReportError(CStrVector("Invalid escape"));
return 0;
}
}
return 0;
......@@ -5085,12 +5163,11 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
// ----------------------------------------------------------------------------
// The Parser interface.
bool RegExpParser::ParseRegExp(FlatStringReader* input,
bool multiline,
RegExpCompileData* result,
bool RegExpParser::ParseRegExp(FlatStringReader* input, bool multiline,
bool unicode, RegExpCompileData* result,
Zone* zone) {
DCHECK(result != NULL);
RegExpParser parser(input, &result->error, multiline, zone);
RegExpParser parser(input, &result->error, multiline, unicode, zone);
RegExpTree* tree = parser.ParsePattern();
if (parser.failed()) {
DCHECK(tree == NULL);
......
......@@ -222,15 +222,11 @@ class RegExpBuilder: public ZoneObject {
class RegExpParser BASE_EMBEDDED {
public:
RegExpParser(FlatStringReader* in,
Handle<String>* error,
bool multiline_mode,
Zone* zone);
RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode,
bool unicode, Zone* zone);
static bool ParseRegExp(FlatStringReader* input,
bool multiline,
RegExpCompileData* result,
Zone* zone);
static bool ParseRegExp(FlatStringReader* input, bool multiline, bool unicode,
RegExpCompileData* result, Zone* zone);
RegExpTree* ParsePattern();
RegExpTree* ParseDisjunction();
......@@ -248,6 +244,8 @@ class RegExpParser BASE_EMBEDDED {
// Checks whether the following is a length-digit hexadecimal number,
// and sets the value if it is.
bool ParseHexEscape(int length, uc32* value);
bool ParseUnicodeEscape(uc32* value);
bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
uc32 ParseOctalLiteral();
......@@ -272,6 +270,8 @@ class RegExpParser BASE_EMBEDDED {
int position() { return next_pos_ - 1; }
bool failed() { return failed_; }
static bool IsSyntaxCharacter(uc32 c);
static const int kMaxCaptures = 1 << 16;
static const uc32 kEndMarker = (1 << 21);
......@@ -338,6 +338,7 @@ class RegExpParser BASE_EMBEDDED {
int capture_count_;
bool has_more_;
bool multiline_;
bool unicode_;
bool simple_;
bool contains_anchor_;
bool is_scanned_for_captures_;
......
......@@ -22,6 +22,8 @@ function DoConstructRegExp(object, pattern, flags) {
flags = (pattern.global ? 'g' : '')
+ (pattern.ignoreCase ? 'i' : '')
+ (pattern.multiline ? 'm' : '');
if (harmony_unicode)
flags += (pattern.unicode ? 'u' : '');
if (harmony_regexps)
flags += (pattern.sticky ? 'y' : '');
pattern = pattern.source;
......@@ -235,6 +237,7 @@ function RegExpToString() {
if (this.global) result += 'g';
if (this.ignoreCase) result += 'i';
if (this.multiline) result += 'm';
if (harmony_unicode && this.unicode) result += 'u';
if (harmony_regexps && this.sticky) result += 'y';
return result;
}
......
......@@ -805,7 +805,7 @@ static JSRegExp::Flags RegExpFlagsFromString(Handle<String> flags,
uint32_t value = JSRegExp::NONE;
int length = flags->length();
// A longer flags string cannot be valid.
if (length > 4) return JSRegExp::Flags(0);
if (length > 5) return JSRegExp::Flags(0);
for (int i = 0; i < length; i++) {
uint32_t flag = JSRegExp::NONE;
switch (flags->Get(i)) {
......@@ -818,6 +818,10 @@ static JSRegExp::Flags RegExpFlagsFromString(Handle<String> flags,
case 'm':
flag = JSRegExp::MULTILINE;
break;
case 'u':
if (!FLAG_harmony_unicode) return JSRegExp::Flags(0);
flag = JSRegExp::UNICODE_ESCAPES;
break;
case 'y':
if (!FLAG_harmony_regexps) return JSRegExp::Flags(0);
flag = JSRegExp::STICKY;
......@@ -859,10 +863,12 @@ RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
Handle<Object> ignore_case = factory->ToBoolean(flags.is_ignore_case());
Handle<Object> multiline = factory->ToBoolean(flags.is_multiline());
Handle<Object> sticky = factory->ToBoolean(flags.is_sticky());
Handle<Object> unicode = factory->ToBoolean(flags.is_unicode());
Map* map = regexp->map();
Object* constructor = map->constructor();
if (!FLAG_harmony_regexps && constructor->IsJSFunction() &&
if (!FLAG_harmony_regexps && !FLAG_harmony_unicode &&
constructor->IsJSFunction() &&
JSFunction::cast(constructor)->initial_map() == map) {
// If we still have the original map, set in-object properties directly.
// Both true and false are immovable immortal objects so no need for write
......@@ -896,6 +902,10 @@ RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->sticky_string(),
sticky, final).Check();
}
if (FLAG_harmony_unicode) {
JSObject::SetOwnPropertyIgnoreAttributes(
regexp, factory->unicode_string(), unicode, final).Check();
}
JSObject::SetOwnPropertyIgnoreAttributes(
regexp, factory->last_index_string(), zero, writable).Check();
}
......
......@@ -89,8 +89,8 @@ static bool CheckParse(const char* input) {
Zone zone(CcTest::i_isolate());
FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
RegExpCompileData result;
return v8::internal::RegExpParser::ParseRegExp(
&reader, false, &result, &zone);
return v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
&zone);
}
......@@ -99,8 +99,8 @@ static void CheckParseEq(const char* input, const char* expected) {
Zone zone(CcTest::i_isolate());
FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
RegExpCompileData result;
CHECK(v8::internal::RegExpParser::ParseRegExp(
&reader, false, &result, &zone));
CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
&zone));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
std::ostringstream os;
......@@ -114,8 +114,8 @@ static bool CheckSimple(const char* input) {
Zone zone(CcTest::i_isolate());
FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
RegExpCompileData result;
CHECK(v8::internal::RegExpParser::ParseRegExp(
&reader, false, &result, &zone));
CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
&zone));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
return result.simple;
......@@ -132,8 +132,8 @@ static MinMaxPair CheckMinMaxMatch(const char* input) {
Zone zone(CcTest::i_isolate());
FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
RegExpCompileData result;
CHECK(v8::internal::RegExpParser::ParseRegExp(
&reader, false, &result, &zone));
CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
&zone));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
int min_match = result.tree->min_match();
......@@ -405,8 +405,8 @@ static void ExpectError(const char* input,
Zone zone(CcTest::i_isolate());
FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
RegExpCompileData result;
CHECK(!v8::internal::RegExpParser::ParseRegExp(
&reader, false, &result, &zone));
CHECK(!v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
&zone));
CHECK(result.tree == NULL);
CHECK(!result.error.is_null());
SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
......@@ -497,16 +497,17 @@ TEST(CharacterClassEscapes) {
}
static RegExpNode* Compile(const char* input, bool multiline, bool is_one_byte,
Zone* zone) {
static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
bool is_one_byte, Zone* zone) {
Isolate* isolate = CcTest::i_isolate();
FlatStringReader reader(isolate, CStrVector(input));
RegExpCompileData compile_data;
if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline,
if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline, unicode,
&compile_data, zone))
return NULL;
Handle<String> pattern = isolate->factory()->
NewStringFromUtf8(CStrVector(input)).ToHandleChecked();
Handle<String> pattern = isolate->factory()
->NewStringFromUtf8(CStrVector(input))
.ToHandleChecked();
Handle<String> sample_subject =
isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
RegExpEngine::Compile(&compile_data, false, false, multiline, false, pattern,
......@@ -515,11 +516,11 @@ static RegExpNode* Compile(const char* input, bool multiline, bool is_one_byte,
}
static void Execute(const char* input, bool multiline, bool is_one_byte,
bool dot_output = false) {
static void Execute(const char* input, bool multiline, bool unicode,
bool is_one_byte, bool dot_output = false) {
v8::HandleScope scope(CcTest::isolate());
Zone zone(CcTest::i_isolate());
RegExpNode* node = Compile(input, multiline, is_one_byte, &zone);
RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone);
USE(node);
#ifdef DEBUG
if (dot_output) {
......
......@@ -2,19 +2,14 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexps
// Flags: --harmony-regexps --harmony-unicode
RegExp.prototype.flags = 'setter should be undefined';
assertEquals('', RegExp('').flags);
assertEquals('', /./.flags);
assertEquals('gimy', RegExp('', 'ygmi').flags);
assertEquals('gimy', /foo/ymig.flags);
// TODO(dslomov): When support for the `u` flag is added, uncomment the first
// line below and remove the second line.
//assertEquals(RegExp('', 'yumig').flags, 'gimuy');
assertThrows(function() { RegExp('', 'yumig').flags; }, SyntaxError);
assertEquals('gimuy', RegExp('', 'yugmi').flags);
assertEquals('gimuy', /foo/yumig.flags);
var descriptor = Object.getOwnPropertyDescriptor(RegExp.prototype, 'flags');
assertTrue(descriptor.configurable);
......
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.
// Flags: --harmony-unicode --harmony-regexps
// Shared check used by the tests below: the regexp |r| is expected to match
// "foo" and "boo" (tested in that order) and then to reject "moo".
function testRegexpHelper(r) {
  var accepted = ["foo", "boo"];
  for (var i = 0; i < accepted.length; i++) {
    assertTrue(r.test(accepted[i]));
  }
  assertFalse(r.test("moo"));
}
// \u0066 is "f" and \u0062 is "b", so every pattern below is "(f|b)oo" spelled
// with a different escape form. The \u{...} forms are only used together with
// the u flag.
(function TestUnicodeEscapes() {
// Regexp literals: 4-digit escapes with and without the u flag, then braced
// escapes with varying numbers of leading zeros.
testRegexpHelper(/(\u0066|\u0062)oo/);
testRegexpHelper(/(\u0066|\u0062)oo/u);
testRegexpHelper(/(\u{0066}|\u{0062})oo/u);
testRegexpHelper(/(\u{66}|\u{000062})oo/u);
// Note that we need \\ inside a string, otherwise it's interpreted as a
// unicode escape inside a string.
testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo"));
testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo", "u"));
testRegexpHelper(new RegExp("(\\u{0066}|\\u{0062})oo", "u"));
testRegexpHelper(new RegExp("(\\u{66}|\\u{000062})oo", "u"));
// Though, unicode escapes via strings should work too.
// (Here the string literal itself decodes the escape, so the regexp source
// already contains the plain characters.)
testRegexpHelper(new RegExp("(\u0066|\u0062)oo"));
testRegexpHelper(new RegExp("(\u0066|\u0062)oo", "u"));
testRegexpHelper(new RegExp("(\u{0066}|\u{0062})oo", "u"));
testRegexpHelper(new RegExp("(\u{66}|\u{000062})oo", "u"));
})();
// Same escape forms as TestUnicodeEscapes, but used as the end points of a
// character class range: [\u0062-\u0066] is the range "b" to "f", which
// contains the first letters of "foo" and "boo" but not the "m" of "moo".
(function TestUnicodeEscapesInCharacterClasses() {
testRegexpHelper(/[\u0062-\u0066]oo/);
testRegexpHelper(/[\u0062-\u0066]oo/u);
testRegexpHelper(/[\u{0062}-\u{0066}]oo/u);
testRegexpHelper(/[\u{62}-\u{00000066}]oo/u);
// Note that we need \\ inside a string, otherwise it's interpreted as a
// unicode escape inside a string.
testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo"));
testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo", "u"));
testRegexpHelper(new RegExp("[\\u{0062}-\\u{0066}]oo", "u"));
testRegexpHelper(new RegExp("[\\u{62}-\\u{00000066}]oo", "u"));
// Though, unicode escapes via strings should work too.
testRegexpHelper(new RegExp("[\u0062-\u0066]oo"));
testRegexpHelper(new RegExp("[\u0062-\u0066]oo", "u"));
testRegexpHelper(new RegExp("[\u{0062}-\u{0066}]oo", "u"));
testRegexpHelper(new RegExp("[\u{62}-\u{00000066}]oo", "u"));
})();
// Without the u flag, \u{...} must keep its pre-existing meaning: the escape
// is an identity escape for "u" and the braces are an ordinary quantifier (or,
// inside a character class, ordinary characters).
(function TestBraceEscapesWithoutUnicodeFlag() {
// \u followed by illegal escape will be parsed as u. {x} will be the
// character count.
// helper1: the pattern means "f", exactly two "u", "bar".
function helper1(r) {
assertFalse(r.test("fbar"));
assertFalse(r.test("fubar"));
assertTrue(r.test("fuubar"));
assertFalse(r.test("fuuubar"));
}
helper1(/f\u{2}bar/);
helper1(new RegExp("f\\u{2}bar"));
// helper2: the pattern means "f", one or two "u", "bar".
function helper2(r) {
assertFalse(r.test("fbar"));
assertTrue(r.test("fubar"));
assertTrue(r.test("fuubar"));
assertFalse(r.test("fuuubar"));
}
helper2(/f\u{1,2}bar/);
helper2(new RegExp("f\\u{1,2}bar"));
// helper3: inside a character class there is no quantifier, so the class
// contains the four characters u, {, 2 and }.
function helper3(r) {
assertTrue(r.test("u"));
assertTrue(r.test("{"));
assertTrue(r.test("2"));
assertTrue(r.test("}"));
assertFalse(r.test("q"));
assertFalse(r.test("("));
assertFalse(r.test(")"));
}
helper3(/[\u{2}]/);
helper3(new RegExp("[\\u{2}]"));
})();
// Checks how malformed or unknown escapes are handled with and without the
// u flag, both outside and inside character classes.
(function TestInvalidEscapes() {
// Without the u flag, invalid unicode escapes and other invalid escapes are
// treated as identity escapes.
// helper1: each of \u \x \z \8 \9 stands for the character after the
// backslash, so the pattern matches the literal text "firstuxz89second".
function helper1(r) {
assertTrue(r.test("firstuxz89second"));
}
helper1(/first\u\x\z\8\9second/);
helper1(new RegExp("first\\u\\x\\z\\8\\9second"));
// helper2: the same escapes inside a character class give the set
// {u, x, z, 8, 9}.
function helper2(r) {
assertTrue(r.test("u"));
assertTrue(r.test("x"));
assertTrue(r.test("z"));
assertTrue(r.test("8"));
assertTrue(r.test("9"));
assertFalse(r.test("q"));
assertFalse(r.test("7"));
}
helper2(/[\u\x\z\8\9]/);
helper2(new RegExp("[\\u\\x\\z\\8\\9]"));
// However, with the u flag, these are treated as invalid escapes.
// The source passed to assertThrows is a string, so "\\u" is the regexp
// source \u and '\\\\u' becomes the inner string literal '\\u'.
assertThrows("/\\u/u", SyntaxError);
assertThrows("/\\u12/u", SyntaxError);
assertThrows("/\\ufoo/u", SyntaxError);
assertThrows("/\\x/u", SyntaxError);
assertThrows("/\\xfoo/u", SyntaxError);
assertThrows("/\\z/u", SyntaxError);
assertThrows("/\\8/u", SyntaxError);
assertThrows("/\\9/u", SyntaxError);
assertThrows("new RegExp('\\\\u', 'u')", SyntaxError);
assertThrows("new RegExp('\\\\u12', 'u')", SyntaxError);
assertThrows("new RegExp('\\\\ufoo', 'u')", SyntaxError);
assertThrows("new RegExp('\\\\x', 'u')", SyntaxError);
assertThrows("new RegExp('\\\\xfoo', 'u')", SyntaxError);
assertThrows("new RegExp('\\\\z', 'u')", SyntaxError);
assertThrows("new RegExp('\\\\8', 'u')", SyntaxError);
assertThrows("new RegExp('\\\\9', 'u')", SyntaxError);
})();
// Checks the upper bound of the braced escape: \u{10ffff} is the largest
// accepted value with the u flag, \u{110000} is rejected.
(function TestTooBigHexEscape() {
  // The hex number inside \u{} has a maximum value.
  // These two statements only need to evaluate without throwing. They are
  // terminated explicitly rather than relying on automatic semicolon
  // insertion after a regexp literal.
  /\u{10ffff}/u;
  new RegExp("\\u{10ffff}", "u");
  assertThrows("/\\u{110000}/u", SyntaxError);
  assertThrows("new RegExp('\\\\u{110000}', 'u')", SyntaxError);
  // Without the u flag, they're of course fine ({x} is the count).
  /\u{110000}/;
  new RegExp("\\u{110000}");
})();
// An escaped syntax character (here "\[") must stay a valid identity escape
// when the u flag is present.
(function TestSyntaxEscapes() {
// Syntax escapes work the same with or without the u flag.
// helper: "\[" matches a literal "[" and nothing else.
function helper(r) {
assertTrue(r.test("foo[bar"));
assertFalse(r.test("foo]bar"));
}
helper(/foo\[bar/);
helper(new RegExp("foo\\[bar"));
helper(/foo\[bar/u);
helper(new RegExp("foo\\[bar", "u"));
})();
// A pattern that spells a supplementary code point as two 4-digit escapes
// (a surrogate pair) must match a subject that contains that code point.
(function TestUnicodeSurrogates() {
// U+10E6D corresponds to the surrogate pair [U+D803, U+DE6D].
// helper: the subject is written with a braced \u{...} string escape, the
// patterns below with the two surrogate escapes.
function helper(r) {
assertTrue(r.test("foo\u{10e6d}bar"));
}
helper(/foo\ud803\ude6dbar/u);
helper(new RegExp("foo\\ud803\\ude6dbar", "u"));
})();
// Checks the five flag properties (global, ignoreCase, multiline, sticky,
// unicode) for a regexp with every flag set and for one with none set, using
// both the literal and the constructor form.
(function AllFlags() {
// Test that we can pass all possible regexp flags and they work properly.
// helper1: every flag property must be true.
function helper1(r) {
assertTrue(r.global);
assertTrue(r.ignoreCase);
assertTrue(r.multiline);
assertTrue(r.sticky);
assertTrue(r.unicode);
}
helper1(/foo/gimyu);
helper1(new RegExp("foo", "gimyu"));
// helper2: every flag property must be false.
function helper2(r) {
assertFalse(r.global);
assertFalse(r.ignoreCase);
assertFalse(r.multiline);
assertFalse(r.sticky);
assertFalse(r.unicode);
}
helper2(/foo/);
helper2(new RegExp("foo"));
})();
// A flags string that repeats "u" must be rejected for both the literal and
// the constructor form.
(function DuplicatedFlags() {
  // Test that duplicating the u flag is not allowed.
  // The expected exception type is given explicitly, as in the other
  // assertThrows calls of this file, so that an unrelated exception does not
  // make the test pass.
  assertThrows("/foo/ugu", SyntaxError);
  assertThrows("new RegExp('foo', 'ugu')", SyntaxError);
})();
// The string form of a regexp created with the u flag must end in "/u", for
// both the literal and the constructor form.
(function ToString() {
  // Test that the u flag is included in the string representation of regexps.
  function helper(r) {
    // Expected value first, actual value second, so that a failure message
    // labels the two values correctly.
    assertEquals("/foo/u", r.toString());
  }
  helper(/foo/u);
  helper(new RegExp("foo", "u"));
})();
......@@ -25,7 +25,7 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Flags: --expose-debug-as debug
// Flags: --expose-debug-as debug --harmony-unicode
// Test the mirror object for regular expression values
var all_attributes = debug.PropertyAttribute.ReadOnly |
......@@ -36,6 +36,7 @@ var expected_attributes = {
'global': all_attributes,
'ignoreCase': all_attributes,
'multiline': all_attributes,
'unicode' : all_attributes,
'lastIndex': debug.PropertyAttribute.DontEnum | debug.PropertyAttribute.DontDelete
};
......@@ -108,3 +109,4 @@ testRegExpMirror(/x/);
testRegExpMirror(/[abc]/);
testRegExpMirror(/[\r\n]/g);
testRegExpMirror(/a*b/gmi);
testRegExpMirror(/(\u{0066}|\u{0062})oo/u);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment