Commit 0a0e6c8c, authored by marja and committed by Commit bot

ES6 unicode extensions, part 1.

Allows \u{xxxxx} in variable names and string literals (not yet in regexps).

Everything's behind the --harmony-unicode flag.

BUG=

Review URL: https://codereview.chromium.org/716423002

Cr-Commit-Position: refs/heads/master@{#25603}
parent 027cbf8d
......@@ -1590,6 +1590,7 @@ EMPTY_NATIVE_FUNCTIONS_FOR_FEATURE(harmony_numeric_literals)
EMPTY_NATIVE_FUNCTIONS_FOR_FEATURE(harmony_tostring)
EMPTY_NATIVE_FUNCTIONS_FOR_FEATURE(harmony_templates)
EMPTY_NATIVE_FUNCTIONS_FOR_FEATURE(harmony_sloppy)
EMPTY_NATIVE_FUNCTIONS_FOR_FEATURE(harmony_unicode)
void Genesis::InstallNativeFunctions_harmony_proxies() {
......@@ -1618,6 +1619,7 @@ EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_tostring)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_proxies)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_templates)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_sloppy)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_unicode)
void Genesis::InitializeGlobal_harmony_regexps() {
Handle<JSObject> builtins(native_context()->builtins());
......@@ -2176,6 +2178,7 @@ bool Genesis::InstallExperimentalNatives() {
static const char* harmony_templates_natives[] = {
"native harmony-templates.js", NULL};
static const char* harmony_sloppy_natives[] = {NULL};
static const char* harmony_unicode_natives[] = {NULL};
for (int i = ExperimentalNatives::GetDebuggerCount();
i < ExperimentalNatives::GetBuiltinsCount(); i++) {
......
......@@ -162,17 +162,18 @@ DEFINE_IMPLICATION(harmony, es_staging)
DEFINE_IMPLICATION(es_staging, harmony)
// Features that are still work in progress (behind individual flags).
#define HARMONY_INPROGRESS(V) \
V(harmony_modules, "harmony modules (implies block scoping)") \
V(harmony_arrays, "harmony array methods") \
V(harmony_classes, \
#define HARMONY_INPROGRESS(V) \
V(harmony_modules, "harmony modules (implies block scoping)") \
V(harmony_arrays, "harmony array methods") \
V(harmony_classes, \
"harmony classes (implies block scoping & object literal extension)") \
V(harmony_object_literals, "harmony object literal extensions") \
V(harmony_regexps, "harmony regular expression extensions") \
V(harmony_arrow_functions, "harmony arrow functions") \
V(harmony_proxies, "harmony proxies") \
V(harmony_templates, "harmony template literals") \
V(harmony_sloppy, "harmony features in sloppy mode")
V(harmony_object_literals, "harmony object literal extensions") \
V(harmony_regexps, "harmony regular expression extensions") \
V(harmony_arrow_functions, "harmony arrow functions") \
V(harmony_proxies, "harmony proxies") \
V(harmony_templates, "harmony template literals") \
V(harmony_sloppy, "harmony features in sloppy mode") \
V(harmony_unicode, "harmony unicode escapes")
// Features that are complete (but still behind --harmony/es-staging flag).
#define HARMONY_STAGED(V) \
......
......@@ -805,6 +805,7 @@ Parser::Parser(CompilationInfo* info, ParseInfo* parse_info)
set_allow_harmony_object_literals(FLAG_harmony_object_literals);
set_allow_harmony_templates(FLAG_harmony_templates);
set_allow_harmony_sloppy(FLAG_harmony_sloppy);
set_allow_harmony_unicode(FLAG_harmony_unicode);
for (int feature = 0; feature < v8::Isolate::kUseCounterFeatureCount;
++feature) {
use_counts_[feature] = 0;
......@@ -3974,6 +3975,7 @@ PreParser::PreParseResult Parser::ParseLazyFunctionBodyWithPreParser(
allow_harmony_object_literals());
reusable_preparser_->set_allow_harmony_templates(allow_harmony_templates());
reusable_preparser_->set_allow_harmony_sloppy(allow_harmony_sloppy());
reusable_preparser_->set_allow_harmony_unicode(allow_harmony_unicode());
}
PreParser::PreParseResult result =
reusable_preparser_->PreParseLazyFunction(strict_mode(),
......
......@@ -107,6 +107,7 @@ class ParserBase : public Traits {
}
// Whether harmony template literals are accepted. The setting is not stored
// on the parser; it is read back from the scanner.
bool allow_harmony_templates() const { return scanner()->HarmonyTemplates(); }
// Whether harmony features are allowed in sloppy mode. Unlike the
// scanner-backed flags, this one is kept in the parser's own
// allow_harmony_sloppy_ member.
bool allow_harmony_sloppy() const { return allow_harmony_sloppy_; }
// Whether harmony unicode escapes are accepted. The setting is read back
// from the scanner, which is where escapes are decoded.
bool allow_harmony_unicode() const { return scanner()->HarmonyUnicode(); }
// Setters that determine whether certain syntactical constructs are
// allowed to be parsed by this instance of the parser.
......@@ -136,6 +137,9 @@ class ParserBase : public Traits {
// Enables or disables harmony features in sloppy mode for this parser
// instance by updating the parser-held allow_harmony_sloppy_ flag.
void set_allow_harmony_sloppy(bool allow) {
allow_harmony_sloppy_ = allow;
}
// Enables or disables harmony unicode escapes for this parser instance.
// The value is forwarded to the scanner rather than stored on the parser.
void set_allow_harmony_unicode(bool allow) {
scanner()->SetHarmonyUnicode(allow);
}
protected:
enum AllowEvalOrArgumentsAsIdentifier {
......
......@@ -39,7 +39,8 @@ Scanner::Scanner(UnicodeCache* unicode_cache)
harmony_modules_(false),
harmony_numeric_literals_(false),
harmony_classes_(false),
harmony_templates_(false) {}
harmony_templates_(false),
harmony_unicode_(false) {}
void Scanner::Initialize(Utf16CharacterStream* source) {
......@@ -72,6 +73,22 @@ uc32 Scanner::ScanHexNumber(int expected_length) {
}
// Scans a run of one or more hex digits, starting at the current character
// c0_, and returns the value they spell. The run may be arbitrarily long in
// characters (leading zeros are fine) as long as its value stays within
// max_value.
//
// Returns -1 if c0_ is not a hex digit, or as soon as appending the next
// digit would push the value above max_value. In the latter case the
// offending digit is not consumed. On success the scanner is positioned on
// the first character that is not a hex digit.
uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {
  int d = HexValue(c0_);
  // At least one hex digit is required; a negative digit value means c0_ is
  // not one.
  if (d < 0) return -1;

  uc32 x = 0;
  while (d >= 0) {
    // Same condition as "x * 16 + d > max_value", but tested before the
    // multiplication and rearranged so that no intermediate value can
    // overflow, however large max_value is.
    if (d > max_value || x > (max_value - d) / 16) return -1;
    x = x * 16 + d;
    Advance();
    d = HexValue(c0_);
  }
  return x;
}
// Ensure that tokens can be stored in a byte.
STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
......@@ -700,7 +717,7 @@ bool Scanner::ScanEscape() {
case 'r' : c = '\r'; break;
case 't' : c = '\t'; break;
case 'u' : {
c = ScanHexNumber(4);
c = ScanUnicodeEscape();
if (c < 0) return false;
break;
}
......@@ -964,6 +981,26 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
Advance();
if (c0_ != 'u') return -1;
Advance();
return ScanUnicodeEscape();
}
// Scans the remainder of a Unicode escape; the leading backslash and 'u'
// have already been consumed by the caller. Two spellings are understood:
//   - uxxxx     exactly four hex digits, always accepted;
//   - u{x...}   any number of hex digits with a value of at most 0x10ffff,
//               accepted only when harmony unicode escapes are enabled.
// Returns the scanned value, or -1 if the braced form is empty, too large or
// not terminated by '}'.
uc32 Scanner::ScanUnicodeEscape() {
  const bool braced_form = c0_ == '{' && HarmonyUnicode();
  // Without the flag a '{' is simply handed to the four-digit scanner.
  if (!braced_form) return ScanHexNumber(4);

  Advance();  // Skip '{'.
  const uc32 code_point = ScanUnlimitedLengthHexNumber(0x10ffff);
  if (code_point < 0 || c0_ != '}') return -1;
  Advance();  // Skip '}'.
  return code_point;
}
......
......@@ -460,6 +460,8 @@ class Scanner {
}
// Whether scanning of TEMPLATE_SPAN and TEMPLATE_TAIL is enabled.
bool HarmonyTemplates() const { return harmony_templates_; }
// Turns scanning of TEMPLATE_SPAN and TEMPLATE_TAIL on or off.
void SetHarmonyTemplates(bool templates) { harmony_templates_ = templates; }
// Whether the braced escape form (backslash, 'u', then hex digits in braces)
// is allowed.
bool HarmonyUnicode() const { return harmony_unicode_; }
// Allows or disallows the braced escape form (backslash, 'u', then hex
// digits in braces).
void SetHarmonyUnicode(bool unicode) { harmony_unicode_ = unicode; }
// Returns true if there was a line terminator before the peek'ed token,
// possibly inside a multi-line comment.
......@@ -616,6 +618,10 @@ class Scanner {
}
uc32 ScanHexNumber(int expected_length);
// Scan a number of any length but not bigger than max_value. For example, the
// number can be 000000001, so it's very long in characters but its value is
// small.
uc32 ScanUnlimitedLengthHexNumber(int max_value);
// Scans a single JavaScript token.
void Scan();
......@@ -642,6 +648,8 @@ class Scanner {
// Decodes a Unicode escape-sequence which is part of an identifier.
// If the escape sequence cannot be decoded the result is kBadChar.
uc32 ScanIdentifierUnicodeEscape();
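// Helper for the escape scanners above: scans what follows "\u", i.e. either
// four hex digits or, if harmony unicode escapes are enabled, '{' hex digits
// '}' with a value of at most 0x10ffff. Returns -1 for a malformed or
// out-of-range braced form.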
uc32 ScanUnicodeEscape();
// Return the current source position.
int source_pos() {
......@@ -688,6 +696,8 @@ class Scanner {
bool harmony_classes_;
// Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL
bool harmony_templates_;
// Whether we allow \u{xxxxx}.
bool harmony_unicode_;
};
} } // namespace v8::internal
......
......@@ -1357,7 +1357,8 @@ enum ParserFlag {
kAllowHarmonyClasses,
kAllowHarmonyObjectLiterals,
kAllowHarmonyTemplates,
kAllowHarmonySloppy
kAllowHarmonySloppy,
kAllowHarmonyUnicode
};
......@@ -1383,6 +1384,7 @@ void SetParserFlags(i::ParserBase<Traits>* parser,
parser->set_allow_harmony_classes(flags.Contains(kAllowHarmonyClasses));
parser->set_allow_harmony_templates(flags.Contains(kAllowHarmonyTemplates));
parser->set_allow_harmony_sloppy(flags.Contains(kAllowHarmonySloppy));
parser->set_allow_harmony_unicode(flags.Contains(kAllowHarmonyUnicode));
}
......@@ -1693,6 +1695,7 @@ void RunParserSyncTest(const char* context_data[][2],
kAllowHarmonyModules,
kAllowHarmonyTemplates,
kAllowHarmonySloppy,
kAllowHarmonyUnicode,
kAllowLazy,
kAllowNatives,
};
......@@ -4374,8 +4377,52 @@ TEST(InvalidUnicodeEscapes) {
// No escapes allowed in regexp flags
"/regex/\\u0069g",
"/regex/\\u006g",
// Braces gone wrong
"var foob\\u{c481r = 0;",
"var foob\\uc481}r = 0;",
"var \\u{0052oo = 0;",
"var \\u0052}oo = 0;",
"\"foob\\u{c481r\"",
"var foob\\u{}ar = 0;",
// Too high value for the unicode escape
"\"\\u{110000}\"",
// Not a unicode escape
"var foob\\v1234r = 0;",
"var foob\\U1234r = 0;",
"var foob\\v{1234}r = 0;",
"var foob\\U{1234}r = 0;",
NULL};
RunParserSyncTest(context_data, data, kError);
static const ParserFlag always_flags[] = {kAllowHarmonyUnicode};
RunParserSyncTest(context_data, data, kError, NULL, 0, always_flags,
arraysize(always_flags));
}
// Checks that every supported spelling of a Unicode escape is accepted, in
// identifiers and in string literals, when kAllowHarmonyUnicode is passed as
// an always-on flag. Each snippet is tried both in an empty context and after
// a 'use strict' directive.
TEST(UnicodeEscapes) {
  const char* context_data[][2] = {{"", ""},
                                   {"'use strict';", ""},
                                   {NULL, NULL}};
  const char* data[] = {
    // Identifier starting with escape
    "var \\u0052oo = 0;",
    "var \\u{0052}oo = 0;",
    "var \\u{52}oo = 0;",
    "var \\u{00000000052}oo = 0;",
    // Identifier with an escape but not starting with an escape
    "var foob\\uc481r = 0;",
    "var foob\\u{c481}r = 0;",
    // String with an escape. The braced case used to be spelled
    // "\{uc481}", which is just an escaped '{' followed by plain text and
    // therefore never reached the braced-escape path.
    "\"foob\\uc481r\"",
    "\"foob\\u{c481}r\"",
    // This character is a valid unicode character, representable as a
    // surrogate pair, not representable as 4 hex digits.
    "\"foo\\u{10e6d}\"",
    // Max value for the unicode escape
    "\"\\u{10ffff}\"",
    NULL};

  static const ParserFlag always_flags[] = {kAllowHarmonyUnicode};
  RunParserSyncTest(context_data, data, kSuccess, NULL, 0, always_flags,
                    arraysize(always_flags));
}
......
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.
// Flags: --harmony-unicode
// Unicode escapes in variable names.
// An escape in the middle of an identifier: each spelling below encodes
// U+0061 ('a'), so every reference must resolve to the local `foobar`.
// Covers the classic four-digit form and the braced form with exact,
// minimal and zero-padded digit counts.
// NOTE(review): mjsunit's assertEquals is conventionally called as
// (expected, found); the arguments here look swapped - confirm. This only
// affects the failure message.
(function TestVariableNames1() {
var foobar = 1;
assertEquals(foob\u0061r, 1);
assertEquals(foob\u{0061}r, 1);
assertEquals(foob\u{61}r, 1);
assertEquals(foob\u{0000000061}r, 1);
})();
// An escape at the very start of an identifier: each spelling below encodes
// U+0066 ('f'), so every reference must resolve to the local `foobar`.
// Covers the classic four-digit form and the braced form with exact,
// minimal and zero-padded digit counts.
// NOTE(review): mjsunit's assertEquals is conventionally called as
// (expected, found); the arguments here look swapped - confirm. This only
// affects the failure message.
(function TestVariableNames2() {
var foobar = 1;
assertEquals(\u0066oobar, 1);
assertEquals(\u{0066}oobar, 1);
assertEquals(\u{66}oobar, 1);
assertEquals(\u{0000000066}oobar, 1);
})();
// Unicode escapes in strings.
(function TestStrings() {
  // Every literal escapes U+0061 ('a') in a different way: the classic
  // four-digit form, then the braced form with exact, minimal and
  // zero-padded digit counts. All of them must yield "foobar".
  var fourDigits = "foob\u0061r";
  var bracedExact = "foob\u{0061}r";
  var bracedMinimal = "foob\u{61}r";
  var bracedPadded = "foob\u{0000000061}r";

  assertEquals(fourDigits, "foobar");
  assertEquals(bracedExact, "foobar");
  assertEquals(bracedMinimal, "foobar");
  assertEquals(bracedPadded, "foobar");
})();
(function TestSurrogates() {
  // A code point above U+FFFF written as one braced escape must give the
  // same string as its two UTF-16 halves escaped separately:
  // U+10E6D is the surrogate pair [U+D803, U+DE6D].
  var singleEscape = "foo\u{10e6d}";
  var surrogatePair = "foo\u{d803}\u{de6d}";
  assertEquals(singleEscape, surrogatePair);
})();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment