Commit d67e573d authored by arv's avatar arv Committed by Commit bot

Simplify template literal raw string creation

BUG=v8:3710
LOG=Y
R=dslomov@chromium.org, marja@chromium.org

Review URL: https://codereview.chromium.org/768203002

Cr-Commit-Position: refs/heads/master@{#25640}
parent 3fc9c9b6
......@@ -5186,8 +5186,10 @@ void Parser::AddTemplateSpan(TemplateLiteralState* state, bool tail) {
int pos = scanner()->location().beg_pos;
int end = scanner()->location().end_pos - (tail ? 1 : 2);
const AstRawString* tv = scanner()->CurrentSymbol(ast_value_factory());
const AstRawString* trv = scanner()->CurrentRawSymbol(ast_value_factory());
Literal* cooked = factory()->NewStringLiteral(tv, pos);
(*state)->AddTemplateSpan(cooked, end, zone());
Literal* raw = factory()->NewStringLiteral(trv, pos);
(*state)->AddTemplateSpan(cooked, raw, end, zone());
}
......@@ -5202,8 +5204,10 @@ Expression* Parser::CloseTemplateLiteral(TemplateLiteralState* state, int start,
TemplateLiteral* lit = *state;
int pos = lit->position();
const ZoneList<Expression*>* cooked_strings = lit->cooked();
const ZoneList<Expression*>* raw_strings = lit->raw();
const ZoneList<Expression*>* expressions = lit->expressions();
CHECK(cooked_strings->length() == (expressions->length() + 1));
DCHECK_EQ(cooked_strings->length(), raw_strings->length());
DCHECK_EQ(cooked_strings->length(), expressions->length() + 1);
if (!tag) {
// Build tree of BinaryOps to simplify code-generation
......@@ -5231,9 +5235,7 @@ Expression* Parser::CloseTemplateLiteral(TemplateLiteralState* state, int start,
}
return expr;
} else {
uint32_t hash;
ZoneList<Expression*>* raw_strings = TemplateRawStrings(lit, &hash);
Handle<String> source(String::cast(script()->source()));
uint32_t hash = ComputeTemplateLiteralHash(lit);
int cooked_idx = function_state_->NextMaterializedLiteralIndex();
int raw_idx = function_state_->NextMaterializedLiteralIndex();
......@@ -5249,7 +5251,7 @@ Expression* Parser::CloseTemplateLiteral(TemplateLiteralState* state, int start,
const_cast<ZoneList<Expression*>*>(raw_strings), raw_idx, pos),
zone());
// Ensure hash is suitable as an Smi value
// Ensure hash is suitable as a Smi value
Smi* hash_obj = Smi::cast(Internals::IntToSmi(static_cast<int>(hash)));
args->Add(factory()->NewSmiLiteral(hash_obj->value(), pos), zone());
......@@ -5267,84 +5269,32 @@ Expression* Parser::CloseTemplateLiteral(TemplateLiteralState* state, int start,
}
ZoneList<Expression*>* Parser::TemplateRawStrings(const TemplateLiteral* lit,
uint32_t* hash) {
const ZoneList<int>* lengths = lit->lengths();
const ZoneList<Expression*>* cooked_strings = lit->cooked();
int total = lengths->length();
ZoneList<Expression*>* raw_strings;
// Given a TemplateLiteral, produce a list of raw strings, used for generating
// a CallSite object for a tagged template invocation.
//
// A raw string will consist of the unescaped characters of a template span,
// with end-of-line sequences normalized to U+000A LINE FEEDs, and without
// leading or trailing template delimiters.
//
uint32_t Parser::ComputeTemplateLiteralHash(const TemplateLiteral* lit) {
const ZoneList<Expression*>* raw_strings = lit->raw();
int total = raw_strings->length();
DCHECK(total);
Handle<String> source(String::cast(script()->source()));
raw_strings = new (zone()) ZoneList<Expression*>(total, zone());
uint32_t running_hash = 0;
for (int index = 0; index < total; ++index) {
int span_start = cooked_strings->at(index)->position() + 1;
int span_end = lengths->at(index) - 1;
int length;
int to_index = 0;
if (index) {
running_hash = StringHasher::ComputeRunningHashOneByte(
running_hash, "${}", 3);
}
SmartArrayPointer<char> raw_chars =
source->ToCString(ALLOW_NULLS, FAST_STRING_TRAVERSAL, span_start,
span_end, &length);
// Normalize raw line-feeds. [U+000D U+000A] (CRLF) and [U+000D] (CR) must
// be translated into U+000A (LF).
for (int from_index = 0; from_index < length; ++from_index) {
char ch = raw_chars[from_index];
if (ch == '\r') {
ch = '\n';
if (from_index + 1 < length && raw_chars[from_index + 1] == '\n') {
++from_index;
}
}
raw_chars[to_index++] = ch;
}
Access<UnicodeCache::Utf8Decoder>
decoder(isolate()->unicode_cache()->utf8_decoder());
decoder->Reset(raw_chars.get(), to_index);
int utf16_length = decoder->Utf16Length();
Literal* raw_lit = NULL;
if (utf16_length > 0) {
uc16* utf16_buffer = zone()->NewArray<uc16>(utf16_length);
to_index = decoder->WriteUtf16(utf16_buffer, utf16_length);
running_hash = StringHasher::ComputeRunningHash(
running_hash, utf16_buffer, to_index);
const uint16_t* data = reinterpret_cast<const uint16_t*>(utf16_buffer);
const AstRawString* raw_str = ast_value_factory()->GetTwoByteString(
Vector<const uint16_t>(data, to_index));
raw_lit = factory()->NewStringLiteral(raw_str, span_start - 1);
const AstRawString* raw_string =
raw_strings->at(index)->AsLiteral()->raw_value()->AsString();
if (raw_string->is_one_byte()) {
const char* data = reinterpret_cast<const char*>(raw_string->raw_data());
running_hash = StringHasher::ComputeRunningHashOneByte(
running_hash, data, raw_string->length());
} else {
raw_lit = factory()->NewStringLiteral(
ast_value_factory()->empty_string(), span_start - 1);
const uc16* data = reinterpret_cast<const uc16*>(raw_string->raw_data());
running_hash = StringHasher::ComputeRunningHash(running_hash, data,
raw_string->length());
}
DCHECK_NOT_NULL(raw_lit);
raw_strings->Add(raw_lit, zone());
}
// Hash key is used exclusively by template call site caching. There are no
// real security implications for unseeded hashes, and no issues with changing
// the hashing algorithm to improve performance or entropy.
*hash = running_hash;
return raw_strings;
return running_hash;
}
} } // namespace v8::internal
......@@ -593,20 +593,18 @@ class ParserTraits {
class TemplateLiteral : public ZoneObject {
public:
TemplateLiteral(Zone* zone, int pos)
: cooked_(8, zone),
lengths_(8, zone),
expressions_(8, zone),
pos_(pos) {}
: cooked_(8, zone), raw_(8, zone), expressions_(8, zone), pos_(pos) {}
const ZoneList<Expression*>* cooked() const { return &cooked_; }
const ZoneList<int>* lengths() const { return &lengths_; }
const ZoneList<Expression*>* raw() const { return &raw_; }
const ZoneList<Expression*>* expressions() const { return &expressions_; }
int position() const { return pos_; }
void AddTemplateSpan(Literal* cooked, int end, Zone* zone) {
void AddTemplateSpan(Literal* cooked, Literal* raw, int end, Zone* zone) {
DCHECK_NOT_NULL(cooked);
DCHECK_NOT_NULL(raw);
cooked_.Add(cooked, zone);
lengths_.Add(end - cooked->position(), zone);
raw_.Add(raw, zone);
}
void AddExpression(Expression* expression, Zone* zone) {
......@@ -616,7 +614,7 @@ class ParserTraits {
private:
ZoneList<Expression*> cooked_;
ZoneList<int> lengths_;
ZoneList<Expression*> raw_;
ZoneList<Expression*> expressions_;
int pos_;
};
......@@ -884,8 +882,8 @@ class Parser : public ParserBase<ParserTraits> {
Expression* expression);
Expression* CloseTemplateLiteral(TemplateLiteralState* state, int start,
Expression* tag);
ZoneList<Expression*>* TemplateRawStrings(const TemplateLiteral* lit,
uint32_t* hash);
uint32_t ComputeTemplateLiteralHash(const TemplateLiteral* lit);
Scanner scanner_;
PreParser* reusable_preparser_;
Scope* original_scope_; // for ES5 function declarations in sloppy eval
......
......@@ -34,6 +34,7 @@ Handle<String> LiteralBuffer::Internalize(Isolate* isolate) const {
Scanner::Scanner(UnicodeCache* unicode_cache)
: unicode_cache_(unicode_cache),
capturing_raw_literal_(false),
octal_pos_(Location::invalid()),
harmony_scoping_(false),
harmony_modules_(false),
......@@ -420,6 +421,7 @@ Token::Value Scanner::ScanHtmlComment() {
void Scanner::Scan() {
next_.literal_chars = NULL;
next_.raw_literal_chars = NULL;
Token::Value token;
do {
// Remember the position of the next token
......@@ -819,15 +821,18 @@ Token::Value Scanner::ScanTemplateSpan() {
DCHECK(c0_ == '`' || c0_ == '}');
Advance(); // Consume ` or }
LiteralScope literal(this);
LiteralScope literal(this, true);
while (true) {
uc32 c = c0_;
Advance();
if (c == '`') {
result = Token::TEMPLATE_TAIL;
ReduceRawLiteralLength(1);
break;
} else if (c == '$' && c0_ == '{') {
Advance(); // Consume '{'
ReduceRawLiteralLength(2);
break;
} else if (c == '\\') {
if (unicode_cache_->IsLineTerminator(c0_)) {
......@@ -835,7 +840,14 @@ Token::Value Scanner::ScanTemplateSpan() {
// code unit sequence.
uc32 lastChar = c0_;
Advance();
if (lastChar == '\r' && c0_ == '\n') Advance();
if (lastChar == '\r') {
ReduceRawLiteralLength(1); // Remove \r
if (c0_ == '\n') {
Advance(); // Adds \n
} else {
AddRawLiteralChar('\n');
}
}
} else if (c0_ == '0') {
Advance();
AddLiteralChar('0');
......@@ -851,7 +863,12 @@ Token::Value Scanner::ScanTemplateSpan() {
// The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
// consisting of the CV 0x000A.
if (c == '\r') {
if (c0_ == '\n') Advance();
ReduceRawLiteralLength(1); // Remove \r
if (c0_ == '\n') {
Advance(); // Adds \n
} else {
AddRawLiteralChar('\n');
}
c = '\n';
}
AddLiteralChar(c);
......@@ -1285,6 +1302,15 @@ const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {
}
// Interns the raw literal of the current token in |ast_value_factory| and
// returns the resulting AstRawString. The raw literal buffer's encoding
// decides whether the one-byte or the two-byte view of the buffer is used.
const AstRawString* Scanner::CurrentRawSymbol(
    AstValueFactory* ast_value_factory) {
  if (!is_raw_literal_one_byte()) {
    return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());
  }
  return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());
}
double Scanner::DoubleValue() {
DCHECK(is_literal_one_byte());
return StringToDouble(
......
......@@ -252,6 +252,10 @@ class LiteralBuffer {
return is_one_byte_ ? position_ : (position_ >> 1);
}
// Drops the last |delta| characters from the buffer. |position_| is a byte
// offset, so the character count is scaled by the width of the current
// encoding before it is subtracted.
void ReduceLength(int delta) {
  if (is_one_byte_) {
    position_ -= delta * kOneByteSize;
  } else {
    position_ -= delta * kUC16Size;
  }
}
void Reset() {
position_ = 0;
is_one_byte_ = true;
......@@ -318,9 +322,10 @@ class Scanner {
// if aborting the scanning before it's complete.
class LiteralScope {
public:
explicit LiteralScope(Scanner* self)
explicit LiteralScope(Scanner* self, bool capture_raw = false)
: scanner_(self), complete_(false) {
scanner_->StartLiteral();
if (capture_raw) scanner_->StartRawLiteral();
}
~LiteralScope() {
if (!complete_) scanner_->DropLiteral();
......@@ -392,6 +397,7 @@ class Scanner {
const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);
const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);
const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);
double DoubleValue();
bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {
......@@ -493,6 +499,7 @@ class Scanner {
Token::Value token;
Location location;
LiteralBuffer* literal_chars;
LiteralBuffer* raw_literal_chars;
};
static const int kCharacterLookaheadBufferSize = 1;
......@@ -507,6 +514,7 @@ class Scanner {
Advance();
// Initialize current_ to not refer to a literal.
current_.literal_chars = NULL;
current_.raw_literal_chars = NULL;
}
// Literal buffer support
......@@ -517,20 +525,38 @@ class Scanner {
next_.literal_chars = free_buffer;
}
// Begins collecting the raw form of the literal for the upcoming token:
// switches raw capture on, attaches the scanner's raw buffer to next_ and
// empties that buffer so no characters from an earlier literal remain.
inline void StartRawLiteral() {
  capturing_raw_literal_ = true;
  next_.raw_literal_chars = &raw_literal_buffer_;
  raw_literal_buffer_.Reset();
}
// Appends |c| to the literal buffer of the upcoming token (next_).
// A literal must already have been started, i.e. next_.literal_chars must
// be non-null; this is only checked in debug builds.
INLINE(void AddLiteralChar(uc32 c)) {
DCHECK_NOT_NULL(next_.literal_chars);
next_.literal_chars->AddChar(c);
}
// Complete scanning of a literal.
inline void TerminateLiteral() {
// Does nothing in the current implementation.
// Appends |c| to the raw literal buffer of the upcoming token (next_).
// Only valid while raw capture is active, i.e. after StartRawLiteral() and
// before TerminateLiteral() or DropLiteral(); checked in debug builds.
INLINE(void AddRawLiteralChar(uc32 c)) {
DCHECK(capturing_raw_literal_);
DCHECK_NOT_NULL(next_.raw_literal_chars);
next_.raw_literal_chars->AddChar(c);
}
// Removes the last |delta| characters from the raw literal buffer of the
// upcoming token. Advance() records every consumed character while raw
// capture is active, so callers use this to un-record characters that must
// not appear in the raw string (template delimiters, a CR that is being
// normalized to LF, or characters handed back via PushBack()).
// Only valid while raw capture is active; checked in debug builds.
INLINE(void ReduceRawLiteralLength(int delta)) {
DCHECK(capturing_raw_literal_);
DCHECK_NOT_NULL(next_.raw_literal_chars);
next_.raw_literal_chars->ReduceLength(delta);
}
// Complete scanning of a literal. Switches raw capture off so that further
// Advance() calls no longer append to the raw literal buffer; the buffers
// attached to next_ are left in place.
inline void TerminateLiteral() { capturing_raw_literal_ = false; }
// Abandons the literal that is currently being scanned, e.g. because an
// error was encountered: raw capture is switched off and next_ is detached
// from both its raw and its regular literal buffer.
inline void DropLiteral() {
  capturing_raw_literal_ = false;
  next_.raw_literal_chars = NULL;
  next_.literal_chars = NULL;
}
inline void AddLiteralCharAdvance() {
......@@ -540,6 +566,9 @@ class Scanner {
// Low-level scanning support.
void Advance() {
if (capturing_raw_literal_) {
AddRawLiteralChar(c0_);
}
c0_ = source_->Advance();
if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
uc32 c1 = source_->Advance();
......@@ -555,8 +584,10 @@ class Scanner {
if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));
source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));
if (capturing_raw_literal_) ReduceRawLiteralLength(2);
} else {
source_->PushBack(c0_);
if (capturing_raw_literal_) ReduceRawLiteralLength(1);
}
c0_ = ch;
}
......@@ -578,8 +609,9 @@ class Scanner {
// Returns the literal string, if any, for the current token (the
// token last returned by Next()). The string is 0-terminated.
// Literal strings are collected for identifiers, strings, and
// numbers.
// Literal strings are collected for identifiers, strings, numbers as well
// as for template literals. For template literals we also collect the raw
// form.
// These functions only give the correct result if the literal
// was scanned between calls to StartLiteral() and TerminateLiteral().
Vector<const uint8_t> literal_one_byte_string() {
......@@ -612,10 +644,19 @@ class Scanner {
DCHECK_NOT_NULL(next_.literal_chars);
return next_.literal_chars->is_one_byte();
}
int next_literal_length() const {
DCHECK_NOT_NULL(next_.literal_chars);
return next_.literal_chars->length();
// Returns the one-byte view of the raw literal collected for the current
// token. Requires that a raw literal was captured for that token
// (current_.raw_literal_chars non-null; checked in debug builds).
// NOTE(review): presumably only meaningful when is_raw_literal_one_byte()
// is true, which is how CurrentRawSymbol() uses it -- confirm.
Vector<const uint8_t> raw_literal_one_byte_string() {
DCHECK_NOT_NULL(current_.raw_literal_chars);
return current_.raw_literal_chars->one_byte_literal();
}
// Returns the two-byte view of the raw literal collected for the current
// token. Requires that a raw literal was captured for that token
// (current_.raw_literal_chars non-null; checked in debug builds).
// NOTE(review): presumably only meaningful when is_raw_literal_one_byte()
// is false, which is how CurrentRawSymbol() uses it -- confirm.
Vector<const uint16_t> raw_literal_two_byte_string() {
DCHECK_NOT_NULL(current_.raw_literal_chars);
return current_.raw_literal_chars->two_byte_literal();
}
// Reports whether the raw literal collected for the current token is stored
// in one-byte encoding. Requires that a raw literal was captured for that
// token (current_.raw_literal_chars non-null; checked in debug builds).
bool is_raw_literal_one_byte() {
DCHECK_NOT_NULL(current_.raw_literal_chars);
return current_.raw_literal_chars->is_one_byte();
}
uc32 ScanHexNumber(int expected_length);
// Scan a number of any length but not bigger than max_value. For example, the
......@@ -666,6 +707,13 @@ class Scanner {
LiteralBuffer source_url_;
LiteralBuffer source_mapping_url_;
// Buffer to store raw string values
LiteralBuffer raw_literal_buffer_;
// We only need to capture the raw literal when we are scanning template
// literal spans.
bool capturing_raw_literal_;
TokenDesc current_; // desc for current token (as returned by Next())
TokenDesc next_; // desc for next token (one token look-ahead)
......
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-templates
// Flags: --harmony-templates --harmony-unicode
var num = 5;
var str = "str";
......@@ -428,3 +428,28 @@ var obj = {
function tag(){}
tag`a${1}b`;
})();
(function testRawLineNormalization() {
  // Tag function that hands back the raw form of the first template span.
  function raw0(callSiteObj) {
    return callSiteObj.raw[0];
  }

  // [source text to evaluate, expected raw string]. The sources go through
  // eval so that the CR / CRLF characters reach the scanner exactly as
  // written; every CR and every CRLF pair must come back as a single LF.
  var cases = [
    ["raw0`\r`", "\n"],
    ["raw0`\r\n`", "\n"],
    ["raw0`\r\r\n`", "\n\n"],
    ["raw0`\r\n\r\n`", "\n\n"],
    ["raw0`\r\r\r\n`", "\n\n\n"]
  ];

  for (var i = 0; i < cases.length; i++) {
    assertEquals(eval(cases[i][0]), cases[i][1]);
  }
})();
// Checks \u{...} escapes inside template literals (this file is run with
// --harmony-unicode, see the Flags line): the raw string must keep the escape
// sequence exactly as written, while the cooked value decodes it.
(function testHarmonyUnicode() {
// Tag function that returns the raw form of the first template span.
function raw0(callSiteObj) {
return callSiteObj.raw[0];
}
// Raw form: the escape text is preserved, including leading zeros.
assertEquals(raw0`a\u{62}c`, "a\\u{62}c");
assertEquals(raw0`a\u{000062}c`, "a\\u{000062}c");
assertEquals(raw0`a\u{0}c`, "a\\u{0}c");
// Cooked form (untagged template): the escape is decoded to "b".
assertEquals(`a\u{62}c`, "abc");
assertEquals(`a\u{000062}c`, "abc");
})();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment