Commit 0f06f80f authored by franzih's avatar franzih Committed by Commit bot

[builtins] Migrate escape/unescape from uri.js to C++.

These functions call into C++ anyway, so there is no need to
keep the JavaScript wrappers around them. This commit removes uri.js
completely.

R=yangguo@chromium.org, bmeurer@chromium.org

BUG=v8:4912

Review-Url: https://codereview.chromium.org/2027003002
Cr-Commit-Position: refs/heads/master@{#36629}
parent 0e5c6a4b
...@@ -356,7 +356,6 @@ action("js2c") { ...@@ -356,7 +356,6 @@ action("js2c") {
"src/js/symbol.js", "src/js/symbol.js",
"src/js/array.js", "src/js/array.js",
"src/js/string.js", "src/js/string.js",
"src/js/uri.js",
"src/js/math.js", "src/js/math.js",
"src/third_party/fdlibm/fdlibm.js", "src/third_party/fdlibm/fdlibm.js",
"src/js/regexp.js", "src/js/regexp.js",
...@@ -1400,7 +1399,6 @@ v8_source_set("v8_base") { ...@@ -1400,7 +1399,6 @@ v8_source_set("v8_base") {
"src/runtime/runtime-symbol.cc", "src/runtime/runtime-symbol.cc",
"src/runtime/runtime-test.cc", "src/runtime/runtime-test.cc",
"src/runtime/runtime-typedarray.cc", "src/runtime/runtime-typedarray.cc",
"src/runtime/runtime-uri.cc",
"src/runtime/runtime-utils.h", "src/runtime/runtime-utils.h",
"src/runtime/runtime.cc", "src/runtime/runtime.cc",
"src/runtime/runtime.h", "src/runtime/runtime.h",
......
...@@ -2854,6 +2854,14 @@ bool Genesis::InstallNatives(GlobalContextType context_type) { ...@@ -2854,6 +2854,14 @@ bool Genesis::InstallNatives(GlobalContextType context_type) {
SimpleInstallFunction(global_object, "encodeURIComponent", SimpleInstallFunction(global_object, "encodeURIComponent",
Builtins::kGlobalEncodeURIComponent, 1, false); Builtins::kGlobalEncodeURIComponent, 1, false);
// Install Global.escape.
SimpleInstallFunction(global_object, "escape", Builtins::kGlobalEscape, 1,
false);
// Install Global.unescape.
SimpleInstallFunction(global_object, "unescape", Builtins::kGlobalUnescape, 1,
false);
// Install Global.eval. // Install Global.eval.
{ {
Handle<JSFunction> eval = Handle<JSFunction> eval =
......
...@@ -2156,6 +2156,28 @@ BUILTIN(GlobalEncodeURIComponent) { ...@@ -2156,6 +2156,28 @@ BUILTIN(GlobalEncodeURIComponent) {
Uri::EncodeUriComponent(isolate, uri_component)); Uri::EncodeUriComponent(isolate, uri_component));
} }
// ES6 section B.2.1.1 escape (string)
// Converts argument slot 1 (undefined when absent) to a string and hands it
// to Uri::Escape. Each *_FAILURE macro leaves the builtin early when its
// step does not produce a value.
BUILTIN(GlobalEscape) {
  HandleScope scope(isolate);
  Handle<Object> input = args.atOrUndefined(isolate, 1);
  Handle<String> string;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
                                     Object::ToString(isolate, input));
  RETURN_RESULT_OR_FAILURE(isolate, Uri::Escape(isolate, string));
}
// ES6 section B.2.1.2 unescape (string)
// Converts argument slot 1 (undefined when absent) to a string and hands it
// to Uri::Unescape. Each *_FAILURE macro leaves the builtin early when its
// step does not produce a value.
BUILTIN(GlobalUnescape) {
  HandleScope scope(isolate);
  Handle<Object> input = args.atOrUndefined(isolate, 1);
  Handle<String> string;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
                                     Object::ToString(isolate, input));
  RETURN_RESULT_OR_FAILURE(isolate, Uri::Unescape(isolate, string));
}
namespace { namespace {
bool CodeGenerationFromStringsAllowed(Isolate* isolate, bool CodeGenerationFromStringsAllowed(Isolate* isolate,
......
...@@ -121,6 +121,8 @@ inline bool operator&(BuiltinExtraArguments lhs, BuiltinExtraArguments rhs) { ...@@ -121,6 +121,8 @@ inline bool operator&(BuiltinExtraArguments lhs, BuiltinExtraArguments rhs) {
V(GlobalDecodeURIComponent, kNone) \ V(GlobalDecodeURIComponent, kNone) \
V(GlobalEncodeURI, kNone) \ V(GlobalEncodeURI, kNone) \
V(GlobalEncodeURIComponent, kNone) \ V(GlobalEncodeURIComponent, kNone) \
V(GlobalEscape, kNone) \
V(GlobalUnescape, kNone) \
\ \
V(GlobalEval, kTarget) \ V(GlobalEval, kTarget) \
\ \
......
// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// This file contains support for URI manipulations written in
// JavaScript.
// Module wrapper for the URI natives. This is a bare function expression
// taking (global, utils); it is not invoked in this file, so whoever supplies
// those two arguments lives elsewhere.
(function(global, utils) {
"use strict";
// NOTE(review): presumably guards against running outside of bootstrapping;
// the intrinsic is defined elsewhere -- confirm.
%CheckIsBootstrapping();
// -------------------------------------------------------------------
// Define exported functions.
// ECMA-262 - B.2.1.
// Thin wrapper: passes s straight to the %URIEscape runtime function and
// returns whatever it returns.
function URIEscapeJS(s) {
return %URIEscape(s);
}
// ECMA-262 - B.2.2.
// Thin wrapper: passes s straight to the %URIUnescape runtime function and
// returns whatever it returns.
function URIUnescapeJS(s) {
return %URIUnescape(s);
}
// -------------------------------------------------------------------
// Install exported functions.
// Set up non-enumerable URI functions on the global object and set
// their names.
// The array alternates between the property name to install and the function
// implementing it.
utils.InstallFunctions(global, DONT_ENUM, [
"escape", URIEscapeJS,
"unescape", URIUnescapeJS
]);
})
This diff is collapsed.
...@@ -936,10 +936,6 @@ namespace internal { ...@@ -936,10 +936,6 @@ namespace internal {
F(DataViewSetFloat64, 4, 1) F(DataViewSetFloat64, 4, 1)
#define FOR_EACH_INTRINSIC_URI(F) \
F(URIEscape, 1, 1) \
F(URIUnescape, 1, 1)
#define FOR_EACH_INTRINSIC_RETURN_PAIR(F) \ #define FOR_EACH_INTRINSIC_RETURN_PAIR(F) \
F(LoadLookupSlotForCall, 1, 2) F(LoadLookupSlotForCall, 1, 2)
...@@ -972,7 +968,6 @@ namespace internal { ...@@ -972,7 +968,6 @@ namespace internal {
F(ToBooleanIC_Miss, 1, 1) \ F(ToBooleanIC_Miss, 1, 1) \
F(Unreachable, 0, 1) F(Unreachable, 0, 1)
#define FOR_EACH_INTRINSIC_RETURN_OBJECT(F) \ #define FOR_EACH_INTRINSIC_RETURN_OBJECT(F) \
FOR_EACH_INTRINSIC_IC(F) \ FOR_EACH_INTRINSIC_IC(F) \
FOR_EACH_INTRINSIC_ARRAY(F) \ FOR_EACH_INTRINSIC_ARRAY(F) \
...@@ -1002,8 +997,7 @@ namespace internal { ...@@ -1002,8 +997,7 @@ namespace internal {
FOR_EACH_INTRINSIC_STRINGS(F) \ FOR_EACH_INTRINSIC_STRINGS(F) \
FOR_EACH_INTRINSIC_SYMBOL(F) \ FOR_EACH_INTRINSIC_SYMBOL(F) \
FOR_EACH_INTRINSIC_TEST(F) \ FOR_EACH_INTRINSIC_TEST(F) \
FOR_EACH_INTRINSIC_TYPEDARRAY(F) \ FOR_EACH_INTRINSIC_TYPEDARRAY(F)
FOR_EACH_INTRINSIC_URI(F)
// FOR_EACH_INTRINSIC defines the list of all intrinsics, coming in 2 flavors, // FOR_EACH_INTRINSIC defines the list of all intrinsics, coming in 2 flavors,
// either returning an object or a pair. // either returning an object or a pair.
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "src/handles.h" #include "src/handles.h"
#include "src/isolate-inl.h" #include "src/isolate-inl.h"
#include "src/list.h" #include "src/list.h"
#include "src/string-search.h"
namespace v8 { namespace v8 {
namespace internal { namespace internal {
...@@ -59,14 +60,14 @@ bool DecodeOctets(const uint8_t* octets, int length, List<uc16>* buffer) { ...@@ -59,14 +60,14 @@ bool DecodeOctets(const uint8_t* octets, int length, List<uc16>* buffer) {
return true; return true;
} }
bool TwoDigitHex(int index, String::FlatContent* uri_content, uc16* decoded) { int TwoDigitHex(uc16 character1, uc16 character2) {
char high = HexValue(uri_content->Get(index + 1)); if (character1 > 'f') return -1;
char low = HexValue(uri_content->Get(index + 2)); int high = HexValue(character1);
if (high < 0 || low < 0) { if (high == -1) return -1;
return false; if (character2 > 'f') return -1;
} int low = HexValue(character2);
*decoded = (high << 4) | low; if (low == -1) return -1;
return true; return (high << 4) + low;
} }
template <typename T> template <typename T>
...@@ -92,7 +93,9 @@ bool IntoTwoByte(int index, bool is_uri, int uri_length, ...@@ -92,7 +93,9 @@ bool IntoTwoByte(int index, bool is_uri, int uri_length,
uc16 code = uri_content->Get(k); uc16 code = uri_content->Get(k);
if (code == '%') { if (code == '%') {
uc16 decoded; uc16 decoded;
if (k + 2 >= uri_length || !TwoDigitHex(k, uri_content, &decoded)) { if (k + 2 >= uri_length ||
(decoded = TwoDigitHex(uri_content->Get(k + 1),
uri_content->Get(k + 2))) < 0) {
return false; return false;
} }
k += 2; k += 2;
...@@ -109,7 +112,8 @@ bool IntoTwoByte(int index, bool is_uri, int uri_length, ...@@ -109,7 +112,8 @@ bool IntoTwoByte(int index, bool is_uri, int uri_length,
uc16 continuation_byte; uc16 continuation_byte;
if (uri_content->Get(++k) != '%' || if (uri_content->Get(++k) != '%' ||
!TwoDigitHex(k, uri_content, &continuation_byte)) { (continuation_byte = TwoDigitHex(uri_content->Get(k + 1),
uri_content->Get(k + 2))) < 0) {
return false; return false;
} }
k += 2; k += 2;
...@@ -140,7 +144,9 @@ bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri, ...@@ -140,7 +144,9 @@ bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,
uc16 code = uri_content.Get(k); uc16 code = uri_content.Get(k);
if (code == '%') { if (code == '%') {
uc16 decoded; uc16 decoded;
if (k + 2 >= uri_length || !TwoDigitHex(k, &uri_content, &decoded)) { if (k + 2 >= uri_length ||
(decoded = TwoDigitHex(uri_content.Get(k + 1),
uri_content.Get(k + 2))) < 0) {
return false; return false;
} }
...@@ -234,7 +240,7 @@ bool IsUriSeparator(uc16 c) { ...@@ -234,7 +240,7 @@ bool IsUriSeparator(uc16 c) {
} }
} }
void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { void AddEncodedOctetToBuffer(uint8_t octet, List<uint8_t>* buffer) {
buffer->Add('%'); buffer->Add('%');
buffer->Add(HexCharOfValue(octet >> 4)); buffer->Add(HexCharOfValue(octet >> 4));
buffer->Add(HexCharOfValue(octet & 0x0F)); buffer->Add(HexCharOfValue(octet & 0x0F));
...@@ -246,17 +252,17 @@ void EncodeSingle(uc16 c, List<uint8_t>* buffer) { ...@@ -246,17 +252,17 @@ void EncodeSingle(uc16 c, List<uint8_t>* buffer) {
number_of_bytes = number_of_bytes =
unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false); unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);
for (int k = 0; k < number_of_bytes; k++) { for (int k = 0; k < number_of_bytes; k++) {
AddHexEncodedToBuffer(s[k], buffer); AddEncodedOctetToBuffer(s[k], buffer);
} }
} }
void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {
char s[4]; char s[4] = {};
int number_of_bytes = int number_of_bytes =
unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2), unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),
unibrow::Utf16::kNoPreviousCharacter, false); unibrow::Utf16::kNoPreviousCharacter, false);
for (int k = 0; k < number_of_bytes; k++) { for (int k = 0; k < number_of_bytes; k++) {
AddHexEncodedToBuffer(s[k], buffer); AddEncodedOctetToBuffer(s[k], buffer);
} }
} }
...@@ -301,5 +307,199 @@ MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri, ...@@ -301,5 +307,199 @@ MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri,
return isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()); return isolate->factory()->NewStringFromOneByte(buffer.ToConstVector());
} }
namespace { // Anonymous namespace for Escape and Unescape
// Decodes the character that starts at position |i| of |vector|.
//
// Recognizes two escape forms, tried in this order:
//   %uXXXX  -> 16-bit value, consumes 6 characters
//   %XX     -> 8-bit value, consumes 3 characters
// Anything else (including a malformed or truncated escape) is returned
// as-is and consumes 1 character. The number of consumed characters is
// written to |*step|; the decoded code unit is the return value.
template <typename Char>
int UnescapeChar(Vector<const Char> vector, int i, int length, int* step) {
  const uint16_t current = vector[i];
  if (current == '%') {
    // Long form: needs six characters and a literal 'u' after the '%'.
    if (i <= length - 6 && vector[i + 1] == 'u') {
      const int32_t high_byte = TwoDigitHex(vector[i + 2], vector[i + 3]);
      if (high_byte > -1) {
        const int32_t low_byte = TwoDigitHex(vector[i + 4], vector[i + 5]);
        if (low_byte > -1) {
          *step = 6;
          return (high_byte << 8) + low_byte;
        }
      }
    }
    // Short form: also the fallback when the long form did not parse.
    if (i <= length - 3) {
      const int32_t value = TwoDigitHex(vector[i + 1], vector[i + 2]);
      if (value > -1) {
        *step = 3;
        return value;
      }
    }
  }
  // Not an escape sequence: pass the character through unchanged.
  *step = 1;
  return current;
}
// Produces the unescaped form of |string| when at least one '%' is present.
// |start_index| is where decoding starts; everything before it is reused as
// a substring of the input. The decoded tail is written into a freshly
// allocated sequential string (one-byte if every decoded code unit fits,
// two-byte otherwise) and the two parts are joined with a cons string.
template <typename Char>
MaybeHandle<String> UnescapeSlow(Isolate* isolate, Handle<String> string,
                                 int start_index) {
  const int length = string->length();

  // Pass 1: measure the decoded tail and determine the narrowest encoding.
  bool fits_one_byte = true;
  int unescaped_length = 0;
  {
    DisallowHeapAllocation no_allocation;
    Vector<const Char> chars = string->GetCharVector<Char>();
    int pos = start_index;
    while (pos < length) {
      int step;
      const int code = UnescapeChar(chars, pos, length, &step);
      if (code > String::kMaxOneByteCharCode) fits_one_byte = false;
      pos += step;
      unescaped_length++;
    }
  }

  DCHECK(start_index < length);
  Handle<String> first_part =
      isolate->factory()->NewProperSubString(string, 0, start_index);

  DCHECK(unescaped_length <= String::kMaxLength);
  Handle<String> second_part;

  // Pass 2: decode again, this time storing into the destination. The char
  // vector is re-fetched after the allocations above, inside a fresh
  // no-allocation scope.
  if (fits_one_byte) {
    Handle<SeqOneByteString> dest = isolate->factory()
                                        ->NewRawOneByteString(unescaped_length)
                                        .ToHandleChecked();
    DisallowHeapAllocation no_allocation;
    Vector<const Char> chars = string->GetCharVector<Char>();
    int pos = start_index;
    for (int out = 0; pos < length; out++) {
      int step;
      dest->SeqOneByteStringSet(out, UnescapeChar(chars, pos, length, &step));
      pos += step;
    }
    second_part = dest;
  } else {
    Handle<SeqTwoByteString> dest = isolate->factory()
                                        ->NewRawTwoByteString(unescaped_length)
                                        .ToHandleChecked();
    DisallowHeapAllocation no_allocation;
    Vector<const Char> chars = string->GetCharVector<Char>();
    int pos = start_index;
    for (int out = 0; pos < length; out++) {
      int step;
      dest->SeqTwoByteStringSet(out, UnescapeChar(chars, pos, length, &step));
      pos += step;
    }
    second_part = dest;
  }

  return isolate->factory()->NewConsString(first_part, second_part);
}
// Returns true for characters that escape() copies through verbatim:
// anything IsAlphaNumeric accepts, plus the punctuation set @*_+-./
bool IsNotEscaped(uint16_t c) {
  if (IsAlphaNumeric(c)) return true;
  return c == '@' || c == '*' || c == '_' || c == '+' || c == '-' ||
         c == '.' || c == '/';
}
// Fast path for unescape(): searches |source| for the first '%'. Without one
// there is nothing to decode and |source| itself is returned; otherwise
// decoding is delegated to UnescapeSlow starting at that position.
template <typename Char>
static MaybeHandle<String> UnescapePrivate(Isolate* isolate,
                                           Handle<String> source) {
  int first_percent;
  {
    DisallowHeapAllocation no_allocation;
    StringSearch<uint8_t, Char> percent_search(isolate,
                                               STATIC_CHAR_VECTOR("%"));
    first_percent = percent_search.Search(source->GetCharVector<Char>(), 0);
  }
  if (first_percent < 0) return source;
  return UnescapeSlow<Char>(isolate, source, first_percent);
}
// Implements escape() for a flat |string| whose characters are of type Char.
//
// Works in two passes: the first computes the escaped length, the second
// fills a new one-byte string. Each input character becomes
//   6 characters ("%uXXXX") when its code is >= 256,
//   1 character (itself)    when IsNotEscaped accepts it,
//   3 characters ("%XX")    otherwise.
// If the computed length equals the input length nothing needed escaping and
// the input string is returned unchanged.
template <typename Char>
static MaybeHandle<String> EscapePrivate(Isolate* isolate,
                                         Handle<String> string) {
  DCHECK(string->IsFlat());
  const int length = string->length();

  // Pass 1: compute the length of the escaped string.
  int escaped_length = 0;
  {
    DisallowHeapAllocation no_allocation;
    Vector<const Char> chars = string->GetCharVector<Char>();
    for (int i = 0; i < length; i++) {
      const uint16_t c = chars[i];
      if (c >= 256) {
        escaped_length += 6;
      } else {
        escaped_length += IsNotEscaped(c) ? 1 : 3;
      }
      // We don't allow strings that are longer than a maximal length.
      DCHECK(String::kMaxLength < 0x7fffffff - 6);     // Cannot overflow.
      if (escaped_length > String::kMaxLength) break;  // Provoke exception.
    }
  }

  // No length change implies no change. Return original string if no change.
  if (escaped_length == length) return string;

  Handle<SeqOneByteString> dest;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length),
      String);

  // Pass 2: emit the escaped characters.
  {
    DisallowHeapAllocation no_allocation;
    Vector<const Char> chars = string->GetCharVector<Char>();
    int out = 0;
    for (int i = 0; i < length; i++) {
      const uint16_t c = chars[i];
      if (c < 256 && IsNotEscaped(c)) {
        dest->SeqOneByteStringSet(out++, c);
        continue;
      }
      dest->SeqOneByteStringSet(out++, '%');
      if (c >= 256) {
        // Wide form: 'u' followed by the two high hex digits.
        dest->SeqOneByteStringSet(out++, 'u');
        dest->SeqOneByteStringSet(out++, HexCharOfValue(c >> 12));
        dest->SeqOneByteStringSet(out++, HexCharOfValue((c >> 8) & 0xf));
      }
      // The two low hex digits are shared by both forms; for c < 256 the
      // mask on the upper nibble is a no-op.
      dest->SeqOneByteStringSet(out++, HexCharOfValue((c >> 4) & 0xf));
      dest->SeqOneByteStringSet(out++, HexCharOfValue(c & 0xf));
    }
  }

  return dest;
}
} // Anonymous namespace
// ES6 section B.2.1.1 escape (string)
// Flattens |string| and dispatches to the EscapePrivate instantiation that
// matches the string's underlying character width. Returns whatever
// EscapePrivate returns (the escaped string, or an empty MaybeHandle if it
// failed).
MaybeHandle<String> Uri::Escape(Isolate* isolate, Handle<String> string) {
  // EscapePrivate DCHECKs that its input is flat, so flatten up front.
  string = String::Flatten(string);
  return string->IsOneByteRepresentationUnderneath()
             ? EscapePrivate<uint8_t>(isolate, string)
             : EscapePrivate<uc16>(isolate, string);
}
// ES6 section B.2.1.2 unescape (string)
// Flattens |string| and dispatches to the UnescapePrivate instantiation that
// matches the string's underlying character width. Returns whatever
// UnescapePrivate returns.
MaybeHandle<String> Uri::Unescape(Isolate* isolate, Handle<String> string) {
  string = String::Flatten(string);
  return string->IsOneByteRepresentationUnderneath()
             ? UnescapePrivate<uint8_t>(isolate, string)
             : UnescapePrivate<uc16>(isolate, string);
}
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8
...@@ -35,6 +35,12 @@ class Uri : public AllStatic { ...@@ -35,6 +35,12 @@ class Uri : public AllStatic {
return Encode(isolate, component, false); return Encode(isolate, component, false);
} }
// ES6 section B.2.1.1 escape (string)
static MaybeHandle<String> Escape(Isolate* isolate, Handle<String> string);
// ES6 section B.2.1.2 unescape (string)
static MaybeHandle<String> Unescape(Isolate* isolate, Handle<String> string);
private: private:
static MaybeHandle<String> Decode(Isolate* isolate, Handle<String> uri, static MaybeHandle<String> Decode(Isolate* isolate, Handle<String> uri,
bool is_uri); bool is_uri);
......
...@@ -1069,7 +1069,6 @@ ...@@ -1069,7 +1069,6 @@
'runtime/runtime-symbol.cc', 'runtime/runtime-symbol.cc',
'runtime/runtime-test.cc', 'runtime/runtime-test.cc',
'runtime/runtime-typedarray.cc', 'runtime/runtime-typedarray.cc',
'runtime/runtime-uri.cc',
'runtime/runtime-utils.h', 'runtime/runtime-utils.h',
'runtime/runtime.cc', 'runtime/runtime.cc',
'runtime/runtime.h', 'runtime/runtime.h',
...@@ -2049,7 +2048,6 @@ ...@@ -2049,7 +2048,6 @@
'js/symbol.js', 'js/symbol.js',
'js/array.js', 'js/array.js',
'js/string.js', 'js/string.js',
'js/uri.js',
'js/math.js', 'js/math.js',
'third_party/fdlibm/fdlibm.js', 'third_party/fdlibm/fdlibm.js',
'js/regexp.js', 'js/regexp.js',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment