Commit 6502a1bf authored by franzih's avatar franzih Committed by Commit bot

[runtime] Implement encodeURI as single runtime function.

Rewrite encodeURI as runtime function. We will probably
repackage runtime_URIEncode as a C++ builtin.

BUG=v8:4912

R=yangguo@chromium.org

Review-Url: https://codereview.chromium.org/1968953002
Cr-Commit-Position: refs/heads/master@{#36257}
parent 29611a95
......@@ -539,13 +539,6 @@ static int SizeInHexChars(S number) {
}
// Returns the upper-case hexadecimal digit character for |value|.
// |value| must lie in [0, 15]; 0-9 map to '0'-'9' and 10-15 map to 'A'-'F'.
static char HexCharOfValue(int value) {
  // The upper bound is exclusive: 16 would map to 'G', which is not a hex
  // digit, so it must not pass the check.
  DCHECK(0 <= value && value < 16);
  if (value < 10) return value + '0';
  return value - 10 + 'A';
}
bool Bignum::ToHexString(char* buffer, int buffer_size) const {
DCHECK(IsClamped());
// Each bigit must be printable as separate hex-character.
......
......@@ -560,6 +560,11 @@ class Factory final {
return NewRangeError(MessageTemplate::kInvalidStringLength);
}
// Builds an error object from the isolate's URIError constructor using the
// kURIMalformed message template.
Handle<Object> NewURIError() {
  const MessageTemplate::Template message = MessageTemplate::kURIMalformed;
  return NewError(isolate()->uri_error_function(), message);
}
Handle<Object> NewError(Handle<JSFunction> constructor,
MessageTemplate::Template template_index,
Handle<Object> arg0 = Handle<Object>(),
......
......@@ -37,72 +37,6 @@ function HexValueOf(code) {
return -1;
}
// Reports whether the char code denotes an ASCII letter or a decimal digit.
function isAlphaNumeric(cc) {
  return (97 <= cc && cc <= 122) ||  // a - z
         (65 <= cc && cc <= 90) ||   // A - Z
         (48 <= cc && cc <= 57);     // 0 - 9
}
// Lazily initialized. Holds 0 until URIEncodeOctets replaces it with the
// table of char codes for the hex digits '0'-'9' and 'A'-'F'.
var hexCharCodeArray = 0;
// Writes the percent-encoded form of |octet| ('%' followed by its two hex
// digit char codes) into |result| starting at |index|, and returns the index
// just past the three written elements.
function URIAddEncodedOctetToBuffer(octet, result, index) {
  var highNibble = octet >> 4;
  var lowNibble = octet & 0x0F;
  result[index++] = 37;  // Char code of '%'.
  result[index++] = hexCharCodeArray[highNibble];
  result[index++] = hexCharCodeArray[lowNibble];
  return index;
}
// Percent-encodes the octets held in |octets| (at most four are examined)
// into |result| starting at |index| and returns the next free index.
// The first octet is always emitted; the following ones only when truthy.
function URIEncodeOctets(octets, result, index) {
  if (hexCharCodeArray === 0) {
    // First use: build the table of char codes for '0'-'9' and 'A'-'F'.
    hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
                        65, 66, 67, 68, 69, 70];
  }
  var cursor = URIAddEncodedOctetToBuffer(octets[0], result, index);
  for (var i = 1; i <= 3; i++) {
    if (octets[i]) {
      cursor = URIAddEncodedOctetToBuffer(octets[i], result, cursor);
    }
  }
  return cursor;
}
// Splits the single code unit |cc| into one, two or three UTF-8 octets
// (depending on its magnitude) and appends their percent-encoded form to
// |result| at |index|. Returns the next free index.
function URIEncodeSingle(cc, result, index) {
  var octets = new InternalArray(3);
  var low6 = cc & 63;
  if (cc <= 0x007F) {
    // One octet: the code unit itself.
    octets[0] = cc;
  } else {
    var mid6 = (cc >> 6) & 63;
    if (cc <= 0x07FF) {
      // Two octets: 110yyyyy 10zzzzzz.
      octets[0] = mid6 + 192;
      octets[1] = low6 + 128;
    } else {
      // Three octets: 1110xxxx 10yyyyyy 10zzzzzz.
      var top4 = (cc >> 12) & 0xF;
      octets[0] = top4 + 224;
      octets[1] = mid6 + 128;
      octets[2] = low6 + 128;
    }
  }
  return URIEncodeOctets(octets, result, index);
}
// Combines the surrogate pair (|cc1|, |cc2|) into four UTF-8 octets and
// appends their percent-encoded form to |result| at |index|.
// Returns the next free index.
function URIEncodePair(cc1 , cc2, result, index) {
  // Bits 6-9 of the first unit plus one.
  var planeBits = ((cc1 >> 6) & 0xF) + 1;
  var leadMid4 = (cc1 >> 2) & 0xF;
  var leadLow2 = cc1 & 3;
  var trailHigh4 = (cc2 >> 6) & 0xF;
  var trailLow6 = cc2 & 63;

  var octets = new InternalArray(4);
  octets[0] = (planeBits >> 2) + 240;
  octets[1] = (((planeBits & 3) << 4) | leadMid4) + 128;
  octets[2] = ((leadLow2 << 4) | trailHigh4) + 128;
  octets[3] = trailLow6 + 128;
  return URIEncodeOctets(octets, result, index);
}
function URIHexCharsToCharCode(highChar, lowChar) {
var highCode = HexValueOf(highChar);
var lowCode = HexValueOf(lowChar);
......@@ -167,37 +101,6 @@ function URIDecodeOctets(octets, result, index) {
return index;
}
// ECMA-262, section 15.1.3
function Encode(uri, unescape) {
uri = TO_STRING(uri);
var uriLength = uri.length;
var array = new InternalArray(uriLength);
var index = 0;
for (var k = 0; k < uriLength; k++) {
var cc1 = %_StringCharCodeAt(uri, k);
if (unescape(cc1)) {
array[index++] = cc1;
} else {
if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw MakeURIError();
if (cc1 < 0xD800 || cc1 > 0xDBFF) {
index = URIEncodeSingle(cc1, array, index);
} else {
k++;
if (k == uriLength) throw MakeURIError();
var cc2 = %_StringCharCodeAt(uri, k);
if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw MakeURIError();
index = URIEncodePair(cc1, cc2, array, index);
}
}
}
var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
for (var i = 0; i < array.length; i++) {
%_OneByteSeqStringSetChar(i, array[i], result);
}
return result;
}
// ECMA-262, section 15.1.3
function Decode(uri, reserved) {
uri = TO_STRING(uri);
......@@ -318,48 +221,14 @@ function URIDecodeComponent(component) {
// ECMA-262 - 15.1.3.3.
// NOTE(review): this span is taken from a diff hunk and appears to interleave
// the removed predicate-based body with the two statements the commit adds at
// the end. As written here, everything after `return Encode(...)` is
// unreachable — confirm against the committed file.
function URIEncode(uri) {
// Returns true for char codes that are copied through without escaping:
// alphanumerics plus the punctuation listed in the comments below.
var unescapePredicate = function(cc) {
if (isAlphaNumeric(cc)) return true;
// !
if (cc == 33) return true;
// #$
if (35 <= cc && cc <= 36) return true;
// &'()*+,-./
if (38 <= cc && cc <= 47) return true;
// :;
if (58 <= cc && cc <= 59) return true;
// =
if (cc == 61) return true;
// ?@
if (63 <= cc && cc <= 64) return true;
// _
if (cc == 95) return true;
// ~
if (cc == 126) return true;
return false;
};
return Encode(uri, unescapePredicate);
// Runtime-function form: convert to a string, then call %URIEncode with the
// second argument set to true.
uri = TO_STRING(uri);
return %URIEncode(uri, true);
}
// ECMA-262 - 15.1.3.4
// NOTE(review): this span is taken from a diff hunk and appears to interleave
// the removed predicate-based body with the two statements the commit adds at
// the end. As written here, everything after `return Encode(...)` is
// unreachable — confirm against the committed file.
function URIEncodeComponent(component) {
// Returns true for char codes that are copied through without escaping:
// alphanumerics plus the punctuation listed in the comments below.
var unescapePredicate = function(cc) {
if (isAlphaNumeric(cc)) return true;
// !
if (cc == 33) return true;
// '()*
if (39 <= cc && cc <= 42) return true;
// -.
if (45 <= cc && cc <= 46) return true;
// _
if (cc == 95) return true;
// ~
if (cc == 126) return true;
return false;
};
return Encode(component, unescapePredicate);
// Runtime-function form: convert to a string, then call %URIEncode with the
// second argument set to false.
component = TO_STRING(component);
return %URIEncode(component, false);
}
// -------------------------------------------------------------------
......
......@@ -5,6 +5,7 @@
#include "src/runtime/runtime-utils.h"
#include "src/arguments.h"
#include "src/char-predicates-inl.h"
#include "src/regexp/jsregexp-inl.h"
#include "src/string-builder.h"
#include "src/string-search.h"
......@@ -1151,6 +1152,132 @@ RUNTIME_FUNCTION(Runtime_NewString) {
return *result;
}
// anonymous namespace for URIEncode helper functions
namespace {
// Returns true when |c| is left unescaped even in a URI component:
// alphanumerics and the marks ! ' ( ) * - . _ ~
bool IsUnescapePredicateInUriComponent(uc16 c) {
  if (IsAlphaNumeric(c)) return true;
  return c == '!' || c == '\'' || c == '(' || c == ')' || c == '*' ||
         c == '-' || c == '.' || c == '_' || c == '~';
}
// Returns true when |c| is one of the URI separator characters
// # : ; / ? $ & + , @ =
bool IsUriSeparator(uc16 c) {
  return c == '#' || c == ':' || c == ';' || c == '/' || c == '?' ||
         c == '$' || c == '&' || c == '+' || c == ',' || c == '@' ||
         c == '=';
}
// Appends the percent-encoded form of |octet| to |buffer|: a '%' followed by
// the hex digit of the high nibble and then of the low nibble.
void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {
  const char high_digit = HexCharOfValue(octet >> 4);
  const char low_digit = HexCharOfValue(octet & 0x0F);
  buffer->Add('%');
  buffer->Add(high_digit);
  buffer->Add(low_digit);
}
// Appends the percent-encoded UTF-8 octets of the single code unit |c| to
// |buffer|: one octet up to 0x7F, two up to 0x7FF, three otherwise.
void EncodeSingle(uc16 c, List<uint8_t>* buffer) {
  if (c <= 0x007F) {
    AddHexEncodedToBuffer(c, buffer);
    return;
  }

  const uint8_t mid6 = (c >> 6) & 63;
  const uint8_t low6 = c & 63;
  if (c <= 0x07FF) {
    // 110yyyyy 10zzzzzz
    AddHexEncodedToBuffer(mid6 + 192, buffer);
    AddHexEncodedToBuffer(low6 + 128, buffer);
    return;
  }

  // 1110xxxx 10yyyyyy 10zzzzzz
  const uint8_t top4 = (c >> 12) & 0xF;
  AddHexEncodedToBuffer(top4 + 224, buffer);
  AddHexEncodedToBuffer(mid6 + 128, buffer);
  AddHexEncodedToBuffer(low6 + 128, buffer);
}
// Appends the four percent-encoded UTF-8 octets derived from the code unit
// pair (|cc1|, |cc2|) to |buffer|.
void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {
  // Bits 6-9 of the first unit plus one.
  const uint8_t plane_bits = ((cc1 >> 6) & 0xF) + 1;
  const uint8_t lead_mid4 = (cc1 >> 2) & 0xF;
  const uint8_t lead_low2 = cc1 & 3;
  const uint8_t trail_high4 = (cc2 >> 6) & 0xF;
  const uint8_t trail_low6 = cc2 & 63;

  AddHexEncodedToBuffer((plane_bits >> 2) + 240, buffer);
  AddHexEncodedToBuffer((((plane_bits & 3) << 4) | lead_mid4) + 128, buffer);
  AddHexEncodedToBuffer(((lead_low2 << 4) | trail_high4) + 128, buffer);
  AddHexEncodedToBuffer(trail_low6 + 128, buffer);
}
} // anonymous namespace
// Runtime function that percent-encodes a string.
//   args[0]: the String to encode.
//   args[1]: is_uri. When true, URI separator characters are copied through
//            unescaped in addition to the characters that are never escaped.
// Returns a new one-byte string holding the encoded text. Throws a URIError
// if the input contains a lead surrogate that is not followed by a trail
// surrogate, or a trail surrogate on its own.
RUNTIME_FUNCTION(Runtime_URIEncode) {
  HandleScope scope(isolate);
  DCHECK_EQ(2, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, uri, 0);
  CONVERT_BOOLEAN_ARG_CHECKED(is_uri, 1);

  uri = String::Flatten(uri);
  int uri_length = uri->length();
  List<uint8_t> buffer(uri_length);

  {
    DisallowHeapAllocation no_gc;
    String::FlatContent uri_content = uri->GetFlatContent();

    for (int k = 0; k < uri_length; k++) {
      uc16 cc1 = uri_content.Get(k);
      if (unibrow::Utf16::IsLeadSurrogate(cc1)) {
        k++;
        if (k < uri_length) {
          // Read the second unit through the flat content as well, matching
          // how the first unit is read above.
          uc16 cc2 = uri_content.Get(k);
          if (unibrow::Utf16::IsTrailSurrogate(cc2)) {
            EncodePair(cc1, cc2, &buffer);
            continue;
          }
        }
      } else if (!unibrow::Utf16::IsTrailSurrogate(cc1)) {
        if (IsUnescapePredicateInUriComponent(cc1) ||
            (is_uri && IsUriSeparator(cc1))) {
          buffer.Add(cc1);
        } else {
          EncodeSingle(cc1, &buffer);
        }
        continue;
      }

      // Only malformed surrogate sequences reach this point. Creating the
      // error object allocates, so allocation is re-enabled first.
      AllowHeapAllocation allocate_error_and_return;
      THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError());
    }
  }

  Handle<String> result;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result,
      isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()));
  return *result;
}
RUNTIME_FUNCTION(Runtime_StringLessThan) {
HandleScope handle_scope(isolate);
DCHECK_EQ(2, args.length());
......
......@@ -836,6 +836,7 @@ namespace internal {
F(StringTrim, 3, 1) \
F(TruncateString, 2, 1) \
F(NewString, 2, 1) \
F(URIEncode, 2, 1) \
F(StringLessThan, 2, 1) \
F(StringLessThanOrEqual, 2, 1) \
F(StringGreaterThan, 2, 1) \
......
......@@ -37,6 +37,11 @@ inline int HexValue(uc32 c) {
return -1;
}
// Returns the upper-case hexadecimal digit character for |value|.
// |value| must lie in [0, 15]; 0-9 map to '0'-'9' and 10-15 map to 'A'-'F'.
inline char HexCharOfValue(int value) {
  // The upper bound is exclusive: 16 would map to 'G', which is not a hex
  // digit, so it must not pass the check.
  DCHECK(0 <= value && value < 16);
  if (value < 10) return value + '0';
  return value - 10 + 'A';
}
// Maps false to 0 and true to 1.
inline int BoolToInt(bool b) {
  if (b) return 1;
  return 0;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment