Commit af5eed10 authored by yangguo@chromium.org

Handlify escape and unescape.

R=mvstanton@chromium.org
BUG=

Review URL: https://chromiumcodereview.appspot.com/12326015

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13702 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 0832d08d
......@@ -58,6 +58,7 @@
#include "smart-pointers.h"
#include "string-search.h"
#include "stub-cache.h"
#include "uri.h"
#include "v8threads.h"
#include "vm-state-inl.h"
......@@ -5126,201 +5127,30 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_TruncateString) {
}
// kNotEscaped is generated by the following:
//
// #!/bin/perl
// for (my $i = 0; $i < 256; $i++) {
// print "\n" if $i % 16 == 0;
// my $c = chr($i);
// my $escaped = 1;
// $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
// print $escaped ? "0, " : "1, ";
// }
// Tells whether an 8-bit character code is emitted unchanged when escaping,
// i.e. whether it matches [A-Za-z0-9@*_+./-].
static bool IsNotEscaped(uint16_t character) {
  // Only 8-bit codes are classified here; anything larger is always escaped
  // (in a different way), so it must never reach this table.
  ASSERT(character < 256);
  // One flag per 8-bit code: non-zero means "copy through verbatim".
  static const char kPassThrough[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  const bool copied_verbatim = (kPassThrough[character] != 0);
  return copied_verbatim;
}
// Runtime entry point that escapes a string. Characters accepted by
// IsNotEscaped are copied unchanged, other 8-bit characters become "%XX" and
// characters >= 256 become "%uXXXX" (upper-case hex digits).
// Returns the original string when nothing needs escaping, otherwise a newly
// allocated one-byte string. Returns a failure object when the escaped length
// would exceed String::kMaxLength or when the allocation fails.
RUNTIME_FUNCTION(MaybeObject*, Runtime_URIEscape) {
const char hex_chars[] = "0123456789ABCDEF";
NoHandleAllocation ha;
ASSERT(args.length() == 1);
CONVERT_ARG_CHECKED(String, source, 0);
source->TryFlatten();
// First pass: compute the length of the escaped result.
int escaped_length = 0;
int length = source->length();
{
// The shared string iterator is only held for the duration of this scope.
Access<ConsStringIteratorOp> op(
isolate->runtime_state()->string_iterator());
StringCharacterStream stream(source, op.value());
while (stream.HasMore()) {
uint16_t character = stream.GetNext();
if (character >= 256) {
// "%uXXXX"
escaped_length += 6;
} else if (IsNotEscaped(character)) {
escaped_length++;
} else {
// "%XX"
escaped_length += 3;
}
// We don't allow strings that are longer than a maximal length.
ASSERT(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow.
if (escaped_length > String::kMaxLength) {
isolate->context()->mark_out_of_memory();
return Failure::OutOfMemoryException(0x12);
}
}
}
// No length change implies no change. Return original string if no change.
if (escaped_length == length) {
return source;
}
Object* o;
{ MaybeObject* maybe_o =
isolate->heap()->AllocateRawOneByteString(escaped_length);
// Propagate the allocation failure to the caller.
if (!maybe_o->ToObject(&o)) return maybe_o;
}
String* destination = String::cast(o);
// Second pass: write the escaped characters into the destination.
int dest_position = 0;
Access<ConsStringIteratorOp> op(
isolate->runtime_state()->string_iterator());
StringCharacterStream stream(source, op.value());
while (stream.HasMore()) {
uint16_t chr = stream.GetNext();
if (chr >= 256) {
// Four hex digits, most significant nibble first.
destination->Set(dest_position, '%');
destination->Set(dest_position+1, 'u');
destination->Set(dest_position+2, hex_chars[chr >> 12]);
destination->Set(dest_position+3, hex_chars[(chr >> 8) & 0xf]);
destination->Set(dest_position+4, hex_chars[(chr >> 4) & 0xf]);
destination->Set(dest_position+5, hex_chars[chr & 0xf]);
dest_position += 6;
} else if (IsNotEscaped(chr)) {
destination->Set(dest_position, chr);
dest_position++;
} else {
// chr < 256 here, so chr >> 4 is already a single nibble.
destination->Set(dest_position, '%');
destination->Set(dest_position+1, hex_chars[chr >> 4]);
destination->Set(dest_position+2, hex_chars[chr & 0xf]);
dest_position += 3;
}
}
return destination;
}
// Combines two hexadecimal digit characters (either case) into the byte they
// spell, with character1 as the high nibble. Returns -1 if either character
// is not a hexadecimal digit.
static inline int TwoDigitHex(uint16_t character1, uint16_t character2) {
  // Digit value for every code up to and including 'f'; -1 marks a non-digit.
  static const signed char kHexValue['g'] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, 10, 11, 12, 13, 14, 15 };
  // The table stops at 'f', so reject larger codes before indexing.
  if (character1 > 'f' || character2 > 'f') return -1;
  const int high_nibble = kHexValue[character1];
  const int low_nibble = kHexValue[character2];
  if (high_nibble == -1 || low_nibble == -1) return -1;
  return (high_nibble << 4) + low_nibble;
}
// Decodes the unit that starts at index |i| of |source|; |length| is the
// string length used for the bounds checks.
//   "%uXXXX" (four hex digits): returns the 16-bit value, *step = 6.
//   "%XX"    (two hex digits):  returns the 8-bit value,  *step = 3.
//   anything else:              returns the character,    *step = 1.
// A '%' that is not followed by a well-formed sequence is returned literally.
static inline int Unescape(String* source,
int i,
int length,
int* step) {
uint16_t character = source->Get(i);
int32_t hi = 0;
int32_t lo = 0;
// The length checks come before the Get() calls they guard, so
// short-circuit evaluation keeps every index inside the string.
if (character == '%' &&
i <= length - 6 &&
source->Get(i + 1) == 'u' &&
(hi = TwoDigitHex(source->Get(i + 2),
source->Get(i + 3))) != -1 &&
(lo = TwoDigitHex(source->Get(i + 4),
source->Get(i + 5))) != -1) {
*step = 6;
return (hi << 8) + lo;
} else if (character == '%' &&
i <= length - 3 &&
(lo = TwoDigitHex(source->Get(i + 1),
source->Get(i + 2))) != -1) {
*step = 3;
return lo;
} else {
*step = 1;
return character;
}
// NOTE(review): every branch of the if/else chain above returns, so the
// statements below cannot execute as part of this function, and they use
// |isolate|, which is not a parameter here. They look like the handle-based
// body of Runtime_URIEscape that the diff rendering interleaved with the
// removed code -- confirm against the real file before relying on this span.
HandleScope scope(isolate);
CONVERT_ARG_HANDLE_CHECKED(String, source, 0);
Handle<String> string = FlattenGetString(source);
String::FlatContent content = string->GetFlatContent();
ASSERT(content.IsFlat());
// Dispatch on the character width of the flattened content.
Handle<String> result =
content.IsAscii() ? URIEscape::Escape<uint8_t>(isolate, source)
: URIEscape::Escape<uc16>(isolate, source);
// A null handle is turned into an out-of-memory failure.
if (result.is_null()) return Failure::OutOfMemoryException(0x12);
return *result;
}
// Runtime entry point that reverses the escaping: "%XX" and "%uXXXX"
// sequences are replaced by the characters they encode. Returns the original
// string when nothing was decoded, otherwise a newly allocated string that is
// one-byte unless some decoded value exceeds String::kMaxOneByteCharCode.
RUNTIME_FUNCTION(MaybeObject*, Runtime_URIUnescape) {
NoHandleAllocation ha;
ASSERT(args.length() == 1);
CONVERT_ARG_CHECKED(String, source, 0);
source->TryFlatten();
// First pass: count the decoded characters and find the required width.
bool one_byte = true;
int length = source->length();
int unescaped_length = 0;
for (int i = 0; i < length; unescaped_length++) {
int step;
if (Unescape(source, i, length, &step) > String::kMaxOneByteCharCode) {
one_byte = false;
}
i += step;
}
// No length change implies no change. Return original string if no change.
if (unescaped_length == length)
return source;
Object* o;
{ MaybeObject* maybe_o =
one_byte ?
isolate->heap()->AllocateRawOneByteString(unescaped_length) :
isolate->heap()->AllocateRawTwoByteString(unescaped_length);
// Propagate the allocation failure to the caller.
if (!maybe_o->ToObject(&o)) return maybe_o;
}
String* destination = String::cast(o);
// Second pass: decode again, this time storing the characters.
int dest_position = 0;
for (int i = 0; i < length; dest_position++) {
int step;
destination->Set(dest_position, Unescape(source, i, length, &step));
i += step;
}
return destination;
// NOTE(review): the statements below follow an unconditional return and
// declare |source| a second time, so they cannot belong to the body above.
// They look like the handle-based replacement body that the diff rendering
// interleaved with the removed code -- confirm against the real file.
HandleScope scope(isolate);
CONVERT_ARG_HANDLE_CHECKED(String, source, 0);
Handle<String> string = FlattenGetString(source);
String::FlatContent content = string->GetFlatContent();
ASSERT(content.IsFlat());
// Dispatch on the character width of the flattened content.
return content.IsAscii() ? *URIUnescape::Unescape<uint8_t>(isolate, source)
: *URIUnescape::Unescape<uc16>(isolate, source);
}
......
// Copyright 2013 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef V8_URI_H_
#define V8_URI_H_
#include "v8.h"
#include "string-search.h"
#include "v8utils.h"
#include "v8conversions.h"
namespace v8 {
namespace internal {
// Returns a typed view of the characters of |string|, obtained from its flat
// content. Only declared here; the uint8_t and uc16 explicit specializations
// provide the definitions.
template <typename Char>
static INLINE(Vector<const Char> GetCharVector(Handle<String> string));
// One-byte specialization: asserts that the flat content is ASCII and
// returns it as a vector of uint8_t.
template <>
Vector<const uint8_t> GetCharVector(Handle<String> string) {
  String::FlatContent content = string->GetFlatContent();
  ASSERT(content.IsAscii());
  return content.ToOneByteVector();
}
// Two-byte specialization: asserts that the flat content is two-byte and
// returns it as a vector of uc16.
template <>
Vector<const uc16> GetCharVector(Handle<String> string) {
  String::FlatContent content = string->GetFlatContent();
  ASSERT(content.IsTwoByte());
  return content.ToUC16Vector();
}
// Static helpers that decode "%XX" and "%uXXXX" escape sequences.
// Char is the character type of the flat source string (uint8_t or uc16).
class URIUnescape : public AllStatic {
public:
// Returns |source| itself when it contains no '%'; otherwise returns the
// result of UnescapeSlow starting at the first '%'.
template<typename Char>
static Handle<String> Unescape(Isolate* isolate, Handle<String> source);
private:
// Hexadecimal digit value for each character code up to 'f'; -1 marks a
// character that is not a hex digit.
static const signed char kHexValue['g'];
// Decodes |string| from |start_index| onwards and returns the untouched
// prefix joined with the newly built decoded tail.
template<typename Char>
static Handle<String> UnescapeSlow(
Isolate* isolate, Handle<String> string, int start_index);
// Value of the two-digit hex number spelled by the arguments, or -1 if
// either is not a hex digit.
static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));
// Decodes the unit starting at vector[i]; stores the number of source
// characters consumed (1, 3 or 6) in *step and returns the decoded code.
template <typename Char>
static INLINE(int UnescapeChar(Vector<const Char> vector,
int i,
int length,
int* step));
};
// Maps a character code in [0, 'f'] to the value of the hexadecimal digit it
// represents, or to -1 when the character is not a hexadecimal digit. The
// table has exactly 'g' (103) entries, so codes above 'f' must be rejected
// before indexing (TwoDigitHex does this).
// Rows are 16 codes wide: '0'-'9' start the fourth row, 'A'-'F' follow '@' in
// the fifth, and 'a'-'f' follow '`' in the last.
// The entry for '0' is spelled 0 rather than -0: the value is the same, but
// -0 reads like a typo for the -1 sentinel.
const signed char URIUnescape::kHexValue[] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, 10, 11, 12, 13, 14, 15 };
// Fast path for unescaping: a string without any '%' cannot contain an
// escape sequence and is returned as is. Otherwise the work is handed to
// UnescapeSlow, starting at the first '%'.
template<typename Char>
Handle<String> URIUnescape::Unescape(Isolate* isolate, Handle<String> source) {
  int first_percent;
  { AssertNoAllocation no_allocation;
    StringSearch<uint8_t, Char> percent_search(isolate,
                                               STATIC_ASCII_VECTOR("%"));
    first_percent = percent_search.Search(GetCharVector<Char>(source), 0);
  }
  // A negative index means the search found no '%'.
  if (first_percent < 0) return source;
  return UnescapeSlow<Char>(isolate, source, first_percent);
}
// Unescapes |string| from |start_index| onwards. The characters before
// |start_index| are taken over as a substring, the remainder is decoded into
// a freshly allocated sequential string, and the two parts are joined with
// NewConsString.
template <typename Char>
Handle<String> URIUnescape::UnescapeSlow(
    Isolate* isolate, Handle<String> string, int start_index) {
  const int length = string->length();

  // Pass 1: count the decoded characters and find out whether any of them
  // exceeds the one-byte range.
  bool fits_one_byte = true;
  int unescaped_length = 0;
  { AssertNoAllocation no_allocation;
    Vector<const Char> chars = GetCharVector<Char>(string);
    int pos = start_index;
    while (pos < length) {
      int consumed;
      const int code = UnescapeChar(chars, pos, length, &consumed);
      if (code > String::kMaxOneByteCharCode) fits_one_byte = false;
      pos += consumed;
      unescaped_length++;
    }
  }

  ASSERT(start_index < length);
  Handle<String> first_part =
      isolate->factory()->NewProperSubString(string, 0, start_index);

  // Pass 2: decode again, writing into a destination of the right width.
  // The character vector is fetched anew after each allocation, inside the
  // no-allocation scope in which it is used.
  Handle<String> second_part;
  if (fits_one_byte) {
    Handle<SeqOneByteString> dest =
        isolate->factory()->NewRawOneByteString(unescaped_length);
    AssertNoAllocation no_allocation;
    Vector<const Char> chars = GetCharVector<Char>(string);
    int out = 0;
    int pos = start_index;
    while (pos < length) {
      int consumed;
      const int code = UnescapeChar(chars, pos, length, &consumed);
      dest->SeqOneByteStringSet(out, code);
      pos += consumed;
      out++;
    }
    second_part = dest;
  } else {
    Handle<SeqTwoByteString> dest =
        isolate->factory()->NewRawTwoByteString(unescaped_length);
    AssertNoAllocation no_allocation;
    Vector<const Char> chars = GetCharVector<Char>(string);
    int out = 0;
    int pos = start_index;
    while (pos < length) {
      int consumed;
      const int code = UnescapeChar(chars, pos, length, &consumed);
      dest->SeqTwoByteStringSet(out, code);
      pos += consumed;
      out++;
    }
    second_part = dest;
  }
  return isolate->factory()->NewConsString(first_part, second_part);
}
// Combines two hexadecimal digit characters (either case) into the byte they
// spell, with character1 as the high nibble. Returns -1 if either character
// is not a hexadecimal digit.
int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
  // kHexValue has no entries past 'f', so range-check before indexing.
  if (character1 > 'f' || character2 > 'f') return -1;
  const int high_nibble = kHexValue[character1];
  const int low_nibble = kHexValue[character2];
  if (high_nibble == -1 || low_nibble == -1) return -1;
  return (high_nibble << 4) + low_nibble;
}
// Decodes the unit that starts at vector[i]; |length| bounds the look-ahead.
//   "%uXXXX" (four hex digits): returns the 16-bit value, *step = 6.
//   "%XX"    (two hex digits):  returns the 8-bit value,  *step = 3.
//   anything else:              returns vector[i],        *step = 1.
// A '%' that does not start a well-formed sequence is returned literally.
template <typename Char>
int URIUnescape::UnescapeChar(Vector<const Char> vector,
                              int i,
                              int length,
                              int* step) {
  const uint16_t character = vector[i];
  if (character == '%') {
    // Try the long form first; fall through to the short form if any part of
    // it is missing or malformed.
    if (i <= length - 6 && vector[i + 1] == 'u') {
      const int high_byte = TwoDigitHex(vector[i + 2], vector[i + 3]);
      if (high_byte != -1) {
        const int low_byte = TwoDigitHex(vector[i + 4], vector[i + 5]);
        if (low_byte != -1) {
          *step = 6;
          return (high_byte << 8) + low_byte;
        }
      }
    }
    if (i <= length - 3) {
      const int byte_value = TwoDigitHex(vector[i + 1], vector[i + 2]);
      if (byte_value != -1) {
        *step = 3;
        return byte_value;
      }
    }
  }
  *step = 1;
  return character;
}
class URIEscape : public AllStatic {
public:
template<typename Char>
static Handle<String> Escape(Isolate* isolate, Handle<String> string);
private:
static const char kHexChars[17];
static const char kNotEscaped[256];
static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
};
// Upper-case hexadecimal digits, indexed by nibble value, used when emitting
// "%XX" and "%uXXXX" escapes. The 17th element of the declared array is the
// terminating NUL of the string literal.
const char URIEscape::kHexChars[] = "0123456789ABCDEF";
// kNotEscaped holds one flag per 8-bit character code: 1 means the character
// is emitted unchanged, 0 means it has to be escaped. Each row below covers
// 16 consecutive codes. The table is generated by the following:
//
// #!/bin/perl
// for (my $i = 0; $i < 256; $i++) {
//   print "\n" if $i % 16 == 0;
//   my $c = chr($i);
//   my $escaped = 1;
//   $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
//   print $escaped ? "0, " : "1, ";
// }
const char URIEscape::kNotEscaped[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
// Escapes the flat |string| whose characters are of type Char. Characters
// accepted by IsNotEscaped are copied, other 8-bit characters become "%XX"
// and characters >= 256 become "%uXXXX". Returns |string| itself when nothing
// needs escaping, a new one-byte string otherwise, and a null handle (after
// marking the context out of memory) when the escaped length would exceed
// String::kMaxLength.
template<typename Char>
Handle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) {
  ASSERT(string->IsFlat());
  const int length = string->length();

  // Pass 1: compute the length of the escaped result.
  int escaped_length = 0;
  { AssertNoAllocation no_allocation;
    Vector<const Char> chars = GetCharVector<Char>(string);
    for (int pos = 0; pos < length; pos++) {
      const uint16_t c = chars[pos];
      int encoded_size;
      if (c >= 256) {
        encoded_size = 6;  // "%uXXXX"
      } else {
        encoded_size = IsNotEscaped(c) ? 1 : 3;  // verbatim or "%XX"
      }
      escaped_length += encoded_size;

      // We don't allow strings that are longer than a maximal length.
      ASSERT(String::kMaxLength < 0x7fffffff - 6);  // Cannot overflow.
      if (escaped_length > String::kMaxLength) {
        isolate->context()->mark_out_of_memory();
        return Handle<String>::null();
      }
    }
  }

  // No length change implies no change. Return original string if no change.
  if (escaped_length == length) return string;

  Handle<SeqOneByteString> dest =
      isolate->factory()->NewRawOneByteString(escaped_length);

  // Pass 2: emit the escaped characters. The vector is fetched again because
  // the allocation above happened outside any no-allocation scope.
  { AssertNoAllocation no_allocation;
    Vector<const Char> chars = GetCharVector<Char>(string);
    int out = 0;
    for (int pos = 0; pos < length; pos++) {
      const uint16_t c = chars[pos];
      if (c >= 256) {
        // Four hex digits, most significant nibble first.
        dest->SeqOneByteStringSet(out++, '%');
        dest->SeqOneByteStringSet(out++, 'u');
        dest->SeqOneByteStringSet(out++, kHexChars[c >> 12]);
        dest->SeqOneByteStringSet(out++, kHexChars[(c >> 8) & 0xf]);
        dest->SeqOneByteStringSet(out++, kHexChars[(c >> 4) & 0xf]);
        dest->SeqOneByteStringSet(out++, kHexChars[c & 0xf]);
      } else if (IsNotEscaped(c)) {
        dest->SeqOneByteStringSet(out++, c);
      } else {
        // c < 256 here, so c >> 4 is already a single nibble.
        dest->SeqOneByteStringSet(out++, '%');
        dest->SeqOneByteStringSet(out++, kHexChars[c >> 4]);
        dest->SeqOneByteStringSet(out++, kHexChars[c & 0xf]);
      }
    }
  }
  return dest;
}
} } // namespace v8::internal
#endif // V8_URI_H_
......@@ -478,6 +478,7 @@
'../../src/unicode-inl.h',
'../../src/unicode.cc',
'../../src/unicode.h',
'../../src/uri.h',
'../../src/utils-inl.h',
'../../src/utils.cc',
'../../src/utils.h',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment