runtime-uri.cc 9.92 KB
Newer Older
1
// Copyright 2014 the V8 project authors. All rights reserved.
2 3
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4

5
#include "src/runtime/runtime-utils.h"
6

7
#include "src/arguments.h"
8
#include "src/conversions.h"
9
#include "src/isolate-inl.h"
10
#include "src/objects-inl.h"
11 12
#include "src/string-search.h"
#include "src/utils.h"
13 14 15 16 17 18

namespace v8 {
namespace internal {

class URIUnescape : public AllStatic {
 public:
19
  template <typename Char>
20 21
  MUST_USE_RESULT static MaybeHandle<String> Unescape(Isolate* isolate,
                                                      Handle<String> source);
22 23 24 25

 private:
  static const signed char kHexValue['g'];

26 27 28 29
  template <typename Char>
  MUST_USE_RESULT static MaybeHandle<String> UnescapeSlow(Isolate* isolate,
                                                          Handle<String> string,
                                                          int start_index);
30 31 32 33

  static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));

  template <typename Char>
34
  static INLINE(int UnescapeChar(Vector<const Char> vector, int i, int length,
35 36 37 38 39
                                 int* step));
};


// Indexed by character code, 16 entries per row. Digits and the letters
// A-F / a-f map to their hexadecimal value, everything else maps to -1.
// The table ends at 'f' (0x66), so callers must range-check before indexing.
const signed char URIUnescape::kHexValue[] = {
    // 0x00 - 0x0F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x10 - 0x1F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x30 - 0x3F: '0'..'9'
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
    // 0x40 - 0x4F: 'A'..'F' at 0x41..0x46
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x50 - 0x5F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x60 - 0x66: 'a'..'f' at 0x61..0x66
    -1, 10, 11, 12, 13, 14, 15};
46 47


48
template <typename Char>
49 50
MaybeHandle<String> URIUnescape::Unescape(Isolate* isolate,
                                          Handle<String> source) {
51
  int index;
52 53
  {
    DisallowHeapAllocation no_allocation;
54
    StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%"));
55
    index = search.Search(source->GetCharVector<Char>(), 0);
56 57 58 59 60 61 62
    if (index < 0) return source;
  }
  return UnescapeSlow<Char>(isolate, source, index);
}


template <typename Char>
63 64 65
MaybeHandle<String> URIUnescape::UnescapeSlow(Isolate* isolate,
                                              Handle<String> string,
                                              int start_index) {
66 67 68 69
  bool one_byte = true;
  int length = string->length();

  int unescaped_length = 0;
70 71
  {
    DisallowHeapAllocation no_allocation;
72
    Vector<const Char> vector = string->GetCharVector<Char>();
73 74 75
    for (int i = start_index; i < length; unescaped_length++) {
      int step;
      if (UnescapeChar(vector, i, length, &step) >
76
          String::kMaxOneByteCharCode) {
77 78 79 80 81 82
        one_byte = false;
      }
      i += step;
    }
  }

83
  DCHECK(start_index < length);
84 85 86 87 88
  Handle<String> first_part =
      isolate->factory()->NewProperSubString(string, 0, start_index);

  int dest_position = 0;
  Handle<String> second_part;
89
  DCHECK(unescaped_length <= String::kMaxLength);
90
  if (one_byte) {
91 92 93
    Handle<SeqOneByteString> dest = isolate->factory()
                                        ->NewRawOneByteString(unescaped_length)
                                        .ToHandleChecked();
94
    DisallowHeapAllocation no_allocation;
95
    Vector<const Char> vector = string->GetCharVector<Char>();
96 97 98 99 100 101 102 103
    for (int i = start_index; i < length; dest_position++) {
      int step;
      dest->SeqOneByteStringSet(dest_position,
                                UnescapeChar(vector, i, length, &step));
      i += step;
    }
    second_part = dest;
  } else {
104 105 106
    Handle<SeqTwoByteString> dest = isolate->factory()
                                        ->NewRawTwoByteString(unescaped_length)
                                        .ToHandleChecked();
107
    DisallowHeapAllocation no_allocation;
108
    Vector<const Char> vector = string->GetCharVector<Char>();
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
    for (int i = start_index; i < length; dest_position++) {
      int step;
      dest->SeqTwoByteStringSet(dest_position,
                                UnescapeChar(vector, i, length, &step));
      i += step;
    }
    second_part = dest;
  }
  return isolate->factory()->NewConsString(first_part, second_part);
}


// Interprets |character1| and |character2| as the high and low hexadecimal
// digit of a byte. Returns the byte value (0..255), or -1 if either character
// is not a hex digit.
int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
  // kHexValue has no entries beyond 'f'; such characters are never hex digits.
  if (character1 > 'f' || character2 > 'f') return -1;
  const int high_nibble = kHexValue[character1];
  const int low_nibble = kHexValue[character2];
  if (high_nibble == -1 || low_nibble == -1) return -1;
  return (high_nibble << 4) | low_nibble;
}


template <typename Char>
133
int URIUnescape::UnescapeChar(Vector<const Char> vector, int i, int length,
134 135 136 137
                              int* step) {
  uint16_t character = vector[i];
  int32_t hi = 0;
  int32_t lo = 0;
138 139 140
  if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' &&
      (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) != -1 &&
      (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) != -1) {
141 142
    *step = 6;
    return (hi << 8) + lo;
143 144
  } else if (character == '%' && i <= length - 3 &&
             (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) != -1) {
145 146 147 148 149 150 151 152 153 154 155
    *step = 3;
    return lo;
  } else {
    *step = 1;
    return character;
  }
}


class URIEscape : public AllStatic {
 public:
156
  template <typename Char>
157 158
  MUST_USE_RESULT static MaybeHandle<String> Escape(Isolate* isolate,
                                                    Handle<String> string);
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182

 private:
  static const char kHexChars[17];
  static const char kNotEscaped[256];

  static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
};


// Upper-case hexadecimal digits, indexed by nibble value (0..15). The string
// literal's terminating NUL accounts for the 17th array element.
const char URIEscape::kHexChars[] = "0123456789ABCDEF";


// kNotEscaped[c] is 1 for the character codes that are copied without
// escaping (A-Z, a-z, 0-9 and the punctuation @ * _ + . / -) and 0 for every
// other code below 256. Rows hold 16 entries each, matching the output of the
// generator script:
//
// #!/bin/perl
// for (my $i = 0; $i < 256; $i++) {
//   print "\n" if $i % 16 == 0;
//   my $c = chr($i);
//   my $escaped = 1;
//   $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
//   print $escaped ? "0, " : "1, ";
// }

const char URIEscape::kNotEscaped[] = {
    // 0x00 - 0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x10 - 0x1F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20 - 0x2F: '*', '+', '-', '.', '/'
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
    // 0x30 - 0x3F: '0'..'9'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    // 0x40 - 0x4F: '@', 'A'..'O'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x50 - 0x5F: 'P'..'Z', '_'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    // 0x60 - 0x6F: 'a'..'o'
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x70 - 0x7F: 'p'..'z'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    // 0x80 - 0x8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x90 - 0x9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0xA0 - 0xAF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0xB0 - 0xBF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0xC0 - 0xCF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0xD0 - 0xDF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0xE0 - 0xEF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0xF0 - 0xFF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};


template <typename Char>
197
MaybeHandle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) {
198
  DCHECK(string->IsFlat());
199 200 201
  int escaped_length = 0;
  int length = string->length();

202 203
  {
    DisallowHeapAllocation no_allocation;
204
    Vector<const Char> vector = string->GetCharVector<Char>();
205 206 207 208 209 210 211 212 213 214 215
    for (int i = 0; i < length; i++) {
      uint16_t c = vector[i];
      if (c >= 256) {
        escaped_length += 6;
      } else if (IsNotEscaped(c)) {
        escaped_length++;
      } else {
        escaped_length += 3;
      }

      // We don't allow strings that are longer than a maximal length.
216
      DCHECK(String::kMaxLength < 0x7fffffff - 6);     // Cannot overflow.
217
      if (escaped_length > String::kMaxLength) break;  // Provoke exception.
218 219 220 221 222 223
    }
  }

  // No length change implies no change.  Return original string if no change.
  if (escaped_length == length) return string;

224 225
  Handle<SeqOneByteString> dest;
  ASSIGN_RETURN_ON_EXCEPTION(
226
      isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length),
227
      String);
228 229
  int dest_position = 0;

230 231
  {
    DisallowHeapAllocation no_allocation;
232
    Vector<const Char> vector = string->GetCharVector<Char>();
233 234 235 236
    for (int i = 0; i < length; i++) {
      uint16_t c = vector[i];
      if (c >= 256) {
        dest->SeqOneByteStringSet(dest_position, '%');
237 238 239 240 241
        dest->SeqOneByteStringSet(dest_position + 1, 'u');
        dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c >> 12]);
        dest->SeqOneByteStringSet(dest_position + 3, kHexChars[(c >> 8) & 0xf]);
        dest->SeqOneByteStringSet(dest_position + 4, kHexChars[(c >> 4) & 0xf]);
        dest->SeqOneByteStringSet(dest_position + 5, kHexChars[c & 0xf]);
242 243 244 245 246 247
        dest_position += 6;
      } else if (IsNotEscaped(c)) {
        dest->SeqOneByteStringSet(dest_position, c);
        dest_position++;
      } else {
        dest->SeqOneByteStringSet(dest_position, '%');
248 249
        dest->SeqOneByteStringSet(dest_position + 1, kHexChars[c >> 4]);
        dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c & 0xf]);
250 251 252 253 254 255 256 257 258
        dest_position += 3;
      }
    }
  }

  return dest;
}


259 260
// Runtime function taking one argument: converts it to a string, flattens it
// and returns the escaped string. A failing string conversion or escape
// makes the ASSIGN_RETURN_FAILURE_ON_EXCEPTION macros return early.
RUNTIME_FUNCTION(Runtime_URIEscape) {
  HandleScope scope(isolate);
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);

  Handle<String> source;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, source,
                                     Object::ToString(isolate, input));
  source = String::Flatten(source);

  // Pick the instantiation that matches the string's character width.
  MaybeHandle<String> maybe_escaped;
  if (source->IsOneByteRepresentationUnderneath()) {
    maybe_escaped = URIEscape::Escape<uint8_t>(isolate, source);
  } else {
    maybe_escaped = URIEscape::Escape<uc16>(isolate, source);
  }

  Handle<String> result;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, maybe_escaped);
  return *result;
}


// Runtime function taking one argument: converts it to a string, flattens it
// and returns the unescaped string. A failing string conversion or unescape
// makes the ASSIGN_RETURN_FAILURE_ON_EXCEPTION macros return early.
RUNTIME_FUNCTION(Runtime_URIUnescape) {
  HandleScope scope(isolate);
  // Same argument-count check, in the same form, as Runtime_URIEscape.
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
  Handle<String> source;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, source,
                                     Object::ToString(isolate, input));
  source = String::Flatten(source);
  Handle<String> result;
  // Pick the instantiation that matches the string's character width.
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result, source->IsOneByteRepresentationUnderneath()
                           ? URIUnescape::Unescape<uint8_t>(isolate, source)
                           : URIUnescape::Unescape<uc16>(isolate, source));
  return *result;
}
291

292 293
}  // namespace internal
}  // namespace v8