// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file contains support for URI manipulations written in
// JavaScript.

(function(global, utils) {
"use strict";
%CheckIsBootstrapping();

// -------------------------------------------------------------------
// Imports

// Constructors captured from the global object and the bootstrapper's
// utils object when this file is evaluated.
var GlobalObject = global.Object;
var GlobalArray = global.Array;
var InternalArray = utils.InternalArray;

// Factory for the error thrown on malformed URIs; assigned by the
// utils.Import callback below.
var MakeURIError;

utils.Import(function(from) {
  MakeURIError = from.MakeURIError;
});

// -------------------------------------------------------------------
// Define internal helper functions.

// Maps the char code of a hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f')
// to its numeric value 0..15. Returns -1 for any other char code.
function HexValueOf(code) {
  var base;
  if (48 <= code && code <= 57) {
    // '0'..'9' map to 0..9.
    base = 48;
  } else if (65 <= code && code <= 70) {
    // 'A'..'F' map to 10..15.
    base = 55;
  } else if (97 <= code && code <= 102) {
    // 'a'..'f' map to 10..15.
    base = 87;
  } else {
    return -1;
  }
  return code - base;
}

// Returns true when the char code |cc| is an ASCII letter or an ASCII
// decimal digit, and false otherwise.
function isAlphaNumeric(cc) {
  return (97 <= cc && cc <= 122) ||  // a - z
         (65 <= cc && cc <= 90) ||   // A - Z
         (48 <= cc && cc <= 57);     // 0 - 9
}

// Lookup table from a 4-bit value to the char code of its upper-case
// hexadecimal digit. Lazily initialized: it holds 0 until
// URIEncodeOctets fills it in on first use.
var hexCharCodeArray = 0;

// Writes the three char codes of the percent-escape "%XY" for |octet|
// into |result| at positions |index|, |index| + 1 and |index| + 2, where
// X and Y are looked up in hexCharCodeArray. Returns the position just
// past the escape. hexCharCodeArray must already be initialized.
function URIAddEncodedOctetToBuffer(octet, result, index) {
  var highNibble = octet >> 4;
  var lowNibble = octet & 0x0F;
  result[index] = 37;  // Char code of '%'.
  result[index + 1] = hexCharCodeArray[highNibble];
  result[index + 2] = hexCharCodeArray[lowNibble];
  return index + 3;
}

// Appends the percent-escape of each octet in |octets| (up to four) to
// |result|, starting at |index|, and returns the index just past the last
// char code written. Initializes hexCharCodeArray on first use.
function URIEncodeOctets(octets, result, index) {
  if (hexCharCodeArray === 0) {
    // Char codes of '0'-'9' followed by 'A'-'F'.
    hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
                        65, 66, 67, 68, 69, 70];
  }
  index = URIAddEncodedOctetToBuffer(octets[0], result, index);
  // Slots 1..3 are only encoded when they hold a truthy value; unused
  // slots of |octets| are skipped.
  if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index);
  if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index);
  if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index);
  return index;
}

// Encodes the single UTF-16 code unit |cc| as one, two or three UTF-8
// octets and appends their percent-escapes to |result| at |index| via
// URIEncodeOctets. Returns the index returned by URIEncodeOctets.
function URIEncodeSingle(cc, result, index) {
  var x = (cc >> 12) & 0xF;  // Bits 12-15.
  var y = (cc >> 6) & 63;    // Bits 6-11.
  var z = cc & 63;           // Bits 0-5.
  // Slots that are not assigned below stay unset and are skipped by
  // URIEncodeOctets.
  var octets = new GlobalArray(3);
  if (cc <= 0x007F) {
    octets[0] = cc;
  } else if (cc <= 0x07FF) {
    octets[0] = y + 192;
    octets[1] = z + 128;
  } else {
    octets[0] = x + 224;
    octets[1] = y + 128;
    octets[2] = z + 128;
  }
  return URIEncodeOctets(octets, result, index);
}

// Encodes the surrogate pair (|cc1|, |cc2|) as four UTF-8 octets and
// appends their percent-escapes to |result| at |index| via
// URIEncodeOctets. Returns the index returned by URIEncodeOctets.
function URIEncodePair(cc1, cc2, result, index) {
  // Bits 6-9 of the first code unit, plus one.
  var upper = ((cc1 >> 6) & 0xF) + 1;
  var leadMiddle = (cc1 >> 2) & 0xF;
  var leadLow = cc1 & 3;
  var trailHigh = (cc2 >> 6) & 0xF;
  var trailLow = cc2 & 63;

  var octets = new GlobalArray(4);
  octets[0] = 0xF0 | (upper >> 2);
  octets[1] = 0x80 | ((upper & 3) << 4) | leadMiddle;
  octets[2] = 0x80 | (leadLow << 4) | trailHigh;
  octets[3] = 0x80 | trailLow;
  return URIEncodeOctets(octets, result, index);
}

// Combines the char codes of two hexadecimal digits into the octet they
// denote: |highChar| supplies the upper four bits, |lowChar| the lower
// four. Throws the error built by MakeURIError() when either char code is
// not a hexadecimal digit.
function URIHexCharsToCharCode(highChar, lowChar) {
  var highCode = HexValueOf(highChar);
  var lowCode = HexValueOf(lowChar);
  // HexValueOf signals a non-hex char code with -1.
  if (highCode === -1 || lowCode === -1) throw MakeURIError();
  return (highCode << 4) | lowCode;
}

// Callers must ensure that |result| is a sufficiently long sequential
// two-byte string!
function URIDecodeOctets(octets, result, index) {
  var value;
  var o0 = octets[0];
  if (o0 < 0x80) {
    value = o0;
  } else if (o0 < 0xc2) {
122
    throw MakeURIError();
123 124 125 126
  } else {
    var o1 = octets[1];
    if (o0 < 0xe0) {
      var a = o0 & 0x1f;
127
      if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
128 129
      var b = o1 & 0x3f;
      value = (a << 6) + b;
130
      if (value < 0x80 || value > 0x7ff) throw MakeURIError();
131
    } else {
132 133 134
      var o2 = octets[2];
      if (o0 < 0xf0) {
        var a = o0 & 0x0f;
135
        if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
136
        var b = o1 & 0x3f;
137
        if ((o2 < 0x80) || (o2 > 0xbf)) throw MakeURIError();
138 139
        var c = o2 & 0x3f;
        value = (a << 12) + (b << 6) + c;
140
        if ((value < 0x800) || (value > 0xffff)) throw MakeURIError();
141
      } else {
142 143 144
        var o3 = octets[3];
        if (o0 < 0xf8) {
          var a = (o0 & 0x07);
145
          if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
146
          var b = (o1 & 0x3f);
147
          if ((o2 < 0x80) || (o2 > 0xbf)) {
148
            throw MakeURIError();
149
          }
150
          var c = (o2 & 0x3f);
151
          if ((o3 < 0x80) || (o3 > 0xbf)) throw MakeURIError();
152 153
          var d = (o3 & 0x3f);
          value = (a << 18) + (b << 12) + (c << 6) + d;
154
          if ((value < 0x10000) || (value > 0x10ffff)) throw MakeURIError();
155
        } else {
156
          throw MakeURIError();
157 158 159
        }
      }
    }
160
  }
161
  if (0xD800 <= value && value <= 0xDFFF) throw MakeURIError();
162 163 164 165 166 167 168 169 170 171 172
  if (value < 0x10000) {
    %_TwoByteSeqStringSetChar(index++, value, result);
  } else {
    %_TwoByteSeqStringSetChar(index++, (value >> 10) + 0xd7c0, result);
    %_TwoByteSeqStringSetChar(index++, (value & 0x3ff) + 0xdc00, result);
  }
  return index;
}

// ECMA-262, section 15.1.3
function Encode(uri, unescape) {
173
  uri = TO_STRING(uri);
174 175 176 177
  var uriLength = uri.length;
  var array = new InternalArray(uriLength);
  var index = 0;
  for (var k = 0; k < uriLength; k++) {
178
    var cc1 = %_StringCharCodeAt(uri, k);
179 180 181
    if (unescape(cc1)) {
      array[index++] = cc1;
    } else {
182
      if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw MakeURIError();
183 184
      if (cc1 < 0xD800 || cc1 > 0xDBFF) {
        index = URIEncodeSingle(cc1, array, index);
185
      } else {
186
        k++;
187
        if (k == uriLength) throw MakeURIError();
188
        var cc2 = %_StringCharCodeAt(uri, k);
189
        if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw MakeURIError();
190
        index = URIEncodePair(cc1, cc2, array, index);
191 192
      }
    }
193
  }
194

195 196 197 198 199 200 201 202 203
  var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
  for (var i = 0; i < array.length; i++) {
    %_OneByteSeqStringSetChar(i, array[i], result);
  }
  return result;
}

// ECMA-262, section 15.1.3
function Decode(uri, reserved) {
204
  uri = TO_STRING(uri);
205 206 207 208 209 210 211
  var uriLength = uri.length;
  var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING);
  var index = 0;
  var k = 0;

  // Optimistically assume one-byte string.
  for ( ; k < uriLength; k++) {
212
    var code = %_StringCharCodeAt(uri, k);
213
    if (code == 37) {  // '%'
214
      if (k + 2 >= uriLength) throw MakeURIError();
215 216
      var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, k+1),
                                     %_StringCharCodeAt(uri, k+2));
217 218 219
      if (cc >> 7) break;  // Assumption wrong, two-byte string.
      if (reserved(cc)) {
        %_OneByteSeqStringSetChar(index++, 37, one_byte);  // '%'.
220 221 222 223
        %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+1),
                                  one_byte);
        %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+2),
                                  one_byte);
224
      } else {
225
        %_OneByteSeqStringSetChar(index++, cc, one_byte);
226
      }
227 228 229 230
      k += 2;
    } else {
      if (code > 0x7f) break;  // Assumption wrong, two-byte string.
      %_OneByteSeqStringSetChar(index++, code, one_byte);
231
    }
232
  }
233

234 235 236 237 238 239 240 241
  one_byte = %TruncateString(one_byte, index);
  if (k == uriLength) return one_byte;

  // Write into two byte string.
  var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING);
  index = 0;

  for ( ; k < uriLength; k++) {
242
    var code = %_StringCharCodeAt(uri, k);
243
    if (code == 37) {  // '%'
244
      if (k + 2 >= uriLength) throw MakeURIError();
245 246
      var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k),
                                     %_StringCharCodeAt(uri, ++k));
247 248 249
      if (cc >> 7) {
        var n = 0;
        while (((cc << ++n) & 0x80) != 0) { }
250
        if (n == 1 || n > 4) throw MakeURIError();
251 252
        var octets = new GlobalArray(n);
        octets[0] = cc;
253
        if (k + 3 * (n - 1) >= uriLength) throw MakeURIError();
254
        for (var i = 1; i < n; i++) {
255 256 257
          if (uri[++k] != '%') throw MakeURIError();
          octets[i] = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k),
                                            %_StringCharCodeAt(uri, ++k));
258
        }
259 260 261
        index = URIDecodeOctets(octets, two_byte, index);
      } else  if (reserved(cc)) {
        %_TwoByteSeqStringSetChar(index++, 37, two_byte);  // '%'.
262 263 264 265
        %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k - 1),
                                  two_byte);
        %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k),
                                  two_byte);
266
      } else {
267
        %_TwoByteSeqStringSetChar(index++, cc, two_byte);
268
      }
269 270
    } else {
      %_TwoByteSeqStringSetChar(index++, code, two_byte);
271
    }
272
  }
273

274 275 276
  two_byte = %TruncateString(two_byte, index);
  return one_byte + two_byte;
}

// -------------------------------------------------------------------
// Define exported functions.

// ECMA-262 - B.2.1.
282
function URIEscapeJS(s) {
283 284 285 286
  return %URIEscape(s);
}

// ECMA-262 - B.2.2.
287
function URIUnescapeJS(s) {
288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307
  return %URIUnescape(s);
}

// ECMA-262 - 15.1.3.1.
// Decodes |uri| with Decode, passing a predicate that marks the char codes
// of # $ & + , / : ; = ? @ as reserved.
function URIDecode(uri) {
  var reservedPredicate = function(cc) {
    // #$
    if (35 <= cc && cc <= 36) return true;
    // &
    if (cc == 38) return true;
    // +,
    if (43 <= cc && cc <= 44) return true;
    // /
    if (cc == 47) return true;
    // :;
    if (58 <= cc && cc <= 59) return true;
    // =
    if (cc == 61) return true;
    // ?@
    if (63 <= cc && cc <= 64) return true;

    return false;
  };
  return Decode(uri, reservedPredicate);
}

// ECMA-262 - 15.1.3.2.
// Decodes |component| with Decode, passing a predicate that treats no char
// code as reserved.
function URIDecodeComponent(component) {
  var reservedPredicate = function(cc) { return false; };
  return Decode(component, reservedPredicate);
}

// ECMA-262 - 15.1.3.3.
// Encodes |uri| with Encode, passing a predicate that leaves ASCII letters
// and digits and the characters ! # $ & ' ( ) * + , - . / : ; = ? @ _ ~
// unescaped.
function URIEncode(uri) {
  var unescapePredicate = function(cc) {
    if (isAlphaNumeric(cc)) return true;
    // !
    if (cc == 33) return true;
    // #$
    if (35 <= cc && cc <= 36) return true;
    // &'()*+,-./
    if (38 <= cc && cc <= 47) return true;
    // :;
    if (58 <= cc && cc <= 59) return true;
    // =
    if (cc == 61) return true;
    // ?@
    if (63 <= cc && cc <= 64) return true;
    // _
    if (cc == 95) return true;
    // ~
    if (cc == 126) return true;

    return false;
  };
  return Encode(uri, unescapePredicate);
}

// ECMA-262 - 15.1.3.4
// Encodes |component| with Encode, passing a predicate that leaves ASCII
// letters and digits and the characters ! ' ( ) * - . _ ~ unescaped.
function URIEncodeComponent(component) {
  var unescapePredicate = function(cc) {
    if (isAlphaNumeric(cc)) return true;
    // !
    if (cc == 33) return true;
    // '()*
    if (39 <= cc && cc <= 42) return true;
    // -.
    if (45 <= cc && cc <= 46) return true;
    // _
    if (cc == 95) return true;
    // ~
    if (cc == 126) return true;

    return false;
  };
  return Encode(component, unescapePredicate);
}

// -------------------------------------------------------------------
// Install exported functions.

// Set up non-enumerable URI functions on the global object and set
// their names. The array alternates the name to install with the function
// that implements it.
utils.InstallFunctions(global, DONT_ENUM, [
  "escape", URIEscapeJS,
  "unescape", URIUnescapeJS,
  "decodeURI", URIDecode,
  "decodeURIComponent", URIDecodeComponent,
  "encodeURI", URIEncode,
  "encodeURIComponent", URIEncodeComponent
]);
})