// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file relies on the fact that the following declaration has been made
// in runtime.js:
// var $Array = global.Array;

// -------------------------------------------------------------------

// This file contains support for URI manipulations written in
// JavaScript.

37 38 39 40 41
// Lazily initialized lookup tables. The value 0 is the "not built yet"
// sentinel that the users of each table test for with === 0.
// hexCharArray is filled in by CharCodeToHex4Str with the one-character
// strings "0".."9", "A".."F".
var hexCharArray = 0;
// hexCharCodeArray is filled in by URIEncodeOctets with the char codes of
// '0'..'9', 'A'..'F'; it is indexed by a nibble value (0-15).
var hexCharCodeArray = 0;


42 43 44 45 46
// Appends the percent-escape of a single octet to |result|: the char code of
// '%' followed by the char codes of its two upper-case hex digits, written at
// |index|, |index| + 1 and |index| + 2.
// Returns the index just past the last char code written.
// hexCharCodeArray must already have been initialized by the caller.
function URIAddEncodedOctetToBuffer(octet, result, index) {
  var highNibble = octet >> 4;
  var lowNibble = octet & 0x0F;
  result[index] = 37;  // Char code of '%'.
  result[index + 1] = hexCharCodeArray[highNibble];
  result[index + 2] = hexCharCodeArray[lowNibble];
  return index + 3;
}
48 49 50


// Appends the percent-escapes of the UTF-8 octets of one character to
// |result|, starting at |index|, and returns the updated write index.
//   octets: array holding one to four octets; octets[0] is always emitted.
//   result: array of char codes being built by the caller.
//   index:  position in |result| at which to start writing.
// Also builds the shared hexCharCodeArray lookup table on first use.
function URIEncodeOctets(octets, result, index) {
  if (hexCharCodeArray === 0) {
    // Char codes of '0'..'9' followed by 'A'..'F'.
    hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
                        65, 66, 67, 68, 69, 70];
  }
  index = URIAddEncodedOctetToBuffer(octets[0], result, index);
  // Slots 1..3 are emitted only when they hold a truthy value; the encoders
  // in this file leave the slots they do not need unassigned, and every
  // octet they do store there is at least 128.
  for (var i = 1; i < 4; i++) {
    if (octets[i]) index = URIAddEncodedOctetToBuffer(octets[i], result, index);
  }
  return index;
}
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78


// Encodes one UTF-16 code unit that is not part of a surrogate pair as one,
// two or three UTF-8 octets and hands them to URIEncodeOctets, which appends
// their percent-escapes to |result| at |index|.
// Returns whatever URIEncodeOctets returns (the updated write index).
function URIEncodeSingle(cc, result, index) {
  var utf8 = new $Array(3);
  var low6 = cc & 63;
  var mid6 = (cc >> 6) & 63;
  if (cc <= 0x007F) {
    // 0zzzzzzz
    utf8[0] = cc;
  } else if (cc <= 0x07FF) {
    // 110yyyyy 10zzzzzz
    utf8[0] = mid6 + 192;
    utf8[1] = low6 + 128;
  } else {
    // 1110xxxx 10yyyyyy 10zzzzzz
    var top4 = (cc >> 12) & 0xF;
    utf8[0] = top4 + 224;
    utf8[1] = mid6 + 128;
    utf8[2] = low6 + 128;
  }
  return URIEncodeOctets(utf8, result, index);
}
80 81 82 83 84 85 86 87 88 89 90 91 92 93


// Encodes a surrogate pair (|cc1| = lead, |cc2| = trail code unit) as the
// four UTF-8 octets of the code point it represents and hands them to
// URIEncodeOctets, which appends their percent-escapes to |result| at
// |index|. Returns whatever URIEncodeOctets returns.
function URIEncodePair(cc1, cc2, result, index) {
  // Bits 6-9 of the lead unit hold (plane - 1); adding 1 recovers the top
  // five bits of the code point.
  var planeBits = ((cc1 >> 6) & 0xF) + 1;
  var leadMid4 = (cc1 >> 2) & 0xF;
  var leadLow2 = cc1 & 3;
  var trailHigh4 = (cc2 >> 6) & 0xF;
  var trailLow6 = cc2 & 63;

  var utf8 = new $Array(4);
  utf8[0] = 240 + (planeBits >> 2);
  utf8[1] = 128 + (((planeBits & 3) << 4) | leadMid4);
  utf8[2] = 128 + ((leadLow2 << 4) | trailHigh4);
  utf8[3] = 128 + trailLow6;
  return URIEncodeOctets(utf8, result, index);
}
95 96


97 98 99 100
// Combines the char codes of two hex digits into the octet they spell.
//   highChar: char code of the most significant hex digit.
//   lowChar:  char code of the least significant hex digit.
// Returns the octet value (0-255).
// Throws a URIError("URI malformed") if HexValueOf rejects either char code.
function URIHexCharsToCharCode(highChar, lowChar) {
  var highCode = HexValueOf(highChar);
  var lowCode = HexValueOf(lowChar);
  if (highCode === -1 || lowCode === -1) {
    throw new $URIError("URI malformed");
  }
  return (highCode << 4) | lowCode;
}
105 106 107


function URIDecodeOctets(octets, result, index) {
108 109 110 111 112 113 114 115 116 117
  var value;
  var o0 = octets[0];
  if (o0 < 0x80) {
    value = o0;
  } else if (o0 < 0xc2) {
    throw new $URIError("URI malformed");
  } else {
    var o1 = octets[1];
    if (o0 < 0xe0) {
      var a = o0 & 0x1f;
118
      if ((o1 < 0x80) || (o1 > 0xbf)) {
119
        throw new $URIError("URI malformed");
120
      }
121 122
      var b = o1 & 0x3f;
      value = (a << 6) + b;
123
      if (value < 0x80 || value > 0x7ff) {
124
        throw new $URIError("URI malformed");
125
      }
126 127 128 129
    } else {
      var o2 = octets[2];
      if (o0 < 0xf0) {
        var a = o0 & 0x0f;
130
        if ((o1 < 0x80) || (o1 > 0xbf)) {
131
          throw new $URIError("URI malformed");
132
        }
133
        var b = o1 & 0x3f;
134
        if ((o2 < 0x80) || (o2 > 0xbf)) {
135
          throw new $URIError("URI malformed");
136
        }
137 138
        var c = o2 & 0x3f;
        value = (a << 12) + (b << 6) + c;
139
        if ((value < 0x800) || (value > 0xffff)) {
140
          throw new $URIError("URI malformed");
141
        }
142 143 144 145
      } else {
        var o3 = octets[3];
        if (o0 < 0xf8) {
          var a = (o0 & 0x07);
146
          if ((o1 < 0x80) || (o1 > 0xbf)) {
147
            throw new $URIError("URI malformed");
148
          }
149
          var b = (o1 & 0x3f);
150
          if ((o2 < 0x80) || (o2 > 0xbf)) {
151
            throw new $URIError("URI malformed");
152
          }
153
          var c = (o2 & 0x3f);
154
          if ((o3 < 0x80) || (o3 > 0xbf)) {
155
            throw new $URIError("URI malformed");
156
          }
157 158
          var d = (o3 & 0x3f);
          value = (a << 18) + (b << 12) + (c << 6) + d;
159
          if ((value < 0x10000) || (value > 0x10ffff)) {
160
            throw new $URIError("URI malformed");
161
          }
162 163 164 165 166
        } else {
          throw new $URIError("URI malformed");
        }
      }
    }
167
  }
168 169 170
  if (0xD800 <= value && value <= 0xDFFF) {
    throw new $URIError("URI malformed");
  }
171
  if (value < 0x10000) {
172
    %_TwoByteSeqStringSetChar(result, index++, value);
173 174
    return index;
  } else {
175 176
    %_TwoByteSeqStringSetChar(result, index++, (value >> 10) + 0xd7c0);
    %_TwoByteSeqStringSetChar(result, index++, (value & 0x3ff) + 0xdc00);
177 178
    return index;
  }
179
}
180 181 182 183 184


// ECMA-262, section 15.1.3
function Encode(uri, unescape) {
  var uriLength = uri.length;
185
  var array = new InternalArray(uriLength);
186 187 188 189
  var index = 0;
  for (var k = 0; k < uriLength; k++) {
    var cc1 = uri.charCodeAt(k);
    if (unescape(cc1)) {
190
      array[index++] = cc1;
191 192 193
    } else {
      if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed");
      if (cc1 < 0xD800 || cc1 > 0xDBFF) {
194
        index = URIEncodeSingle(cc1, array, index);
195 196 197 198 199
      } else {
        k++;
        if (k == uriLength) throw new $URIError("URI malformed");
        var cc2 = uri.charCodeAt(k);
        if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed");
200
        index = URIEncodePair(cc1, cc2, array, index);
201 202 203
      }
    }
  }
204 205 206 207 208 209

  var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
  for (var i = 0; i < array.length; i++) {
    %_OneByteSeqStringSetChar(result, i, array[i]);
  }
  return result;
210
}
211 212 213 214 215


// ECMA-262, section 15.1.3
function Decode(uri, reserved) {
  var uriLength = uri.length;
216
  var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING);
217
  var index = 0;
218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
  var k = 0;

  // Optimistically assume ascii string.
  for ( ; k < uriLength; k++) {
    var code = uri.charCodeAt(k);
    if (code == 37) {  // '%'
      if (k + 2 >= uriLength) throw new $URIError("URI malformed");
      var cc = URIHexCharsToCharCode(uri.charCodeAt(k+1), uri.charCodeAt(k+2));
      if (cc >> 7) break;  // Assumption wrong, two byte string.
      if (reserved(cc)) {
        %_OneByteSeqStringSetChar(one_byte, index++, 37);  // '%'.
        %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+1));
        %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+2));
      } else {
        %_OneByteSeqStringSetChar(one_byte, index++, cc);
      }
      k += 2;
    } else {
      if (code > 0x7f) break;  // Assumption wrong, two byte string.
      %_OneByteSeqStringSetChar(one_byte, index++, code);
    }
  }

  one_byte = %TruncateString(one_byte, index);
  if (k == uriLength) return one_byte;

  // Write into two byte string.
  var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING);
  index = 0;

  for ( ; k < uriLength; k++) {
    var code = uri.charCodeAt(k);
    if (code == 37) {  // '%'
251
      if (k + 2 >= uriLength) throw new $URIError("URI malformed");
252
      var cc = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k));
253 254
      if (cc >> 7) {
        var n = 0;
255
        while (((cc << ++n) & 0x80) != 0) { }
256 257 258 259 260
        if (n == 1 || n > 4) throw new $URIError("URI malformed");
        var octets = new $Array(n);
        octets[0] = cc;
        if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed");
        for (var i = 1; i < n; i++) {
261
          if (uri.charAt(++k) != '%') throw new $URIError("URI malformed");
262 263
          octets[i] = URIHexCharsToCharCode(uri.charCodeAt(++k),
                                            uri.charCodeAt(++k));
264
        }
265 266 267 268 269
        index = URIDecodeOctets(octets, two_byte, index);
      } else  if (reserved(cc)) {
        %_TwoByteSeqStringSetChar(two_byte, index++, 37);  // '%'.
        %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k - 1));
        %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k));
270
      } else {
271
        %_TwoByteSeqStringSetChar(two_byte, index++, cc);
272 273
      }
    } else {
274
      %_TwoByteSeqStringSetChar(two_byte, index++, code);
275 276
    }
  }
277 278 279

  two_byte = %TruncateString(two_byte, index);
  return one_byte + two_byte;
280
}
281 282 283 284


// ECMA-262 - 15.1.3.1.
// Installed on the global object as decodeURI. Converts |uri| to a string
// and runs Decode on it with a predicate that keeps the escapes of the
// characters # $ & + , / : ; = ? @ in their escaped form.
// Returns the result of Decode.
function URIDecode(uri) {
  var reservedPredicate = function(cc) {
    // #$
    if (35 <= cc && cc <= 36) return true;
    // &
    if (cc == 38) return true;
    // +,
    if (43 <= cc && cc <= 44) return true;
    // /
    if (cc == 47) return true;
    // :;
    if (58 <= cc && cc <= 59) return true;
    // =
    if (cc == 61) return true;
    // ?@
    if (63 <= cc && cc <= 64) return true;

    return false;
  };
  var string = ToString(uri);
  return Decode(string, reservedPredicate);
}
306 307 308 309


// ECMA-262 - 15.1.3.2.
// Installed on the global object as decodeURIComponent. Converts
// |component| to a string and runs Decode on it with a predicate that
// treats no character as reserved, so every escape is decoded.
// Returns the result of Decode.
function URIDecodeComponent(component) {
  var reservedPredicate = function(cc) { return false; };
  var string = ToString(component);
  return Decode(string, reservedPredicate);
}
314 315 316 317 318 319 320 321 322 323


// Returns true when |cc| is the char code of an ASCII letter or an ASCII
// decimal digit, and false for every other value.
function isAlphaNumeric(cc) {
  return (97 <= cc && cc <= 122) ||  // a - z
         (65 <= cc && cc <= 90) ||   // A - Z
         (48 <= cc && cc <= 57);     // 0 - 9
}
327 328 329 330


// ECMA-262 - 15.1.3.3.
// Installed on the global object as encodeURI. Converts |uri| to a string
// and runs Encode on it with a predicate that leaves ASCII letters and
// digits and the characters ! # $ & ' ( ) * + , - . / : ; = ? @ _ ~
// unescaped.
// Returns the result of Encode.
function URIEncode(uri) {
  var unescapePredicate = function(cc) {
    if (isAlphaNumeric(cc)) return true;
    // !
    if (cc == 33) return true;
    // #$
    if (35 <= cc && cc <= 36) return true;
    // &'()*+,-./
    if (38 <= cc && cc <= 47) return true;
    // :;
    if (58 <= cc && cc <= 59) return true;
    // =
    if (cc == 61) return true;
    // ?@
    if (63 <= cc && cc <= 64) return true;
    // _
    if (cc == 95) return true;
    // ~
    if (cc == 126) return true;

    return false;
  };

  var string = ToString(uri);
  return Encode(string, unescapePredicate);
}
356 357 358 359


// ECMA-262 - 15.1.3.4
// Installed on the global object as encodeURIComponent. Converts
// |component| to a string and runs Encode on it with a predicate that
// leaves only ASCII letters and digits and the characters
// ! ' ( ) * - . _ ~ unescaped.
// Returns the result of Encode.
function URIEncodeComponent(component) {
  var unescapePredicate = function(cc) {
    if (isAlphaNumeric(cc)) return true;
    // !
    if (cc == 33) return true;
    // '()*
    if (39 <= cc && cc <= 42) return true;
    // -.
    if (45 <= cc && cc <= 46) return true;
    // _
    if (cc == 95) return true;
    // ~
    if (cc == 126) return true;

    return false;
  };

  var string = ToString(component);
  return Encode(string, unescapePredicate);
}
379 380


381
// Maps the char code of a hex digit ('0'-'9', 'A'-'F' or 'a'-'f') to its
// numeric value 0-15. Returns -1 for any other char code.
function HexValueOf(code) {
  // 0-9
  if (code >= 48 && code <= 57) return code - 48;
  // A-F
  if (code >= 65 && code <= 70) return code - 55;
  // a-f
  if (code >= 97 && code <= 102) return code - 87;

  return -1;
}
391 392 393 394 395 396


// Convert a character code to 4-digit hex string representation
// 64 -> 0040, 62234 -> F31A.
// Only the low 16 bits of |cc| contribute to the result. Builds the shared
// hexCharArray lookup table on first use.
function CharCodeToHex4Str(cc) {
  if (hexCharArray === 0) {
    hexCharArray = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
                    "A", "B", "C", "D", "E", "F"];
  }
  var r = "";
  // Peel off one nibble per iteration, least significant first, and
  // prepend its digit so the most significant digit ends up on the left.
  for (var i = 0; i < 4; ++i) {
    r = hexCharArray[cc & 0x0F] + r;
    cc = cc >>> 4;
  }
  return r;
}
408 409 410 411 412 413


// Returns true if every character of string |s| is a hex digit
// ('0'..'9', 'A'..'F' or 'a'..'f'), and false as soon as one is not.
// An empty string yields true.
function IsValidHex(s) {
  for (var i = 0; i < s.length; ++i) {
    var cc = s.charCodeAt(i);
    var isHexDigit = (48 <= cc && cc <= 57) ||
                     (65 <= cc && cc <= 70) ||
                     (97 <= cc && cc <= 102);
    if (!isHexDigit) return false;
  }
  return true;
}
424 425 426 427 428 429


// ECMA-262 - B.2.1.
function URIEscape(str) {
  var s = ToString(str);
  return %URIEscape(s);
430
}
431 432 433 434 435 436 437 438 439 440 441


// ECMA-262 - B.2.2.
function URIUnescape(str) {
  var s = ToString(str);
  return %URIUnescape(s);
}


// -------------------------------------------------------------------

442 443
function SetUpUri() {
  %CheckIsBootstrapping();
444

445
  // Set up non-enumerable URI functions on the global object and set
446 447 448 449 450 451 452 453 454 455
  // their names.
  InstallFunctions(global, DONT_ENUM, $Array(
    "escape", URIEscape,
    "unescape", URIUnescape,
    "decodeURI", URIDecode,
    "decodeURIComponent", URIDecodeComponent,
    "encodeURI", URIEncode,
    "encodeURIComponent", URIEncodeComponent
  ));
}
456

457
SetUpUri();