// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file contains support for URI manipulations written in
// JavaScript.

// Expect $String = global.String;

// Lazily initialized lookup tables; the value 0 means "not built yet".
// hexCharArray holds the sixteen hex digits as one-character strings and is
// filled in by CharCodeToHex4Str; hexCharCodeArray holds the char codes of
// the same digits and is filled in by URIEncodeOctets.
var hexCharArray = 0;
var hexCharCodeArray = 0;


38 39 40 41 42
// Writes the percent-escape "%XX" for |octet| into |result| as three char
// codes, starting at position |index|, and returns the position just past
// the last char code written.
// Reads hexCharCodeArray, so that table must already have been built by the
// caller (URIEncodeOctets does this before calling).
function URIAddEncodedOctetToBuffer(octet, result, index) {
  result[index] = 37;  // Char code of '%'.
  result[index + 1] = hexCharCodeArray[octet >> 4];    // High nibble.
  result[index + 2] = hexCharCodeArray[octet & 0x0F];  // Low nibble.
  return index + 3;
}
44 45 46


// Appends the percent-escape of every octet in |octets| to |result|,
// starting at position |index|, and returns the position just past the last
// char code written. |octets| holds between one and four octets; slots
// 1..3 are skipped when they are unset (or otherwise falsy), slot 0 is
// always written.
// Builds hexCharCodeArray on first use.
function URIEncodeOctets(octets, result, index) {
  if (hexCharCodeArray === 0) {
    // Char codes of '0'..'9' followed by 'A'..'F'.
    hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
                        65, 66, 67, 68, 69, 70];
  }
  index = URIAddEncodedOctetToBuffer(octets[0], result, index);
  for (var i = 1; i <= 3; i++) {
    if (octets[i]) {
      index = URIAddEncodedOctetToBuffer(octets[i], result, index);
    }
  }
  return index;
}
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74


// Percent-encodes the single UTF-16 code unit |cc| as its one-, two- or
// three-octet UTF-8 form and appends the escapes to |result| at |index|.
// Returns the position just past the last char code written.
// Surrogate code units are not handled here; see URIEncodePair.
function URIEncodeSingle(cc, result, index) {
  var octets = new $Array(3);
  if (cc <= 0x007F) {
    // 0xxxxxxx: the code unit is its own encoding.
    octets[0] = cc;
  } else {
    var middle = (cc >> 6) & 63;  // Bits 6..11.
    var tail = (cc & 63) + 128;   // 10xxxxxx: bits 0..5.
    if (cc <= 0x07FF) {
      octets[0] = middle + 192;   // 110xxxxx
      octets[1] = tail;
    } else {
      octets[0] = ((cc >> 12) & 0xF) + 224;  // 1110xxxx: bits 12..15.
      octets[1] = middle + 128;              // 10xxxxxx
      octets[2] = tail;
    }
  }
  return URIEncodeOctets(octets, result, index);
}
76 77 78 79 80 81 82 83 84 85 86 87 88 89


// Percent-encodes the surrogate pair |cc1| (high surrogate) and |cc2| (low
// surrogate) as the four-octet UTF-8 form of the code point they represent
// and appends the escapes to |result| at |index|. Returns the position just
// past the last char code written.
// The callers in this file validate the surrogate ranges before calling.
function URIEncodePair(cc1, cc2, result, index) {
  // Bits 6..9 of the high surrogate, plus one, give the top five bits of
  // the code point (the "plus 0x10000" of surrogate decoding).
  var u = ((cc1 >> 6) & 0xF) + 1;
  var w = (cc1 >> 2) & 0xF;
  var x = cc1 & 3;
  var y = (cc2 >> 6) & 0xF;
  var z = cc2 & 63;
  var octets = new $Array(4);
  octets[0] = (u >> 2) + 240;               // 11110xxx
  octets[1] = (((u & 3) << 4) | w) + 128;   // 10xxxxxx
  octets[2] = ((x << 4) | y) + 128;         // 10xxxxxx
  octets[3] = z + 128;                      // 10xxxxxx
  return URIEncodeOctets(octets, result, index);
}
91 92


93 94 95 96
// Combines the char codes of two hex digits into the octet they spell:
// |highChar| supplies the upper four bits and |lowChar| the lower four.
// Throws a URIError if HexValueOf reports either one as not a hex digit.
function URIHexCharsToCharCode(highChar, lowChar) {
  var highCode = HexValueOf(highChar);
  var lowCode = HexValueOf(lowChar);
  if (highCode == -1 || lowCode == -1) {
    throw new $URIError("URI malformed");
  }
  return (highCode << 4) | lowCode;
}
101 102 103


// Decodes one UTF-8 sequence, given as the array |octets| with the lead
// octet at position 0, and appends the decoded character to |result| at
// |index|: one UTF-16 code unit for values below 0x10000, a surrogate pair
// otherwise. Returns the position just past the last code unit written.
// Throws a URIError for an invalid lead octet, a continuation octet outside
// 0x80..0xBF, an overlong encoding, a surrogate code point (0xD800..0xDFFF)
// or a value above 0x10FFFF.
function URIDecodeOctets(octets, result, index) {
  var o0 = octets[0];
  var value;          // Payload bits of the lead octet, then the code point.
  var continuations;  // Number of continuation octets that must follow.
  var minValue;       // Smallest value a sequence of this length may encode.
  if (o0 < 0x80) {
    value = o0;
    continuations = 0;
    minValue = 0;
  } else if (o0 < 0xc2) {
    // 0x80..0xBF are continuation octets, and 0xC0/0xC1 could only start an
    // overlong two-octet sequence.
    throw new $URIError("URI malformed");
  } else if (o0 < 0xe0) {
    value = o0 & 0x1f;
    continuations = 1;
    minValue = 0x80;
  } else if (o0 < 0xf0) {
    value = o0 & 0x0f;
    continuations = 2;
    minValue = 0x800;
  } else if (o0 < 0xf8) {
    value = o0 & 0x07;
    continuations = 3;
    minValue = 0x10000;
  } else {
    throw new $URIError("URI malformed");
  }

  // Fold in six payload bits per continuation octet (10xxxxxx).
  for (var i = 1; i <= continuations; i++) {
    var octet = octets[i];
    if ((octet < 0x80) || (octet > 0xbf)) {
      throw new $URIError("URI malformed");
    }
    value = (value << 6) + (octet & 0x3f);
  }

  // Reject overlong forms and values beyond the Unicode range. Only the
  // four-octet form can exceed 0x10FFFF.
  if ((value < minValue) || (value > 0x10ffff)) {
    throw new $URIError("URI malformed");
  }
  if (0xD800 <= value && value <= 0xDFFF) {
    throw new $URIError("URI malformed");
  }

  if (value < 0x10000) {
    result[index++] = value;
    return index;
  }
  // Split into a surrogate pair; 0xd7c0 is 0xd800 - (0x10000 >> 10).
  result[index++] = (value >> 10) + 0xd7c0;
  result[index++] = (value & 0x3ff) + 0xdc00;
  return index;
}


// ECMA-262, section 15.1.3
function Encode(uri, unescape) {
  var uriLength = uri.length;
181 182 183 184
  // We are going to pass result to %StringFromCharCodeArray
  // which does not expect any getters/setters installed
  // on the incoming array.
  var result = new InternalArray(uriLength);
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
  var index = 0;
  for (var k = 0; k < uriLength; k++) {
    var cc1 = uri.charCodeAt(k);
    if (unescape(cc1)) {
      result[index++] = cc1;
    } else {
      if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed");
      if (cc1 < 0xD800 || cc1 > 0xDBFF) {
        index = URIEncodeSingle(cc1, result, index);
      } else {
        k++;
        if (k == uriLength) throw new $URIError("URI malformed");
        var cc2 = uri.charCodeAt(k);
        if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed");
        index = URIEncodePair(cc1, cc2, result, index);
      }
    }
  }
  return %StringFromCharCodeArray(result);
204
}


// ECMA-262, section 15.1.3
function Decode(uri, reserved) {
  var uriLength = uri.length;
210 211 212 213
  // We are going to pass result to %StringFromCharCodeArray
  // which does not expect any getters/setters installed
  // on the incoming array.
  var result = new InternalArray(uriLength);
214 215 216 217 218
  var index = 0;
  for (var k = 0; k < uriLength; k++) {
    var ch = uri.charAt(k);
    if (ch == '%') {
      if (k + 2 >= uriLength) throw new $URIError("URI malformed");
219
      var cc = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k));
220 221
      if (cc >> 7) {
        var n = 0;
222
        while (((cc << ++n) & 0x80) != 0) { }
223 224 225 226 227
        if (n == 1 || n > 4) throw new $URIError("URI malformed");
        var octets = new $Array(n);
        octets[0] = cc;
        if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed");
        for (var i = 1; i < n; i++) {
228
          if (uri.charAt(++k) != '%') throw new $URIError("URI malformed");
229 230
          octets[i] = URIHexCharsToCharCode(uri.charCodeAt(++k),
                                            uri.charCodeAt(++k));
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
        }
        index = URIDecodeOctets(octets, result, index);
      } else {
        if (reserved(cc)) {
          result[index++] = 37; // Char code of '%'.
          result[index++] = uri.charCodeAt(k - 1);
          result[index++] = uri.charCodeAt(k);
        } else {
          result[index++] = cc;
        }
      }
    } else {
      result[index++] = ch.charCodeAt(0);
    }
  }
  result.length = index;
  return %StringFromCharCodeArray(result);
248
}


// ECMA-262 - 15.1.3.1.
// Installed as the global function "decodeURI". Converts |uri| to a string
// and decodes its percent-escapes with Decode, leaving escapes of the
// characters # $ & + , / : ; = ? @ untouched.
function URIDecode(uri) {
  var reservedPredicate = function(cc) {
    return (35 <= cc && cc <= 36) ||   // #$
           cc == 38 ||                 // &
           (43 <= cc && cc <= 44) ||   // +,
           cc == 47 ||                 // /
           (58 <= cc && cc <= 59) ||   // :;
           cc == 61 ||                 // =
           (63 <= cc && cc <= 64);     // ?@
  };
  var string = ToString(uri);
  return Decode(string, reservedPredicate);
}


// ECMA-262 - 15.1.3.2.
// Installed as the global function "decodeURIComponent". Converts
// |component| to a string and decodes its percent-escapes with Decode.
// The reserved predicate rejects every char code, so no escape is kept.
function URIDecodeComponent(component) {
  var reservedPredicate = function(cc) { return false; };
  var string = ToString(component);
  return Decode(string, reservedPredicate);
}


// Returns true if the char code corresponds to an alphanumeric character.
// Returns true if the char code |cc| is an ASCII letter (a-z, A-Z) or an
// ASCII digit (0-9), and false otherwise.
function isAlphaNumeric(cc) {
  return (97 <= cc && cc <= 122) ||   // a - z
         (65 <= cc && cc <= 90) ||    // A - Z
         (48 <= cc && cc <= 57);      // 0 - 9
}


// ECMA-262 - 15.1.3.3.
// Installed as the global function "encodeURI". Converts |uri| to a string
// and percent-encodes it with Encode, leaving ASCII letters and digits and
// the characters ! # $ & ' ( ) * + , - . / : ; = ? @ _ ~ unescaped.
function URIEncode(uri) {
  var unescapePredicate = function(cc) {
    return isAlphaNumeric(cc) ||
           cc == 33 ||                 // !
           (35 <= cc && cc <= 36) ||   // #$
           (38 <= cc && cc <= 47) ||   // &'()*+,-./
           (58 <= cc && cc <= 59) ||   // :;
           cc == 61 ||                 // =
           (63 <= cc && cc <= 64) ||   // ?@
           cc == 95 ||                 // _
           cc == 126;                  // ~
  };

  var string = ToString(uri);
  return Encode(string, unescapePredicate);
}


// ECMA-262 - 15.1.3.4
// Installed as the global function "encodeURIComponent". Converts
// |component| to a string and percent-encodes it with Encode, leaving only
// ASCII letters and digits and the characters ! ' ( ) * - . _ ~ unescaped.
function URIEncodeComponent(component) {
  var unescapePredicate = function(cc) {
    return isAlphaNumeric(cc) ||
           cc == 33 ||                 // !
           (39 <= cc && cc <= 42) ||   // '()*
           (45 <= cc && cc <= 46) ||   // -.
           cc == 95 ||                 // _
           cc == 126;                  // ~
  };

  var string = ToString(component);
  return Encode(string, unescapePredicate);
}
347 348


349
// Returns the numeric value (0..15) of the hex digit whose char code is
// |code|, accepting both upper- and lower-case letters, or -1 if |code| is
// not the char code of a hex digit.
function HexValueOf(code) {
  // 0-9
  if (code >= 48 && code <= 57) return code - 48;
  // A-F
  if (code >= 65 && code <= 70) return code - 55;
  // a-f
  if (code >= 97 && code <= 102) return code - 87;

  return -1;
}


// Convert a character code to its 4-digit upper-case hex string
// representation, e.g. 64 -> "0040", 62234 -> "F31A".
// Returns the low 16 bits of |cc| as a string of exactly four upper-case
// hex digits, padded with leading zeros.
// Builds hexCharArray on first use.
function CharCodeToHex4Str(cc) {
  if (hexCharArray === 0) {
    hexCharArray = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
                    "A", "B", "C", "D", "E", "F"];
  }
  var r = "";
  var rest = cc;
  // Peel off one nibble per step, least significant first, and prepend its
  // digit so the most significant digit ends up in front.
  for (var i = 0; i < 4; ++i) {
    r = hexCharArray[rest & 0x0F] + r;
    rest = rest >>> 4;
  }
  return r;
}


// Returns true if every character of string s is a valid hex digit.
// Returns true if every character of the string |s| is one of '0'..'9',
// 'A'..'F' or 'a'..'f', and false as soon as any other character is found.
// An empty string yields true.
function IsValidHex(s) {
  for (var i = 0; i < s.length; ++i) {
    var cc = s.charCodeAt(i);
    var isHexDigit = (48 <= cc && cc <= 57) ||    // '0'..'9'
                     (65 <= cc && cc <= 70) ||    // 'A'..'F'
                     (97 <= cc && cc <= 102);     // 'a'..'f'
    if (!isHexDigit) return false;
  }
  return true;
}


// ECMA-262 - B.2.1.
function URIEscape(str) {
  var s = ToString(str);
  return %URIEscape(s);
398
}


// ECMA-262 - B.2.2.
function URIUnescape(str) {
  var s = ToString(str);
  return %URIUnescape(s);
}


// -------------------------------------------------------------------

function SetUpUri() {
  %CheckIsBootstrapping();
  // Set up non-enumerable URI functions on the global object and set
413 414 415 416 417 418 419 420 421 422
  // their names.
  InstallFunctions(global, DONT_ENUM, $Array(
    "escape", URIEscape,
    "unescape", URIUnescape,
    "decodeURI", URIDecode,
    "decodeURIComponent", URIDecodeComponent,
    "encodeURI", URIEncode,
    "encodeURIComponent", URIEncodeComponent
  ));
}
423

424
// Run the installation when this file is loaded.
SetUpUri();