// regexp-non-bmp.js
// Copyright 2013 the V8 project authors. All rights reserved.
// Copyright (C) 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1.  Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
// 2.  Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
// ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Human-readable summary of this test. The fragments are joined with no
// separator, so each fragment carries its own trailing space.
description([
    'Tests that regular expressions treat non-BMP characters as two separate characters. ',
    'From a Unicode correctness point of view this is wrong, but it is what other browsers do. ',
    'And given that we store strings as UTF-16, it is also more efficient to implement. ',
    'Also test some other cases related to UTF-8 and UTF-16.'
].join(''));

// A two-code-unit string: high surrogate 0xD800 followed by low surrogate
// 0xDC00. It stays a top-level var because the expression strings passed to
// shouldBe below refer to it by name.
var surrogatePair = String.fromCharCode(0xD800, 0xDC00);

// Each of these single-character patterns is expected to produce a match of
// length 1 against the pair, i.e. only one of its two code units.
['/./', '/\\D/', '/\\S/', '/\\W/', '/[^x]/'].forEach(function (pattern) {
    shouldBe(pattern + '.exec(surrogatePair).toString().length', '1');
});

debug('');

// Subject ends in the character with code 0xA1; the bounded quantifier is
// expected to yield a two-character match.
shouldBe('/.{1,2}/.exec("!!" + String.fromCharCode(0xA1)).toString().length', '2');
// Matching "." against the empty string is expected to give null.
shouldBe('/./.exec("")', 'null');

debug('');