Commit b941a40a authored by lrn@chromium.org

Convert Unicode code points outside the basic multilingual plane to the replacement character.

Previous behavior was to silently truncate the value to 16 bits.

Review URL: http://codereview.chromium.org/2832050

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5023 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent a50e7257
......@@ -2868,6 +2868,8 @@ Object* Heap::AllocateStringFromAscii(Vector<const char> string,
Object* Heap::AllocateStringFromUtf8(Vector<const char> string,
PretenureFlag pretenure) {
// V8 only supports characters in the Basic Multilingual Plane.
const uc32 kMaxSupportedChar = 0xFFFF;
// Count the number of characters in the UTF-8 string and check if
// it is an ASCII string.
Access<Scanner::Utf8Decoder> decoder(Scanner::utf8_decoder());
......@@ -2892,6 +2894,7 @@ Object* Heap::AllocateStringFromUtf8(Vector<const char> string,
decoder->Reset(string.start(), string.length());
for (int i = 0; i < chars; i++) {
uc32 r = decoder->GetNext();
if (r > kMaxSupportedChar) { r = unibrow::Utf8::kBadChar; }
string_result->Set(i, r);
}
return result;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment