Commit 2e3f3950 authored by Andrey Kosyakov, committed by Commit Bot

DevTools: fix String16::utf8() to return invalid char for broken surrogate pair

The DevTools protocol is not supposed to carry structurally invalid UTF-8 as string payload.

Bug: chromium:929862
Change-Id: I701eeb553e6bf22d887947dcd9f4b29af7a43e2b
Reviewed-on: https://chromium-review.googlesource.com/c/1475665
Reviewed-by: Pavel Feldman <pfeldman@chromium.org>
Reviewed-by: Alexei Filippov <alph@chromium.org>
Commit-Queue: Andrey Kosyakov <caseq@chromium.org>
Cr-Commit-Position: refs/heads/master@{#59643}
parent 8cdb3d8e
......@@ -550,35 +550,33 @@ std::string String16::utf8() const {
// have a good chance of being able to write the string into the
// buffer without reallocing (say, 1.5 x length).
if (length > std::numeric_limits<unsigned>::max() / 3) return std::string();
std::vector<char> bufferVector(length * 3);
char* buffer = bufferVector.data();
const UChar* characters = m_impl.data();
ConversionResult result =
convertUTF16ToUTF8(&characters, characters + length, &buffer,
buffer + bufferVector.size(), false);
DCHECK(
result !=
targetExhausted); // (length * 3) should be sufficient for any conversion
// Only produced from strict conversion.
DCHECK(result != sourceIllegal);
// Check for an unconverted high surrogate.
if (result == sourceExhausted) {
// This should be one unpaired high surrogate. Treat it the same
// was as an unpaired high surrogate would have been handled in
// the middle of a string with non-strict conversion - which is
// to say, simply encode it to UTF-8.
DCHECK((characters + 1) == (m_impl.data() + length));
DCHECK((*characters >= 0xD800) && (*characters <= 0xDBFF));
// There should be room left, since one UChar hasn't been
// converted.
DCHECK((buffer + 3) <= (buffer + bufferVector.size()));
putUTF8Triple(buffer, *characters);
std::string output(length * 3, '\0');
const UChar* characters = m_impl.data();
const UChar* characters_end = characters + length;
char* buffer = &*output.begin();
char* buffer_end = &*output.end();
while (characters < characters_end) {
// Use strict conversion to detect unpaired surrogates.
ConversionResult result = convertUTF16ToUTF8(
&characters, characters_end, &buffer, buffer_end, /* strict= */ true);
DCHECK_NE(result, targetExhausted);
// Conversion fails when there is an unpaired surrogate. Put
// replacement character (U+FFFD) instead of the unpaired
// surrogate.
if (result != conversionOK) {
DCHECK_LE(0xD800, *characters);
DCHECK_LE(*characters, 0xDFFF);
// There should be room left, since one UChar hasn't been
// converted.
DCHECK_LE(buffer + 3, buffer_end);
putUTF8Triple(buffer, replacementCharacter);
++characters;
}
}
return std::string(bufferVector.data(), buffer - bufferVector.data());
output.resize(buffer - output.data());
return output;
}
} // namespace v8_inspector
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment