Commit 2e3f3950 authored by Andrey Kosyakov, committed by Commit Bot

DevTools: fix String16::utf8() to return invalid char for broken surrogate pair

DevTools protocol is not supposed to carry structurally invalid utf8 as string payload.

Bug: chromium:929862
Change-Id: I701eeb553e6bf22d887947dcd9f4b29af7a43e2b
Reviewed-on: https://chromium-review.googlesource.com/c/1475665
Reviewed-by: Pavel Feldman <pfeldman@chromium.org>
Reviewed-by: Alexei Filippov <alph@chromium.org>
Commit-Queue: Andrey Kosyakov <caseq@chromium.org>
Cr-Commit-Position: refs/heads/master@{#59643}
parent 8cdb3d8e
...@@ -550,35 +550,33 @@ std::string String16::utf8() const { ...@@ -550,35 +550,33 @@ std::string String16::utf8() const {
// have a good chance of being able to write the string into the // have a good chance of being able to write the string into the
// buffer without reallocing (say, 1.5 x length). // buffer without reallocing (say, 1.5 x length).
if (length > std::numeric_limits<unsigned>::max() / 3) return std::string(); if (length > std::numeric_limits<unsigned>::max() / 3) return std::string();
std::vector<char> bufferVector(length * 3);
char* buffer = bufferVector.data();
const UChar* characters = m_impl.data();
ConversionResult result = std::string output(length * 3, '\0');
convertUTF16ToUTF8(&characters, characters + length, &buffer, const UChar* characters = m_impl.data();
buffer + bufferVector.size(), false); const UChar* characters_end = characters + length;
DCHECK( char* buffer = &*output.begin();
result != char* buffer_end = &*output.end();
targetExhausted); // (length * 3) should be sufficient for any conversion while (characters < characters_end) {
// Use strict conversion to detect unpaired surrogates.
// Only produced from strict conversion. ConversionResult result = convertUTF16ToUTF8(
DCHECK(result != sourceIllegal); &characters, characters_end, &buffer, buffer_end, /* strict= */ true);
DCHECK_NE(result, targetExhausted);
// Check for an unconverted high surrogate. // Conversion fails when there is an unpaired surrogate. Put
if (result == sourceExhausted) { // replacement character (U+FFFD) instead of the unpaired
// This should be one unpaired high surrogate. Treat it the same // surrogate.
// was as an unpaired high surrogate would have been handled in if (result != conversionOK) {
// the middle of a string with non-strict conversion - which is DCHECK_LE(0xD800, *characters);
// to say, simply encode it to UTF-8. DCHECK_LE(*characters, 0xDFFF);
DCHECK((characters + 1) == (m_impl.data() + length)); // There should be room left, since one UChar hasn't been
DCHECK((*characters >= 0xD800) && (*characters <= 0xDBFF)); // converted.
// There should be room left, since one UChar hasn't been DCHECK_LE(buffer + 3, buffer_end);
// converted. putUTF8Triple(buffer, replacementCharacter);
DCHECK((buffer + 3) <= (buffer + bufferVector.size())); ++characters;
putUTF8Triple(buffer, *characters); }
} }
return std::string(bufferVector.data(), buffer - bufferVector.data()); output.resize(buffer - output.data());
return output;
} }
} // namespace v8_inspector } // namespace v8_inspector
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment