Commit f39959d2 authored by Toon Verwaest, committed by Commit Bot

[unibrow] -1 used by kNoPreviousCharacter and kEndOfInput isn't a valid Lead/Trail char.

[unibrow] -1 used by kNoPreviousCharacter and kEndOfInput isn't a valid Lead/Trail char. Drop superfluous check.

Change-Id: Ie6b8e61230e25ab0321d0672d71f64fd593714bf
Reviewed-on: https://chromium-review.googlesource.com/1109829
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#53924}
parent e10e149b
......@@ -618,8 +618,10 @@ class Scanner {
}
void HandleLeadSurrogate() {
DCHECK(!unibrow::Utf16::IsLeadSurrogate(kEndOfInput));
if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
uc32 c1 = source_->Advance();
DCHECK(!unibrow::Utf16::IsTrailSurrogate(kEndOfInput));
if (!unibrow::Utf16::IsTrailSurrogate(c1)) {
source_->Back();
} else {
......
......@@ -85,6 +85,7 @@ unsigned Utf8::Encode(char* str,
str[1] = 0x80 | (c & kMask);
return 2;
} else if (c <= kMaxThreeByteChar) {
DCHECK(!Utf16::IsLeadSurrogate(Utf16::kNoPreviousCharacter));
if (Utf16::IsSurrogatePair(previous, c)) {
const int kUnmatchedSize = kSizeOfUnmatchedSurrogate;
return Encode(str - kUnmatchedSize,
......@@ -127,8 +128,8 @@ unsigned Utf8::Length(uchar c, int previous) {
} else if (c <= kMaxTwoByteChar) {
return 2;
} else if (c <= kMaxThreeByteChar) {
if (Utf16::IsTrailSurrogate(c) &&
Utf16::IsLeadSurrogate(previous)) {
DCHECK(!Utf16::IsLeadSurrogate(Utf16::kNoPreviousCharacter));
if (Utf16::IsSurrogatePair(previous, c)) {
return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates;
}
return 3;
......
......@@ -94,22 +94,20 @@ class UnicodeData {
class Utf16 {
public:
static const int kNoPreviousCharacter = -1;
static inline bool IsSurrogatePair(int lead, int trail) {
return IsLeadSurrogate(lead) && IsTrailSurrogate(trail);
}
static inline bool IsLeadSurrogate(int code) {
if (code == kNoPreviousCharacter) return false;
return (code & 0xfc00) == 0xd800;
}
static inline bool IsTrailSurrogate(int code) {
if (code == kNoPreviousCharacter) return false;
return (code & 0xfc00) == 0xdc00;
}
static inline int CombineSurrogatePair(uchar lead, uchar trail) {
return 0x10000 + ((lead & 0x3ff) << 10) + (trail & 0x3ff);
}
static const int kNoPreviousCharacter = -1;
static const uchar kMaxNonSurrogateCharCode = 0xffff;
// Encoding a single UTF-16 code unit will produce 1, 2 or 3 bytes
// of UTF-8 data. The special case where the unit is a surrogate
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment