[string] Handle two-byte contents in String.p.toLowerCase

Previously (since f0e95769), this toLowerCase fast-path assumed it would only see one-byte flat contents. Unfortunately, it's possible to have a one-byte sliced string that has a two-byte parent. This CL ensures that String.p.toLowerCase handles such cases correctly. BUG=chromium:736451 Cq-Include-Trybots: master.tryserver.v8:v8_linux_noi18n_rel_ng Change-Id: Iae056b3db5535bb5665439a5cc8282a51571a548 Reviewed-on: https://chromium-review.googlesource.com/565559 Reviewed-by: Yang Guo <yangguo@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#46574}

[string] Handle two-byte contents in String.p.toLowerCase
Previously (since f0e95769), this toLowerCase fast-path assumed it would only see one-byte flat contents. Unfortunately, it's possible to have a one-byte sliced string that has a two-byte parent. This CL ensures that String.p.toLowerCase handles such cases correctly. BUG=chromium:736451 Cq-Include-Trybots: master.tryserver.v8:v8_linux_noi18n_rel_ng Change-Id: Iae056b3db5535bb5665439a5cc8282a51571a548 Reviewed-on: https://chromium-review.googlesource.com/565559 Reviewed-by: Yang Guo <yangguo@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#46574}
3c260762 · jgruber · Commit Bot · 292e9670 · 3c260762 · 3c260762
Commit 3c260762 authored Jul 11, 2017 by jgruber Committed by Commit Bot Jul 12, 2017
Show whitespace changes
Inline Side-by-side

Showing with 49 additions and 45 deletions

intl.cc src/intl.cc +36 -45

regress-crbug-736451.js test/mjsunit/regress/regress-crbug-736451.js +13 -0

No files found.
--- a/src/intl.cc
+++ b/src/intl.cc
@@ -125,7 +125,7 @@ void ToUpperWithSharpS(const Vector<const Char>& src,
  }
 }
-inline int FindFirstUpperOrNonAscii(Handle<String> s, int length) {
+inline int FindFirstUpperOrNonAscii(String* s, int length) {
  for (int index = 0; index < length; ++index) {
    uint16_t ch = s->Get(index);
    if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
@@ -200,26 +200,30 @@ MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
 }
 // A stripped-down version of ConvertToLower that can only handle flat one-byte
-// strings and does not allocate.
+// strings and does not allocate. Note that {src} could still be, e.g., a
+// one-byte sliced string with a two-byte parent string.
 // Called from TF builtins.
 MUST_USE_RESULT Object* ConvertOneByteToLower(String* src, String* dst,
                                              Isolate* isolate) {
  DCHECK_EQ(src->length(), dst->length());
-  DCHECK(src->IsOneByteRepresentation());
+  DCHECK(src->HasOnlyOneByteChars());
  DCHECK(src->IsFlat());
  DCHECK(dst->IsSeqOneByteString());
  DisallowHeapAllocation no_gc;
  const int length = src->length();
+  String::FlatContent src_flat = src->GetFlatContent();
-  const uint8_t* src_data = src->GetFlatContent().ToOneByteVector().start();
  uint8_t* dst_data = SeqOneByteString::cast(dst)->GetChars();
+  if (src_flat.IsOneByte()) {
+    const uint8_t* src_data = src_flat.ToOneByteVector().start();
    bool has_changed_character = false;
-  int index_to_first_unprocessed = FastAsciiConvert<true>(
+    int index_to_first_unprocessed =
-      reinterpret_cast<char*>(dst_data),
+        FastAsciiConvert<true>(reinterpret_cast<char*>(dst_data),
-      reinterpret_cast<const char*>(src_data), length, &has_changed_character);
+                               reinterpret_cast<const char*>(src_data), length,
+                               &has_changed_character);
    if (index_to_first_unprocessed == length) {
      return has_changed_character ? dst : src;
@@ -230,6 +234,17 @@ MUST_USE_RESULT Object* ConvertOneByteToLower(String* src, String* dst,
    for (int index = index_to_first_unprocessed; index < length; ++index) {
      dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
    }
+  } else {
+    DCHECK(src_flat.IsTwoByte());
+    int index_to_first_unprocessed = FindFirstUpperOrNonAscii(src, length);
+    if (index_to_first_unprocessed == length) return src;
+    const uint16_t* src_data = src_flat.ToUC16Vector().start();
+    CopyChars(dst_data, src_data, index_to_first_unprocessed);
+    for (int index = index_to_first_unprocessed; index < length; ++index) {
+      dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
+    }
+  }
  return dst;
 }
@@ -252,41 +267,17 @@ MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) {
  // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
  // to two parts, one for scanning the prefix with no change and the other for
  // handling ASCII-only characters.
-  int index_to_first_unprocessed = length;
-  const bool is_short = length < static_cast<int>(sizeof(uintptr_t));
+  bool is_short = length < static_cast<int>(sizeof(uintptr_t));
  if (is_short) {
-    index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
+    bool is_lower_ascii = FindFirstUpperOrNonAscii(*s, length) == length;
-    // Nothing to do if the string is all ASCII with no uppercase.
+    if (is_lower_ascii) return *s;
-    if (index_to_first_unprocessed == length) return *s;
  }
  Handle<SeqOneByteString> result =
      isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
-  if (s->IsOneByteRepresentation()) {
  return ConvertOneByteToLower(*s, *result, isolate);
-  }
-  DisallowHeapAllocation no_gc;
-  DCHECK(s->IsFlat());
-  DCHECK(s->IsTwoByteRepresentation());
-  String::FlatContent flat = s->GetFlatContent();
-  DCHECK(flat.IsTwoByte());
-  uint8_t* dest = result->GetChars();
-  if (index_to_first_unprocessed == length) {
-    DCHECK(!is_short);
-    index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
-  }
-  // Nothing to do if the string is all ASCII with no uppercase.
-  if (index_to_first_unprocessed == length) return *s;
-  const uint16_t* src = flat.ToUC16Vector().start();
-  CopyChars(dest, src, index_to_first_unprocessed);
-  for (int index = index_to_first_unprocessed; index < length; ++index) {
-    dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
-  }
-  return *result;
 }
 MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) {

--- a/test/mjsunit/regress/regress-crbug-736451.js
+++ b/test/mjsunit/regress/regress-crbug-736451.js
+// Copyright 2017 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+// Flags: --expose-externalize-string --no-stress-opt
+!function() {
+  const s0 = "external string turned into two byte";
+  const s1 = s0.substring(1);
+  externalizeString(s0, true);
+  s1.toLowerCase();
+}();