Commit 4593f3c6 authored by Jakob Gruber, committed by V8 LUCI CQ

[string] Micro-optimize String::Flatten

- Use a StringShape instead of repeatedly querying type.
- Add a shortcut for already-flat strings.
- Unhandlify where possible (all except SlowFlatten).
- Mark String::Flatten and StringShape methods V8_INLINE.
- Add a specialized ConsString::IsFlat overload.

Drive-by: Various (add const, remove this->, helper methods).

Bug: v8:12195
Change-Id: If20df12bc29c29cff2005fdc9bd826ed9f303463
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3259527
Auto-Submit: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Camillo Bruni <cbruni@chromium.org>
Commit-Queue: Camillo Bruni <cbruni@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77701}
parent ae18522e
......@@ -803,27 +803,27 @@ void String::StringVerify(Isolate* isolate) {
void ConsString::ConsStringVerify(Isolate* isolate) {
TorqueGeneratedClassVerifiers::ConsStringVerify(*this, isolate);
CHECK_GE(this->length(), ConsString::kMinLength);
CHECK(this->length() == this->first().length() + this->second().length());
if (this->IsFlat()) {
CHECK_GE(length(), ConsString::kMinLength);
CHECK(length() == first().length() + second().length());
if (IsFlat(isolate)) {
// A flat cons can only be created by String::SlowFlatten.
// Afterwards, the first part may be externalized or internalized.
CHECK(this->first().IsSeqString() || this->first().IsExternalString() ||
this->first().IsThinString());
CHECK(first().IsSeqString() || first().IsExternalString() ||
first().IsThinString());
}
}
void ThinString::ThinStringVerify(Isolate* isolate) {
TorqueGeneratedClassVerifiers::ThinStringVerify(*this, isolate);
CHECK(this->actual().IsInternalizedString());
CHECK(this->actual().IsSeqString() || this->actual().IsExternalString());
CHECK(actual().IsInternalizedString());
CHECK(actual().IsSeqString() || actual().IsExternalString());
}
void SlicedString::SlicedStringVerify(Isolate* isolate) {
TorqueGeneratedClassVerifiers::SlicedStringVerify(*this, isolate);
CHECK(!this->parent().IsConsString());
CHECK(!this->parent().IsSlicedString());
CHECK_GE(this->length(), SlicedString::kMinLength);
CHECK(!parent().IsConsString());
CHECK(!parent().IsSlicedString());
CHECK_GE(length(), SlicedString::kMinLength);
}
USE_TORQUE_VERIFIER(ExternalString)
......
......@@ -158,6 +158,8 @@ bool StringShape::IsIndirect() const {
return (type_ & kIsIndirectStringMask) == kIsIndirectStringTag;
}
// Returns true exactly when IsIndirect() is false for this shape.
bool StringShape::IsDirect() const { return !IsIndirect(); }
// Returns true when the representation bits of type_ (selected by
// kStringRepresentationMask) equal kExternalStringTag.
bool StringShape::IsExternal() const {
  const auto representation_bits = type_ & kStringRepresentationMask;
  return representation_bits == kExternalStringTag;
}
......@@ -604,20 +606,31 @@ const Char* String::GetChars(
Handle<String> String::Flatten(Isolate* isolate, Handle<String> string,
AllocationType allocation) {
if (string->IsConsString()) {
DCHECK(!string->InSharedHeap());
Handle<ConsString> cons = Handle<ConsString>::cast(string);
if (cons->IsFlat()) {
string = handle(cons->first(), isolate);
} else {
return SlowFlatten(isolate, cons, allocation);
DisallowGarbageCollection no_gc; // Unhandlified code.
PtrComprCageBase cage_base(isolate);
String s = *string;
StringShape shape(s, cage_base);
// Shortcut already-flat strings.
if (V8_LIKELY(shape.IsDirect())) return string;
if (shape.IsCons()) {
DCHECK(!s.InSharedHeap());
ConsString cons = ConsString::cast(s);
if (!cons.IsFlat(isolate)) {
AllowGarbageCollection yes_gc;
return SlowFlatten(isolate, handle(cons, isolate), allocation);
}
s = cons.first(cage_base);
shape = StringShape(s, cage_base);
}
if (string->IsThinString()) {
string = handle(Handle<ThinString>::cast(string)->actual(), isolate);
DCHECK(!string->IsConsString());
if (shape.IsThin()) {
s = ThinString::cast(s).actual(cage_base);
DCHECK(!s.IsConsString());
}
return string;
return handle(s, isolate);
}
Handle<String> String::Flatten(LocalIsolate* isolate, Handle<String> string,
......@@ -680,7 +693,7 @@ void String::Set(int index, uint16_t value) {
DCHECK(index >= 0 && index < length());
DCHECK(StringShape(*this).IsSequential());
return this->IsOneByteRepresentation()
return IsOneByteRepresentation()
? SeqOneByteString::cast(*this).SeqOneByteStringSet(index, value)
: SeqTwoByteString::cast(*this).SeqTwoByteStringSet(index, value);
}
......@@ -689,13 +702,13 @@ bool String::IsFlat() const { return IsFlat(GetPtrComprCageBase(*this)); }
bool String::IsFlat(PtrComprCageBase cage_base) const {
if (!StringShape(*this, cage_base).IsCons()) return true;
return ConsString::cast(*this).second(cage_base).length() == 0;
return ConsString::cast(*this).IsFlat(cage_base);
}
String String::GetUnderlying() const {
// Giving direct access to underlying string only makes sense if the
// wrapping string is already flattened.
DCHECK(this->IsFlat());
DCHECK(IsFlat());
DCHECK(StringShape(*this).IsIndirect());
STATIC_ASSERT(static_cast<int>(ConsString::kFirstOffset) ==
static_cast<int>(SlicedString::kParentOffset));
......@@ -876,14 +889,18 @@ void SlicedString::set_parent(String parent, WriteBarrierMode mode) {
TorqueGeneratedSlicedString<SlicedString, Super>::set_parent(parent, mode);
}
Object ConsString::unchecked_first() {
Object ConsString::unchecked_first() const {
return TaggedField<Object, kFirstOffset>::load(*this);
}
Object ConsString::unchecked_second() {
Object ConsString::unchecked_second() const {
return RELAXED_READ_FIELD(*this, kSecondOffset);
}
// A cons string counts as flat when its second part has length zero.
// `cage_base` is forwarded to the second() accessor.
bool ConsString::IsFlat(PtrComprCageBase cage_base) const {
  const String tail = second(cage_base);
  return tail.length() == 0;
}
// Getter for ThinString::unchecked_actual: loads the tagged field at
// kActualOffset and returns it as a HeapObject.
// NOTE(review): `cage_base` is presumably introduced by the DEF_GETTER macro
// expansion -- confirm against the macro definition.
DEF_GETTER(ThinString, unchecked_actual, HeapObject) {
return TaggedField<HeapObject, kActualOffset>::load(cage_base, *this);
}
......
......@@ -42,25 +42,26 @@ enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL };
// concrete performance benefit at that particular point in the code.
class StringShape {
public:
inline explicit StringShape(const String s);
inline explicit StringShape(const String s, PtrComprCageBase cage_base);
inline explicit StringShape(Map s);
inline explicit StringShape(InstanceType t);
inline bool IsSequential() const;
inline bool IsExternal() const;
inline bool IsCons() const;
inline bool IsSliced() const;
inline bool IsThin() const;
inline bool IsIndirect() const;
inline bool IsUncachedExternal() const;
inline bool IsExternalOneByte() const;
inline bool IsExternalTwoByte() const;
inline bool IsSequentialOneByte() const;
inline bool IsSequentialTwoByte() const;
inline bool IsInternalized() const;
inline StringRepresentationTag representation_tag() const;
inline uint32_t encoding_tag() const;
inline uint32_t full_representation_tag() const;
V8_INLINE explicit StringShape(const String s);
V8_INLINE explicit StringShape(const String s, PtrComprCageBase cage_base);
V8_INLINE explicit StringShape(Map s);
V8_INLINE explicit StringShape(InstanceType t);
V8_INLINE bool IsSequential() const;
V8_INLINE bool IsExternal() const;
V8_INLINE bool IsCons() const;
V8_INLINE bool IsSliced() const;
V8_INLINE bool IsThin() const;
V8_INLINE bool IsDirect() const;
V8_INLINE bool IsIndirect() const;
V8_INLINE bool IsUncachedExternal() const;
V8_INLINE bool IsExternalOneByte() const;
V8_INLINE bool IsExternalTwoByte() const;
V8_INLINE bool IsSequentialOneByte() const;
V8_INLINE bool IsSequentialTwoByte() const;
V8_INLINE bool IsInternalized() const;
V8_INLINE StringRepresentationTag representation_tag() const;
V8_INLINE uint32_t encoding_tag() const;
V8_INLINE uint32_t full_representation_tag() const;
#ifdef DEBUG
inline uint32_t type() const { return type_; }
inline void invalidate() { valid_ = false; }
......@@ -250,10 +251,10 @@ class String : public TorqueGeneratedString<String, Name> {
// Degenerate cons strings are handled specially by the garbage
// collector (see IsShortcutCandidate).
static inline Handle<String> Flatten(
static V8_INLINE Handle<String> Flatten(
Isolate* isolate, Handle<String> string,
AllocationType allocation = AllocationType::kYoung);
static inline Handle<String> Flatten(
static V8_INLINE Handle<String> Flatten(
LocalIsolate* isolate, Handle<String> string,
AllocationType allocation = AllocationType::kYoung);
......@@ -770,11 +771,13 @@ class ConsString : public TorqueGeneratedConsString<ConsString, String> {
public:
// Doesn't check that the result is a string, even in debug mode. This is
// useful during GC where the mark bits confuse the checks.
inline Object unchecked_first();
inline Object unchecked_first() const;
// Doesn't check that the result is a string, even in debug mode. This is
// useful during GC where the mark bits confuse the checks.
inline Object unchecked_second();
inline Object unchecked_second() const;
V8_INLINE bool IsFlat(PtrComprCageBase cage_base) const;
// Dispatched behavior.
V8_EXPORT_PRIVATE uint16_t
......
......@@ -5,6 +5,7 @@
#include "src/regexp/regexp-parser.h"
#include "src/execution/isolate.h"
#include "src/objects/string-inl.h"
#include "src/regexp/property-sequences.h"
#include "src/regexp/regexp-ast.h"
#include "src/regexp/regexp-macro-assembler.h"
......
......@@ -18152,7 +18152,7 @@ void AssertOneByteConsContainsTwoByteExternal(i::Handle<i::String> maybe_cons,
CHECK(maybe_cons->IsOneByteRepresentation());
CHECK(maybe_cons->IsConsString());
i::ConsString cons = i::ConsString::cast(*maybe_cons);
CHECK(cons.IsFlat());
CHECK(cons.IsFlat(GetPtrComprCageBase(cons)));
CHECK(cons.first() == *external);
CHECK(cons.first().IsTwoByteRepresentation());
CHECK(cons.first().IsExternalString());
......@@ -602,7 +602,7 @@ TEST(ConsStringWithEmptyFirstFlatten) {
isolate->factory()->NewStringFromAsciiChecked("snd012345012345678");
cons->set_first(*new_fst);
cons->set_second(*new_snd);
CHECK(!cons->IsFlat());
CHECK(!cons->IsFlat(GetPtrComprCageBase(*cons)));
CHECK_EQ(initial_length, new_fst->length() + new_snd->length());
CHECK_EQ(initial_length, cons->length());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment