Commit 460ba9c2 authored by jgruber, committed by Commit bot

[string] Refactor direct string conversions

This unifies several of the places in CSA that convert various
string kinds (cons, thin, sliced) to direct strings
(sequential, external).

A couple of spots remain with duplicate code, but most of these are
more difficult to unify due to specific optimizations.

BUG=

Review-Url: https://codereview.chromium.org/2744263002
Cr-Commit-Position: refs/heads/master@{#43848}
parent 9e827c23
......@@ -216,23 +216,23 @@ Node* RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo(
}
void RegExpBuiltinsAssembler::GetStringPointers(
Node* const string, Node* const offset, Node* const last_index,
Node* const string_length, bool is_one_byte, Variable* var_string_start,
Variable* var_string_end) {
Node* const string_data, Node* const offset, Node* const last_index,
Node* const string_length, String::Encoding encoding,
Variable* var_string_start, Variable* var_string_end) {
DCHECK_EQ(var_string_start->rep(), MachineType::PointerRepresentation());
DCHECK_EQ(var_string_end->rep(), MachineType::PointerRepresentation());
STATIC_ASSERT(SeqOneByteString::kHeaderSize == SeqTwoByteString::kHeaderSize);
const int kHeaderSize = SeqOneByteString::kHeaderSize - kHeapObjectTag;
const ElementsKind kind = is_one_byte ? UINT8_ELEMENTS : UINT16_ELEMENTS;
const ElementsKind kind = (encoding == String::ONE_BYTE_ENCODING)
? UINT8_ELEMENTS
: UINT16_ELEMENTS;
Node* const from_offset = ElementOffsetFromIndex(
IntPtrAdd(offset, last_index), kind, INTPTR_PARAMETERS, kHeaderSize);
var_string_start->Bind(IntPtrAdd(string, from_offset));
IntPtrAdd(offset, last_index), kind, INTPTR_PARAMETERS);
var_string_start->Bind(IntPtrAdd(string_data, from_offset));
Node* const to_offset = ElementOffsetFromIndex(
IntPtrAdd(offset, string_length), kind, INTPTR_PARAMETERS, kHeaderSize);
var_string_end->Bind(IntPtrAdd(string, to_offset));
IntPtrAdd(offset, string_length), kind, INTPTR_PARAMETERS);
var_string_end->Bind(IntPtrAdd(string_data, to_offset));
}
Node* RegExpBuiltinsAssembler::IrregexpExec(Node* const context,
......@@ -258,13 +258,9 @@ Node* RegExpBuiltinsAssembler::IrregexpExec(Node* const context,
Node* const int_zero = IntPtrConstant(0);
Variable var_result(this, MachineRepresentation::kTagged);
Variable var_string(this, MachineType::PointerRepresentation(), int_zero);
Variable var_string_offset(this, MachineType::PointerRepresentation(),
int_zero);
Variable var_string_instance_type(this, MachineRepresentation::kWord32,
Int32Constant(0));
ToDirectStringAssembler to_direct(state(), string);
Variable var_result(this, MachineRepresentation::kTagged);
Label out(this), runtime(this, Label::kDeferred);
// External constants.
......@@ -308,19 +304,7 @@ Node* RegExpBuiltinsAssembler::IrregexpExec(Node* const context,
// Unpack the string if possible.
var_string.Bind(BitcastTaggedToWord(string));
var_string_offset.Bind(int_zero);
var_string_instance_type.Bind(LoadInstanceType(string));
{
TryUnpackString(&var_string, &var_string_offset, &var_string_instance_type,
&runtime);
// At this point, {var_string} may contain a faked sequential string (i.e.
// an external string with an adjusted offset) so we cannot assert
// IsString({var_string}). We also cannot allocate after this point since
// GC could move {var_string}'s underlying string.
}
to_direct.TryToDirect(&runtime);
Node* const smi_string_length = LoadStringLength(string);
......@@ -339,19 +323,16 @@ Node* RegExpBuiltinsAssembler::IrregexpExec(Node* const context,
{
Node* const int_string_length = SmiUntag(smi_string_length);
Node* const string_instance_type = var_string_instance_type.value();
CSA_ASSERT(this, IsSequentialStringInstanceType(string_instance_type));
Node* const direct_string_data = to_direct.PointerToData(&runtime);
Label next(this), if_isonebyte(this), if_istwobyte(this, Label::kDeferred);
Branch(IsOneByteStringInstanceType(string_instance_type), &if_isonebyte,
&if_istwobyte);
Branch(IsOneByteStringInstanceType(to_direct.instance_type()),
&if_isonebyte, &if_istwobyte);
Bind(&if_isonebyte);
{
const bool kIsOneByte = true;
GetStringPointers(var_string.value(), var_string_offset.value(),
int_last_index, int_string_length, kIsOneByte,
GetStringPointers(direct_string_data, to_direct.offset(), int_last_index,
int_string_length, String::ONE_BYTE_ENCODING,
&var_string_start, &var_string_end);
var_code.Bind(
LoadFixedArrayElement(data, JSRegExp::kIrregexpLatin1CodeIndex));
......@@ -360,9 +341,8 @@ Node* RegExpBuiltinsAssembler::IrregexpExec(Node* const context,
Bind(&if_istwobyte);
{
const bool kIsOneByte = false;
GetStringPointers(var_string.value(), var_string_offset.value(),
int_last_index, int_string_length, kIsOneByte,
GetStringPointers(direct_string_data, to_direct.offset(), int_last_index,
int_string_length, String::TWO_BYTE_ENCODING,
&var_string_start, &var_string_end);
var_code.Bind(
LoadFixedArrayElement(data, JSRegExp::kIrregexpUC16CodeIndex));
......
......@@ -34,10 +34,10 @@ class RegExpBuiltinsAssembler : public CodeStubAssembler {
bool is_fastpath);
// Loads {var_string_start} and {var_string_end} with the corresponding
// offsets into the given {string}.
void GetStringPointers(Node* const string, Node* const offset,
// offsets into the given {string_data}.
void GetStringPointers(Node* const string_data, Node* const offset,
Node* const last_index, Node* const string_length,
bool is_one_byte, Variable* var_string_start,
String::Encoding encoding, Variable* var_string_start,
Variable* var_string_end);
// Low level logic around the actual call into generated Irregexp code.
......
This diff is collapsed.
......@@ -692,6 +692,8 @@ class V8_EXPORT_PRIVATE CodeStubAssembler : public compiler::CodeAssembler {
Node* IsSpecialReceiverInstanceType(Node* instance_type);
Node* IsStringInstanceType(Node* instance_type);
Node* IsOneByteStringInstanceType(Node* instance_type);
Node* IsExternalStringInstanceType(Node* instance_type);
Node* IsShortExternalStringInstanceType(Node* instance_type);
Node* IsSequentialStringInstanceType(Node* instance_type);
Node* IsString(Node* object);
Node* IsJSObject(Node* object);
......@@ -738,14 +740,6 @@ class V8_EXPORT_PRIVATE CodeStubAssembler : public compiler::CodeAssembler {
Node* StringAdd(Node* context, Node* first, Node* second,
AllocationFlags flags = kNone);
// Tries to unpack |string| into a pseudo-sequential string.
// In addition to the work done by TryDerefExternalString and
// MaybeDerefIndirectString, this method can also unpack sliced strings into
// a (string, offset) pair. The same GC restrictions on the returned string
// value apply as for TryDerefExternalString.
void TryUnpackString(Variable* var_string, Variable* var_offset,
Variable* var_instance_type, Label* if_bailout);
// Unpack the external string, returning a pointer that (offset-wise) looks
// like a sequential string.
// Note that this pointer is not tagged and does not point to a real
......@@ -1399,6 +1393,42 @@ class CodeStubArguments {
Node* fp_;
};
// Helper assembler that converts an indirect string (flat cons, thin, sliced)
// into a direct string (sequential or external) and exposes the pieces needed
// to address its characters: the unwrapped string, its instance type, an
// offset, and an "is external" flag.
//
// Usage as seen in RegExpBuiltinsAssembler::IrregexpExec: construct with the
// assembler state and the input string, call TryToDirect(), then read
// PointerToData(), instance_type() and offset().
class ToDirectStringAssembler : public CodeStubAssembler {
private:
// Selects which kind of raw pointer TryToSequential() produces; see
// PointerToData() and PointerToString() below.
enum StringPointerKind { PTR_TO_DATA, PTR_TO_STRING };
public:
// {state} is the code assembler state (IrregexpExec passes its own state());
// {string} is the string to be converted.
explicit ToDirectStringAssembler(compiler::CodeAssemblerState* state,
Node* string);
// Converts flat cons, thin, and sliced strings and returns the direct
// string. The result can be either a sequential or external string.
// NOTE(review): presumably control transfers to {if_bailout} when the string
// cannot be converted (e.g. a non-flat cons string) -- confirm against the
// definition, which is not shown here.
Node* TryToDirect(Label* if_bailout);
// Returns a pointer to the beginning of the string data.
// The visible caller adds offset() (scaled by character width) to this
// pointer itself, so the returned pointer is presumably not yet adjusted by
// the offset -- TODO confirm.
Node* PointerToData(Label* if_bailout) {
return TryToSequential(PTR_TO_DATA, if_bailout);
}
// Returns a pointer that, offset-wise, looks like a String.
Node* PointerToString(Label* if_bailout) {
return TryToSequential(PTR_TO_STRING, if_bailout);
}
// Accessors for the current values of the underlying variables. In the
// visible caller they are read only after TryToDirect() has been called.
Node* string() { return var_string_.value(); }
Node* instance_type() { return var_instance_type_.value(); }
Node* offset() { return var_offset_.value(); }
Node* is_external() { return var_is_external_.value(); }
private:
// Shared implementation of PointerToData() and PointerToString(); {ptr_kind}
// picks which of the two pointer flavours is returned.
Node* TryToSequential(StringPointerKind ptr_kind, Label* if_bailout);
// State backing the accessors above.
// NOTE(review): declaration order determines member initialization order;
// keep it in sync with the constructor's initializer list (not shown here).
Variable var_string_;
Variable var_instance_type_;
Variable var_offset_;
Variable var_is_external_;
};
#ifdef DEBUG
#define CSA_ASSERT(csa, x) \
(csa)->Assert([&] { return (x); }, #x, __FILE__, __LINE__)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment