Commit 50875018 authored by Sigurd Schneider, committed by Commit Bot

Reland "[turbofan] Add simplified operator for String.prototype.codePointAt"

This is a reland of 22d894c8
Original change's description:
> [turbofan] Add simplified operator for String.prototype.codePointAt
> 
> This CL adds the simplified operators
>   StringCodePointAt
>   SeqStringCodePointAt
> and associated lowering.
> 
> Bug: v8:7270
> Change-Id: I5aeefddeec39c3119b2d225e92a3116f802e7b45
> Reviewed-on: https://chromium-review.googlesource.com/861789
> Commit-Queue: Sigurd Schneider <sigurds@chromium.org>
> Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
> Reviewed-by: Peter Marshall <petermarshall@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#50547}

Bug: v8:7270
Change-Id: Ia08d18543fc165fc2312cd393ed51f4ec98d7a58
Reviewed-on: https://chromium-review.googlesource.com/866311
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Commit-Queue: Sigurd Schneider <sigurds@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50599}
parent 83df2d84
......@@ -91,8 +91,9 @@ namespace internal {
ASM(StackCheck) \
\
/* String helpers */ \
TFC(StringCharAt, StringCharAt, 1) \
TFC(StringCharCodeAt, StringCharCodeAt, 1) \
TFC(StringCharAt, StringAt, 1) \
TFC(StringCharCodeAt, StringAt, 1) \
TFC(StringCodePointAt, StringAt, 1) \
TFC(StringEqual, Compare, 1) \
TFC(StringGreaterThan, Compare, 1) \
TFC(StringGreaterThanOrEqual, Compare, 1) \
......
......@@ -576,6 +576,21 @@ TF_BUILTIN(StringCharCodeAt, StringBuiltinsAssembler) {
Return(result);
}
// Builtin backing the StringCodePointAt operator: given a string {receiver}
// and an untagged {position}, loads the (possibly surrogate-pair) code point
// at that position in UTF32 form and returns it as a Smi.
TF_BUILTIN(StringCodePointAt, StringBuiltinsAssembler) {
  Node* string = Parameter(Descriptor::kReceiver);
  Node* index = Parameter(Descriptor::kPosition);

  // TODO(sigurds) Figure out if passing length as argument pays off.
  TNode<IntPtrT> string_length = LoadStringLengthAsWord(string);

  // Read the code point that starts at {index} in {string}.
  TNode<Int32T> code_point = LoadSurrogatePairAt(string, string_length, index,
                                                 UnicodeEncoding::UTF32);

  // The value is handed back tagged (as a Smi).
  // TODO(turbofan): Allow builtins to return values untagged.
  Return(SmiFromWord32(code_point));
}
// -----------------------------------------------------------------------------
// ES6 section 21.1 String Objects
......
......@@ -847,6 +847,12 @@ bool EffectControlLinearizer::TryWireInStateEffect(Node* node,
case IrOpcode::kSeqStringCharCodeAt:
result = LowerSeqStringCharCodeAt(node);
break;
case IrOpcode::kStringCodePointAt:
result = LowerStringCodePointAt(node);
break;
case IrOpcode::kSeqStringCodePointAt:
  // Must use the code-point lowering (not the char-code one), otherwise
  // surrogate pairs are never combined. UTF32 matches the encoding used by
  // the StringCodePointAt builtin and the [0, kMaxCodePoint] typer range.
  result = LowerSeqStringCodePointAt(node, UnicodeEncoding::UTF32);
  break;
case IrOpcode::kStringToLowerCaseIntl:
result = LowerStringToLowerCaseIntl(node);
break;
......@@ -2687,19 +2693,25 @@ Node* EffectControlLinearizer::LowerStringCharCodeAt(Node* node) {
__ NoContextConstant());
}
Node* EffectControlLinearizer::LowerSeqStringCharCodeAt(Node* node) {
// Lowers a StringCodePointAt node into a call to the StringCodePointAt
// builtin. The call is described as non-throwing and non-writing and yields a
// TaggedSigned result.
Node* EffectControlLinearizer::LowerStringCodePointAt(Node* node) {
  Node* const string = node->InputAt(0);
  Node* const index = node->InputAt(1);

  Callable const builtin =
      Builtins::CallableFor(isolate(), Builtins::kStringCodePointAt);

  // Describe the stub call: no stack parameters, no special flags.
  CallDescriptor::Flags call_flags = CallDescriptor::kNoFlags;
  Operator::Properties call_properties =
      Operator::kNoThrow | Operator::kNoWrite;
  CallDescriptor* const call_descriptor = Linkage::GetStubCallDescriptor(
      isolate(), graph()->zone(), builtin.descriptor(), 0, call_flags,
      call_properties, MachineType::TaggedSigned());

  // The builtin is invoked with the no-context constant as its context.
  return __ Call(call_descriptor, __ HeapConstant(builtin.code()), string,
                 index, __ NoContextConstant());
}
Node* EffectControlLinearizer::LoadFromString(Node* receiver, Node* position,
Node* is_one_byte) {
auto one_byte_load = __ MakeLabel();
auto done = __ MakeLabel(MachineRepresentation::kWord32);
Node* map = __ LoadField(AccessBuilder::ForMap(), receiver);
Node* instance_type = __ LoadField(AccessBuilder::ForMapInstanceType(), map);
Node* is_one_byte = __ Word32Equal(
__ Word32And(instance_type, __ Int32Constant(kStringEncodingMask)),
__ Int32Constant(kOneByteStringTag));
__ GotoIf(is_one_byte, &one_byte_load);
Node* two_byte_result = __ LoadElement(
AccessBuilder::ForSeqTwoByteStringCharacter(), receiver, position);
......@@ -2714,6 +2726,85 @@ Node* EffectControlLinearizer::LowerSeqStringCharCodeAt(Node* node) {
return done.PhiAt(0);
}
// Lowers SeqStringCharCodeAt: determines from the receiver's instance type
// whether the string is one-byte encoded and loads the character at the given
// position via LoadFromString.
Node* EffectControlLinearizer::LowerSeqStringCharCodeAt(Node* node) {
  Node* const string = node->InputAt(0);
  Node* const index = node->InputAt(1);

  // The encoding bits live in the instance type stored on the string's map.
  Node* const string_map = __ LoadField(AccessBuilder::ForMap(), string);
  Node* const type_bits =
      __ LoadField(AccessBuilder::ForMapInstanceType(), string_map);
  Node* const encoding_bits =
      __ Word32And(type_bits, __ Int32Constant(kStringEncodingMask));
  Node* const has_one_byte_encoding =
      __ Word32Equal(encoding_bits, __ Int32Constant(kOneByteStringTag));

  return LoadFromString(string, index, has_one_byte_encoding);
}
// Lowers SeqStringCodePointAt.
//
// Loads the code unit at {position} (input 1) from the string {receiver}
// (input 0). If that unit is a lead surrogate and is immediately followed,
// inside the string, by a trail surrogate, both units are combined into one
// word32 value according to {encoding}:
//   - UTF16: both 16-bit units packed into one word32 (order depends on the
//     target endianness),
//   - UTF32: the decoded code point.
// In every other case the first code unit is returned unchanged.
Node* EffectControlLinearizer::LowerSeqStringCodePointAt(
    Node* node, UnicodeEncoding encoding) {
  Node* receiver = node->InputAt(0);
  Node* position = node->InputAt(1);

  Node* map = __ LoadField(AccessBuilder::ForMap(), receiver);
  Node* instance_type = __ LoadField(AccessBuilder::ForMapInstanceType(), map);
  Node* is_one_byte = __ Word32Equal(
      __ Word32And(instance_type, __ Int32Constant(kStringEncodingMask)),
      __ Int32Constant(kOneByteStringTag));

  Node* first_char_code = LoadFromString(receiver, position, is_one_byte);

  auto return_result = __ MakeLabel(MachineRepresentation::kWord32);

  // Check if the first character code is a lead surrogate, i.e. lies inside
  // the interval [0xD800, 0xDBFF].
  Node* first_is_lead =
      __ Word32Equal(__ Word32And(first_char_code, __ Int32Constant(0xFC00)),
                     __ Int32Constant(0xD800));
  // Anything that is not a lead surrogate is returned as is.
  __ GotoIfNot(first_is_lead, &return_result, first_char_code);

  // Check if position + 1 is still in range.
  // NOTE(review): {length} is compared as a raw word32 here; if the string
  // length field is Smi-tagged it has to be untagged first (e.g. via
  // ChangeSmiToInt32) -- confirm against AccessBuilder::ForStringLength().
  Node* length = __ LoadField(AccessBuilder::ForStringLength(), receiver);
  Node* next_position = __ Int32Add(position, __ Int32Constant(1));
  Node* next_position_in_range = __ Int32LessThan(next_position, length);
  // A lead surrogate in the last position is unpaired; never read past the
  // end of the string.
  __ GotoIfNot(next_position_in_range, &return_result, first_char_code);

  // Load second character code.
  Node* second_char_code = LoadFromString(receiver, next_position, is_one_byte);
  // Check if the second character code is a trail surrogate, i.e. lies inside
  // the interval [0xDC00, 0xDFFF].
  Node* second_is_trail =
      __ Word32Equal(__ Word32And(second_char_code, __ Int32Constant(0xFC00)),
                     __ Int32Constant(0xDC00));
  // An unpaired lead surrogate is returned as is.
  __ GotoIfNot(second_is_trail, &return_result, first_char_code);

  Node* result;
  switch (encoding) {
    case UnicodeEncoding::UTF16:
      result = __ Word32Or(
// Need to swap the order for big-endian platforms
#if V8_TARGET_BIG_ENDIAN
          __ Word32Shl(first_char_code, __ Int32Constant(16)),
          second_char_code);
#else
          __ Word32Shl(second_char_code, __ Int32Constant(16)),
          first_char_code);
#endif
      break;
    case UnicodeEncoding::UTF32: {
      // Convert UTF16 surrogate pair into |word32| code point, encoded as
      // UTF32.
      Node* surrogate_offset =
          __ Int32Constant(0x10000 - (0xD800 << 10) - 0xDC00);

      // (lead << 10) + trail + SURROGATE_OFFSET
      result = __ Int32Add(__ Word32Shl(first_char_code, __ Int32Constant(10)),
                           __ Int32Add(second_char_code, surrogate_offset));
      break;
    }
  }
  __ Goto(&return_result, result);

  __ Bind(&return_result);
  return return_result.PhiAt(0);
}
Node* EffectControlLinearizer::LowerStringFromCharCode(Node* node) {
Node* value = node->InputAt(0);
......
......@@ -118,6 +118,8 @@ class V8_EXPORT_PRIVATE EffectControlLinearizer {
Node* LowerStringCharAt(Node* node);
Node* LowerStringCharCodeAt(Node* node);
Node* LowerSeqStringCharCodeAt(Node* node);
Node* LowerStringCodePointAt(Node* node);
Node* LowerSeqStringCodePointAt(Node* node, UnicodeEncoding encoding);
Node* LowerStringToLowerCaseIntl(Node* node);
Node* LowerStringToUpperCaseIntl(Node* node);
Node* LowerStringFromCharCode(Node* node);
......@@ -179,6 +181,7 @@ class V8_EXPORT_PRIVATE EffectControlLinearizer {
Node* ChangeSmiToIntPtr(Node* value);
Node* ChangeSmiToInt32(Node* value);
Node* ObjectIsSmi(Node* value);
Node* LoadFromString(Node* receiver, Node* position, Node* is_one_byte);
Node* SmiMaxValueConstant();
Node* SmiShiftBitsConstant();
......
......@@ -333,6 +333,8 @@
V(StringCharAt) \
V(StringCharCodeAt) \
V(SeqStringCharCodeAt) \
V(StringCodePointAt) \
V(SeqStringCodePointAt) \
V(StringFromCharCode) \
V(StringFromCodePoint) \
V(StringIndexOf) \
......
......@@ -2353,6 +2353,22 @@ class RepresentationSelector {
}
return;
}
// Representation selection for StringCodePointAt: both branches take the
// string input as AnyTagged and the position input as a truncating word32;
// they differ only in the output representation.
case IrOpcode::kStringCodePointAt: {
Type* string_type = TypeOf(node->InputAt(0));
if (string_type->Is(Type::SeqString())) {
// The input is typed as SeqString: produce a word32 result and, in the
// lowering phase, switch the node to the SeqStringCodePointAt operator.
VisitBinop(node, UseInfo::AnyTagged(), UseInfo::TruncatingWord32(),
MachineRepresentation::kWord32);
if (lower()) {
NodeProperties::ChangeOp(node,
simplified()->SeqStringCodePointAt());
}
} else {
// Any other string type keeps the StringCodePointAt operator, whose
// output is represented as kTaggedSigned.
// TODO(turbofan): Allow builtins to return untagged values.
VisitBinop(node, UseInfo::AnyTagged(), UseInfo::TruncatingWord32(),
MachineRepresentation::kTaggedSigned);
}
return;
}
case IrOpcode::kStringFromCharCode: {
VisitUnop(node, UseInfo::TruncatingWord32(),
MachineRepresentation::kTaggedPointer);
......
......@@ -657,6 +657,8 @@ bool operator==(CheckMinusZeroParameters const& lhs,
V(StringCharAt, Operator::kNoProperties, 2, 1) \
V(StringCharCodeAt, Operator::kNoProperties, 2, 1) \
V(SeqStringCharCodeAt, Operator::kNoProperties, 2, 1) \
V(StringCodePointAt, Operator::kNoProperties, 2, 1) \
V(SeqStringCodePointAt, Operator::kNoProperties, 2, 1) \
V(StringFromCharCode, Operator::kNoProperties, 1, 0) \
V(StringIndexOf, Operator::kNoProperties, 3, 0) \
V(StringLength, Operator::kNoProperties, 1, 0) \
......
......@@ -503,6 +503,8 @@ class V8_EXPORT_PRIVATE SimplifiedOperatorBuilder final
const Operator* StringCharAt();
const Operator* StringCharCodeAt();
const Operator* SeqStringCharCodeAt();
const Operator* StringCodePointAt();
const Operator* SeqStringCodePointAt();
const Operator* StringFromCharCode();
const Operator* StringFromCodePoint(UnicodeEncoding encoding);
const Operator* StringIndexOf();
......
......@@ -1948,6 +1948,14 @@ Type* Typer::Visitor::TypeSeqStringCharCodeAt(Node* node) {
return typer_->cache_.kUint16;
}
// Types StringCodePointAt as a number in the range [0, String::kMaxCodePoint].
Type* Typer::Visitor::TypeStringCodePointAt(Node* node) {
  const double min_code_point = 0.0;
  const double max_code_point = String::kMaxCodePoint;
  return Type::Range(min_code_point, max_code_point, zone());
}
// Types SeqStringCodePointAt as a number in the range
// [0, String::kMaxCodePoint], the same range as StringCodePointAt.
Type* Typer::Visitor::TypeSeqStringCodePointAt(Node* node) {
  const double min_code_point = 0.0;
  const double max_code_point = String::kMaxCodePoint;
  return Type::Range(min_code_point, max_code_point, zone());
}
Type* Typer::Visitor::TypeStringFromCharCode(Node* node) {
return TypeUnaryOp(node, StringFromCharCodeTyper);
}
......
......@@ -1054,6 +1054,18 @@ void Verifier::Visitor::Check(Node* node, const AllNodes& all) {
CheckValueInputIs(node, 1, Type::Unsigned32());
CheckTypeIs(node, Type::UnsignedSmall());
break;
case IrOpcode::kStringCodePointAt:
// (String, Unsigned32) -> UnsignedSmall
CheckValueInputIs(node, 0, Type::String());
CheckValueInputIs(node, 1, Type::Unsigned32());
CheckTypeIs(node, Type::UnsignedSmall());
break;
case IrOpcode::kSeqStringCodePointAt:
// (String, Unsigned32) -> UnsignedSmall
CheckValueInputIs(node, 0, Type::String());
CheckValueInputIs(node, 1, Type::Unsigned32());
CheckTypeIs(node, Type::UnsignedSmall());
break;
case IrOpcode::kStringFromCharCode:
// Number -> String
CheckValueInputIs(node, 0, Type::Number());
......
......@@ -269,21 +269,7 @@ void StoreNamedTransitionDescriptor::InitializePlatformSpecific(
data->InitializePlatformSpecific(len, registers);
}
void StringCharAtDescriptor::InitializePlatformIndependent(
CallInterfaceDescriptorData* data) {
// kReceiver, kPosition
MachineType machine_types[] = {MachineType::AnyTagged(),
MachineType::IntPtr()};
data->InitializePlatformIndependent(arraysize(machine_types), 0,
machine_types);
}
void StringCharAtDescriptor::InitializePlatformSpecific(
CallInterfaceDescriptorData* data) {
DefaultInitializePlatformSpecific(data, kParameterCount);
}
void StringCharCodeAtDescriptor::InitializePlatformIndependent(
void StringAtDescriptor::InitializePlatformIndependent(
CallInterfaceDescriptorData* data) {
// kReceiver, kPosition
// TODO(turbofan): Allow builtins to return untagged values.
......@@ -293,7 +279,7 @@ void StringCharCodeAtDescriptor::InitializePlatformIndependent(
machine_types);
}
void StringCharCodeAtDescriptor::InitializePlatformSpecific(
void StringAtDescriptor::InitializePlatformSpecific(
CallInterfaceDescriptorData* data) {
// No custom setup: delegate to the default platform-specific
// initialization for this descriptor's kParameterCount parameters.
DefaultInitializePlatformSpecific(data, kParameterCount);
}
......
......@@ -62,8 +62,7 @@ class PlatformInterfaceDescriptor;
V(Compare) \
V(BinaryOp) \
V(StringAdd) \
V(StringCharAt) \
V(StringCharCodeAt) \
V(StringAt) \
V(ForInPrepare) \
V(GetProperty) \
V(ArgumentAdaptor) \
......@@ -762,17 +761,12 @@ class StringAddDescriptor : public CallInterfaceDescriptor {
DECLARE_DESCRIPTOR(StringAddDescriptor, CallInterfaceDescriptor)
};
class StringCharAtDescriptor final : public CallInterfaceDescriptor {
// This descriptor is shared among String.p.charAt/charCodeAt/codePointAt
// as they all have the same interface.
class StringAtDescriptor final : public CallInterfaceDescriptor {
public:
DEFINE_PARAMETERS(kReceiver, kPosition)
DECLARE_DESCRIPTOR_WITH_CUSTOM_FUNCTION_TYPE(StringCharAtDescriptor,
CallInterfaceDescriptor)
};
class StringCharCodeAtDescriptor final : public CallInterfaceDescriptor {
public:
DEFINE_PARAMETERS(kReceiver, kPosition)
DECLARE_DESCRIPTOR_WITH_CUSTOM_FUNCTION_TYPE(StringCharCodeAtDescriptor,
DECLARE_DESCRIPTOR_WITH_CUSTOM_FUNCTION_TYPE(StringAtDescriptor,
CallInterfaceDescriptor)
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment