Commit 22d894c8 authored by Sigurd Schneider, committed by Commit Bot

[turbofan] Add simplified operator for String.prototype.codePointAt

This CL adds the simplified operators
  StringCodePointAt
  SeqStringCodePointAt
and associated lowering.

Bug: v8:7270
Change-Id: I5aeefddeec39c3119b2d225e92a3116f802e7b45
Reviewed-on: https://chromium-review.googlesource.com/861789
Commit-Queue: Sigurd Schneider <sigurds@chromium.org>
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50547}
parent b1181181
...@@ -91,8 +91,9 @@ namespace internal { ...@@ -91,8 +91,9 @@ namespace internal {
ASM(StackCheck) \ ASM(StackCheck) \
\ \
/* String helpers */ \ /* String helpers */ \
TFC(StringCharAt, StringCharAt, 1) \ TFC(StringCharAt, StringAt, 1) \
TFC(StringCharCodeAt, StringCharCodeAt, 1) \ TFC(StringCharCodeAt, StringAt, 1) \
TFC(StringCodePointAt, StringAt, 1) \
TFC(StringEqual, Compare, 1) \ TFC(StringEqual, Compare, 1) \
TFC(StringGreaterThan, Compare, 1) \ TFC(StringGreaterThan, Compare, 1) \
TFC(StringGreaterThanOrEqual, Compare, 1) \ TFC(StringGreaterThanOrEqual, Compare, 1) \
......
...@@ -576,6 +576,21 @@ TF_BUILTIN(StringCharCodeAt, StringBuiltinsAssembler) { ...@@ -576,6 +576,21 @@ TF_BUILTIN(StringCharCodeAt, StringBuiltinsAssembler) {
Return(result); Return(result);
} }
TF_BUILTIN(StringCodePointAt, StringBuiltinsAssembler) {
  // Stub parameters: the string to read from and the index to read at.
  Node* string = Parameter(Descriptor::kReceiver);
  Node* index = Parameter(Descriptor::kPosition);

  // TODO(sigurds) Figure out if passing length as argument pays off.
  TNode<IntPtrT> string_length = LoadStringLengthAsWord(string);

  // Read the code point starting at {index}, requesting UTF32 encoding from
  // LoadSurrogatePairAt.
  TNode<Int32T> code_point =
      LoadSurrogatePairAt(string, string_length, index, UnicodeEncoding::UTF32);

  // Tag the word32 as a Smi before handing it back to the caller.
  // TODO(turbofan): Allow builtins to return values untagged.
  TNode<Smi> tagged_code_point = SmiFromWord32(code_point);
  Return(tagged_code_point);
}
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// ES6 section 21.1 String Objects // ES6 section 21.1 String Objects
......
...@@ -847,6 +847,12 @@ bool EffectControlLinearizer::TryWireInStateEffect(Node* node, ...@@ -847,6 +847,12 @@ bool EffectControlLinearizer::TryWireInStateEffect(Node* node,
case IrOpcode::kSeqStringCharCodeAt: case IrOpcode::kSeqStringCharCodeAt:
result = LowerSeqStringCharCodeAt(node); result = LowerSeqStringCharCodeAt(node);
break; break;
case IrOpcode::kStringCodePointAt:
  // Lowered to a call of the StringCodePointAt builtin.
  result = LowerStringCodePointAt(node);
  break;
case IrOpcode::kSeqStringCodePointAt:
  // This must use the code-point lowering rather than
  // LowerSeqStringCharCodeAt, which only yields a single UTF-16 code unit
  // and never combines surrogate pairs. UTF32 is the same encoding that the
  // StringCodePointAt builtin requests from LoadSurrogatePairAt.
  result = LowerSeqStringCodePointAt(node, UnicodeEncoding::UTF32);
  break;
case IrOpcode::kStringToLowerCaseIntl: case IrOpcode::kStringToLowerCaseIntl:
result = LowerStringToLowerCaseIntl(node); result = LowerStringToLowerCaseIntl(node);
break; break;
...@@ -2687,19 +2693,25 @@ Node* EffectControlLinearizer::LowerStringCharCodeAt(Node* node) { ...@@ -2687,19 +2693,25 @@ Node* EffectControlLinearizer::LowerStringCharCodeAt(Node* node) {
__ NoContextConstant()); __ NoContextConstant());
} }
// Lowers a StringCodePointAt node to a call of the StringCodePointAt builtin.
// Inputs 0 and 1 of {node} are forwarded to the builtin as receiver and
// position; the call node is returned as the lowered value.
Node* EffectControlLinearizer::LowerSeqStringCharCodeAt(Node* node) { Node* EffectControlLinearizer::LowerStringCodePointAt(Node* node) {
Node* receiver = node->InputAt(0); Node* receiver = node->InputAt(0);
Node* position = node->InputAt(1); Node* position = node->InputAt(1);
Callable const callable =
Builtins::CallableFor(isolate(), Builtins::kStringCodePointAt);
// The call is declared as neither throwing nor writing.
Operator::Properties properties = Operator::kNoThrow | Operator::kNoWrite;
CallDescriptor::Flags flags = CallDescriptor::kNoFlags;
// The call's return type is declared as TaggedSigned.
CallDescriptor* desc = Linkage::GetStubCallDescriptor(
isolate(), graph()->zone(), callable.descriptor(), 0, flags, properties,
MachineType::TaggedSigned());
// NoContextConstant() is passed in the context slot.
return __ Call(desc, __ HeapConstant(callable.code()), receiver, position,
__ NoContextConstant());
}
Node* EffectControlLinearizer::LoadFromString(Node* receiver, Node* position,
Node* is_one_byte) {
auto one_byte_load = __ MakeLabel(); auto one_byte_load = __ MakeLabel();
auto done = __ MakeLabel(MachineRepresentation::kWord32); auto done = __ MakeLabel(MachineRepresentation::kWord32);
Node* map = __ LoadField(AccessBuilder::ForMap(), receiver);
Node* instance_type = __ LoadField(AccessBuilder::ForMapInstanceType(), map);
Node* is_one_byte = __ Word32Equal(
__ Word32And(instance_type, __ Int32Constant(kStringEncodingMask)),
__ Int32Constant(kOneByteStringTag));
__ GotoIf(is_one_byte, &one_byte_load); __ GotoIf(is_one_byte, &one_byte_load);
Node* two_byte_result = __ LoadElement( Node* two_byte_result = __ LoadElement(
AccessBuilder::ForSeqTwoByteStringCharacter(), receiver, position); AccessBuilder::ForSeqTwoByteStringCharacter(), receiver, position);
...@@ -2714,6 +2726,85 @@ Node* EffectControlLinearizer::LowerSeqStringCharCodeAt(Node* node) { ...@@ -2714,6 +2726,85 @@ Node* EffectControlLinearizer::LowerSeqStringCharCodeAt(Node* node) {
return done.PhiAt(0); return done.PhiAt(0);
} }
// Lowers SeqStringCharCodeAt: determines the string's encoding from its map
// and loads the character at the requested position via LoadFromString.
Node* EffectControlLinearizer::LowerSeqStringCharCodeAt(Node* node) {
  // Input 0 is the string, input 1 the position to read.
  Node* string = node->InputAt(0);
  Node* index = node->InputAt(1);

  // Fetch the instance type through the string's map.
  Node* string_map = __ LoadField(AccessBuilder::ForMap(), string);
  Node* type_bits =
      __ LoadField(AccessBuilder::ForMapInstanceType(), string_map);

  // Mask out the encoding bits and compare them with the one-byte tag.
  Node* encoding_bits =
      __ Word32And(type_bits, __ Int32Constant(kStringEncodingMask));
  Node* has_one_byte_encoding =
      __ Word32Equal(encoding_bits, __ Int32Constant(kOneByteStringTag));

  return LoadFromString(string, index, has_one_byte_encoding);
}
// Lowers SeqStringCodePointAt by reading the string's characters inline.
//
// Loads the character at {position}. If it is a lead surrogate, a following
// character exists, and that character is a trail surrogate, the pair is
// combined according to {encoding}:
//   - UTF16: both code units packed into one word32 (order depends on the
//     target's endianness),
//   - UTF32: the decoded code point.
// In every other case the first character code is returned unchanged.
Node* EffectControlLinearizer::LowerSeqStringCodePointAt(
    Node* node, UnicodeEncoding encoding) {
  Node* receiver = node->InputAt(0);
  Node* position = node->InputAt(1);

  // Determine the string's encoding from the instance type in its map.
  Node* map = __ LoadField(AccessBuilder::ForMap(), receiver);
  Node* instance_type = __ LoadField(AccessBuilder::ForMapInstanceType(), map);
  Node* is_one_byte = __ Word32Equal(
      __ Word32And(instance_type, __ Int32Constant(kStringEncodingMask)),
      __ Int32Constant(kOneByteStringTag));

  Node* first_char_code = LoadFromString(receiver, position, is_one_byte);

  auto return_result = __ MakeLabel(MachineRepresentation::kWord32);

  // Check if the first character code is a lead surrogate, i.e. lies inside
  // the interval [0xD800, 0xDBFF].
  Node* first_is_lead =
      __ Word32Equal(__ Word32And(first_char_code, __ Int32Constant(0xFC00)),
                     __ Int32Constant(0xD800));
  // Anything that is not a lead surrogate is already the final result.
  __ GotoIfNot(first_is_lead, &return_result, first_char_code);

  // Check if position + 1 is still in range; if it is not, there is no second
  // character to pair with and nothing past the end may be loaded.
  // NOTE(review): {length} is compared as a raw int32 below. If the string
  // length field holds a tagged value it must be untagged first (e.g. with
  // ChangeSmiToInt32) -- confirm against AccessBuilder::ForStringLength().
  Node* length = __ LoadField(AccessBuilder::ForStringLength(), receiver);
  Node* next_position = __ Int32Add(position, __ Int32Constant(1));
  Node* next_position_in_range = __ Int32LessThan(next_position, length);
  __ GotoIfNot(next_position_in_range, &return_result, first_char_code);

  // Load second character code.
  Node* second_char_code = LoadFromString(receiver, next_position, is_one_byte);
  // Check if the second character code is a trail surrogate, i.e. lies inside
  // the interval [0xDC00, 0xDFFF]. An unpaired lead surrogate is returned
  // as-is.
  Node* second_is_trail =
      __ Word32Equal(__ Word32And(second_char_code, __ Int32Constant(0xFC00)),
                     __ Int32Constant(0xDC00));
  __ GotoIfNot(second_is_trail, &return_result, first_char_code);

  Node* result = nullptr;
  switch (encoding) {
    case UnicodeEncoding::UTF16:
      // Pack both code units into a single word32. The order needs to be
      // swapped for big-endian platforms.
#if V8_TARGET_BIG_ENDIAN
      result = __ Word32Or(__ Word32Shl(first_char_code, __ Int32Constant(16)),
                           second_char_code);
#else
      result = __ Word32Or(__ Word32Shl(second_char_code, __ Int32Constant(16)),
                           first_char_code);
#endif
      break;
    case UnicodeEncoding::UTF32: {
      // Convert UTF16 surrogate pair into |word32| code point, encoded as
      // UTF32.
      Node* surrogate_offset =
          __ Int32Constant(0x10000 - (0xD800 << 10) - 0xDC00);

      // (lead << 10) + trail + SURROGATE_OFFSET
      result = __ Int32Add(__ Word32Shl(first_char_code, __ Int32Constant(10)),
                           __ Int32Add(second_char_code, surrogate_offset));
      break;
    }
  }
  __ Goto(&return_result, result);

  __ Bind(&return_result);
  return return_result.PhiAt(0);
}
Node* EffectControlLinearizer::LowerStringFromCharCode(Node* node) { Node* EffectControlLinearizer::LowerStringFromCharCode(Node* node) {
Node* value = node->InputAt(0); Node* value = node->InputAt(0);
......
...@@ -118,6 +118,8 @@ class V8_EXPORT_PRIVATE EffectControlLinearizer { ...@@ -118,6 +118,8 @@ class V8_EXPORT_PRIVATE EffectControlLinearizer {
Node* LowerStringCharAt(Node* node); Node* LowerStringCharAt(Node* node);
Node* LowerStringCharCodeAt(Node* node); Node* LowerStringCharCodeAt(Node* node);
Node* LowerSeqStringCharCodeAt(Node* node); Node* LowerSeqStringCharCodeAt(Node* node);
Node* LowerStringCodePointAt(Node* node);
Node* LowerSeqStringCodePointAt(Node* node, UnicodeEncoding encoding);
Node* LowerStringToLowerCaseIntl(Node* node); Node* LowerStringToLowerCaseIntl(Node* node);
Node* LowerStringToUpperCaseIntl(Node* node); Node* LowerStringToUpperCaseIntl(Node* node);
Node* LowerStringFromCharCode(Node* node); Node* LowerStringFromCharCode(Node* node);
...@@ -179,6 +181,7 @@ class V8_EXPORT_PRIVATE EffectControlLinearizer { ...@@ -179,6 +181,7 @@ class V8_EXPORT_PRIVATE EffectControlLinearizer {
Node* ChangeSmiToIntPtr(Node* value); Node* ChangeSmiToIntPtr(Node* value);
Node* ChangeSmiToInt32(Node* value); Node* ChangeSmiToInt32(Node* value);
Node* ObjectIsSmi(Node* value); Node* ObjectIsSmi(Node* value);
Node* LoadFromString(Node* receiver, Node* position, Node* is_one_byte);
Node* SmiMaxValueConstant(); Node* SmiMaxValueConstant();
Node* SmiShiftBitsConstant(); Node* SmiShiftBitsConstant();
......
...@@ -333,6 +333,8 @@ ...@@ -333,6 +333,8 @@
V(StringCharAt) \ V(StringCharAt) \
V(StringCharCodeAt) \ V(StringCharCodeAt) \
V(SeqStringCharCodeAt) \ V(SeqStringCharCodeAt) \
V(StringCodePointAt) \
V(SeqStringCodePointAt) \
V(StringFromCharCode) \ V(StringFromCharCode) \
V(StringFromCodePoint) \ V(StringFromCodePoint) \
V(StringIndexOf) \ V(StringIndexOf) \
......
...@@ -2341,6 +2341,22 @@ class RepresentationSelector { ...@@ -2341,6 +2341,22 @@ class RepresentationSelector {
} }
return; return;
} }
case IrOpcode::kStringCodePointAt: {
  // When the first input is statically known to be a SeqString, the node
  // produces an untagged word32 and is rewritten to SeqStringCodePointAt
  // during lowering. Otherwise the output is a tagged Smi.
  const bool input_is_seq_string =
      TypeOf(node->InputAt(0))->Is(Type::SeqString());
  // TODO(turbofan): Allow builtins to return untagged values.
  const MachineRepresentation output_rep =
      input_is_seq_string ? MachineRepresentation::kWord32
                          : MachineRepresentation::kTaggedSigned;
  VisitBinop(node, UseInfo::AnyTagged(), UseInfo::TruncatingWord32(),
             output_rep);
  if (input_is_seq_string && lower()) {
    NodeProperties::ChangeOp(node, simplified()->SeqStringCodePointAt());
  }
  return;
}
case IrOpcode::kStringFromCharCode: { case IrOpcode::kStringFromCharCode: {
VisitUnop(node, UseInfo::TruncatingWord32(), VisitUnop(node, UseInfo::TruncatingWord32(),
MachineRepresentation::kTaggedPointer); MachineRepresentation::kTaggedPointer);
......
...@@ -657,6 +657,8 @@ bool operator==(CheckMinusZeroParameters const& lhs, ...@@ -657,6 +657,8 @@ bool operator==(CheckMinusZeroParameters const& lhs,
V(StringCharAt, Operator::kNoProperties, 2, 1) \ V(StringCharAt, Operator::kNoProperties, 2, 1) \
V(StringCharCodeAt, Operator::kNoProperties, 2, 1) \ V(StringCharCodeAt, Operator::kNoProperties, 2, 1) \
V(SeqStringCharCodeAt, Operator::kNoProperties, 2, 1) \ V(SeqStringCharCodeAt, Operator::kNoProperties, 2, 1) \
V(StringCodePointAt, Operator::kNoProperties, 2, 1) \
V(SeqStringCodePointAt, Operator::kNoProperties, 2, 1) \
V(StringFromCharCode, Operator::kNoProperties, 1, 0) \ V(StringFromCharCode, Operator::kNoProperties, 1, 0) \
V(StringIndexOf, Operator::kNoProperties, 3, 0) \ V(StringIndexOf, Operator::kNoProperties, 3, 0) \
V(StringLength, Operator::kNoProperties, 1, 0) \ V(StringLength, Operator::kNoProperties, 1, 0) \
......
...@@ -503,6 +503,8 @@ class V8_EXPORT_PRIVATE SimplifiedOperatorBuilder final ...@@ -503,6 +503,8 @@ class V8_EXPORT_PRIVATE SimplifiedOperatorBuilder final
const Operator* StringCharAt(); const Operator* StringCharAt();
const Operator* StringCharCodeAt(); const Operator* StringCharCodeAt();
const Operator* SeqStringCharCodeAt(); const Operator* SeqStringCharCodeAt();
const Operator* StringCodePointAt();
const Operator* SeqStringCodePointAt();
const Operator* StringFromCharCode(); const Operator* StringFromCharCode();
const Operator* StringFromCodePoint(UnicodeEncoding encoding); const Operator* StringFromCodePoint(UnicodeEncoding encoding);
const Operator* StringIndexOf(); const Operator* StringIndexOf();
......
...@@ -1948,6 +1948,14 @@ Type* Typer::Visitor::TypeSeqStringCharCodeAt(Node* node) { ...@@ -1948,6 +1948,14 @@ Type* Typer::Visitor::TypeSeqStringCharCodeAt(Node* node) {
return typer_->cache_.kUint16; return typer_->cache_.kUint16;
} }
// Types StringCodePointAt as the numeric range [0, String::kMaxCodePoint].
Type* Typer::Visitor::TypeStringCodePointAt(Node* node) {
  const double min_code_point = 0.0;
  const double max_code_point = String::kMaxCodePoint;
  return Type::Range(min_code_point, max_code_point, zone());
}
// Types SeqStringCodePointAt as the numeric range [0, String::kMaxCodePoint].
Type* Typer::Visitor::TypeSeqStringCodePointAt(Node* node) {
  const double min_code_point = 0.0;
  const double max_code_point = String::kMaxCodePoint;
  return Type::Range(min_code_point, max_code_point, zone());
}
Type* Typer::Visitor::TypeStringFromCharCode(Node* node) { Type* Typer::Visitor::TypeStringFromCharCode(Node* node) {
return TypeUnaryOp(node, StringFromCharCodeTyper); return TypeUnaryOp(node, StringFromCharCodeTyper);
} }
......
...@@ -1054,6 +1054,18 @@ void Verifier::Visitor::Check(Node* node, const AllNodes& all) { ...@@ -1054,6 +1054,18 @@ void Verifier::Visitor::Check(Node* node, const AllNodes& all) {
CheckValueInputIs(node, 1, Type::Unsigned32()); CheckValueInputIs(node, 1, Type::Unsigned32());
CheckTypeIs(node, Type::UnsignedSmall()); CheckTypeIs(node, Type::UnsignedSmall());
break; break;
case IrOpcode::kStringCodePointAt:
case IrOpcode::kSeqStringCodePointAt:
  // Both operators are checked against the same signature:
  // (String, Unsigned32) -> UnsignedSmall
  CheckValueInputIs(node, 0, Type::String());
  CheckValueInputIs(node, 1, Type::Unsigned32());
  CheckTypeIs(node, Type::UnsignedSmall());
  break;
case IrOpcode::kStringFromCharCode: case IrOpcode::kStringFromCharCode:
// Number -> String // Number -> String
CheckValueInputIs(node, 0, Type::Number()); CheckValueInputIs(node, 0, Type::Number());
......
...@@ -269,21 +269,7 @@ void StoreNamedTransitionDescriptor::InitializePlatformSpecific( ...@@ -269,21 +269,7 @@ void StoreNamedTransitionDescriptor::InitializePlatformSpecific(
data->InitializePlatformSpecific(len, registers); data->InitializePlatformSpecific(len, registers);
} }
void StringCharAtDescriptor::InitializePlatformIndependent( void StringAtDescriptor::InitializePlatformIndependent(
CallInterfaceDescriptorData* data) {
// kReceiver, kPosition
MachineType machine_types[] = {MachineType::AnyTagged(),
MachineType::IntPtr()};
data->InitializePlatformIndependent(arraysize(machine_types), 0,
machine_types);
}
void StringCharAtDescriptor::InitializePlatformSpecific(
CallInterfaceDescriptorData* data) {
DefaultInitializePlatformSpecific(data, kParameterCount);
}
void StringCharCodeAtDescriptor::InitializePlatformIndependent(
CallInterfaceDescriptorData* data) { CallInterfaceDescriptorData* data) {
// kReceiver, kPosition // kReceiver, kPosition
// TODO(turbofan): Allow builtins to return untagged values. // TODO(turbofan): Allow builtins to return untagged values.
...@@ -293,7 +279,7 @@ void StringCharCodeAtDescriptor::InitializePlatformIndependent( ...@@ -293,7 +279,7 @@ void StringCharCodeAtDescriptor::InitializePlatformIndependent(
machine_types); machine_types);
} }
void StringCharCodeAtDescriptor::InitializePlatformSpecific( void StringAtDescriptor::InitializePlatformSpecific(
CallInterfaceDescriptorData* data) { CallInterfaceDescriptorData* data) {
DefaultInitializePlatformSpecific(data, kParameterCount); DefaultInitializePlatformSpecific(data, kParameterCount);
} }
......
...@@ -62,8 +62,7 @@ class PlatformInterfaceDescriptor; ...@@ -62,8 +62,7 @@ class PlatformInterfaceDescriptor;
V(Compare) \ V(Compare) \
V(BinaryOp) \ V(BinaryOp) \
V(StringAdd) \ V(StringAdd) \
V(StringCharAt) \ V(StringAt) \
V(StringCharCodeAt) \
V(ForInPrepare) \ V(ForInPrepare) \
V(GetProperty) \ V(GetProperty) \
V(ArgumentAdaptor) \ V(ArgumentAdaptor) \
...@@ -762,17 +761,12 @@ class StringAddDescriptor : public CallInterfaceDescriptor { ...@@ -762,17 +761,12 @@ class StringAddDescriptor : public CallInterfaceDescriptor {
DECLARE_DESCRIPTOR(StringAddDescriptor, CallInterfaceDescriptor) DECLARE_DESCRIPTOR(StringAddDescriptor, CallInterfaceDescriptor)
}; };
class StringCharAtDescriptor final : public CallInterfaceDescriptor { // This descriptor is shared among String.p.charAt/charCodeAt/codePointAt
// as they all have the same interface.
class StringAtDescriptor final : public CallInterfaceDescriptor {
public: public:
DEFINE_PARAMETERS(kReceiver, kPosition) DEFINE_PARAMETERS(kReceiver, kPosition)
DECLARE_DESCRIPTOR_WITH_CUSTOM_FUNCTION_TYPE(StringCharAtDescriptor, DECLARE_DESCRIPTOR_WITH_CUSTOM_FUNCTION_TYPE(StringAtDescriptor,
CallInterfaceDescriptor)
};
class StringCharCodeAtDescriptor final : public CallInterfaceDescriptor {
public:
DEFINE_PARAMETERS(kReceiver, kPosition)
DECLARE_DESCRIPTOR_WITH_CUSTOM_FUNCTION_TYPE(StringCharCodeAtDescriptor,
CallInterfaceDescriptor) CallInterfaceDescriptor)
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment