Commit 083faa5b authored by Sigurd Schneider, committed by Commit Bot

[turbofan] Improve fast-path of string iterator next

This CL uses UTF16 encoding internally in the string iterator,
thereby saving a few shifts, ors, and ands.

Bug: v8:7270
Change-Id: I3ac9e0e8c4b64eb1d6c796597eb0b3413c5f516b
Reviewed-on: https://chromium-review.googlesource.com/887085
Reviewed-by: Jaroslav Sevcik <jarin@chromium.org>
Commit-Queue: Sigurd Schneider <sigurds@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50904}
parent 1e3a8c15
...@@ -92,7 +92,8 @@ namespace internal { ...@@ -92,7 +92,8 @@ namespace internal {
\ \
/* String helpers */ \ /* String helpers */ \
TFC(StringCharAt, StringAt, 1) \ TFC(StringCharAt, StringAt, 1) \
TFC(StringCodePointAt, StringAt, 1) \ TFC(StringCodePointAtUTF16, StringAt, 1) \
TFC(StringCodePointAtUTF32, StringAt, 1) \
TFC(StringEqual, Compare, 1) \ TFC(StringEqual, Compare, 1) \
TFC(StringGreaterThan, Compare, 1) \ TFC(StringGreaterThan, Compare, 1) \
TFC(StringGreaterThanOrEqual, Compare, 1) \ TFC(StringGreaterThanOrEqual, Compare, 1) \
......
...@@ -527,7 +527,21 @@ TF_BUILTIN(StringCharAt, StringBuiltinsAssembler) { ...@@ -527,7 +527,21 @@ TF_BUILTIN(StringCharAt, StringBuiltinsAssembler) {
Return(result); Return(result);
} }
// StringCodePointAtUTF16 builtin: reads the value at {position} of {receiver}
// through LoadSurrogatePairAt using UnicodeEncoding::UTF16 and returns it
// tagged as a Smi.
// NOTE(review): the header line below carries the pre-change and post-change
// builtin names side by side (diff rendering); the body that follows belongs
// to the new StringCodePointAtUTF16 builtin.
TF_BUILTIN(StringCodePointAt, StringBuiltinsAssembler) { TF_BUILTIN(StringCodePointAtUTF16, StringBuiltinsAssembler) {
// Both arguments arrive as untyped nodes taken from the call descriptor.
Node* receiver = Parameter(Descriptor::kReceiver);
Node* position = Parameter(Descriptor::kPosition);
// The length is re-loaded from the string here rather than passed in.
// TODO(sigurds) Figure out if passing length as argument pays off.
TNode<IntPtrT> length = LoadStringLengthAsWord(receiver);
// Load the character code at the {position} from the {receiver}.
// NOTE(review): with UTF16 the 32-bit result presumably holds a surrogate
// pair as two packed UTF-16 code units rather than a decoded code point --
// confirm in LoadSurrogatePairAt.
TNode<Int32T> code =
LoadSurrogatePairAt(receiver, length, position, UnicodeEncoding::UTF16);
// And return it as TaggedSigned value.
// TODO(turbofan): Allow builtins to return values untagged.
TNode<Smi> result = SmiFromWord32(code);
Return(result);
}
TF_BUILTIN(StringCodePointAtUTF32, StringBuiltinsAssembler) {
Node* receiver = Parameter(Descriptor::kReceiver); Node* receiver = Parameter(Descriptor::kReceiver);
Node* position = Parameter(Descriptor::kPosition); Node* position = Parameter(Descriptor::kPosition);
...@@ -693,6 +707,8 @@ TF_BUILTIN(StringPrototypeCodePointAt, StringBuiltinsAssembler) { ...@@ -693,6 +707,8 @@ TF_BUILTIN(StringPrototypeCodePointAt, StringBuiltinsAssembler) {
maybe_position, UndefinedConstant(), maybe_position, UndefinedConstant(),
[this](TNode<String> receiver, TNode<IntPtrT> length, [this](TNode<String> receiver, TNode<IntPtrT> length,
TNode<IntPtrT> index) { TNode<IntPtrT> index) {
// This is always a call to a builtin from Javascript,
// so we need to produce UTF32.
Node* value = LoadSurrogatePairAt(receiver, length, index, Node* value = LoadSurrogatePairAt(receiver, length, index,
UnicodeEncoding::UTF32); UnicodeEncoding::UTF32);
return SmiFromWord32(value); return SmiFromWord32(value);
......
...@@ -855,7 +855,7 @@ bool EffectControlLinearizer::TryWireInStateEffect(Node* node, ...@@ -855,7 +855,7 @@ bool EffectControlLinearizer::TryWireInStateEffect(Node* node,
result = LowerSeqStringCharCodeAt(node); result = LowerSeqStringCharCodeAt(node);
break; break;
case IrOpcode::kStringCodePointAt: case IrOpcode::kStringCodePointAt:
result = LowerStringCodePointAt(node); result = LowerStringCodePointAt(node, UnicodeEncodingOf(node->op()));
break; break;
case IrOpcode::kSeqStringCodePointAt: case IrOpcode::kSeqStringCodePointAt:
result = LowerSeqStringCharCodeAt(node); result = LowerSeqStringCharCodeAt(node);
...@@ -2838,12 +2838,16 @@ Node* EffectControlLinearizer::LowerStringCharCodeAt(Node* node) { ...@@ -2838,12 +2838,16 @@ Node* EffectControlLinearizer::LowerStringCharCodeAt(Node* node) {
return loop_done.PhiAt(0); return loop_done.PhiAt(0);
} }
Node* EffectControlLinearizer::LowerStringCodePointAt(Node* node) { Node* EffectControlLinearizer::LowerStringCodePointAt(
Node* node, UnicodeEncoding encoding) {
Node* receiver = node->InputAt(0); Node* receiver = node->InputAt(0);
Node* position = node->InputAt(1); Node* position = node->InputAt(1);
Callable const callable = Builtins::Name builtin = encoding == UnicodeEncoding::UTF16
Builtins::CallableFor(isolate(), Builtins::kStringCodePointAt); ? Builtins::kStringCodePointAtUTF16
: Builtins::kStringCodePointAtUTF32;
Callable const callable = Builtins::CallableFor(isolate(), builtin);
Operator::Properties properties = Operator::kNoThrow | Operator::kNoWrite; Operator::Properties properties = Operator::kNoThrow | Operator::kNoWrite;
CallDescriptor::Flags flags = CallDescriptor::kNoFlags; CallDescriptor::Flags flags = CallDescriptor::kNoFlags;
CallDescriptor* desc = Linkage::GetStubCallDescriptor( CallDescriptor* desc = Linkage::GetStubCallDescriptor(
......
...@@ -119,7 +119,7 @@ class V8_EXPORT_PRIVATE EffectControlLinearizer { ...@@ -119,7 +119,7 @@ class V8_EXPORT_PRIVATE EffectControlLinearizer {
Node* LowerStringCharAt(Node* node); Node* LowerStringCharAt(Node* node);
Node* LowerStringCharCodeAt(Node* node); Node* LowerStringCharCodeAt(Node* node);
Node* LowerSeqStringCharCodeAt(Node* node); Node* LowerSeqStringCharCodeAt(Node* node);
Node* LowerStringCodePointAt(Node* node); Node* LowerStringCodePointAt(Node* node, UnicodeEncoding encoding);
Node* LowerSeqStringCodePointAt(Node* node, UnicodeEncoding encoding); Node* LowerSeqStringCodePointAt(Node* node, UnicodeEncoding encoding);
Node* LowerStringToLowerCaseIntl(Node* node); Node* LowerStringToLowerCaseIntl(Node* node);
Node* LowerStringToUpperCaseIntl(Node* node); Node* LowerStringToUpperCaseIntl(Node* node);
......
...@@ -2033,9 +2033,10 @@ Reduction JSBuiltinReducer::ReduceStringIteratorNext(Node* node) { ...@@ -2033,9 +2033,10 @@ Reduction JSBuiltinReducer::ReduceStringIteratorNext(Node* node) {
{ {
done_true = jsgraph()->FalseConstant(); done_true = jsgraph()->FalseConstant();
Node* codepoint = etrue0 = graph()->NewNode( Node* codepoint = etrue0 = graph()->NewNode(
simplified()->StringCodePointAt(), string, index, etrue0, if_true0); simplified()->StringCodePointAt(UnicodeEncoding::UTF16), string,
index, etrue0, if_true0);
vtrue0 = graph()->NewNode( vtrue0 = graph()->NewNode(
simplified()->StringFromCodePoint(UnicodeEncoding::UTF32), codepoint); simplified()->StringFromCodePoint(UnicodeEncoding::UTF16), codepoint);
// Update iterator.[[NextIndex]] // Update iterator.[[NextIndex]]
Node* char_length = Node* char_length =
......
...@@ -2961,7 +2961,7 @@ Reduction JSCallReducer::ReduceJSCall(Node* node) { ...@@ -2961,7 +2961,7 @@ Reduction JSCallReducer::ReduceJSCall(Node* node) {
node); node);
case Builtins::kStringPrototypeCodePointAt: case Builtins::kStringPrototypeCodePointAt:
return ReduceStringPrototypeStringAt( return ReduceStringPrototypeStringAt(
simplified()->StringCodePointAt(), node); simplified()->StringCodePointAt(UnicodeEncoding::UTF32), node);
case Builtins::kAsyncFunctionPromiseCreate: case Builtins::kAsyncFunctionPromiseCreate:
return ReduceAsyncFunctionPromiseCreate(node); return ReduceAsyncFunctionPromiseCreate(node);
case Builtins::kAsyncFunctionPromiseRelease: case Builtins::kAsyncFunctionPromiseRelease:
......
...@@ -537,7 +537,9 @@ Type* AllocateTypeOf(const Operator* op) { ...@@ -537,7 +537,9 @@ Type* AllocateTypeOf(const Operator* op) {
} }
UnicodeEncoding UnicodeEncodingOf(const Operator* op) { UnicodeEncoding UnicodeEncodingOf(const Operator* op) {
DCHECK_EQ(IrOpcode::kStringFromCodePoint, op->opcode()); DCHECK(op->opcode() == IrOpcode::kStringFromCodePoint ||
op->opcode() == IrOpcode::kStringCodePointAt ||
op->opcode() == IrOpcode::kSeqStringCodePointAt);
return OpParameter<UnicodeEncoding>(op); return OpParameter<UnicodeEncoding>(op);
} }
...@@ -705,12 +707,10 @@ bool operator==(CheckMinusZeroParameters const& lhs, ...@@ -705,12 +707,10 @@ bool operator==(CheckMinusZeroParameters const& lhs,
V(NewConsString, Operator::kNoProperties, 3, 0) \ V(NewConsString, Operator::kNoProperties, 3, 0) \
V(MaskIndexWithBound, Operator::kNoProperties, 2, 0) V(MaskIndexWithBound, Operator::kNoProperties, 2, 0)
#define EFFECT_DEPENDENT_OP_LIST(V) \ #define EFFECT_DEPENDENT_OP_LIST(V) \
V(StringCharAt, Operator::kNoProperties, 2, 1) \ V(StringCharAt, Operator::kNoProperties, 2, 1) \
V(StringCharCodeAt, Operator::kNoProperties, 2, 1) \ V(StringCharCodeAt, Operator::kNoProperties, 2, 1) \
V(SeqStringCharCodeAt, Operator::kNoProperties, 2, 1) \ V(SeqStringCharCodeAt, Operator::kNoProperties, 2, 1)
V(StringCodePointAt, Operator::kNoProperties, 2, 1) \
V(SeqStringCodePointAt, Operator::kNoProperties, 2, 1)
#define SPECULATIVE_NUMBER_BINOP_LIST(V) \ #define SPECULATIVE_NUMBER_BINOP_LIST(V) \
SIMPLIFIED_SPECULATIVE_NUMBER_BINOP_LIST(V) \ SIMPLIFIED_SPECULATIVE_NUMBER_BINOP_LIST(V) \
...@@ -806,6 +806,33 @@ struct SimplifiedOperatorGlobalCache final { ...@@ -806,6 +806,33 @@ struct SimplifiedOperatorGlobalCache final {
DEOPTIMIZE_REASON_LIST(CHECK_IF) DEOPTIMIZE_REASON_LIST(CHECK_IF)
#undef CHECK_IF #undef CHECK_IF
// Operator for IrOpcode::kStringCodePointAt. The encoding {kEncoding} is
// stored as the Operator1 parameter, which UnicodeEncodingOf() reads back
// through OpParameter<UnicodeEncoding>.
template <UnicodeEncoding kEncoding>
struct StringCodePointAtOperator final : public Operator1<UnicodeEncoding> {
StringCodePointAtOperator()
: Operator1<UnicodeEncoding>(IrOpcode::kStringCodePointAt,
Operator::kFoldable | Operator::kNoThrow,
// NOTE(review): the six integers are presumably the value/effect/control
// input counts followed by the value/effect/control output counts --
// confirm against the Operator1 constructor.
"StringCodePointAt", 2, 1, 1, 1, 1, 0,
kEncoding) {}
};
// One cached instance per encoding; SimplifiedOperatorBuilder::StringCodePointAt
// returns the address of the matching one.
StringCodePointAtOperator<UnicodeEncoding::UTF16>
kStringCodePointAtOperatorUTF16;
StringCodePointAtOperator<UnicodeEncoding::UTF32>
kStringCodePointAtOperatorUTF32;
// Operator for IrOpcode::kSeqStringCodePointAt. The encoding {kEncoding} is
// stored as the Operator1 parameter, which UnicodeEncodingOf() reads back
// through OpParameter<UnicodeEncoding>.
template <UnicodeEncoding kEncoding>
struct SeqStringCodePointAtOperator final
: public Operator1<UnicodeEncoding> {
SeqStringCodePointAtOperator()
: Operator1<UnicodeEncoding>(IrOpcode::kSeqStringCodePointAt,
Operator::kFoldable | Operator::kNoThrow,
// NOTE(review): the six integers are presumably the value/effect/control
// input counts followed by the value/effect/control output counts --
// confirm against the Operator1 constructor.
"SeqStringCodePointAt", 2, 1, 1, 1, 1, 0,
kEncoding) {}
};
// One cached instance per encoding; SimplifiedOperatorBuilder::SeqStringCodePointAt
// returns the address of the matching one.
SeqStringCodePointAtOperator<UnicodeEncoding::UTF16>
kSeqStringCodePointAtOperatorUTF16;
SeqStringCodePointAtOperator<UnicodeEncoding::UTF32>
kSeqStringCodePointAtOperatorUTF32;
template <UnicodeEncoding kEncoding> template <UnicodeEncoding kEncoding>
struct StringFromCodePointOperator final : public Operator1<UnicodeEncoding> { struct StringFromCodePointOperator final : public Operator1<UnicodeEncoding> {
StringFromCodePointOperator() StringFromCodePointOperator()
...@@ -1395,6 +1422,28 @@ const Operator* SimplifiedOperatorBuilder::AllocateRaw( ...@@ -1395,6 +1422,28 @@ const Operator* SimplifiedOperatorBuilder::AllocateRaw(
"AllocateRaw", 1, 1, 1, 1, 1, 1, AllocateParameters(type, pretenure)); "AllocateRaw", 1, 1, 1, 1, 1, 1, AllocateParameters(type, pretenure));
} }
// Returns the cached StringCodePointAt operator for the requested {encoding}.
// The operators live in the shared cache, so the returned pointer is not owned
// by the caller.
const Operator* SimplifiedOperatorBuilder::StringCodePointAt(
    UnicodeEncoding encoding) {
  const Operator* cached_op = nullptr;
  // No default case on purpose: keeps the switch open to the compiler's
  // enumerator-coverage diagnostics.
  switch (encoding) {
    case UnicodeEncoding::UTF16:
      cached_op = &cache_.kStringCodePointAtOperatorUTF16;
      break;
    case UnicodeEncoding::UTF32:
      cached_op = &cache_.kStringCodePointAtOperatorUTF32;
      break;
  }
  // Only an out-of-range enum value can leave the pointer unset.
  if (cached_op == nullptr) UNREACHABLE();
  return cached_op;
}
// Returns the cached SeqStringCodePointAt operator for the requested
// {encoding}. The operators live in the shared cache, so the returned pointer
// is not owned by the caller.
const Operator* SimplifiedOperatorBuilder::SeqStringCodePointAt(
    UnicodeEncoding encoding) {
  const Operator* cached_op = nullptr;
  // No default case on purpose: keeps the switch open to the compiler's
  // enumerator-coverage diagnostics.
  switch (encoding) {
    case UnicodeEncoding::UTF16:
      cached_op = &cache_.kSeqStringCodePointAtOperatorUTF16;
      break;
    case UnicodeEncoding::UTF32:
      cached_op = &cache_.kSeqStringCodePointAtOperatorUTF32;
      break;
  }
  // Only an out-of-range enum value can leave the pointer unset.
  if (cached_op == nullptr) UNREACHABLE();
  return cached_op;
}
const Operator* SimplifiedOperatorBuilder::StringFromCodePoint( const Operator* SimplifiedOperatorBuilder::StringFromCodePoint(
UnicodeEncoding encoding) { UnicodeEncoding encoding) {
switch (encoding) { switch (encoding) {
......
...@@ -504,8 +504,8 @@ class V8_EXPORT_PRIVATE SimplifiedOperatorBuilder final ...@@ -504,8 +504,8 @@ class V8_EXPORT_PRIVATE SimplifiedOperatorBuilder final
const Operator* StringCharAt(); const Operator* StringCharAt();
const Operator* StringCharCodeAt(); const Operator* StringCharCodeAt();
const Operator* SeqStringCharCodeAt(); const Operator* SeqStringCharCodeAt();
const Operator* StringCodePointAt(); const Operator* StringCodePointAt(UnicodeEncoding encoding);
const Operator* SeqStringCodePointAt(); const Operator* SeqStringCodePointAt(UnicodeEncoding encoding);
const Operator* StringFromCharCode(); const Operator* StringFromCharCode();
const Operator* StringFromCodePoint(UnicodeEncoding encoding); const Operator* StringFromCodePoint(UnicodeEncoding encoding);
const Operator* StringIndexOf(); const Operator* StringIndexOf();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment