Commit 083faa5b authored by Sigurd Schneider, committed by Commit Bot

[turbofan] Improve fast-path of string iterator next

This CL uses UTF16 encoding internally in the string iterator,
thereby saving a few shifts, ors, and ands.

Bug: v8:7270
Change-Id: I3ac9e0e8c4b64eb1d6c796597eb0b3413c5f516b
Reviewed-on: https://chromium-review.googlesource.com/887085
Reviewed-by: Jaroslav Sevcik <jarin@chromium.org>
Commit-Queue: Sigurd Schneider <sigurds@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50904}
parent 1e3a8c15
......@@ -92,7 +92,8 @@ namespace internal {
\
/* String helpers */ \
TFC(StringCharAt, StringAt, 1) \
TFC(StringCodePointAt, StringAt, 1) \
TFC(StringCodePointAtUTF16, StringAt, 1) \
TFC(StringCodePointAtUTF32, StringAt, 1) \
TFC(StringEqual, Compare, 1) \
TFC(StringGreaterThan, Compare, 1) \
TFC(StringGreaterThanOrEqual, Compare, 1) \
......
......@@ -527,7 +527,21 @@ TF_BUILTIN(StringCharAt, StringBuiltinsAssembler) {
Return(result);
}
TF_BUILTIN(StringCodePointAt, StringBuiltinsAssembler) {
// Stub that reads the code point found at {position} in {receiver}, asking
// LoadSurrogatePairAt for the UnicodeEncoding::UTF16 representation, and
// hands the result back to the caller as a Smi.
TF_BUILTIN(StringCodePointAtUTF16, StringBuiltinsAssembler) {
  Node* string = Parameter(Descriptor::kReceiver);
  Node* index = Parameter(Descriptor::kPosition);

  // TODO(sigurds) Figure out if passing length as argument pays off.
  TNode<IntPtrT> string_length = LoadStringLengthAsWord(string);

  // Fetch the value at {index}; the encoding argument selects how
  // LoadSurrogatePairAt represents the loaded pair.
  TNode<Int32T> code_point = LoadSurrogatePairAt(
      string, string_length, index, UnicodeEncoding::UTF16);

  // The 32-bit word is tagged as a Smi before being returned.
  // TODO(turbofan): Allow builtins to return values untagged.
  TNode<Smi> tagged_code_point = SmiFromWord32(code_point);
  Return(tagged_code_point);
}
TF_BUILTIN(StringCodePointAtUTF32, StringBuiltinsAssembler) {
Node* receiver = Parameter(Descriptor::kReceiver);
Node* position = Parameter(Descriptor::kPosition);
......@@ -693,6 +707,8 @@ TF_BUILTIN(StringPrototypeCodePointAt, StringBuiltinsAssembler) {
maybe_position, UndefinedConstant(),
[this](TNode<String> receiver, TNode<IntPtrT> length,
TNode<IntPtrT> index) {
// This is always a call to a builtin from Javascript,
// so we need to produce UTF32.
Node* value = LoadSurrogatePairAt(receiver, length, index,
UnicodeEncoding::UTF32);
return SmiFromWord32(value);
......
......@@ -855,7 +855,7 @@ bool EffectControlLinearizer::TryWireInStateEffect(Node* node,
result = LowerSeqStringCharCodeAt(node);
break;
case IrOpcode::kStringCodePointAt:
result = LowerStringCodePointAt(node);
result = LowerStringCodePointAt(node, UnicodeEncodingOf(node->op()));
break;
case IrOpcode::kSeqStringCodePointAt:
result = LowerSeqStringCharCodeAt(node);
......@@ -2838,12 +2838,16 @@ Node* EffectControlLinearizer::LowerStringCharCodeAt(Node* node) {
return loop_done.PhiAt(0);
}
Node* EffectControlLinearizer::LowerStringCodePointAt(Node* node) {
Node* EffectControlLinearizer::LowerStringCodePointAt(
Node* node, UnicodeEncoding encoding) {
Node* receiver = node->InputAt(0);
Node* position = node->InputAt(1);
Callable const callable =
Builtins::CallableFor(isolate(), Builtins::kStringCodePointAt);
Builtins::Name builtin = encoding == UnicodeEncoding::UTF16
? Builtins::kStringCodePointAtUTF16
: Builtins::kStringCodePointAtUTF32;
Callable const callable = Builtins::CallableFor(isolate(), builtin);
Operator::Properties properties = Operator::kNoThrow | Operator::kNoWrite;
CallDescriptor::Flags flags = CallDescriptor::kNoFlags;
CallDescriptor* desc = Linkage::GetStubCallDescriptor(
......
......@@ -119,7 +119,7 @@ class V8_EXPORT_PRIVATE EffectControlLinearizer {
Node* LowerStringCharAt(Node* node);
Node* LowerStringCharCodeAt(Node* node);
Node* LowerSeqStringCharCodeAt(Node* node);
Node* LowerStringCodePointAt(Node* node);
Node* LowerStringCodePointAt(Node* node, UnicodeEncoding encoding);
Node* LowerSeqStringCodePointAt(Node* node, UnicodeEncoding encoding);
Node* LowerStringToLowerCaseIntl(Node* node);
Node* LowerStringToUpperCaseIntl(Node* node);
......
......@@ -2033,9 +2033,10 @@ Reduction JSBuiltinReducer::ReduceStringIteratorNext(Node* node) {
{
done_true = jsgraph()->FalseConstant();
Node* codepoint = etrue0 = graph()->NewNode(
simplified()->StringCodePointAt(), string, index, etrue0, if_true0);
simplified()->StringCodePointAt(UnicodeEncoding::UTF16), string,
index, etrue0, if_true0);
vtrue0 = graph()->NewNode(
simplified()->StringFromCodePoint(UnicodeEncoding::UTF32), codepoint);
simplified()->StringFromCodePoint(UnicodeEncoding::UTF16), codepoint);
// Update iterator.[[NextIndex]]
Node* char_length =
......
......@@ -2961,7 +2961,7 @@ Reduction JSCallReducer::ReduceJSCall(Node* node) {
node);
case Builtins::kStringPrototypeCodePointAt:
return ReduceStringPrototypeStringAt(
simplified()->StringCodePointAt(), node);
simplified()->StringCodePointAt(UnicodeEncoding::UTF32), node);
case Builtins::kAsyncFunctionPromiseCreate:
return ReduceAsyncFunctionPromiseCreate(node);
case Builtins::kAsyncFunctionPromiseRelease:
......
......@@ -537,7 +537,9 @@ Type* AllocateTypeOf(const Operator* op) {
}
UnicodeEncoding UnicodeEncodingOf(const Operator* op) {
DCHECK_EQ(IrOpcode::kStringFromCodePoint, op->opcode());
DCHECK(op->opcode() == IrOpcode::kStringFromCodePoint ||
op->opcode() == IrOpcode::kStringCodePointAt ||
op->opcode() == IrOpcode::kSeqStringCodePointAt);
return OpParameter<UnicodeEncoding>(op);
}
......@@ -705,12 +707,10 @@ bool operator==(CheckMinusZeroParameters const& lhs,
V(NewConsString, Operator::kNoProperties, 3, 0) \
V(MaskIndexWithBound, Operator::kNoProperties, 2, 0)
#define EFFECT_DEPENDENT_OP_LIST(V) \
V(StringCharAt, Operator::kNoProperties, 2, 1) \
V(StringCharCodeAt, Operator::kNoProperties, 2, 1) \
V(SeqStringCharCodeAt, Operator::kNoProperties, 2, 1) \
V(StringCodePointAt, Operator::kNoProperties, 2, 1) \
V(SeqStringCodePointAt, Operator::kNoProperties, 2, 1)
#define EFFECT_DEPENDENT_OP_LIST(V) \
V(StringCharAt, Operator::kNoProperties, 2, 1) \
V(StringCharCodeAt, Operator::kNoProperties, 2, 1) \
V(SeqStringCharCodeAt, Operator::kNoProperties, 2, 1)
#define SPECULATIVE_NUMBER_BINOP_LIST(V) \
SIMPLIFIED_SPECULATIVE_NUMBER_BINOP_LIST(V) \
......@@ -806,6 +806,33 @@ struct SimplifiedOperatorGlobalCache final {
DEOPTIMIZE_REASON_LIST(CHECK_IF)
#undef CHECK_IF
// Operator for IrOpcode::kStringCodePointAt that carries a UnicodeEncoding as
// its Operator1 parameter; the encoding is fixed at compile time through the
// {kEncoding} template argument. The operator is marked kFoldable | kNoThrow.
// NOTE(review): the numeric arguments are presumably the input/output counts
// in the order value_in, effect_in, control_in, value_out, effect_out,
// control_out -- confirm against the Operator constructor.
template <UnicodeEncoding kEncoding>
struct StringCodePointAtOperator final : public Operator1<UnicodeEncoding> {
StringCodePointAtOperator()
: Operator1<UnicodeEncoding>(IrOpcode::kStringCodePointAt,
Operator::kFoldable | Operator::kNoThrow,
"StringCodePointAt", 2, 1, 1, 1, 1, 0,
kEncoding) {}
};
// One instance per encoding; SimplifiedOperatorBuilder::StringCodePointAt()
// returns the address of the matching one.
StringCodePointAtOperator<UnicodeEncoding::UTF16>
kStringCodePointAtOperatorUTF16;
StringCodePointAtOperator<UnicodeEncoding::UTF32>
kStringCodePointAtOperatorUTF32;
// Operator for IrOpcode::kSeqStringCodePointAt that carries a UnicodeEncoding
// as its Operator1 parameter; the encoding is fixed at compile time through
// the {kEncoding} template argument. Properties and numeric arguments are the
// same as for StringCodePointAtOperator; only the opcode and mnemonic differ.
// NOTE(review): the numeric arguments are presumably the input/output counts
// in the order value_in, effect_in, control_in, value_out, effect_out,
// control_out -- confirm against the Operator constructor.
template <UnicodeEncoding kEncoding>
struct SeqStringCodePointAtOperator final
: public Operator1<UnicodeEncoding> {
SeqStringCodePointAtOperator()
: Operator1<UnicodeEncoding>(IrOpcode::kSeqStringCodePointAt,
Operator::kFoldable | Operator::kNoThrow,
"SeqStringCodePointAt", 2, 1, 1, 1, 1, 0,
kEncoding) {}
};
// One instance per encoding; SimplifiedOperatorBuilder::SeqStringCodePointAt()
// returns the address of the matching one.
SeqStringCodePointAtOperator<UnicodeEncoding::UTF16>
kSeqStringCodePointAtOperatorUTF16;
SeqStringCodePointAtOperator<UnicodeEncoding::UTF32>
kSeqStringCodePointAtOperatorUTF32;
template <UnicodeEncoding kEncoding>
struct StringFromCodePointOperator final : public Operator1<UnicodeEncoding> {
StringFromCodePointOperator()
......@@ -1395,6 +1422,28 @@ const Operator* SimplifiedOperatorBuilder::AllocateRaw(
"AllocateRaw", 1, 1, 1, 1, 1, 1, AllocateParameters(type, pretenure));
}
// Returns the StringCodePointAt operator for the requested {encoding}.
// No operator is allocated here: each case returns the address of an
// instance held in {cache_}.
const Operator* SimplifiedOperatorBuilder::StringCodePointAt(
UnicodeEncoding encoding) {
switch (encoding) {
case UnicodeEncoding::UTF16:
return &cache_.kStringCodePointAtOperatorUTF16;
case UnicodeEncoding::UTF32:
return &cache_.kStringCodePointAtOperatorUTF32;
}
// Only reached if {encoding} is neither UTF16 nor UTF32.
UNREACHABLE();
}
// Returns the SeqStringCodePointAt operator for the requested {encoding}.
// No operator is allocated here: each case returns the address of an
// instance held in {cache_}.
const Operator* SimplifiedOperatorBuilder::SeqStringCodePointAt(
UnicodeEncoding encoding) {
switch (encoding) {
case UnicodeEncoding::UTF16:
return &cache_.kSeqStringCodePointAtOperatorUTF16;
case UnicodeEncoding::UTF32:
return &cache_.kSeqStringCodePointAtOperatorUTF32;
}
// Only reached if {encoding} is neither UTF16 nor UTF32.
UNREACHABLE();
}
const Operator* SimplifiedOperatorBuilder::StringFromCodePoint(
UnicodeEncoding encoding) {
switch (encoding) {
......
......@@ -504,8 +504,8 @@ class V8_EXPORT_PRIVATE SimplifiedOperatorBuilder final
const Operator* StringCharAt();
const Operator* StringCharCodeAt();
const Operator* SeqStringCharCodeAt();
const Operator* StringCodePointAt();
const Operator* SeqStringCodePointAt();
const Operator* StringCodePointAt(UnicodeEncoding encoding);
const Operator* SeqStringCodePointAt(UnicodeEncoding encoding);
const Operator* StringFromCharCode();
const Operator* StringFromCodePoint(UnicodeEncoding encoding);
const Operator* StringIndexOf();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment