Commit 43bb214d authored by Ng Zhi An, committed by V8 LUCI CQ

[ia32] Optimize F64x2PromoteLowF32x4 with S128Load64Zero

Similar optimization to the one for x64 at https://crrev.com/c/3154347.

There is a change to VisitLoad, which should call
GetEffectiveAddressMemoryOperand on the value node. This allows us to
match the input operands to the value (S128Load64Zero node), while
emitting instructions for the node (F64x2PromoteLowF32x4 node).

Bug: v8:12189
Change-Id: I30ca09b567c12a43f7f3bbb4811bae53006bedaf
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3171979
Reviewed-by: Georg Neis <neis@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77019}
parent 1f33a962
......@@ -1838,7 +1838,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kIA32F64x2PromoteLowF32x4: {
__ Cvtps2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
if (HasAddressingMode(instr)) {
__ Cvtps2pd(i.OutputSimd128Register(), i.MemoryOperand());
} else {
__ Cvtps2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
}
break;
}
case kIA32F32x4DemoteF64x2Zero: {
......
......@@ -578,7 +578,7 @@ void InstructionSelector::VisitLoad(Node* node, Node* value,
InstructionOperand inputs[3];
size_t input_count = 0;
AddressingMode mode =
g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
g.GetEffectiveAddressMemoryOperand(value, inputs, &input_count);
InstructionCode code = opcode | AddressingModeField::encode(mode);
Emit(code, 1, outputs, input_count, inputs);
}
......@@ -2344,7 +2344,6 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
#define SIMD_UNOP_LIST(V) \
V(F64x2ConvertLowI32x4S) \
V(F64x2PromoteLowF32x4) \
V(F32x4DemoteF64x2Zero) \
V(F32x4Sqrt) \
V(F32x4SConvertI32x4) \
......@@ -3172,6 +3171,25 @@ void InstructionSelector::VisitI64x2Abs(Node* node) {
VisitRRSimd(this, node, kIA32I64x2Abs, kIA32I64x2Abs);
}
// Selects kIA32F64x2PromoteLowF32x4 for |node|.
//
// If the single input is an S128Load64Zero load transform that |node| is
// allowed to cover, the load is folded into the promote instruction by
// emitting it through VisitLoad (using the load as the address source).
// Otherwise the instruction is emitted through the generic VisitRR path.
void InstructionSelector::VisitF64x2PromoteLowF32x4(Node* node) {
  IA32OperandGenerator g(this);
  Node* const operand = node->InputAt(0);
  const InstructionCode opcode = kIA32F64x2PromoteLowF32x4;
  LoadTransformMatcher load_matcher(operand);

  const bool can_fold_load =
      load_matcher.Is(LoadTransformation::kS128Load64Zero) &&
      CanCover(node, operand);
  if (!can_fold_load) {
    // No foldable load: fall back to the plain VisitRR emission.
    VisitRR(this, node, opcode);
    return;
  }

  // IA32 has no trap handler support, so a protected access is unexpected.
  DCHECK_NE(load_matcher.ResolvedValue().kind, MemoryAccessKind::kProtected);
  // LoadTransform nodes are never eliminated and would therefore still be
  // visited even when unused; marking the load as defined prevents a
  // separate visit for it.
  MarkAsDefined(operand);
  VisitLoad(node, operand, opcode);
}
void InstructionSelector::AddOutputToSelectContinuation(OperandGenerator* g,
int first_input_index,
Node* node) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment