Commit fde3691b authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm64] Optimize load extends to remove add

For load extends, we can use Ldr, which does not require us to manually
calculate the address. Load splats still use ld1r, which only offers
post-index addressing, so for those we have to add the index ourselves.

By checking the operation in the instruction-selector, we can set the
addressing mode for load extends to be MRR, then use Ldr in the codegen.

Bug: v8:9886
Change-Id: Ibcd22fa719cd6dafd2fd06e68066960db249b57a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2207656
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67912}
parent f920ad11
......@@ -2550,32 +2550,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArm64I16x8Load8x8S: {
__ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
__ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
__ Sxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
break;
}
case kArm64I16x8Load8x8U: {
__ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
__ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
__ Uxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
break;
}
case kArm64I32x4Load16x4S: {
__ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
__ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
__ Sxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
break;
}
case kArm64I32x4Load16x4U: {
__ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
__ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
__ Uxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
break;
}
case kArm64I64x2Load32x2S: {
__ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
__ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
__ Sxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
break;
}
case kArm64I64x2Load32x2U: {
__ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
__ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
__ Uxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
break;
}
......
......@@ -608,18 +608,23 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode,
void InstructionSelector::VisitLoadTransform(Node* node) {
LoadTransformParameters params = LoadTransformParametersOf(node->op());
InstructionCode opcode = kArchNop;
bool require_add = false;
switch (params.transformation) {
case LoadTransformation::kS8x16LoadSplat:
opcode = kArm64S8x16LoadSplat;
require_add = true;
break;
case LoadTransformation::kS16x8LoadSplat:
opcode = kArm64S16x8LoadSplat;
require_add = true;
break;
case LoadTransformation::kS32x4LoadSplat:
opcode = kArm64S32x4LoadSplat;
require_add = true;
break;
case LoadTransformation::kS64x2LoadSplat:
opcode = kArm64S64x2LoadSplat;
require_add = true;
break;
case LoadTransformation::kI16x8Load8x8S:
opcode = kArm64I16x8Load8x8S;
......@@ -655,6 +660,7 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
inputs[1] = g.UseRegister(index);
outputs[0] = g.DefineAsRegister(node);
if (require_add) {
// ld1r uses post-index, so construct address first.
// TODO(v8:9886) If index can be immediate, use vldr without this add.
InstructionOperand addr = g.TempRegister();
......@@ -662,6 +668,9 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
inputs[0] = addr;
inputs[1] = g.TempImmediate(0);
opcode |= AddressingModeField::encode(kMode_MRI);
} else {
opcode |= AddressingModeField::encode(kMode_MRR);
}
Emit(opcode, 1, outputs, 2, inputs);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment