Commit 6dbc2b01 authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][arm64] Prototype load lane and store lane

Prototype v128.{load,store}{8,16,32,64}_lane on arm64.

All the required assembler, disassembler, and simulator changes are
already available. The biggest changes here are in the
instruction-selector. ld1 and st1 only support no-offset or post-index
addressing, so we have to do our own addition (base + index) to
construct the actual memory address to load/store from.

Bug: v8:10975
Change-Id: I026e3075003ff5dece7cd1a590894b09e2e823db
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2558268
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71551}
parent 5a2390b6
......@@ -2666,6 +2666,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ ld1r(i.OutputSimd128Register().Format(f), i.MemoryOperand(0));
break;
}
case kArm64LoadLane: {
// Loading a single lane merges into the existing vector contents, so the
// output register must alias input 0 (enforced by register allocation).
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
// MiscField carries the lane size in bits (encoded by the instruction
// selector); map it to the corresponding full-width vector format.
VectorFormat f = VectorFormatFillQ(MiscField::decode(opcode));
int laneidx = i.InputInt8(1);
// ld1 (single structure): loads one lane at |laneidx| from the memory
// operand starting at input index 2 (address computed by a preceding add,
// paired with a zero immediate).
__ ld1(i.OutputSimd128Register().Format(f), laneidx, i.MemoryOperand(2));
break;
}
case kArm64StoreLane: {
// MiscField carries the lane size in bits (encoded by the instruction
// selector); map it to the corresponding full-width vector format.
VectorFormat f = VectorFormatFillQ(MiscField::decode(opcode));
int laneidx = i.InputInt8(1);
// st1 (single structure): stores lane |laneidx| of the source vector to
// the memory operand starting at input index 2 (address computed by a
// preceding add, paired with a zero immediate).
__ st1(i.InputSimd128Register(0).Format(f), laneidx, i.MemoryOperand(2));
break;
}
case kArm64S128Load8x8S: {
__ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
__ Sxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
......
......@@ -371,6 +371,8 @@ namespace compiler {
V(Arm64V16x8AllTrue) \
V(Arm64V8x16AllTrue) \
V(Arm64LoadSplat) \
V(Arm64LoadLane) \
V(Arm64StoreLane) \
V(Arm64S128Load8x8S) \
V(Arm64S128Load8x8U) \
V(Arm64S128Load16x4S) \
......
......@@ -360,6 +360,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64LdrDecompressAnyTagged:
case kArm64Peek:
case kArm64LoadSplat:
case kArm64LoadLane:
case kArm64S128Load8x8S:
case kArm64S128Load8x8U:
case kArm64S128Load16x4S:
......@@ -384,6 +385,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64DmbIsh:
case kArm64DsbIsb:
case kArm64Prfm:
case kArm64StoreLane:
return kHasSideEffect;
case kArm64Word64AtomicLoadUint8:
......
......@@ -5,7 +5,10 @@
#include "src/base/bits.h"
#include "src/base/platform/wrappers.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/machine-type.h"
#include "src/common/globals.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
......@@ -637,6 +640,57 @@ void InstructionSelector::VisitPrefetchNonTemporal(Node* node) {
Emit(opcode, 0, nullptr, 2, inputs);
}
namespace {
// Materializes base + index (inputs 0 and 1 of |node|) into a fresh temp
// register and returns that operand. ld1/st1 support only no-offset or
// post-index addressing, so callers emitting those instructions need the
// full address computed up front. Also tags |opcode| with the
// register+immediate addressing mode, since the returned operand is meant
// to be paired with a zero immediate.
InstructionOperand EmitAddBeforeLoadOrStore(InstructionSelector* selector,
Node* node,
InstructionCode* opcode) {
Arm64OperandGenerator gen(selector);
*opcode |= AddressingModeField::encode(kMode_MRI);
InstructionOperand address = gen.TempRegister();
selector->Emit(kArm64Add, address, gen.UseRegister(node->InputAt(0)),
gen.UseRegister(node->InputAt(1)));
return address;
}
}  // namespace
// Selects code for v128.loadN_lane: loads one lane from memory into an
// existing vector. ld1 (single structure) has no reg+reg addressing, so the
// effective address is computed with an explicit add first.
void InstructionSelector::VisitLoadLane(Node* node) {
LoadLaneParameters params = LoadLaneParametersOf(node->op());
// Only the four integer lane shapes are expected here.
DCHECK(params.rep == MachineType::Int8() ||
params.rep == MachineType::Int16() ||
params.rep == MachineType::Int32() ||
params.rep == MachineType::Int64());
// Stash the lane size (in bits) in MiscField for the code generator.
InstructionCode opcode =
kArm64LoadLane | MiscField::encode(params.rep.MemSize() * kBitsPerByte);
Arm64OperandGenerator gen(this);
// base + index -> temp register; this also sets kMode_MRI on |opcode|.
InstructionOperand effective_addr =
EmitAddBeforeLoadOrStore(this, node, &opcode);
// The load merges into the incoming vector (input 2), so the output is
// constrained to the same register. TempImmediate(0) supplies the zero
// immediate offset required by kMode_MRI.
Emit(opcode, gen.DefineSameAsFirst(node), gen.UseRegister(node->InputAt(2)),
gen.UseImmediate(params.laneidx), effective_addr, gen.TempImmediate(0));
}
// Selects code for v128.storeN_lane: stores one lane of a vector to memory.
// st1 (single structure) has no reg+reg addressing, so the effective address
// is computed with an explicit add first.
void InstructionSelector::VisitStoreLane(Node* node) {
StoreLaneParameters params = StoreLaneParametersOf(node->op());
// Only integer representations from word8 through word64 are expected.
DCHECK_LE(MachineRepresentation::kWord8, params.rep);
DCHECK_GE(MachineRepresentation::kWord64, params.rep);
// Stash the lane size (in bits) in MiscField for the code generator.
InstructionCode opcode =
kArm64StoreLane |
MiscField::encode(ElementSizeInBytes(params.rep) * kBitsPerByte);
Arm64OperandGenerator gen(this);
// base + index -> temp register; this also sets kMode_MRI on |opcode|.
InstructionOperand effective_addr =
EmitAddBeforeLoadOrStore(this, node, &opcode);
// Operands: source vector, lane index, address, and the zero immediate
// offset required by kMode_MRI. A store produces no outputs.
InstructionOperand operands[4] = {
gen.UseRegister(node->InputAt(2)),
gen.UseImmediate(params.laneidx),
effective_addr,
gen.TempImmediate(0),
};
Emit(opcode, 0, nullptr, 4, operands);
}
void InstructionSelector::VisitLoadTransform(Node* node) {
LoadTransformParameters params = LoadTransformParametersOf(node->op());
InstructionCode opcode = kArchNop;
......@@ -705,9 +759,7 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
if (require_add) {
// ld1r uses post-index, so construct address first.
// TODO(v8:9886) If index can be immediate, use vldr without this add.
InstructionOperand addr = g.TempRegister();
Emit(kArm64Add, 1, &addr, 2, inputs);
inputs[0] = addr;
inputs[0] = EmitAddBeforeLoadOrStore(this, node, &opcode);
inputs[1] = g.TempImmediate(0);
opcode |= AddressingModeField::encode(kMode_MRI);
} else {
......
......@@ -2820,11 +2820,11 @@ void InstructionSelector::VisitPrefetchNonTemporal(Node* node) {
}
#endif // !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
// TODO(v8:10975): Prototyping load lane and store lane. These stubs exist
// for architectures without a backend implementation (see the surrounding
// #if guard); they fall through to UNIMPLEMENTED() if reached.
void InstructionSelector::VisitLoadLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitStoreLane(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_X64
// TODO(v8:10983) Prototyping sign select.
......
......@@ -3974,7 +3974,7 @@ WASM_SIMD_TEST(S128Load64Zero) {
RunLoadZeroTest<int64_t>(execution_tier, lower_simd, kExprS128Load64Zero);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
// TODO(v8:10975): Prototyping load lane and store lane.
template <typename T>
void RunLoadLaneTest(TestExecutionTier execution_tier, LowerSimd lower_simd,
......@@ -4179,7 +4179,7 @@ WASM_SIMD_TEST_NO_LOWERING(S128Store64Lane) {
kExprI64x2Splat);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
#define WASM_SIMD_ANYTRUE_TEST(format, lanes, max, param_type) \
WASM_SIMD_TEST(S##format##AnyTrue) { \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment