Commit 7b3bdb13 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm64] Consolidate opcodes for signed extend widening

- I32x4SConvertI16x8Low and I16x8SConvertI8x16Low becomes kArm64Sxtl
- I32x4SConvertI16x8High and I16x8SConvertI8x16High becomes kArm64Sxtl2
- I32x4UConvertI16x8Low and I16x8UConvertI8x16Low becomes kArm64Uxtl
- I32x4UConvertI16x8High and I16x8UConvertI8x16High becomes kArm64Uxtl2

This saves us 4 arch opcodes, and also later when we implement the i64x2
versions, we don't need to add any new opcodes.

Bug: v8:10930, v8:10972
Change-Id: I01f3ea78f0bc7de9026316379d9eefa18df3a0d1
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2441367
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70357}
parent a27f80f0
......@@ -1873,11 +1873,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Instr(i.OutputSimd128Register().V##FORMAT(), \
i.InputSimd128Register(0).V##FORMAT()); \
break;
// Expands to a switch case that emits a widening (long) unary SIMD
// instruction: the destination register uses the WIDE vector format and the
// source register uses the NARROW format (e.g. 4S <- 4H for Sxtl).
#define SIMD_WIDENING_UNOP_CASE(Op, Instr, WIDE, NARROW) \
case Op: \
__ Instr(i.OutputSimd128Register().V##WIDE(), \
i.InputSimd128Register(0).V##NARROW()); \
break;
#define SIMD_BINOP_CASE(Op, Instr, FORMAT) \
case Op: \
__ Instr(i.OutputSimd128Register().V##FORMAT(), \
......@@ -1893,6 +1888,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; \
}
case kArm64Sxtl: {
VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
VectorFormat narrow = VectorFormatHalfWidth(wide);
__ Sxtl(i.OutputSimd128Register().Format(wide),
i.InputSimd128Register(0).Format(narrow));
break;
}
case kArm64Sxtl2: {
VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
VectorFormat narrow = VectorFormatHalfWidthDoubleLanes(wide);
__ Sxtl2(i.OutputSimd128Register().Format(wide),
i.InputSimd128Register(0).Format(narrow));
break;
}
case kArm64Uxtl: {
VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
VectorFormat narrow = VectorFormatHalfWidth(wide);
__ Uxtl(i.OutputSimd128Register().Format(wide),
i.InputSimd128Register(0).Format(narrow));
break;
}
case kArm64Uxtl2: {
VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
VectorFormat narrow = VectorFormatHalfWidthDoubleLanes(wide);
__ Uxtl2(i.OutputSimd128Register().Format(wide),
i.InputSimd128Register(0).Format(narrow));
break;
}
case kArm64F64x2Splat: {
__ Dup(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).D(), 0);
break;
......@@ -2157,8 +2180,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
SIMD_UNOP_CASE(kArm64I32x4SConvertF32x4, Fcvtzs, 4S);
SIMD_WIDENING_UNOP_CASE(kArm64I32x4SConvertI16x8Low, Sxtl, 4S, 4H);
SIMD_WIDENING_UNOP_CASE(kArm64I32x4SConvertI16x8High, Sxtl2, 4S, 8H);
SIMD_UNOP_CASE(kArm64I32x4Neg, Neg, 4S);
case kArm64I32x4Shl: {
ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 5, V4S, Sshl, W);
......@@ -2187,8 +2208,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_BINOP_CASE(kArm64I32x4GtS, Cmgt, 4S);
SIMD_BINOP_CASE(kArm64I32x4GeS, Cmge, 4S);
SIMD_UNOP_CASE(kArm64I32x4UConvertF32x4, Fcvtzu, 4S);
SIMD_WIDENING_UNOP_CASE(kArm64I32x4UConvertI16x8Low, Uxtl, 4S, 4H);
SIMD_WIDENING_UNOP_CASE(kArm64I32x4UConvertI16x8High, Uxtl2, 4S, 8H);
case kArm64I32x4ShrU: {
ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 5, V4S, Ushl, W);
break;
......@@ -2247,8 +2266,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
break;
}
SIMD_WIDENING_UNOP_CASE(kArm64I16x8SConvertI8x16Low, Sxtl, 8H, 8B);
SIMD_WIDENING_UNOP_CASE(kArm64I16x8SConvertI8x16High, Sxtl2, 8H, 16B);
SIMD_UNOP_CASE(kArm64I16x8Neg, Neg, 8H);
case kArm64I16x8Shl: {
ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 4, V8H, Sshl, W);
......@@ -2292,15 +2309,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
SIMD_BINOP_CASE(kArm64I16x8GtS, Cmgt, 8H);
SIMD_BINOP_CASE(kArm64I16x8GeS, Cmge, 8H);
case kArm64I16x8UConvertI8x16Low: {
__ Uxtl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8B());
break;
}
case kArm64I16x8UConvertI8x16High: {
__ Uxtl2(i.OutputSimd128Register().V8H(),
i.InputSimd128Register(0).V16B());
break;
}
case kArm64I16x8ShrU: {
ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 4, V8H, Ushl, W);
break;
......@@ -2653,7 +2661,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} // NOLINT(readability/fn_size)
#undef SIMD_UNOP_CASE
#undef SIMD_WIDENING_UNOP_CASE
#undef SIMD_BINOP_CASE
#undef SIMD_DESTRUCTIVE_BINOP_CASE
#undef SIMD_REDUCE_OP_CASE
......
......@@ -168,6 +168,10 @@ namespace compiler {
V(Arm64StrCompressTagged) \
V(Arm64DmbIsh) \
V(Arm64DsbIsb) \
V(Arm64Sxtl) \
V(Arm64Sxtl2) \
V(Arm64Uxtl) \
V(Arm64Uxtl2) \
V(Arm64F64x2Splat) \
V(Arm64F64x2ExtractLane) \
V(Arm64F64x2ReplaceLane) \
......@@ -233,8 +237,6 @@ namespace compiler {
V(Arm64I32x4ExtractLane) \
V(Arm64I32x4ReplaceLane) \
V(Arm64I32x4SConvertF32x4) \
V(Arm64I32x4SConvertI16x8Low) \
V(Arm64I32x4SConvertI16x8High) \
V(Arm64I32x4Neg) \
V(Arm64I32x4Shl) \
V(Arm64I32x4ShrS) \
......@@ -251,8 +253,6 @@ namespace compiler {
V(Arm64I32x4GtS) \
V(Arm64I32x4GeS) \
V(Arm64I32x4UConvertF32x4) \
V(Arm64I32x4UConvertI16x8Low) \
V(Arm64I32x4UConvertI16x8High) \
V(Arm64I32x4ShrU) \
V(Arm64I32x4MinU) \
V(Arm64I32x4MaxU) \
......@@ -265,8 +265,6 @@ namespace compiler {
V(Arm64I16x8ExtractLaneU) \
V(Arm64I16x8ExtractLaneS) \
V(Arm64I16x8ReplaceLane) \
V(Arm64I16x8SConvertI8x16Low) \
V(Arm64I16x8SConvertI8x16High) \
V(Arm64I16x8Neg) \
V(Arm64I16x8Shl) \
V(Arm64I16x8ShrS) \
......@@ -285,8 +283,6 @@ namespace compiler {
V(Arm64I16x8Ne) \
V(Arm64I16x8GtS) \
V(Arm64I16x8GeS) \
V(Arm64I16x8UConvertI8x16Low) \
V(Arm64I16x8UConvertI8x16High) \
V(Arm64I16x8ShrU) \
V(Arm64I16x8UConvertI32x4) \
V(Arm64I16x8AddSaturateU) \
......
......@@ -203,8 +203,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I32x4ExtractLane:
case kArm64I32x4ReplaceLane:
case kArm64I32x4SConvertF32x4:
case kArm64I32x4SConvertI16x8Low:
case kArm64I32x4SConvertI16x8High:
case kArm64Sxtl:
case kArm64Sxtl2:
case kArm64Uxtl:
case kArm64Uxtl2:
case kArm64I32x4Neg:
case kArm64I32x4Shl:
case kArm64I32x4ShrS:
......@@ -221,8 +223,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I32x4GtS:
case kArm64I32x4GeS:
case kArm64I32x4UConvertF32x4:
case kArm64I32x4UConvertI16x8Low:
case kArm64I32x4UConvertI16x8High:
case kArm64I32x4ShrU:
case kArm64I32x4MinU:
case kArm64I32x4MaxU:
......@@ -235,8 +235,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I16x8ExtractLaneU:
case kArm64I16x8ExtractLaneS:
case kArm64I16x8ReplaceLane:
case kArm64I16x8SConvertI8x16Low:
case kArm64I16x8SConvertI8x16High:
case kArm64I16x8Neg:
case kArm64I16x8Shl:
case kArm64I16x8ShrS:
......@@ -255,8 +253,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I16x8Ne:
case kArm64I16x8GtS:
case kArm64I16x8GeS:
case kArm64I16x8UConvertI8x16Low:
case kArm64I16x8UConvertI8x16High:
case kArm64I16x8ShrU:
case kArm64I16x8UConvertI32x4:
case kArm64I16x8AddSaturateU:
......
......@@ -144,6 +144,13 @@ void VisitRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
g.UseRegister(node->InputAt(0)));
}
// Emits a single-input, single-output instruction where the full
// InstructionCode (arch opcode plus any encoded fields, e.g. MiscField) is
// supplied by the caller. Overloads the ArchOpcode-based VisitRR above.
void VisitRR(InstructionSelector* selector, InstructionCode opcode,
             Node* node) {
  Arm64OperandGenerator gen(selector);
  InstructionOperand output = gen.DefineAsRegister(node);
  InstructionOperand input = gen.UseRegister(node->InputAt(0));
  selector->Emit(opcode, output, input);
}
void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
Arm64OperandGenerator g(selector);
selector->Emit(opcode, g.DefineAsRegister(node),
......@@ -3223,18 +3230,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F32x4RecipSqrtApprox, kArm64F32x4RecipSqrtApprox) \
V(I64x2Neg, kArm64I64x2Neg) \
V(I32x4SConvertF32x4, kArm64I32x4SConvertF32x4) \
V(I32x4SConvertI16x8Low, kArm64I32x4SConvertI16x8Low) \
V(I32x4SConvertI16x8High, kArm64I32x4SConvertI16x8High) \
V(I32x4Neg, kArm64I32x4Neg) \
V(I32x4UConvertF32x4, kArm64I32x4UConvertF32x4) \
V(I32x4UConvertI16x8Low, kArm64I32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High, kArm64I32x4UConvertI16x8High) \
V(I32x4Abs, kArm64I32x4Abs) \
V(I16x8SConvertI8x16Low, kArm64I16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High, kArm64I16x8SConvertI8x16High) \
V(I16x8Neg, kArm64I16x8Neg) \
V(I16x8UConvertI8x16Low, kArm64I16x8UConvertI8x16Low) \
V(I16x8UConvertI8x16High, kArm64I16x8UConvertI8x16High) \
V(I16x8Abs, kArm64I16x8Abs) \
V(I8x16Neg, kArm64I8x16Neg) \
V(I8x16Abs, kArm64I8x16Abs) \
......@@ -3716,6 +3715,47 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) {
VisitPminOrPmax(this, kArm64F64x2Pmax, node);
}
namespace {
// Helper for the Sxtl/Sxtl2/Uxtl/Uxtl2 widening conversions. The destination
// lane size in bits is packed into the instruction's MiscField; the code
// generator decodes it to reconstruct the wide and narrow vector formats.
void VisitSignExtendLong(InstructionSelector* selector, ArchOpcode opcode,
                         Node* node, int lane_size) {
  InstructionCode encoded_opcode = opcode;
  encoded_opcode |= MiscField::encode(lane_size);
  VisitRR(selector, encoded_opcode, node);
}
}  // namespace
// i32x4.widen_low_i16x8_s: sign-extend via Sxtl with 32-bit destination lanes.
void InstructionSelector::VisitI32x4SConvertI16x8Low(Node* node) {
  VisitSignExtendLong(this, kArm64Sxtl, node, 32);
}
// i32x4.widen_high_i16x8_s: sign-extend via Sxtl2 with 32-bit destination
// lanes.
void InstructionSelector::VisitI32x4SConvertI16x8High(Node* node) {
  VisitSignExtendLong(this, kArm64Sxtl2, node, 32);
}
// i32x4.widen_low_i16x8_u: zero-extend via Uxtl with 32-bit destination lanes.
void InstructionSelector::VisitI32x4UConvertI16x8Low(Node* node) {
  VisitSignExtendLong(this, kArm64Uxtl, node, 32);
}
// i32x4.widen_high_i16x8_u: zero-extend via Uxtl2 with 32-bit destination
// lanes.
void InstructionSelector::VisitI32x4UConvertI16x8High(Node* node) {
  VisitSignExtendLong(this, kArm64Uxtl2, node, 32);
}
// i16x8.widen_low_i8x16_s: sign-extend via Sxtl with 16-bit destination lanes.
void InstructionSelector::VisitI16x8SConvertI8x16Low(Node* node) {
  VisitSignExtendLong(this, kArm64Sxtl, node, 16);
}
// i16x8.widen_high_i8x16_s: sign-extend via Sxtl2 with 16-bit destination
// lanes.
void InstructionSelector::VisitI16x8SConvertI8x16High(Node* node) {
  VisitSignExtendLong(this, kArm64Sxtl2, node, 16);
}
// i16x8.widen_low_i8x16_u: zero-extend via Uxtl with 16-bit destination lanes.
void InstructionSelector::VisitI16x8UConvertI8x16Low(Node* node) {
  VisitSignExtendLong(this, kArm64Uxtl, node, 16);
}
// i16x8.widen_high_i8x16_u: zero-extend via Uxtl2 with 16-bit destination
// lanes.
void InstructionSelector::VisitI16x8UConvertI8x16High(Node* node) {
  VisitSignExtendLong(this, kArm64Uxtl2, node, 16);
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment