Commit 7b3bdb13 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm64] Consolidate opcodes for signed extend widening

- I32x4SConvertI16x8Low and I16x8SConvertI8x16Low becomes kArm64Sxtl
- I32x4SConvertI16x8High and I16x8SConvertI8x16High becomes kArm64Sxtl2
- I32x4UConvertI16x8Low and I16x8UConvertI8x16Low becomes kArm64Uxtl
- I32x4UConvertI16x8High and I16x8UConvertI8x16High becomes kArm64Uxtl2

This saves us 4 arch opcodes, and also later when we implement the i64x2
versions, we don't need to add any new opcodes.

Bug: v8:10930, v8:10972
Change-Id: I01f3ea78f0bc7de9026316379d9eefa18df3a0d1
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2441367
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70357}
parent a27f80f0
...@@ -1873,11 +1873,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1873,11 +1873,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Instr(i.OutputSimd128Register().V##FORMAT(), \ __ Instr(i.OutputSimd128Register().V##FORMAT(), \
i.InputSimd128Register(0).V##FORMAT()); \ i.InputSimd128Register(0).V##FORMAT()); \
break; break;
// Expands to a switch case that emits a widening unary SIMD op: the output
// register uses the WIDE vector format and the single input uses the NARROW
// format (e.g. 4S from 4H). Removed by this change in favor of the generic
// kArm64Sxtl/Sxtl2/Uxtl/Uxtl2 opcodes, which carry the lane size in MiscField.
#define SIMD_WIDENING_UNOP_CASE(Op, Instr, WIDE, NARROW) \
case Op: \
__ Instr(i.OutputSimd128Register().V##WIDE(), \
i.InputSimd128Register(0).V##NARROW()); \
break;
#define SIMD_BINOP_CASE(Op, Instr, FORMAT) \ #define SIMD_BINOP_CASE(Op, Instr, FORMAT) \
case Op: \ case Op: \
__ Instr(i.OutputSimd128Register().V##FORMAT(), \ __ Instr(i.OutputSimd128Register().V##FORMAT(), \
...@@ -1893,6 +1888,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1893,6 +1888,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; \ break; \
} }
// Sign-extend each lane of the input's low half into wide lanes. The
// destination lane size is carried in the instruction's MiscField, so one
// opcode covers both the I32x4<-I16x8 and I16x8<-I8x16 conversions.
case kArm64Sxtl: {
VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
VectorFormat narrow = VectorFormatHalfWidth(wide);
// Sxtl reads the low half of the source, so the narrow format has the
// same lane count as the wide one (e.g. 4S <- 4H).
__ Sxtl(i.OutputSimd128Register().Format(wide),
i.InputSimd128Register(0).Format(narrow));
break;
}
// Sign-extend each lane of the input's high half into wide lanes.
case kArm64Sxtl2: {
VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
// Sxtl2 reads the high half, so the narrow format has double the lane
// count of the wide one (e.g. 4S <- 8H).
VectorFormat narrow = VectorFormatHalfWidthDoubleLanes(wide);
__ Sxtl2(i.OutputSimd128Register().Format(wide),
i.InputSimd128Register(0).Format(narrow));
break;
}
// Zero-extend each lane of the input's low half into wide lanes.
case kArm64Uxtl: {
VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
VectorFormat narrow = VectorFormatHalfWidth(wide);
__ Uxtl(i.OutputSimd128Register().Format(wide),
i.InputSimd128Register(0).Format(narrow));
break;
}
// Zero-extend each lane of the input's high half into wide lanes.
case kArm64Uxtl2: {
VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
VectorFormat narrow = VectorFormatHalfWidthDoubleLanes(wide);
__ Uxtl2(i.OutputSimd128Register().Format(wide),
i.InputSimd128Register(0).Format(narrow));
break;
}
case kArm64F64x2Splat: { case kArm64F64x2Splat: {
__ Dup(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).D(), 0); __ Dup(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).D(), 0);
break; break;
...@@ -2157,8 +2180,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2157,8 +2180,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
SIMD_UNOP_CASE(kArm64I32x4SConvertF32x4, Fcvtzs, 4S); SIMD_UNOP_CASE(kArm64I32x4SConvertF32x4, Fcvtzs, 4S);
SIMD_WIDENING_UNOP_CASE(kArm64I32x4SConvertI16x8Low, Sxtl, 4S, 4H);
SIMD_WIDENING_UNOP_CASE(kArm64I32x4SConvertI16x8High, Sxtl2, 4S, 8H);
SIMD_UNOP_CASE(kArm64I32x4Neg, Neg, 4S); SIMD_UNOP_CASE(kArm64I32x4Neg, Neg, 4S);
case kArm64I32x4Shl: { case kArm64I32x4Shl: {
ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 5, V4S, Sshl, W); ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 5, V4S, Sshl, W);
...@@ -2187,8 +2208,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2187,8 +2208,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_BINOP_CASE(kArm64I32x4GtS, Cmgt, 4S); SIMD_BINOP_CASE(kArm64I32x4GtS, Cmgt, 4S);
SIMD_BINOP_CASE(kArm64I32x4GeS, Cmge, 4S); SIMD_BINOP_CASE(kArm64I32x4GeS, Cmge, 4S);
SIMD_UNOP_CASE(kArm64I32x4UConvertF32x4, Fcvtzu, 4S); SIMD_UNOP_CASE(kArm64I32x4UConvertF32x4, Fcvtzu, 4S);
SIMD_WIDENING_UNOP_CASE(kArm64I32x4UConvertI16x8Low, Uxtl, 4S, 4H);
SIMD_WIDENING_UNOP_CASE(kArm64I32x4UConvertI16x8High, Uxtl2, 4S, 8H);
case kArm64I32x4ShrU: { case kArm64I32x4ShrU: {
ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 5, V4S, Ushl, W); ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 5, V4S, Ushl, W);
break; break;
...@@ -2247,8 +2266,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2247,8 +2266,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Mov(dst, i.InputInt8(1), i.InputRegister32(2)); __ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
break; break;
} }
SIMD_WIDENING_UNOP_CASE(kArm64I16x8SConvertI8x16Low, Sxtl, 8H, 8B);
SIMD_WIDENING_UNOP_CASE(kArm64I16x8SConvertI8x16High, Sxtl2, 8H, 16B);
SIMD_UNOP_CASE(kArm64I16x8Neg, Neg, 8H); SIMD_UNOP_CASE(kArm64I16x8Neg, Neg, 8H);
case kArm64I16x8Shl: { case kArm64I16x8Shl: {
ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 4, V8H, Sshl, W); ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 4, V8H, Sshl, W);
...@@ -2292,15 +2309,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2292,15 +2309,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
SIMD_BINOP_CASE(kArm64I16x8GtS, Cmgt, 8H); SIMD_BINOP_CASE(kArm64I16x8GtS, Cmgt, 8H);
SIMD_BINOP_CASE(kArm64I16x8GeS, Cmge, 8H); SIMD_BINOP_CASE(kArm64I16x8GeS, Cmge, 8H);
case kArm64I16x8UConvertI8x16Low: {
__ Uxtl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8B());
break;
}
case kArm64I16x8UConvertI8x16High: {
__ Uxtl2(i.OutputSimd128Register().V8H(),
i.InputSimd128Register(0).V16B());
break;
}
case kArm64I16x8ShrU: { case kArm64I16x8ShrU: {
ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 4, V8H, Ushl, W); ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 4, V8H, Ushl, W);
break; break;
...@@ -2653,7 +2661,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2653,7 +2661,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} // NOLINT(readability/fn_size) } // NOLINT(readability/fn_size)
#undef SIMD_UNOP_CASE #undef SIMD_UNOP_CASE
#undef SIMD_WIDENING_UNOP_CASE
#undef SIMD_BINOP_CASE #undef SIMD_BINOP_CASE
#undef SIMD_DESTRUCTIVE_BINOP_CASE #undef SIMD_DESTRUCTIVE_BINOP_CASE
#undef SIMD_REDUCE_OP_CASE #undef SIMD_REDUCE_OP_CASE
......
...@@ -168,6 +168,10 @@ namespace compiler { ...@@ -168,6 +168,10 @@ namespace compiler {
V(Arm64StrCompressTagged) \ V(Arm64StrCompressTagged) \
V(Arm64DmbIsh) \ V(Arm64DmbIsh) \
V(Arm64DsbIsb) \ V(Arm64DsbIsb) \
V(Arm64Sxtl) \
V(Arm64Sxtl2) \
V(Arm64Uxtl) \
V(Arm64Uxtl2) \
V(Arm64F64x2Splat) \ V(Arm64F64x2Splat) \
V(Arm64F64x2ExtractLane) \ V(Arm64F64x2ExtractLane) \
V(Arm64F64x2ReplaceLane) \ V(Arm64F64x2ReplaceLane) \
...@@ -233,8 +237,6 @@ namespace compiler { ...@@ -233,8 +237,6 @@ namespace compiler {
V(Arm64I32x4ExtractLane) \ V(Arm64I32x4ExtractLane) \
V(Arm64I32x4ReplaceLane) \ V(Arm64I32x4ReplaceLane) \
V(Arm64I32x4SConvertF32x4) \ V(Arm64I32x4SConvertF32x4) \
V(Arm64I32x4SConvertI16x8Low) \
V(Arm64I32x4SConvertI16x8High) \
V(Arm64I32x4Neg) \ V(Arm64I32x4Neg) \
V(Arm64I32x4Shl) \ V(Arm64I32x4Shl) \
V(Arm64I32x4ShrS) \ V(Arm64I32x4ShrS) \
...@@ -251,8 +253,6 @@ namespace compiler { ...@@ -251,8 +253,6 @@ namespace compiler {
V(Arm64I32x4GtS) \ V(Arm64I32x4GtS) \
V(Arm64I32x4GeS) \ V(Arm64I32x4GeS) \
V(Arm64I32x4UConvertF32x4) \ V(Arm64I32x4UConvertF32x4) \
V(Arm64I32x4UConvertI16x8Low) \
V(Arm64I32x4UConvertI16x8High) \
V(Arm64I32x4ShrU) \ V(Arm64I32x4ShrU) \
V(Arm64I32x4MinU) \ V(Arm64I32x4MinU) \
V(Arm64I32x4MaxU) \ V(Arm64I32x4MaxU) \
...@@ -265,8 +265,6 @@ namespace compiler { ...@@ -265,8 +265,6 @@ namespace compiler {
V(Arm64I16x8ExtractLaneU) \ V(Arm64I16x8ExtractLaneU) \
V(Arm64I16x8ExtractLaneS) \ V(Arm64I16x8ExtractLaneS) \
V(Arm64I16x8ReplaceLane) \ V(Arm64I16x8ReplaceLane) \
V(Arm64I16x8SConvertI8x16Low) \
V(Arm64I16x8SConvertI8x16High) \
V(Arm64I16x8Neg) \ V(Arm64I16x8Neg) \
V(Arm64I16x8Shl) \ V(Arm64I16x8Shl) \
V(Arm64I16x8ShrS) \ V(Arm64I16x8ShrS) \
...@@ -285,8 +283,6 @@ namespace compiler { ...@@ -285,8 +283,6 @@ namespace compiler {
V(Arm64I16x8Ne) \ V(Arm64I16x8Ne) \
V(Arm64I16x8GtS) \ V(Arm64I16x8GtS) \
V(Arm64I16x8GeS) \ V(Arm64I16x8GeS) \
V(Arm64I16x8UConvertI8x16Low) \
V(Arm64I16x8UConvertI8x16High) \
V(Arm64I16x8ShrU) \ V(Arm64I16x8ShrU) \
V(Arm64I16x8UConvertI32x4) \ V(Arm64I16x8UConvertI32x4) \
V(Arm64I16x8AddSaturateU) \ V(Arm64I16x8AddSaturateU) \
......
...@@ -203,8 +203,10 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -203,8 +203,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I32x4ExtractLane: case kArm64I32x4ExtractLane:
case kArm64I32x4ReplaceLane: case kArm64I32x4ReplaceLane:
case kArm64I32x4SConvertF32x4: case kArm64I32x4SConvertF32x4:
case kArm64I32x4SConvertI16x8Low: case kArm64Sxtl:
case kArm64I32x4SConvertI16x8High: case kArm64Sxtl2:
case kArm64Uxtl:
case kArm64Uxtl2:
case kArm64I32x4Neg: case kArm64I32x4Neg:
case kArm64I32x4Shl: case kArm64I32x4Shl:
case kArm64I32x4ShrS: case kArm64I32x4ShrS:
...@@ -221,8 +223,6 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -221,8 +223,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I32x4GtS: case kArm64I32x4GtS:
case kArm64I32x4GeS: case kArm64I32x4GeS:
case kArm64I32x4UConvertF32x4: case kArm64I32x4UConvertF32x4:
case kArm64I32x4UConvertI16x8Low:
case kArm64I32x4UConvertI16x8High:
case kArm64I32x4ShrU: case kArm64I32x4ShrU:
case kArm64I32x4MinU: case kArm64I32x4MinU:
case kArm64I32x4MaxU: case kArm64I32x4MaxU:
...@@ -235,8 +235,6 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -235,8 +235,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I16x8ExtractLaneU: case kArm64I16x8ExtractLaneU:
case kArm64I16x8ExtractLaneS: case kArm64I16x8ExtractLaneS:
case kArm64I16x8ReplaceLane: case kArm64I16x8ReplaceLane:
case kArm64I16x8SConvertI8x16Low:
case kArm64I16x8SConvertI8x16High:
case kArm64I16x8Neg: case kArm64I16x8Neg:
case kArm64I16x8Shl: case kArm64I16x8Shl:
case kArm64I16x8ShrS: case kArm64I16x8ShrS:
...@@ -255,8 +253,6 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -255,8 +253,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I16x8Ne: case kArm64I16x8Ne:
case kArm64I16x8GtS: case kArm64I16x8GtS:
case kArm64I16x8GeS: case kArm64I16x8GeS:
case kArm64I16x8UConvertI8x16Low:
case kArm64I16x8UConvertI8x16High:
case kArm64I16x8ShrU: case kArm64I16x8ShrU:
case kArm64I16x8UConvertI32x4: case kArm64I16x8UConvertI32x4:
case kArm64I16x8AddSaturateU: case kArm64I16x8AddSaturateU:
......
...@@ -144,6 +144,13 @@ void VisitRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { ...@@ -144,6 +144,13 @@ void VisitRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
g.UseRegister(node->InputAt(0))); g.UseRegister(node->InputAt(0)));
} }
// Emits a simple two-operand instruction: the node's value is defined in a
// register and its single input is consumed from a register. This overload
// takes a full InstructionCode so callers can pre-encode fields (e.g.
// MiscField) into the opcode before emission.
void VisitRR(InstructionSelector* selector, InstructionCode opcode,
             Node* node) {
  Arm64OperandGenerator gen(selector);
  InstructionOperand output = gen.DefineAsRegister(node);
  InstructionOperand input = gen.UseRegister(node->InputAt(0));
  selector->Emit(opcode, output, input);
}
void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
Arm64OperandGenerator g(selector); Arm64OperandGenerator g(selector);
selector->Emit(opcode, g.DefineAsRegister(node), selector->Emit(opcode, g.DefineAsRegister(node),
...@@ -3223,18 +3230,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { ...@@ -3223,18 +3230,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F32x4RecipSqrtApprox, kArm64F32x4RecipSqrtApprox) \ V(F32x4RecipSqrtApprox, kArm64F32x4RecipSqrtApprox) \
V(I64x2Neg, kArm64I64x2Neg) \ V(I64x2Neg, kArm64I64x2Neg) \
V(I32x4SConvertF32x4, kArm64I32x4SConvertF32x4) \ V(I32x4SConvertF32x4, kArm64I32x4SConvertF32x4) \
V(I32x4SConvertI16x8Low, kArm64I32x4SConvertI16x8Low) \
V(I32x4SConvertI16x8High, kArm64I32x4SConvertI16x8High) \
V(I32x4Neg, kArm64I32x4Neg) \ V(I32x4Neg, kArm64I32x4Neg) \
V(I32x4UConvertF32x4, kArm64I32x4UConvertF32x4) \ V(I32x4UConvertF32x4, kArm64I32x4UConvertF32x4) \
V(I32x4UConvertI16x8Low, kArm64I32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High, kArm64I32x4UConvertI16x8High) \
V(I32x4Abs, kArm64I32x4Abs) \ V(I32x4Abs, kArm64I32x4Abs) \
V(I16x8SConvertI8x16Low, kArm64I16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High, kArm64I16x8SConvertI8x16High) \
V(I16x8Neg, kArm64I16x8Neg) \ V(I16x8Neg, kArm64I16x8Neg) \
V(I16x8UConvertI8x16Low, kArm64I16x8UConvertI8x16Low) \
V(I16x8UConvertI8x16High, kArm64I16x8UConvertI8x16High) \
V(I16x8Abs, kArm64I16x8Abs) \ V(I16x8Abs, kArm64I16x8Abs) \
V(I8x16Neg, kArm64I8x16Neg) \ V(I8x16Neg, kArm64I8x16Neg) \
V(I8x16Abs, kArm64I8x16Abs) \ V(I8x16Abs, kArm64I8x16Abs) \
...@@ -3716,6 +3715,47 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) { ...@@ -3716,6 +3715,47 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) {
VisitPminOrPmax(this, kArm64F64x2Pmax, node); VisitPminOrPmax(this, kArm64F64x2Pmax, node);
} }
namespace {
// Shared helper for the widening integer conversions (Sxtl/Sxtl2/Uxtl/Uxtl2):
// encodes the destination lane size (in bits) into the opcode's MiscField so
// the code generator can recover the wide vector format, then emits the
// instruction as a plain register-to-register op.
void VisitSignExtendLong(InstructionSelector* selector, ArchOpcode opcode,
                         Node* node, int lane_size) {
  InstructionCode encoded = opcode;
  encoded |= MiscField::encode(lane_size);
  VisitRR(selector, encoded, node);
}
}  // namespace
// The eight wasm widening conversions all lower to four arch opcodes
// (Sxtl/Sxtl2 for signed, Uxtl/Uxtl2 for unsigned); the destination lane
// size (32 or 16 bits) distinguishes the I32x4 and I16x8 variants and is
// passed through MiscField via VisitSignExtendLong.
void InstructionSelector::VisitI32x4SConvertI16x8Low(Node* node) {
VisitSignExtendLong(this, kArm64Sxtl, node, 32);
}
void InstructionSelector::VisitI32x4SConvertI16x8High(Node* node) {
VisitSignExtendLong(this, kArm64Sxtl2, node, 32);
}
void InstructionSelector::VisitI32x4UConvertI16x8Low(Node* node) {
VisitSignExtendLong(this, kArm64Uxtl, node, 32);
}
void InstructionSelector::VisitI32x4UConvertI16x8High(Node* node) {
VisitSignExtendLong(this, kArm64Uxtl2, node, 32);
}
void InstructionSelector::VisitI16x8SConvertI8x16Low(Node* node) {
VisitSignExtendLong(this, kArm64Sxtl, node, 16);
}
void InstructionSelector::VisitI16x8SConvertI8x16High(Node* node) {
VisitSignExtendLong(this, kArm64Sxtl2, node, 16);
}
void InstructionSelector::VisitI16x8UConvertI8x16Low(Node* node) {
VisitSignExtendLong(this, kArm64Uxtl, node, 16);
}
void InstructionSelector::VisitI16x8UConvertI8x16High(Node* node) {
VisitSignExtendLong(this, kArm64Uxtl2, node, 16);
}
// static // static
MachineOperatorBuilder::Flags MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() { InstructionSelector::SupportedMachineOperatorFlags() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment