Commit 99e252ba authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Merge replace lane ops into pinsr{b,w,d,q}

The only replace-lane op that doesn't use a pinsr* instruction is f32x4,
which uses insertps, so that one is kept as-is.
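
In intrinsics terms the mapping looks roughly like this (an illustrative
sketch, not part of this CL; the helper names are made up, and the lane
indices are fixed constants only because the intrinsics require immediates):

  #include <immintrin.h>

  __m128i I8x16Replace(__m128i v, int8_t x)  { return _mm_insert_epi8(v, x, 7); }   // pinsrb
  __m128i I16x8Replace(__m128i v, int16_t x) { return _mm_insert_epi16(v, x, 3); }  // pinsrw
  __m128i I32x4Replace(__m128i v, int32_t x) { return _mm_insert_epi32(v, x, 2); }  // pinsrd
  __m128i I64x2Replace(__m128i v, int64_t x) { return _mm_insert_epi64(v, x, 1); }  // pinsrq
  // f64x2 also ends up in pinsrq, after a movq of the double into a GP register.
  // f32x4 is the exception and stays on insertps (0x10 = src lane 0 -> dst lane 1):
  __m128 F32x4Replace(__m128 v, __m128 x)    { return _mm_insert_ps(v, x, 0x10); }  // insertps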

Bug: v8:10933
Change-Id: I7442668812c674d4242949e13ef595978290bc8d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2458787
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70493}
parent d2ab873d
@@ -647,6 +647,26 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
   } \
 } while (false)
 
+#define ASSEMBLE_PINSR(ASM_INSTR)                                     \
+  do {                                                                \
+    EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
+    XMMRegister dst = i.OutputSimd128Register();                      \
+    XMMRegister src = i.InputSimd128Register(0);                      \
+    uint8_t laneidx = i.InputUint8(1);                                \
+    if (HasAddressingMode(instr)) {                                   \
+      __ ASM_INSTR(dst, src, i.MemoryOperand(2), laneidx);            \
+      break;                                                          \
+    }                                                                 \
+    if (instr->InputAt(2)->IsFPRegister()) {                          \
+      __ Movq(kScratchRegister, i.InputDoubleRegister(2));            \
+      __ ASM_INSTR(dst, src, kScratchRegister, laneidx);              \
+    } else if (instr->InputAt(2)->IsRegister()) {                     \
+      __ ASM_INSTR(dst, src, i.InputRegister(2), laneidx);            \
+    } else {                                                          \
+      __ ASM_INSTR(dst, src, i.InputOperand(2), laneidx);             \
+    }                                                                 \
+  } while (false)
+
 void CodeGenerator::AssembleDeconstructFrame() {
   unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
   __ movq(rsp, rbp);

@@ -2354,16 +2374,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       }
       break;
     }
-    case kX64F64x2ReplaceLane: {
-      if (instr->InputAt(2)->IsFPRegister()) {
-        __ Movq(kScratchRegister, i.InputDoubleRegister(2));
-        __ Pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputUint8(1));
-      } else {
-        __ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2),
-                  i.InputUint8(1));
-      }
-      break;
-    }
     case kX64F64x2ExtractLane: {
       __ Pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
       __ Movq(i.OutputDoubleRegister(), kScratchRegister);

@@ -2718,16 +2728,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ Pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
       break;
     }
-    case kX64I64x2ReplaceLane: {
-      if (HasRegisterInput(instr, 2)) {
-        __ Pinsrq(i.OutputSimd128Register(), i.InputRegister(2),
-                  i.InputUint8(1));
-      } else {
-        __ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2),
-                  i.InputUint8(1));
-      }
-      break;
-    }
     case kX64I64x2Neg: {
       XMMRegister dst = i.OutputSimd128Register();
       XMMRegister src = i.InputSimd128Register(0);

@@ -2826,16 +2826,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
       break;
     }
-    case kX64I32x4ReplaceLane: {
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister src = i.InputSimd128Register(0);
-      if (HasRegisterInput(instr, 2)) {
-        __ Pinsrd(dst, src, i.InputRegister(2), i.InputInt8(1));
-      } else {
-        __ Pinsrd(dst, src, i.InputOperand(2), i.InputInt8(1));
-      }
-      break;
-    }
     case kX64I32x4SConvertF32x4: {
       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
       XMMRegister dst = i.OutputSimd128Register();

@@ -3056,16 +3046,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ movsxwl(dst, dst);
       break;
     }
-    case kX64I16x8ReplaceLane: {
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister src = i.InputSimd128Register(0);
-      if (HasRegisterInput(instr, 2)) {
-        __ Pinsrw(dst, src, i.InputRegister(2), i.InputInt8(1));
-      } else {
-        __ Pinsrw(dst, src, i.InputOperand(2), i.InputInt8(1));
-      }
-      break;
-    }
     case kX64I16x8SConvertI8x16Low: {
       __ Pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
       break;

@@ -3248,52 +3228,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ movsxbl(dst, dst);
       break;
     }
-    case kX64I8x16ReplaceLane: {
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister src = i.InputSimd128Register(0);
-      if (HasRegisterInput(instr, 2)) {
-        __ Pinsrb(dst, src, i.InputRegister(2), i.InputInt8(1));
-      } else {
-        __ Pinsrb(dst, src, i.InputOperand(2), i.InputInt8(1));
-      }
-      break;
-    }
     case kX64Pinsrb: {
-      // TODO(zhin): consolidate this opcode with the other usages, like
-      // ReplaceLane, by implementing support when this has no addressing mode.
-      DCHECK(HasAddressingMode(instr));
-      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
-      size_t offset = 0;
-      Operand mem = i.MemoryOperand(&offset);
-      __ Pinsrb(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
-                mem, i.InputUint8(offset));
+      ASSEMBLE_PINSR(Pinsrb);
       break;
     }
     case kX64Pinsrw: {
-      DCHECK(HasAddressingMode(instr));
-      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
-      size_t offset = 0;
-      Operand mem = i.MemoryOperand(&offset);
-      __ Pinsrw(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
-                mem, i.InputUint8(offset));
+      ASSEMBLE_PINSR(Pinsrw);
       break;
     }
     case kX64Pinsrd: {
-      DCHECK(HasAddressingMode(instr));
-      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
-      size_t offset = 0;
-      Operand mem = i.MemoryOperand(&offset);
-      __ Pinsrd(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
-                mem, i.InputUint8(offset));
+      ASSEMBLE_PINSR(Pinsrd);
       break;
     }
     case kX64Pinsrq: {
-      DCHECK(HasAddressingMode(instr));
-      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
-      size_t offset = 0;
-      Operand mem = i.MemoryOperand(&offset);
-      __ Pinsrq(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
-                mem, i.InputUint8(offset));
+      ASSEMBLE_PINSR(Pinsrq);
       break;
    }
     case kX64I8x16SConvertI16x8: {
...
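
The ASSEMBLE_PINSR macro above funnels the four Pinsr opcodes (and, via them,
the five former ReplaceLane cases) through one shape. Semantically, each
pinsr{b,w,d,q} is "copy the 128-bit source, then overwrite one lane"; a scalar
model of that behavior (a sketch for exposition, not V8 code):

  #include <cstdint>
  #include <cstring>

  // dst = src with lane `laneidx` replaced by `value`. Instantiate Lane as
  // uint8_t/uint16_t/uint32_t/uint64_t to model pinsrb/w/d/q respectively.
  template <typename Lane>
  void PinsrModel(uint8_t dst[16], const uint8_t src[16], Lane value,
                  unsigned laneidx) {
    std::memcpy(dst, src, 16);  // non-destructive: start from a copy of src
    std::memcpy(dst + laneidx * sizeof(Lane), &value, sizeof(Lane));
  }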
@@ -156,7 +156,6 @@ namespace compiler {
   V(X64Peek)                   \
   V(X64F64x2Splat)             \
   V(X64F64x2ExtractLane)       \
-  V(X64F64x2ReplaceLane)       \
   V(X64F64x2Abs)               \
   V(X64F64x2Neg)               \
   V(X64F64x2Sqrt)              \

@@ -203,7 +202,6 @@ namespace compiler {
   V(X64F32x4Round)             \
   V(X64I64x2Splat)             \
   V(X64I64x2ExtractLane)       \
-  V(X64I64x2ReplaceLane)       \
   V(X64I64x2Neg)               \
   V(X64I64x2BitMask)           \
   V(X64I64x2Shl)               \

@@ -215,7 +213,6 @@ namespace compiler {
   V(X64I64x2ShrU)              \
   V(X64I32x4Splat)             \
   V(X64I32x4ExtractLane)       \
-  V(X64I32x4ReplaceLane)       \
   V(X64I32x4SConvertF32x4)     \
   V(X64I32x4SConvertI16x8Low)  \
   V(X64I32x4SConvertI16x8High) \

@@ -246,7 +243,6 @@ namespace compiler {
   V(X64I16x8Splat)             \
   V(X64I16x8ExtractLaneU)      \
   V(X64I16x8ExtractLaneS)      \
-  V(X64I16x8ReplaceLane)       \
   V(X64I16x8SConvertI8x16Low)  \
   V(X64I16x8SConvertI8x16High) \
   V(X64I16x8Neg)               \

@@ -281,7 +277,6 @@ namespace compiler {
   V(X64I8x16Splat)             \
   V(X64I8x16ExtractLaneU)      \
   V(X64I8x16ExtractLaneS)      \
-  V(X64I8x16ReplaceLane)       \
   V(X64Pinsrb)                 \
   V(X64Pinsrw)                 \
   V(X64Pinsrd)                 \
...
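
The V(...) lists in this header are X-macros: removing a name here removes it
from the opcode enum and from every switch generated from the same list, which
is why the ReplaceLane deletions need no further bookkeeping. A minimal sketch
of the pattern (demo names, not V8's):

  #define DEMO_LIST(V) \
    V(X64Pinsrb)       \
    V(X64Pinsrq)

  #define AS_ENUM(Name) k##Name,
  enum DemoOpcode { DEMO_LIST(AS_ENUM) };  // expands to kX64Pinsrb, kX64Pinsrq,
  #undef AS_ENUM

  #define AS_CASE(Name) case k##Name:
  bool IsPinsr(DemoOpcode op) {
    switch (op) {
      DEMO_LIST(AS_CASE)  // expands to: case kX64Pinsrb: case kX64Pinsrq:
      return true;
      default:
        return false;
    }
  }
  #undef AS_CASE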
@@ -126,9 +126,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64Lea:
     case kX64Dec32:
     case kX64Inc32:
+    case kX64Pinsrb:
+    case kX64Pinsrw:
+    case kX64Pinsrd:
+    case kX64Pinsrq:
     case kX64F64x2Splat:
     case kX64F64x2ExtractLane:
-    case kX64F64x2ReplaceLane:
     case kX64F64x2Abs:
     case kX64F64x2Neg:
     case kX64F64x2Sqrt:

@@ -175,7 +178,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64F32x4Round:
     case kX64I64x2Splat:
     case kX64I64x2ExtractLane:
-    case kX64I64x2ReplaceLane:
     case kX64I64x2Neg:
     case kX64I64x2BitMask:
     case kX64I64x2Shl:

@@ -187,7 +189,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64I64x2ShrU:
     case kX64I32x4Splat:
     case kX64I32x4ExtractLane:
-    case kX64I32x4ReplaceLane:
     case kX64I32x4SConvertF32x4:
     case kX64I32x4SConvertI16x8Low:
     case kX64I32x4SConvertI16x8High:

@@ -218,7 +219,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64I16x8Splat:
     case kX64I16x8ExtractLaneU:
     case kX64I16x8ExtractLaneS:
-    case kX64I16x8ReplaceLane:
     case kX64I16x8SConvertI8x16Low:
     case kX64I16x8SConvertI8x16High:
     case kX64I16x8Neg:

@@ -253,11 +253,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64I8x16Splat:
     case kX64I8x16ExtractLaneU:
     case kX64I8x16ExtractLaneS:
-    case kX64I8x16ReplaceLane:
-    case kX64Pinsrb:
-    case kX64Pinsrw:
-    case kX64Pinsrd:
-    case kX64Pinsrq:
     case kX64I8x16SConvertI16x8:
     case kX64I8x16Neg:
     case kX64I8x16Shl:
...
@@ -361,17 +361,19 @@ void InstructionSelector::VisitLoadLane(Node* node) {
   X64OperandGenerator g(this);
   InstructionOperand outputs[] = {g.DefineAsRegister(node)};
-  // GetEffectiveAddressMemoryOperand uses up to 3 inputs, 4th is laneidx, 5th
-  // is the value node.
+  // Input 0 is value node, 1 is lane idx, and GetEffectiveAddressMemoryOperand
+  // uses up to 3 inputs. This ordering is consistent with other operations that
+  // use the same opcode.
   InstructionOperand inputs[5];
   size_t input_count = 0;
-  inputs[input_count++] = g.UseRegister(node->InputAt(2));
-  inputs[input_count++] = g.UseImmediate(params.laneidx);
   AddressingMode mode =
       g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
   opcode |= AddressingModeField::encode(mode);
+  inputs[input_count++] = g.UseImmediate(params.laneidx);
+  inputs[input_count++] = g.UseRegister(node->InputAt(2));
   DCHECK_GE(5, input_count);
 
   // x64 supports unaligned loads.

@@ -2963,15 +2965,31 @@ SIMD_VISIT_EXTRACT_LANE(I8x16, U)
 SIMD_VISIT_EXTRACT_LANE(I8x16, S)
 #undef SIMD_VISIT_EXTRACT_LANE
 
-#define VISIT_SIMD_REPLACE_LANE(Type)                              \
-  void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
-    X64OperandGenerator g(this);                                   \
-    int32_t lane = OpParameter<int32_t>(node->op());               \
-    Emit(kX64##Type##ReplaceLane, g.DefineSameAsFirst(node),       \
-         g.UseRegister(node->InputAt(0)), g.UseImmediate(lane),    \
-         g.Use(node->InputAt(1)));                                 \
+void InstructionSelector::VisitF32x4ReplaceLane(Node* node) {
+  X64OperandGenerator g(this);
+  int32_t lane = OpParameter<int32_t>(node->op());
+  Emit(kX64F32x4ReplaceLane, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.UseImmediate(lane),
+       g.Use(node->InputAt(1)));
+}
+
+#define VISIT_SIMD_REPLACE_LANE(TYPE, OPCODE)                               \
+  void InstructionSelector::Visit##TYPE##ReplaceLane(Node* node) {          \
+    X64OperandGenerator g(this);                                            \
+    int32_t lane = OpParameter<int32_t>(node->op());                        \
+    Emit(OPCODE, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), \
+         g.UseImmediate(lane), g.Use(node->InputAt(1)));                    \
   }
-SIMD_TYPES(VISIT_SIMD_REPLACE_LANE)
+
+#define SIMD_TYPES_FOR_REPLACE_LANE(V) \
+  V(F64x2, kX64Pinsrq)                 \
+  V(I64x2, kX64Pinsrq)                 \
+  V(I32x4, kX64Pinsrd)                 \
+  V(I16x8, kX64Pinsrw)                 \
+  V(I8x16, kX64Pinsrb)
+
+SIMD_TYPES_FOR_REPLACE_LANE(VISIT_SIMD_REPLACE_LANE)
+#undef SIMD_TYPES_FOR_REPLACE_LANE
 #undef VISIT_SIMD_REPLACE_LANE
 
 #define VISIT_SIMD_SHIFT(Opcode) \
...
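
For a single instantiation, V(F64x2, kX64Pinsrq) expands the new macro into
roughly:

  void InstructionSelector::VisitF64x2ReplaceLane(Node* node) {
    X64OperandGenerator g(this);
    int32_t lane = OpParameter<int32_t>(node->op());
    Emit(kX64Pinsrq, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)),
         g.UseImmediate(lane), g.Use(node->InputAt(1)));
  }

Note the output constraint also changes from DefineSameAsFirst to
DefineAsRegister; presumably the merged code-generator path no longer needs
the destination to alias the first input (the macro-assembler Pinsr* helpers
take separate dst and src registers).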