Commit affc3646 authored by Ng Zhi An, committed by Commit Bot

Reland "[wasm-simd][x64] Cleanup unnecessary CpuFeatureScope"

This relands commit df204288 with the
additional fix to Haddps to enable SSE3 scope.

There are a couple more left in some i64x2 ops, but those are not in the
proposal, so I've left them as they are.

Bug: v8:9561
Change-Id: I88fd5f88809e629f53a65d67ea5b11562111b44a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2136067
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67091}
parent e652f8da
...@@ -228,7 +228,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -228,7 +228,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Rcpps, rcpps) AVX_OP(Rcpps, rcpps)
AVX_OP(Rsqrtps, rsqrtps) AVX_OP(Rsqrtps, rsqrtps)
AVX_OP(Addps, addps) AVX_OP(Addps, addps)
AVX_OP(Haddps, haddps)
AVX_OP(Subps, subps) AVX_OP(Subps, subps)
AVX_OP(Mulps, mulps) AVX_OP(Mulps, mulps)
AVX_OP(Divps, divps) AVX_OP(Divps, divps)
...@@ -249,6 +248,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -249,6 +248,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Cmpps, cmpps) AVX_OP(Cmpps, cmpps)
AVX_OP(Cmppd, cmppd) AVX_OP(Cmppd, cmppd)
AVX_OP(Movlhps, movlhps) AVX_OP(Movlhps, movlhps)
AVX_OP_SSE3(Haddps, haddps)
AVX_OP_SSE3(Movddup, movddup) AVX_OP_SSE3(Movddup, movddup)
AVX_OP_SSSE3(Phaddd, phaddd) AVX_OP_SSSE3(Phaddd, phaddd)
AVX_OP_SSSE3(Phaddw, phaddw) AVX_OP_SSSE3(Phaddw, phaddw)
......
...@@ -588,7 +588,6 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, ...@@ -588,7 +588,6 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \ #define ASSEMBLE_SIMD_ALL_TRUE(opcode) \
do { \ do { \
CpuFeatureScope sse_scope(tasm(), SSE4_1); \
Register dst = i.OutputRegister(); \ Register dst = i.OutputRegister(); \
XMMRegister tmp = i.TempSimd128Register(0); \ XMMRegister tmp = i.TempSimd128Register(0); \
__ xorq(dst, dst); \ __ xorq(dst, dst); \
...@@ -2278,7 +2277,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2278,7 +2277,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64F64x2Splat: { case kX64F64x2Splat: {
CpuFeatureScope sse_scope(tasm(), SSE3);
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsFPRegister()) { if (instr->InputAt(0)->IsFPRegister()) {
__ Movddup(dst, i.InputDoubleRegister(0)); __ Movddup(dst, i.InputDoubleRegister(0));
...@@ -2288,7 +2286,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2288,7 +2286,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64F64x2ReplaceLane: { case kX64F64x2ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (instr->InputAt(2)->IsFPRegister()) { if (instr->InputAt(2)->IsFPRegister()) {
__ Movq(kScratchRegister, i.InputDoubleRegister(2)); __ Movq(kScratchRegister, i.InputDoubleRegister(2));
__ Pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputInt8(1)); __ Pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputInt8(1));
...@@ -2298,7 +2295,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2298,7 +2295,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64F64x2ExtractLane: { case kX64F64x2ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ Pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1)); __ Pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
__ Movq(i.OutputDoubleRegister(), kScratchRegister); __ Movq(i.OutputDoubleRegister(), kScratchRegister);
break; break;
...@@ -2443,7 +2439,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2443,7 +2439,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64F32x4UConvertI32x4: { case kX64F32x4UConvertI32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg); DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
__ Pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros __ Pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
__ Pblendw(kScratchDoubleReg, dst, __ Pblendw(kScratchDoubleReg, dst,
...@@ -2504,7 +2499,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2504,7 +2499,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kX64F32x4AddHoriz: { case kX64F32x4AddHoriz: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE3);
__ Haddps(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
...@@ -2611,7 +2605,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2611,7 +2605,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I64x2Splat: { case kX64I64x2Splat: {
CpuFeatureScope sse_scope(tasm(), SSE3);
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
if (HasRegisterInput(instr, 0)) { if (HasRegisterInput(instr, 0)) {
__ Movq(dst, i.InputRegister(0)); __ Movq(dst, i.InputRegister(0));
...@@ -2622,12 +2615,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2622,12 +2615,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I64x2ExtractLane: { case kX64I64x2ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ Pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1)); __ Pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
break; break;
} }
case kX64I64x2ReplaceLane: { case kX64I64x2ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (HasRegisterInput(instr, 2)) { if (HasRegisterInput(instr, 2)) {
__ Pinsrq(i.OutputSimd128Register(), i.InputRegister(2), __ Pinsrq(i.OutputSimd128Register(), i.InputRegister(2),
i.InputInt8(1)); i.InputInt8(1));
...@@ -2654,7 +2645,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2654,7 +2645,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kX64I64x2ShrS: { case kX64I64x2ShrS: {
// TODO(zhin): there is vpsraq but requires AVX512 // TODO(zhin): there is vpsraq but requires AVX512
CpuFeatureScope sse_scope(tasm(), SSE4_1);
// ShrS on each quadword one at a time // ShrS on each quadword one at a time
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0); XMMRegister src = i.InputSimd128Register(0);
...@@ -2891,12 +2881,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2891,12 +2881,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I32x4ExtractLane: { case kX64I32x4ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1)); __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
break; break;
} }
case kX64I32x4ReplaceLane: { case kX64I32x4ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (HasRegisterInput(instr, 2)) { if (HasRegisterInput(instr, 2)) {
__ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2), __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
i.InputInt8(1)); i.InputInt8(1));
...@@ -2925,7 +2913,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2925,7 +2913,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I32x4SConvertI16x8Low: { case kX64I32x4SConvertI16x8Low: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ Pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
break; break;
} }
...@@ -2936,7 +2923,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2936,7 +2923,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I32x4Neg: { case kX64I32x4Neg: {
CpuFeatureScope sse_scope(tasm(), SSSE3);
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0); XMMRegister src = i.InputSimd128Register(0);
if (dst == src) { if (dst == src) {
...@@ -2963,7 +2949,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2963,7 +2949,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I32x4AddHoriz: { case kX64I32x4AddHoriz: {
CpuFeatureScope sse_scope(tasm(), SSSE3);
__ Phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
...@@ -2972,17 +2957,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2972,17 +2957,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I32x4Mul: { case kX64I32x4Mul: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ Pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
case kX64I32x4MinS: { case kX64I32x4MinS: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ Pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
case kX64I32x4MaxS: { case kX64I32x4MaxS: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ Pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
...@@ -3002,7 +2984,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3002,7 +2984,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I32x4GeS: { case kX64I32x4GeS: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1); XMMRegister src = i.InputSimd128Register(1);
__ Pminsd(dst, src); __ Pminsd(dst, src);
...@@ -3011,7 +2992,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3011,7 +2992,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kX64I32x4UConvertF32x4: { case kX64I32x4UConvertF32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
XMMRegister tmp = i.TempSimd128Register(0); XMMRegister tmp = i.TempSimd128Register(0);
XMMRegister tmp2 = i.TempSimd128Register(1); XMMRegister tmp2 = i.TempSimd128Register(1);
...@@ -3054,17 +3034,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3054,17 +3034,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I32x4MinU: { case kX64I32x4MinU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ Pminud(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
case kX64I32x4MaxU: { case kX64I32x4MaxU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ Pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
case kX64I32x4GtU: { case kX64I32x4GtU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1); XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.TempSimd128Register(0); XMMRegister tmp = i.TempSimd128Register(0);
...@@ -3075,7 +3052,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3075,7 +3052,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I32x4GeU: { case kX64I32x4GeU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1); XMMRegister src = i.InputSimd128Register(1);
__ Pminud(dst, src); __ Pminud(dst, src);
...@@ -3699,37 +3675,31 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3699,37 +3675,31 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I16x8Load8x8S: { case kX64I16x8Load8x8S: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand()); __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
break; break;
} }
case kX64I16x8Load8x8U: { case kX64I16x8Load8x8U: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand()); __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
break; break;
} }
case kX64I32x4Load16x4S: { case kX64I32x4Load16x4S: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand()); __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
break; break;
} }
case kX64I32x4Load16x4U: { case kX64I32x4Load16x4U: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand()); __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
break; break;
} }
case kX64I64x2Load32x2S: { case kX64I64x2Load32x2S: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand()); __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
break; break;
} }
case kX64I64x2Load32x2U: { case kX64I64x2Load32x2U: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand()); __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
break; break;
...@@ -3938,7 +3908,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3938,7 +3908,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64S1x4AnyTrue: case kX64S1x4AnyTrue:
case kX64S1x8AnyTrue: case kX64S1x8AnyTrue:
case kX64S1x16AnyTrue: { case kX64S1x16AnyTrue: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
Register dst = i.OutputRegister(); Register dst = i.OutputRegister();
XMMRegister src = i.InputSimd128Register(0); XMMRegister src = i.InputSimd128Register(0);
...@@ -3952,7 +3921,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3952,7 +3921,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1 // 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1
// respectively. // respectively.
case kX64S1x2AllTrue: { case kX64S1x2AllTrue: {
ASSEMBLE_SIMD_ALL_TRUE(pcmpeqq); ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq);
break; break;
} }
case kX64S1x4AllTrue: { case kX64S1x4AllTrue: {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment