Commit 44d3ae70 authored by Ng Zhi An's avatar Ng Zhi An Committed by Commit Bot

Reland "[wasm-simd][x64][ia32] Do not overwrite input register"

This relands commit 7d955faa.

Changed the test case to use i16x8 splat instead of i8x16 splat,
the latter was causing issues when doing scalar lowering. This
change still causes the regression test to fail without the fix.

Original change's description:
> [wasm-simd][x64][ia32] Do not overwrite input register
>
> We are ovewriting input register (contains the shift) when we are
> masking it, instead, move to a temporary,then mask it.
>
> Bug: chromium:1065599
> Change-Id: Iab72b94581239447e444746681387350b576e24a
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2125941
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#66997}

Bug: chromium:1065599
Change-Id: I0dc78ddb013652ef88c07d065c3f6877937c5300
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2136220Reviewed-by: 's avatarDeepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67026}
parent cff2617b
...@@ -501,10 +501,11 @@ class OutOfLineRecordWrite final : public OutOfLineCode { ...@@ -501,10 +501,11 @@ class OutOfLineRecordWrite final : public OutOfLineCode {
__ opcode(dst, dst, static_cast<byte>(i.InputInt##width(1))); \ __ opcode(dst, dst, static_cast<byte>(i.InputInt##width(1))); \
} else { \ } else { \
XMMRegister tmp = i.TempSimd128Register(0); \ XMMRegister tmp = i.TempSimd128Register(0); \
Register shift = i.InputRegister(1); \ Register tmp_shift = i.TempRegister(1); \
constexpr int mask = (1 << width) - 1; \ constexpr int mask = (1 << width) - 1; \
__ and_(shift, Immediate(mask)); \ __ mov(tmp_shift, i.InputRegister(1)); \
__ Movd(tmp, shift); \ __ and_(tmp_shift, Immediate(mask)); \
__ Movd(tmp, tmp_shift); \
__ opcode(dst, dst, tmp); \ __ opcode(dst, dst, tmp); \
} \ } \
} while (false) } while (false)
...@@ -3176,18 +3177,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3176,18 +3177,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pshufd(tmp_simd, tmp_simd, 0); __ Pshufd(tmp_simd, tmp_simd, 0);
__ Pand(dst, tmp_simd); __ Pand(dst, tmp_simd);
} else { } else {
Register shift = i.InputRegister(1);
// Take shift value modulo 8. // Take shift value modulo 8.
__ and_(shift, 7); __ mov(tmp, i.InputRegister(1));
__ and_(tmp, 7);
// Mask off the unwanted bits before word-shifting. // Mask off the unwanted bits before word-shifting.
__ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
__ mov(tmp, shift);
__ add(tmp, Immediate(8)); __ add(tmp, Immediate(8));
__ Movd(tmp_simd, tmp); __ Movd(tmp_simd, tmp);
__ Psrlw(kScratchDoubleReg, kScratchDoubleReg, tmp_simd); __ Psrlw(kScratchDoubleReg, kScratchDoubleReg, tmp_simd);
__ Packuswb(kScratchDoubleReg, kScratchDoubleReg); __ Packuswb(kScratchDoubleReg, kScratchDoubleReg);
__ Pand(dst, kScratchDoubleReg); __ Pand(dst, kScratchDoubleReg);
__ Movd(tmp_simd, shift); // TODO(zhin): sub here to avoid asking for another temporary register,
// examine codegen for other i8x16 shifts, they use less instructions.
__ sub(tmp, Immediate(8));
__ Movd(tmp_simd, tmp);
__ Psllw(dst, dst, tmp_simd); __ Psllw(dst, dst, tmp_simd);
} }
break; break;
......
...@@ -314,7 +314,7 @@ void VisitRROSimdShift(InstructionSelector* selector, Node* node, ...@@ -314,7 +314,7 @@ void VisitRROSimdShift(InstructionSelector* selector, Node* node,
} else { } else {
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0)); InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1)); InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
InstructionOperand temps[] = {g.TempSimd128Register()}; InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1, selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1,
arraysize(temps), temps); arraysize(temps), temps);
} }
......
...@@ -609,10 +609,11 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, ...@@ -609,10 +609,11 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
__ opcode(dst, static_cast<byte>(i.InputInt##width(1))); \ __ opcode(dst, static_cast<byte>(i.InputInt##width(1))); \
} else { \ } else { \
XMMRegister tmp = i.TempSimd128Register(0); \ XMMRegister tmp = i.TempSimd128Register(0); \
Register shift = i.InputRegister(1); \ Register tmp_shift = i.TempRegister(1); \
constexpr int mask = (1 << width) - 1; \ constexpr int mask = (1 << width) - 1; \
__ andq(shift, Immediate(mask)); \ __ movq(tmp_shift, i.InputRegister(1)); \
__ Movq(tmp, shift); \ __ andq(tmp_shift, Immediate(mask)); \
__ Movq(tmp, tmp_shift); \
__ opcode(dst, tmp); \ __ opcode(dst, tmp); \
} \ } \
} while (false) } while (false)
...@@ -3351,18 +3352,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3351,18 +3352,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pshufd(tmp_simd, tmp_simd, static_cast<uint8_t>(0)); __ Pshufd(tmp_simd, tmp_simd, static_cast<uint8_t>(0));
__ Pand(dst, tmp_simd); __ Pand(dst, tmp_simd);
} else { } else {
Register shift = i.InputRegister(1);
// Mask off the unwanted bits before word-shifting. // Mask off the unwanted bits before word-shifting.
__ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
// Take shift value modulo 8. // Take shift value modulo 8.
__ andq(shift, Immediate(7)); __ movq(tmp, i.InputRegister(1));
__ movq(tmp, shift); __ andq(tmp, Immediate(7));
__ addq(tmp, Immediate(8)); __ addq(tmp, Immediate(8));
__ Movq(tmp_simd, tmp); __ Movq(tmp_simd, tmp);
__ Psrlw(kScratchDoubleReg, tmp_simd); __ Psrlw(kScratchDoubleReg, tmp_simd);
__ Packuswb(kScratchDoubleReg, kScratchDoubleReg); __ Packuswb(kScratchDoubleReg, kScratchDoubleReg);
__ Pand(dst, kScratchDoubleReg); __ Pand(dst, kScratchDoubleReg);
__ Movq(tmp_simd, shift); // TODO(zhin): subq here to avoid asking for another temporary register,
// examine codegen for other i8x16 shifts, they use less instructions.
__ subq(tmp, Immediate(8));
__ Movq(tmp_simd, tmp);
__ Psllw(dst, tmp_simd); __ Psllw(dst, tmp_simd);
} }
break; break;
......
...@@ -2817,7 +2817,8 @@ SIMD_TYPES(VISIT_SIMD_REPLACE_LANE) ...@@ -2817,7 +2817,8 @@ SIMD_TYPES(VISIT_SIMD_REPLACE_LANE)
Emit(kX64##Opcode, g.DefineSameAsFirst(node), \ Emit(kX64##Opcode, g.DefineSameAsFirst(node), \
g.UseRegister(node->InputAt(0)), g.UseImmediate(node->InputAt(1))); \ g.UseRegister(node->InputAt(0)), g.UseImmediate(node->InputAt(1))); \
} else { \ } else { \
InstructionOperand temps[] = {g.TempSimd128Register()}; \ InstructionOperand temps[] = {g.TempSimd128Register(), \
g.TempRegister()}; \
Emit(kX64##Opcode, g.DefineSameAsFirst(node), \ Emit(kX64##Opcode, g.DefineSameAsFirst(node), \
g.UseUniqueRegister(node->InputAt(0)), \ g.UseUniqueRegister(node->InputAt(0)), \
g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \ g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \
......
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --experimental-wasm-simd
load('test/mjsunit/wasm/wasm-module-builder.js');
const builder = new WasmModuleBuilder();
builder.addMemory(16, 32, false);
builder.addType(makeSig([kWasmI32, kWasmI32, kWasmI32], [kWasmI32]));
// Generate function 1 (out of 1).
builder.addFunction(undefined, 0 /* sig */).addBodyWithEnd([
// signature: i_iii
// body:
kExprI32Const, 0xba, 0x01, // i32.const
kSimdPrefix, kExprI16x8Splat, // i16x8.splat
kExprMemorySize, 0x00, // memory.size
kSimdPrefix, kExprI16x8ShrS, // i16x8.shr_s
kSimdPrefix, kExprS1x16AnyTrue, // s1x16.any_true
kExprMemorySize, 0x00, // memory.size
kExprI32RemS, // i32.rem_s
kExprEnd, // end @15
]);
builder.addExport('main', 0);
const instance = builder.instantiate();
instance.exports.main(1, 2, 3);
...@@ -469,10 +469,13 @@ let kExprI64AtomicCompareExchange32U = 0x4e; ...@@ -469,10 +469,13 @@ let kExprI64AtomicCompareExchange32U = 0x4e;
// Simd opcodes. // Simd opcodes.
let kExprS128LoadMem = 0x00; let kExprS128LoadMem = 0x00;
let kExprS128StoreMem = 0x01; let kExprS128StoreMem = 0x01;
let kExprI16x8Splat = 0x08;
let kExprI32x4Splat = 0x0c; let kExprI32x4Splat = 0x0c;
let kExprF32x4Splat = 0x12; let kExprF32x4Splat = 0x12;
let kExprI32x4Eq = 0x2c; let kExprI32x4Eq = 0x2c;
let kExprS1x16AnyTrue = 0x52;
let kExprS1x8AnyTrue = 0x63; let kExprS1x8AnyTrue = 0x63;
let kExprI16x8ShrS = 0x66;
let kExprS1x4AllTrue = 0x75; let kExprS1x4AllTrue = 0x75;
let kExprI32x4Add = 0x79; let kExprI32x4Add = 0x79;
let kExprF32x4Min = 0x9e; let kExprF32x4Min = 0x9e;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment