Commit ab23ff3c authored by Ng Zhi An, committed by Commit Bot

[ia32][wasm-simd] Fix aligned moves in codegen

For SIMD instructions that use aligned moves (like movaps or movapd), we
don't yet guarantee the memory alignment those instructions require.
Switch to unaligned moves (movupd, vmovups).

Bug: v8:9198
Bug: v8:10831
Change-Id: Ic60fba5d08dda9676f6091ce505ac7be54957d00
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2380240
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69613}
parent c44efad0
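
Background on the fix: on ia32, movaps/movapd require their 128-bit memory
operand to be 16-byte aligned and raise a general-protection fault (surfacing
as SIGSEGV) otherwise, while movups/movupd accept any address. The same
distinction is visible from C++ with SSE2 intrinsics; a standalone sketch
(illustrative only, not V8 code):

#include <emmintrin.h>  // SSE2 intrinsics
#include <cstdio>

int main() {
  // buf itself is 16-byte aligned, so &buf[1] is only 8-byte aligned.
  alignas(16) double buf[4] = {1.0, 2.0, 3.0, 4.0};

  // Unaligned load: compiles to movupd and works for any address.
  __m128d ok = _mm_loadu_pd(&buf[1]);

  // Aligned load: compiles to movapd and would fault on &buf[1].
  // Shown commented out for that reason.
  // __m128d crash = _mm_load_pd(&buf[1]);

  double out[2];
  _mm_storeu_pd(out, ok);
  printf("%f %f\n", out[0], out[1]);  // 2.000000 3.000000
  return 0;
}

Until Wasm memories give V8 the alignment guarantee tracked in v8:9198, only
the unaligned forms are safe for SIMD loads from arbitrary addresses.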
src/codegen/ia32/assembler-ia32.h
@@ -963,6 +963,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void movapd(XMMRegister dst, Operand src) {
     sse2_instr(dst, src, 0x66, 0x0F, 0x28);
   }
+  void movupd(XMMRegister dst, Operand src) {
+    sse2_instr(dst, src, 0x66, 0x0F, 0x10);
+  }
   void movmskpd(Register dst, XMMRegister src);
   void movmskps(Register dst, XMMRegister src);
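
The 0x66, 0x0F, opcode triples passed to sse2_instr are the standard SSE2
encodings: movapd is 66 0F 28 /r and movupd is 66 0F 10 /r, so the aligned and
unaligned loads differ only in the last opcode byte. A minimal emitter sketch
for the register-register case (my simplification; the real Assembler also
handles memory operands, buffer growth, and so on):

#include <cstdint>
#include <cstdio>
#include <vector>

// Emits prefix, two-byte-opcode escape, opcode, then a ModR/M byte
// selecting the register-register form (mod = 11b).
void sse2_instr(std::vector<uint8_t>& buf, int dst, int src,
                uint8_t prefix, uint8_t escape, uint8_t opcode) {
  buf.push_back(prefix);                   // 0x66 selects the pd variant
  buf.push_back(escape);                   // 0x0F opcode escape
  buf.push_back(opcode);                   // 0x28 = movapd, 0x10 = movupd
  buf.push_back(0xC0 | (dst << 3) | src);  // ModR/M: reg = dst, rm = src
}

int main() {
  std::vector<uint8_t> buf;
  sse2_instr(buf, 0, 1, 0x66, 0x0F, 0x10);
  for (uint8_t b : buf) printf("%02X ", b);  // 66 0F 10 C1 = movupd xmm0,xmm1
  printf("\n");
  return 0;
}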
@@ -1338,6 +1341,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void vmovapd(XMMRegister dst, Operand src) { vpd(0x28, dst, xmm0, src); }
   void vmovups(XMMRegister dst, XMMRegister src) { vmovups(dst, Operand(src)); }
   void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
+  void vmovupd(XMMRegister dst, Operand src) { vpd(0x10, dst, xmm0, src); }
   void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
     vshufps(dst, src1, Operand(src2), imm8);
   }
......
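
The AVX forms above go through the vps/vpd helpers, which swap the legacy
66/0F bytes for a VEX prefix but keep the same opcodes (0x10 unaligned, 0x28
aligned). Note that vmovapd still faults on an unaligned memory operand; among
VEX loads only the explicitly unaligned ones are safe without the alignment
guarantee. A sketch of the two-byte VEX prefix used by vmovupd xmm, xmm/m128
(VEX.128.66.0F 10), to the best of my reading of the encoding rules:

#include <cstdint>
#include <cstdio>
#include <vector>

// Two-byte VEX prefix: 0xC5, then one byte holding R, vvvv (both stored
// inverted), L, and pp. For vmovupd there is no second source, so vvvv is
// "unused" (1111b); L=0 selects 128-bit; pp=01b stands in for the legacy
// 0x66 prefix.
void emit_vex2(std::vector<uint8_t>& buf, uint8_t pp) {
  buf.push_back(0xC5);
  buf.push_back(0x80 /*inverted R*/ | 0x78 /*inverted vvvv=1111b*/ |
                0x00 /*L=0: 128-bit*/ | pp);
}

int main() {
  std::vector<uint8_t> buf;
  emit_vex2(buf, 0x01);  // pp=01 -> implied 66 prefix
  buf.push_back(0x10);   // opcode: unaligned move
  buf.push_back(0xC1);   // ModR/M: xmm0, xmm1
  for (uint8_t b : buf) printf("%02X ", b);  // C5 F9 10 C1 = vmovupd xmm0,xmm1
  printf("\n");
  return 0;
}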
src/codegen/ia32/macro-assembler-ia32.h
@@ -294,6 +294,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_OP2_WITH_TYPE(Movaps, movaps, XMMRegister, XMMRegister)
   AVX_OP2_WITH_TYPE(Movapd, movapd, XMMRegister, XMMRegister)
   AVX_OP2_WITH_TYPE(Movapd, movapd, XMMRegister, const Operand&)
+  AVX_OP2_WITH_TYPE(Movupd, movupd, XMMRegister, const Operand&)
   AVX_OP2_WITH_TYPE(Pmovmskb, pmovmskb, Register, XMMRegister)
   AVX_OP2_WITH_TYPE(Movmskps, movmskps, Register, XMMRegister)
......
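
AVX_OP2_WITH_TYPE generates the capitalized TurboAssembler wrappers used by
the code generator. Roughly (a sketch of the expansion, not the verbatim
macro), Movupd picks the encoding at run time:

// Approximate expansion of AVX_OP2_WITH_TYPE(Movupd, movupd,
// XMMRegister, const Operand&) inside TurboAssembler.
void Movupd(XMMRegister dst, const Operand& src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmovupd(dst, src);  // VEX-encoded form, added to the Assembler above
  } else {
    movupd(dst, src);   // plain SSE2 encoding
  }
}

This is why the code-generator changes below can simply call __ Movupd and
work on both AVX and SSE2-only hardware.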
src/compiler/backend/ia32/code-generator-ia32.cc
@@ -1968,7 +1968,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       tmp = i.TempSimd128Register(0);
       // The minpd instruction doesn't propagate NaNs and +0's in its first
       // operand. Perform minpd in both orders, merge the results, and adjust.
-      __ Movapd(tmp, src1);
+      __ Movupd(tmp, src1);
       __ Minpd(tmp, tmp, src);
       __ Minpd(dst, src, src1);
       // propagate -0's and NaNs, which may be non-canonical.
@@ -1987,7 +1987,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       tmp = i.TempSimd128Register(0);
       // The maxpd instruction doesn't propagate NaNs and +0's in its first
       // operand. Perform maxpd in both orders, merge the results, and adjust.
-      __ Movapd(tmp, src1);
+      __ Movupd(tmp, src1);
       __ Maxpd(tmp, tmp, src);
       __ Maxpd(dst, src, src1);
       // Find discrepancies.
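
Why minpd/maxpd run in both operand orders: these instructions are
asymmetric. When the inputs are unordered (a NaN) or compare equal (notably
-0.0 vs +0.0), they return the second operand, while wasm f64x2.min/max must
propagate NaNs and order -0.0 below +0.0. Evaluating both orders and merging
the results recovers the symmetric semantics; for min, OR-ing the two results
propagates the sign bit and any NaN. A scalar model of one lane (my
illustration, not V8 code):

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

// One minpd lane: returns b when (a < b) is false, i.e. for NaNs and
// for equal inputs such as -0.0 vs +0.0.
double minpd_lane(double a, double b) { return a < b ? a : b; }

uint64_t bits(double d) { uint64_t u; std::memcpy(&u, &d, sizeof u); return u; }
double from_bits(uint64_t u) { double d; std::memcpy(&d, &u, sizeof d); return d; }

int main() {
  double a = -0.0, b = 0.0;
  // One order loses the -0.0, the other keeps it:
  printf("min(a,b)=%g  min(b,a)=%g\n", minpd_lane(a, b), minpd_lane(b, a));
  // OR-ing the two results keeps the sign bit, matching wasm semantics.
  double merged = from_bits(bits(minpd_lane(a, b)) | bits(minpd_lane(b, a)));
  printf("merged=%g signbit=%d\n", merged, std::signbit(merged));
  return 0;
}

For NaNs, the OR of a NaN's bits with any other double still has an all-ones
exponent and a non-zero mantissa, so it is a NaN, though possibly
non-canonical; hence the adjustment steps that follow in the real code.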
@@ -2383,7 +2383,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       XMMRegister dst = i.OutputSimd128Register();
       Operand src1 = i.InputOperand(1);
       // See comment above for correction of maxps.
-      __ movaps(kScratchDoubleReg, src1);
+      __ vmovups(kScratchDoubleReg, src1);
       __ vmaxps(kScratchDoubleReg, kScratchDoubleReg, dst);
       __ vmaxps(dst, dst, src1);
       __ vxorps(dst, dst, kScratchDoubleReg);
......
src/diagnostics/ia32/disasm-ia32.cc
@@ -1173,6 +1173,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
       int mod, regop, rm, vvvv = vex_vreg();
       get_modrm(*current, &mod, &regop, &rm);
       switch (opcode) {
+        case 0x10:
+          AppendToBuffer("vmovupd %s,", NameOfXMMRegister(regop));
+          current += PrintRightXMMOperand(current);
+          break;
         case 0x28:
           AppendToBuffer("vmovapd %s,", NameOfXMMRegister(regop));
           current += PrintRightXMMOperand(current);
@@ -2108,7 +2112,13 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
       data += 2;
     } else if (*data == 0x0F) {
       data++;
-      if (*data == 0x28) {
+      if (*data == 0x10) {
+        data++;
+        int mod, regop, rm;
+        get_modrm(*data, &mod, &regop, &rm);
+        AppendToBuffer("movupd %s,", NameOfXMMRegister(regop));
+        data += PrintRightXMMOperand(data);
+      } else if (*data == 0x28) {
         data++;
         int mod, regop, rm;
         get_modrm(*data, &mod, &regop, &rm);
......
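
The decode paths added above key off the same bytes the assembler emits:
the legacy path matches 66 0F 10 (movupd) and the AVX path matches opcode
0x10 in the 0F map (vmovupd). A toy decoder for just the legacy case
(illustrative; the real DisassemblerIA32 handles far more):

#include <cstdint>
#include <cstdio>

// Decodes only the two-byte-escape, 66-prefixed moves touched here.
const char* decode_66_0f(uint8_t opcode) {
  switch (opcode) {
    case 0x10: return "movupd";  // unaligned move, added by this commit
    case 0x28: return "movapd";  // aligned move, already handled
    default:   return "(unhandled)";
  }
}

int main() {
  uint8_t code[] = {0x66, 0x0F, 0x10, 0xC1};  // movupd xmm0,xmm1
  if (code[0] == 0x66 && code[1] == 0x0F) {
    uint8_t modrm = code[3];
    int reg = (modrm >> 3) & 7, rm = modrm & 7;
    printf("%s xmm%d,xmm%d\n", decode_66_0f(code[2]), reg, rm);
  }
  return 0;
}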
test/cctest/test-disasm-ia32.cc
@@ -473,6 +473,7 @@ TEST(DisasmIa320) {
     __ movapd(xmm0, xmm1);
     __ movapd(xmm0, Operand(edx, 4));
+    __ movupd(xmm0, Operand(edx, 4));
     __ movd(xmm0, edi);
     __ movd(xmm0, Operand(ebx, ecx, times_4, 10000));
@@ -689,6 +690,7 @@ TEST(DisasmIa320) {
     __ vmovaps(xmm0, xmm1);
     __ vmovapd(xmm0, xmm1);
     __ vmovapd(xmm0, Operand(ebx, ecx, times_4, 10000));
+    __ vmovupd(xmm0, Operand(ebx, ecx, times_4, 10000));
     __ vshufps(xmm0, xmm1, xmm2, 3);
     __ vshufps(xmm0, xmm1, Operand(edx, 4), 3);
     __ vhaddps(xmm0, xmm1, xmm2);
......
test/mjsunit/mjsunit.status
@@ -1443,4 +1443,10 @@
   'compiler/serializer-transition-propagation': [SKIP],
 }], # variant == nci or variant == nci_as_midtier
 
+['nosse41', {
+  # Requires scalar lowering for 64x2 SIMD instructions, which are not
+  # implemented yet.
+  'regress/wasm/regress-10831': [SKIP],
+}], # nosse41
+
 ]
test/mjsunit/regress/wasm/regress-10831.js (new file)
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Flags: --experimental-wasm-simd

load('test/mjsunit/wasm/wasm-module-builder.js');

// This test was reduced from a test case provided at https://crbug.com/v8/10831.
// It exercises an aligned-load bug on ia32: some SIMD operations were using
// instructions that require aligned operands (like movaps and movapd), but we
// don't have the right memory alignment yet (see https://crbug.com/v8/9198),
// resulting in a SIGSEGV when running the generated code.
const builder = new WasmModuleBuilder();
builder.addType(makeSig([], [kWasmI32]));
// Generate function 1 (out of 1).
builder.addFunction(undefined, 0 /* sig */)
.addBodyWithEnd([
// signature: i_v
// body:
kExprI32Const, 0xfc, 0xb6, 0xed, 0x02, // i32.const
kSimdPrefix, kExprI8x16Splat, // i8x16.splat
kExprI32Const, 0xfc, 0x00, // i32.const
kSimdPrefix, kExprI8x16Splat, // i8x16.splat
kSimdPrefix, kExprI64x2Sub, 0x01, // i64x2.sub
kExprI32Const, 0x00, // i32.const
kSimdPrefix, kExprI8x16Splat, // i8x16.splat
kExprI32Const, 0x81, 0x96, 0xf0, 0xe3, 0x07, // i32.const
kSimdPrefix, kExprI8x16Splat, // i8x16.splat
kSimdPrefix, kExprF64x2Max, 0x01, // f64x2.max
kSimdPrefix, kExprI64x2Sub, 0x01, // i64x2.sub
kExprI32Const, 0x00, // i32.const
kSimdPrefix, kExprI8x16Splat, // i8x16.splat
kExprI32Const, 0x00, // i32.const
kExprI32Const, 0x0b, // i32.const
kExprI32LtU, // i32.lt_u
kSimdPrefix, kExprI8x16ReplaceLane, 0x00, // i8x16.replace_lane
kExprI32Const, 0xfc, 0xf8, 0x01, // i32.const
kSimdPrefix, kExprI8x16Splat, // i8x16.splat
kSimdPrefix, kExprF64x2Max, 0x01, // f64x2.max
kSimdPrefix, kExprI16x8MaxS, 0x01, // i16x8.max_s
kSimdPrefix, kExprV8x16AllTrue, // v8x16.all_true
kExprEnd, // end @70
]);
builder.addExport('main', 0);
const instance = builder.instantiate();
print(instance.exports.main());