Commit f3cbcdb2 authored by Ng Zhi An's avatar Ng Zhi An Committed by Commit Bot

[wasm-simd] Fix f64x2 replace lane

There are a couple of bugs here:

1. The immediate used for vinsertps is wrong when lane == 1, the first
two bits specify which element of the source is copied, and it should
always be 00, 01 to copy the first 2 lanes of source.
2. For both cases, the second insertps call should be using dst as the
src, since dst was already updated by the first insertps call, it was
incorrectly using the old value of src. This was probably working
correctly because in many cases dst and src happened to be the same
register.
3. rep cannot be same as dst, because dst is overwritten, and rep should
stay the same

I also modified the F64x2ReplaceLane to test separately for replacing
lane 0 and lane 1.

Fixed bug 3. for arm and arm64.

Bug: v8:9728
Change-Id: Iec6e48bcfbc7d27908dd86d5f113a8b5dedd499b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1877055Reviewed-by: 's avatarDeepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64620}
parent 3c6ecf71
......@@ -133,7 +133,7 @@ void VisitRRIR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
int32_t imm = OpParameter<int32_t>(node->op());
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseImmediate(imm),
g.UseRegister(node->InputAt(1)));
g.UseUniqueRegister(node->InputAt(1)));
}
template <IrOpcode::Value kOpcode, int kImmMin, int kImmMax,
......
......@@ -179,7 +179,7 @@ void VisitRRIR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
int32_t imm = OpParameter<int32_t>(node->op());
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseImmediate(imm),
g.UseRegister(node->InputAt(1)));
g.UseUniqueRegister(node->InputAt(1)));
}
struct ExtendingLoadMatcher {
......
......@@ -1881,14 +1881,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister src = i.InputSimd128Register(0);
int8_t lane = i.InputInt8(1);
DoubleRegister rep = i.InputDoubleRegister(2);
DCHECK_NE(dst, rep);
DCHECK_LT(lane, 2);
if (lane == 0) {
__ vinsertps(dst, src, rep, 0b00000000);
__ vinsertps(dst, src, rep, 0b01010000);
__ vinsertps(dst, dst, rep, 0b01010000);
} else {
__ vinsertps(dst, src, rep, 0b10100000);
__ vinsertps(dst, src, rep, 0b11110000);
__ vinsertps(dst, src, rep, 0b00100000);
__ vinsertps(dst, dst, rep, 0b01110000);
}
break;
}
......
......@@ -2211,7 +2211,7 @@ VISIT_SIMD_REPLACE_LANE(F32x4)
InstructionOperand operand0 = g.UseRegister(node->InputAt(0)); \
InstructionOperand operand1 = \
g.UseImmediate(OpParameter<int32_t>(node->op())); \
InstructionOperand operand2 = g.UseRegister(node->InputAt(1)); \
InstructionOperand operand2 = g.UseUniqueRegister(node->InputAt(1)); \
if (IsSupported(AVX)) { \
Emit(kAVX##Type##ReplaceLane, g.DefineAsRegister(node), operand0, \
operand1, operand2); \
......
......@@ -1136,21 +1136,25 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLane) {
WASM_SIMD_TEST_NO_LOWERING(F64x2ReplaceLane) {
WasmRunner<int32_t> r(execution_tier, lower_simd);
// Set up a global to hold input/output vector.
double* g = r.builder().AddGlobal<double>(kWasmS128);
// Set up globals to hold input/output vector.
double* g0 = r.builder().AddGlobal<double>(kWasmS128);
double* g1 = r.builder().AddGlobal<double>(kWasmS128);
// Build function to replace each lane with its (FP) index.
byte temp1 = r.AllocateLocal(kWasmS128);
BUILD(r, WASM_SET_LOCAL(temp1, WASM_SIMD_F64x2_SPLAT(WASM_F64(1e100))),
WASM_SET_LOCAL(temp1, WASM_SIMD_F64x2_REPLACE_LANE(
0, WASM_GET_LOCAL(temp1), WASM_F64(0.0f))),
// Replace lane 0.
WASM_SET_GLOBAL(0, WASM_SIMD_F64x2_REPLACE_LANE(
0, WASM_GET_LOCAL(temp1), WASM_F64(0.0f))),
// Replace lane 1.
WASM_SET_GLOBAL(1, WASM_SIMD_F64x2_REPLACE_LANE(
1, WASM_GET_LOCAL(temp1), WASM_F64(1.0f))),
WASM_ONE);
r.Call();
for (int i = 0; i < 2; i++) {
CHECK_EQ(static_cast<double>(i), ReadLittleEndianValue<double>(&g[i]));
}
CHECK_EQ(0., ReadLittleEndianValue<double>(&g0[0]));
CHECK_EQ(1e100, ReadLittleEndianValue<double>(&g0[1]));
CHECK_EQ(1e100, ReadLittleEndianValue<double>(&g1[0]));
CHECK_EQ(1., ReadLittleEndianValue<double>(&g1[1]));
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment