Commit ff503fd4 authored by Bill Budge's avatar Bill Budge Committed by Commit Bot

Revert "[wasm-simd][arm] Use vmov to move all ones to register"

This reverts commit 57242a05.

Reason for revert: regression tests fails:
https://ci.chromium.org/p/v8/builders/ci/V8%20Linux%20-%20debug/31477

Original change's description:
> [wasm-simd][arm] Use vmov to move all ones to register
> 
> vceq(dst, dst, dst) does not seem to always set the register to all
> ones. The right way should be be to use vmov (immediate) anyway. This
> was not supported in the assembler yet, so we need changes to the
> assembler, diassembler, and simulator.
> 
> There is an unfortunate fork in logic in the simulator, due to the way
> the switches are set up, vmov (imm) logic is duplicated across two
> different cases, because the switch looks at the top bit of the
> immediate. Refactoring this will be a bigger change that is irrelevant
> for this bug, so I'm putting that off for now. Instead we extract the
> core of vmov (imm) into helpers and call it in the two cases.
> 
> Bug: chromium:1112124
> Change-Id: I283dbcd86cb0572e5ee720835f897b51fae96701
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2337503
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
> Reviewed-by: Bill Budge <bbudge@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#69315}

TBR=bbudge@chromium.org,jkummerow@chromium.org,v8-arm-ports@googlegroups.com,zhin@chromium.org

Change-Id: I5d9d1dcb81771f71001d959ec5a03a43a11c4233
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: chromium:1112124
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2347211Reviewed-by: 's avatarBill Budge <bbudge@chromium.org>
Commit-Queue: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69316}
parent 57242a05
...@@ -2621,28 +2621,16 @@ static void DoubleAsTwoUInt32(Double d, uint32_t* lo, uint32_t* hi) { ...@@ -2621,28 +2621,16 @@ static void DoubleAsTwoUInt32(Double d, uint32_t* lo, uint32_t* hi) {
*hi = i >> 32; *hi = i >> 32;
} }
static void WriteVmovIntImmEncoding(uint8_t imm, uint32_t* encoding) {
// Integer promotion from uint8_t to int makes these all okay.
*encoding = ((imm & 0x80) << (24 - 7)); // a
*encoding |= ((imm & 0x70) << (16 - 4)); // bcd
*encoding |= (imm & 0x0f); // efgh
}
// This checks if imm can be encoded into an immediate for vmov. // This checks if imm can be encoded into an immediate for vmov.
// See Table A7-15 in ARM DDI 0406C.d. // See Table A7-15 in ARM DDI 0406C.d.
// Currently only supports the first row and op=0 && cmode=1110. // Currently only supports the first row of the table.
static bool FitsVmovIntImm(uint64_t imm, uint32_t* encoding, uint8_t* cmode) { static bool FitsVmovImm64(uint64_t imm, uint32_t* encoding) {
uint32_t lo = imm & 0xFFFFFFFF; uint32_t lo = imm & 0xFFFFFFFF;
uint32_t hi = imm >> 32; uint32_t hi = imm >> 32;
if ((lo == hi && ((lo & 0xffffff00) == 0))) { if (lo == hi && ((lo & 0xffffff00) == 0)) {
WriteVmovIntImmEncoding(imm & 0xff, encoding); *encoding = ((lo & 0x80) << (24 - 7)); // a
*cmode = 0; *encoding |= ((lo & 0x70) << (16 - 4)); // bcd
return true; *encoding |= (lo & 0x0f); // efgh
} else if ((lo == hi) && ((lo & 0xffff) == (lo >> 16)) &&
((lo & 0xff) == (lo >> 24))) {
// Check that all bytes in imm are the same.
WriteVmovIntImmEncoding(imm & 0xff, encoding);
*cmode = 0xe;
return true; return true;
} }
...@@ -2651,17 +2639,15 @@ static bool FitsVmovIntImm(uint64_t imm, uint32_t* encoding, uint8_t* cmode) { ...@@ -2651,17 +2639,15 @@ static bool FitsVmovIntImm(uint64_t imm, uint32_t* encoding, uint8_t* cmode) {
void Assembler::vmov(const QwNeonRegister dst, uint64_t imm) { void Assembler::vmov(const QwNeonRegister dst, uint64_t imm) {
uint32_t enc; uint32_t enc;
uint8_t cmode; if (CpuFeatures::IsSupported(VFPv3) && FitsVmovImm64(imm, &enc)) {
uint8_t op = 0;
if (CpuFeatures::IsSupported(VFPv3) && FitsVmovIntImm(imm, &enc, &cmode)) {
CpuFeatureScope scope(this, VFPv3); CpuFeatureScope scope(this, VFPv3);
// Instruction details available in ARM DDI 0406C.b, A8-937. // Instruction details available in ARM DDI 0406C.b, A8-937.
// 001i1(27-23) | D(22) | 000(21-19) | imm3(18-16) | Vd(15-12) | cmode(11-8) // 001i1(27-23) | D(22) | 000(21-19) | imm3(18-16) | Vd(15-12) | cmode(11-8)
// | 0(7) | Q(6) | op(5) | 4(1) | imm4(3-0) // | 0(7) | Q(6) | op(5) | 4(1) | imm4(3-0)
int vd, d; int vd, d;
dst.split_code(&vd, &d); dst.split_code(&vd, &d);
emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | cmode * B8 | emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | 0x1 * B6 |
0x1 * B6 | op * B5 | 0x1 * B4 | enc); 0x1 * B4 | enc);
} else { } else {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
......
...@@ -2833,7 +2833,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2833,7 +2833,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kArmS128AllOnes: { case kArmS128AllOnes: {
__ vmov(i.OutputSimd128Register(), uint64_t{0xffff'ffff'ffff'ffff}); __ vceq(i.OutputSimd128Register(), i.OutputSimd128Register(),
i.OutputSimd128Register());
break; break;
} }
case kArmS128Dup: { case kArmS128Dup: {
......
...@@ -115,7 +115,6 @@ class Decoder { ...@@ -115,7 +115,6 @@ class Decoder {
void DecodeVCMP(Instruction* instr); void DecodeVCMP(Instruction* instr);
void DecodeVCVTBetweenDoubleAndSingle(Instruction* instr); void DecodeVCVTBetweenDoubleAndSingle(Instruction* instr);
void DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr); void DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr);
void DecodeVmovImmediate(Instruction* instr);
const disasm::NameConverter& converter_; const disasm::NameConverter& converter_;
Vector<char> out_buffer_; Vector<char> out_buffer_;
...@@ -1736,30 +1735,6 @@ void Decoder::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) { ...@@ -1736,30 +1735,6 @@ void Decoder::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) {
} }
} }
void Decoder::DecodeVmovImmediate(Instruction* instr) {
byte cmode = instr->Bits(11, 8);
int vd = instr->VFPDRegValue(kSimd128Precision);
int a = instr->Bit(24);
int bcd = instr->Bits(18, 16);
int efgh = instr->Bits(3, 0);
uint8_t imm = a << 7 | bcd << 4 | efgh;
switch (cmode) {
case 0: {
uint32_t imm32 = imm;
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmov.i32 q%d, %d", vd, imm32);
break;
}
case 0xe: {
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vmov.i8 q%d, %d", vd, imm);
break;
}
default:
Unknown(instr);
}
}
// Decode Type 6 coprocessor instructions. // Decode Type 6 coprocessor instructions.
// Dm = vmov(Rt, Rt2) // Dm = vmov(Rt, Rt2)
// <Rt, Rt2> = vmov(Dm) // <Rt, Rt2> = vmov(Dm)
...@@ -2035,7 +2010,21 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { ...@@ -2035,7 +2010,21 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
instr->Bit(7) == 0 && instr->Bit(4) == 1) { instr->Bit(7) == 0 && instr->Bit(4) == 1) {
// One register and a modified immediate value, see ARM DDI 0406C.d // One register and a modified immediate value, see ARM DDI 0406C.d
// A7.4.6. // A7.4.6.
DecodeVmovImmediate(instr); byte cmode = instr->Bits(11, 8);
switch (cmode) {
case 0: {
int vd = instr->VFPDRegValue(kSimd128Precision);
int a = instr->Bit(24);
int bcd = instr->Bits(18, 16);
int efgh = instr->Bits(3, 0);
int imm64 = a << 7 | bcd << 4 | efgh;
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmov.i32 q%d, %d", vd, imm64);
break;
}
default:
Unknown(instr);
}
} else if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && } else if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) { (instr->Bit(4) == 1)) {
// vmovl signed // vmovl signed
...@@ -2482,11 +2471,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { ...@@ -2482,11 +2471,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
out_buffer_pos_ += out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vmull.u%d q%d, d%d, d%d", SNPrintF(out_buffer_ + out_buffer_pos_, "vmull.u%d q%d, d%d, d%d",
size, Vd, Vn, Vm); size, Vd, Vn, Vm);
} else if (instr->Bits(21, 19) == 0 && instr->Bit(7) == 0 &&
instr->Bit(4) == 1) {
// One register and a modified immediate value, see ARM DDI 0406C.d
// A7.4.6.
DecodeVmovImmediate(instr);
} else { } else {
Unknown(instr); Unknown(instr);
} }
......
...@@ -4043,32 +4043,6 @@ uint16_t Multiply(uint16_t a, uint16_t b) { ...@@ -4043,32 +4043,6 @@ uint16_t Multiply(uint16_t a, uint16_t b) {
uint32_t result = static_cast<uint32_t>(a) * static_cast<uint32_t>(b); uint32_t result = static_cast<uint32_t>(a) * static_cast<uint32_t>(b);
return static_cast<uint16_t>(result); return static_cast<uint16_t>(result);
} }
void VmovImmediate(Simulator* simulator, Instruction* instr) {
byte cmode = instr->Bits(11, 8);
int vd = instr->VFPDRegValue(kSimd128Precision);
uint8_t imm = instr->Bit(24) << 7; // i
imm |= instr->Bits(18, 16) << 4; // imm3
imm |= instr->Bits(3, 0); // imm4
switch (cmode) {
case 0: {
// Set the LSB of each 64-bit halves.
uint64_t imm64 = imm;
simulator->set_neon_register(vd, {imm64, imm64});
break;
}
case 0xe: {
uint8_t imms[kSimd128Size];
// Set all bytes of register.
std::fill_n(imms, kSimd128Size, imm);
simulator->set_neon_register(vd, imms);
break;
}
default: {
UNIMPLEMENTED();
}
}
}
} // namespace } // namespace
template <typename T, int SIZE> template <typename T, int SIZE>
...@@ -4632,7 +4606,21 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { ...@@ -4632,7 +4606,21 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
// One register and a modified immediate value, see ARM DDI 0406C.d // One register and a modified immediate value, see ARM DDI 0406C.d
// A7.4.6. Handles vmov, vorr, vmvn, vbic. // A7.4.6. Handles vmov, vorr, vmvn, vbic.
// Only handle vmov.i32 for now. // Only handle vmov.i32 for now.
VmovImmediate(this, instr); byte cmode = instr->Bits(11, 8);
switch (cmode) {
case 0: {
// vmov.i32 Qd, #<imm>
int vd = instr->VFPDRegValue(kSimd128Precision);
uint64_t imm = instr->Bit(24) << 7; // i
imm |= instr->Bits(18, 16) << 4; // imm3
imm |= instr->Bits(3, 0); // imm4
imm |= imm << 32;
set_neon_register(vd, {imm, imm});
break;
}
default:
UNIMPLEMENTED();
}
} else if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && } else if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) { (instr->Bit(4) == 1)) {
// vmovl signed // vmovl signed
...@@ -5639,12 +5627,6 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { ...@@ -5639,12 +5627,6 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
default: default:
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
} else if (instr->Bits(21, 19) == 0 && instr->Bit(7) == 0 &&
instr->Bit(4) == 1) {
// vmov (immediate), see ARM DDI 0487F.b F6.1.134, decoding A4.
// Similar to vmov (immediate above), but when high bit of immediate is
// set.
VmovImmediate(this, instr);
} else { } else {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
......
...@@ -1053,8 +1053,6 @@ TEST(Neon) { ...@@ -1053,8 +1053,6 @@ TEST(Neon) {
"f2802050 vmov.i32 q1, 0"); "f2802050 vmov.i32 q1, 0");
COMPARE(vmov(q1, 0x0000001200000012), COMPARE(vmov(q1, 0x0000001200000012),
"f2812052 vmov.i32 q1, 18"); "f2812052 vmov.i32 q1, 18");
COMPARE(vmov(q0, 0xffffffffffffffff),
"f3870e5f vmov.i8 q0, 255");
COMPARE(vmvn(q0, q15), COMPARE(vmvn(q0, q15),
"f3b005ee vmvn q0, q15"); "f3b005ee vmvn q0, q15");
COMPARE(vmvn(q8, q9), COMPARE(vmvn(q8, q9),
......
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --experimental-wasm-simd
load('test/mjsunit/wasm/wasm-module-builder.js');
const builder = new WasmModuleBuilder();
builder.addType(makeSig([kWasmI32, kWasmI32, kWasmI32], [kWasmI32]));
// Generate function 1 (out of 1).
builder.addFunction(undefined, 0 /* sig */)
.addBodyWithEnd([
// signature: i_iii
// body:
kSimdPrefix, kExprS128Const, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // s128.const
kSimdPrefix, kExprI32x4ExtractLane, 0x00, // i32x4.extract_lane
kExprEnd, // end @22
]);
builder.addExport('main', 0);
const instance = builder.instantiate();
assertEquals(-1, instance.exports.main(1, 2, 3));
// The bug happens on the second call to the same exported function, it returns 0.
assertEquals(-1, instance.exports.main(1, 2, 3));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment