Commit ff503fd4 authored by Bill Budge's avatar Bill Budge Committed by Commit Bot

Revert "[wasm-simd][arm] Use vmov to move all ones to register"

This reverts commit 57242a05.

Reason for revert: regression tests fails:
https://ci.chromium.org/p/v8/builders/ci/V8%20Linux%20-%20debug/31477

Original change's description:
> [wasm-simd][arm] Use vmov to move all ones to register
> 
> vceq(dst, dst, dst) does not seem to always set the register to all
> ones. The right way should be be to use vmov (immediate) anyway. This
> was not supported in the assembler yet, so we need changes to the
> assembler, diassembler, and simulator.
> 
> There is an unfortunate fork in logic in the simulator, due to the way
> the switches are set up, vmov (imm) logic is duplicated across two
> different cases, because the switch looks at the top bit of the
> immediate. Refactoring this will be a bigger change that is irrelevant
> for this bug, so I'm putting that off for now. Instead we extract the
> core of vmov (imm) into helpers and call it in the two cases.
> 
> Bug: chromium:1112124
> Change-Id: I283dbcd86cb0572e5ee720835f897b51fae96701
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2337503
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
> Reviewed-by: Bill Budge <bbudge@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#69315}

TBR=bbudge@chromium.org,jkummerow@chromium.org,v8-arm-ports@googlegroups.com,zhin@chromium.org

Change-Id: I5d9d1dcb81771f71001d959ec5a03a43a11c4233
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: chromium:1112124
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2347211Reviewed-by: 's avatarBill Budge <bbudge@chromium.org>
Commit-Queue: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69316}
parent 57242a05
......@@ -2621,28 +2621,16 @@ static void DoubleAsTwoUInt32(Double d, uint32_t* lo, uint32_t* hi) {
*hi = i >> 32;
}
static void WriteVmovIntImmEncoding(uint8_t imm, uint32_t* encoding) {
// Integer promotion from uint8_t to int makes these all okay.
*encoding = ((imm & 0x80) << (24 - 7)); // a
*encoding |= ((imm & 0x70) << (16 - 4)); // bcd
*encoding |= (imm & 0x0f); // efgh
}
// This checks if imm can be encoded into an immediate for vmov.
// See Table A7-15 in ARM DDI 0406C.d.
// Currently only supports the first row and op=0 && cmode=1110.
static bool FitsVmovIntImm(uint64_t imm, uint32_t* encoding, uint8_t* cmode) {
// Currently only supports the first row of the table.
static bool FitsVmovImm64(uint64_t imm, uint32_t* encoding) {
uint32_t lo = imm & 0xFFFFFFFF;
uint32_t hi = imm >> 32;
if ((lo == hi && ((lo & 0xffffff00) == 0))) {
WriteVmovIntImmEncoding(imm & 0xff, encoding);
*cmode = 0;
return true;
} else if ((lo == hi) && ((lo & 0xffff) == (lo >> 16)) &&
((lo & 0xff) == (lo >> 24))) {
// Check that all bytes in imm are the same.
WriteVmovIntImmEncoding(imm & 0xff, encoding);
*cmode = 0xe;
if (lo == hi && ((lo & 0xffffff00) == 0)) {
*encoding = ((lo & 0x80) << (24 - 7)); // a
*encoding |= ((lo & 0x70) << (16 - 4)); // bcd
*encoding |= (lo & 0x0f); // efgh
return true;
}
......@@ -2651,17 +2639,15 @@ static bool FitsVmovIntImm(uint64_t imm, uint32_t* encoding, uint8_t* cmode) {
void Assembler::vmov(const QwNeonRegister dst, uint64_t imm) {
uint32_t enc;
uint8_t cmode;
uint8_t op = 0;
if (CpuFeatures::IsSupported(VFPv3) && FitsVmovIntImm(imm, &enc, &cmode)) {
if (CpuFeatures::IsSupported(VFPv3) && FitsVmovImm64(imm, &enc)) {
CpuFeatureScope scope(this, VFPv3);
// Instruction details available in ARM DDI 0406C.b, A8-937.
// 001i1(27-23) | D(22) | 000(21-19) | imm3(18-16) | Vd(15-12) | cmode(11-8)
// | 0(7) | Q(6) | op(5) | 4(1) | imm4(3-0)
int vd, d;
dst.split_code(&vd, &d);
emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | cmode * B8 |
0x1 * B6 | op * B5 | 0x1 * B4 | enc);
emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | 0x1 * B6 |
0x1 * B4 | enc);
} else {
UNIMPLEMENTED();
}
......
......@@ -2833,7 +2833,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmS128AllOnes: {
__ vmov(i.OutputSimd128Register(), uint64_t{0xffff'ffff'ffff'ffff});
__ vceq(i.OutputSimd128Register(), i.OutputSimd128Register(),
i.OutputSimd128Register());
break;
}
case kArmS128Dup: {
......
......@@ -115,7 +115,6 @@ class Decoder {
void DecodeVCMP(Instruction* instr);
void DecodeVCVTBetweenDoubleAndSingle(Instruction* instr);
void DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr);
void DecodeVmovImmediate(Instruction* instr);
const disasm::NameConverter& converter_;
Vector<char> out_buffer_;
......@@ -1736,30 +1735,6 @@ void Decoder::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) {
}
}
void Decoder::DecodeVmovImmediate(Instruction* instr) {
byte cmode = instr->Bits(11, 8);
int vd = instr->VFPDRegValue(kSimd128Precision);
int a = instr->Bit(24);
int bcd = instr->Bits(18, 16);
int efgh = instr->Bits(3, 0);
uint8_t imm = a << 7 | bcd << 4 | efgh;
switch (cmode) {
case 0: {
uint32_t imm32 = imm;
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmov.i32 q%d, %d", vd, imm32);
break;
}
case 0xe: {
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vmov.i8 q%d, %d", vd, imm);
break;
}
default:
Unknown(instr);
}
}
// Decode Type 6 coprocessor instructions.
// Dm = vmov(Rt, Rt2)
// <Rt, Rt2> = vmov(Dm)
......@@ -2035,7 +2010,21 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
instr->Bit(7) == 0 && instr->Bit(4) == 1) {
// One register and a modified immediate value, see ARM DDI 0406C.d
// A7.4.6.
DecodeVmovImmediate(instr);
byte cmode = instr->Bits(11, 8);
switch (cmode) {
case 0: {
int vd = instr->VFPDRegValue(kSimd128Precision);
int a = instr->Bit(24);
int bcd = instr->Bits(18, 16);
int efgh = instr->Bits(3, 0);
int imm64 = a << 7 | bcd << 4 | efgh;
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmov.i32 q%d, %d", vd, imm64);
break;
}
default:
Unknown(instr);
}
} else if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
// vmovl signed
......@@ -2482,11 +2471,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vmull.u%d q%d, d%d, d%d",
size, Vd, Vn, Vm);
} else if (instr->Bits(21, 19) == 0 && instr->Bit(7) == 0 &&
instr->Bit(4) == 1) {
// One register and a modified immediate value, see ARM DDI 0406C.d
// A7.4.6.
DecodeVmovImmediate(instr);
} else {
Unknown(instr);
}
......
......@@ -4043,32 +4043,6 @@ uint16_t Multiply(uint16_t a, uint16_t b) {
uint32_t result = static_cast<uint32_t>(a) * static_cast<uint32_t>(b);
return static_cast<uint16_t>(result);
}
void VmovImmediate(Simulator* simulator, Instruction* instr) {
byte cmode = instr->Bits(11, 8);
int vd = instr->VFPDRegValue(kSimd128Precision);
uint8_t imm = instr->Bit(24) << 7; // i
imm |= instr->Bits(18, 16) << 4; // imm3
imm |= instr->Bits(3, 0); // imm4
switch (cmode) {
case 0: {
// Set the LSB of each 64-bit halves.
uint64_t imm64 = imm;
simulator->set_neon_register(vd, {imm64, imm64});
break;
}
case 0xe: {
uint8_t imms[kSimd128Size];
// Set all bytes of register.
std::fill_n(imms, kSimd128Size, imm);
simulator->set_neon_register(vd, imms);
break;
}
default: {
UNIMPLEMENTED();
}
}
}
} // namespace
template <typename T, int SIZE>
......@@ -4632,7 +4606,21 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
// One register and a modified immediate value, see ARM DDI 0406C.d
// A7.4.6. Handles vmov, vorr, vmvn, vbic.
// Only handle vmov.i32 for now.
VmovImmediate(this, instr);
byte cmode = instr->Bits(11, 8);
switch (cmode) {
case 0: {
// vmov.i32 Qd, #<imm>
int vd = instr->VFPDRegValue(kSimd128Precision);
uint64_t imm = instr->Bit(24) << 7; // i
imm |= instr->Bits(18, 16) << 4; // imm3
imm |= instr->Bits(3, 0); // imm4
imm |= imm << 32;
set_neon_register(vd, {imm, imm});
break;
}
default:
UNIMPLEMENTED();
}
} else if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
// vmovl signed
......@@ -5639,12 +5627,6 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
default:
UNIMPLEMENTED();
}
} else if (instr->Bits(21, 19) == 0 && instr->Bit(7) == 0 &&
instr->Bit(4) == 1) {
// vmov (immediate), see ARM DDI 0487F.b F6.1.134, decoding A4.
// Similar to vmov (immediate above), but when high bit of immediate is
// set.
VmovImmediate(this, instr);
} else {
UNIMPLEMENTED();
}
......
......@@ -1053,8 +1053,6 @@ TEST(Neon) {
"f2802050 vmov.i32 q1, 0");
COMPARE(vmov(q1, 0x0000001200000012),
"f2812052 vmov.i32 q1, 18");
COMPARE(vmov(q0, 0xffffffffffffffff),
"f3870e5f vmov.i8 q0, 255");
COMPARE(vmvn(q0, q15),
"f3b005ee vmvn q0, q15");
COMPARE(vmvn(q8, q9),
......
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --experimental-wasm-simd
load('test/mjsunit/wasm/wasm-module-builder.js');
const builder = new WasmModuleBuilder();
builder.addType(makeSig([kWasmI32, kWasmI32, kWasmI32], [kWasmI32]));
// Generate function 1 (out of 1).
builder.addFunction(undefined, 0 /* sig */)
.addBodyWithEnd([
// signature: i_iii
// body:
kSimdPrefix, kExprS128Const, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // s128.const
kSimdPrefix, kExprI32x4ExtractLane, 0x00, // i32x4.extract_lane
kExprEnd, // end @22
]);
builder.addExport('main', 0);
const instance = builder.instantiate();
assertEquals(-1, instance.exports.main(1, 2, 3));
// The bug happens on the second call to the same exported function, it returns 0.
assertEquals(-1, instance.exports.main(1, 2, 3));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment