Commit 167ca2e2 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Fix shifts value to be modulo lane width

Drive by fix of type of expected value in a test

Bug: v8:9626
Change-Id: I1bb44082b873383ea75e7089828bc68c9d4e0df0
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1757503
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63727}
parent c255e5fb
......@@ -1919,14 +1919,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI32x4Shl: {
QwNeonRegister tmp = i.TempSimd128Register(0);
__ vdup(Neon32, tmp, i.InputRegister(1));
Register shift = i.TempRegister(1);
// Take shift value modulo 32.
__ and_(shift, i.InputRegister(1), Operand(31));
__ vdup(Neon32, tmp, shift);
__ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
break;
}
case kArmI32x4ShrS: {
QwNeonRegister tmp = i.TempSimd128Register(0);
__ vdup(Neon32, tmp, i.InputRegister(1));
Register shift = i.TempRegister(1);
// Take shift value modulo 32.
__ and_(shift, i.InputRegister(1), Operand(31));
__ vdup(Neon32, tmp, shift);
__ vneg(Neon32, tmp, tmp);
__ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
......@@ -1998,7 +2004,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI32x4ShrU: {
QwNeonRegister tmp = i.TempSimd128Register(0);
__ vdup(Neon32, tmp, i.InputRegister(1));
Register shift = i.TempRegister(1);
// Take shift value modulo 32.
__ and_(shift, i.InputRegister(1), Operand(31));
__ vdup(Neon32, tmp, shift);
__ vneg(Neon32, tmp, tmp);
__ vshl(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
......@@ -2054,14 +2063,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI16x8Shl: {
QwNeonRegister tmp = i.TempSimd128Register(0);
__ vdup(Neon16, tmp, i.InputRegister(1));
Register shift = i.TempRegister(1);
// Take shift value modulo 16.
__ and_(shift, i.InputRegister(1), Operand(15));
__ vdup(Neon16, tmp, shift);
__ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
break;
}
case kArmI16x8ShrS: {
QwNeonRegister tmp = i.TempSimd128Register(0);
__ vdup(Neon16, tmp, i.InputRegister(1));
Register shift = i.TempRegister(1);
// Take shift value modulo 16.
__ and_(shift, i.InputRegister(1), Operand(15));
__ vdup(Neon16, tmp, shift);
__ vneg(Neon16, tmp, tmp);
__ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
......@@ -2142,7 +2157,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI16x8ShrU: {
QwNeonRegister tmp = i.TempSimd128Register(0);
__ vdup(Neon16, tmp, i.InputRegister(1));
Register shift = i.TempRegister(1);
// Take shift value modulo 16.
__ and_(shift, i.InputRegister(1), Operand(15));
__ vdup(Neon16, tmp, shift);
__ vneg(Neon16, tmp, tmp);
__ vshl(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
......@@ -2201,6 +2219,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI8x16Shl: {
  QwNeonRegister tmp = i.TempSimd128Register(0);
  Register shift = i.TempRegister(1);
  // Wasm semantics: shift counts are taken modulo the lane width (8).
  __ and_(shift, i.InputRegister(1), Operand(7));
  // Broadcast the MASKED shift amount into every byte lane. Broadcasting
  // i.InputRegister(1) here would bypass the modulo just computed and
  // shift by the raw, unmasked count (matches the ShrS/ShrU cases).
  __ vdup(Neon8, tmp, shift);
  __ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
          tmp);
......@@ -2208,7 +2229,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI8x16ShrS: {
QwNeonRegister tmp = i.TempSimd128Register(0);
__ vdup(Neon8, tmp, i.InputRegister(1));
Register shift = i.TempRegister(1);
// Take shift value modulo 8.
__ and_(shift, i.InputRegister(1), Operand(7));
__ vdup(Neon8, tmp, shift);
__ vneg(Neon8, tmp, tmp);
__ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
......@@ -2275,7 +2299,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI8x16ShrU: {
QwNeonRegister tmp = i.TempSimd128Register(0);
__ vdup(Neon8, tmp, i.InputRegister(1));
Register shift = i.TempRegister(1);
// Take shift value modulo 8.
__ and_(shift, i.InputRegister(1), Operand(7));
__ vdup(Neon8, tmp, shift);
__ vneg(Neon8, tmp, tmp);
__ vshl(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
tmp);
......
......@@ -94,7 +94,7 @@ void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode,
Node* node) {
ArmOperandGenerator g(selector);
InstructionOperand temps[] = {g.TempSimd128Register()};
InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1)), arraysize(temps), temps);
......
......@@ -1888,14 +1888,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_UNOP_CASE(kArm64I64x2Neg, Neg, 2D);
case kArm64I64x2Shl: {
VRegister tmp = i.TempSimd128Register(0);
__ Dup(tmp.V2D(), i.InputRegister64(1));
Register shift = i.TempRegister(1);
// Take shift value modulo 64.
__ And(shift, i.InputRegister64(1), 63);
__ Dup(tmp.V2D(), shift);
__ Sshl(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).V2D(),
tmp.V2D());
break;
}
case kArm64I64x2ShrS: {
VRegister tmp = i.TempSimd128Register(0);
__ Dup(tmp.V2D(), i.InputRegister64(1));
Register shift = i.TempRegister(1);
// Take shift value modulo 64.
__ And(shift, i.InputRegister64(1), 63);
__ Dup(tmp.V2D(), shift);
__ Neg(tmp.V2D(), tmp.V2D());
__ Sshl(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).V2D(),
tmp.V2D());
......@@ -1974,7 +1980,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_BINOP_CASE(kArm64I64x2GeS, Cmge, 2D);
case kArm64I64x2ShrU: {
VRegister tmp = i.TempSimd128Register(0);
__ Dup(tmp.V2D(), i.InputRegister64(1));
Register shift = i.TempRegister(1);
// Take shift value modulo 64.
__ And(shift, i.InputRegister64(1), 63);
__ Dup(tmp.V2D(), shift);
__ Neg(tmp.V2D(), tmp.V2D());
__ Ushl(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).V2D(),
tmp.V2D());
......@@ -2006,14 +2015,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_UNOP_CASE(kArm64I32x4Neg, Neg, 4S);
case kArm64I32x4Shl: {
VRegister tmp = i.TempSimd128Register(0);
__ Dup(tmp.V4S(), i.InputRegister32(1));
Register shift = i.TempRegister32(1);
// Take shift value modulo 32.
__ And(shift, i.InputRegister32(1), 31);
__ Dup(tmp.V4S(), shift);
__ Sshl(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(),
tmp.V4S());
break;
}
case kArm64I32x4ShrS: {
VRegister tmp = i.TempSimd128Register(0);
__ Dup(tmp.V4S(), i.InputRegister32(1));
Register shift = i.TempRegister32(1);
// Take shift value modulo 32.
__ And(shift, i.InputRegister32(1), 31);
__ Dup(tmp.V4S(), shift);
__ Neg(tmp.V4S(), tmp.V4S());
__ Sshl(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(),
tmp.V4S());
......@@ -2040,7 +2055,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_WIDENING_UNOP_CASE(kArm64I32x4UConvertI16x8High, Uxtl2, 4S, 8H);
case kArm64I32x4ShrU: {
VRegister tmp = i.TempSimd128Register(0);
__ Dup(tmp.V4S(), i.InputRegister32(1));
Register shift = i.TempRegister32(1);
// Take shift value modulo 32.
__ And(shift, i.InputRegister32(1), 31);
__ Dup(tmp.V4S(), shift);
__ Neg(tmp.V4S(), tmp.V4S());
__ Ushl(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).V4S(),
tmp.V4S());
......@@ -2073,14 +2091,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_UNOP_CASE(kArm64I16x8Neg, Neg, 8H);
case kArm64I16x8Shl: {
VRegister tmp = i.TempSimd128Register(0);
__ Dup(tmp.V8H(), i.InputRegister32(1));
Register shift = i.TempRegister32(1);
// Take shift value modulo 16.
__ And(shift, i.InputRegister32(1), 15);
__ Dup(tmp.V8H(), shift);
__ Sshl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(),
tmp.V8H());
break;
}
case kArm64I16x8ShrS: {
VRegister tmp = i.TempSimd128Register(0);
__ Dup(tmp.V8H(), i.InputRegister32(1));
Register shift = i.TempRegister32(1);
// Take shift value modulo 16.
__ And(shift, i.InputRegister32(1), 15);
__ Dup(tmp.V8H(), shift);
__ Neg(tmp.V8H(), tmp.V8H());
__ Sshl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(),
tmp.V8H());
......@@ -2129,7 +2153,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArm64I16x8ShrU: {
VRegister tmp = i.TempSimd128Register(0);
__ Dup(tmp.V8H(), i.InputRegister32(1));
Register shift = i.TempRegister32(1);
// Take shift value modulo 16.
__ And(shift, i.InputRegister32(1), 15);
__ Dup(tmp.V8H(), shift);
__ Neg(tmp.V8H(), tmp.V8H());
__ Ushl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8H(),
tmp.V8H());
......@@ -2176,14 +2203,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_UNOP_CASE(kArm64I8x16Neg, Neg, 16B);
case kArm64I8x16Shl: {
VRegister tmp = i.TempSimd128Register(0);
__ Dup(tmp.V16B(), i.InputRegister32(1));
Register shift = i.TempRegister32(1);
// Take shift value modulo 8.
__ And(shift, i.InputRegister32(1), 7);
__ Dup(tmp.V16B(), shift);
__ Sshl(i.OutputSimd128Register().V16B(),
i.InputSimd128Register(0).V16B(), tmp.V16B());
break;
}
case kArm64I8x16ShrS: {
VRegister tmp = i.TempSimd128Register(0);
__ Dup(tmp.V16B(), i.InputRegister32(1));
Register shift = i.TempRegister32(1);
// Take shift value modulo 8.
__ And(shift, i.InputRegister32(1), 7);
__ Dup(tmp.V16B(), shift);
__ Neg(tmp.V16B(), tmp.V16B());
__ Sshl(i.OutputSimd128Register().V16B(),
i.InputSimd128Register(0).V16B(), tmp.V16B());
......@@ -2222,7 +2255,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_BINOP_CASE(kArm64I8x16GeS, Cmge, 16B);
case kArm64I8x16ShrU: {
VRegister tmp = i.TempSimd128Register(0);
__ Dup(tmp.V16B(), i.InputRegister32(1));
Register shift = i.TempRegister32(1);
// Take shift value modulo 8.
__ And(shift, i.InputRegister32(1), 7);
__ Dup(tmp.V16B(), shift);
__ Neg(tmp.V16B(), tmp.V16B());
__ Ushl(i.OutputSimd128Register().V16B(),
i.InputSimd128Register(0).V16B(), tmp.V16B());
......
......@@ -153,7 +153,7 @@ void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode,
Node* node) {
Arm64OperandGenerator g(selector);
InstructionOperand temps[] = {g.TempSimd128Register()};
InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1)), arraysize(temps), temps);
......
......@@ -2212,28 +2212,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kSSEI32x4Shl: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
__ movd(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 32 (Wasm shifts are modulo lane width).
// NOTE(review): and_ mutates i.InputRegister(1) in place -- confirm the
// register allocator permits clobbering this input operand.
__ and_(shift, 31);
__ movd(tmp, shift);
__ pslld(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI32x4Shl: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0);
__ movd(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ vpslld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
case kSSEI32x4ShrS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
__ movd(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ psrad(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI32x4ShrS: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0);
__ movd(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ vpsrad(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
......@@ -2430,14 +2442,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kSSEI32x4ShrU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
__ movd(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ psrld(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI32x4ShrU: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0);
__ movd(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ vpsrld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
......@@ -2553,28 +2571,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kSSEI16x8Shl: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
__ movd(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ movd(tmp, shift);
__ psllw(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI16x8Shl: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0);
__ movd(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ movd(tmp, shift);
__ vpsllw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
case kSSEI16x8ShrS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
__ movd(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ movd(tmp, shift);
__ psraw(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI16x8ShrS: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0);
__ movd(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ movd(tmp, shift);
__ vpsraw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
......@@ -2745,14 +2775,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kSSEI16x8ShrU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
__ movd(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ movd(tmp, shift);
__ psrlw(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI16x8ShrU: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0);
__ movd(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ movd(tmp, shift);
__ vpsrlw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
......@@ -2919,6 +2955,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Register shift = i.InputRegister(1);
Register tmp = i.ToRegister(instr->TempAt(0));
XMMRegister tmp_simd = i.TempSimd128Register(1);
// Take shift value modulo 8.
__ and_(shift, 7);
// Mask off the unwanted bits before word-shifting.
__ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
__ mov(tmp, shift);
......@@ -2938,6 +2976,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Register shift = i.InputRegister(1);
Register tmp = i.ToRegister(instr->TempAt(0));
XMMRegister tmp_simd = i.TempSimd128Register(1);
// Take shift value modulo 8.
__ and_(shift, 7);
// Mask off the unwanted bits before word-shifting.
__ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ mov(tmp, shift);
......@@ -2959,6 +2999,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ punpckhbw(kScratchDoubleReg, dst);
__ punpcklbw(dst, dst);
__ mov(tmp, i.InputRegister(1));
// Take shift value modulo 8.
__ and_(tmp, 7);
__ add(tmp, Immediate(8));
__ movd(tmp_simd, tmp);
__ psraw(kScratchDoubleReg, tmp_simd);
......@@ -3223,6 +3265,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ punpckhbw(kScratchDoubleReg, dst);
__ punpcklbw(dst, dst);
__ mov(tmp, i.InputRegister(1));
// Take shift value modulo 8.
__ and_(tmp, 7);
__ add(tmp, Immediate(8));
__ movd(tmp_simd, tmp);
__ psrlw(kScratchDoubleReg, tmp_simd);
......
......@@ -2584,7 +2584,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I64x2Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
__ movq(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 64 (the I64x2 lane width; mask is 63).
// NOTE(review): andq mutates i.InputRegister(1) in place -- confirm the
// register allocator permits clobbering this input operand.
__ andq(shift, Immediate(63));
__ movq(tmp, shift);
__ psllq(i.OutputSimd128Register(), tmp);
break;
}
......@@ -2595,6 +2598,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
Register tmp = i.ToRegister(instr->TempAt(0));
// Modulo 64 not required as sarq_cl will mask cl to 6 bits.
// lower quadword
__ pextrq(tmp, src, 0x0);
......@@ -2739,7 +2743,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I64x2ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
__ movq(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 64.
__ andq(shift, Immediate(63));
__ movq(tmp, shift);
__ psrlq(i.OutputSimd128Register(), tmp);
break;
}
......@@ -2895,13 +2902,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I32x4Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
__ movq(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ andq(shift, Immediate(31));
__ movq(tmp, shift);
__ pslld(i.OutputSimd128Register(), tmp);
break;
}
case kX64I32x4ShrS: {
XMMRegister tmp = i.TempSimd128Register(0);
__ movq(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ andq(shift, Immediate(31));
__ movq(tmp, shift);
__ psrad(i.OutputSimd128Register(), tmp);
break;
}
......@@ -2999,7 +3012,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I32x4ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
__ movq(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ andq(shift, Immediate(31));
__ movq(tmp, shift);
__ psrld(i.OutputSimd128Register(), tmp);
break;
}
......@@ -3092,13 +3108,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I16x8Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
__ movq(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ andq(shift, Immediate(15));
__ movq(tmp, shift);
__ psllw(i.OutputSimd128Register(), tmp);
break;
}
case kX64I16x8ShrS: {
XMMRegister tmp = i.TempSimd128Register(0);
__ movq(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ andq(shift, Immediate(15));
__ movq(tmp, shift);
__ psraw(i.OutputSimd128Register(), tmp);
break;
}
......@@ -3180,7 +3202,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I16x8ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
__ movq(tmp, i.InputRegister(1));
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ andq(shift, Immediate(15));
__ movq(tmp, shift);
__ psrlw(i.OutputSimd128Register(), tmp);
break;
}
......@@ -3286,15 +3311,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Temp registers for shift mask and additional moves to XMM registers.
Register tmp = i.ToRegister(instr->TempAt(0));
XMMRegister tmp_simd = i.TempSimd128Register(1);
Register shift = i.InputRegister(1);
// Mask off the unwanted bits before word-shifting.
__ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
__ movq(tmp, i.InputRegister(1));
// Take shift value modulo 8.
__ andq(shift, Immediate(7));
__ movq(tmp, shift);
__ addq(tmp, Immediate(8));
__ movq(tmp_simd, tmp);
__ psrlw(kScratchDoubleReg, tmp_simd);
__ packuswb(kScratchDoubleReg, kScratchDoubleReg);
__ pand(dst, kScratchDoubleReg);
__ movq(tmp_simd, i.InputRegister(1));
__ movq(tmp_simd, shift);
__ psllw(dst, tmp_simd);
break;
}
......@@ -3309,6 +3337,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ punpcklbw(dst, dst);
// Prepare shift value
__ movq(tmp, i.InputRegister(1));
// Take shift value modulo 8.
__ andq(tmp, Immediate(7));
__ addq(tmp, Immediate(8));
__ movq(tmp_simd, tmp);
__ psraw(kScratchDoubleReg, tmp_simd);
......@@ -3421,6 +3451,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ punpcklbw(dst, dst);
// Prepare shift value
__ movq(tmp, i.InputRegister(1));
// Take shift value modulo 8.
__ andq(tmp, Immediate(7));
__ addq(tmp, Immediate(8));
__ movq(tmp_simd, tmp);
__ psrlw(kScratchDoubleReg, tmp_simd);
......
......@@ -2450,19 +2450,26 @@ class ThreadImpl {
Push(WasmValue(Simd128(res))); \
return true; \
}
SHIFT_CASE(I64x2Shl, i64x2, int2, 2, static_cast<uint64_t>(a) << shift)
SHIFT_CASE(I64x2ShrS, i64x2, int2, 2, a >> shift)
SHIFT_CASE(I64x2ShrU, i64x2, int2, 2, static_cast<uint64_t>(a) >> shift)
SHIFT_CASE(I32x4Shl, i32x4, int4, 4, static_cast<uint32_t>(a) << shift)
SHIFT_CASE(I32x4ShrS, i32x4, int4, 4, a >> shift)
SHIFT_CASE(I32x4ShrU, i32x4, int4, 4, static_cast<uint32_t>(a) >> shift)
SHIFT_CASE(I16x8Shl, i16x8, int8, 8, static_cast<uint16_t>(a) << shift)
SHIFT_CASE(I16x8ShrS, i16x8, int8, 8, a >> shift)
SHIFT_CASE(I16x8ShrU, i16x8, int8, 8, static_cast<uint16_t>(a) >> shift)
SHIFT_CASE(I8x16Shl, i8x16, int16, 16, static_cast<uint8_t>(a) << shift)
SHIFT_CASE(I8x16ShrS, i8x16, int16, 16, a >> shift)
SHIFT_CASE(I64x2Shl, i64x2, int2, 2,
static_cast<uint64_t>(a) << (shift % 64))
SHIFT_CASE(I64x2ShrS, i64x2, int2, 2, a >> (shift % 64))
SHIFT_CASE(I64x2ShrU, i64x2, int2, 2,
static_cast<uint64_t>(a) >> (shift % 64))
SHIFT_CASE(I32x4Shl, i32x4, int4, 4,
static_cast<uint32_t>(a) << (shift % 32))
SHIFT_CASE(I32x4ShrS, i32x4, int4, 4, a >> (shift % 32))
SHIFT_CASE(I32x4ShrU, i32x4, int4, 4,
static_cast<uint32_t>(a) >> (shift % 32))
SHIFT_CASE(I16x8Shl, i16x8, int8, 8,
static_cast<uint16_t>(a) << (shift % 16))
SHIFT_CASE(I16x8ShrS, i16x8, int8, 8, a >> (shift % 16))
SHIFT_CASE(I16x8ShrU, i16x8, int8, 8,
static_cast<uint16_t>(a) >> (shift % 16))
SHIFT_CASE(I8x16Shl, i8x16, int16, 16,
static_cast<uint8_t>(a) << (shift % 8))
SHIFT_CASE(I8x16ShrS, i8x16, int16, 16, a >> (shift % 8))
SHIFT_CASE(I8x16ShrU, i8x16, int16, 16,
static_cast<uint8_t>(a) >> shift)
static_cast<uint8_t>(a) >> (shift % 8))
#undef SHIFT_CASE
#define CONVERT_CASE(op, src_type, name, dst_type, count, start_index, ctype, \
expr) \
......
......@@ -184,13 +184,20 @@ T UnsignedGreaterEqual(T a, T b) {
template <typename T>
T LogicalShiftLeft(T a, int shift) {
using UnsignedT = typename std::make_unsigned<T>::type;
return static_cast<UnsignedT>(a) << shift;
return static_cast<UnsignedT>(a) << (shift % (sizeof(T) * 8));
}
template <typename T>
T LogicalShiftRight(T a, int shift) {
using UnsignedT = typename std::make_unsigned<T>::type;
return static_cast<UnsignedT>(a) >> shift;
return static_cast<UnsignedT>(a) >> (shift % (sizeof(T) * 8));
}
// Define our own ArithmeticShiftRight instead of using the one from utils.h
// because the shift amount needs to be taken modulo lane width.
template <typename T>
T ArithmeticShiftRight(T a, int shift) {
return a >> (shift % (sizeof(T) * 8));
}
template <typename T>
......@@ -809,7 +816,8 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Neg) {
void RunI64x2ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int64ShiftOp expected_op) {
for (int shift = 1; shift < 64; shift++) {
// Intentionally shift by 64, should be no-op.
for (int shift = 1; shift <= 64; shift++) {
WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
int64_t* g = r.builder().AddGlobal<int64_t>(kWasmS128);
byte value = 0;
......@@ -1664,7 +1672,8 @@ WASM_SIMD_TEST(I32x4GeU) {
void RunI32x4ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int32ShiftOp expected_op) {
for (int shift = 1; shift < 32; shift++) {
// Intentionally shift by 32, should be no-op.
for (int shift = 1; shift <= 32; shift++) {
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
int32_t* g = r.builder().AddGlobal<int32_t>(kWasmS128);
byte value = 0;
......@@ -1914,7 +1923,8 @@ WASM_SIMD_TEST(I16x8LeU) {
void RunI16x8ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int16ShiftOp expected_op) {
for (int shift = 1; shift < 16; shift++) {
// Intentionally shift by 16, should be no-op.
for (int shift = 1; shift <= 16; shift++) {
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
int16_t* g = r.builder().AddGlobal<int16_t>(kWasmS128);
byte value = 0;
......@@ -1929,7 +1939,7 @@ void RunI16x8ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
FOR_INT16_INPUTS(x) {
r.Call(x);
float expected = expected_op(x, shift);
int16_t expected = expected_op(x, shift);
for (int i = 0; i < 8; i++) {
CHECK_EQ(expected, ReadLittleEndianValue<int16_t>(&g[i]));
}
......@@ -2130,7 +2140,8 @@ WASM_SIMD_TEST(I8x16Mul) {
void RunI8x16ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int8ShiftOp expected_op) {
for (int shift = 1; shift < 8; shift++) {
// Intentionally shift by 8, should be no-op.
for (int shift = 1; shift <= 8; shift++) {
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
int8_t* g = r.builder().AddGlobal<int8_t>(kWasmS128);
byte value = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment