Commit a56d9349 authored by Kanghua Yu, committed by Commit Bot

[ia32][wasm] Add I8x16 ShiftOp and MulOp.

I8x16Shl, I8x16ShrS, I8x16ShrU, I8x16Mul

R=bbudge@chromium.org, bmeurer@chromium.org

Bug: 
Change-Id: I97d7f077c26fe6f8be6464582f20d4e3c8fd4667
Reviewed-on: https://chromium-review.googlesource.com/853772
Commit-Queue: Benedikt Meurer <bmeurer@chromium.org>
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50940}
parent f5933218
......
@@ -2406,6 +2406,126 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
#define I8x16_SPLAT(reg, scratch, v) \
__ Move(reg, static_cast<uint32_t>(v)); \
__ Pxor(scratch, scratch); \
__ Pshufb(reg, scratch)
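The macro is the usual pshufb broadcast idiom: Move loads the 32-bit constant, Pxor zeroes the shuffle mask, and Pshufb with an all-zero mask replicates byte 0 of reg into every lane. A scalar sketch of the net effect (hypothetical helper, not part of the commit):

#include <cstdint>

// Net effect of I8x16_SPLAT on a 16-byte register, modeled as an
// array: every lane ends up holding the low byte of v.
void SplatModel(uint8_t reg[16], uint32_t v) {
  for (int i = 0; i < 16; i++) reg[i] = static_cast<uint8_t>(v);
}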
case kSSEI8x16Shl: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister src = i.InputSimd128Register(0);
int8_t shift = i.InputInt8(1) & 0x7;
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
// src = AAaa ... AAaa
// tmp = 0F0F ... 0F0F (shift=4)
I8x16_SPLAT(tmp, kScratchDoubleReg, 0xFFU >> shift);
// src = src & tmp
// => 0A0a ... 0A0a
__ pand(src, tmp);
// src = src << shift
// => A0a0 ... A0a0 (shift=4)
__ pslld(src, shift);
break;
}
case kAVXI8x16Shl: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
int8_t shift = i.InputInt8(1) & 0x7;
XMMRegister tmp =
dst != src ? dst : i.ToSimd128Register(instr->TempAt(0));
// src = AAaa ... AAaa
// tmp = 0F0F ... 0F0F (shift=4)
I8x16_SPLAT(tmp, kScratchDoubleReg, 0xFFU >> shift);
// dst = src & tmp
// => 0A0a ... 0A0a
__ vpand(dst, src, tmp);
// dst = dst << shift
// => A0a0 ... A0a0 (shift=4)
__ vpslld(dst, dst, shift);
break;
}
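x86 has no per-byte shift instruction, so the lowering masks each byte down to the bits that survive the shift and then uses the 32-bit pslld, which can no longer carry bits across byte-lane boundaries. One lane in scalar form (a minimal sketch, assuming shift is already masked to 0..7 as above):

#include <cstdint>

// Equivalent to (uint8_t)(b << shift). The pre-mask is what makes the
// wide shift safe in the vector code: the bits that would spill into
// the neighboring byte are already zero.
uint8_t ByteShl(uint8_t b, int shift) {
  uint8_t masked = b & (0xFFu >> shift);  // shift=4 -> mask 0x0F
  return static_cast<uint8_t>(masked << shift);
}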
case kSSEI8x16ShrS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister src = i.InputSimd128Register(0);
int8_t shift = i.InputInt8(1) & 0x7;
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
// I16x8 view of I8x16
// src = AAaa AAaa ... AAaa AAaa
// tmp = aa00 aa00 ... aa00 aa00
__ movaps(tmp, src);
__ Move(kScratchDoubleReg, static_cast<uint32_t>(0xff00));
__ psllw(tmp, 8);
// src = I16x8ShrS(src, shift)
// => SAAa SAAa ... SAAa SAAa (shift=4)
__ pshuflw(kScratchDoubleReg, kScratchDoubleReg, 0x0);
__ psraw(src, shift);
// tmp = I16x8ShrS(tmp, shift)
// => Saa0 Saa0 ... Saa0 Saa0 (shift=4)
__ pshufd(kScratchDoubleReg, kScratchDoubleReg, 0x0);
__ psraw(tmp, shift);
// src = I16x8And(src, 0xff00)
// => SA00 SA00 ... SA00 SA00
__ pand(src, kScratchDoubleReg);
// tmp = I16x8ShrU(tmp, 8)
// => 00Sa 00Sa ... 00Sa 00Sa (shift=4)
__ psrlw(tmp, 8);
// src = I16x8Or(src, tmp)
// => SASa SASa ... SASa SASa (shift=4)
__ por(src, tmp);
break;
}
case kAVXI8x16ShrS: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
int8_t shift = i.InputInt8(1) & 0x7;
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
// I16x8 view of I8x16
// src = AAaa AAaa ... AAaa AAaa
// tmp = aa00 aa00 ... aa00 aa00
__ Move(kScratchDoubleReg, static_cast<uint32_t>(0xff00));
__ vpsllw(tmp, src, 8);
// dst = I16x8ShrS(src, shift)
// => SAAa SAAa ... SAAa SAAa (shift=4)
__ vpshuflw(kScratchDoubleReg, kScratchDoubleReg, 0x0);
__ vpsraw(dst, src, shift);
// tmp = I16x8ShrS(tmp, shift)
// => Saa0 Saa0 ... Saa0 Saa0 (shift=4)
__ vpshufd(kScratchDoubleReg, kScratchDoubleReg, 0x0);
__ vpsraw(tmp, tmp, shift);
// dst = I16x8And(dst, 0xff00)
// => SA00 SA00 ... SA00 SA00
__ vpand(dst, dst, kScratchDoubleReg);
// tmp = I16x8ShrU(tmp, 8)
// => 00Sa 00Sa ... 00Sa 00Sa (shift=4)
__ vpsrlw(tmp, tmp, 8);
// dst = I16x8Or(dst, tmp)
// => SASa SASa ... SASa SASa (shift=4)
__ vpor(dst, dst, tmp);
break;
}
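The arithmetic shift cannot use the masking trick because the sign bit must be replicated, so each 16-bit lane is processed as two halves: psraw is correct for the high byte as-is, and for the low byte once it has been moved into the high position. A scalar model of one 16-bit lane (sketch, hypothetical helper):

#include <cstdint>

uint16_t Lane16ShrS(uint16_t lane, int shift) {  // shift in 0..7
  // High byte: the 16-bit arithmetic shift sees its true sign bit;
  // the 0xff00 mask keeps only the high byte of the result.
  uint16_t hi = static_cast<uint16_t>(static_cast<int16_t>(lane) >> shift) & 0xFF00;
  // Low byte: shift it up so psraw sees its sign bit, then bring the
  // result back down with a logical shift (psrlw in the real code).
  uint16_t up = static_cast<uint16_t>(lane << 8);
  uint16_t lo = static_cast<uint16_t>(static_cast<int16_t>(up) >> shift) >> 8;
  return hi | lo;
}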
case kSSEI8x16Add: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ paddb(i.OutputSimd128Register(), i.InputOperand(1));
......
@@ -2450,6 +2570,88 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(1));
break;
}
case kSSEI8x16Mul: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister left = i.InputSimd128Register(0);
XMMRegister right = i.InputSimd128Register(1);
XMMRegister t0 = i.ToSimd128Register(instr->TempAt(0));
XMMRegister t1 = i.ToSimd128Register(instr->TempAt(1));
// I16x8 view of I8x16
// left = AAaa AAaa ... AAaa AAaa
// right= BBbb BBbb ... BBbb BBbb
// t0 = 00AA 00AA ... 00AA 00AA
// t1 = 00BB 00BB ... 00BB 00BB
__ movaps(t0, left);
__ movaps(t1, right);
__ Move(kScratchDoubleReg, static_cast<uint32_t>(0x00ff));
__ psrlw(t0, 8);
__ psrlw(t1, 8);
// left = I16x8Mul(left, right)
// => __pp __pp ... __pp __pp
// t0 = I16x8Mul(t0, t1)
// => __PP __PP ... __PP __PP
__ pshuflw(kScratchDoubleReg, kScratchDoubleReg, 0x0);
__ pmullw(t0, t1);
__ pmullw(left, right);
__ pshufd(kScratchDoubleReg, kScratchDoubleReg, 0x0);
// t0 = I16x8Shl(t0, 8)
// => PP00 PP00 ... PP00 PP00
__ psllw(t0, 8);
// left = I16x8And(left, 0x00ff)
// => 00pp 00pp ... 00pp 00pp
__ pand(left, kScratchDoubleReg);
// left = I16x8Or(left, t0)
// => PPpp PPpp ... PPpp PPpp
__ por(left, t0);
break;
}
case kAVXI8x16Mul: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister left = i.InputSimd128Register(0);
XMMRegister right = i.InputSimd128Register(1);
XMMRegister t0 = i.ToSimd128Register(instr->TempAt(0));
XMMRegister t1 = i.ToSimd128Register(instr->TempAt(1));
// I16x8 view of I8x16
// left = AAaa AAaa ... AAaa AAaa
// right= BBbb BBbb ... BBbb BBbb
// t0 = 00AA 00AA ... 00AA 00AA
// t1 = 00BB 00BB ... 00BB 00BB
__ Move(kScratchDoubleReg, static_cast<uint32_t>(0x00ff));
__ vpsrlw(t0, left, 8);
__ vpsrlw(t1, right, 8);
// dst = I16x8Mul(left, right)
// => __pp __pp ... __pp __pp
__ vpshuflw(kScratchDoubleReg, kScratchDoubleReg, 0x0);
__ vpmullw(dst, left, right);
// t0 = I16x8Mul(t0, t1)
// => __PP __PP ... __PP __PP
__ vpmullw(t0, t0, t1);
__ vpshufd(kScratchDoubleReg, kScratchDoubleReg, 0x0);
// t0 = I16x8Shl(t0, 8)
// => PP00 PP00 ... PP00 PP00
__ vpsllw(t0, t0, 8);
// dst = I16x8And(dst, 0x00ff)
// => 00pp 00pp ... 00pp 00pp
__ vpand(dst, dst, kScratchDoubleReg);
// dst = I16x8Or(dst, t0)
// => PPpp PPpp ... PPpp PPpp
__ vpor(dst, dst, t0);
break;
}
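There is no pmullb, so each 16-bit lane yields two byte products from two pmullw multiplies: the low bytes multiply in place (the low byte of a 16-bit product is the byte product mod 256), and the high bytes are shifted down, multiplied, and shifted back up before the halves are recombined. One lane in scalar form (sketch, hypothetical helper):

#include <cstdint>

uint16_t Lane16MulBytes(uint16_t left, uint16_t right) {
  // Low bytes: the cross terms of the full product carry a factor of
  // 256, so the product's low byte is exactly lo(left) * lo(right).
  uint16_t lo = static_cast<uint16_t>((static_cast<uint32_t>(left) * right) & 0x00FF);
  // High bytes: isolate them in the low position, multiply, then move
  // the byte product back up; truncation discards the overflow bits.
  uint16_t hi = static_cast<uint16_t>(((left >> 8) * (right >> 8)) << 8);
  return hi | lo;
}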
case kSSEI8x16MinS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
......
@@ -2551,6 +2753,48 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(1));
break;
}
case kSSEI8x16ShrU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister src = i.InputSimd128Register(0);
int8_t shift = i.InputInt8(1) & 0x7;
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
// src = AAaa ... AAaa
// tmp = F0F0 ... F0F0 (shift=4)
I8x16_SPLAT(tmp, kScratchDoubleReg, 0xFFU << shift);  // no byte cast needed: the splat broadcasts only byte 0
// src = src & tmp
// => A0a0 ... A0a0
__ pand(src, tmp);
// src = src >> shift
// => 0A0a ... 0A0a (shift=4)
__ psrld(src, shift);
break;
}
case kAVXI8x16ShrU: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
int8_t shift = i.InputInt8(1) & 0x7;
XMMRegister tmp =
dst != src ? dst : i.ToSimd128Register(instr->TempAt(0));
// src = AAaa ... AAaa
// tmp = F0F0 ... F0F0 (shift=4)
I8x16_SPLAT(tmp, kScratchDoubleReg, 0xFFU << shift);
// dst = src & tmp
// => A0a0 ... A0a0
__ vpand(dst, src, tmp);
// dst = dst >> shift
// => 0A0a ... 0A0a (shift=4)
__ vpsrld(dst, dst, shift);
break;
}
#undef I8x16_SPLAT
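The unsigned right shift mirrors the left-shift trick with the mask flipped: keep the bits that stay inside each byte, then let the 32-bit psrld shift them. In scalar form (sketch):

#include <cstdint>

// Equivalent to b >> shift for a single byte; in the vector code the
// pre-mask is what stops psrld from pulling the neighboring byte's
// low bits into this lane.
uint8_t ByteShrU(uint8_t b, int shift) {
  uint8_t masked = b & (0xFFu << shift);  // shift=4 -> mask 0xF0
  return static_cast<uint8_t>(masked >> shift);
}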
case kSSEI8x16MinU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ pminub(i.OutputSimd128Register(), i.InputOperand(1));
......
......
@@ -229,6 +229,10 @@ namespace compiler {
V(IA32I8x16ExtractLane) \
V(SSEI8x16ReplaceLane) \
V(AVXI8x16ReplaceLane) \
V(SSEI8x16Shl) \
V(AVXI8x16Shl) \
V(SSEI8x16ShrS) \
V(AVXI8x16ShrS) \
V(IA32I8x16Neg) \
V(SSEI8x16Add) \
V(AVXI8x16Add) \
......
@@ -238,6 +242,8 @@ namespace compiler {
V(AVXI8x16Sub) \
V(SSEI8x16SubSaturateS) \
V(AVXI8x16SubSaturateS) \
V(SSEI8x16Mul) \
V(AVXI8x16Mul) \
V(SSEI8x16MinS) \
V(AVXI8x16MinS) \
V(SSEI8x16MaxS) \
......
@@ -254,6 +260,8 @@ namespace compiler {
V(AVXI8x16AddSaturateU) \
V(SSEI8x16SubSaturateU) \
V(AVXI8x16SubSaturateU) \
V(SSEI8x16ShrU) \
V(AVXI8x16ShrU) \
V(SSEI8x16MinU) \
V(AVXI8x16MinU) \
V(SSEI8x16MaxU) \
......
......
@@ -212,6 +212,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kSSEI8x16ReplaceLane:
case kAVXI8x16ReplaceLane:
case kIA32I8x16Neg:
case kSSEI8x16Shl:
case kAVXI8x16Shl:
case kSSEI8x16ShrS:
case kAVXI8x16ShrS:
case kSSEI8x16Add:
case kAVXI8x16Add:
case kSSEI8x16AddSaturateS:
......
@@ -220,6 +224,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI8x16Sub:
case kSSEI8x16SubSaturateS:
case kAVXI8x16SubSaturateS:
case kSSEI8x16Mul:
case kAVXI8x16Mul:
case kSSEI8x16MinS:
case kAVXI8x16MinS:
case kSSEI8x16MaxS:
......
@@ -236,6 +242,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI8x16AddSaturateU:
case kSSEI8x16SubSaturateU:
case kAVXI8x16SubSaturateU:
case kSSEI8x16ShrU:
case kAVXI8x16ShrU:
case kSSEI8x16MinU:
case kAVXI8x16MinU:
case kSSEI8x16MaxU:
......
......
@@ -208,6 +208,20 @@ void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
}
}
void VisitRRISimd(InstructionSelector* selector, Node* node,
ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
IA32OperandGenerator g(selector);
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseImmediate(OpParameter<int32_t>(node));
InstructionOperand temps[] = {g.TempSimd128Register()};
if (selector->IsSupported(AVX)) {
selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, operand1,
arraysize(temps), temps);
} else {
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1,
arraysize(temps), temps);
}
}
} // namespace
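VisitRRISimd picks the operand shape per instruction set: legacy SSE encodings are destructive, so the SSE path constrains the output to the first input (DefineSameAsFirst), while the VEX-encoded AVX forms take a separate destination (DefineAsRegister). Roughly, with illustrative mnemonics:

// SSE, two-operand, destructive:
//   pslld  xmm0, imm8          ; xmm0 = xmm0 << imm8
// AVX, three-operand, non-destructive:
//   vpslld xmm1, xmm0, imm8    ; xmm1 = xmm0 << imm8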
......
@@ -1886,12 +1900,40 @@ void InstructionSelector::VisitF32x4ExtractLane(Node* node) {
}
}
#define SIMD_I8X16_SHIFT_OPCODES(V) \
V(I8x16Shl) \
V(I8x16ShrS) \
V(I8x16ShrU)
#define VISIT_SIMD_I8X16_SHIFT(Op) \
void InstructionSelector::Visit##Op(Node* node) { \
VisitRRISimd(this, node, kAVX##Op, kSSE##Op); \
}
SIMD_I8X16_SHIFT_OPCODES(VISIT_SIMD_I8X16_SHIFT)
#undef SIMD_I8X16_SHIFT_OPCODES
#undef VISIT_SIMD_I8X16_SHIFT
void InstructionSelector::VisitI8x16Mul(Node* node) {
IA32OperandGenerator g(this);
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseRegister(node->InputAt(1));
InstructionOperand temps[] = {g.TempSimd128Register(),
g.TempSimd128Register()};
if (IsSupported(AVX)) {
Emit(kAVXI8x16Mul, g.DefineAsRegister(node), operand0, operand1,
arraysize(temps), temps);
} else {
Emit(kSSEI8x16Mul, g.DefineSameAsFirst(node), operand0, operand1,
arraysize(temps), temps);
}
}
void InstructionSelector::VisitS128Zero(Node* node) {
IA32OperandGenerator g(this);
Emit(kIA32S128Zero, g.DefineAsRegister(node));
}
#define VISIT_SIMD_SPLAT(Type) \
void InstructionSelector::Visit##Type##Splat(Node* node) { \
VisitRO(this, node, kIA32##Type##Splat); \
......
......
@@ -206,6 +206,15 @@ class OperandGenerator {
return op;
}
InstructionOperand TempSimd128Register() {
UnallocatedOperand op = UnallocatedOperand(
UnallocatedOperand::MUST_HAVE_REGISTER,
UnallocatedOperand::USED_AT_START, sequence()->NextVirtualRegister());
sequence()->MarkAsRepresentation(MachineRepresentation::kSimd128,
op.virtual_register());
return op;
}
InstructionOperand TempRegister(Register reg) {
return UnallocatedOperand(UnallocatedOperand::FIXED_REGISTER, reg.code(),
InstructionOperand::kInvalidVirtualRegister);
......
......
@@ -2219,24 +2219,28 @@ void InstructionSelector::VisitI16x8UConvertI8x16High(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI8x16Shl(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16ShrS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16SConvertI16x8(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16ShrU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI8x16Shl(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16ShrS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16ShrU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
!V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitS128Select(Node* node) { UNIMPLEMENTED(); }
......
......
@@ -1215,7 +1215,9 @@ void TurboAssembler::Move(XMMRegister dst, uint64_t src) {
push(eax);
Move(eax, Immediate(lower));
movd(dst, Operand(eax));
Move(eax, Immediate(upper));
if (upper != lower) {
Move(eax, Immediate(upper));
}
pinsrd(dst, Operand(eax), 1);
pop(eax);
} else {
......
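The Move(XMMRegister, uint64_t) tweak skips reloading eax when both 32-bit halves of the constant are equal, the common shape for splatted lane masks like the ones this lowering builds. A hedged illustration of the guard (example value, not from the commit):

#include <cstdint>

// After the first Move(eax, lower), eax already holds the right bits
// for the upper half whenever the constant is a 32-bit splat, so the
// second Move is dead and pinsrd can reuse eax as-is.
bool SecondMoveNeeded(uint64_t v) {
  uint32_t lower = static_cast<uint32_t>(v);
  uint32_t upper = static_cast<uint32_t>(v >> 32);
  return upper != lower;  // false for e.g. 0x00ff00ff00ff00ff
}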
......
@@ -1464,10 +1464,10 @@ WASM_SIMD_TEST(I8x16LeU) {
}
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(I8x16Mul) { RunI8x16BinOpTest(lower_simd, kExprI8x16Mul, Mul); }
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
void RunI8x16ShiftOpTest(LowerSimd lower_simd, WasmOpcode simd_op,
Int8ShiftOp expected_op, int shift) {
......
@@ -1484,7 +1484,7 @@ void RunI8x16ShiftOpTest(LowerSimd lower_simd, WasmOpcode simd_op,
}
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(I8x16Shl) {
RunI8x16ShiftOpTest(lower_simd, kExprI8x16Shl, LogicalShiftLeft, 1);
}
......
@@ -1497,7 +1497,7 @@ WASM_SIMD_TEST(I8x16ShrU) {
RunI8x16ShiftOpTest(lower_simd, kExprI8x16ShrU, LogicalShiftRight, 1);
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
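The reference ops the tests compare against are plain scalar byte ops; their exact definitions live elsewhere in test-run-wasm-simd.cc, so the shapes below are assumptions for illustration only:

#include <cstdint>

// Assumed shapes of the scalar reference helpers (hypothetical; the
// real Mul and LogicalShiftLeft are defined in the test harness).
int8_t Mul(int8_t a, int8_t b) { return static_cast<int8_t>(a * b); }
int8_t LogicalShiftLeft(int8_t a, int shift) {
  return static_cast<int8_t>(static_cast<uint8_t>(a) << shift);
}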
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 || \
V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
......