Commit 8b5480b2 authored by Zhou, Zhiguo, committed by Commit Bot

[wasm-simd] Implement the remaining load_extend and load_splat on IA32

This CL implements the 2-lane load_extend and all load_splat
operations on IA32. The necessary assembler instructions, together with
their corresponding disassembler support and tests, are also added in this CL.
The newly added opcodes include: S8x16LoadSplat, S16x8LoadSplat,
S32x4LoadSplat, S64x2LoadSplat, I64x2Load32x2S, I64x2Load32x2U.
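
For reference, a load_splat reads a single element from memory and copies it
into every lane of the result, and the 2-lane load_extend reads two 32-bit
values and widens each to 64 bits. The sketch below illustrates the scalar
semantics for S32x4LoadSplat and I64x2Load32x2S; the helper names are
illustrative only and are not part of this CL.

// Illustrative scalar semantics (names are not part of V8):
#include <cstdint>
#include <cstring>

void S32x4LoadSplatRef(const void* mem, uint32_t out[4]) {
  uint32_t v;
  std::memcpy(&v, mem, sizeof(v));         // one 32-bit load
  for (int i = 0; i < 4; ++i) out[i] = v;  // duplicated into all four lanes
}

void I64x2Load32x2SRef(const void* mem, int64_t out[2]) {
  int32_t v[2];
  std::memcpy(v, mem, sizeof(v));  // one 64-bit load (two 32-bit lanes)
  out[0] = v[0];                   // each lane sign-extended to 64 bits
  out[1] = v[1];
}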

Bug: v8:9886
Change-Id: I0a5dae0a683985c14c433ba9d85acbd1cee6705f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1982989
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Zhiguo Zhou <zhiguo.zhou@intel.com>
Cr-Commit-Position: refs/heads/master@{#65937}
parent 164a0313
@@ -2312,6 +2312,15 @@ void Assembler::movups(Operand dst, XMMRegister src) {
emit_sse_operand(src, dst);
}
void Assembler::movddup(XMMRegister dst, Operand src) {
DCHECK(IsEnabled(SSE3));
EnsureSpace ensure_space(this);
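// movddup xmm, xmm/m64 is encoded as F2 0F 12 /r (SSE3).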
EMIT(0xF2);
EMIT(0x0F);
EMIT(0x12);
emit_sse_operand(dst, src);
}
void Assembler::shufps(XMMRegister dst, XMMRegister src, byte imm8) {
DCHECK(is_uint8(imm8));
EnsureSpace ensure_space(this);
...
@@ -1026,6 +1026,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
pextrb(Operand(dst), src, offset);
}
void pextrb(Operand dst, XMMRegister src, uint8_t offset);
// SSE3 instructions
void movddup(XMMRegister dst, Operand src);
void movddup(XMMRegister dst, XMMRegister src) { movddup(dst, Operand(src)); }
// Use SSE4_1 encoding for pextrw reg, xmm, imm8 for consistency
void pextrw(Register dst, XMMRegister src, uint8_t offset) {
pextrw(Operand(dst), src, offset);
@@ -1411,6 +1415,15 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
vinstr(0x5B, dst, xmm0, src, kF3, k0F, kWIG);
}
void vmovddup(XMMRegister dst, Operand src) {
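// Two-operand AVX form; xmm0 stands in for the unused vvvv operand, as in
// the existing vmovdqu helper below.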
vinstr(0x12, dst, xmm0, src, kF2, k0F, kWIG);
}
void vmovddup(XMMRegister dst, XMMRegister src) {
vmovddup(dst, Operand(src));
}
void vbroadcastss(XMMRegister dst, Operand src) {
vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0);
}
void vmovdqu(XMMRegister dst, Operand src) {
vinstr(0x6F, dst, xmm0, src, kF3, k0F, kWIG);
}
...
@@ -1541,6 +1541,20 @@ void TurboAssembler::Pextrd(Register dst, XMMRegister src, uint8_t imm8) {
add(esp, Immediate(kDoubleSize));
}
void TurboAssembler::Pinsrb(XMMRegister dst, Operand src, int8_t imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpinsrb(dst, dst, src, imm8);
return;
}
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
pinsrb(dst, src, imm8);
return;
}
FATAL("no AVX or SSE4.1 support");
}
void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
@@ -1571,6 +1585,27 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
add(esp, Immediate(kDoubleSize));
}
void TurboAssembler::Pinsrw(XMMRegister dst, Operand src, int8_t imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpinsrw(dst, dst, src, imm8);
return;
} else {
pinsrw(dst, src, imm8);
return;
}
}
void TurboAssembler::Vbroadcastss(XMMRegister dst, Operand src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vbroadcastss(dst, src);
return;
}
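// SSE fallback: load the 32-bit value into lane 0, then shufps with an
// all-zero selector copies lane 0 into all four lanes.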
movss(dst, src);
shufps(dst, dst, static_cast<byte>(0));
}
void TurboAssembler::Lzcnt(Register dst, Operand src) {
if (CpuFeatures::IsSupported(LZCNT)) {
CpuFeatureScope scope(this, LZCNT);
...
@@ -294,6 +294,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP3_XO(Punpcklbw, punpcklbw)
AVX_OP3_XO(Punpckhbw, punpckhbw)
AVX_OP3_XO(Punpckldq, punpckldq)
AVX_OP3_XO(Punpcklqdq, punpcklqdq)
AVX_OP3_XO(Pxor, pxor)
AVX_OP3_XO(Andps, andps)
AVX_OP3_XO(Andnps, andnps)
@@ -362,6 +363,12 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
} \
UNREACHABLE(); \
}
#define AVX_OP2_XO_SSE3(macro_name, name) \
AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, XMMRegister, SSE3) \
AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, Operand, SSE3)
AVX_OP2_XO_SSE3(Movddup, movddup)
#undef AVX_OP2_XO_SSE3
#define AVX_OP2_XO_SSE4(macro_name, name) \
AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, XMMRegister, SSE4_1) \
AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, Operand, SSE4_1)
@@ -369,8 +376,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP2_XO_SSE4(Ptest, ptest)
AVX_OP2_XO_SSE4(Pmovsxbw, pmovsxbw)
AVX_OP2_XO_SSE4(Pmovsxwd, pmovsxwd)
AVX_OP2_XO_SSE4(Pmovsxdq, pmovsxdq)
AVX_OP2_XO_SSE4(Pmovzxbw, pmovzxbw)
AVX_OP2_XO_SSE4(Pmovzxwd, pmovzxwd)
AVX_OP2_XO_SSE4(Pmovzxdq, pmovzxdq)
#undef AVX_OP2_WITH_TYPE_SCOPE
#undef AVX_OP2_XO_SSE4
@@ -397,10 +406,19 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Pextrb(Register dst, XMMRegister src, uint8_t imm8);
void Pextrw(Register dst, XMMRegister src, uint8_t imm8);
void Pextrd(Register dst, XMMRegister src, uint8_t imm8);
void Pinsrb(XMMRegister dst, Register src, int8_t imm8) {
Pinsrb(dst, Operand(src), imm8);
}
void Pinsrb(XMMRegister dst, Operand src, int8_t imm8);
void Pinsrd(XMMRegister dst, Register src, uint8_t imm8) {
Pinsrd(dst, Operand(src), imm8);
}
void Pinsrd(XMMRegister dst, Operand src, uint8_t imm8);
void Pinsrw(XMMRegister dst, Register src, int8_t imm8) {
Pinsrw(dst, Operand(src), imm8);
}
void Pinsrw(XMMRegister dst, Operand src, int8_t imm8);
void Vbroadcastss(XMMRegister dst, Operand src);
// Expression support
// cvtsi2sd instruction only writes to the low 64-bit of dst register, which
...
@@ -82,8 +82,10 @@
#define SSE4_RM_INSTRUCTION_LIST(V) \
V(pmovsxbw, 66, 0F, 38, 20) \
V(pmovsxwd, 66, 0F, 38, 23) \
V(pmovsxdq, 66, 0F, 38, 25) \
V(pmovzxbw, 66, 0F, 38, 30) \
V(pmovzxwd, 66, 0F, 38, 33) \
V(pmovzxdq, 66, 0F, 38, 35) \
V(ptest, 66, 0F, 38, 17)
#endif  // V8_CODEGEN_IA32_SSE_INSTR_H_
@@ -3745,6 +3745,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mov(esp, tmp);
break;
}
case kIA32S8x16LoadSplat: {
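// Load the byte into lane 0, then shuffle it into every byte lane with an
// all-zero pshufb mask (each mask byte selects source byte 0).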
__ Pinsrb(i.OutputSimd128Register(), i.MemoryOperand(), 0);
__ Pxor(kScratchDoubleReg, kScratchDoubleReg);
__ Pshufb(i.OutputSimd128Register(), kScratchDoubleReg);
break;
}
case kIA32S16x8LoadSplat: {
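// Load the word into lane 0, broadcast it across the low four words with
// pshuflw, then duplicate the low 64 bits into the high half with punpcklqdq.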
__ Pinsrw(i.OutputSimd128Register(), i.MemoryOperand(), 0);
__ Pshuflw(i.OutputSimd128Register(), i.OutputSimd128Register(),
static_cast<uint8_t>(0));
__ Punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
break;
}
case kIA32S32x4LoadSplat: {
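// Vbroadcastss splats the 32-bit load (AVX), falling back to movss + shufps.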
__ Vbroadcastss(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kIA32S64x2LoadSplat: {
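// Movddup loads 64 bits and duplicates them into both halves of the
// destination.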
__ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kIA32I16x8Load8x8S: {
__ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
break;
@@ -3761,6 +3782,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kIA32I64x2Load32x2S: {
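// Sign-extend two 32-bit lanes from memory to two 64-bit lanes.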
__ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kIA32I64x2Load32x2U: {
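// Zero-extend two 32-bit lanes from memory to two 64-bit lanes.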
__ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kIA32S32x4Swizzle: {
DCHECK_EQ(2, instr->InputCount());
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
...
@@ -348,10 +348,16 @@ namespace compiler {
V(IA32S128AndNot) \
V(IA32S8x16Swizzle) \
V(IA32S8x16Shuffle) \
V(IA32S8x16LoadSplat) \
V(IA32S16x8LoadSplat) \
V(IA32S32x4LoadSplat) \
V(IA32S64x2LoadSplat) \
V(IA32I16x8Load8x8S) \
V(IA32I16x8Load8x8U) \
V(IA32I32x4Load16x4S) \
V(IA32I32x4Load16x4U) \
V(IA32I64x2Load32x2S) \
V(IA32I64x2Load32x2U) \
V(IA32S32x4Swizzle) \
V(IA32S32x4Shuffle) \
V(IA32S16x8Blend) \
...
@@ -390,10 +390,16 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32Movsd:
case kIA32Movdqu:
// Moves are used for memory load/store operations.
case kIA32S8x16LoadSplat:
case kIA32S16x8LoadSplat:
case kIA32S32x4LoadSplat:
case kIA32S64x2LoadSplat:
case kIA32I16x8Load8x8S:
case kIA32I16x8Load8x8U:
case kIA32I32x4Load16x4S:
case kIA32I32x4Load16x4U:
case kIA32I64x2Load32x2S:
case kIA32I64x2Load32x2U:
return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
case kIA32Peek:
...
@@ -350,18 +350,16 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
InstructionCode opcode = kArchNop;
switch (params.transformation) {
case LoadTransformation::kS8x16LoadSplat:
-// TODO(zhiguo.zhou@intel.com): Implement the rest of load splat and load
-// extend operations.
-UNIMPLEMENTED();
opcode = kIA32S8x16LoadSplat;
break;
case LoadTransformation::kS16x8LoadSplat:
-UNIMPLEMENTED();
opcode = kIA32S16x8LoadSplat;
break;
case LoadTransformation::kS32x4LoadSplat:
-UNIMPLEMENTED();
opcode = kIA32S32x4LoadSplat;
break;
case LoadTransformation::kS64x2LoadSplat:
-UNIMPLEMENTED();
opcode = kIA32S64x2LoadSplat;
break;
case LoadTransformation::kI16x8Load8x8S:
opcode = kIA32I16x8Load8x8S;
@@ -376,10 +374,10 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
opcode = kIA32I32x4Load16x4U;
break;
case LoadTransformation::kI64x2Load32x2S:
-UNIMPLEMENTED();
opcode = kIA32I64x2Load32x2S;
break;
case LoadTransformation::kI64x2Load32x2U:
-UNIMPLEMENTED();
opcode = kIA32I64x2Load32x2U;
break;
default:
UNREACHABLE();
...
@@ -692,6 +692,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x18:
AppendToBuffer("vbroadcastss %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x99:
AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
@@ -846,6 +850,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x12:
AppendToBuffer("vmovddup %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x51:
AppendToBuffer("vsqrtsd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
@@ -2430,6 +2438,12 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("movsd %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (b2 == 0x12) {
data += 3;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("movddup %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (b2 == 0x5A) {
data += 3;
int mod, regop, rm;
...
@@ -579,6 +579,8 @@ TEST(DisasmIa320) {
CpuFeatureScope scope(&assm, SSE3);
__ haddps(xmm1, xmm0);
__ haddps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ movddup(xmm1, Operand(eax, 5));
__ movddup(xmm1, xmm2);
}
}
@@ -770,6 +772,9 @@
__ vcvttps2dq(xmm1, xmm0);
__ vcvttps2dq(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vmovddup(xmm1, xmm2);
__ vmovddup(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vbroadcastss(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vmovdqu(xmm0, Operand(ebx, ecx, times_4, 10000));
__ vmovdqu(Operand(ebx, ecx, times_4, 10000), xmm0);
__ vmovd(xmm0, edi);
...
@@ -3304,9 +3304,6 @@ WASM_SIMD_TEST(SimdLoadStoreLoadMemargOffset) {
}
}
-#if !V8_TARGET_ARCH_IA32
-// TODO(zhiguo.zhou@intel.com): Add the tests on IA32 once these operations are
-// implemented.
template <typename T>
void RunLoadSplatTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode op) {
@@ -3343,7 +3340,6 @@ WASM_SIMD_TEST_NO_LOWERING(S32x4LoadSplat) {
WASM_SIMD_TEST_NO_LOWERING(S64x2LoadSplat) {
RunLoadSplatTest<int64_t>(execution_tier, lower_simd, kExprS64x2LoadSplat);
}
-#endif  // !V8_TARGET_ARCH_IA32
template <typename S, typename T>
void RunLoadExtendTest(ExecutionTier execution_tier, LowerSimd lower_simd,
@@ -3388,7 +3384,6 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4Load16x4S) {
kExprI32x4Load16x4S);
}
-#if !V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2U) {
RunLoadExtendTest<uint32_t, uint64_t>(execution_tier, lower_simd,
kExprI64x2Load32x2U);
@@ -3398,7 +3393,6 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2S) {
RunLoadExtendTest<int32_t, int64_t>(execution_tier, lower_simd,
kExprI64x2Load32x2S);
}
-#endif  // !V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
V8_TARGET_ARCH_ARM
...