Commit 4648b83c authored by Zhou, Zhiguo and committed by Commit Bot

[wasm-simd] Implement load extend with 4 and 8 lanes on IA32

This CL implements 4 of the 6 load extend operations. The added
opcodes include: I16x8Load8x8S, I16x8Load8x8U, I32x4Load16x4S,
I32x4Load16x4U.
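
For reference, each of these operations reads 64 bits from memory and
widens every lane to twice its width, signed or unsigned. A minimal
scalar sketch of the I16x8Load8x8S semantics (illustrative only, not
code from this CL; the other three differ just in lane width and
signedness, and the helper name below is made up):

  #include <cstdint>
  #include <cstring>

  // Scalar model of I16x8Load8x8S: load 8 bytes, sign-extend each
  // byte to a 16-bit lane. (Illustrative helper, not part of this CL.)
  void I16x8Load8x8S_Scalar(const uint8_t* mem, int16_t out[8]) {
    int8_t bytes[8];
    std::memcpy(bytes, mem, sizeof(bytes));  // the 64-bit memory read
    for (int i = 0; i < 8; ++i) out[i] = static_cast<int16_t>(bytes[i]);
  }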

Bug: v8:9886
Change-Id: I9961f97325168e3a0036e1b282b769cc65b06ffb
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1981329
Commit-Queue: Zhiguo Zhou <zhiguo.zhou@intel.com>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65743}
parent 8d511cbd
@@ -3734,6 +3734,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ mov(esp, tmp);
      break;
    }
    case kIA32I16x8Load8x8S: {
      __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32I16x8Load8x8U: {
      __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32I32x4Load16x4S: {
      __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32I32x4Load16x4U: {
      __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32S32x4Swizzle: {
      DCHECK_EQ(2, instr->InputCount());
      __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
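
The Pmov* macro-assembler calls above emit the SSE4.1 extending loads
(pmovsxbw, pmovzxbw, pmovsxwd, pmovzxwd), each taking a 64-bit memory
operand and widening its lanes into the destination XMM register. A
rough intrinsics analogue of what kIA32I16x8Load8x8S boils down to
(a sketch for illustration, not code from this CL; the Pmov* helpers
may also pick AVX encodings when available):

#include <emmintrin.h>   // _mm_loadl_epi64 (SSE2)
#include <smmintrin.h>   // _mm_cvtepi8_epi16 (SSE4.1)

// Roughly pmovsxbw xmm, m64: load 8 bytes, sign-extend each byte
// into a 16-bit lane of the result. (Illustrative helper.)
__m128i LoadExtend8x8S(const void* mem) {
  __m128i low = _mm_loadl_epi64(static_cast<const __m128i*>(mem));
  return _mm_cvtepi8_epi16(low);
}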
@@ -347,6 +347,10 @@ namespace compiler {
V(AVXS128Select) \
V(IA32S8x16Swizzle) \
V(IA32S8x16Shuffle) \
V(IA32I16x8Load8x8S) \
V(IA32I16x8Load8x8U) \
V(IA32I32x4Load16x4S) \
V(IA32I32x4Load16x4U) \
V(IA32S32x4Swizzle) \
V(IA32S32x4Shuffle) \
V(IA32S16x8Blend) \
@@ -388,7 +388,11 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kIA32Movss:
    case kIA32Movsd:
    case kIA32Movdqu:
      // Moves are used for memory load/store operations.
    case kIA32I16x8Load8x8S:
    case kIA32I16x8Load8x8U:
    case kIA32I32x4Load16x4S:
    case kIA32I32x4Load16x4U:
      return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
    case kIA32Peek:
@@ -336,6 +336,62 @@ void InstructionSelector::VisitAbortCSAAssert(Node* node) {
  Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), edx));
}

void InstructionSelector::VisitLoadTransform(Node* node) {
  LoadTransformParameters params = LoadTransformParametersOf(node->op());
  InstructionCode opcode = kArchNop;
  switch (params.transformation) {
    case LoadTransformation::kS8x16LoadSplat:
      // TODO(zhiguo.zhou@intel.com): Implement the rest of load splat and load
      // extend operations.
      UNIMPLEMENTED();
      break;
    case LoadTransformation::kS16x8LoadSplat:
      UNIMPLEMENTED();
      break;
    case LoadTransformation::kS32x4LoadSplat:
      UNIMPLEMENTED();
      break;
    case LoadTransformation::kS64x2LoadSplat:
      UNIMPLEMENTED();
      break;
    case LoadTransformation::kI16x8Load8x8S:
      opcode = kIA32I16x8Load8x8S;
      break;
    case LoadTransformation::kI16x8Load8x8U:
      opcode = kIA32I16x8Load8x8U;
      break;
    case LoadTransformation::kI32x4Load16x4S:
      opcode = kIA32I32x4Load16x4S;
      break;
    case LoadTransformation::kI32x4Load16x4U:
      opcode = kIA32I32x4Load16x4U;
      break;
    case LoadTransformation::kI64x2Load32x2S:
      UNIMPLEMENTED();
      break;
    case LoadTransformation::kI64x2Load32x2U:
      UNIMPLEMENTED();
      break;
    default:
      UNREACHABLE();
  }

  // IA32 supports unaligned loads.
  DCHECK_NE(params.kind, LoadKind::kUnaligned);
  // Trap handler is not supported on IA32.
  DCHECK_NE(params.kind, LoadKind::kProtected);

  IA32OperandGenerator g(this);
  InstructionOperand outputs[1];
  outputs[0] = g.DefineAsRegister(node);
  InstructionOperand inputs[3];
  size_t input_count = 0;
  AddressingMode mode =
      g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
  InstructionCode code = opcode | AddressingModeField::encode(mode);
  Emit(code, 1, outputs, input_count, inputs);
}

void InstructionSelector::VisitLoad(Node* node) {
  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
@@ -2632,9 +2632,6 @@ void InstructionSelector::VisitF64x2UConvertI64x2(Node* node) {
  UNIMPLEMENTED();
}

void InstructionSelector::VisitS128AndNot(Node* node) { UNIMPLEMENTED(); }

#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }
#endif  // !V8_TARGET_ARCH_ARM

#if !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); }
#endif  // !V8_TARGET_ARCH_IA32
@@ -3310,7 +3310,9 @@ WASM_SIMD_TEST(SimdLoadStoreLoadMemargOffset) {
}
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_IA32
// TODO(zhiguo.zhou@intel.com): Add the tests on IA32 once these operations are
// implemented.
template <typename T>
void RunLoadSplatTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode op) {
@@ -3347,6 +3349,7 @@ WASM_SIMD_TEST_NO_LOWERING(S32x4LoadSplat) {
WASM_SIMD_TEST_NO_LOWERING(S64x2LoadSplat) {
RunLoadSplatTest<int64_t>(execution_tier, lower_simd, kExprS64x2LoadSplat);
}
#endif // !V8_TARGET_ARCH_IA32
template <typename S, typename T>
void RunLoadExtendTest(ExecutionTier execution_tier, LowerSimd lower_simd,
@@ -3391,6 +3394,7 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4Load16x4S) {
kExprI32x4Load16x4S);
}
#if !V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2U) {
RunLoadExtendTest<uint32_t, uint64_t>(execution_tier, lower_simd,
kExprI64x2Load32x2U);
@@ -3400,7 +3404,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2S) {
RunLoadExtendTest<int32_t, int64_t>(execution_tier, lower_simd,
kExprI64x2Load32x2S);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
#endif // !V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
    V8_TARGET_ARCH_ARM