Commit 603ade14, authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Improve codegen for all_true and any_true

Based on feedback in https://github.com/WebAssembly/simd/issues/189 and
inspired by cranelift's codegen, we reduce instruction count by 1 for
both types of operations - all_true goes from 6 -> 5, any_true from 4 ->
3. The main transformation is to change a sequence of movq + ptest +
cmovq to ptest + setcc. Unfortunately, we cannot cut the instruction
count further: setcc writes only the low byte of its destination, so the
destination register still has to be zeroed first.

Change-Id: Idc2540dbec755c7a7ff5069955f74e978190161d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2100994
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66710}
parent b53bf811
...@@ -591,14 +591,12 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, ...@@ -591,14 +591,12 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
do { \ do { \
CpuFeatureScope sse_scope(tasm(), SSE4_1); \ CpuFeatureScope sse_scope(tasm(), SSE4_1); \
Register dst = i.OutputRegister(); \ Register dst = i.OutputRegister(); \
Register tmp1 = i.TempRegister(0); \ XMMRegister tmp = i.TempSimd128Register(0); \
XMMRegister tmp2 = i.TempSimd128Register(1); \
__ movq(tmp1, Immediate(1)); \
__ xorq(dst, dst); \ __ xorq(dst, dst); \
__ Pxor(tmp2, tmp2); \ __ Pxor(tmp, tmp); \
__ opcode(tmp2, i.InputSimd128Register(0)); \ __ opcode(tmp, i.InputSimd128Register(0)); \
__ Ptest(tmp2, tmp2); \ __ Ptest(tmp, tmp); \
__ cmovq(zero, dst, tmp1); \ __ setcc(equal, dst); \
} while (false) } while (false)
// This macro will directly emit the opcode if the shift is an immediate - the // This macro will directly emit the opcode if the shift is an immediate - the
...@@ -3922,11 +3920,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3922,11 +3920,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
CpuFeatureScope sse_scope(tasm(), SSE4_1); CpuFeatureScope sse_scope(tasm(), SSE4_1);
Register dst = i.OutputRegister(); Register dst = i.OutputRegister();
XMMRegister src = i.InputSimd128Register(0); XMMRegister src = i.InputSimd128Register(0);
Register tmp = i.TempRegister(0);
__ xorq(tmp, tmp); __ xorq(dst, dst);
__ movq(dst, Immediate(1)); __ Ptest(src, src);
__ ptest(src, src); __ setcc(not_equal, dst);
__ cmovq(zero, dst, tmp);
break; break;
} }
// Need to split up all the different lane structures because the // Need to split up all the different lane structures because the
......
...@@ -2863,9 +2863,8 @@ SIMD_BINOP_ONE_TEMP_LIST(VISIT_SIMD_BINOP_ONE_TEMP) ...@@ -2863,9 +2863,8 @@ SIMD_BINOP_ONE_TEMP_LIST(VISIT_SIMD_BINOP_ONE_TEMP)
#define VISIT_SIMD_ANYTRUE(Opcode) \ #define VISIT_SIMD_ANYTRUE(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \ void InstructionSelector::Visit##Opcode(Node* node) { \
X64OperandGenerator g(this); \ X64OperandGenerator g(this); \
InstructionOperand temps[] = {g.TempRegister()}; \
Emit(kX64##Opcode, g.DefineAsRegister(node), \ Emit(kX64##Opcode, g.DefineAsRegister(node), \
g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \ g.UseUniqueRegister(node->InputAt(0))); \
} }
SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE) SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE)
#undef VISIT_SIMD_ANYTRUE #undef VISIT_SIMD_ANYTRUE
...@@ -2874,7 +2873,7 @@ SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE) ...@@ -2874,7 +2873,7 @@ SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE)
#define VISIT_SIMD_ALLTRUE(Opcode) \ #define VISIT_SIMD_ALLTRUE(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \ void InstructionSelector::Visit##Opcode(Node* node) { \
X64OperandGenerator g(this); \ X64OperandGenerator g(this); \
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \ InstructionOperand temps[] = {g.TempSimd128Register()}; \
Emit(kX64##Opcode, g.DefineAsRegister(node), \ Emit(kX64##Opcode, g.DefineAsRegister(node), \
g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \ g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment