Commit df546bd1, authored by Deepti Gandluri, committed by Commit Bot

[wasm-simd] Add ExtractLaneU operations

This CL implements i8x16.extract_lane_u, i16x8.extract_lane_u operations by
changing the default narrow extract operations to be unsigned. The
sign-extended extracts are implemented on top of the unsigned extracts
with an additional extend compiler node.
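For IA32/X64, the generated code for the signed extracts effectively remains the same (the first listing below appears to be before this change, the second after):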

0x389332bc32a3    63  660f3a14c900   pextrb rcx,xmm1,0
0x389332bc32a9    69  0fbec9         movsxbl rcx,rcx

0x389332bc32a3    63  660f3a14c900   pextrb rcx,xmm1,0
0x389332bc32a9    69  0fbec9         movsxbl rcx,rcx

On ARM, this adds an additional sxt instruction for the signed extracts.

Bug: v8:8460
Change-Id: I67f14b2b860ff8cc86ffbb2f65c7ef7de32da83f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1846711
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64172}
parent 5df672a6
......@@ -2051,7 +2051,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmI16x8ExtractLane: {
__ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16,
__ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU16,
i.InputInt8(1));
break;
}
......@@ -2217,7 +2217,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmI8x16ExtractLane: {
__ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8,
__ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU8,
i.InputInt8(1));
break;
}
......
......@@ -2129,7 +2129,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArm64I16x8ExtractLane: {
__ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(),
__ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(),
i.InputInt8(1));
break;
}
......@@ -2243,7 +2243,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArm64I8x16ExtractLane: {
__ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(),
__ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(),
i.InputInt8(1));
break;
}
......
......@@ -2640,7 +2640,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kIA32I16x8ExtractLane: {
Register dst = i.OutputRegister();
__ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
__ movsx_w(dst, dst);
break;
}
case kSSEI16x8ReplaceLane: {
......@@ -3019,7 +3018,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kIA32I8x16ExtractLane: {
Register dst = i.OutputRegister();
__ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
__ movsx_b(dst, dst);
break;
}
case kSSEI8x16ReplaceLane: {
......
......@@ -3118,7 +3118,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
CpuFeatureScope sse_scope(tasm(), SSE4_1);
Register dst = i.OutputRegister();
__ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
__ movsxwl(dst, dst);
break;
}
case kX64I16x8ReplaceLane: {
......@@ -3324,7 +3323,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
CpuFeatureScope sse_scope(tasm(), SSE4_1);
Register dst = i.OutputRegister();
__ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
__ movsxbl(dst, dst);
break;
}
case kX64I8x16ReplaceLane: {
......
......@@ -4505,13 +4505,23 @@ Node* WasmGraphBuilder::SimdLaneOp(wasm::WasmOpcode opcode, uint8_t lane,
case wasm::kExprI32x4ReplaceLane:
return graph()->NewNode(mcgraph()->machine()->I32x4ReplaceLane(lane),
inputs[0], inputs[1]);
case wasm::kExprI16x8ExtractLane:
case wasm::kExprI16x8ExtractLaneS:
return graph()->NewNode(
mcgraph()->machine()->SignExtendWord16ToInt32(),
graph()->NewNode(mcgraph()->machine()->I16x8ExtractLane(lane),
inputs[0]));
case wasm::kExprI16x8ExtractLaneU:
return graph()->NewNode(mcgraph()->machine()->I16x8ExtractLane(lane),
inputs[0]);
case wasm::kExprI16x8ReplaceLane:
return graph()->NewNode(mcgraph()->machine()->I16x8ReplaceLane(lane),
inputs[0], inputs[1]);
case wasm::kExprI8x16ExtractLane:
case wasm::kExprI8x16ExtractLaneS:
return graph()->NewNode(
mcgraph()->machine()->SignExtendWord8ToInt32(),
graph()->NewNode(mcgraph()->machine()->I8x16ExtractLane(lane),
inputs[0]));
case wasm::kExprI8x16ExtractLaneU:
return graph()->NewNode(mcgraph()->machine()->I8x16ExtractLane(lane),
inputs[0]);
case wasm::kExprI8x16ReplaceLane:
......
......@@ -1069,11 +1069,13 @@ class WasmDecoder : public Decoder {
case kExprI32x4ReplaceLane:
num_lanes = 4;
break;
case kExprI16x8ExtractLane:
case kExprI16x8ExtractLaneS:
case kExprI16x8ExtractLaneU:
case kExprI16x8ReplaceLane:
num_lanes = 8;
break;
case kExprI8x16ExtractLane:
case kExprI8x16ExtractLaneS:
case kExprI8x16ExtractLaneU:
case kExprI8x16ReplaceLane:
num_lanes = 16;
break;
......@@ -2700,8 +2702,10 @@ class WasmFullDecoder : public WasmDecoder<validate> {
break;
}
case kExprI32x4ExtractLane:
case kExprI16x8ExtractLane:
case kExprI8x16ExtractLane: {
case kExprI16x8ExtractLaneS:
case kExprI16x8ExtractLaneU:
case kExprI8x16ExtractLaneS:
case kExprI8x16ExtractLaneU: {
len = SimdExtractLane(opcode, kWasmI32);
break;
}
......
......@@ -2264,9 +2264,22 @@ class ThreadImpl {
EXTRACT_LANE_CASE(F32x4, f32x4)
EXTRACT_LANE_CASE(I64x2, i64x2)
EXTRACT_LANE_CASE(I32x4, i32x4)
EXTRACT_LANE_CASE(I16x8, i16x8)
EXTRACT_LANE_CASE(I8x16, i8x16)
#undef EXTRACT_LANE_CASE
// Interpreter cases for the narrow extract-lane opcodes (i8x16 / i16x8), whose
// lane value has to be widened to 32 bits before it is pushed.
//
//   format        - opcode shape prefix (I16x8, I8x16)
//   name          - suffix of the Simd128::to_<name>() accessor
//   sign          - opcode suffix, S (sign-extend) or U (zero-extend)
//   extended_type - 32-bit type of the pushed value
//   type          - lane-width type with the signedness of the extension
//
// The lane is first converted to `type` and only then widened to
// `extended_type`. Casting the stored lane straight to uint32_t would
// sign-extend a negative lane whenever the lane storage is a signed type
// (e.g. int8_t(-1) -> 0xFFFFFFFF instead of 0xFF); going through the
// lane-width type gives the right result whatever the storage signedness is.
#define EXTRACT_LANE_EXTEND_CASE(format, name, sign, extended_type, type) \
  case kExpr##format##ExtractLane##sign: {                                \
    SimdLaneImmediate<Decoder::kNoValidate> imm(decoder, code->at(pc));   \
    *len += 1;                                                            \
    WasmValue val = Pop();                                                \
    Simd128 s = val.to_s128();                                            \
    auto ss = s.to_##name();                                              \
    auto res = static_cast<type>(ss.val[LANE(imm.lane, ss)]);             \
    Push(WasmValue(static_cast<extended_type>(res)));                     \
    return true;                                                          \
  }
EXTRACT_LANE_EXTEND_CASE(I16x8, i16x8, S, int32_t, int16_t)
EXTRACT_LANE_EXTEND_CASE(I16x8, i16x8, U, uint32_t, uint16_t)
EXTRACT_LANE_EXTEND_CASE(I8x16, i8x16, S, int32_t, int8_t)
EXTRACT_LANE_EXTEND_CASE(I8x16, i8x16, U, uint32_t, uint8_t)
#undef EXTRACT_LANE_EXTEND_CASE
#define BINOP_CASE(op, name, stype, count, expr) \
case kExpr##op: { \
WasmValue v2 = Pop(); \
......
......@@ -274,7 +274,9 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_F32x4_OP(ReplaceLane, "replace_lane")
CASE_I64x2_OP(ExtractLane, "extract_lane")
CASE_I64x2_OP(ReplaceLane, "replace_lane")
CASE_SIMDI_OP(ExtractLane, "extract_lane")
CASE_I32x4_OP(ExtractLane, "extract_lane")
CASE_SIGN_OP(I16x8, ExtractLane, "extract_lane")
CASE_SIGN_OP(I8x16, ExtractLane, "extract_lane")
CASE_SIMDI_OP(ReplaceLane, "replace_lane")
CASE_SIGN_OP(SIMDI, Min, "min")
CASE_SIGN_OP(I64x2, Min, "min")
......
......@@ -439,8 +439,10 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(F32x4RecipSqrtApprox, 0xfde1, s_s)
#define FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \
V(I8x16ExtractLane, 0xfd05, _) \
V(I16x8ExtractLane, 0xfd09, _) \
V(I8x16ExtractLaneS, 0xfd05, _) \
V(I8x16ExtractLaneU, 0xfd06, _) \
V(I16x8ExtractLaneS, 0xfd09, _) \
V(I16x8ExtractLaneU, 0xfd0a, _) \
V(I32x4ExtractLane, 0xfd0d, _) \
V(I64x2ExtractLane, 0xfd10, _) \
V(F32x4ExtractLane, 0xfd13, _) \
......
......@@ -304,8 +304,10 @@ void PrintWasmText(const WasmModule* module, const ModuleWireBytes& wire_bytes,
break;
}
case kExprI8x16ExtractLane:
case kExprI16x8ExtractLane:
case kExprI8x16ExtractLaneS:
case kExprI8x16ExtractLaneU:
case kExprI16x8ExtractLaneS:
case kExprI16x8ExtractLaneU:
case kExprI32x4ExtractLane:
case kExprI64x2ExtractLane:
case kExprF32x4ExtractLane:
......
......@@ -386,10 +386,17 @@ bool ExpectFused(ExecutionTier tier) {
} // namespace
#define WASM_SIMD_CHECK_LANE(TYPE, value, LANE_TYPE, lane_value, lane_index) \
WASM_IF(WASM_##LANE_TYPE##_NE(WASM_GET_LOCAL(lane_value), \
WASM_SIMD_##TYPE##_EXTRACT_LANE( \
lane_index, WASM_GET_LOCAL(value))), \
#define WASM_SIMD_CHECK_LANE_S(TYPE, value, LANE_TYPE, lane_value, lane_index) \
WASM_IF(WASM_##LANE_TYPE##_NE(WASM_GET_LOCAL(lane_value), \
WASM_SIMD_##TYPE##_EXTRACT_LANE( \
lane_index, WASM_GET_LOCAL(value))), \
WASM_RETURN1(WASM_ZERO))
// Unsigned-extract variant of the lane check; unsigned extracts exist only for
// the I8x16 and I16x8 shapes. Emits an `if` that returns zero from the
// generated function when the local `expected_local` differs from lane `lane`
// of the SIMD local `simd_local`, read with the shape's EXTRACT_LANE_U macro.
#define WASM_SIMD_CHECK_LANE_U(SHAPE, simd_local, SCALAR, expected_local, lane) \
  WASM_IF(                                                                      \
      WASM_##SCALAR##_NE(                                                       \
          WASM_GET_LOCAL(expected_local),                                       \
          WASM_SIMD_##SHAPE##_EXTRACT_LANE_U(lane, WASM_GET_LOCAL(simd_local))), \
      WASM_RETURN1(WASM_ZERO))
#define TO_BYTE(val) static_cast<byte>(val)
......@@ -428,13 +435,17 @@ bool ExpectFused(ExecutionTier tier) {
#define WASM_SIMD_I16x8_SPLAT(x) WASM_SIMD_SPLAT(I16x8, x)
#define WASM_SIMD_I16x8_EXTRACT_LANE(lane, x) \
x, WASM_SIMD_OP(kExprI16x8ExtractLane), TO_BYTE(lane)
x, WASM_SIMD_OP(kExprI16x8ExtractLaneS), TO_BYTE(lane)
#define WASM_SIMD_I16x8_EXTRACT_LANE_U(lane, x) \
x, WASM_SIMD_OP(kExprI16x8ExtractLaneU), TO_BYTE(lane)
#define WASM_SIMD_I16x8_REPLACE_LANE(lane, x, y) \
x, y, WASM_SIMD_OP(kExprI16x8ReplaceLane), TO_BYTE(lane)
#define WASM_SIMD_I8x16_SPLAT(x) WASM_SIMD_SPLAT(I8x16, x)
#define WASM_SIMD_I8x16_EXTRACT_LANE(lane, x) \
x, WASM_SIMD_OP(kExprI8x16ExtractLane), TO_BYTE(lane)
x, WASM_SIMD_OP(kExprI8x16ExtractLaneS), TO_BYTE(lane)
#define WASM_SIMD_I8x16_EXTRACT_LANE_U(lane, x) \
x, WASM_SIMD_OP(kExprI8x16ExtractLaneU), TO_BYTE(lane)
#define WASM_SIMD_I8x16_REPLACE_LANE(lane, x, y) \
x, y, WASM_SIMD_OP(kExprI8x16ReplaceLane), TO_BYTE(lane)
......@@ -2404,10 +2415,10 @@ WASM_SIMD_TEST_NO_LOWERING(I8x16ShrU) {
format, WASM_GET_LOCAL(src1), WASM_GET_LOCAL(src2), \
WASM_SIMD_BINOP(kExprI##format##Ne, WASM_GET_LOCAL(mask), \
WASM_GET_LOCAL(zero)))), \
WASM_SIMD_CHECK_LANE(I##format, mask, I32, val2, 0), \
WASM_SIMD_CHECK_LANE(I##format, mask, I32, val1, 1), \
WASM_SIMD_CHECK_LANE(I##format, mask, I32, val1, 2), \
WASM_SIMD_CHECK_LANE(I##format, mask, I32, val2, 3), WASM_ONE); \
WASM_SIMD_CHECK_LANE_S(I##format, mask, I32, val2, 0), \
WASM_SIMD_CHECK_LANE_S(I##format, mask, I32, val1, 1), \
WASM_SIMD_CHECK_LANE_S(I##format, mask, I32, val1, 2), \
WASM_SIMD_CHECK_LANE_S(I##format, mask, I32, val2, 3), WASM_ONE); \
\
CHECK_EQ(1, r.Call(0x12, 0x34)); \
}
......@@ -2442,10 +2453,10 @@ WASM_SIMD_SELECT_TEST(8x16)
WASM_SET_LOCAL(mask, WASM_SIMD_SELECT(format, WASM_GET_LOCAL(src1), \
WASM_GET_LOCAL(src2), \
WASM_GET_LOCAL(mask))), \
WASM_SIMD_CHECK_LANE(I##format, mask, I32, val2, 0), \
WASM_SIMD_CHECK_LANE(I##format, mask, I32, combined, 1), \
WASM_SIMD_CHECK_LANE(I##format, mask, I32, combined, 2), \
WASM_SIMD_CHECK_LANE(I##format, mask, I32, val2, 3), WASM_ONE); \
WASM_SIMD_CHECK_LANE_S(I##format, mask, I32, val2, 0), \
WASM_SIMD_CHECK_LANE_S(I##format, mask, I32, combined, 1), \
WASM_SIMD_CHECK_LANE_S(I##format, mask, I32, combined, 2), \
WASM_SIMD_CHECK_LANE_S(I##format, mask, I32, val2, 3), WASM_ONE); \
\
CHECK_EQ(1, r.Call(0x12, 0x34, 0x32)); \
}
......@@ -3283,8 +3294,48 @@ WASM_SIMD_TEST_NO_LOWERING(I16x8GtUMixed) {
UnsignedGreater);
}
// Defines a SIMD test named I16X8ExtractLane<Sign>. The generated function
// splats the i32 local `int_val` into the i16x8 local `simd_val`, compares
// lanes 0, 2, 4 and 6 against `int_val` via WASM_SIMD_CHECK_LANE_U, and yields
// WASM_ONE when it reaches the end of the body. The test then calls it for
// every value produced by FOR_<Type>_INPUTS and expects 1 each time.
// NOTE(review): `Sign` only contributes to the test name; every check uses the
// unsigned extract, so neither instantiation goes through
// WASM_SIMD_CHECK_LANE_S.
// NOTE(review): `int_val` is obtained from r.AllocateLocal() rather than from
// the function parameter, so the value passed to r.Call(x) may never reach the
// splat - confirm against WasmRunner's local numbering.
#define WASM_EXTRACT_I16x8_TEST(Sign, Type) \
WASM_SIMD_TEST(I16X8ExtractLane##Sign) { \
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd); \
byte int_val = r.AllocateLocal(kWasmI32); \
byte simd_val = r.AllocateLocal(kWasmS128); \
BUILD(r, \
WASM_SET_LOCAL(simd_val, \
WASM_SIMD_I16x8_SPLAT(WASM_GET_LOCAL(int_val))), \
WASM_SIMD_CHECK_LANE_U(I16x8, simd_val, I32, int_val, 0), \
WASM_SIMD_CHECK_LANE_U(I16x8, simd_val, I32, int_val, 2), \
WASM_SIMD_CHECK_LANE_U(I16x8, simd_val, I32, int_val, 4), \
WASM_SIMD_CHECK_LANE_U(I16x8, simd_val, I32, int_val, 6), WASM_ONE); \
FOR_##Type##_INPUTS(x) { CHECK_EQ(1, r.Call(x)); } \
}
// NOTE(review): the "S" test is driven by unsigned inputs and the second
// suffix is "I" rather than "U" - presumably (S, INT16) and (U, UINT16) were
// intended; confirm before relying on these names.
WASM_EXTRACT_I16x8_TEST(S, UINT16) WASM_EXTRACT_I16x8_TEST(I, INT16)
#undef WASM_EXTRACT_I16x8_TEST
// Defines a SIMD test named I8x16ExtractLane<Sign>. The generated function
// splats the i32 local `int_val` into the i8x16 local `simd_val`, compares
// lanes 1, 3, 5, 7, 9, 10, 11 and 13 against `int_val` via
// WASM_SIMD_CHECK_LANE_U, and yields WASM_ONE when it reaches the end of the
// body. The test then calls it for every value produced by FOR_<Type>_INPUTS
// and expects 1 each time.
// NOTE(review): `Sign` only contributes to the test name; every check uses the
// unsigned extract, so neither instantiation goes through
// WASM_SIMD_CHECK_LANE_S.
// NOTE(review): `int_val` is obtained from r.AllocateLocal() rather than from
// the function parameter, so the value passed to r.Call(x) may never reach the
// splat - confirm against WasmRunner's local numbering.
#define WASM_EXTRACT_I8x16_TEST(Sign, Type) \
WASM_SIMD_TEST(I8x16ExtractLane##Sign) { \
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd); \
byte int_val = r.AllocateLocal(kWasmI32); \
byte simd_val = r.AllocateLocal(kWasmS128); \
BUILD(r, \
WASM_SET_LOCAL(simd_val, \
WASM_SIMD_I8x16_SPLAT(WASM_GET_LOCAL(int_val))), \
WASM_SIMD_CHECK_LANE_U(I8x16, simd_val, I32, int_val, 1), \
WASM_SIMD_CHECK_LANE_U(I8x16, simd_val, I32, int_val, 3), \
WASM_SIMD_CHECK_LANE_U(I8x16, simd_val, I32, int_val, 5), \
WASM_SIMD_CHECK_LANE_U(I8x16, simd_val, I32, int_val, 7), \
WASM_SIMD_CHECK_LANE_U(I8x16, simd_val, I32, int_val, 9), \
WASM_SIMD_CHECK_LANE_U(I8x16, simd_val, I32, int_val, 10), \
WASM_SIMD_CHECK_LANE_U(I8x16, simd_val, I32, int_val, 11), \
WASM_SIMD_CHECK_LANE_U(I8x16, simd_val, I32, int_val, 13), \
WASM_ONE); \
FOR_##Type##_INPUTS(x) { CHECK_EQ(1, r.Call(x)); } \
}
// NOTE(review): the "S" test is driven by unsigned inputs and the second
// suffix is "I" rather than "U" - presumably (S, INT8) and (U, UINT8) were
// intended; confirm before relying on these names.
WASM_EXTRACT_I8x16_TEST(S, UINT8) WASM_EXTRACT_I8x16_TEST(I, INT8)
#undef WASM_EXTRACT_I8x16_TEST
#undef WASM_SIMD_TEST
#undef WASM_SIMD_CHECK_LANE
#undef WASM_SIMD_CHECK_LANE_S
#undef WASM_SIMD_CHECK_LANE_U
#undef TO_BYTE
#undef WASM_SIMD_OP
#undef WASM_SIMD_SPLAT
......@@ -3307,9 +3358,11 @@ WASM_SIMD_TEST_NO_LOWERING(I16x8GtUMixed) {
#undef WASM_SIMD_I32x4_REPLACE_LANE
#undef WASM_SIMD_I16x8_SPLAT
#undef WASM_SIMD_I16x8_EXTRACT_LANE
#undef WASM_SIMD_I16x8_EXTRACT_LANE_U
#undef WASM_SIMD_I16x8_REPLACE_LANE
#undef WASM_SIMD_I8x16_SPLAT
#undef WASM_SIMD_I8x16_EXTRACT_LANE
#undef WASM_SIMD_I8x16_EXTRACT_LANE_U
#undef WASM_SIMD_I8x16_REPLACE_LANE
#undef WASM_SIMD_S8x16_SHUFFLE_OP
#undef WASM_SIMD_LOAD_MEM
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment