Commit 39ed694b authored by ahaas, committed by Commit bot

Implemented the Word64Clz TurboFan operator for x64, arm64, and mips64.

R=titzer@chromium.org

Review URL: https://codereview.chromium.org/1413463009

Cr-Commit-Position: refs/heads/master@{#31858}
parent aac8ee84
......@@ -774,6 +774,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ PokePair(i.InputRegister(1), i.InputRegister(0), slot * kPointerSize);
break;
}
case kArm64Clz:
__ Clz(i.OutputRegister64(), i.InputRegister64(0));
break;
case kArm64Clz32:
__ Clz(i.OutputRegister32(), i.InputRegister32(0));
break;
......
......@@ -18,6 +18,7 @@ namespace compiler {
V(Arm64And32) \
V(Arm64Bic) \
V(Arm64Bic32) \
V(Arm64Clz) \
V(Arm64Clz32) \
V(Arm64Cmp) \
V(Arm64Cmp32) \
......
......@@ -921,6 +921,12 @@ void InstructionSelector::VisitWord64Ror(Node* node) {
}
// ARM64 selection for Word64Clz: emits a single kArm64Clz instruction whose
// result is defined as a register and whose only input is used as a register.
void InstructionSelector::VisitWord64Clz(Node* node) {
  Arm64OperandGenerator g(this);
  Node* const value = node->InputAt(0);
  Emit(kArm64Clz, g.DefineAsRegister(node), g.UseRegister(value));
}
void InstructionSelector::VisitWord32Clz(Node* node) {
Arm64OperandGenerator g(this);
Emit(kArm64Clz32, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
......
......@@ -736,6 +736,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsWord64(node), VisitWord64Sar(node);
case IrOpcode::kWord64Ror:
return MarkAsWord64(node), VisitWord64Ror(node);
case IrOpcode::kWord64Clz:
return MarkAsWord64(node), VisitWord64Clz(node);
case IrOpcode::kWord64Equal:
return VisitWord64Equal(node);
case IrOpcode::kInt32Add:
......@@ -971,6 +973,9 @@ void InstructionSelector::VisitWord64Sar(Node* node) { UNIMPLEMENTED(); }
// Stub visitor: selecting a Word64Ror node here hits UNIMPLEMENTED().
// NOTE(review): presumably the fallback for targets without 64-bit word
// support -- confirm against the enclosing preprocessor guard.
void InstructionSelector::VisitWord64Ror(Node* node) { UNIMPLEMENTED(); }
// Stub visitor: selecting a Word64Clz node here hits UNIMPLEMENTED().
// NOTE(review): presumably the fallback for targets without 64-bit word
// support -- confirm against the enclosing preprocessor guard.
void InstructionSelector::VisitWord64Clz(Node* node) { UNIMPLEMENTED(); }
// Stub visitor: selecting a Word64Equal node here hits UNIMPLEMENTED().
// NOTE(review): presumably the fallback for targets without 64-bit word
// support -- confirm against the enclosing preprocessor guard.
void InstructionSelector::VisitWord64Equal(Node* node) { UNIMPLEMENTED(); }
......
......@@ -99,6 +99,7 @@ CheckedStoreRepresentation CheckedStoreRepresentationOf(Operator const* op) {
V(Word64Shr, Operator::kNoProperties, 2, 0, 1) \
V(Word64Sar, Operator::kNoProperties, 2, 0, 1) \
V(Word64Ror, Operator::kNoProperties, 2, 0, 1) \
V(Word64Clz, Operator::kNoProperties, 1, 0, 1) \
V(Word64Equal, Operator::kCommutative, 2, 0, 1) \
V(Int32Add, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \
V(Int32AddWithOverflow, Operator::kAssociative | Operator::kCommutative, 2, \
......
......@@ -148,6 +148,7 @@ class MachineOperatorBuilder final : public ZoneObject {
const Operator* Word64Shr();
const Operator* Word64Sar();
const Operator* Word64Ror();
const Operator* Word64Clz();
const Operator* Word64Equal();
const Operator* Int32Add();
......
......@@ -578,6 +578,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
case kMips64Clz:
__ Clz(i.OutputRegister(), i.InputRegister(0));
break;
case kMips64Dclz:
__ dclz(i.OutputRegister(), i.InputRegister(0));
break;
case kMips64Shl:
if (instr->InputAt(1)->IsRegister()) {
__ sllv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
......
......@@ -37,6 +37,7 @@ namespace compiler {
V(Mips64Sar) \
V(Mips64Ext) \
V(Mips64Dext) \
V(Mips64Dclz) \
V(Mips64Dshl) \
V(Mips64Dshr) \
V(Mips64Dsar) \
......
......@@ -332,6 +332,11 @@ void InstructionSelector::VisitWord64Ror(Node* node) {
}
// MIPS64 selection for Word64Clz: delegates to the VisitRR helper with the
// kMips64Dclz opcode.
void InstructionSelector::VisitWord64Clz(Node* node) {
  const auto opcode = kMips64Dclz;
  VisitRR(this, opcode, node);
}
void InstructionSelector::VisitInt32Add(Node* node) {
Mips64OperandGenerator g(this);
// TODO(plind): Consider multiply & add optimization from arm port.
......
......@@ -240,6 +240,7 @@
V(Word64Shr) \
V(Word64Sar) \
V(Word64Ror) \
V(Word64Clz) \
V(Int32Add) \
V(Int32AddWithOverflow) \
V(Int32Sub) \
......
......@@ -233,6 +233,7 @@ class RawMachineAssembler {
// Creates a node for the machine operator Word64Ror with inputs |a| and |b|
// through AddNode and returns the resulting node.
Node* Word64Ror(Node* a, Node* b) {
  const auto* const op = machine()->Word64Ror();
  return AddNode(op, a, b);
}
// Creates a node for the machine operator Word64Clz with the single input |a|
// through AddNode and returns the resulting node.
Node* Word64Clz(Node* a) {
  const auto* const op = machine()->Word64Clz();
  return AddNode(op, a);
}
// Creates a node for the machine operator Word64Equal with inputs |a| and |b|
// through AddNode and returns the resulting node.
Node* Word64Equal(Node* a, Node* b) {
  const auto* const op = machine()->Word64Equal();
  return AddNode(op, a, b);
}
......
......@@ -1946,6 +1946,9 @@ Type* Typer::Visitor::TypeWord64Sar(Node* node) { return Type::Internal(); }
// Typing rule for Word64Ror nodes: always Type::Internal(); |node| is not
// inspected.
Type* Typer::Visitor::TypeWord64Ror(Node* node) { return Type::Internal(); }
// Typing rule for Word64Clz nodes: always Type::Internal(); |node| is not
// inspected.
Type* Typer::Visitor::TypeWord64Clz(Node* node) { return Type::Internal(); }
// Typing rule for Word64Equal nodes: always Type::Boolean(); |node| is not
// inspected.
Type* Typer::Visitor::TypeWord64Equal(Node* node) { return Type::Boolean(); }
......
......@@ -824,6 +824,7 @@ void Verifier::Visitor::Check(Node* node) {
case IrOpcode::kWord64Shr:
case IrOpcode::kWord64Sar:
case IrOpcode::kWord64Ror:
case IrOpcode::kWord64Clz:
case IrOpcode::kWord64Equal:
case IrOpcode::kInt32Add:
case IrOpcode::kInt32AddWithOverflow:
......
......@@ -768,6 +768,13 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
case kX64Ror:
ASSEMBLE_SHIFT(rorq, 6);
break;
case kX64Lzcnt:
if (instr->InputAt(0)->IsRegister()) {
__ Lzcntq(i.OutputRegister(), i.InputRegister(0));
} else {
__ Lzcntq(i.OutputRegister(), i.InputOperand(0));
}
break;
case kX64Lzcnt32:
if (instr->InputAt(0)->IsRegister()) {
__ Lzcntl(i.OutputRegister(), i.InputRegister(0));
......
......@@ -46,6 +46,7 @@ namespace compiler {
V(X64Sar32) \
V(X64Ror) \
V(X64Ror32) \
V(X64Lzcnt) \
V(X64Lzcnt32) \
V(X64Tzcnt32) \
V(X64Popcnt32) \
......
......@@ -572,6 +572,12 @@ void InstructionSelector::VisitWord64Ror(Node* node) {
}
// x64 selection for Word64Clz: emits a single kX64Lzcnt instruction. The
// result is defined as a register; the input goes through g.Use(), and the
// code generator for kX64Lzcnt handles both register and non-register inputs.
void InstructionSelector::VisitWord64Clz(Node* node) {
  X64OperandGenerator g(this);
  Node* const value = node->InputAt(0);
  Emit(kX64Lzcnt, g.DefineAsRegister(node), g.Use(value));
}
void InstructionSelector::VisitWord32Clz(Node* node) {
X64OperandGenerator g(this);
Emit(kX64Lzcnt32, g.DefineAsRegister(node), g.Use(node->InputAt(0)));
......
......@@ -2282,6 +2282,16 @@ void Assembler::clz(Register rd, Register rs) {
}
// Emits the 64-bit count-leading-zeros instruction (rd <- clz64(rs)), choosing
// the encoding that matches the architecture variant being built.
void Assembler::dclz(Register rd, Register rs) {
  if (kArchVariant == kMips64r6) {
    // r6 encoding: SPECIAL class, DCLZ_R6 function field, sa field set to 1.
    GenInstrRegister(SPECIAL, rs, zero_reg, rd, 1, DCLZ_R6);
    return;
  }
  // Pre-r6 encoding: SPECIAL2 class. The dclz instruction requires the same
  // GPR number in the 'rd' and 'rt' fields, hence rd is passed twice.
  GenInstrRegister(SPECIAL2, rs, rd, rd, 0, DCLZ);
}
void Assembler::ins_(Register rt, Register rs, uint16_t pos, uint16_t size) {
// Should be called via MacroAssembler::Ins.
// Ins instr has 'rt' field as dest, and two uint5: msb, lsb.
......
......@@ -862,6 +862,7 @@ class Assembler : public AssemblerBase {
void movn_d(FPURegister fd, FPURegister fs, Register rt);
// Bit twiddling.
void clz(Register rd, Register rs);
void dclz(Register rd, Register rs);
void ins_(Register rt, Register rs, uint16_t pos, uint16_t size);
void ext_(Register rt, Register rs, uint16_t pos, uint16_t size);
void dext_(Register rt, Register rs, uint16_t pos, uint16_t size);
......
......@@ -394,6 +394,8 @@ enum SecondaryField {
CLZ_R6 = ((2 << 3) + 0),
CLO_R6 = ((2 << 3) + 1),
MFLO = ((2 << 3) + 2),
DCLZ_R6 = ((2 << 3) + 2),
DCLO_R6 = ((2 << 3) + 3),
DSLLV = ((2 << 3) + 4),
DSRLV = ((2 << 3) + 6),
DSRAV = ((2 << 3) + 7),
......@@ -462,6 +464,8 @@ enum SecondaryField {
MUL = ((0 << 3) + 2),
CLZ = ((4 << 3) + 0),
CLO = ((4 << 3) + 1),
DCLZ = ((4 << 3) + 4),
DCLO = ((4 << 3) + 5),
// SPECIAL3 Encoding of Function Field.
EXT = ((0 << 3) + 0),
......@@ -927,6 +931,7 @@ class Instruction {
#define FunctionFieldToBitNumber(function) (1ULL << function)
// On r6, DCLZ_R6 aliases to existing MFLO.
static const uint64_t kFunctionFieldRegisterTypeMask =
FunctionFieldToBitNumber(JR) | FunctionFieldToBitNumber(JALR) |
FunctionFieldToBitNumber(BREAK) | FunctionFieldToBitNumber(SLL) |
......@@ -1171,6 +1176,7 @@ Instruction::Type Instruction::InstructionType(TypeChecks checks) const {
switch (FunctionFieldRaw()) {
case MUL:
case CLZ:
case DCLZ:
return kRegisterType;
default:
return kUnsupported;
......
......@@ -1179,7 +1179,16 @@ void Decoder::DecodeTypeRegisterSPECIAL(Instruction* instr) {
}
break;
case MFLO:
Format(instr, "mflo 'rd");
if (instr->Bits(25, 16) == 0) {
Format(instr, "mflo 'rd");
} else {
if ((instr->FunctionFieldRaw() == DCLZ_R6) && (instr->FdValue() == 1)) {
Format(instr, "dclz 'rd, 'rs");
} else if ((instr->FunctionFieldRaw() == DCLO_R6) &&
(instr->FdValue() == 1)) {
Format(instr, "dclo 'rd, 'rs");
}
}
break;
case D_MUL_MUH_U: // Equals to DMULTU.
if (kArchVariant != kMips64r6) {
......@@ -1360,6 +1369,11 @@ void Decoder::DecodeTypeRegisterSPECIAL2(Instruction* instr) {
Format(instr, "clz 'rd, 'rs");
}
break;
case DCLZ:
if (kArchVariant != kMips64r6) {
Format(instr, "dclz 'rd, 'rs");
}
break;
default:
UNREACHABLE();
}
......
......@@ -3364,8 +3364,17 @@ void Simulator::DecodeTypeRegisterSPECIAL() {
}
SetResult(rd_reg(), alu_out);
break;
case MFLO:
SetResult(rd_reg(), get_register(LO));
case MFLO: // MFLO == DCLZ on R6.
if (kArchVariant != kMips64r6) {
DCHECK(sa() == 0);
alu_out = get_register(LO);
} else {
// MIPS spec: If no bits were set in GPR rs(), the result written to
// GPR rd() is 64.
DCHECK(sa() == 1);
alu_out = base::bits::CountLeadingZeros64(static_cast<int64_t>(rs_u()));
}
SetResult(rd_reg(), alu_out);
break;
// Instructions using HI and LO registers.
case MULT: { // MULT == D_MUL_MUH.
......@@ -3665,7 +3674,13 @@ void Simulator::DecodeTypeRegisterSPECIAL2() {
// MIPS32 spec: If no bits were set in GPR rs(), the result written to
// GPR rd is 32.
alu_out = base::bits::CountLeadingZeros32(static_cast<uint32_t>(rs_u()));
set_register(rd_reg(), alu_out);
SetResult(rd_reg(), alu_out);
break;
case DCLZ:
// MIPS64 spec: If no bits were set in GPR rs(), the result written to
// GPR rd is 64.
alu_out = base::bits::CountLeadingZeros64(static_cast<uint64_t>(rs_u()));
SetResult(rd_reg(), alu_out);
break;
default:
alu_out = 0x12345678;
......
......@@ -745,6 +745,24 @@ void Assembler::bsrl(Register dst, const Operand& src) {
}
// Emits the quadword BSR (bit scan reverse) instruction, register-source form.
// Byte sequence, in order: the REX prefix produced by emit_rex_64 for
// (dst, src), the two opcode bytes 0F BD, then the ModR/M byte for (dst, src).
void Assembler::bsrq(Register dst, Register src) {
EnsureSpace ensure_space(this);
emit_rex_64(dst, src);
emit(0x0F);
emit(0xBD);
emit_modrm(dst, src);
}
// Emits the quadword BSR (bit scan reverse) instruction, operand-source form.
// Byte sequence, in order: the REX prefix produced by emit_rex_64 for
// (dst, src), the two opcode bytes 0F BD, then the operand encoding written by
// emit_operand for (dst, src).
void Assembler::bsrq(Register dst, const Operand& src) {
EnsureSpace ensure_space(this);
emit_rex_64(dst, src);
emit(0x0F);
emit(0xBD);
emit_operand(dst, src);
}
void Assembler::bsfl(Register dst, Register src) {
EnsureSpace ensure_space(this);
emit_optional_rex_32(dst, src);
......
......@@ -847,6 +847,8 @@ class Assembler : public AssemblerBase {
// Bit operations.
void bt(const Operand& dst, Register src);
void bts(const Operand& dst, Register src);
void bsrq(Register dst, Register src);
void bsrq(Register dst, const Operand& src);
void bsrl(Register dst, Register src);
void bsrl(Register dst, const Operand& src);
void bsfl(Register dst, Register src);
......
......@@ -3159,6 +3159,36 @@ void MacroAssembler::Lzcntl(Register dst, const Operand& src) {
}
// Writes the number of leading zero bits of the 64-bit register |src| into
// |dst|. Uses the native lzcntq when the LZCNT CPU feature is available and a
// bsrq-based sequence otherwise.
void MacroAssembler::Lzcntq(Register dst, Register src) {
  if (!CpuFeatures::IsSupported(LZCNT)) {
    // Fallback: bsrq produces the index of the highest set bit. When the
    // not_zero branch is not taken (zero source), dst is preloaded with 127 so
    // that the final xor produces 64.
    Label have_bit_index;
    bsrq(dst, src);
    j(not_zero, &have_bit_index, Label::kNear);
    Set(dst, 127);  // 127^63 == 64
    bind(&have_bit_index);
    xorl(dst, Immediate(63));  // for x in [0..63], 63^x == 63 - x
    return;
  }
  CpuFeatureScope scope(this, LZCNT);
  lzcntq(dst, src);
}
// Writes the number of leading zero bits of the 64-bit operand |src| into
// |dst|. Uses the native lzcntq when the LZCNT CPU feature is available and a
// bsrq-based sequence otherwise.
void MacroAssembler::Lzcntq(Register dst, const Operand& src) {
  if (!CpuFeatures::IsSupported(LZCNT)) {
    // Fallback: bsrq produces the index of the highest set bit. When the
    // not_zero branch is not taken (zero source), dst is preloaded with 127 so
    // that the final xor produces 64.
    Label have_bit_index;
    bsrq(dst, src);
    j(not_zero, &have_bit_index, Label::kNear);
    Set(dst, 127);  // 127^63 == 64
    bind(&have_bit_index);
    xorl(dst, Immediate(63));  // for x in [0..63], 63^x == 63 - x
    return;
  }
  CpuFeatureScope scope(this, LZCNT);
  lzcntq(dst, src);
}
void MacroAssembler::Tzcntl(Register dst, Register src) {
if (CpuFeatures::IsSupported(BMI1)) {
CpuFeatureScope scope(this, BMI1);
......
......@@ -1010,6 +1010,9 @@ class MacroAssembler: public Assembler {
void Pinsrd(XMMRegister dst, Register src, int8_t imm8);
void Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);
void Lzcntq(Register dst, Register src);
void Lzcntq(Register dst, const Operand& src);
void Lzcntl(Register dst, Register src);
void Lzcntl(Register dst, const Operand& src);
......
......@@ -127,6 +127,80 @@ TEST(RunInt32Clz) {
}
#if V8_TARGET_ARCH_64_BIT
// Runs Word64Clz on one input per possible result (0 through 64) and checks
// the returned count. kInputs[i] has exactly i leading zero bits; most entries
// also set some lower bits so that only the highest set bit may matter.
TEST(RunWord64Clz) {
  BufferedRawMachineAssemblerTester<int32_t> m(kMachUint64);
  m.Return(m.Word64Clz(m.Parameter(0)));

  const uint64_t kInputs[] = {
      0x8000100000000000, 0x4000050000000000, 0x2000030000000000,
      0x1000000300000000, 0x0805000000000000, 0x0400600000000000,
      0x0200000000000000, 0x010000a000000000, 0x00800c0000000000,
      0x0040000000000000, 0x0020000d00000000, 0x00100f0000000000,
      0x0008000000000000, 0x0004100000000000, 0x0002002000000000,
      0x0001030000000000, 0x0000804000000000, 0x0000400500000000,
      0x0000205000000000, 0x0000170000000000, 0x0000087000000000,
      0x0000040500000000, 0x0000020300000000, 0x0000010100000000,
      0x0000008900000000, 0x0000004100000000, 0x0000002200000000,
      0x0000001300000000, 0x0000000800000000, 0x0000000400000000,
      0x0000000200000000, 0x0000000100000000, 0x0000000080001000,
      0x0000000040000500, 0x0000000020000300, 0x0000000010000003,
      0x0000000008050000, 0x0000000004006000, 0x0000000002000000,
      0x00000000010000a0, 0x0000000000800c00, 0x0000000000400000,
      0x000000000020000d, 0x0000000000100f00, 0x0000000000080000,
      0x0000000000041000, 0x0000000000020020, 0x0000000000010300,
      0x0000000000008040, 0x0000000000004005, 0x0000000000002050,
      0x0000000000001700, 0x0000000000000870, 0x0000000000000405,
      0x0000000000000203, 0x0000000000000101, 0x0000000000000089,
      0x0000000000000041, 0x0000000000000022, 0x0000000000000013,
      0x0000000000000008, 0x0000000000000004, 0x0000000000000002,
      0x0000000000000001, 0x0000000000000000};

  const int32_t kInputCount =
      static_cast<int32_t>(sizeof(kInputs) / sizeof(kInputs[0]));
  // The table index doubles as the expected leading-zero count.
  for (int32_t expected = 0; expected < kInputCount; ++expected) {
    CHECK_EQ(expected, m.Call(kInputs[expected]));
  }
}
#endif // V8_TARGET_ARCH_64_BIT
void TestWord32Popcnt(int32_t value, int32_t expected) {
RawMachineAssemblerTester<int32_t> m;
compiler::OptionalOperator op = m.machine()->Word32Popcnt();
......
......@@ -841,6 +841,21 @@ TEST_F(InstructionSelectorTest, Word32Clz) {
}
// Checks that a lone Word64Clz node is selected as exactly one kMips64Dclz
// instruction with one input (the parameter's virtual register) and one output
// (the node's virtual register).
TEST_F(InstructionSelectorTest, Word64Clz) {
  StreamBuilder m(this, kMachUint64, kMachUint64);
  Node* const param = m.Parameter(0);
  Node* const clz = m.Word64Clz(param);
  m.Return(clz);
  Stream stream = m.Build();
  // The instruction is only fetched after the stream size has been asserted.
  ASSERT_EQ(1U, stream.size());
  const auto instr = stream[0];
  EXPECT_EQ(kMips64Dclz, instr->arch_opcode());
  ASSERT_EQ(1U, instr->InputCount());
  EXPECT_EQ(stream.ToVreg(param), stream.ToVreg(instr->InputAt(0)));
  ASSERT_EQ(1U, instr->OutputCount());
  EXPECT_EQ(stream.ToVreg(clz), stream.ToVreg(instr->Output()));
}
TEST_F(InstructionSelectorTest, Float32Abs) {
StreamBuilder m(this, kMachFloat32, kMachFloat32);
Node* const p0 = m.Parameter(0);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment