Commit dfa0f31a authored by Ambroise Vincent, committed by Commit Bot

[arm64] Add FJCVTZS support

This includes the instruction opcode, its use in TF, its support in the
simulator and the detection of the associated CPU feature.

The instruction can be tested in the simulator with the new
--sim-arm64-optional-features flag.

Change-Id: I6047fa16696394fe0ced4535f7788d2c8716a18c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2222348
Reviewed-by: Ross McIlroy <rmcilroy@chromium.org>
Reviewed-by: Georg Neis <neis@chromium.org>
Commit-Queue: Georg Neis <neis@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68261}
parent 539979f4
......@@ -75,7 +75,8 @@ static V8_INLINE void __cpuid(int cpu_info[4], int info_type) {
#endif // !V8_LIBC_MSVCRT
#elif V8_HOST_ARCH_ARM || V8_HOST_ARCH_MIPS || V8_HOST_ARCH_MIPS64
#elif V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64 || V8_HOST_ARCH_MIPS || \
V8_HOST_ARCH_MIPS64
#if V8_OS_LINUX
......@@ -108,6 +109,51 @@ static V8_INLINE void __cpuid(int cpu_info[4], int info_type) {
#define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT)
#define HWCAP_LPAE (1 << 20)
#endif // V8_HOST_ARCH_ARM
#if V8_HOST_ARCH_ARM64
// Local definitions of the AArch64 Linux hwcap bits, one bit per optional CPU
// capability. The value returned by ReadELFHWCaps() is tested against these
// masks.
// NOTE(review): presumably these are spelled out here so this file does not
// depend on the kernel header being installed - confirm.
// See <uapi/asm/hwcap.h> kernel header.
/*
* HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP
*/
#define HWCAP_FP (1 << 0)
#define HWCAP_ASIMD (1 << 1)
#define HWCAP_EVTSTRM (1 << 2)
#define HWCAP_AES (1 << 3)
#define HWCAP_PMULL (1 << 4)
#define HWCAP_SHA1 (1 << 5)
#define HWCAP_SHA2 (1 << 6)
#define HWCAP_CRC32 (1 << 7)
#define HWCAP_ATOMICS (1 << 8)
#define HWCAP_FPHP (1 << 9)
#define HWCAP_ASIMDHP (1 << 10)
#define HWCAP_CPUID (1 << 11)
#define HWCAP_ASIMDRDM (1 << 12)
// Bit tested by the CPU constructor to set has_jscvt_ (FJCVTZS support).
#define HWCAP_JSCVT (1 << 13)
#define HWCAP_FCMA (1 << 14)
#define HWCAP_LRCPC (1 << 15)
#define HWCAP_DCPOP (1 << 16)
#define HWCAP_SHA3 (1 << 17)
#define HWCAP_SM3 (1 << 18)
#define HWCAP_SM4 (1 << 19)
#define HWCAP_ASIMDDP (1 << 20)
#define HWCAP_SHA512 (1 << 21)
#define HWCAP_SVE (1 << 22)
#define HWCAP_ASIMDFHM (1 << 23)
#define HWCAP_DIT (1 << 24)
#define HWCAP_USCAT (1 << 25)
#define HWCAP_ILRCPC (1 << 26)
#define HWCAP_FLAGM (1 << 27)
#define HWCAP_SSBS (1 << 28)
#define HWCAP_SB (1 << 29)
#define HWCAP_PACA (1 << 30)
// Uses an unsigned long literal: shifting a plain (signed) int 1 left by 31
// would not be representable where int is 32 bits wide.
#define HWCAP_PACG (1UL << 31)
#endif // V8_HOST_ARCH_ARM64
#if V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64
static uint32_t ReadELFHWCaps() {
uint32_t result = 0;
#if V8_GLIBC_PREREQ(2, 16)
......@@ -136,7 +182,7 @@ static uint32_t ReadELFHWCaps() {
return result;
}
#endif // V8_HOST_ARCH_ARM
#endif // V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64
#if V8_HOST_ARCH_MIPS
int __detect_fp64_mode(void) {
......@@ -298,7 +344,8 @@ static bool HasListItem(const char* list, const char* item) {
#endif // V8_OS_LINUX
#endif // V8_HOST_ARCH_ARM || V8_HOST_ARCH_MIPS || V8_HOST_ARCH_MIPS64
#endif // V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64 ||
// V8_HOST_ARCH_MIPS || V8_HOST_ARCH_MIPS64
CPU::CPU()
: stepping_(0),
......@@ -337,6 +384,7 @@ CPU::CPU()
has_vfp_(false),
has_vfp3_(false),
has_vfp3_d32_(false),
has_jscvt_(false),
is_fp64_mode_(false),
has_non_stop_time_stamp_counter_(false),
has_msa_(false) {
......@@ -609,6 +657,19 @@ CPU::CPU()
// Windows makes high-resolution thread timing information available in
// user-space.
has_non_stop_time_stamp_counter_ = true;
#elif V8_OS_LINUX
// Try to extract the list of CPU features from ELF hwcaps.
uint32_t hwcaps = ReadELFHWCaps();
if (hwcaps != 0) {
has_jscvt_ = (hwcaps & HWCAP_JSCVT) != 0;
} else {
// Try to fallback to "Features" CPUInfo field
CPUInfo cpu_info;
char* features = cpu_info.ExtractField("Features");
has_jscvt_ = HasListItem(features, "jscvt");
delete[] features;
}
#endif // V8_OS_WIN
#elif V8_HOST_ARCH_PPC || V8_HOST_ARCH_PPC64
......
......@@ -110,6 +110,7 @@ class V8_BASE_EXPORT CPU final {
bool has_vfp() const { return has_vfp_; }
bool has_vfp3() const { return has_vfp3_; }
bool has_vfp3_d32() const { return has_vfp3_d32_; }
bool has_jscvt() const { return has_jscvt_; }
// mips features
bool is_fp64_mode() const { return is_fp64_mode_; }
......@@ -153,6 +154,7 @@ class V8_BASE_EXPORT CPU final {
bool has_vfp_;
bool has_vfp3_;
bool has_vfp3_d32_;
bool has_jscvt_;
bool is_fp64_mode_;
bool has_non_stop_time_stamp_counter_;
bool has_msa_;
......
......@@ -41,19 +41,60 @@
namespace v8 {
namespace internal {
// Converts the value of --sim-arm64-optional-features into a bit mask of
// optional CPU features for the simulator to report:
//   "none" -> no optional feature bits,
//   "all"  -> one bit set for each of the NUMBER_OF_CPU_FEATURES features.
// Any other value is reported on stderr together with the accepted values and
// then aborts via FATAL.
static unsigned SimulatorFeaturesFromCommandLine() {
  const char* const requested = FLAG_sim_arm64_optional_features;

  const bool wants_all = strcmp(requested, "all") == 0;
  if (wants_all) return (1u << NUMBER_OF_CPU_FEATURES) - 1;

  const bool wants_none = strcmp(requested, "none") == 0;
  if (wants_none) return 0;

  // Unknown value: tell the user what was seen and what is accepted.
  fprintf(
      stderr,
      "Error: unrecognised value for --sim-arm64-optional-features ('%s').\n",
      requested);
  fputs(
      "Supported values are: none\n"
      " all\n",
      stderr);
  FATAL("sim-arm64-optional-features");
}
// Returns the bit mask of optional CPU features that the compiler was told
// the target supports. The only feature inspected is JSCVT, which is reported
// when the compiler defines __ARM_FEATURE_JCVT; otherwise the mask is empty.
static constexpr unsigned CpuFeaturesFromCompiler() {
#if defined(__ARM_FEATURE_JCVT)
  return 1u << JSCVT;
#else
  return 0u;
#endif
}
// -----------------------------------------------------------------------------
// CpuFeatures implementation.
// Computes the set of optional AArch64 CPU features and stores it in
// supported_.
//
// cross_compile: when true (snapshot generation), only the features implied by
//   the compiler configuration are enabled and no other probing is done.
//   Otherwise the simulator build takes its features from the command line,
//   and a native build combines runtime CPU detection with the compiler
//   configuration.
void CpuFeatures::ProbeImpl(bool cross_compile) {
  // Start from an empty feature set; everything below only adds bits.
  supported_ = 0;

  // Only use statically determined features for cross compile (snapshot).
  // Note: there must be no bare early return ahead of this block, otherwise
  // the compiler-derived features would never be recorded.
  if (cross_compile) {
    supported_ |= CpuFeaturesFromCompiler();
    return;
  }

  // We used to probe for coherent cache support, but on older CPUs it
  // causes crashes (crbug.com/524337), and newer CPUs don't even have
  // the feature any more.

#ifdef USE_SIMULATOR
  supported_ |= SimulatorFeaturesFromCommandLine();
#else
  // Probe for additional features at runtime.
  base::CPU cpu;
  unsigned runtime = 0;
  if (cpu.has_jscvt()) {
    runtime |= 1u << JSCVT;
  }

  // Use the best of the features found by CPU detection and those inferred from
  // the build system.
  supported_ |= CpuFeaturesFromCompiler();
  supported_ |= runtime;
#endif  // USE_SIMULATOR
}
void CpuFeatures::PrintTarget() {}
......@@ -2714,6 +2755,11 @@ void Assembler::fcvtxn2(const VRegister& vd, const VRegister& vn) {
Emit(NEON_Q | format | NEON_FCVTXN | Rn(vn) | Rd(vd));
}
// Emits an FJCVTZS instruction that reads vn and writes rd.
// rd must be a W register and vn a 1D (double-precision) register; both are
// debug-checked.
void Assembler::fjcvtzs(const Register& rd, const VRegister& vn) {
  DCHECK(rd.IsW() && vn.Is1D());
  // Opcode combined with the source and destination register fields.
  const auto encoding = FJCVTZS | Rn(vn) | Rd(rd);
  Emit(encoding);
}
#define NEON_FP2REGMISC_FCVT_LIST(V) \
V(fcvtnu, NEON_FCVTNU, FCVTNU) \
V(fcvtns, NEON_FCVTNS, FCVTNS) \
......
......@@ -1750,6 +1750,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// FP convert to signed integer, nearest with ties to even.
void fcvtns(const Register& rd, const VRegister& vn);
// FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
void fjcvtzs(const Register& rd, const VRegister& vn);
// FP convert to unsigned integer, nearest with ties to even.
void fcvtnu(const Register& rd, const VRegister& vn);
......
......@@ -1325,7 +1325,8 @@ enum FPIntegerConvertOp : uint32_t {
FMOV_xd = FMOV_ws | SixtyFourBits | FP64,
FMOV_dx = FMOV_sw | SixtyFourBits | FP64,
FMOV_d1_x = FPIntegerConvertFixed | SixtyFourBits | 0x008F0000,
FMOV_x_d1 = FPIntegerConvertFixed | SixtyFourBits | 0x008E0000
FMOV_x_d1 = FPIntegerConvertFixed | SixtyFourBits | 0x008E0000,
FJCVTZS = FPIntegerConvertFixed | FP64 | 0x001E0000
};
// Conversion between fixed point and floating point.
......
......@@ -538,7 +538,6 @@ void Decoder<V>::DecodeFP(Instruction* instr) {
(instr->Mask(0x20C60000) == 0x00840000) ||
(instr->Mask(0xA0C60000) == 0x80060000) ||
(instr->Mask(0xA0C60000) == 0x00860000) ||
(instr->Mask(0xA0C60000) == 0x00460000) ||
(instr->Mask(0xA0CE0000) == 0x80860000) ||
(instr->Mask(0xA0CE0000) == 0x804E0000) ||
(instr->Mask(0xA0CE0000) == 0x000E0000) ||
......
......@@ -2263,6 +2263,11 @@ void TurboAssembler::TruncateDoubleToI(Isolate* isolate, Zone* zone,
DoubleRegister double_input,
StubCallMode stub_mode,
LinkRegisterStatus lr_status) {
if (CpuFeatures::IsSupported(JSCVT)) {
Fjcvtzs(result.W(), double_input);
return;
}
Label done;
// Try to convert the double to an int64. If successful, the bottom 32 bits
......
......@@ -1009,6 +1009,12 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
fcvtzs(vd, vn, fbits);
}
// Macro-assembler wrapper for the fjcvtzs instruction emitter.
// Debug-checks that macro instructions are currently allowed and that rd is
// not the zero register, then forwards both operands unchanged to
// fjcvtzs(rd, vn).
void Fjcvtzs(const Register& rd, const VRegister& vn) {
DCHECK(allow_macro_instructions());
DCHECK(!rd.IsZero());
fjcvtzs(rd, vn);
}
inline void Fcvtzu(const Register& rd, const VRegister& fn);
void Fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0) {
DCHECK(allow_macro_instructions());
......
......@@ -27,7 +27,7 @@ enum CpuFeature {
POPCNT,
ATOM,
#elif V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64
#elif V8_TARGET_ARCH_ARM
// - Standard configurations. The baseline is ARMv6+VFPv2.
ARMv7, // ARMv7-A + VFPv3-D32 + NEON
ARMv7_SUDIV, // ARMv7-A + VFPv4-D32 + NEON + SUDIV
......@@ -39,6 +39,9 @@ enum CpuFeature {
VFP32DREGS = ARMv7,
SUDIV = ARMv7_SUDIV,
#elif V8_TARGET_ARCH_ARM64
JSCVT,
#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
FPU,
FP64FPU,
......
......@@ -1377,6 +1377,10 @@ void DisassemblingDecoder::VisitFPIntegerConvert(Instruction* instr) {
mnemonic = "ucvtf";
form = form_fr;
break;
case FJCVTZS:
mnemonic = "fjcvtzs";
form = form_rf;
break;
}
Format(instr, mnemonic, form);
}
......
......@@ -2756,6 +2756,9 @@ void Simulator::VisitFPIntegerConvert(Instruction* instr) {
case FCVTZU_xd:
set_xreg(dst, FPToUInt64(dreg(src), FPZero));
break;
case FJCVTZS:
set_wreg(dst, FPToFixedJS(dreg(src)));
break;
case FMOV_ws:
set_wreg(dst, sreg_bits(src));
break;
......
......@@ -2179,6 +2179,7 @@ class Simulator : public DecoderVisitor, public SimulatorBase {
int64_t FPToInt64(double value, FPRounding rmode);
uint32_t FPToUInt32(double value, FPRounding rmode);
uint64_t FPToUInt64(double value, FPRounding rmode);
int32_t FPToFixedJS(double value);
template <typename T>
T FPAdd(T op1, T op2);
......
......@@ -3342,6 +3342,65 @@ LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
return dst;
}
// Simulates the FJCVTZS conversion of a double to a 32-bit signed integer.
//
// The value is rounded toward zero and then reduced modulo 2^32 into the
// int32 range. NZCV is updated as a side effect: N, C and V are always
// cleared, and Z is set only when the conversion is exact.
//
// value: the double-precision input.
// Returns the converted 32-bit result; +/-0, +/-Infinity and NaN yield 0.
int32_t Simulator::FPToFixedJS(double value) {
  // The Z-flag is set when the conversion from double precision floating-point
  // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
  // outside the bounds of a 32-bit integer, or isn't an exact integer then the
  // Z-flag is unset.
  int Z = 1;
  int32_t result;

  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
      (value == kFP64NegativeInfinity)) {
    // +/- zero and infinity all return zero, however -0 and +/- Infinity also
    // unset the Z-flag.
    result = 0;
    if ((value != 0.0) || std::signbit(value)) {
      Z = 0;
    }
  } else if (std::isnan(value)) {
    // NaN values unset the Z-flag and set the result to 0.
    result = 0;
    Z = 0;
  } else {
    // All other values are converted to an integer representation, rounded
    // toward zero. The conversion is inexact if that changed the value.
    const double int_result = std::trunc(value);
    const bool inexact = (int_result != value);

    // Constrain the value into the range [INT32_MIN, INT32_MAX] by removing
    // the nearest multiple of 2^32. std::round almost does this on its own,
    // but it rounds halfway cases away from zero: a quotient of -0.5 would
    // round to -1 and produce +2^31, which is out of range. Every halfway
    // case (integer congruent to 2^31 modulo 2^32) must map to INT32_MIN, so
    // it is handled explicitly.
    const double mod_const = static_cast<double>(UINT64_C(1) << 32);
    const double quotient = int_result / mod_const;
    const double mod_error = quotient - std::floor(quotient);
    double constrained;
    if (mod_error == 0.5) {
      constrained = INT32_MIN;
    } else {
      constrained = int_result - mod_const * std::round(quotient);
    }

    DCHECK(std::floor(constrained) == constrained);
    DCHECK(constrained >= INT32_MIN);
    DCHECK(constrained <= INT32_MAX);

    // Take the bottom 32 bits of the result as a 32-bit integer.
    result = static_cast<int32_t>(constrained);

    if ((int_result < INT32_MIN) || (int_result > INT32_MAX) || inexact) {
      // If the integer result is out of range or the conversion isn't exact,
      // take exception and unset the Z-flag.
      FPProcessException();
      Z = 0;
    }
  }

  nzcv().SetN(0);
  nzcv().SetZ(Z);
  nzcv().SetC(0);
  nzcv().SetV(0);

  return result;
}
LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
......
......@@ -1073,7 +1073,7 @@ DEFINE_BOOL(young_generation_large_objects, true,
"allocates large objects by default in the young generation large "
"object space")
// assembler-ia32.cc / assembler-arm.cc / assembler-x64.cc
// assembler-ia32.cc / assembler-arm.cc / assembler-arm64.cc / assembler-x64.cc
DEFINE_BOOL(debug_code, DEBUG_BOOL,
"generate extra code (assertions) for debugging")
DEFINE_BOOL(code_comments, false,
......@@ -1102,6 +1102,9 @@ DEFINE_BOOL(force_long_branches, false,
DEFINE_STRING(mcpu, "auto", "enable optimization for specific cpu")
DEFINE_BOOL(partial_constant_pool, true,
"enable use of partial constant pools (X64 only)")
DEFINE_STRING(sim_arm64_optional_features, "none",
"enable optional features on the simulator for testing: none or "
"all")
// Controlling source positions for Torque/CSA code.
DEFINE_BOOL(enable_source_at_csa_bind, false,
......
......@@ -11131,6 +11131,107 @@ TEST(fcvtzs) {
CHECK_EQUAL_64(0x8000000000000400UL, x30);
}
static void FjcvtzsHelper(uint64_t value, uint64_t expected,
uint32_t expected_z) {
SETUP();
START();
__ Fmov(d0, bit_cast<double>(value));
__ Fjcvtzs(w0, d0);
__ Mrs(x1, NZCV);
END();
RUN();
CHECK_EQUAL_64(expected, x0);
CHECK_EQUAL_32(expected_z, w1);
}
// Exercises Fjcvtzs through FjcvtzsHelper, first on hand-picked bit patterns
// and then on one value per possible exponent. Each call passes the raw bits
// of the double input, the expected integer result and the expected NZCV
// value (ZFlag when the conversion is expected to be exact, NoFlag otherwise).
TEST(fjcvtzs) {
// Simple values.
FjcvtzsHelper(0x0000000000000000, 0, ZFlag); // 0.0
FjcvtzsHelper(0x0010000000000000, 0, NoFlag); // The smallest normal value.
FjcvtzsHelper(0x3fdfffffffffffff, 0, NoFlag); // The value just below 0.5.
FjcvtzsHelper(0x3fe0000000000000, 0, NoFlag); // 0.5
FjcvtzsHelper(0x3fe0000000000001, 0, NoFlag); // The value just above 0.5.
FjcvtzsHelper(0x3fefffffffffffff, 0, NoFlag); // The value just below 1.0.
FjcvtzsHelper(0x3ff0000000000000, 1, ZFlag); // 1.0
FjcvtzsHelper(0x3ff0000000000001, 1, NoFlag); // The value just above 1.0.
FjcvtzsHelper(0x3ff8000000000000, 1, NoFlag); // 1.5
FjcvtzsHelper(0x4024000000000000, 10, ZFlag); // 10
FjcvtzsHelper(0x7fefffffffffffff, 0, NoFlag); // The largest finite value.
// Infinity.
FjcvtzsHelper(0x7ff0000000000000, 0, NoFlag);
// NaNs.
// - Quiet NaNs
FjcvtzsHelper(0x7ff923456789abcd, 0, NoFlag);
FjcvtzsHelper(0x7ff8000000000000, 0, NoFlag);
// - Signalling NaNs
FjcvtzsHelper(0x7ff123456789abcd, 0, NoFlag);
FjcvtzsHelper(0x7ff0000000000001, 0, NoFlag);
// Subnormals.
// - A recognisable bit pattern.
FjcvtzsHelper(0x000123456789abcd, 0, NoFlag);
// - The largest subnormal value.
FjcvtzsHelper(0x000fffffffffffff, 0, NoFlag);
// - The smallest subnormal value.
FjcvtzsHelper(0x0000000000000001, 0, NoFlag);
// The same values again, but negated (sign bit set). Note that -0.0 is
// expected to give NoFlag, unlike +0.0.
FjcvtzsHelper(0x8000000000000000, 0, NoFlag);
FjcvtzsHelper(0x8010000000000000, 0, NoFlag);
FjcvtzsHelper(0xbfdfffffffffffff, 0, NoFlag);
FjcvtzsHelper(0xbfe0000000000000, 0, NoFlag);
FjcvtzsHelper(0xbfe0000000000001, 0, NoFlag);
FjcvtzsHelper(0xbfefffffffffffff, 0, NoFlag);
FjcvtzsHelper(0xbff0000000000000, 0xffffffff, ZFlag);
FjcvtzsHelper(0xbff0000000000001, 0xffffffff, NoFlag);
FjcvtzsHelper(0xbff8000000000000, 0xffffffff, NoFlag);
FjcvtzsHelper(0xc024000000000000, 0xfffffff6, ZFlag);
FjcvtzsHelper(0xffefffffffffffff, 0, NoFlag);
FjcvtzsHelper(0xfff0000000000000, 0, NoFlag);
FjcvtzsHelper(0xfff923456789abcd, 0, NoFlag);
FjcvtzsHelper(0xfff8000000000000, 0, NoFlag);
FjcvtzsHelper(0xfff123456789abcd, 0, NoFlag);
FjcvtzsHelper(0xfff0000000000001, 0, NoFlag);
FjcvtzsHelper(0x800123456789abcd, 0, NoFlag);
FjcvtzsHelper(0x800fffffffffffff, 0, NoFlag);
FjcvtzsHelper(0x8000000000000001, 0, NoFlag);
// Test floating-point numbers of every possible exponent. Most of the
// expected values are zero, but there is a range of exponents where the
// result is a shifted part of this mantissa.
uint64_t mantissa = 0x0001234567890abc;
// For an unbiased exponent between 0 and 52, only some of the top bits of
// the mantissa lie above the binary point, so the mantissa is shifted right
// down to just those bits (and the implicit leading one is added). Above 52,
// the whole mantissa is shifted left; from 52 + 64 onwards the expectation
// is simply zero. In every case only the low 32 bits of the integer are
// expected in the result.
int first_exp_boundary = 52;
int second_exp_boundary = first_exp_boundary + 64;
// `exponent` is the biased 11-bit exponent field; `e` is the unbiased value.
for (int exponent = 0; exponent < 2048; exponent++) {
int e = exponent - 1023;
uint64_t expected = 0;
if (e < 0) {
expected = 0;
} else if (e <= first_exp_boundary) {
expected = (UINT64_C(1) << e) | (mantissa >> (52 - e));
expected &= 0xffffffff;
} else if (e < second_exp_boundary) {
expected = (mantissa << (e - 52)) & 0xffffffff;
} else {
expected = 0;
}
uint64_t value = (static_cast<uint64_t>(exponent) << 52) | mantissa;
// NoFlag throughout: each generated value is expected to convert inexactly
// (non-integral, outside the int32 range, or NaN for the top exponent).
FjcvtzsHelper(value, expected, NoFlag);
// Same magnitude with the sign bit set; the expected result is the two's
// complement negation, truncated to 32 bits.
FjcvtzsHelper(value | kDSignMask, (-expected) & 0xffffffff, NoFlag);
}
}
TEST(fcvtzu) {
INIT_V8();
SETUP();
......
......@@ -1802,6 +1802,7 @@ TEST_(fcvt_scvtf_ucvtf) {
COMPARE(fcvtzs(x4, s3, 15), "fcvtzs x4, s3, #15");
COMPARE(fcvtzs(w6, d5, 32), "fcvtzs w6, d5, #32");
COMPARE(fcvtzs(w6, s5, 32), "fcvtzs w6, s5, #32");
COMPARE(fjcvtzs(w0, d1), "fjcvtzs w0, d1");
COMPARE(fcvtzu(w2, d1, 1), "fcvtzu w2, d1, #1");
COMPARE(fcvtzu(w2, s1, 1), "fcvtzu w2, s1, #1");
COMPARE(fcvtzu(x4, d3, 15), "fcvtzu x4, d3, #15");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment