X64/Win64: Alternative implementation of fmod in general.

Review URL: http://codereview.chromium.org/303034 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3116 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

X64/Win64: Alternative implementation of fmod in general.
Review URL: http://codereview.chromium.org/303034 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3116 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
6f1d641f · lrn@chromium.org · d50fbb63 · 6f1d641f · 6f1d641f · 6f1d641f
Commit 6f1d641f authored Oct 23, 2009 by lrn@chromium.org
13 changed files
--- a/src/conversions-inl.h
+++ b/src/conversions-inl.h
@@ -84,7 +84,7 @@ int32_t DoubleToInt32(double x) {
  static const double two32 = 4294967296.0;
  static const double two31 = 2147483648.0;
  if (!isfinite(x) || x == 0) return 0;
-  if (x < 0 || x >= two32) x = fmod(x, two32);
+  if (x < 0 || x >= two32) x = modulo(x, two32);
  x = (x >= 0) ? floor(x) : ceil(x) + two32;
  return (int32_t) ((x >= two31) ? x - two32 : x);
 }

--- a/src/conversions.cc
+++ b/src/conversions.cc
@@ -664,7 +664,7 @@ char* DoubleToRadixCString(double value, int radix) {
  int integer_pos = kBufferSize - 2;
  do {
    integer_buffer[integer_pos--] =
-        chars[static_cast<int>(fmod(integer_part, radix))];
+        chars[static_cast<int>(modulo(integer_part, radix))];
    integer_part /= radix;
  } while (integer_part >= 1.0);
  // Sanity check.

--- a/src/conversions.h
+++ b/src/conversions.h
@@ -31,6 +31,7 @@
 namespace v8 {
 namespace internal {
 // The fast double-to-int conversion routine does not guarantee
 // rounding towards zero.
 // The result is unspecified if x is infinite or NaN, or if the rounded

--- a/src/platform-nullos.cc
+++ b/src/platform-nullos.cc
@@ -47,6 +47,13 @@ double ceiling(double x) {
 }
+// Give V8 the opportunity to override the default fmod behavior.
+double modulo(double x, double y) {
+  UNIMPLEMENTED();
+  return 0;
+}
 // Initialize OS class early in the V8 startup.
 void OS::Setup() {
  // Seed the random number generator.

--- a/src/platform-posix.cc
+++ b/src/platform-posix.cc
@@ -54,6 +54,12 @@
 namespace v8 {
 namespace internal {
+// ----------------------------------------------------------------------------
+// Math functions
+double modulo(double x, double y) {
+  return fmod(x, y);
+}
 // ----------------------------------------------------------------------------
 // POSIX date/time support.

--- a/src/platform-win32.cc
+++ b/src/platform-win32.cc
@@ -223,6 +223,31 @@ double ceiling(double x) {
  return ceil(x);
 }
+#ifdef _WIN64
+typedef double (*ModuloFunction)(double, double);
+// Defined in codegen-x64.cc.
+ModuloFunction CreateModuloFunction();
+double modulo(double x, double y) {
+  static ModuloFunction function = CreateModuloFunction();
+  return function(x, y);
+}
+#else  // Win32
+double modulo(double x, double y) {
+  // Workaround MS fmod bugs. ECMA-262 says:
+  // dividend is finite and divisor is an infinity => result equals dividend
+  // dividend is a zero and divisor is nonzero finite => result equals dividend
+  if (!(isfinite(x) && (!isfinite(y) && !isnan(y))) &&
+      !(x == 0 && (y != 0 && isfinite(y)))) {
+    x = fmod(x, y);
+  }
+  return x;
+}
+#endif  // _WIN64
 // ----------------------------------------------------------------------------
 // The Time class represents time on win32. A timestamp is represented as
 // a 64-bit integer in 100 nano-seconds since January 1, 1601 (UTC). JavaScript

--- a/src/platform.h
+++ b/src/platform.h
@@ -111,6 +111,7 @@ namespace internal {
 class Semaphore;
 double ceiling(double x);
+double modulo(double x, double y);
 // Forward declarations.
 class Socket;

--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -3742,14 +3742,7 @@ static Object* Runtime_NumberMod(Arguments args) {
  CONVERT_DOUBLE_CHECKED(x, args[0]);
  CONVERT_DOUBLE_CHECKED(y, args[1]);
-#if defined WIN32 || defined _WIN64
+  x = modulo(x, y);
-  // Workaround MS fmod bugs. ECMA-262 says:
-  // dividend is finite and divisor is an infinity => result equals dividend
-  // dividend is a zero and divisor is nonzero finite => result equals dividend
-  if (!(isfinite(x) && (!isfinite(y) && !isnan(y))) &&
-      !(x == 0 && (y != 0 && isfinite(y))))
-#endif
-  x = fmod(x, y);
  // NewNumberFromDouble may return a Smi instead of a Number object
  return Heap::NewNumberFromDouble(x);
 }

--- a/src/x64/assembler-x64.cc
+++ b/src/x64/assembler-x64.cc
@@ -2009,6 +2009,14 @@ void Assembler::fstp_d(const Operand& adr) {
 }
+void Assembler::fstp(int index) {
+  ASSERT(is_uint3(index));
+  EnsureSpace ensure_space(this);
+  last_pc_ = pc_;
+  emit_farith(0xDD, 0xD8, index);
+}
 void Assembler::fild_s(const Operand& adr) {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
@@ -2313,18 +2321,7 @@ void Assembler::movsd(const Operand& dst, XMMRegister src) {
 }
-void Assembler::movsd(Register dst, XMMRegister src) {
+void Assembler::movsd(XMMRegister dst, XMMRegister src) {
-  EnsureSpace ensure_space(this);
-  last_pc_ = pc_;
-  emit(0xF2);  // double
-  emit_optional_rex_32(src, dst);
-  emit(0x0F);
-  emit(0x11);  // store
-  emit_sse_operand(src, dst);
-}
-void Assembler::movsd(XMMRegister dst, Register src) {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  emit(0xF2);  // double

--- a/src/x64/assembler-x64.h
+++ b/src/x64/assembler-x64.h
@@ -1013,6 +1013,7 @@ class Assembler : public Malloced {
  void fstp_s(const Operand& adr);
  void fstp_d(const Operand& adr);
+  void fstp(int index);
  void fild_s(const Operand& adr);
  void fild_d(const Operand& adr);
@@ -1066,8 +1067,7 @@ class Assembler : public Malloced {
  // SSE2 instructions
  void movsd(const Operand& dst, XMMRegister src);
-  void movsd(Register src, XMMRegister dst);
+  void movsd(XMMRegister src, XMMRegister dst);
-  void movsd(XMMRegister dst, Register src);
  void movsd(XMMRegister src, const Operand& dst);
  void cvttss2si(Register dst, const Operand& src);

--- a/src/x64/codegen-x64.cc
+++ b/src/x64/codegen-x64.cc
@@ -7643,6 +7643,98 @@ int CompareStub::MinorKey() {
  return (static_cast<unsigned>(cc_) << 1) | (strict_ ? 1 : 0);
 }
+#undef __
+#define __ masm.
+#ifdef _WIN64
+typedef double (*ModuloFunction)(double, double);
+// Define custom fmod implementation.
+ModuloFunction CreateModuloFunction() {
+  size_t actual_size;
+  byte* buffer = static_cast<byte*>(OS::Allocate(Assembler::kMinimalBufferSize,
+                                                 &actual_size,
+                                                 true));
+  CHECK(buffer);
+  Assembler masm(buffer, actual_size);
+  // Generated code is put into a fixed, unmovable, buffer, and not into
+  // the V8 heap. We can't, and don't, refer to any relocatable addresses
+  // (e.g. the JavaScript nan-object).
+  // Windows 64 ABI passes double arguments in xmm0, xmm1 and
+  // returns result in xmm0.
+  // Argument backing space is allocated on the stack above
+  // the return address.
+  // Compute x mod y.
+  // Load y and x (use argument backing store as temporary storage).
+  __ movsd(Operand(rsp, kPointerSize * 2), xmm1);
+  __ movsd(Operand(rsp, kPointerSize), xmm0);
+  __ fld_d(Operand(rsp, kPointerSize * 2));
+  __ fld_d(Operand(rsp, kPointerSize));
+  // Clear exception flags before operation.
+  {
+    Label no_exceptions;
+    __ fwait();
+    __ fnstsw_ax();
+    // Clear if Invalid Operation (bit 0) or Zero Divide (bit 2) flags are set.
+    __ testb(rax, Immediate(5));
+    __ j(zero, &no_exceptions);
+    __ fnclex();
+    __ bind(&no_exceptions);
+  }
+  // Compute st(0) % st(1)
+  {
+    Label partial_remainder_loop;
+    __ bind(&partial_remainder_loop);
+    __ fprem();
+    __ fwait();
+    __ fnstsw_ax();
+    __ testl(rax, Immediate(0x400 /* C2 */));
+    // If C2 is set, computation only has partial result. Loop to
+    // continue computation.
+    __ j(not_zero, &partial_remainder_loop);
+  }
+  Label valid_result;
+  Label return_result;
+  // If the Invalid Operation or Zero Divide exception flags are set,
+  // return NaN.
+  __ testb(rax, Immediate(5));
+  __ j(zero, &valid_result);
+  __ fstp(0);  // Drop result in st(0).
+  int64_t kNaNValue = V8_INT64_C(0x7ff8000000000000);
+  __ movq(rcx, kNaNValue, RelocInfo::NONE);
+  __ movq(Operand(rsp, kPointerSize), rcx);
+  __ movsd(xmm0, Operand(rsp, kPointerSize));
+  __ jmp(&return_result);
+  // If result is valid, return that.
+  __ bind(&valid_result);
+  __ fstp_d(Operand(rsp, kPointerSize));
+  __ movsd(xmm0, Operand(rsp, kPointerSize));
+  // Clean up FPU stack and exceptions and return xmm0
+  __ bind(&return_result);
+  __ fstp(0);  // Unload y.
+  {
+    Label no_exceptions;
+    __ testb(rax, Immediate(0x3f /* Any Exception*/));
+    __ j(zero, &no_exceptions);
+    __ fnclex();
+    __ bind(&no_exceptions);
+  }
+  __ ret(0);
+  CodeDesc desc;
+  masm.GetCode(&desc);
+  // Hand the buffer back as a function pointer callable from C++.
+  return FUNCTION_CAST<ModuloFunction>(buffer);
+}
+#endif
 #undef __

--- a/src/x64/disasm-x64.cc
+++ b/src/x64/disasm-x64.cc
@@ -860,12 +860,22 @@ int DisassemblerX64::FPUInstruction(byte* data) {
      return count + 1;
    }
  } else if (b1 == 0xDD) {
-    if ((b2 & 0xF8) == 0xC0) {
-      AppendToBuffer("ffree st%d", b2 & 0x7);
-      return 2;
-    } else {
    int mod, regop, rm;
    get_modrm(*(data + 1), &mod, &regop, &rm);
+    if (mod == 3) {
+      switch (regop) {  // Register-form 0xDD opcodes; regop selects the op.
+        case 0:  // DD C0+i: FFREE ST(i).
+          AppendToBuffer("ffree st%d", rm & 7);
+          break;
+        case 3:  // DD D8+i: FSTP ST(i). (reg field 2, DD D0+i, is FST.)
+          AppendToBuffer("fstp st%d", rm & 7);
+          break;
+        default:
+          UnimplementedInstruction();
+          break;
+      }
+      return 2;
+    } else {
      const char* mnem = "?";
      switch (regop) {
        case 0:

--- a/test/mjsunit/div-mod.js
+++ b/test/mjsunit/div-mod.js
@@ -86,3 +86,72 @@ var divisors = [
 for (var i = 0; i < divisors.length; i++) {
  run_tests_for(divisors[i]);
 }
+// Test extreme corner cases of modulo.
+// Computes the modulo by slow but lossless operations.
+function compute_mod(dividend, divisor) {
+  // Return NaN if either operand is NaN, if divisor is 0 or
+  // dividend is an infinity. Return dividend if divisor is an infinity.
+  if (isNaN(dividend) || isNaN(divisor) || divisor == 0) { return NaN; }
+  var sign = 1;
+  if (dividend < 0) { dividend = -dividend; sign = -1; }
+  if (dividend == Infinity) { return NaN; }
+  if (divisor < 0) { divisor = -divisor; }
+  if (divisor == Infinity) { return sign * dividend; }
+  function rec_mod(a, b) {
+    // Subtracts the largest possible multiple of b from a.
+    if (a >= b) {
+      a = rec_mod(a, 2 * b);
+      if (a >= b) { a -= b; }
+    }
+    return a;
+  }
+  return sign * rec_mod(dividend, divisor);
+}
+(function () {
+  var large_non_smi = 1234567891234.12245;
+  var small_non_smi = 43.2367243;
+  var repeating_decimal = 0.3;
+  var finite_decimal = 0.5;
+  var smi = 43;
+  var power_of_two = 64;
+  var min_normal = Number.MIN_VALUE * Math.pow(2, 52);
+  var max_denormal = Number.MIN_VALUE * (Math.pow(2, 52) - 1);
+  // All combinations of NaN, Infinity, normal, denormal and zero.
+  var example_numbers = [
+    NaN,
+    0,
+    Number.MIN_VALUE,
+    3 * Number.MIN_VALUE,
+    max_denormal,
+    min_normal,
+    repeating_decimal,
+    finite_decimal,
+    smi,
+    power_of_two,
+    small_non_smi,
+    large_non_smi,
+    Number.MAX_VALUE,
+    Infinity
+  ];
+  function doTest(a, b) {
+    var exp = compute_mod(a, b);
+    var act = a % b;
+    assertEquals(exp, act, a + " % " + b);
+  }
+  for (var i = 0; i < example_numbers.length; i++) {
+    for (var j = 0; j < example_numbers.length; j++) {
+      var a = example_numbers[i];
+      var b = example_numbers[j];
+      doTest(a,b);
+      doTest(-a,b);
+      doTest(a,-b);
+      doTest(-a,-b);
+    }
+  }
+})()