// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_CONVERSIONS_INL_H_
#define V8_CONVERSIONS_INL_H_

8 9 10
#include <float.h>         // Required for DBL_MAX and on Win32 for finite()
#include <limits.h>        // Required for INT_MAX etc.
#include <stdarg.h>
11
#include <cmath>
12
#include "src/globals.h"       // Required for V8_INFINITY
13 14 15 16

// ----------------------------------------------------------------------------
// Extra POSIX/ANSI functions for Win32/MSVC.

17
#include "src/base/bits.h"
18
#include "src/base/platform/platform.h"
19 20 21 22
#include "src/conversions.h"
#include "src/double.h"
#include "src/scanner.h"
#include "src/strtod.h"
23

24 25
namespace v8 {
namespace internal {
26

27
inline double JunkStringValue() {
28
  return bit_cast<double, uint64_t>(kQuietNaNMask);
29 30 31
}


32 33 34 35 36
// Produces a zero with the requested sign: the double whose bits are
// Double::kSignMask when |negative| is set, plain 0.0 otherwise.
inline double SignedZero(bool negative) {
  if (negative) return uint64_to_double(Double::kSignMask);
  return 0.0;
}


37
// The fast double-to-unsigned-int conversion routine does not guarantee
// rounding towards zero, or any reasonable value if the argument is larger
// than what fits in an unsigned 32-bit integer.
//
// Negative arguments are converted by magnitude and the result is then
// negated modulo 2^32. Arguments whose magnitude is at least 2^52, as well as
// NaN, yield 0x80000000.
inline unsigned int FastD2UI(double x) {
  // There is no unsigned version of lrint, so there is no fast path
  // in this function as there is in FastD2I. Using lrint doesn't work
  // for values of 2^31 and above.

  // Convert "small enough" doubles to uint32_t by fixing the 32
  // least significant non-fractional bits in the low 32 bits of the
  // double, and reading them from there.
  const double k2Pow52 = 4503599627370496.0;
  bool negative = x < 0;
  if (negative) {
    x = -x;
  }
  if (x < k2Pow52) {
    x += k2Pow52;
    uint32_t result;
    // Point at the low-order 32 bits of the double's storage: they come
    // first on little-endian targets and last on big-endian ones.
    const unsigned char* mantissa_ptr =
        reinterpret_cast<const unsigned char*>(&x);
#ifdef V8_TARGET_BIG_ENDIAN
    mantissa_ptr += sizeof(x) - sizeof(result);
#endif
    // Copy least significant 32 bits of mantissa.
    memcpy(&result, mantissa_ptr, sizeof(result));
    // ~result + 1 is the two's-complement negation in unsigned arithmetic.
    return negative ? ~result + 1 : result;
  }
  // Large number (outside uint32 range), Infinity or NaN.
  return 0x80000000u;  // Return integer indefinite.
}


70 71 72 73 74 75 76 77
// Narrows |x| from double to single precision.
inline float DoubleToFloat32(double x) {
  // TODO(yanggou): This static_cast is implementation-defined behaviour in C++,
  // so we may need to do the conversion manually instead to match the spec.
  // The narrowed value is stored in a volatile local before being returned
  // (presumably to force an actual float-width store -- confirm).
  volatile float narrowed = static_cast<float>(x);
  return narrowed;
}


78
// Rounds |x| towards zero to an integral double. NaN maps to 0; infinities
// and zeros (including -0.0) are returned unchanged.
inline double DoubleToInteger(double x) {
  if (std::isnan(x)) return 0;
  // std::trunc rounds towards zero and passes +-0 and +-infinity through
  // unchanged, which covers every remaining case.
  return std::trunc(x);
}


int32_t DoubleToInt32(double x) {
  int32_t i = FastD2I(x);
  if (FastI2D(i) == x) return i;
88 89 90 91 92 93 94 95 96
  Double d(x);
  int exponent = d.Exponent();
  if (exponent < 0) {
    if (exponent <= -Double::kSignificandSize) return 0;
    return d.Sign() * static_cast<int32_t>(d.Significand() >> -exponent);
  } else {
    if (exponent > 31) return 0;
    return d.Sign() * static_cast<int32_t>(d.Significand() << exponent);
  }
97 98 99
}


100
// Checks whether the characters starting at |*current| spell out |substring|.
// The first character must already match (this is DCHECKed). On success
// |*current| is advanced to just past the match and true is returned. On a
// mismatch, or if |end| is reached first, false is returned and |*current| is
// left at the position where the comparison stopped.
template <class Iterator, class EndMark>
bool SubStringEquals(Iterator* current,
                     EndMark end,
                     const char* substring) {
  DCHECK(**current == *substring);
  for (substring++; *substring != '\0'; substring++) {
    ++*current;
    if (*current == end || **current != *substring) return false;
  }
  ++*current;
  return true;
}


// Returns true if a nonspace character has been found and false if the
// end was been reached before finding a nonspace character.
template <class Iterator, class EndMark>
117 118 119
inline bool AdvanceToNonspace(UnicodeCache* unicode_cache,
                              Iterator* current,
                              EndMark end) {
120
  while (*current != end) {
121
    if (!unicode_cache->IsWhiteSpaceOrLineTerminator(**current)) return true;
122 123 124 125 126 127 128 129
    ++*current;
  }
  return false;
}


// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
template <int radix_log_2, class Iterator, class EndMark>
130 131 132 133 134
double InternalStringToIntDouble(UnicodeCache* unicode_cache,
                                 Iterator current,
                                 EndMark end,
                                 bool negative,
                                 bool allow_trailing_junk) {
135
  DCHECK(current != end);
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159

  // Skip leading 0s.
  while (*current == '0') {
    ++current;
    if (current == end) return SignedZero(negative);
  }

  int64_t number = 0;
  int exponent = 0;
  const int radix = (1 << radix_log_2);

  do {
    int digit;
    if (*current >= '0' && *current <= '9' && *current < '0' + radix) {
      digit = static_cast<char>(*current) - '0';
    } else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) {
      digit = static_cast<char>(*current) - 'a' + 10;
    } else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) {
      digit = static_cast<char>(*current) - 'A' + 10;
    } else {
      if (allow_trailing_junk ||
          !AdvanceToNonspace(unicode_cache, &current, end)) {
        break;
      } else {
160
        return JunkStringValue();
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
      }
    }

    number = number * radix + digit;
    int overflow = static_cast<int>(number >> 53);
    if (overflow != 0) {
      // Overflow occurred. Need to determine which direction to round the
      // result.
      int overflow_bits_count = 1;
      while (overflow > 1) {
        overflow_bits_count++;
        overflow >>= 1;
      }

      int dropped_bits_mask = ((1 << overflow_bits_count) - 1);
      int dropped_bits = static_cast<int>(number) & dropped_bits_mask;
      number >>= overflow_bits_count;
      exponent = overflow_bits_count;

      bool zero_tail = true;
      while (true) {
        ++current;
        if (current == end || !isDigit(*current, radix)) break;
        zero_tail = zero_tail && *current == '0';
        exponent += radix_log_2;
      }

      if (!allow_trailing_junk &&
          AdvanceToNonspace(unicode_cache, &current, end)) {
190
        return JunkStringValue();
191 192 193 194 195 196 197 198 199 200 201 202 203 204
      }

      int middle_value = (1 << (overflow_bits_count - 1));
      if (dropped_bits > middle_value) {
        number++;  // Rounding up.
      } else if (dropped_bits == middle_value) {
        // Rounding to even to consistency with decimals: half-way case rounds
        // up if significant part is odd and down otherwise.
        if ((number & 1) != 0 || !zero_tail) {
          number++;  // Rounding up.
        }
      }

      // Rounding up may cause overflow.
205
      if ((number & (static_cast<int64_t>(1) << 53)) != 0) {
206 207 208 209 210 211 212 213
        exponent++;
        number >>= 1;
      }
      break;
    }
    ++current;
  } while (current != end);

214 215
  DCHECK(number < ((int64_t)1 << 53));
  DCHECK(static_cast<int64_t>(static_cast<double>(number)) == number);
216 217 218 219 220 221 222 223 224

  if (exponent == 0) {
    if (negative) {
      if (number == 0) return -0.0;
      number = -number;
    }
    return static_cast<double>(number);
  }

225
  DCHECK(number != 0);
226
  return std::ldexp(static_cast<double>(negative ? -number : number), exponent);
227 228 229 230
}


template <class Iterator, class EndMark>
231 232 233 234
double InternalStringToInt(UnicodeCache* unicode_cache,
                           Iterator current,
                           EndMark end,
                           int radix) {
235
  const bool allow_trailing_junk = true;
236
  const double empty_string_val = JunkStringValue();
237 238 239 240 241 242 243 244 245 246 247 248

  if (!AdvanceToNonspace(unicode_cache, &current, end)) {
    return empty_string_val;
  }

  bool negative = false;
  bool leading_zero = false;

  if (*current == '+') {
    // Ignore leading sign; skip following spaces.
    ++current;
    if (current == end) {
249
      return JunkStringValue();
250 251 252 253
    }
  } else if (*current == '-') {
    ++current;
    if (current == end) {
254
      return JunkStringValue();
255 256 257 258 259 260
    }
    negative = true;
  }

  if (radix == 0) {
    // Radix detection.
261
    radix = 10;
262 263 264 265 266 267
    if (*current == '0') {
      ++current;
      if (current == end) return SignedZero(negative);
      if (*current == 'x' || *current == 'X') {
        radix = 16;
        ++current;
268
        if (current == end) return JunkStringValue();
269 270 271 272 273 274 275 276 277 278 279
      } else {
        leading_zero = true;
      }
    }
  } else if (radix == 16) {
    if (*current == '0') {
      // Allow "0x" prefix.
      ++current;
      if (current == end) return SignedZero(negative);
      if (*current == 'x' || *current == 'X') {
        ++current;
280
        if (current == end) return JunkStringValue();
281 282 283 284 285 286
      } else {
        leading_zero = true;
      }
    }
  }

287
  if (radix < 2 || radix > 36) return JunkStringValue();
288 289 290 291 292 293 294 295 296

  // Skip leading zeros.
  while (*current == '0') {
    leading_zero = true;
    ++current;
    if (current == end) return SignedZero(negative);
  }

  if (!leading_zero && !isDigit(*current, radix)) {
297
    return JunkStringValue();
298 299
  }

300
  if (base::bits::IsPowerOfTwo32(radix)) {
301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
    switch (radix) {
      case 2:
        return InternalStringToIntDouble<1>(
            unicode_cache, current, end, negative, allow_trailing_junk);
      case 4:
        return InternalStringToIntDouble<2>(
            unicode_cache, current, end, negative, allow_trailing_junk);
      case 8:
        return InternalStringToIntDouble<3>(
            unicode_cache, current, end, negative, allow_trailing_junk);

      case 16:
        return InternalStringToIntDouble<4>(
            unicode_cache, current, end, negative, allow_trailing_junk);

      case 32:
        return InternalStringToIntDouble<5>(
            unicode_cache, current, end, negative, allow_trailing_junk);
      default:
        UNREACHABLE();
    }
  }

  if (radix == 10) {
    // Parsing with strtod.
    const int kMaxSignificantDigits = 309;  // Doubles are less than 1.8e308.
    // The buffer may contain up to kMaxSignificantDigits + 1 digits and a zero
    // end.
    const int kBufferSize = kMaxSignificantDigits + 2;
    char buffer[kBufferSize];
    int buffer_pos = 0;
    while (*current >= '0' && *current <= '9') {
      if (buffer_pos <= kMaxSignificantDigits) {
        // If the number has more than kMaxSignificantDigits it will be parsed
        // as infinity.
336
        DCHECK(buffer_pos < kBufferSize);
337 338 339 340 341 342 343 344
        buffer[buffer_pos++] = static_cast<char>(*current);
      }
      ++current;
      if (current == end) break;
    }

    if (!allow_trailing_junk &&
        AdvanceToNonspace(unicode_cache, &current, end)) {
345
      return JunkStringValue();
346 347
    }

348
    SLOW_DCHECK(buffer_pos < kBufferSize);
349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
    buffer[buffer_pos] = '\0';
    Vector<const char> buffer_vector(buffer, buffer_pos);
    return negative ? -Strtod(buffer_vector, 0) : Strtod(buffer_vector, 0);
  }

  // The following code causes accumulating rounding error for numbers greater
  // than ~2^56. It's explicitly allowed in the spec: "if R is not 2, 4, 8, 10,
  // 16, or 32, then mathInt may be an implementation-dependent approximation to
  // the mathematical integer value" (15.1.2.2).

  int lim_0 = '0' + (radix < 10 ? radix : 10);
  int lim_a = 'a' + (radix - 10);
  int lim_A = 'A' + (radix - 10);

  // NOTE: The code for computing the value may seem a bit complex at
  // first glance. It is structured to use 32-bit multiply-and-add
  // loops as long as possible to avoid loosing precision.

  double v = 0.0;
  bool done = false;
  do {
    // Parse the longest part of the string starting at index j
    // possible while keeping the multiplier, and thus the part
    // itself, within 32 bits.
    unsigned int part = 0, multiplier = 1;
    while (true) {
      int d;
      if (*current >= '0' && *current < lim_0) {
        d = *current - '0';
      } else if (*current >= 'a' && *current < lim_a) {
        d = *current - 'a' + 10;
      } else if (*current >= 'A' && *current < lim_A) {
        d = *current - 'A' + 10;
      } else {
        done = true;
        break;
      }

      // Update the value of the part as long as the multiplier fits
      // in 32 bits. When we can't guarantee that the next iteration
      // will not overflow the multiplier, we stop parsing the part
      // by leaving the loop.
      const unsigned int kMaximumMultiplier = 0xffffffffU / 36;
      uint32_t m = multiplier * radix;
      if (m > kMaximumMultiplier) break;
      part = part * radix + d;
      multiplier = m;
396
      DCHECK(multiplier > part);
397 398 399 400 401 402 403 404 405 406 407 408 409 410

      ++current;
      if (current == end) {
        done = true;
        break;
      }
    }

    // Update the value and skip the part in the string.
    v = v * multiplier + part;
  } while (!done);

  if (!allow_trailing_junk &&
      AdvanceToNonspace(unicode_cache, &current, end)) {
411
    return JunkStringValue();
412 413 414 415 416 417 418 419 420 421 422 423
  }

  return negative ? -v : v;
}


// Converts a string to a double value. Assumes the Iterator supports
// the following operations:
// 1. current == end (other ops are not allowed), current != end.
// 2. *current - gets the current character in the sequence.
// 3. ++current (advances the position).
template <class Iterator, class EndMark>
424 425 426 427 428
double InternalStringToDouble(UnicodeCache* unicode_cache,
                              Iterator current,
                              EndMark end,
                              int flags,
                              double empty_string_val) {
429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
  // To make sure that iterator dereferencing is valid the following
  // convention is used:
  // 1. Each '++current' statement is followed by check for equality to 'end'.
  // 2. If AdvanceToNonspace returned false then current == end.
  // 3. If 'current' becomes be equal to 'end' the function returns or goes to
  // 'parsing_done'.
  // 4. 'current' is not dereferenced after the 'parsing_done' label.
  // 5. Code before 'parsing_done' may rely on 'current != end'.
  if (!AdvanceToNonspace(unicode_cache, &current, end)) {
    return empty_string_val;
  }

  const bool allow_trailing_junk = (flags & ALLOW_TRAILING_JUNK) != 0;

  // The longest form of simplified number is: "-<significant digits>'.1eXXX\0".
  const int kBufferSize = kMaxSignificantDigits + 10;
  char buffer[kBufferSize];  // NOLINT: size is known at compile time.
  int buffer_pos = 0;

  // Exponent will be adjusted if insignificant digits of the integer part
  // or insignificant leading zeros of the fractional part are dropped.
  int exponent = 0;
  int significant_digits = 0;
  int insignificant_digits = 0;
  bool nonzero_digit_dropped = false;

455 456 457 458 459 460 461
  enum Sign {
    NONE,
    NEGATIVE,
    POSITIVE
  };

  Sign sign = NONE;
462 463 464 465

  if (*current == '+') {
    // Ignore leading sign.
    ++current;
466
    if (current == end) return JunkStringValue();
467
    sign = POSITIVE;
468 469
  } else if (*current == '-') {
    ++current;
470
    if (current == end) return JunkStringValue();
471
    sign = NEGATIVE;
472 473
  }

474 475 476
  static const char kInfinityString[] = "Infinity";
  if (*current == kInfinityString[0]) {
    if (!SubStringEquals(&current, end, kInfinityString)) {
477
      return JunkStringValue();
478 479 480 481
    }

    if (!allow_trailing_junk &&
        AdvanceToNonspace(unicode_cache, &current, end)) {
482
      return JunkStringValue();
483 484
    }

485
    DCHECK(buffer_pos == 0);
486
    return (sign == NEGATIVE) ? -V8_INFINITY : V8_INFINITY;
487 488 489 490 491
  }

  bool leading_zero = false;
  if (*current == '0') {
    ++current;
492
    if (current == end) return SignedZero(sign == NEGATIVE);
493 494 495 496 497 498

    leading_zero = true;

    // It could be hexadecimal value.
    if ((flags & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
      ++current;
499
      if (current == end || !isDigit(*current, 16) || sign != NONE) {
500
        return JunkStringValue();  // "0x".
501 502 503 504 505
      }

      return InternalStringToIntDouble<4>(unicode_cache,
                                          current,
                                          end,
506
                                          false,
507
                                          allow_trailing_junk);
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533

    // It could be an explicit octal value.
    } else if ((flags & ALLOW_OCTAL) && (*current == 'o' || *current == 'O')) {
      ++current;
      if (current == end || !isDigit(*current, 8) || sign != NONE) {
        return JunkStringValue();  // "0o".
      }

      return InternalStringToIntDouble<3>(unicode_cache,
                                          current,
                                          end,
                                          false,
                                          allow_trailing_junk);

    // It could be a binary value.
    } else if ((flags & ALLOW_BINARY) && (*current == 'b' || *current == 'B')) {
      ++current;
      if (current == end || !isBinaryDigit(*current) || sign != NONE) {
        return JunkStringValue();  // "0b".
      }

      return InternalStringToIntDouble<1>(unicode_cache,
                                          current,
                                          end,
                                          false,
                                          allow_trailing_junk);
534 535 536 537 538
    }

    // Ignore leading zeros in the integer part.
    while (*current == '0') {
      ++current;
539
      if (current == end) return SignedZero(sign == NEGATIVE);
540 541 542
    }
  }

543
  bool octal = leading_zero && (flags & ALLOW_IMPLICIT_OCTAL) != 0;
544 545 546 547

  // Copy significant digits of the integer part (if any) to the buffer.
  while (*current >= '0' && *current <= '9') {
    if (significant_digits < kMaxSignificantDigits) {
548
      DCHECK(buffer_pos < kBufferSize);
549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565
      buffer[buffer_pos++] = static_cast<char>(*current);
      significant_digits++;
      // Will later check if it's an octal in the buffer.
    } else {
      insignificant_digits++;  // Move the digit into the exponential part.
      nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
    }
    octal = octal && *current < '8';
    ++current;
    if (current == end) goto parsing_done;
  }

  if (significant_digits == 0) {
    octal = false;
  }

  if (*current == '.') {
566
    if (octal && !allow_trailing_junk) return JunkStringValue();
567 568 569 570 571
    if (octal) goto parsing_done;

    ++current;
    if (current == end) {
      if (significant_digits == 0 && !leading_zero) {
572
        return JunkStringValue();
573 574 575 576 577 578 579 580 581 582 583
      } else {
        goto parsing_done;
      }
    }

    if (significant_digits == 0) {
      // octal = false;
      // Integer part consists of 0 or is absent. Significant digits start after
      // leading zeros (if any).
      while (*current == '0') {
        ++current;
584
        if (current == end) return SignedZero(sign == NEGATIVE);
585 586 587 588
        exponent--;  // Move this 0 into the exponent.
      }
    }

589 590
    // There is a fractional part.  We don't emit a '.', but adjust the exponent
    // instead.
591 592
    while (*current >= '0' && *current <= '9') {
      if (significant_digits < kMaxSignificantDigits) {
593
        DCHECK(buffer_pos < kBufferSize);
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610
        buffer[buffer_pos++] = static_cast<char>(*current);
        significant_digits++;
        exponent--;
      } else {
        // Ignore insignificant digits in the fractional part.
        nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
      }
      ++current;
      if (current == end) goto parsing_done;
    }
  }

  if (!leading_zero && exponent == 0 && significant_digits == 0) {
    // If leading_zeros is true then the string contains zeros.
    // If exponent < 0 then string was [+-]\.0*...
    // If significant_digits != 0 the string is not equal to 0.
    // Otherwise there are no digits in the string.
611
    return JunkStringValue();
612 613 614 615
  }

  // Parse exponential part.
  if (*current == 'e' || *current == 'E') {
616
    if (octal) return JunkStringValue();
617 618 619 620 621
    ++current;
    if (current == end) {
      if (allow_trailing_junk) {
        goto parsing_done;
      } else {
622
        return JunkStringValue();
623 624 625 626 627 628 629 630 631 632
      }
    }
    char sign = '+';
    if (*current == '+' || *current == '-') {
      sign = static_cast<char>(*current);
      ++current;
      if (current == end) {
        if (allow_trailing_junk) {
          goto parsing_done;
        } else {
633
          return JunkStringValue();
634 635 636 637 638 639 640 641
        }
      }
    }

    if (current == end || *current < '0' || *current > '9') {
      if (allow_trailing_junk) {
        goto parsing_done;
      } else {
642
        return JunkStringValue();
643 644 645
      }
    }

646
    const int max_exponent = INT_MAX / 2;
647
    DCHECK(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2);
648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665
    int num = 0;
    do {
      // Check overflow.
      int digit = *current - '0';
      if (num >= max_exponent / 10
          && !(num == max_exponent / 10 && digit <= max_exponent % 10)) {
        num = max_exponent;
      } else {
        num = num * 10 + digit;
      }
      ++current;
    } while (current != end && *current >= '0' && *current <= '9');

    exponent += (sign == '-' ? -num : num);
  }

  if (!allow_trailing_junk &&
      AdvanceToNonspace(unicode_cache, &current, end)) {
666
    return JunkStringValue();
667 668 669 670 671 672 673 674 675
  }

  parsing_done:
  exponent += insignificant_digits;

  if (octal) {
    return InternalStringToIntDouble<3>(unicode_cache,
                                        buffer,
                                        buffer + buffer_pos,
676
                                        sign == NEGATIVE,
677 678 679 680 681 682 683 684
                                        allow_trailing_junk);
  }

  if (nonzero_digit_dropped) {
    buffer[buffer_pos++] = '1';
    exponent--;
  }

685
  SLOW_DCHECK(buffer_pos < kBufferSize);
686 687 688
  buffer[buffer_pos] = '\0';

  double converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent);
689
  return (sign == NEGATIVE) ? -converted : converted;
690 691
}

692 693 694
} }  // namespace v8::internal

#endif  // V8_CONVERSIONS_INL_H_