Commit 35f94cbe authored by Jakob Kummerow, committed by Commit Bot

[refactoring] Prepare conversions{.h,-inl.h,.cc} for BigInt.parseInt

- Move things to conversions.cc that don't need to be in headers
- Turn InternalStringToInt into a subclassable helper class
  so we can re-use it for BigInt.parseInt
- Bonus: play a round of IWYU with all the .cc files who thought that
  #including conversions-inl.h would give them nice Unicode things

Bug: v8:6791
Cq-Include-Trybots: master.tryserver.chromium.linux:linux_chromium_rel_ng
Change-Id: I64022543a9b83002e2b78416c7e87b40a1a016e6
Reviewed-on: https://chromium-review.googlesource.com/673725
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Yang Guo <yangguo@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Michael Lippautz <mlippautz@chromium.org>
Cr-Commit-Position: refs/heads/master@{#48174}
parent 3b57e96c
......@@ -71,6 +71,7 @@
#include "src/startup-data-util.h"
#include "src/tracing/trace-event.h"
#include "src/trap-handler/trap-handler.h"
#include "src/unicode-cache-inl.h"
#include "src/unicode-inl.h"
#include "src/v8.h"
#include "src/v8threads.h"
......
......@@ -10,7 +10,6 @@
#include <stdarg.h>
#include <cmath>
#include "src/globals.h" // Required for V8_INFINITY
#include "src/unicode-cache-inl.h"
// ----------------------------------------------------------------------------
// Extra POSIX/ANSI functions for Win32/MSVC.
......@@ -20,21 +19,10 @@
#include "src/conversions.h"
#include "src/double.h"
#include "src/objects-inl.h"
#include "src/strtod.h"
namespace v8 {
namespace internal {
inline double JunkStringValue() {
return bit_cast<double, uint64_t>(kQuietNaNMask);
}
// Returns a zero with the requested sign: plain 0.0 when {negative} is false,
// otherwise the bit pattern Double::kSignMask reinterpreted as a double.
inline double SignedZero(bool negative) {
  if (!negative) return 0.0;
  return uint64_to_double(Double::kSignMask);
}
// The fast double-to-unsigned-int conversion routine does not guarantee
// rounding towards zero, or any reasonable value if the argument is larger
// than what fits in an unsigned 32-bit integer.
......@@ -231,599 +219,6 @@ uint32_t DoubleToUint32(double x) {
return static_cast<uint32_t>(DoubleToInt32(x));
}
// Checks whether the characters starting at {*current} spell out {substring}.
// The first character must already have been matched by the caller (this is
// asserted), so the comparison starts at the second one. On success
// {*current} is left one position past the last matched character.
template <class Iterator, class EndMark>
bool SubStringEquals(Iterator* current,
                     EndMark end,
                     const char* substring) {
  DCHECK(**current == *substring);
  const char* expected = substring + 1;
  while (*expected != '\0') {
    ++*current;
    if (*current == end) return false;
    if (**current != *expected) return false;
    ++expected;
  }
  ++*current;
  return true;
}
// Moves {*current} forward over whitespace and line terminators. Returns true
// as soon as it rests on a character that is neither, and false if {end} is
// reached first.
template <class Iterator, class EndMark>
inline bool AdvanceToNonspace(UnicodeCache* unicode_cache,
                              Iterator* current,
                              EndMark end) {
  for (; *current != end; ++*current) {
    const bool is_space =
        unicode_cache->IsWhiteSpaceOrLineTerminator(**current);
    if (!is_space) return true;
  }
  return false;
}
// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
//
// radix_log_2 is log2 of the radix, so template arguments 1..5 select radix
// 2, 4, 8, 16 and 32.
//
// Digits are accumulated exactly in a 64-bit integer. As soon as the value
// needs more than 53 bits, the low bits are dropped, the remaining digits are
// only counted into the binary exponent, and the dropped bits decide the
// rounding (ties go to even unless a nonzero digit follows).
//
// negative: negate the result; a zero result is returned as a negative zero.
// allow_trailing_junk: when false, a non-whitespace character after the
// digits makes the function return JunkStringValue().
template <int radix_log_2, class Iterator, class EndMark>
double InternalStringToIntDouble(UnicodeCache* unicode_cache,
Iterator current,
EndMark end,
bool negative,
bool allow_trailing_junk) {
DCHECK(current != end);
// Skip leading 0s.
while (*current == '0') {
++current;
if (current == end) return SignedZero(negative);
}
int64_t number = 0;
int exponent = 0;
const int radix = (1 << radix_log_2);
do {
int digit;
if (*current >= '0' && *current <= '9' && *current < '0' + radix) {
digit = static_cast<char>(*current) - '0';
} else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) {
digit = static_cast<char>(*current) - 'a' + 10;
} else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) {
digit = static_cast<char>(*current) - 'A' + 10;
} else {
// Not a digit: parsing stops here. The value read so far is kept if junk
// is allowed or if only whitespace remains up to the end.
if (allow_trailing_junk ||
!AdvanceToNonspace(unicode_cache, &current, end)) {
break;
} else {
return JunkStringValue();
}
}
number = number * radix + digit;
// Anything at or above bit 53 no longer fits the 53 bits kept exactly.
int overflow = static_cast<int>(number >> 53);
if (overflow != 0) {
// Overflow occurred. Need to determine which direction to round the
// result.
int overflow_bits_count = 1;
while (overflow > 1) {
overflow_bits_count++;
overflow >>= 1;
}
int dropped_bits_mask = ((1 << overflow_bits_count) - 1);
int dropped_bits = static_cast<int>(number) & dropped_bits_mask;
number >>= overflow_bits_count;
exponent = overflow_bits_count;
// Consume the remaining digits: each one scales the result by the radix
// (radix_log_2 more exponent bits); zero_tail records whether they were
// all '0', which matters only for the exact half-way case below.
bool zero_tail = true;
while (true) {
++current;
if (current == end || !isDigit(*current, radix)) break;
zero_tail = zero_tail && *current == '0';
exponent += radix_log_2;
}
if (!allow_trailing_junk &&
AdvanceToNonspace(unicode_cache, &current, end)) {
return JunkStringValue();
}
int middle_value = (1 << (overflow_bits_count - 1));
if (dropped_bits > middle_value) {
number++; // Rounding up.
} else if (dropped_bits == middle_value) {
// Rounding to even for consistency with decimals: half-way case rounds
// up if significant part is odd and down otherwise.
if ((number & 1) != 0 || !zero_tail) {
number++; // Rounding up.
}
}
// Rounding up may cause overflow.
if ((number & (static_cast<int64_t>(1) << 53)) != 0) {
exponent++;
number >>= 1;
}
break;
}
++current;
} while (current != end);
DCHECK(number < ((int64_t)1 << 53));
DCHECK(static_cast<int64_t>(static_cast<double>(number)) == number);
// No bits were dropped: the integer converts to a double directly.
if (exponent == 0) {
if (negative) {
if (number == 0) return -0.0;
number = -number;
}
return static_cast<double>(number);
}
DCHECK(number != 0);
// Scale the retained bits back up by the binary exponent collected above.
return std::ldexp(static_cast<double>(negative ? -number : number), exponent);
}
// ES6 18.2.5 parseInt(string, radix)
//
// Parses the character range [current, end) as an integer and returns it as a
// double. Steps, in order:
//   1. Skip leading whitespace; an empty or all-whitespace input yields
//      JunkStringValue().
//   2. Consume an optional '+' or '-'.
//   3. Resolve the radix: 0 means 10, or 16 when a "0x"/"0X" prefix follows;
//      an explicit 16 also accepts that prefix. Any radix outside 2..36
//      yields JunkStringValue().
//   4. Skip leading zeros; input consisting only of zeros yields a signed
//      zero.
//   5. Convert the digits: power-of-two radixes go through
//      InternalStringToIntDouble, radix 10 goes through Strtod, and all other
//      radixes use a chunked multiply-and-add loop.
// allow_trailing_junk is fixed to true here, so the "!allow_trailing_junk"
// checks below never reject anything in this function.
template <class Iterator, class EndMark>
double InternalStringToInt(UnicodeCache* unicode_cache,
Iterator current,
EndMark end,
int radix) {
const bool allow_trailing_junk = true;
const double empty_string_val = JunkStringValue();
if (!AdvanceToNonspace(unicode_cache, &current, end)) {
return empty_string_val;
}
bool negative = false;
bool leading_zero = false;
if (*current == '+') {
// Ignore leading sign.
++current;
if (current == end) {
return JunkStringValue();
}
} else if (*current == '-') {
++current;
if (current == end) {
return JunkStringValue();
}
negative = true;
}
if (radix == 0) {
// Radix detection.
radix = 10;
if (*current == '0') {
++current;
if (current == end) return SignedZero(negative);
if (*current == 'x' || *current == 'X') {
radix = 16;
++current;
if (current == end) return JunkStringValue();
} else {
leading_zero = true;
}
}
} else if (radix == 16) {
if (*current == '0') {
// Allow "0x" prefix.
++current;
if (current == end) return SignedZero(negative);
if (*current == 'x' || *current == 'X') {
++current;
if (current == end) return JunkStringValue();
} else {
leading_zero = true;
}
}
}
if (radix < 2 || radix > 36) return JunkStringValue();
// Skip leading zeros.
while (*current == '0') {
leading_zero = true;
++current;
if (current == end) return SignedZero(negative);
}
// Without any leading zero, the first remaining character must be a digit;
// with one, a non-digit here simply ends a number whose value is zero.
if (!leading_zero && !isDigit(*current, radix)) {
return JunkStringValue();
}
if (base::bits::IsPowerOfTwo(radix)) {
// The template argument is log2(radix).
switch (radix) {
case 2:
return InternalStringToIntDouble<1>(
unicode_cache, current, end, negative, allow_trailing_junk);
case 4:
return InternalStringToIntDouble<2>(
unicode_cache, current, end, negative, allow_trailing_junk);
case 8:
return InternalStringToIntDouble<3>(
unicode_cache, current, end, negative, allow_trailing_junk);
case 16:
return InternalStringToIntDouble<4>(
unicode_cache, current, end, negative, allow_trailing_junk);
case 32:
return InternalStringToIntDouble<5>(
unicode_cache, current, end, negative, allow_trailing_junk);
default:
UNREACHABLE();
}
}
if (radix == 10) {
// Parsing with strtod.
const int kMaxSignificantDigits = 309; // Doubles are less than 1.8e308.
// The buffer may contain up to kMaxSignificantDigits + 1 digits and a zero
// end.
const int kBufferSize = kMaxSignificantDigits + 2;
char buffer[kBufferSize];
int buffer_pos = 0;
while (*current >= '0' && *current <= '9') {
if (buffer_pos <= kMaxSignificantDigits) {
// If the number has more than kMaxSignificantDigits it will be parsed
// as infinity.
DCHECK(buffer_pos < kBufferSize);
buffer[buffer_pos++] = static_cast<char>(*current);
}
++current;
if (current == end) break;
}
if (!allow_trailing_junk &&
AdvanceToNonspace(unicode_cache, &current, end)) {
return JunkStringValue();
}
SLOW_DCHECK(buffer_pos < kBufferSize);
buffer[buffer_pos] = '\0';
Vector<const char> buffer_vector(buffer, buffer_pos);
return negative ? -Strtod(buffer_vector, 0) : Strtod(buffer_vector, 0);
}
// The following code causes accumulating rounding error for numbers greater
// than ~2^56. It's explicitly allowed in the spec: "if R is not 2, 4, 8, 10,
// 16, or 32, then mathInt may be an implementation-dependent approximation to
// the mathematical integer value" (15.1.2.2).
// Exclusive upper bounds of the accepted characters in each digit class. For
// radix <= 10 the two letter bounds do not exceed 'a' / 'A', so the letter
// branches below never match.
int lim_0 = '0' + (radix < 10 ? radix : 10);
int lim_a = 'a' + (radix - 10);
int lim_A = 'A' + (radix - 10);
// NOTE: The code for computing the value may seem a bit complex at
// first glance. It is structured to use 32-bit multiply-and-add
// loops as long as possible to avoid losing precision.
double v = 0.0;
bool done = false;
do {
// Parse the longest part of the string starting at the current position
// possible while keeping the multiplier, and thus the part
// itself, within 32 bits.
unsigned int part = 0, multiplier = 1;
while (true) {
int d;
if (*current >= '0' && *current < lim_0) {
d = *current - '0';
} else if (*current >= 'a' && *current < lim_a) {
d = *current - 'a' + 10;
} else if (*current >= 'A' && *current < lim_A) {
d = *current - 'A' + 10;
} else {
done = true;
break;
}
// Update the value of the part as long as the multiplier fits
// in 32 bits. When we can't guarantee that the next iteration
// will not overflow the multiplier, we stop parsing the part
// by leaving the loop.
const unsigned int kMaximumMultiplier = 0xffffffffU / 36;
uint32_t m = multiplier * radix;
if (m > kMaximumMultiplier) break;
part = part * radix + d;
multiplier = m;
DCHECK(multiplier > part);
++current;
if (current == end) {
done = true;
break;
}
}
// Update the value and skip the part in the string.
v = v * multiplier + part;
} while (!done);
if (!allow_trailing_junk &&
AdvanceToNonspace(unicode_cache, &current, end)) {
return JunkStringValue();
}
return negative ? -v : v;
}
// Converts a string to a double value. Assumes the Iterator supports
// the following operations:
// 1. current == end (other ops are not allowed), current != end.
// 2. *current - gets the current character in the sequence.
// 3. ++current (advances the position).
//
// flags is a bit set; the bits tested here are ALLOW_TRAILING_JUNK, ALLOW_HEX,
// ALLOW_OCTAL, ALLOW_BINARY and ALLOW_IMPLICIT_OCTAL. empty_string_val is
// returned when the input is empty or contains only whitespace. Input that
// cannot be parsed yields JunkStringValue().
//
// The function does not compute the value digit by digit: it copies the
// significant decimal digits into a local buffer, tracks a decimal exponent
// on the side, and hands both to Strtod at the end. Prefixed (0x/0o/0b) and
// implicit octal literals are delegated to InternalStringToIntDouble.
template <class Iterator, class EndMark>
double InternalStringToDouble(UnicodeCache* unicode_cache,
Iterator current,
EndMark end,
int flags,
double empty_string_val) {
// To make sure that iterator dereferencing is valid the following
// convention is used:
// 1. Each '++current' statement is followed by check for equality to 'end'.
// 2. If AdvanceToNonspace returned false then current == end.
// 3. If 'current' becomes equal to 'end' the function returns or goes to
// 'parsing_done'.
// 4. 'current' is not dereferenced after the 'parsing_done' label.
// 5. Code before 'parsing_done' may rely on 'current != end'.
if (!AdvanceToNonspace(unicode_cache, &current, end)) {
return empty_string_val;
}
const bool allow_trailing_junk = (flags & ALLOW_TRAILING_JUNK) != 0;
// The longest form of simplified number is: "-<significant digits>'.1eXXX\0".
// NOTE(review): kMaxSignificantDigits is not declared inside this function;
// it is presumably a constant defined elsewhere in the project - confirm.
const int kBufferSize = kMaxSignificantDigits + 10;
char buffer[kBufferSize]; // NOLINT: size is known at compile time.
int buffer_pos = 0;
// Exponent will be adjusted if insignificant digits of the integer part
// or insignificant leading zeros of the fractional part are dropped.
int exponent = 0;
int significant_digits = 0;
int insignificant_digits = 0;
bool nonzero_digit_dropped = false;
enum Sign {
NONE,
NEGATIVE,
POSITIVE
};
Sign sign = NONE;
if (*current == '+') {
// Ignore leading sign.
++current;
if (current == end) return JunkStringValue();
sign = POSITIVE;
} else if (*current == '-') {
++current;
if (current == end) return JunkStringValue();
sign = NEGATIVE;
}
static const char kInfinityString[] = "Infinity";
if (*current == kInfinityString[0]) {
if (!SubStringEquals(&current, end, kInfinityString)) {
return JunkStringValue();
}
if (!allow_trailing_junk &&
AdvanceToNonspace(unicode_cache, &current, end)) {
return JunkStringValue();
}
DCHECK(buffer_pos == 0);
return (sign == NEGATIVE) ? -V8_INFINITY : V8_INFINITY;
}
bool leading_zero = false;
if (*current == '0') {
++current;
if (current == end) return SignedZero(sign == NEGATIVE);
leading_zero = true;
// The three prefixed forms below require at least one valid digit after the
// prefix and reject any explicit sign (sign != NONE).
// It could be hexadecimal value.
if ((flags & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
++current;
if (current == end || !isDigit(*current, 16) || sign != NONE) {
return JunkStringValue(); // "0x".
}
return InternalStringToIntDouble<4>(unicode_cache,
current,
end,
false,
allow_trailing_junk);
// It could be an explicit octal value.
} else if ((flags & ALLOW_OCTAL) && (*current == 'o' || *current == 'O')) {
++current;
if (current == end || !isDigit(*current, 8) || sign != NONE) {
return JunkStringValue(); // "0o".
}
return InternalStringToIntDouble<3>(unicode_cache,
current,
end,
false,
allow_trailing_junk);
// It could be a binary value.
} else if ((flags & ALLOW_BINARY) && (*current == 'b' || *current == 'B')) {
++current;
if (current == end || !isBinaryDigit(*current) || sign != NONE) {
return JunkStringValue(); // "0b".
}
return InternalStringToIntDouble<1>(unicode_cache,
current,
end,
false,
allow_trailing_junk);
}
// Ignore leading zeros in the integer part.
while (*current == '0') {
++current;
if (current == end) return SignedZero(sign == NEGATIVE);
}
}
// A leading zero makes the literal a candidate for implicit octal; the
// candidate is dropped below as soon as a digit '8' or '9' is seen.
bool octal = leading_zero && (flags & ALLOW_IMPLICIT_OCTAL) != 0;
// Copy significant digits of the integer part (if any) to the buffer.
while (*current >= '0' && *current <= '9') {
if (significant_digits < kMaxSignificantDigits) {
DCHECK(buffer_pos < kBufferSize);
buffer[buffer_pos++] = static_cast<char>(*current);
significant_digits++;
// Will later check if it's an octal in the buffer.
} else {
insignificant_digits++; // Move the digit into the exponential part.
nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
}
octal = octal && *current < '8';
++current;
if (current == end) goto parsing_done;
}
if (significant_digits == 0) {
octal = false;
}
if (*current == '.') {
// An implicit octal literal has no fractional part: the '.' is either junk
// or the end of the number.
if (octal && !allow_trailing_junk) return JunkStringValue();
if (octal) goto parsing_done;
++current;
if (current == end) {
if (significant_digits == 0 && !leading_zero) {
return JunkStringValue();
} else {
goto parsing_done;
}
}
if (significant_digits == 0) {
// octal = false;
// Integer part consists of 0 or is absent. Significant digits start after
// leading zeros (if any).
while (*current == '0') {
++current;
if (current == end) return SignedZero(sign == NEGATIVE);
exponent--; // Move this 0 into the exponent.
}
}
// There is a fractional part. We don't emit a '.', but adjust the exponent
// instead.
while (*current >= '0' && *current <= '9') {
if (significant_digits < kMaxSignificantDigits) {
DCHECK(buffer_pos < kBufferSize);
buffer[buffer_pos++] = static_cast<char>(*current);
significant_digits++;
exponent--;
} else {
// Ignore insignificant digits in the fractional part.
nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
}
++current;
if (current == end) goto parsing_done;
}
}
if (!leading_zero && exponent == 0 && significant_digits == 0) {
// If leading_zero is true then the string contains zeros.
// If exponent < 0 then string was [+-]\.0*...
// If significant_digits != 0 the string is not equal to 0.
// Otherwise there are no digits in the string.
return JunkStringValue();
}
// Parse exponential part.
if (*current == 'e' || *current == 'E') {
if (octal) return JunkStringValue();
++current;
if (current == end) {
if (allow_trailing_junk) {
goto parsing_done;
} else {
return JunkStringValue();
}
}
// NOTE: this local shadows the outer 'Sign sign'; inside this block 'sign'
// is the sign character of the exponent, not the sign of the number.
char sign = '+';
if (*current == '+' || *current == '-') {
sign = static_cast<char>(*current);
++current;
if (current == end) {
if (allow_trailing_junk) {
goto parsing_done;
} else {
return JunkStringValue();
}
}
}
if (current == end || *current < '0' || *current > '9') {
if (allow_trailing_junk) {
goto parsing_done;
} else {
return JunkStringValue();
}
}
const int max_exponent = INT_MAX / 2;
DCHECK(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2);
int num = 0;
do {
// Check overflow.
// Once another digit would push num past max_exponent, num is pinned to
// max_exponent instead of being multiplied further.
int digit = *current - '0';
if (num >= max_exponent / 10
&& !(num == max_exponent / 10 && digit <= max_exponent % 10)) {
num = max_exponent;
} else {
num = num * 10 + digit;
}
++current;
} while (current != end && *current >= '0' && *current <= '9');
exponent += (sign == '-' ? -num : num);
}
if (!allow_trailing_junk &&
AdvanceToNonspace(unicode_cache, &current, end)) {
return JunkStringValue();
}
parsing_done:
// Integer digits that did not fit into the buffer still scale the value.
exponent += insignificant_digits;
if (octal) {
// Re-parse the buffered digits as a radix-8 integer.
return InternalStringToIntDouble<3>(unicode_cache,
buffer,
buffer + buffer_pos,
sign == NEGATIVE,
allow_trailing_junk);
}
if (nonzero_digit_dropped) {
// Append a '1' one decimal place below the buffered digits so the dropped
// nonzero tail is still represented in what Strtod sees.
buffer[buffer_pos++] = '1';
exponent--;
}
SLOW_DCHECK(buffer_pos < kBufferSize);
buffer[buffer_pos] = '\0';
double converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent);
return (sign == NEGATIVE) ? -converted : converted;
}
} // namespace internal
} // namespace v8
......
......@@ -12,11 +12,12 @@
#include "src/assert-scope.h"
#include "src/char-predicates-inl.h"
#include "src/codegen.h"
#include "src/conversions-inl.h"
#include "src/dtoa.h"
#include "src/factory.h"
#include "src/handles.h"
#include "src/objects-inl.h"
#include "src/strtod.h"
#include "src/unicode-cache-inl.h"
#include "src/utils.h"
#if defined(_STLP_VENDOR_CSTD)
......@@ -29,47 +30,736 @@
namespace v8 {
namespace internal {
namespace {
// C++-style iterator adaptor for StringCharacterStream
// (unlike C++ iterators the end-marker has different type).
class StringCharacterStreamIterator {
inline double JunkStringValue() {
return bit_cast<double, uint64_t>(kQuietNaNMask);
}
// Returns a zero with the requested sign: plain 0.0 when {negative} is false,
// otherwise the bit pattern Double::kSignMask reinterpreted as a double.
inline double SignedZero(bool negative) {
  if (!negative) return 0.0;
  return uint64_to_double(Double::kSignMask);
}
// Tells whether character code {x} is a valid digit in the given {radix}.
// Decimal digits count while their value is below the radix; for radixes
// above 10 the letters starting at 'a' / 'A' supply the values 10 and up,
// in either case.
inline bool isDigit(int x, int radix) {
  if (x >= '0' && x <= '9') return x < '0' + radix;
  if (radix <= 10) return false;
  const int letter_count = radix - 10;
  if (x >= 'a') return x < 'a' + letter_count;
  if (x >= 'A') return x < 'A' + letter_count;
  return false;
}
// True only for the two binary digit characters '0' and '1'.
inline bool isBinaryDigit(int x) {
  switch (x) {
    case '0':
    case '1':
      return true;
    default:
      return false;
  }
}
// Checks whether the characters starting at {*current} spell out {substring}.
// The first character must already have been matched by the caller (this is
// asserted), so the comparison starts at the second one. On success
// {*current} is left one position past the last matched character.
template <class Iterator, class EndMark>
bool SubStringEquals(Iterator* current, EndMark end, const char* substring) {
  DCHECK(**current == *substring);
  const char* expected = substring + 1;
  while (*expected != '\0') {
    ++*current;
    if (*current == end) return false;
    if (**current != *expected) return false;
    ++expected;
  }
  ++*current;
  return true;
}
// Moves {*current} forward over whitespace and line terminators. Returns true
// as soon as it rests on a character that is neither, and false if {end} is
// reached first.
template <class Iterator, class EndMark>
inline bool AdvanceToNonspace(UnicodeCache* unicode_cache, Iterator* current,
                              EndMark end) {
  for (; *current != end; ++*current) {
    const bool is_space =
        unicode_cache->IsWhiteSpaceOrLineTerminator(**current);
    if (!is_space) return true;
  }
  return false;
}
// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
//
// radix_log_2 is log2 of the radix, so template arguments 1..5 select radix
// 2, 4, 8, 16 and 32.
//
// Digits are accumulated exactly in a 64-bit integer. As soon as the value
// needs more than 53 bits, the low bits are dropped, the remaining digits are
// only counted into the binary exponent, and the dropped bits decide the
// rounding (ties go to even unless a nonzero digit follows).
//
// negative: negate the result; a zero result is returned as a negative zero.
// allow_trailing_junk: when false, a non-whitespace character after the
// digits makes the function return JunkStringValue().
template <int radix_log_2, class Iterator, class EndMark>
double InternalStringToIntDouble(UnicodeCache* unicode_cache, Iterator current,
EndMark end, bool negative,
bool allow_trailing_junk) {
DCHECK(current != end);
// Skip leading 0s.
while (*current == '0') {
++current;
if (current == end) return SignedZero(negative);
}
int64_t number = 0;
int exponent = 0;
const int radix = (1 << radix_log_2);
// Exclusive upper bounds of the accepted characters in each digit class. For
// radix <= 10 the two letter bounds do not exceed 'a' / 'A', so the letter
// branches below never match.
int lim_0 = '0' + (radix < 10 ? radix : 10);
int lim_a = 'a' + (radix - 10);
int lim_A = 'A' + (radix - 10);
do {
int digit;
if (*current >= '0' && *current < lim_0) {
digit = static_cast<char>(*current) - '0';
} else if (*current >= 'a' && *current < lim_a) {
digit = static_cast<char>(*current) - 'a' + 10;
} else if (*current >= 'A' && *current < lim_A) {
digit = static_cast<char>(*current) - 'A' + 10;
} else {
// Not a digit: parsing stops here. The value read so far is kept if junk
// is allowed or if only whitespace remains up to the end.
if (allow_trailing_junk ||
!AdvanceToNonspace(unicode_cache, &current, end)) {
break;
} else {
return JunkStringValue();
}
}
number = number * radix + digit;
// Anything at or above bit 53 no longer fits the 53 bits kept exactly.
int overflow = static_cast<int>(number >> 53);
if (overflow != 0) {
// Overflow occurred. Need to determine which direction to round the
// result.
int overflow_bits_count = 1;
while (overflow > 1) {
overflow_bits_count++;
overflow >>= 1;
}
int dropped_bits_mask = ((1 << overflow_bits_count) - 1);
int dropped_bits = static_cast<int>(number) & dropped_bits_mask;
number >>= overflow_bits_count;
exponent = overflow_bits_count;
// Consume the remaining digits: each one scales the result by the radix
// (radix_log_2 more exponent bits); zero_tail records whether they were
// all '0', which matters only for the exact half-way case below.
bool zero_tail = true;
while (true) {
++current;
if (current == end || !isDigit(*current, radix)) break;
zero_tail = zero_tail && *current == '0';
exponent += radix_log_2;
}
if (!allow_trailing_junk &&
AdvanceToNonspace(unicode_cache, &current, end)) {
return JunkStringValue();
}
int middle_value = (1 << (overflow_bits_count - 1));
if (dropped_bits > middle_value) {
number++; // Rounding up.
} else if (dropped_bits == middle_value) {
// Rounding to even for consistency with decimals: half-way case rounds
// up if significant part is odd and down otherwise.
if ((number & 1) != 0 || !zero_tail) {
number++; // Rounding up.
}
}
// Rounding up may cause overflow.
if ((number & (static_cast<int64_t>(1) << 53)) != 0) {
exponent++;
number >>= 1;
}
break;
}
++current;
} while (current != end);
DCHECK(number < ((int64_t)1 << 53));
DCHECK(static_cast<int64_t>(static_cast<double>(number)) == number);
// No bits were dropped: the integer converts to a double directly.
if (exponent == 0) {
if (negative) {
if (number == 0) return -0.0;
number = -number;
}
return static_cast<double>(number);
}
DCHECK(number != 0);
// Scale the retained bits back up by the binary exponent collected above.
return std::ldexp(static_cast<double>(negative ? -number : number), exponent);
}
// ES6 18.2.5 parseInt(string, radix) (with NumberParseIntHelper subclass);
// https://tc39.github.io/proposal-bigint/#sec-bigint-parseint-string-radix
// (with BigIntParseIntHelper subclass).
//
// Base class holding the radix-independent part of integer parsing for a flat
// String: sign, radix and leading-zero detection plus the generic digit loop.
// A subclass supplies the accumulator (AllocateResult / ResultMultiplyAdd),
// may short-circuit selected radixes in HandleSpecialCases, and starts the
// work by calling ParseInt(); the outcome is then reported through state().
//
// NOTE(review): several declarations in this class body (EndMarker, a
// constructor named StringCharacterStreamIterator, operator*, operator++,
// operator==/!=, stream_, current_, end_) belong to an iterator type rather
// than to this helper, and a constructor whose name differs from the class
// name cannot compile here. They look like leftovers of a removed class -
// confirm and drop them.
class StringToIntHelper {
public:
class EndMarker {};
// {subject} must already be flat (asserted). A {radix} of 0 asks
// DetectRadixInternal to pick 10 or 16 from the input.
StringToIntHelper(Isolate* isolate, Handle<String> subject, int radix)
: isolate_(isolate), subject_(subject), radix_(radix) {
DCHECK(subject->IsFlat());
}
virtual ~StringToIntHelper() {}
protected:
// Subclasses must implement these:
virtual void AllocateResult() = 0;
virtual void ResultMultiplyAdd(uint32_t multiplier, uint32_t part) = 0;
// Subclasses must call this to do all the work.
void ParseInt();
// Subclasses may override this.
virtual void HandleSpecialCases() {}
explicit StringCharacterStreamIterator(StringCharacterStream* stream);
// Subclasses get access to internal state:
// kRunning is the initial state; DetectRadixInternal may set kJunk or kZero
// and ParseInternal sets kDone. Nothing visible in this class sets kError.
enum State { kRunning, kError, kJunk, kZero, kDone };
uint16_t operator*() const;
void operator++();
bool operator==(EndMarker const&) const { return end_; }
bool operator!=(EndMarker const& m) const { return !end_; }
Isolate* isolate() { return isolate_; }
Handle<String> subject() { return subject_; }
int radix() { return radix_; }
int cursor() { return cursor_; }
int length() { return length_; }
bool negative() { return negative_; }
State state() { return state_; }
void set_state(State state) { state_ = state; }
private:
StringCharacterStream* const stream_;
uint16_t current_;
bool end_;
// Both templates are instantiated with a pointer to the string's characters
// (one-byte or two-byte), see ParseInt().
template <class Char>
void DetectRadixInternal(Char current, int length);
template <class Char>
void ParseInternal(Char start);
Isolate* isolate_;
Handle<String> subject_;
int radix_;
// Offset of the first character still to be parsed, set by
// DetectRadixInternal.
int cursor_ = 0;
// Length of the subject in characters, set by DetectRadixInternal.
int length_ = 0;
bool negative_ = false;
bool leading_zero_ = false;
State state_ = kRunning;
};
void StringToIntHelper::ParseInt() {
{
DisallowHeapAllocation no_gc;
String::FlatContent flat = subject_->GetFlatContent();
if (flat.IsOneByte()) {
Vector<const uint8_t> vector = flat.ToOneByteVector();
DetectRadixInternal(vector.start(), vector.length());
} else {
Vector<const uc16> vector = flat.ToUC16Vector();
DetectRadixInternal(vector.start(), vector.length());
}
}
if (state_ != kRunning) return;
AllocateResult();
HandleSpecialCases();
if (state_ != kRunning) return;
{
DisallowHeapAllocation no_gc;
String::FlatContent flat = subject_->GetFlatContent();
if (flat.IsOneByte()) {
Vector<const uint8_t> vector = flat.ToOneByteVector();
DCHECK_EQ(length_, vector.length());
ParseInternal(vector.start());
} else {
Vector<const uc16> vector = flat.ToUC16Vector();
DCHECK_EQ(length_, vector.length());
ParseInternal(vector.start());
}
}
DCHECK(state_ != kRunning);
}
// First pass over the subject's characters. It records the length, skips
// leading whitespace, consumes an optional sign (setting negative_), resolves
// radix_ (0 becomes 10, or 16 when a "0x"/"0X" prefix follows; an explicit 16
// also accepts that prefix) and skips leading zeros. On the normal path it
// stores in cursor_ the offset of the first character left to parse and
// leaves the state untouched. Early outcomes are reported through the state:
// kJunk when nothing parseable follows, kZero when the input consists only of
// zeros after the optional sign and prefix handling.
template <class Char>
void StringToIntHelper::DetectRadixInternal(Char current, int length) {
Char start = current;
length_ = length;
Char end = start + length;
UnicodeCache* unicode_cache = isolate_->unicode_cache();
if (!AdvanceToNonspace(unicode_cache, &current, end)) {
return set_state(kJunk);
}
if (*current == '+') {
// Ignore leading sign.
++current;
if (current == end) {
return set_state(kJunk);
}
} else if (*current == '-') {
++current;
if (current == end) {
return set_state(kJunk);
}
negative_ = true;
}
// NOTE(review): the next three lines open a StringCharacterStreamIterator
// constructor in the middle of this function body and are never closed; they
// look like leftovers of a removed class - confirm and drop them.
StringCharacterStreamIterator::StringCharacterStreamIterator(
StringCharacterStream* stream) : stream_(stream) {
++(*this);
if (radix_ == 0) {
// Radix detection.
radix_ = 10;
if (*current == '0') {
++current;
if (current == end) return set_state(kZero);
if (*current == 'x' || *current == 'X') {
radix_ = 16;
++current;
if (current == end) return set_state(kJunk);
} else {
leading_zero_ = true;
}
}
} else if (radix_ == 16) {
if (*current == '0') {
// Allow "0x" prefix.
++current;
if (current == end) return set_state(kZero);
if (*current == 'x' || *current == 'X') {
++current;
if (current == end) return set_state(kJunk);
} else {
leading_zero_ = true;
}
}
}
// Skip leading zeros.
while (*current == '0') {
leading_zero_ = true;
++current;
if (current == end) return set_state(kZero);
}
// Without any leading zero, the first remaining character must be a digit;
// with one, a non-digit here simply ends a number whose value is zero.
if (!leading_zero_ && !isDigit(*current, radix_)) {
return set_state(kJunk);
}
// An out-of-range radix is only asserted here, not rejected.
DCHECK(radix_ >= 2 && radix_ <= 36);
STATIC_ASSERT(String::kMaxLength <= INT_MAX);
cursor_ = static_cast<int>(current - start);
}
uint16_t StringCharacterStreamIterator::operator*() const {
return current_;
// Generic digit loop. Starting at offset cursor_, digits valid in radix_ are
// gathered into chunks small enough for 32-bit arithmetic; every chunk is
// handed to ResultMultiplyAdd together with the matching power of the radix.
// Parsing ends at the first character that is not a digit or at the end of
// the string, after which the state becomes kDone.
//
// For Number results this accumulates rounding error for values above ~2^56,
// which the spec explicitly allows: "if R is not 2, 4, 8, 10, 16, or 32, then
// mathInt may be an implementation-dependent approximation to the
// mathematical integer value" (15.1.2.2).
template <class Char>
void StringToIntHelper::ParseInternal(Char start) {
  Char pos = start + cursor_;
  Char stop = start + length_;

  // Exclusive upper bounds of the characters accepted in each digit class.
  const int digit_limit = '0' + (radix_ < 10 ? radix_ : 10);
  const int lowercase_limit = 'a' + (radix_ - 10);
  const int uppercase_limit = 'A' + (radix_ - 10);

  // A chunk is closed as soon as one more digit could no longer be guaranteed
  // to keep the multiplier within 32 bits.
  const uint32_t kMaximumMultiplier = 0xffffffffU / 36;

  for (bool finished = false; !finished;) {
    uint32_t chunk = 0;
    uint32_t scale = 1;
    for (;;) {
      uint32_t value;
      if (*pos >= '0' && *pos < digit_limit) {
        value = *pos - '0';
      } else if (*pos >= 'a' && *pos < lowercase_limit) {
        value = *pos - 'a' + 10;
      } else if (*pos >= 'A' && *pos < uppercase_limit) {
        value = *pos - 'A' + 10;
      } else {
        finished = true;
        break;
      }

      const uint32_t next_scale = scale * static_cast<uint32_t>(radix_);
      if (next_scale > kMaximumMultiplier) break;
      chunk = chunk * radix_ + value;
      scale = next_scale;
      DCHECK(scale > chunk);

      ++pos;
      if (pos == stop) {
        finished = true;
        break;
      }
    }
    // Fold the finished chunk into the subclass's accumulator.
    ResultMultiplyAdd(scale, chunk);
  }
  set_state(kDone);
}
class NumberParseIntHelper : public StringToIntHelper {
public:
// Forwards the isolate, the subject string and the requested radix to the
// StringToIntHelper base class; no further state is set up here.
NumberParseIntHelper(Isolate* isolate, Handle<String> string, int radix)
: StringToIntHelper(isolate, string, radix) {}
double GetResult() {
ParseInt();
switch (state()) {
case kJunk:
return JunkStringValue();
case kZero:
return SignedZero(negative());
case kDone:
return negative() ? -result_ : result_;
case kError:
case kRunning:
break;
}
UNREACHABLE();
}
void StringCharacterStreamIterator::operator++() {
end_ = !stream_->HasMore();
if (!end_) {
current_ = stream_->GetNext();
protected:
virtual void AllocateResult() {}
virtual void ResultMultiplyAdd(uint32_t multiplier, uint32_t part) {
result_ = result_ * multiplier + part;
}
private:
virtual void HandleSpecialCases() {
bool is_power_of_two = base::bits::IsPowerOfTwo(radix());
if (!is_power_of_two && radix() != 10) return;
DisallowHeapAllocation no_gc;
String::FlatContent flat = subject()->GetFlatContent();
if (flat.IsOneByte()) {
Vector<const uint8_t> vector = flat.ToOneByteVector();
DCHECK_EQ(length(), vector.length());
result_ = is_power_of_two ? HandlePowerOfTwoCase(vector.start())
: HandleBaseTenCase(vector.start());
} else {
Vector<const uc16> vector = flat.ToUC16Vector();
DCHECK_EQ(length(), vector.length());
result_ = is_power_of_two ? HandlePowerOfTwoCase(vector.start())
: HandleBaseTenCase(vector.start());
}
set_state(kDone);
}
template <class Char>
double HandlePowerOfTwoCase(Char start) {
Char current = start + cursor();
Char end = start + length();
UnicodeCache* unicode_cache = isolate()->unicode_cache();
const bool allow_trailing_junk = true;
// GetResult() will take care of the sign bit, so ignore it for now.
const bool negative = false;
switch (radix()) {
case 2:
return InternalStringToIntDouble<1>(unicode_cache, current, end,
negative, allow_trailing_junk);
case 4:
return InternalStringToIntDouble<2>(unicode_cache, current, end,
negative, allow_trailing_junk);
case 8:
return InternalStringToIntDouble<3>(unicode_cache, current, end,
negative, allow_trailing_junk);
case 16:
return InternalStringToIntDouble<4>(unicode_cache, current, end,
negative, allow_trailing_junk);
case 32:
return InternalStringToIntDouble<5>(unicode_cache, current, end,
negative, allow_trailing_junk);
default:
UNREACHABLE();
}
}
// Parses the decimal digits starting at offset cursor() by copying them into
// a local buffer and handing that buffer to Strtod with exponent 0. Copying
// stops at the first non-digit or at the end of the string. The sign is not
// applied here.
template <class Char>
double HandleBaseTenCase(Char start) {
  Char pos = start + cursor();
  Char stop = start + length();
  const int kMaxSignificantDigits = 309;  // Doubles are less than 1.8e308.
  // Room for up to kMaxSignificantDigits + 1 digits plus the terminating zero.
  const int kBufferSize = kMaxSignificantDigits + 2;
  char digits[kBufferSize];
  int used = 0;
  for (;;) {
    const int c = *pos;
    if (c < '0' || c > '9') break;
    // Digits beyond the buffer's capacity are consumed but not stored; a
    // number with more than kMaxSignificantDigits digits is parsed as
    // infinity anyway.
    if (used <= kMaxSignificantDigits) {
      DCHECK(used < kBufferSize);
      digits[used] = static_cast<char>(c);
      ++used;
    }
    ++pos;
    if (pos == stop) break;
  }
  SLOW_DCHECK(used < kBufferSize);
  digits[used] = '\0';
  return Strtod(Vector<const char>(digits, used), 0);
}
double result_ = 0;
};
// Converts the character sequence [current, end) to a double value.
//
// Forms handled by the code below, after an optional '+' or '-' sign:
//  - the literal "Infinity";
//  - a prefixed integer "0x"/"0X", "0o"/"0O" or "0b"/"0B", each only when the
//    matching ALLOW_HEX / ALLOW_OCTAL / ALLOW_BINARY bit is set in |flags|
//    and only when no sign was given (a signed prefixed literal is junk);
//  - a decimal literal with optional fractional part and exponent;
//  - when ALLOW_IMPLICIT_OCTAL is set, a literal with a leading zero whose
//    integer digits are all below '8' is re-parsed from the digit buffer via
//    InternalStringToIntDouble<3>.
//
// Returns |empty_string_val| if the initial AdvanceToNonspace() call returns
// false, and JunkStringValue() when the input matches none of the forms
// above. When ALLOW_TRAILING_JUNK is set, the checks that reject leftover
// characters after a parsed prefix are skipped.
//
// Assumes the Iterator supports the following operations:
// 1. current == end (other ops are not allowed), current != end.
// 2. *current - gets the current character in the sequence.
// 3. ++current (advances the position).
template <class Iterator, class EndMark>
double InternalStringToDouble(UnicodeCache* unicode_cache, Iterator current,
EndMark end, int flags, double empty_string_val) {
// To make sure that iterator dereferencing is valid the following
// convention is used:
// 1. Each '++current' statement is followed by check for equality to 'end'.
// 2. If AdvanceToNonspace returned false then current == end.
// 3. If 'current' becomes equal to 'end' the function returns or goes to
// 'parsing_done'.
// 4. 'current' is not dereferenced after the 'parsing_done' label.
// 5. Code before 'parsing_done' may rely on 'current != end'.
if (!AdvanceToNonspace(unicode_cache, &current, end)) {
return empty_string_val;
}
const bool allow_trailing_junk = (flags & ALLOW_TRAILING_JUNK) != 0;
// Maximum number of significant digits in decimal representation.
// The longest possible double in decimal representation is
// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074
// (768 digits). If we parse a number whose first digits are equal to a
// mean of 2 adjacent doubles (that could have up to 769 digits) the result
// must be rounded to the bigger one unless the tail consists of zeros, so
// we don't need to preserve all the digits.
const int kMaxSignificantDigits = 772;
// The longest form of simplified number is: "-<significant digits>'.1eXXX\0".
const int kBufferSize = kMaxSignificantDigits + 10;
char buffer[kBufferSize]; // NOLINT: size is known at compile time.
// Number of digit characters stored in |buffer| so far.
int buffer_pos = 0;
// Exponent will be adjusted if insignificant digits of the integer part
// or insignificant leading zeros of the fractional part are dropped.
int exponent = 0;
// Digits copied into |buffer| (capped at kMaxSignificantDigits).
int significant_digits = 0;
// Integer-part digits seen after the cap was reached; added to |exponent|
// at 'parsing_done'.
int insignificant_digits = 0;
// Set when any digit that was not stored in |buffer| was non-zero.
bool nonzero_digit_dropped = false;
enum Sign { NONE, NEGATIVE, POSITIVE };
Sign sign = NONE;
// A sign that is the last character of the input is junk.
if (*current == '+') {
// Ignore leading sign.
++current;
if (current == end) return JunkStringValue();
sign = POSITIVE;
} else if (*current == '-') {
++current;
if (current == end) return JunkStringValue();
sign = NEGATIVE;
}
static const char kInfinityString[] = "Infinity";
// Anything starting with 'I' must be exactly "Infinity".
// NOTE(review): SubStringEquals presumably advances |current| past the
// matched text -- confirm against its definition.
if (*current == kInfinityString[0]) {
if (!SubStringEquals(&current, end, kInfinityString)) {
return JunkStringValue();
}
if (!allow_trailing_junk &&
AdvanceToNonspace(unicode_cache, &current, end)) {
return JunkStringValue();
}
DCHECK(buffer_pos == 0);
return (sign == NEGATIVE) ? -V8_INFINITY : V8_INFINITY;
}
bool leading_zero = false;
if (*current == '0') {
++current;
// A lone "0" (optionally signed) is a signed zero.
if (current == end) return SignedZero(sign == NEGATIVE);
leading_zero = true;
// It could be hexadecimal value.
// For all three prefixes below: the prefix must be followed by at least one
// valid digit and must not have been preceded by a sign; parsing is then
// delegated to InternalStringToIntDouble with negative == false.
if ((flags & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
++current;
if (current == end || !isDigit(*current, 16) || sign != NONE) {
return JunkStringValue(); // "0x".
}
return InternalStringToIntDouble<4>(unicode_cache, current, end, false,
allow_trailing_junk);
// It could be an explicit octal value.
} else if ((flags & ALLOW_OCTAL) && (*current == 'o' || *current == 'O')) {
++current;
if (current == end || !isDigit(*current, 8) || sign != NONE) {
return JunkStringValue(); // "0o".
}
return InternalStringToIntDouble<3>(unicode_cache, current, end, false,
allow_trailing_junk);
// It could be a binary value.
} else if ((flags & ALLOW_BINARY) && (*current == 'b' || *current == 'B')) {
++current;
if (current == end || !isBinaryDigit(*current) || sign != NONE) {
return JunkStringValue(); // "0b".
}
return InternalStringToIntDouble<1>(unicode_cache, current, end, false,
allow_trailing_junk);
}
// Ignore leading zeros in the integer part.
while (*current == '0') {
++current;
if (current == end) return SignedZero(sign == NEGATIVE);
}
}
// Candidate for implicit octal: starts with '0' and the flag is set. The
// candidate is dropped below as soon as a digit >= '8' is seen, or if no
// significant integer digit follows the leading zero(s).
bool octal = leading_zero && (flags & ALLOW_IMPLICIT_OCTAL) != 0;
// Copy significant digits of the integer part (if any) to the buffer.
while (*current >= '0' && *current <= '9') {
if (significant_digits < kMaxSignificantDigits) {
DCHECK(buffer_pos < kBufferSize);
buffer[buffer_pos++] = static_cast<char>(*current);
significant_digits++;
// Will later check if it's an octal in the buffer.
} else {
insignificant_digits++; // Move the digit into the exponential part.
nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
}
octal = octal && *current < '8';
++current;
if (current == end) goto parsing_done;
}
if (significant_digits == 0) {
octal = false;
}
if (*current == '.') {
// An implicit-octal literal has no fractional part: the '.' is either junk
// or, with ALLOW_TRAILING_JUNK, the start of ignored trailing text.
if (octal && !allow_trailing_junk) return JunkStringValue();
if (octal) goto parsing_done;
++current;
if (current == end) {
// Input ends right after the '.': junk if no digit (not even a leading
// zero) came before it, otherwise convert what was collected.
if (significant_digits == 0 && !leading_zero) {
return JunkStringValue();
} else {
goto parsing_done;
}
}
if (significant_digits == 0) {
// octal = false;
// Integer part consists of 0 or is absent. Significant digits start after
// leading zeros (if any).
while (*current == '0') {
++current;
if (current == end) return SignedZero(sign == NEGATIVE);
exponent--; // Move this 0 into the exponent.
}
}
// There is a fractional part. We don't emit a '.', but adjust the exponent
// instead.
while (*current >= '0' && *current <= '9') {
if (significant_digits < kMaxSignificantDigits) {
DCHECK(buffer_pos < kBufferSize);
buffer[buffer_pos++] = static_cast<char>(*current);
significant_digits++;
exponent--;
} else {
// Ignore insignificant digits in the fractional part.
nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
}
++current;
if (current == end) goto parsing_done;
}
}
if (!leading_zero && exponent == 0 && significant_digits == 0) {
// If leading_zero is true then the string contains zeros.
// If exponent < 0 then string was [+-]\.0*...
// If significant_digits != 0 the string is not equal to 0.
// Otherwise there are no digits in the string.
return JunkStringValue();
}
// Parse exponential part.
if (*current == 'e' || *current == 'E') {
// An exponent on an implicit-octal candidate is always junk.
if (octal) return JunkStringValue();
++current;
if (current == end) {
if (allow_trailing_junk) {
goto parsing_done;
} else {
return JunkStringValue();
}
}
// NOTE: this 'char sign' (the exponent's sign) shadows the outer
// 'Sign sign' (the number's sign) until the end of this block.
char sign = '+';
if (*current == '+' || *current == '-') {
sign = static_cast<char>(*current);
++current;
if (current == end) {
if (allow_trailing_junk) {
goto parsing_done;
} else {
return JunkStringValue();
}
}
}
// The exponent marker (and optional sign) must be followed by a digit.
if (current == end || *current < '0' || *current > '9') {
if (allow_trailing_junk) {
goto parsing_done;
} else {
return JunkStringValue();
}
}
// |num| saturates at max_exponent (INT_MAX / 2). Together with the DCHECK'd
// bound on |exponent|, 'exponent += +/-num' below stays within int range.
const int max_exponent = INT_MAX / 2;
DCHECK(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2);
int num = 0;
do {
// Check overflow.
int digit = *current - '0';
if (num >= max_exponent / 10 &&
!(num == max_exponent / 10 && digit <= max_exponent % 10)) {
num = max_exponent;
} else {
num = num * 10 + digit;
}
++current;
} while (current != end && *current >= '0' && *current <= '9');
exponent += (sign == '-' ? -num : num);
}
// Unless trailing junk is allowed, the input is junk if AdvanceToNonspace
// still finds something to stop at.
if (!allow_trailing_junk && AdvanceToNonspace(unicode_cache, &current, end)) {
return JunkStringValue();
}
parsing_done:
// Integer digits that did not fit into |buffer| still scale the value.
exponent += insignificant_digits;
if (octal) {
// Re-parse the collected digits; here the outer |sign| is passed along.
return InternalStringToIntDouble<3>(unicode_cache, buffer,
buffer + buffer_pos, sign == NEGATIVE,
allow_trailing_junk);
}
if (nonzero_digit_dropped) {
// Stand in for the dropped non-zero tail with one extra '1' digit, and
// compensate for that extra digit position in the exponent.
buffer[buffer_pos++] = '1';
exponent--;
}
SLOW_DCHECK(buffer_pos < kBufferSize);
buffer[buffer_pos] = '\0';
double converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent);
return (sign == NEGATIVE) ? -converted : converted;
}
} // End anonymous namespace.
} // namespace
double StringToDouble(UnicodeCache* unicode_cache,
const char* str, int flags, double empty_string_val) {
......@@ -104,23 +794,12 @@ double StringToDouble(UnicodeCache* unicode_cache,
empty_string_val);
}
// Converts a string into an integer.
double StringToInt(UnicodeCache* unicode_cache,
Vector<const uint8_t> vector,
int radix) {
return InternalStringToInt(
unicode_cache, vector.start(), vector.start() + vector.length(), radix);
// Parses |string| as an integer in base |radix|: builds a
// NumberParseIntHelper from the three arguments and returns whatever its
// GetResult() yields.
double StringToInt(Isolate* isolate, Handle<String> string, int radix) {
  NumberParseIntHelper parser(isolate, string, radix);
  const double parsed = parser.GetResult();
  return parsed;
}
// Two-byte (uc16) overload: forwards the character range
// [vector.start(), vector.start() + vector.length()) together with |radix|
// to InternalStringToInt and returns its result.
double StringToInt(UnicodeCache* unicode_cache, Vector<const uc16> vector,
                   int radix) {
  const uc16* const first = vector.start();
  const uc16* const past_last = first + vector.length();
  return InternalStringToInt(unicode_cache, first, past_last, radix);
}
const char* DoubleToCString(double v, Vector<char> buffer) {
switch (FPCLASSIFY_NAMESPACE::fpclassify(v)) {
......
......@@ -17,31 +17,10 @@ template <typename T>
class Handle;
class UnicodeCache;
// Maximum number of significant digits in decimal representation.
// The longest possible double in decimal representation is
// (2^53 - 1) * 2^-1074, that is (2^53 - 1) * 5^1074 / 10^1074
// (768 digits). If we parse a number whose first digits are equal to a
// mean of 2 adjacent doubles (that could have up to 769 digits) the result
// must be rounded to the bigger one unless the tail consists of zeros, so
// we don't need to preserve all the digits.
const int kMaxSignificantDigits = 772;
// The limit for the fractionDigits/precision for toFixed, toPrecision
// and toExponential.
const int kMaxFractionDigits = 100;
// Returns true if the character code |x| is a digit in base |radix|.
// Characters '0'..'9' qualify when they lie below '0' + radix. For radices
// above 10, letters from 'a' (or 'A') up to, but not including,
// 'a' + radix - 10 (or 'A' + radix - 10) qualify as well.
inline bool isDigit(int x, int radix) {
  const bool is_decimal_char = x >= '0' && x <= '9';
  if (is_decimal_char) return x < '0' + radix;
  // Letters only count as digits for radices above 10.
  if (radix <= 10) return false;
  if (x >= 'a' && x < 'a' + radix - 10) return true;
  return x >= 'A' && x < 'A' + radix - 10;
}
// Returns true only for the character codes '0' and '1'.
inline bool isBinaryDigit(int x) {
  switch (x) {
    case '0':
    case '1':
      return true;
    default:
      return false;
  }
}
// The fast double-to-(unsigned-)int conversion routine does not guarantee
// rounding towards zero.
// If x is NaN, the result is INT_MIN. Otherwise the result is the argument x,
......@@ -123,15 +102,7 @@ double StringToDouble(UnicodeCache* unicode_cache,
int flags,
double empty_string_val = 0);
// Converts a string into an integer.
double StringToInt(UnicodeCache* unicode_cache,
Vector<const uint8_t> vector,
int radix);
double StringToInt(UnicodeCache* unicode_cache,
Vector<const uc16> vector,
int radix);
double StringToInt(Isolate* isolate, Handle<String> string, int radix);
const int kDoubleToCStringMinBufferSize = 100;
......
......@@ -19,6 +19,8 @@
#include "src/objects/frame-array-inl.h"
#include "src/objects/module.h"
#include "src/objects/scope-info.h"
#include "src/unicode-cache.h"
#include "src/unicode-decoder.h"
namespace v8 {
namespace internal {
......
......@@ -52,6 +52,7 @@
#include "src/snapshot/snapshot.h"
#include "src/tracing/trace-event.h"
#include "src/trap-handler/trap-handler.h"
#include "src/unicode-inl.h"
#include "src/utils-inl.h"
#include "src/utils.h"
#include "src/v8.h"
......
......@@ -48,6 +48,7 @@
#include "src/simulator.h"
#include "src/snapshot/startup-deserializer.h"
#include "src/tracing/tracing-category-observer.h"
#include "src/unicode-cache.h"
#include "src/v8.h"
#include "src/version.h"
#include "src/visitors.h"
......
......@@ -29,6 +29,7 @@
#include "src/source-position-table.h"
#include "src/string-stream.h"
#include "src/tracing/tracing-category-observer.h"
#include "src/unicode-inl.h"
#include "src/vm-state-inl.h"
namespace v8 {
......
......@@ -70,6 +70,7 @@
#include "src/string-builder.h"
#include "src/string-search.h"
#include "src/string-stream.h"
#include "src/unicode-cache-inl.h"
#include "src/utils-inl.h"
#include "src/wasm/wasm-module.h"
#include "src/wasm/wasm-objects.h"
......
......@@ -14,6 +14,7 @@
#include "src/char-predicates-inl.h"
#include "src/conversions-inl.h"
#include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol
#include "src/unicode-cache-inl.h"
namespace v8 {
namespace internal {
......
......@@ -26,6 +26,7 @@
#include "src/splay-tree-inl.h"
#include "src/string-search.h"
#include "src/unicode-decoder.h"
#include "src/unicode-inl.h"
#ifdef V8_INTL_SUPPORT
#include "unicode/uniset.h"
......
......@@ -8,6 +8,7 @@
#include "src/isolate-inl.h"
#include "src/regexp/regexp-stack.h"
#include "src/simulator.h"
#include "src/unicode-inl.h"
#ifdef V8_INTL_SUPPORT
#include "unicode/uchar.h"
......
......@@ -52,20 +52,7 @@ RUNTIME_FUNCTION(Runtime_StringParseInt) {
return isolate->heap()->nan_value();
}
double result;
{
DisallowHeapAllocation no_gc;
String::FlatContent flat = subject->GetFlatContent();
if (flat.IsOneByte()) {
result = StringToInt(isolate->unicode_cache(), flat.ToOneByteVector(),
radix32);
} else {
result =
StringToInt(isolate->unicode_cache(), flat.ToUC16Vector(), radix32);
}
}
double result = StringToInt(isolate, subject, radix32);
return *isolate->factory()->NewNumber(result);
}
......
......@@ -10,6 +10,7 @@
#include "src/handles.h"
#include "src/isolate-inl.h"
#include "src/string-search.h"
#include "src/unicode-inl.h"
namespace v8 {
namespace internal {
......
......@@ -43,6 +43,8 @@
#include "src/regexp/regexp-parser.h"
#include "src/splay-tree-inl.h"
#include "src/string-stream.h"
#include "src/unicode-inl.h"
#ifdef V8_INTERPRETED_REGEXP
#include "src/regexp/interpreter-irregexp.h"
#else // V8_INTERPRETED_REGEXP
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment