Commit 89f46665 authored by lrn@chromium.org

Flatten strings before parsing them as Date strings, and have the parser work on a Vector of chars instead of reading the String through a StringInputBuffer.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1525 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent eb656c72
...@@ -43,26 +43,28 @@ inline bool IsLineFeed(uc32 c) { ...@@ -43,26 +43,28 @@ inline bool IsLineFeed(uc32 c) {
} }
// Returns true iff lower_limit <= value <= higher_limit.
//
// The two-sided test is folded into a single unsigned comparison: after
// subtracting lower_limit, every in-range value lands in
// [0, higher_limit - lower_limit], while every out-of-range value wraps
// around to something larger than that width.
//
// Each operand is converted to unsigned *before* subtracting. Unsigned
// arithmetic wraps modulo 2^N, whereas subtracting the signed ints directly
// can overflow (e.g. value == INT_MIN with a positive lower_limit, or a range
// spanning INT_MIN..INT_MAX), which is undefined behaviour.
//
// Requires lower_limit <= higher_limit (checked by ASSERT).
static inline bool IsInRange(int value, int lower_limit, int higher_limit) {
  ASSERT(lower_limit <= higher_limit);
  const unsigned int lower = static_cast<unsigned int>(lower_limit);
  return static_cast<unsigned int>(value) - lower <=
         static_cast<unsigned int>(higher_limit) - lower;
}
inline bool IsDecimalDigit(uc32 c) { inline bool IsDecimalDigit(uc32 c) {
// ECMA-262, 3rd, 7.8.3 (p 16) // ECMA-262, 3rd, 7.8.3 (p 16)
return return IsInRange(c, '0', '9');
'0' <= c && c <= '9';
} }
inline bool IsHexDigit(uc32 c) { inline bool IsHexDigit(uc32 c) {
// ECMA-262, 3rd, 7.6 (p 15) // ECMA-262, 3rd, 7.6 (p 15)
return return IsDecimalDigit(c) || IsInRange(c | 0x20, 'a', 'f');
('0' <= c && c <= '9') ||
('A' <= c && c <= 'F') ||
('a' <= c && c <= 'f');
} }
inline bool IsRegExpWord(uc16 c) { inline bool IsRegExpWord(uc16 c) {
return ('a' <= c && c <= 'z') return IsInRange(c | 0x20, 'a', 'z')
|| ('A' <= c && c <= 'Z') || IsDecimalDigit(c)
|| ('0' <= c && c <= '9')
|| (c == '_'); || (c == '_');
} }
......
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace v8 { namespace internal {
// Parses the characters of |str| as a date string and stores the result in
// |out|, which must have exactly OUTPUT_SIZE elements.
//
// The input is consumed token by token; each token is routed to the day,
// time, or time zone composer. Returns false as soon as the input is found to
// be malformed, and otherwise reports whether all three composers managed to
// write their part of the result into |out|.
template <typename Char>
bool DateParser::Parse(Vector<Char> str, FixedArray* out) {
  ASSERT(out->length() == OUTPUT_SIZE);
  InputReader<Char> in(str);
  TimeZoneComposer tz;
  TimeComposer time;
  DayComposer day;

  while (!in.IsEnd()) {
    // --- Number, optionally followed by one or two colons. ---
    if (in.IsAsciiDigit()) {
      int number = in.ReadUnsignedNumber();
      if (in.Skip(':')) {
        if (!in.Skip(':')) {
          // number + ":"
          if (!time.Add(number)) return false;
          continue;
        }
        // number + "::"
        if (!time.IsEmpty()) return false;
        time.Add(number);
        time.Add(0);
        continue;
      }
      if (tz.IsExpecting(number)) {
        tz.SetAbsoluteMinute(number);
        continue;
      }
      if (time.IsExpecting(number)) {
        time.AddFinal(number);
        // Once the time is finalized, only end of input or white space may
        // follow directly.
        if (!(in.IsEnd() || in.SkipWhiteSpace())) return false;
        continue;
      }
      if (!day.Add(number)) return false;
      in.Skip('-');  // A '-' suffix after year, month, or day is ignored.
      continue;
    }

    // --- "Word": a run of characters >= 'A'. ---
    if (in.IsAsciiAlphaOrAbove()) {
      uint32_t prefix[KeywordTable::kPrefixLength];
      int word_length = in.ReadWord(prefix, KeywordTable::kPrefixLength);
      int entry = KeywordTable::Lookup(prefix, word_length);
      KeywordType type = KeywordTable::GetType(entry);
      if (type == AM_PM && !time.IsEmpty()) {
        time.SetHourOffset(KeywordTable::GetValue(entry));
        continue;
      }
      if (type == MONTH_NAME) {
        day.SetNamedMonth(KeywordTable::GetValue(entry));
        in.Skip('-');  // A '-' suffix after a month name is ignored.
        continue;
      }
      if (type == TIME_ZONE_NAME && in.HasReadNumber()) {
        tz.Set(KeywordTable::GetValue(entry));
        continue;
      }
      // Any other word is garbage, which is only tolerated before the first
      // number has been read.
      if (in.HasReadNumber()) return false;
      continue;
    }

    // --- UTC offset: a sign, accepted only after UTC or after a time. ---
    if (in.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) {
      tz.SetSign(in.GetAsciiSignValue());
      in.Next();
      int offset = in.ReadUnsignedNumber();
      if (!in.Skip(':')) {
        // No colon: the digits are split as hour = n / 100, minute = n % 100.
        tz.SetAbsoluteHour(offset / 100);
        tz.SetAbsoluteMinute(offset % 100);
      } else {
        // Colon follows: the number is the hour; the minute is left unset.
        tz.SetAbsoluteHour(offset);
        tz.SetAbsoluteMinute(kNone);
      }
      continue;
    }

    // --- Parenthesized text: skipped up to the matching ')' or the end. ---
    if (in.Is('(')) {
      in.SkipParentheses();
      continue;
    }

    // --- A stray sign or ')' is an error once a number has been read. ---
    if ((in.IsAsciiSign() || in.Is(')')) && in.HasReadNumber()) {
      return false;
    }

    // --- Everything else is ignored. ---
    in.Next();
  }

  if (!day.Write(out)) return false;
  if (!time.Write(out)) return false;
  return tz.Write(out);
}
} } // namespace v8::internal
...@@ -31,84 +31,6 @@ ...@@ -31,84 +31,6 @@
namespace v8 { namespace internal { namespace v8 { namespace internal {
// Parses |str| as a date string and stores the result in |out|, which must
// have exactly OUTPUT_SIZE elements (asserted below).
//
// The input is read token by token through an InputReader; each token is
// handed to the day, time, or time zone composer. Returns false as soon as
// the input is found to be malformed; otherwise returns whether all three
// composers succeeded in writing their values to |out|.
bool DateParser::Parse(String* str, FixedArray* out) {
ASSERT(out->length() == OUTPUT_SIZE);
InputReader in(str);
TimeZoneComposer tz;
TimeComposer time;
DayComposer day;
while (!in.IsEnd()) {
if (in.IsAsciiDigit()) {
// Parse a number (possibly with 1 or 2 trailing colons).
int n = in.ReadUnsignedNumber();
if (in.Skip(':')) {
if (in.Skip(':')) {
// n + "::"
// Only accepted when no time component has been added yet; a zero is
// added as the component following n.
if (!time.IsEmpty()) return false;
time.Add(n);
time.Add(0);
} else {
// n + ":"
if (!time.Add(n)) return false;
}
} else if (tz.IsExpecting(n)) {
tz.SetAbsoluteMinute(n);
} else if (time.IsExpecting(n)) {
time.AddFinal(n);
// Require end or white space immediately after finalizing time.
if (!in.IsEnd() && !in.SkipWhiteSpace()) return false;
} else {
// Not claimed by the time zone or the time: treat it as a day component.
if (!day.Add(n)) return false;
in.Skip('-'); // Ignore suffix '-' for year, month, or day.
}
} else if (in.IsAsciiAlphaOrAbove()) {
// Parse a "word" (sequence of chars. >= 'A').
uint32_t pre[KeywordTable::kPrefixLength];
int len = in.ReadWord(pre, KeywordTable::kPrefixLength);
int index = KeywordTable::Lookup(pre, len);
KeywordType type = KeywordTable::GetType(index);
if (type == AM_PM && !time.IsEmpty()) {
time.SetHourOffset(KeywordTable::GetValue(index));
} else if (type == MONTH_NAME) {
day.SetNamedMonth(KeywordTable::GetValue(index));
in.Skip('-'); // Ignore suffix '-' for month names
} else if (type == TIME_ZONE_NAME && in.HasReadNumber()) {
tz.Set(KeywordTable::GetValue(index));
} else {
// Garbage words are illegal if a number has been read.
if (in.HasReadNumber()) return false;
}
} else if (in.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) {
// Parse UTC offset (only after UTC or time).
tz.SetSign(in.GetAsciiSignValue());
in.Next();
int n = in.ReadUnsignedNumber();
if (in.Skip(':')) {
// A colon follows: n is the hour; the minute is left unset (kNone) for now.
tz.SetAbsoluteHour(n);
tz.SetAbsoluteMinute(kNone);
} else {
// No colon: n is split as hour = n / 100, minute = n % 100.
tz.SetAbsoluteHour(n / 100);
tz.SetAbsoluteMinute(n % 100);
}
} else if (in.Is('(')) {
// Ignore anything from '(' to a matching ')' or end of string.
in.SkipParentheses();
} else if ((in.IsAsciiSign() || in.Is(')')) && in.HasReadNumber()) {
// Extra sign or ')' is illegal if a number has been read.
return false;
} else {
// Ignore other characters.
in.Next();
}
}
// Short-circuits: a later composer is not asked to write once an earlier one
// has failed.
return day.Write(out) && time.Write(out) && tz.Write(out);
}
bool DateParser::DayComposer::Write(FixedArray* output) { bool DateParser::DayComposer::Write(FixedArray* output) {
int year = 0; // Default year is 0 (=> 2000) for KJS compatibility. int year = 0; // Default year is 0 (=> 2000) for KJS compatibility.
int month = kNone; int month = kNone;
...@@ -192,7 +114,6 @@ bool DateParser::TimeComposer::Write(FixedArray* output) { ...@@ -192,7 +114,6 @@ bool DateParser::TimeComposer::Write(FixedArray* output) {
return true; return true;
} }
bool DateParser::TimeZoneComposer::Write(FixedArray* output) { bool DateParser::TimeZoneComposer::Write(FixedArray* output) {
if (sign_ != kNone) { if (sign_ != kNone) {
if (hour_ == kNone) hour_ = 0; if (hour_ == kNone) hour_ = 0;
...@@ -210,9 +131,8 @@ bool DateParser::TimeZoneComposer::Write(FixedArray* output) { ...@@ -210,9 +131,8 @@ bool DateParser::TimeZoneComposer::Write(FixedArray* output) {
return true; return true;
} }
const int8_t DateParser::KeywordTable::
const int8_t array[][DateParser::KeywordTable::kEntrySize] = {
DateParser::KeywordTable::array[][DateParser::KeywordTable::kEntrySize] = {
{'j', 'a', 'n', DateParser::MONTH_NAME, 1}, {'j', 'a', 'n', DateParser::MONTH_NAME, 1},
{'f', 'e', 'b', DateParser::MONTH_NAME, 2}, {'f', 'e', 'b', DateParser::MONTH_NAME, 2},
{'m', 'a', 'r', DateParser::MONTH_NAME, 3}, {'m', 'a', 'r', DateParser::MONTH_NAME, 3},
......
...@@ -32,7 +32,6 @@ ...@@ -32,7 +32,6 @@
namespace v8 { namespace internal { namespace v8 { namespace internal {
class DateParser : public AllStatic { class DateParser : public AllStatic {
public: public:
...@@ -46,25 +45,32 @@ class DateParser : public AllStatic { ...@@ -46,25 +45,32 @@ class DateParser : public AllStatic {
// [5]: second // [5]: second
// [6]: UTC offset in seconds, or null value if no timezone specified // [6]: UTC offset in seconds, or null value if no timezone specified
// If parsing fails, return false (content of output array is not defined). // If parsing fails, return false (content of output array is not defined).
static bool Parse(String* str, FixedArray* output); template <typename Char>
static bool Parse(Vector<Char> str, FixedArray* output);
enum {YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, UTC_OFFSET, OUTPUT_SIZE}; enum {YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, UTC_OFFSET, OUTPUT_SIZE};
private: private:
// Range testing // Range testing
static bool Between(int x, int lo, int hi) { return x >= lo && x <= hi; } static inline bool Between(int x, int lo, int hi) {
return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
}
// Indicates a missing value. // Indicates a missing value.
static const int kNone = kMaxInt; static const int kNone = kMaxInt;
// InputReader provides basic string parsing and character classification. // InputReader provides basic string parsing and character classification.
template <typename Char>
class InputReader BASE_EMBEDDED { class InputReader BASE_EMBEDDED {
public: public:
explicit InputReader(String* s) : buffer_(s), has_read_number_(false) { explicit InputReader(Vector<Char> s)
: index_(0),
buffer_(s),
has_read_number_(false) {
Next(); Next();
} }
// Advance to the next character of the string. // Advance to the next character of the string.
void Next() { ch_ = buffer_.has_more() ? buffer_.GetNext() : 0; } void Next() { ch_ = (index_ < buffer_.length()) ? buffer_[index_++] : 0; }
// Read a string of digits as an unsigned number (cap just below kMaxInt). // Read a string of digits as an unsigned number (cap just below kMaxInt).
int ReadUnsignedNumber() { int ReadUnsignedNumber() {
...@@ -124,7 +130,8 @@ class DateParser : public AllStatic { ...@@ -124,7 +130,8 @@ class DateParser : public AllStatic {
// Else, return something outside of 'A'-'Z' and 'a'-'z'. // Else, return something outside of 'A'-'Z' and 'a'-'z'.
uint32_t GetAsciiAlphaLower() const { return ch_ | 32; } uint32_t GetAsciiAlphaLower() const { return ch_ | 32; }
StringInputBuffer buffer_; int index_;
Vector<Char> buffer_;
bool has_read_number_; bool has_read_number_;
uint32_t ch_; uint32_t ch_;
}; };
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "compiler.h" #include "compiler.h"
#include "cpu.h" #include "cpu.h"
#include "dateparser.h" #include "dateparser.h"
#include "dateparser-inl.h"
#include "debug.h" #include "debug.h"
#include "execution.h" #include "execution.h"
#include "jsregexp.h" #include "jsregexp.h"
...@@ -4484,8 +4485,19 @@ static Object* Runtime_DateParseString(Arguments args) { ...@@ -4484,8 +4485,19 @@ static Object* Runtime_DateParseString(Arguments args) {
CONVERT_CHECKED(String, string_object, args[0]); CONVERT_CHECKED(String, string_object, args[0]);
Handle<String> str(string_object); Handle<String> str(string_object);
FlattenString(str);
Handle<FixedArray> output = Factory::NewFixedArray(DateParser::OUTPUT_SIZE); Handle<FixedArray> output = Factory::NewFixedArray(DateParser::OUTPUT_SIZE);
if (DateParser::Parse(*str, *output)) { bool result;
{
AssertNoAllocation no_allocation;
if (StringShape(*str).IsAsciiRepresentation()) {
result = DateParser::Parse(str->ToAsciiVector(), *output);
} else {
ASSERT(StringShape(*str).IsTwoByteRepresentation());
result = DateParser::Parse(str->ToUC16Vector(), *output);
}
}
if (result) {
return *Factory::NewJSArrayWithElements(output); return *Factory::NewJSArrayWithElements(output);
} else { } else {
return *Factory::null_value(); return *Factory::null_value();
......
...@@ -41,12 +41,13 @@ function testDateParse(string) { ...@@ -41,12 +41,13 @@ function testDateParse(string) {
// number of milliseconds to make it timezone independent. // number of milliseconds to make it timezone independent.
function testDateParseLocalTime(string) { function testDateParseLocalTime(string) {
var d = Date.parse(string); var d = Date.parse(string);
assertTrue(d > 0 && !isNaN(d)); assertTrue(!isNaN(d), string + " is NaN.");
assertTrue(d > 0, string + " <= 0.");
}; };
function testDateParseMisc(array) { function testDateParseMisc(array) {
assertTrue(array.length == 2); assertEquals(2, array.length, "array [" + array + "] length != 2.");
var string = array[0]; var string = array[0];
var expected = array[1]; var expected = array[1];
var d = Date.parse(string); var d = Date.parse(string);
...@@ -262,4 +263,6 @@ var testCasesNegative = [ ...@@ -262,4 +263,6 @@ var testCasesNegative = [
'May 25 2008 1:30( )AM (PM)', 'May 25 2008 1:30( )AM (PM)',
'May 25 2008 AAA (GMT)']; 'May 25 2008 AAA (GMT)'];
testCasesNegative.forEach(function (s) { assertTrue(isNaN(Date.parse(s))); }); testCasesNegative.forEach(function (s) {
assertTrue(isNaN(Date.parse(s)), s + " is not NaN.");
});
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment