Commit 89f46665 authored by lrn@chromium.org

Flatten strings before parsing them as Date strings, and work on Vector of chars instead.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1525 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent eb656c72
......@@ -43,26 +43,28 @@ inline bool IsLineFeed(uc32 c) {
}
// Returns true if lower_limit <= value <= higher_limit, using a single
// unsigned comparison: a value below lower_limit wraps around to a large
// unsigned offset and therefore fails the test.
// Callers must pass lower_limit <= higher_limit (asserted below).
static inline bool IsInRange(int value, int lower_limit, int higher_limit) {
  ASSERT(lower_limit <= higher_limit);
  // Convert to unsigned *before* subtracting so the differences wrap modulo
  // 2^N instead of overflowing a signed int, which is undefined behavior.
  const unsigned int offset =
      static_cast<unsigned int>(value) - static_cast<unsigned int>(lower_limit);
  const unsigned int span =
      static_cast<unsigned int>(higher_limit) -
      static_cast<unsigned int>(lower_limit);
  return offset <= span;
}
// Returns true if c is one of the ASCII decimal digits '0'..'9'.
inline bool IsDecimalDigit(uc32 c) {
  // ECMA-262, 3rd, 7.8.3 (p 16)
  return IsInRange(c, '0', '9');
}
// Returns true if c is an ASCII hexadecimal digit: '0'..'9', 'A'..'F' or
// 'a'..'f'.
inline bool IsHexDigit(uc32 c) {
  // ECMA-262, 3rd, 7.6 (p 15)
  // OR-ing in 0x20 folds 'A'..'F' onto 'a'..'f' and leaves the lower-case
  // letters unchanged, so one range test covers both cases.
  return IsDecimalDigit(c) || IsInRange(c | 0x20, 'a', 'f');
}
// Returns true if c is an ASCII letter, an ASCII decimal digit, or '_'.
inline bool IsRegExpWord(uc16 c) {
  // OR-ing in 0x20 folds 'A'..'Z' onto 'a'..'z', so a single range test
  // accepts both upper- and lower-case letters.
  return IsInRange(c | 0x20, 'a', 'z')
      || IsDecimalDigit(c)
      || (c == '_');
}
......
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace v8 { namespace internal {
// Scans the characters of str from left to right and feeds what it finds to
// three composers (day, time, time zone).  Returns false as soon as an
// illegal construct is seen; otherwise returns whether all three composers
// managed to write their part into out.  out must have OUTPUT_SIZE elements.
template <typename Char>
bool DateParser::Parse(Vector<Char> str, FixedArray* out) {
  ASSERT(out->length() == OUTPUT_SIZE);
  InputReader<Char> in(str);
  TimeZoneComposer tz;
  TimeComposer time;
  DayComposer day;

  while (!in.IsEnd()) {
    // --- Number, possibly followed by one or two colons. ---
    if (in.IsAsciiDigit()) {
      int number = in.ReadUnsignedNumber();
      if (!in.Skip(':')) {
        // Bare number: offer it to the time zone first, then the time,
        // and finally the day composer.
        if (tz.IsExpecting(number)) {
          tz.SetAbsoluteMinute(number);
        } else if (time.IsExpecting(number)) {
          time.AddFinal(number);
          // The final time component must be followed by white space or
          // the end of the input.
          if (!(in.IsEnd() || in.SkipWhiteSpace())) return false;
        } else {
          if (!day.Add(number)) return false;
          in.Skip('-');  // A '-' suffix on year, month, or day is ignored.
        }
      } else if (in.Skip(':')) {
        // number + "::" -- only allowed as the very first time component.
        if (!time.IsEmpty()) return false;
        time.Add(number);
        time.Add(0);
      } else {
        // number + ":"
        if (!time.Add(number)) return false;
      }
      continue;
    }

    // --- "Word": a run of characters >= 'A'. ---
    if (in.IsAsciiAlphaOrAbove()) {
      uint32_t prefix[KeywordTable::kPrefixLength];
      int length = in.ReadWord(prefix, KeywordTable::kPrefixLength);
      int entry = KeywordTable::Lookup(prefix, length);
      KeywordType type = KeywordTable::GetType(entry);
      if (type == AM_PM && !time.IsEmpty()) {
        time.SetHourOffset(KeywordTable::GetValue(entry));
      } else if (type == MONTH_NAME) {
        day.SetNamedMonth(KeywordTable::GetValue(entry));
        in.Skip('-');  // A '-' suffix on a month name is ignored.
      } else if (type == TIME_ZONE_NAME && in.HasReadNumber()) {
        tz.Set(KeywordTable::GetValue(entry));
      } else if (in.HasReadNumber()) {
        // Unrecognized words are rejected once a number has been read.
        return false;
      }
      continue;
    }

    // --- Signed UTC offset; accepted only after UTC or a time. ---
    if (in.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) {
      tz.SetSign(in.GetAsciiSignValue());
      in.Next();
      int offset = in.ReadUnsignedNumber();
      if (!in.Skip(':')) {
        // No colon: the number is split into hours and minutes (hhmm).
        tz.SetAbsoluteHour(offset / 100);
        tz.SetAbsoluteMinute(offset % 100);
      } else {
        // Colon follows: the number is the hour, the minute is still unset.
        tz.SetAbsoluteHour(offset);
        tz.SetAbsoluteMinute(kNone);
      }
      continue;
    }

    // --- Parenthesized text: skipped up to the matching ')' or the end. ---
    if (in.Is('(')) {
      in.SkipParentheses();
      continue;
    }

    // A stray sign or ')' is rejected once a number has been read.
    if ((in.IsAsciiSign() || in.Is(')')) && in.HasReadNumber()) {
      return false;
    }

    // Any other character is ignored.
    in.Next();
  }

  if (!day.Write(out)) return false;
  if (!time.Write(out)) return false;
  return tz.Write(out);
}
} } // namespace v8::internal
......@@ -31,84 +31,6 @@
namespace v8 { namespace internal {
// Parses the date string str and writes the result into out, which must
// have OUTPUT_SIZE elements.  The input is scanned one token at a time
// (number, word, sign, parenthesis, other) and each token is handed to the
// day, time, or time zone composer.  Returns false on the first illegal
// construct; otherwise returns whether all three composers could write
// their values to out.
bool DateParser::Parse(String* str, FixedArray* out) {
ASSERT(out->length() == OUTPUT_SIZE);
InputReader in(str);
TimeZoneComposer tz;
TimeComposer time;
DayComposer day;
while (!in.IsEnd()) {
if (in.IsAsciiDigit()) {
// Parse a number (possibly with 1 or 2 trailing colons).
int n = in.ReadUnsignedNumber();
if (in.Skip(':')) {
if (in.Skip(':')) {
// n + "::"
if (!time.IsEmpty()) return false;
time.Add(n);
time.Add(0);
} else {
// n + ":"
if (!time.Add(n)) return false;
}
} else if (tz.IsExpecting(n)) {
tz.SetAbsoluteMinute(n);
} else if (time.IsExpecting(n)) {
time.AddFinal(n);
// Require end or white space immediately after finalizing time.
if (!in.IsEnd() && !in.SkipWhiteSpace()) return false;
} else {
if (!day.Add(n)) return false;
in.Skip('-'); // Ignore suffix '-' for year, month, or day.
}
} else if (in.IsAsciiAlphaOrAbove()) {
// Parse a "word" (sequence of chars. >= 'A').
uint32_t pre[KeywordTable::kPrefixLength];
int len = in.ReadWord(pre, KeywordTable::kPrefixLength);
int index = KeywordTable::Lookup(pre, len);
KeywordType type = KeywordTable::GetType(index);
if (type == AM_PM && !time.IsEmpty()) {
time.SetHourOffset(KeywordTable::GetValue(index));
} else if (type == MONTH_NAME) {
day.SetNamedMonth(KeywordTable::GetValue(index));
in.Skip('-'); // Ignore suffix '-' for month names
} else if (type == TIME_ZONE_NAME && in.HasReadNumber()) {
tz.Set(KeywordTable::GetValue(index));
} else {
// Garbage words are illegal if a number has been read.
if (in.HasReadNumber()) return false;
}
} else if (in.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) {
// Parse UTC offset (only after UTC or time).
tz.SetSign(in.GetAsciiSignValue());
in.Next();
int n = in.ReadUnsignedNumber();
if (in.Skip(':')) {
// "hh:" form: n is the hour; the minute is left unset (kNone) for now.
tz.SetAbsoluteHour(n);
tz.SetAbsoluteMinute(kNone);
} else {
// "hhmm" form: split n into hours and minutes.
tz.SetAbsoluteHour(n / 100);
tz.SetAbsoluteMinute(n % 100);
}
} else if (in.Is('(')) {
// Ignore anything from '(' to a matching ')' or end of string.
in.SkipParentheses();
} else if ((in.IsAsciiSign() || in.Is(')')) && in.HasReadNumber()) {
// Extra sign or ')' is illegal if a number has been read.
return false;
} else {
// Ignore other characters.
in.Next();
}
}
// Short-circuits: a later composer is only asked to write if the earlier
// ones succeeded.
return day.Write(out) && time.Write(out) && tz.Write(out);
}
bool DateParser::DayComposer::Write(FixedArray* output) {
int year = 0; // Default year is 0 (=> 2000) for KJS compatibility.
int month = kNone;
......@@ -192,7 +114,6 @@ bool DateParser::TimeComposer::Write(FixedArray* output) {
return true;
}
bool DateParser::TimeZoneComposer::Write(FixedArray* output) {
if (sign_ != kNone) {
if (hour_ == kNone) hour_ = 0;
......@@ -210,9 +131,8 @@ bool DateParser::TimeZoneComposer::Write(FixedArray* output) {
return true;
}
const int8_t
DateParser::KeywordTable::array[][DateParser::KeywordTable::kEntrySize] = {
const int8_t DateParser::KeywordTable::
array[][DateParser::KeywordTable::kEntrySize] = {
{'j', 'a', 'n', DateParser::MONTH_NAME, 1},
{'f', 'e', 'b', DateParser::MONTH_NAME, 2},
{'m', 'a', 'r', DateParser::MONTH_NAME, 3},
......
......@@ -32,7 +32,6 @@
namespace v8 { namespace internal {
class DateParser : public AllStatic {
public:
......@@ -46,25 +45,32 @@ class DateParser : public AllStatic {
// [5]: second
// [6]: UTC offset in seconds, or null value if no timezone specified
// If parsing fails, return false (content of output array is not defined).
static bool Parse(String* str, FixedArray* output);
template <typename Char>
static bool Parse(Vector<Char> str, FixedArray* output);
enum {YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, UTC_OFFSET, OUTPUT_SIZE};
private:
// Range testing: returns true if lo <= x <= hi, using one unsigned
// comparison.  An x below lo wraps around to a large unsigned offset and so
// fails the test.  The result is only meaningful when lo <= hi.
static inline bool Between(int x, int lo, int hi) {
  // Cast each operand before subtracting so the arithmetic wraps modulo 2^N
  // instead of overflowing a signed int (undefined behavior).
  return static_cast<unsigned>(x) - static_cast<unsigned>(lo) <=
         static_cast<unsigned>(hi) - static_cast<unsigned>(lo);
}
// Indicates a missing value.
static const int kNone = kMaxInt;
// InputReader provides basic string parsing and character classification.
template <typename Char>
class InputReader BASE_EMBEDDED {
public:
explicit InputReader(String* s) : buffer_(s), has_read_number_(false) {
explicit InputReader(Vector<Char> s)
: index_(0),
buffer_(s),
has_read_number_(false) {
Next();
}
// Advance to the next character of the string.  Once index_ has reached
// buffer_.length(), ch_ is set to 0 instead.
void Next() { ch_ = (index_ < buffer_.length()) ? buffer_[index_++] : 0; }
// Read a string of digits as an unsigned number (cap just below kMaxInt).
int ReadUnsignedNumber() {
......@@ -124,7 +130,8 @@ class DateParser : public AllStatic {
// Else, return something outside of 'A'-'Z' and 'a'-'z'.
uint32_t GetAsciiAlphaLower() const { return ch_ | 32; }
StringInputBuffer buffer_;
int index_;
Vector<Char> buffer_;
bool has_read_number_;
uint32_t ch_;
};
......
......@@ -35,6 +35,7 @@
#include "compiler.h"
#include "cpu.h"
#include "dateparser.h"
#include "dateparser-inl.h"
#include "debug.h"
#include "execution.h"
#include "jsregexp.h"
......@@ -4484,8 +4485,19 @@ static Object* Runtime_DateParseString(Arguments args) {
CONVERT_CHECKED(String, string_object, args[0]);
Handle<String> str(string_object);
FlattenString(str);
Handle<FixedArray> output = Factory::NewFixedArray(DateParser::OUTPUT_SIZE);
if (DateParser::Parse(*str, *output)) {
bool result;
{
AssertNoAllocation no_allocation;
if (StringShape(*str).IsAsciiRepresentation()) {
result = DateParser::Parse(str->ToAsciiVector(), *output);
} else {
ASSERT(StringShape(*str).IsTwoByteRepresentation());
result = DateParser::Parse(str->ToUC16Vector(), *output);
}
}
if (result) {
return *Factory::NewJSArrayWithElements(output);
} else {
return *Factory::null_value();
......
......@@ -41,12 +41,13 @@ function testDateParse(string) {
// number of milliseconds to make it timezone independent.
function testDateParseLocalTime(string) {
  var d = Date.parse(string);
  // Check the two conditions separately so a failure reports which one
  // broke and for which input string.
  assertTrue(!isNaN(d), string + " is NaN.");
  assertTrue(d > 0, string + " <= 0.");
};
function testDateParseMisc(array) {
assertTrue(array.length == 2);
assertEquals(2, array.length, "array [" + array + "] length != 2.");
var string = array[0];
var expected = array[1];
var d = Date.parse(string);
......@@ -262,4 +263,6 @@ var testCasesNegative = [
'May 25 2008 1:30( )AM (PM)',
'May 25 2008 AAA (GMT)'];
// Every negative test case must fail to parse; the message names the
// offending string.
testCasesNegative.forEach(function (s) {
  assertTrue(isNaN(Date.parse(s)), s + " is not NaN.");
});
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment