Commit f03e42b1 authored by lrn@chromium.org's avatar lrn@chromium.org

Added validating JSON parser mode to parser.

Review URL: http://codereview.chromium.org/549207

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3752 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 866eac30
......@@ -146,27 +146,6 @@ bool ObjectLiteral::Property::IsCompileTimeValue() {
}
// An object literal counts as valid JSON only if the value of every one of
// its properties reports itself as valid JSON. Stops at the first failure.
bool ObjectLiteral::IsValidJSON() {
  const int count = properties()->length();
  int index = 0;
  while (index < count) {
    const bool value_ok = properties()->at(index)->value()->IsValidJSON();
    if (!value_ok) return false;
    index++;
  }
  return true;
}
// An array literal counts as valid JSON only if each of its element
// expressions reports itself as valid JSON. Stops at the first failure.
bool ArrayLiteral::IsValidJSON() {
  const int count = values()->length();
  bool all_valid = true;
  for (int index = 0; all_valid && index < count; ++index) {
    all_valid = values()->at(index)->IsValidJSON();
  }
  return all_valid;
}
void TargetCollector::AddTarget(BreakTarget* target) {
// Add the label to the collector, but discard duplicates.
int length = targets_->length();
......
......@@ -186,7 +186,6 @@ class Expression: public AstNode {
virtual Expression* AsExpression() { return this; }
virtual bool IsValidJSON() { return false; }
virtual bool IsValidLeftHandSide() { return false; }
// Symbols that cannot be parsed as array indices are considered property
......@@ -713,8 +712,6 @@ class Literal: public Expression {
return handle_.is_identical_to(other->handle_);
}
virtual bool IsValidJSON() { return true; }
virtual bool IsPropertyName() {
if (handle_->IsSymbol()) {
uint32_t ignored;
......@@ -751,8 +748,6 @@ class MaterializedLiteral: public Expression {
// constants and simple object and array literals.
bool is_simple() const { return is_simple_; }
virtual bool IsValidJSON() { return true; }
int depth() const { return depth_; }
private:
......@@ -806,7 +801,6 @@ class ObjectLiteral: public MaterializedLiteral {
virtual ObjectLiteral* AsObjectLiteral() { return this; }
virtual void Accept(AstVisitor* v);
virtual bool IsValidJSON();
Handle<FixedArray> constant_properties() const {
return constant_properties_;
......@@ -854,7 +848,6 @@ class ArrayLiteral: public MaterializedLiteral {
virtual void Accept(AstVisitor* v);
virtual ArrayLiteral* AsArrayLiteral() { return this; }
virtual bool IsValidJSON();
Handle<FixedArray> constant_elements() const { return constant_elements_; }
ZoneList<Expression*>* values() const { return values_; }
......
......@@ -121,17 +121,6 @@ static Handle<Code> MakeCode(FunctionLiteral* literal,
}
// Returns true when the function body is exactly one statement, that
// statement is an expression statement, and its expression reports itself
// as valid JSON.
static bool IsValidJSON(FunctionLiteral* lit) {
  const bool single_statement = (lit->body()->length() == 1);
  if (!single_statement) return false;

  Statement* only = lit->body()->at(0);
  if (only->AsExpressionStatement() == NULL) return false;

  return only->AsExpressionStatement()->expression()->IsValidJSON();
}
static Handle<JSFunction> MakeFunction(bool is_global,
bool is_eval,
Compiler::ValidationState validate,
......@@ -146,8 +135,8 @@ static Handle<JSFunction> MakeFunction(bool is_global,
ASSERT(!i::Top::global_context().is_null());
script->set_context_data((*i::Top::global_context())->data());
#ifdef ENABLE_DEBUGGER_SUPPORT
bool is_json = (validate == Compiler::VALIDATE_JSON);
#ifdef ENABLE_DEBUGGER_SUPPORT
if (is_eval || is_json) {
script->set_compilation_type(
is_json ? Smi::FromInt(Script::COMPILATION_TYPE_JSON) :
......@@ -172,7 +161,8 @@ static Handle<JSFunction> MakeFunction(bool is_global,
ASSERT(is_eval || is_global);
// Build AST.
FunctionLiteral* lit = MakeAST(is_global, script, extension, pre_data);
FunctionLiteral* lit =
MakeAST(is_global, script, extension, pre_data, is_json);
// Check for parse errors.
if (lit == NULL) {
......@@ -180,19 +170,6 @@ static Handle<JSFunction> MakeFunction(bool is_global,
return Handle<JSFunction>::null();
}
// When parsing JSON we do an ordinary parse and then afterwards
// check the AST to ensure it was well-formed. If not we give a
// syntax error.
if (validate == Compiler::VALIDATE_JSON && !IsValidJSON(lit)) {
HandleScope scope;
Handle<JSArray> args = Factory::NewJSArray(1);
Handle<Object> source(script->source());
SetElement(args, 0, source);
Handle<Object> result = Factory::NewSyntaxError("invalid_json", args);
Top::Throw(*result, NULL);
return Handle<JSFunction>::null();
}
// Measure how long it takes to do the compilation; only take the
// rest of the function into account to avoid overlap with the
// parsing statistics.
......
......@@ -29,7 +29,7 @@ var $JSON = global.JSON;
// Compiles `text` with %CompileString (second argument true) and returns the
// result of calling the compiled function.
// NOTE(review): `s` (the $String coercion of `text`) is computed but never
// used; the raw `text` is what gets compiled. Confirm whether %CompileString
// should receive `s` instead.
// NOTE(review): the two consecutive `var f` lines look like the before/after
// pair of a diff hunk (parenthesized source vs. bare JSON text) -- confirm
// which one is live.
function ParseJSONUnfiltered(text) {
var s = $String(text);
var f = %CompileString("(" + text + ")", true);
var f = %CompileString(text, true);
return f();
}
......
This diff is collapsed.
......@@ -133,7 +133,8 @@ class ScriptDataImpl : public ScriptData {
FunctionLiteral* MakeAST(bool compile_in_global_context,
Handle<Script> script,
v8::Extension* extension,
ScriptDataImpl* pre_data);
ScriptDataImpl* pre_data,
bool is_json = false);
ScriptDataImpl* PreParse(Handle<String> source,
......
......@@ -323,11 +323,14 @@ void KeywordMatcher::Step(uc32 input) {
// ----------------------------------------------------------------------------
// Scanner
Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { }
// Constructs a scanner for the given parser mode (PARSE or PREPARSE).
// is_parsing_json_ is otherwise only assigned in Init(), yet Scan() and
// SkipWhiteSpace() branch on it; start it out as false (JavaScript) so the
// flag is never read uninitialized if the scanner is used before Init().
Scanner::Scanner(ParserMode pre)
    : stack_overflow_(false),
      is_pre_parsing_(pre == PREPARSE),
      is_parsing_json_(false) { }
void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
int position) {
void Scanner::Init(Handle<String> source,
unibrow::CharacterStream* stream,
int position,
ParserLanguage language) {
// Initialize the source buffer.
if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
two_byte_string_buffer_.Initialize(
......@@ -339,6 +342,7 @@ void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
}
position_ = position;
is_parsing_json_ = (language == JSON);
// Set c0_ (one character ahead)
ASSERT(kCharacterLookaheadBufferSize == 1);
......@@ -416,7 +420,17 @@ static inline bool IsByteOrderMark(uc32 c) {
}
bool Scanner::SkipWhiteSpace() {
// Skips a run of JSON whitespace characters: tab, carriage-return, newline
// and space. Returns true if the source position moved, i.e. at least one
// character was skipped.
bool Scanner::SkipJsonWhiteSpace() {
  const int initial_position = source_pos();
  for (;;) {
    switch (c0_) {
      case ' ':
      case '\n':
      case '\r':
      case '\t':
        Advance();
        continue;  // Keep consuming whitespace.
    }
    break;  // c0_ is not JSON whitespace.
  }
  return initial_position != source_pos();
}
bool Scanner::SkipJavaScriptWhiteSpace() {
int start_position = source_pos();
while (true) {
......@@ -512,7 +526,194 @@ Token::Value Scanner::ScanHtmlComment() {
}
void Scanner::Scan() {
// Scans a single JSON token into next_. Whitespace (tab, carriage-return,
// newline, space) is skipped by looping until a non-WHITESPACE token is
// produced. Punctuation is recognized directly; strings, numbers and the
// literals true/false/null are delegated to the ScanJson* helpers. A
// negative c0_ yields EOS and any other character yields ILLEGAL.
void Scanner::ScanJson() {
  next_.literal_buffer = NULL;
  Token::Value token;
  has_line_terminator_before_next_ = false;
  do {
    // Remember the position of the next token
    next_.location.beg_pos = source_pos();
    switch (c0_) {
      case '\t':
      case '\r':
      case '\n':
      case ' ':
        Advance();
        token = Token::WHITESPACE;
        break;
      case '{':
        Advance();
        token = Token::LBRACE;
        break;
      case '}':
        Advance();
        token = Token::RBRACE;
        break;
      case '[':
        Advance();
        token = Token::LBRACK;
        break;
      case ']':
        Advance();
        token = Token::RBRACK;
        break;
      case ':':
        Advance();
        token = Token::COLON;
        break;
      case ',':
        Advance();
        token = Token::COMMA;
        break;
      case '"':
        token = ScanJsonString();
        break;
      case '-':
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        token = ScanJsonNumber();
        break;
      case 't':
        token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);
        break;
      case 'f':
        token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);
        break;
      case 'n':
        token = ScanJsonIdentifier("null", Token::NULL_LITERAL);
        break;
      default:
        if (c0_ < 0) {
          // Negative c0_ marks the end of the input.
          Advance();
          token = Token::EOS;
        } else {
          // Consume exactly the offending character, like the
          // single-character token cases above. The result is deliberately
          // not routed through Select(): assuming Select(tok) advances
          // before returning tok (confirm against its definition), doing
          // both would skip a second character and stretch the ILLEGAL
          // token's end position by one.
          Advance();
          token = Token::ILLEGAL;
        }
        break;
    }
  } while (token == Token::WHITESPACE);
  next_.location.end_pos = source_pos();
  next_.token = token;
}
// Scans a double-quoted JSON string; c0_ must be the opening quote on entry.
// Accepts only the escapes \" \\ \/ \b \f \n \r \t and \uXXXX (four hex
// digits). Returns Token::STRING on success and Token::ILLEGAL for control
// characters, unknown escapes, bad hex digits or a missing closing quote.
Token::Value Scanner::ScanJsonString() {
  ASSERT_EQ('"', c0_);
  Advance();
  StartLiteral();
  for (;;) {
    // A closing quote, a NUL, or a negative c0_ ends the scan loop.
    if (c0_ == '"' || c0_ <= 0) break;
    // Remaining characters below 0x20 are control characters: not allowed.
    if (c0_ < 0x20) return Token::ILLEGAL;
    if (c0_ != '\\') {
      AddCharAdvance();
      continue;
    }
    // Step past the backslash and decode the escape that follows it.
    Advance();
    if (c0_ == '"' || c0_ == '\\' || c0_ == '/') {
      AddChar(c0_);
    } else if (c0_ == 'b') {
      AddChar('\x08');
    } else if (c0_ == 'f') {
      AddChar('\x0c');
    } else if (c0_ == 'n') {
      AddChar('\x0a');
    } else if (c0_ == 'r') {
      AddChar('\x0d');
    } else if (c0_ == 't') {
      AddChar('\x09');
    } else if (c0_ == 'u') {
      // Exactly four hex digits follow the 'u'.
      uc32 code_unit = 0;
      for (int remaining = 4; remaining > 0; remaining--) {
        Advance();
        int nibble = HexValue(c0_);
        if (nibble < 0) return Token::ILLEGAL;
        code_unit = code_unit * 16 + nibble;
      }
      AddChar(code_unit);
    } else {
      return Token::ILLEGAL;
    }
    // Move past the last character of the escape sequence.
    Advance();
  }
  // Anything other than a closing quote here means the string never ended.
  if (c0_ != '"') return Token::ILLEGAL;
  TerminateLiteral();
  Advance();
  return Token::STRING;
}
// Scans a JSON number: optional '-', an integer part that is either a lone
// '0' or a digit run starting with 1-9, an optional fraction ('.' plus at
// least one digit) and an optional exponent ('e'/'E', optional sign, at
// least one digit). Returns Token::NUMBER, or Token::ILLEGAL on malformed
// input.
Token::Value Scanner::ScanJsonNumber() {
  StartLiteral();
  if (c0_ == '-') AddCharAdvance();

  // Integer part.
  if (c0_ == '0') {
    AddCharAdvance();
    // A leading zero may not be followed by further digits.
    if (c0_ >= '0' && c0_ <= '9') return Token::ILLEGAL;
  } else if (c0_ >= '1' && c0_ <= '9') {
    while (c0_ >= '0' && c0_ <= '9') AddCharAdvance();
  } else {
    return Token::ILLEGAL;
  }

  // Optional fraction: at least one digit must follow the point.
  if (c0_ == '.') {
    AddCharAdvance();
    if (!(c0_ >= '0' && c0_ <= '9')) return Token::ILLEGAL;
    while (c0_ >= '0' && c0_ <= '9') AddCharAdvance();
  }

  // Optional exponent: marker, optional sign, then at least one digit.
  if (c0_ == 'e' || c0_ == 'E') {
    AddCharAdvance();
    if (c0_ == '+' || c0_ == '-') AddCharAdvance();
    if (!(c0_ >= '0' && c0_ <= '9')) return Token::ILLEGAL;
    while (c0_ >= '0' && c0_ <= '9') AddCharAdvance();
  }

  TerminateLiteral();
  return Token::NUMBER;
}
// Matches the input against the NUL-terminated keyword `text`, one character
// at a time. Returns `token` if the whole keyword matches and the character
// after it is not an identifier part; otherwise returns Token::ILLEGAL.
Token::Value Scanner::ScanJsonIdentifier(const char* text,
                                         Token::Value token) {
  StartLiteral();
  for (const char* expected = text; *expected != '\0'; ++expected) {
    if (c0_ != *expected) return Token::ILLEGAL;
    Advance();
  }
  // Reject inputs such as "truely", where the keyword is only a prefix.
  if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
  TerminateLiteral();
  return token;
}
void Scanner::ScanJavaScript() {
next_.literal_buffer = NULL;
Token::Value token;
has_line_terminator_before_next_ = false;
......
......@@ -252,18 +252,22 @@ class KeywordMatcher {
};
enum ParserMode { PARSE, PREPARSE };
enum ParserLanguage { JAVASCRIPT, JSON };
class Scanner {
public:
typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
// Construction
explicit Scanner(bool is_pre_parsing);
explicit Scanner(ParserMode parse_mode);
// Initialize the Scanner to scan source:
void Init(Handle<String> source,
unibrow::CharacterStream* stream,
int position);
int position,
ParserLanguage language);
// Returns the next token.
Token::Value Next();
......@@ -377,6 +381,7 @@ class Scanner {
TokenDesc next_; // desc for next token (one token look-ahead)
bool has_line_terminator_before_next_;
bool is_pre_parsing_;
bool is_parsing_json_;
// Literal buffer support
void StartLiteral();
......@@ -391,14 +396,57 @@ class Scanner {
c0_ = ch;
}
bool SkipWhiteSpace();
// Skips whitespace using the rules of the language being scanned and
// returns whatever the selected skipper returns.
bool SkipWhiteSpace() {
  return is_parsing_json_ ? SkipJsonWhiteSpace() : SkipJavaScriptWhiteSpace();
}
bool SkipJavaScriptWhiteSpace();
bool SkipJsonWhiteSpace();
Token::Value SkipSingleLineComment();
Token::Value SkipMultiLineComment();
inline Token::Value Select(Token::Value tok);
inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_);
void Scan();
// Scans the next token with the scanner that matches the active language.
inline void Scan() {
  if (!is_parsing_json_) {
    ScanJavaScript();
    return;
  }
  ScanJson();
}
// Scans a single JavaScript token.
void ScanJavaScript();
// Scan a single JSON token. The JSON lexical grammar is specified in the
// ECMAScript 5 standard, section 15.12.1.1.
// Recognizes all of the single-character tokens directly, or calls a function
// to scan a number, string or identifier literal.
// The only allowed whitespace characters between tokens are tab,
// carriage-return, newline and space.
void ScanJson();
// A JSON number (production JSONNumber) is a subset of the valid JavaScript
// decimal number literals.
// It includes an optional minus sign, must have at least one
// digit before and after a decimal point, may not have prefixed zeros (unless
// the integer part is zero), and may include an exponent part (e.g., "e-10").
// Hexadecimal and octal numbers are not allowed.
Token::Value ScanJsonNumber();
// A JSON string (production JSONString) is a subset of valid JavaScript string
// literals. The string must only be double-quoted (not single-quoted), and
// the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
// four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
Token::Value ScanJsonString();
// Used to recognize one of the literals "true", "false", or "null". These
// are the only valid JSON identifiers (productions JSONBooleanLiteral,
// JSONNullLiteral).
Token::Value ScanJsonIdentifier(const char* text, Token::Value token);
void ScanDecimalDigits();
Token::Value ScanNumber(bool seen_period);
Token::Value ScanIdentifier();
......
......@@ -107,7 +107,7 @@ compileSource('eval("a=2")');
source_count++; // Using eval causes additional compilation event.
compileSource('eval("eval(\'(function(){return a;})\')")');
source_count += 2; // Using eval causes additional compilation event.
compileSource('JSON.parse("{a:1,b:2}")');
compileSource('JSON.parse(\'{"a":1,"b":2}\')');
source_count++; // Using JSON.parse causes additional compilation event.
// Make sure that the debug event listener was invoked.
......
......@@ -93,20 +93,46 @@ for (var p in this)
assertFalse(p == "JSON");
// Parse
assertEquals({}, JSON.parse("{}"));
assertEquals({42:37}, JSON.parse('{"42":37}'));
assertEquals(null, JSON.parse("null"));
assertEquals(true, JSON.parse("true"));
assertEquals(false, JSON.parse("false"));
assertEquals("foo", JSON.parse('"foo"'));
assertEquals("f\no", JSON.parse('"f\\no"'));
assertEquals("\b\f\n\r\t\"\u2028\/\\",
JSON.parse('"\\b\\f\\n\\r\\t\\"\\u2028\\/\\\\"'));
assertEquals([1.1], JSON.parse("[1.1]"));
assertEquals([1], JSON.parse("[1.0]"));
assertEquals(0, JSON.parse("0"));
assertEquals(1, JSON.parse("1"));
assertEquals(0.1, JSON.parse("0.1"));
assertEquals(1.1, JSON.parse("1.1"));
assertEquals(1, JSON.parse("1.0"));
assertEquals(0.0000000003, JSON.parse("3e-10"));
assertEquals(1.1, JSON.parse("1.100000"));
assertEquals(1.111111, JSON.parse("1.111111"));
assertEquals(-0, JSON.parse("-0"));
assertEquals(-1, JSON.parse("-1"));
assertEquals(-0.1, JSON.parse("-0.1"));
assertEquals(-1.1, JSON.parse("-1.1"));
assertEquals(-1.1, JSON.parse("-1.100000"));
assertEquals(-1.111111, JSON.parse("-1.111111"));
assertEquals(11, JSON.parse("1.1e1"));
assertEquals(11, JSON.parse("1.1e+1"));
assertEquals(0.11, JSON.parse("1.1e-1"));
assertEquals(11, JSON.parse("1.1E1"));
assertEquals(11, JSON.parse("1.1E+1"));
assertEquals(0.11, JSON.parse("1.1E-1"));
assertEquals([], JSON.parse("[]"));
assertEquals([1], JSON.parse("[1]"));
assertEquals([1, "2", true, null], JSON.parse('[1, "2", true, null]'));
assertEquals("", JSON.parse('""'));
assertEquals(["", "", -0, ""], JSON.parse('[ "" , "" , -0, ""]'));
assertEquals("", JSON.parse('""'));
function GetFilter(name) {
function Filter(key, value) {
return (key == name) ? undefined : value;
......@@ -145,6 +171,64 @@ TestInvalid('function () { return 0; }');
TestInvalid("[1, 2");
TestInvalid('{"x": 3');
// JavaScript number literals not valid in JSON.
TestInvalid('[01]');
TestInvalid('[.1]');
TestInvalid('[1.]');
TestInvalid('[1.e1]');
TestInvalid('[-.1]');
TestInvalid('[-1.]');
// Plain invalid number literals.
TestInvalid('-');
TestInvalid('--1');
TestInvalid('-1e');
// No stray ']' after the number: the doubled exponent sign alone must be
// what makes this input invalid, not trailing garbage.
TestInvalid('1e--1');
TestInvalid('1e+-1');
TestInvalid('1e-+1');
TestInvalid('1e++1');
// JavaScript string literals not valid in JSON.
TestInvalid("'single quote'"); // Valid JavaScript
TestInvalid('"\\a invalid escape"');
TestInvalid('"\\v invalid escape"'); // Valid JavaScript
TestInvalid('"\\\' invalid escape"'); // Valid JavaScript
TestInvalid('"\\x42 invalid escape"'); // Valid JavaScript
TestInvalid('"\\u202 invalid escape"');
TestInvalid('"\\012 invalid escape"');
TestInvalid('"Unterminated string');
TestInvalid('"Unterminated string\\"');
TestInvalid('"Unterminated string\\\\\\"');
// Test bad JSON that would be good JavaScript (ES5).
TestInvalid("{true:42}");
TestInvalid("{false:42}");
TestInvalid("{null:42}");
TestInvalid("{'foo':42}");
TestInvalid("{42:42}");
TestInvalid("{0:42}");
TestInvalid("{-1:42}");
// Test for trailing garbage detection.
TestInvalid('42 px');
TestInvalid('42 .2');
TestInvalid('42 2');
TestInvalid('42 e1');
TestInvalid('"42" ""');
TestInvalid('"42" ""');
TestInvalid('"" ""');
TestInvalid('true ""');
TestInvalid('false ""');
TestInvalid('null ""');
TestInvalid('null ""');
TestInvalid('[] ""');
TestInvalid('[true] ""');
TestInvalid('{} ""');
TestInvalid('{"x":true} ""');
TestInvalid('"Garbage""After string"');
// Stringify
assertEquals("true", JSON.stringify(true));
......@@ -196,12 +280,8 @@ assertEquals('{"y":6,"x":5}', JSON.stringify({x:5,y:6}, ['y', 'x']));
assertEquals(undefined, JSON.stringify(undefined));
assertEquals(undefined, JSON.stringify(function () { }));
// Asserts that JSON.parse(str) throws a SyntaxError.
function checkIllegal(str) {
  var attemptParse = function () { JSON.parse(str); };
  assertThrows(attemptParse, SyntaxError);
}
checkIllegal('1); throw "foo"; (1');
TestInvalid('1); throw "foo"; (1');
var x = 0;
eval("(1); x++; (1)");
checkIllegal('1); x++; (1');
TestInvalid('1); x++; (1');
......@@ -87,8 +87,8 @@ testScriptMirror(function(){}, 'mirror-script.js', 100, 2, 0);
testScriptMirror(Math.sin, 'native math.js', -1, 0, 0);
testScriptMirror(eval('(function(){})'), null, 1, 2, 1, '(function(){})', 87);
testScriptMirror(eval('(function(){\n })'), null, 2, 2, 1, '(function(){\n })', 88);
testScriptMirror(%CompileString("({a:1,b:2})", true), null, 1, 2, 2, '({a:1,b:2})');
testScriptMirror(%CompileString("({a:1,\n b:2})", true), null, 2, 2, 2, '({a:1,\n b:2})');
testScriptMirror(%CompileString('{"a":1,"b":2}', true), null, 1, 2, 2, '{"a":1,"b":2}');
testScriptMirror(%CompileString('{"a":1,\n "b":2}', true), null, 2, 2, 2, '{"a":1,\n "b":2}');
// Test taking slices of source.
var mirror = debug.MakeMirror(eval('(function(){\n 1;\n})')).script();
......
......@@ -75,6 +75,7 @@ function deepEquals(a, b) {
if (typeof a == "number" && typeof b == "number" && isNaN(a) && isNaN(b)) {
return true;
}
if (a == null || b == null) return false;
if (a.constructor === RegExp || b.constructor === RegExp) {
return (a.constructor === b.constructor) && (a.toString === b.toString);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment