Added checking in the regular expression parser that we're below the

stack limit and that the zone allocation limit hasn't been met.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@879 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent b07b40b3
......@@ -495,10 +495,10 @@ class RegExpParser {
RegExpParser(FlatStringReader* in,
Handle<String>* error,
bool multiline_mode);
RegExpTree* ParsePattern(bool* ok);
RegExpTree* ParseDisjunction(bool* ok);
RegExpTree* ParseGroup(bool* ok);
RegExpTree* ParseCharacterClass(bool* ok);
RegExpTree* ParsePattern();
RegExpTree* ParseDisjunction();
RegExpTree* ParseGroup();
RegExpTree* ParseCharacterClass();
// Parses a {...,...} quantifier and stores the range in the given
// out parameters.
......@@ -506,13 +506,13 @@ class RegExpParser {
// Parses and returns a single escaped character. The character
// must not be 'b' or 'B' since they are usually handle specially.
uc32 ParseClassCharacterEscape(bool* ok);
uc32 ParseClassCharacterEscape();
// Checks whether the following is a length-digit hexadecimal number,
// and sets the value if it is.
bool ParseHexEscape(int length, uc32* value);
uc32 ParseControlLetterEscape(bool* ok);
uc32 ParseControlLetterEscape();
uc32 ParseOctalLiteral();
// Tries to parse the input as a back reference. If successful it
......@@ -521,9 +521,8 @@ class RegExpParser {
// can be reparsed.
bool ParseBackReferenceIndex(int* index_out);
CharacterRange ParseClassAtom(uc16* char_class,
bool* ok);
RegExpTree* ReportError(Vector<const char> message, bool* ok);
CharacterRange ParseClassAtom(uc16* char_class);
RegExpTree* ReportError(Vector<const char> message);
void Advance();
void Advance(int dist);
void Reset(int pos);
......@@ -532,6 +531,7 @@ class RegExpParser {
int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
int position() { return next_pos_ - 1; }
bool failed() { return failed_; }
static const uc32 kEndMarker = (1 << 21);
private:
......@@ -553,6 +553,7 @@ class RegExpParser {
bool is_scanned_for_captures_;
// The capture count is only valid after we have scanned for captures.
int capture_count_;
bool failed_;
};
......@@ -1027,6 +1028,11 @@ class LexicalScope BASE_EMBEDDED {
#define DUMMY ) // to make indentation work
#undef DUMMY
#define CHECK_FAILED ); \
if (failed_) return NULL; \
((void)0
#define DUMMY ) // to make indentation work
#undef DUMMY
// ----------------------------------------------------------------------------
// Implementation of Parser
......@@ -3499,7 +3505,8 @@ RegExpParser::RegExpParser(FlatStringReader* in,
has_character_escapes_(false),
captures_(NULL),
is_scanned_for_captures_(false),
capture_count_(0) {
capture_count_(0),
failed_(false) {
Advance(1);
}
......@@ -3515,8 +3522,15 @@ uc32 RegExpParser::Next() {
void RegExpParser::Advance() {
if (next_pos_ < in()->length()) {
current_ = in()->Get(next_pos_);
next_pos_++;
StackLimitCheck check;
if (check.HasOverflowed()) {
ReportError(CStrVector(Top::kStackOverflowMessage));
} else if (Zone::excess_allocation()) {
ReportError(CStrVector("Regular expression too large"));
} else {
current_ = in()->Get(next_pos_);
next_pos_++;
}
} else {
current_ = kEndMarker;
has_more_ = false;
......@@ -3543,19 +3557,22 @@ bool RegExpParser::HasCharacterEscapes() {
return has_character_escapes_;
}
RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) {
*ok = false;
RegExpTree* RegExpParser::ReportError(Vector<const char> message) {
failed_ = true;
*error_ = Factory::NewStringFromAscii(message, NOT_TENURED);
// Zip to the end to make sure the no more input is read.
current_ = kEndMarker;
next_pos_ = in()->length();
return NULL;
}
// Pattern ::
// Disjunction
RegExpTree* RegExpParser::ParsePattern(bool* ok) {
RegExpTree* result = ParseDisjunction(CHECK_OK);
RegExpTree* RegExpParser::ParsePattern() {
RegExpTree* result = ParseDisjunction(CHECK_FAILED);
if (has_more()) {
ReportError(CStrVector("Unmatched ')'"), CHECK_OK);
ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);
}
return result;
}
......@@ -3579,7 +3596,7 @@ bool RegExpParser::CaptureAvailable(int index) {
// Assertion
// Atom
// Atom Quantifier
RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
RegExpTree* RegExpParser::ParseDisjunction() {
RegExpBuilder builder;
int capture_start_index = captures_started();
while (true) {
......@@ -3603,7 +3620,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
case '*':
case '+':
case '?':
ReportError(CStrVector("Nothing to repeat"), CHECK_OK);
ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);
case '^': {
Advance();
RegExpAssertion::Type type =
......@@ -3630,12 +3647,12 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
break;
}
case '(': {
RegExpTree* atom = ParseGroup(CHECK_OK);
RegExpTree* atom = ParseGroup(CHECK_FAILED);
builder.AddAtom(atom);
break;
}
case '[': {
RegExpTree* atom = ParseCharacterClass(CHECK_OK);
RegExpTree* atom = ParseCharacterClass(CHECK_FAILED);
builder.AddAtom(atom);
break;
}
......@@ -3644,7 +3661,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
case '\\':
switch (Next()) {
case kEndMarker:
ReportError(CStrVector("\\ at end of pattern"), CHECK_OK);
ReportError(CStrVector("\\ at end of pattern") CHECK_FAILED);
case 'b':
Advance(2);
builder.AddAssertion(
......@@ -3722,7 +3739,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
break;
case 'c': {
Advance(2);
uc32 control = ParseControlLetterEscape(ok);
uc32 control = ParseControlLetterEscape();
builder.AddCharacter(control);
break;
}
......@@ -3757,7 +3774,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
case '{': {
int dummy;
if (ParseIntervalQuantifier(&dummy, &dummy)) {
ReportError(CStrVector("Nothing to repeat"), CHECK_OK);
ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);
}
// fallthrough
}
......@@ -3974,9 +3991,9 @@ bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {
// Upper and lower case letters differ by one bit.
STATIC_CHECK(('a' ^ 'A') == 0x20);
uc32 RegExpParser::ParseControlLetterEscape(bool* ok) {
uc32 RegExpParser::ParseControlLetterEscape() {
if (!has_more()) {
ReportError(CStrVector("\\c at end of pattern"), ok);
ReportError(CStrVector("\\c at end of pattern"));
return '\0';
}
uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters.
......@@ -4030,7 +4047,7 @@ bool RegExpParser::ParseHexEscape(int length, uc32 *value) {
}
uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) {
uc32 RegExpParser::ParseClassCharacterEscape() {
ASSERT(current() == '\\');
ASSERT(has_next() && !IsSpecialClassEscape(Next()));
Advance();
......@@ -4056,7 +4073,7 @@ uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) {
Advance();
return '\v';
case 'c':
return ParseControlLetterEscape(ok);
return ParseControlLetterEscape();
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7':
// For compatibility, we interpret a decimal escape that isn't
......@@ -4096,7 +4113,7 @@ uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) {
}
RegExpTree* RegExpParser::ParseGroup(bool* ok) {
RegExpTree* RegExpParser::ParseGroup() {
ASSERT_EQ(current(), '(');
char type = '(';
Advance();
......@@ -4107,7 +4124,7 @@ RegExpTree* RegExpParser::ParseGroup(bool* ok) {
Advance(2);
break;
default:
ReportError(CStrVector("Invalid group"), CHECK_OK);
ReportError(CStrVector("Invalid group") CHECK_FAILED);
break;
}
} else {
......@@ -4117,9 +4134,9 @@ RegExpTree* RegExpParser::ParseGroup(bool* ok) {
captures_->Add(NULL);
}
int capture_index = captures_started();
RegExpTree* body = ParseDisjunction(CHECK_OK);
RegExpTree* body = ParseDisjunction(CHECK_FAILED);
if (current() != ')') {
ReportError(CStrVector("Unterminated group"), CHECK_OK);
ReportError(CStrVector("Unterminated group") CHECK_FAILED);
}
Advance();
......@@ -4157,7 +4174,7 @@ RegExpTree* RegExpParser::ParseGroup(bool* ok) {
}
CharacterRange RegExpParser::ParseClassAtom(uc16* char_class, bool* ok) {
CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
ASSERT_EQ(0, *char_class);
uc32 first = current();
if (first == '\\') {
......@@ -4168,7 +4185,7 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class, bool* ok) {
return CharacterRange::Singleton(0); // Return dummy value.
}
default:
uc32 c = ParseClassCharacterEscape(CHECK_OK);
uc32 c = ParseClassCharacterEscape(CHECK_FAILED);
return CharacterRange::Singleton(c);
}
} else {
......@@ -4178,7 +4195,7 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class, bool* ok) {
}
RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {
RegExpTree* RegExpParser::ParseCharacterClass() {
static const char* kUnterminated = "Unterminated character class";
static const char* kRangeOutOfOrder = "Range out of order in character class";
......@@ -4192,7 +4209,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {
ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
while (has_more() && current() != ']') {
uc16 char_class = 0;
CharacterRange first = ParseClassAtom(&char_class, CHECK_OK);
CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);
if (char_class) {
CharacterRange::AddClassEscape(char_class, ranges);
continue;
......@@ -4208,7 +4225,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {
ranges->Add(CharacterRange::Singleton('-'));
break;
}
CharacterRange next = ParseClassAtom(&char_class, CHECK_OK);
CharacterRange next = ParseClassAtom(&char_class CHECK_FAILED);
if (char_class) {
ranges->Add(first);
ranges->Add(CharacterRange::Singleton('-'));
......@@ -4216,7 +4233,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {
continue;
}
if (first.from() > next.to()) {
return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK);
return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED);
}
ranges->Add(CharacterRange::Range(first.from(), next.to()));
} else {
......@@ -4224,7 +4241,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {
}
}
if (!has_more()) {
return ReportError(CStrVector(kUnterminated), CHECK_OK);
return ReportError(CStrVector(kUnterminated) CHECK_FAILED);
}
Advance();
if (ranges->length() == 0) {
......@@ -4287,21 +4304,20 @@ bool ParseRegExp(FlatStringReader* input,
bool multiline,
RegExpParseResult* result) {
ASSERT(result != NULL);
// Make sure we have a stack guard.
StackGuard guard;
RegExpParser parser(input, &result->error, multiline);
bool ok = true;
result->tree = parser.ParsePattern(&ok);
if (!ok) {
result->tree = parser.ParsePattern();
if (parser.failed()) {
ASSERT(result->tree == NULL);
ASSERT(!result->error.is_null());
} else {
ASSERT(result->tree != NULL);
ASSERT(result->error.is_null());
}
if (ok) {
result->has_character_escapes = parser.HasCharacterEscapes();
result->capture_count = parser.captures_started();
}
return ok;
return !parser.failed();
}
......
......@@ -603,6 +603,10 @@ bool Top::MayIndexedAccess(JSObject* receiver,
}
const char* Top::kStackOverflowMessage =
"Uncaught RangeError: Maximum call stack size exceeded";
Failure* Top::StackOverflow() {
HandleScope scope;
Handle<String> key = Factory::stack_overflow_symbol();
......@@ -616,9 +620,7 @@ Failure* Top::StackOverflow() {
// doesn't use ReportUncaughtException to determine the location
// from where the exception occurred. It should probably be
// reworked.
static const char* kMessage =
"Uncaught RangeError: Maximum call stack size exceeded";
DoThrow(*exception, NULL, kMessage);
DoThrow(*exception, NULL, kStackOverflowMessage);
return Failure::Exception();
}
......
......@@ -271,6 +271,8 @@ class Top {
static char* ArchiveThread(char* to);
static char* RestoreThread(char* from);
static const char* kStackOverflowMessage;
private:
// The context that initiated this JS execution.
static ThreadLocalTop thread_local_;
......
......@@ -48,6 +48,11 @@ inline void* Zone::New(int size) {
}
bool Zone::excess_allocation() {
return segment_bytes_allocated_ > zone_excess_limit_;
}
} } // namespace v8::internal
#endif // V8_ZONE_INL_H_
......@@ -34,6 +34,8 @@ namespace v8 { namespace internal {
Address Zone::position_ = 0;
Address Zone::limit_ = 0;
int Zone::zone_excess_limit_ = 256 * MB;
int Zone::segment_bytes_allocated_ = 0;
bool AssertNoZoneAllocation::allow_allocation_ = true;
......@@ -63,6 +65,7 @@ class Segment {
// of the segment chain. Returns the new segment.
static Segment* New(int size) {
Segment* result = reinterpret_cast<Segment*>(Malloced::New(size));
Zone::segment_bytes_allocated_ += size;
if (result != NULL) {
result->next_ = head_;
result->size_ = size;
......@@ -72,10 +75,13 @@ class Segment {
}
// Deletes the given segment. Does not touch the segment chain.
static void Delete(Segment* segment) {
static void Delete(Segment* segment, int size) {
Zone::segment_bytes_allocated_ -= size;
Malloced::Delete(segment);
}
static int bytes_allocated() { return bytes_allocated_; }
private:
// Computes the address of the nth byte in this segment.
Address address(int n) const {
......@@ -83,12 +89,14 @@ class Segment {
}
static Segment* head_;
static int bytes_allocated_;
Segment* next_;
int size_;
};
Segment* Segment::head_ = NULL;
int Segment::bytes_allocated_ = 0;
void Zone::DeleteAll() {
......@@ -112,11 +120,12 @@ void Zone::DeleteAll() {
// Unlink the segment we wish to keep from the list.
current->clear_next();
} else {
int size = current->size();
#ifdef DEBUG
// Zap the entire current segment (including the header).
memset(current, kZapDeadByte, current->size());
memset(current, kZapDeadByte, size);
#endif
Segment::Delete(current);
Segment::Delete(current, size);
}
current = next;
}
......
......@@ -61,7 +61,13 @@ class Zone {
// Delete all objects and free all memory allocated in the Zone.
static void DeleteAll();
// Returns true if more memory has been allocated in zones than
// the limit allows.
static inline bool excess_allocation();
private:
friend class Segment;
// All pointers returned from New() have this alignment.
static const int kAlignment = kPointerSize;
......@@ -71,6 +77,13 @@ class Zone {
// Never keep segments larger than this size in bytes around.
static const int kMaximumKeptSegmentSize = 64 * KB;
// Report zone excess when allocation exceeds this limit.
static int zone_excess_limit_;
// The number of bytes allocated in segments. Note that this number
// includes memory allocated from the OS but not yet allocated from
// the zone.
static int segment_bytes_allocated_;
// The Zone is intentionally a singleton; you should not try to
// allocate instances of the class.
......
......@@ -51,6 +51,7 @@ using namespace v8::internal;
static SmartPointer<const char> Parse(const char* input) {
V8::Initialize(NULL);
v8::HandleScope scope;
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
......@@ -63,6 +64,7 @@ static SmartPointer<const char> Parse(const char* input) {
}
static bool ParseEscapes(const char* input) {
V8::Initialize(NULL);
v8::HandleScope scope;
unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
ZoneScope zone_scope(DELETE_ON_EXIT);
......@@ -253,6 +255,7 @@ TEST(ParserRegression) {
static void ExpectError(const char* input,
const char* expected) {
V8::Initialize(NULL);
v8::HandleScope scope;
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
......@@ -372,6 +375,7 @@ TEST(CharacterClassEscapes) {
static RegExpNode* Compile(const char* input, bool multiline) {
V8::Initialize(NULL);
FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
if (!v8::internal::ParseRegExp(&reader, multiline, &result))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment