Commit 17ec357a authored by bmeurer@chromium.org

Fix crash due to a RegExpAtom method being called on a RegExpCharacterClass object.

In RegExpUnparser::VisitText(RegExpText* that, void* data), RegExpUnparser::VisitAtom is always called via that->elements()->at(i).data.u_atom->Accept(this, data), even if the object is actually a RegExpCharacterClass.

The problem shows up with g++ 4.7 (.2, .3) since r16232, because GCC optimizes virtual method calls into direct calls based on __final/final hints. Tested on MIPS and x64:
Program received signal SIGSEGV, Segmentation fault.
0x0000000000588928 in v8::internal::RegExpUnparser::VisitAtom(v8::internal::RegExpAtom*, void*) ()

This cleans up the TextElement class to avoid the unsafe, unchecked union access that caused the crash.

TEST=cctest/test-regexp/ParserRegression
R=jkummerow@chromium.org

Review URL: https://codereview.chromium.org/22815033

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@16289 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent af908ee5
......@@ -962,12 +962,12 @@ void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
if (that->elements()->length() == 1) {
that->elements()->at(0).data.u_atom->Accept(this, data);
that->elements()->at(0).tree()->Accept(this, data);
} else {
stream()->Add("(!");
for (int i = 0; i < that->elements()->length(); i++) {
stream()->Add(" ");
that->elements()->at(i).data.u_atom->Accept(this, data);
that->elements()->at(i).tree()->Accept(this, data);
}
stream()->Add(")");
}
......
......@@ -933,27 +933,25 @@ void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
TextElement TextElement::Atom(RegExpAtom* atom) {
TextElement result = TextElement(ATOM);
result.data.u_atom = atom;
return result;
return TextElement(ATOM, atom);
}
TextElement TextElement::CharClass(
RegExpCharacterClass* char_class) {
TextElement result = TextElement(CHAR_CLASS);
result.data.u_char_class = char_class;
return result;
TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {
return TextElement(CHAR_CLASS, char_class);
}
int TextElement::length() {
if (text_type == ATOM) {
return data.u_atom->length();
} else {
ASSERT(text_type == CHAR_CLASS);
return 1;
int TextElement::length() const {
switch (text_type()) {
case ATOM:
return atom()->length();
case CHAR_CLASS:
return 1;
}
UNREACHABLE();
return 0;
}
......@@ -2561,8 +2559,8 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
}
for (int k = 0; k < elms_->length(); k++) {
TextElement elm = elms_->at(k);
if (elm.text_type == TextElement::ATOM) {
Vector<const uc16> quarks = elm.data.u_atom->data();
if (elm.text_type() == TextElement::ATOM) {
Vector<const uc16> quarks = elm.atom()->data();
for (int i = 0; i < characters && i < quarks.length(); i++) {
QuickCheckDetails::Position* pos =
details->positions(characters_filled_in);
......@@ -2624,7 +2622,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
} else {
QuickCheckDetails::Position* pos =
details->positions(characters_filled_in);
RegExpCharacterClass* tree = elm.data.u_char_class;
RegExpCharacterClass* tree = elm.char_class();
ZoneList<CharacterRange>* ranges = tree->ranges(zone());
if (tree->is_negated()) {
// A quick check uses multi-character mask and compare. There is no
......@@ -2814,8 +2812,8 @@ RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
int element_count = elms_->length();
for (int i = 0; i < element_count; i++) {
TextElement elm = elms_->at(i);
if (elm.text_type == TextElement::ATOM) {
Vector<const uc16> quarks = elm.data.u_atom->data();
if (elm.text_type() == TextElement::ATOM) {
Vector<const uc16> quarks = elm.atom()->data();
for (int j = 0; j < quarks.length(); j++) {
uint16_t c = quarks[j];
if (c <= String::kMaxOneByteCharCode) continue;
......@@ -2830,8 +2828,8 @@ RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
copy[j] = converted;
}
} else {
ASSERT(elm.text_type == TextElement::CHAR_CLASS);
RegExpCharacterClass* cc = elm.data.u_char_class;
ASSERT(elm.text_type() == TextElement::CHAR_CLASS);
RegExpCharacterClass* cc = elm.char_class();
ZoneList<CharacterRange>* ranges = cc->ranges(zone());
if (!CharacterRange::IsCanonical(ranges)) {
CharacterRange::Canonicalize(ranges);
......@@ -3256,12 +3254,12 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
int element_count = elms_->length();
for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
TextElement elm = elms_->at(i);
int cp_offset = trace->cp_offset() + elm.cp_offset;
if (elm.text_type == TextElement::ATOM) {
Vector<const uc16> quarks = elm.data.u_atom->data();
int cp_offset = trace->cp_offset() + elm.cp_offset();
if (elm.text_type() == TextElement::ATOM) {
Vector<const uc16> quarks = elm.atom()->data();
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
if (first_element_checked && i == 0 && j == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue;
if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
EmitCharacterFunction* emit_function = NULL;
switch (pass) {
case NON_ASCII_MATCH:
......@@ -3295,11 +3293,11 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
}
}
} else {
ASSERT_EQ(elm.text_type, TextElement::CHAR_CLASS);
ASSERT_EQ(TextElement::CHAR_CLASS, elm.text_type());
if (pass == CHARACTER_CLASS_MATCH) {
if (first_element_checked && i == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset)) continue;
RegExpCharacterClass* cc = elm.data.u_char_class;
if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
RegExpCharacterClass* cc = elm.char_class();
EmitCharClass(assembler,
cc,
ascii,
......@@ -3317,12 +3315,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
int TextNode::Length() {
TextElement elm = elms_->last();
ASSERT(elm.cp_offset >= 0);
if (elm.text_type == TextElement::ATOM) {
return elm.cp_offset + elm.data.u_atom->data().length();
} else {
return elm.cp_offset + 1;
}
ASSERT(elm.cp_offset() >= 0);
return elm.cp_offset() + elm.length();
}
......@@ -3424,8 +3418,8 @@ void TextNode::MakeCaseIndependent(bool is_ascii) {
int element_count = elms_->length();
for (int i = 0; i < element_count; i++) {
TextElement elm = elms_->at(i);
if (elm.text_type == TextElement::CHAR_CLASS) {
RegExpCharacterClass* cc = elm.data.u_char_class;
if (elm.text_type() == TextElement::CHAR_CLASS) {
RegExpCharacterClass* cc = elm.char_class();
// None of the standard character classes is different in the case
// independent case and it slows us down if we don't know that.
if (cc->is_standard(zone())) continue;
......@@ -3441,11 +3435,7 @@ void TextNode::MakeCaseIndependent(bool is_ascii) {
int TextNode::GreedyLoopTextLength() {
TextElement elm = elms_->at(elms_->length() - 1);
if (elm.text_type == TextElement::CHAR_CLASS) {
return elm.cp_offset + 1;
} else {
return elm.cp_offset + elm.data.u_atom->data().length();
}
return elm.cp_offset() + elm.length();
}
......@@ -3453,8 +3443,8 @@ RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler) {
if (elms_->length() != 1) return NULL;
TextElement elm = elms_->at(0);
if (elm.text_type != TextElement::CHAR_CLASS) return NULL;
RegExpCharacterClass* node = elm.data.u_char_class;
if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
RegExpCharacterClass* node = elm.char_class();
ZoneList<CharacterRange>* ranges = node->ranges(zone());
if (!CharacterRange::IsCanonical(ranges)) {
CharacterRange::Canonicalize(ranges);
......@@ -4528,13 +4518,13 @@ void DotPrinter::VisitText(TextNode* that) {
for (int i = 0; i < that->elements()->length(); i++) {
if (i > 0) stream()->Add(" ");
TextElement elm = that->elements()->at(i);
switch (elm.text_type) {
switch (elm.text_type()) {
case TextElement::ATOM: {
stream()->Add("'%w'", elm.data.u_atom->data());
stream()->Add("'%w'", elm.atom()->data());
break;
}
case TextElement::CHAR_CLASS: {
RegExpCharacterClass* node = elm.data.u_char_class;
RegExpCharacterClass* node = elm.char_class();
stream()->Add("[");
if (node->is_negated())
stream()->Add("^");
......@@ -5716,12 +5706,8 @@ void TextNode::CalculateOffsets() {
int cp_offset = 0;
for (int i = 0; i < element_count; i++) {
TextElement& elm = elements()->at(i);
elm.cp_offset = cp_offset;
if (elm.text_type == TextElement::ATOM) {
cp_offset += elm.data.u_atom->data().length();
} else {
cp_offset++;
}
elm.set_cp_offset(cp_offset);
cp_offset += elm.length();
}
}
......@@ -5837,8 +5823,8 @@ void TextNode::FillInBMInfo(int initial_offset,
return;
}
TextElement text = elements()->at(i);
if (text.text_type == TextElement::ATOM) {
RegExpAtom* atom = text.data.u_atom;
if (text.text_type() == TextElement::ATOM) {
RegExpAtom* atom = text.atom();
for (int j = 0; j < atom->length(); j++, offset++) {
if (offset >= bm->length()) {
if (initial_offset == 0) set_bm_info(not_at_start, bm);
......@@ -5860,8 +5846,8 @@ void TextNode::FillInBMInfo(int initial_offset,
}
}
} else {
ASSERT(text.text_type == TextElement::CHAR_CLASS);
RegExpCharacterClass* char_class = text.data.u_char_class;
ASSERT_EQ(TextElement::CHAR_CLASS, text.text_type());
RegExpCharacterClass* char_class = text.char_class();
ZoneList<CharacterRange>* ranges = char_class->ranges(zone());
if (char_class->is_negated()) {
bm->SetAll(offset);
......@@ -5973,14 +5959,14 @@ void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
void DispatchTableConstructor::VisitText(TextNode* that) {
TextElement elm = that->elements()->at(0);
switch (elm.text_type) {
switch (elm.text_type()) {
case TextElement::ATOM: {
uc16 c = elm.data.u_atom->data()[0];
uc16 c = elm.atom()->data()[0];
AddRange(CharacterRange(c, c));
break;
}
case TextElement::CHAR_CLASS: {
RegExpCharacterClass* tree = elm.data.u_char_class;
RegExpCharacterClass* tree = elm.char_class();
ZoneList<CharacterRange>* ranges = tree->ranges(that->zone());
if (tree->is_negated()) {
AddInverse(ranges);
......
......@@ -426,20 +426,41 @@ FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
#undef FORWARD_DECLARE
class TextElement {
class TextElement V8_FINAL BASE_EMBEDDED {
public:
enum TextType {UNINITIALIZED, ATOM, CHAR_CLASS};
TextElement() : text_type(UNINITIALIZED) { }
explicit TextElement(TextType t) : text_type(t), cp_offset(-1) { }
enum TextType {
ATOM,
CHAR_CLASS
};
static TextElement Atom(RegExpAtom* atom);
static TextElement CharClass(RegExpCharacterClass* char_class);
int length();
TextType text_type;
union {
RegExpAtom* u_atom;
RegExpCharacterClass* u_char_class;
} data;
int cp_offset;
int cp_offset() const { return cp_offset_; }
void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
int length() const;
TextType text_type() const { return text_type_; }
RegExpTree* tree() const { return tree_; }
RegExpAtom* atom() const {
ASSERT(text_type() == ATOM);
return reinterpret_cast<RegExpAtom*>(tree());
}
RegExpCharacterClass* char_class() const {
ASSERT(text_type() == CHAR_CLASS);
return reinterpret_cast<RegExpCharacterClass*>(tree());
}
private:
TextElement(TextType text_type, RegExpTree* tree)
: cp_offset_(-1), text_type_(text_type), tree_(tree) {}
int cp_offset_;
TextType text_type_;
RegExpTree* tree_;
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment