Commit 5178af89 authored by lrn@chromium.org's avatar lrn@chromium.org

Irregexp is specialized on subject character type.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@937 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent c023ec3a
......@@ -201,6 +201,50 @@ static inline void ThrowRegExpException(Handle<JSRegExp> re,
}
// Generic RegExp methods. Dispatches to implementation specific methods.
// Scratch buffer of ints sized by a register count.
//
// Requests of at most kStaticOffsetsVectorSize ints are served from one
// static array shared by every instance (so two live small OffsetsVectors
// alias the same storage); larger requests get an array from NewArray<int>
// that the destructor releases with DeleteArray.
class OffsetsVector {
 public:
  // explicit: a plain int must not silently convert into a buffer object.
  inline explicit OffsetsVector(int num_registers)
      : offsets_vector_length_(num_registers) {
    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
      vector_ = NewArray<int>(offsets_vector_length_);
    } else {
      vector_ = static_offsets_vector_;
    }
  }

  inline ~OffsetsVector() {
    // Only the NewArray'd buffer is owned; the static one must not be freed.
    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
      DeleteArray(vector_);
      vector_ = NULL;
    }
  }

  // Start of the buffer; valid for length() ints.
  inline int* vector() {
    return vector_;
  }

  // Number of ints requested at construction.
  inline int length() {
    return offsets_vector_length_;
  }

 private:
  // Declared but intentionally not defined: a copy would share vector_ and
  // both destructors would call DeleteArray on the same allocation.
  OffsetsVector(const OffsetsVector&);
  void operator=(const OffsetsVector&);

  int* vector_;
  int offsets_vector_length_;
  static const int kStaticOffsetsVectorSize = 50;
  static int static_offsets_vector_[kStaticOffsetsVectorSize];
};
// Out-of-class definition providing the storage for the static buffer that
// OffsetsVector declares and hands out for small requests.
int OffsetsVector::static_offsets_vector_[
OffsetsVector::kStaticOffsetsVectorSize];
Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
Handle<String> pattern,
Handle<String> flag_str) {
......@@ -224,7 +268,7 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
pattern,
parse_result.error,
"malformed_regexp");
return Handle<Object>();
return Handle<Object>::null();
}
RegExpAtom* atom = parse_result.tree->AsAtom();
if (atom != NULL && !flags.is_ignore_case()) {
......@@ -237,20 +281,10 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
result = AtomCompile(re, pattern, flags, pattern);
}
} else {
RegExpNode* node = NULL;
Handle<FixedArray> irregexp_data =
RegExpEngine::Compile(&parse_result,
&node,
flags.is_ignore_case(),
flags.is_multiline(),
pattern);
if (irregexp_data.is_null()) {
if (FLAG_disable_jscre) {
UNIMPLEMENTED();
}
result = JscrePrepare(re, pattern, flags);
if (FLAG_irregexp) {
result = IrregexpPrepare(re, pattern, flags);
} else {
result = IrregexpPrepare(re, pattern, flags, irregexp_data);
result = JscrePrepare(re, pattern, flags);
}
}
Object* data = re->data();
......@@ -270,18 +304,30 @@ Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index) {
switch (regexp->TypeTag()) {
case JSRegExp::ATOM:
return AtomExec(regexp, subject, index);
case JSRegExp::IRREGEXP: {
Handle<Object> result = IrregexpExec(regexp, subject, index);
if (!result.is_null()) {
return result;
}
// We couldn't handle the regexp using Irregexp, so fall back
// on JSCRE. We rejoice at the thought of the day when this is
// no longer needed.
// Reset the JSRegExp to use JSCRE.
JscrePrepare(regexp,
Handle<String>(regexp->Pattern()),
regexp->GetFlags());
// Fall-through to JSCRE.
}
case JSRegExp::JSCRE:
if (FLAG_disable_jscre) {
UNIMPLEMENTED();
}
return JscreExec(regexp, subject, index);
case JSRegExp::ATOM:
return AtomExec(regexp, subject, index);
case JSRegExp::IRREGEXP:
return IrregexpExec(regexp, subject, index);
default:
UNREACHABLE();
return Handle<Object>();
return Handle<Object>::null();
}
}
......@@ -289,22 +335,37 @@ Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject) {
switch (regexp->TypeTag()) {
case JSRegExp::ATOM:
return AtomExecGlobal(regexp, subject);
case JSRegExp::IRREGEXP: {
Handle<Object> result = IrregexpExecGlobal(regexp, subject);
if (!result.is_null()) {
return result;
}
// We couldn't handle the regexp using Irregexp, so fall back
// on JSCRE. We rejoice at the thought of the day when this is
// no longer needed.
// Reset the JSRegExp to use JSCRE.
JscrePrepare(regexp,
Handle<String>(regexp->Pattern()),
regexp->GetFlags());
// Fall-through to JSCRE.
}
case JSRegExp::JSCRE:
if (FLAG_disable_jscre) {
UNIMPLEMENTED();
}
return JscreExecGlobal(regexp, subject);
case JSRegExp::ATOM:
return AtomExecGlobal(regexp, subject);
case JSRegExp::IRREGEXP:
return IrregexpExecGlobal(regexp, subject);
default:
UNREACHABLE();
return Handle<Object>();
return Handle<Object>::null();
}
}
// RegExp Atom implementation: Simple string search using indexOf.
Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags,
......@@ -366,6 +427,21 @@ Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
}
// JSCRE implementation.
// Returns the Smi stored at kJscreNumberOfCapturesIndex in the FixedArray
// found at the regexp's kJscreDataIndex data slot.
int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
  FixedArray* jscre_data =
      FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
  Object* capture_count = jscre_data->get(kJscreNumberOfCapturesIndex);
  return Smi::cast(capture_count)->value();
}
// Returns the ByteArray stored at kJscreInternalIndex in the FixedArray found
// at the regexp's kJscreDataIndex data slot. The result is a raw pointer,
// not a handle.
ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
  FixedArray* jscre_data =
      FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
  Object* internal = jscre_data->get(kJscreInternalIndex);
  return ByteArray::cast(internal);
}
Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags) {
......@@ -375,20 +451,11 @@ Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,
}
// Tags |re| as JSRegExp::IRREGEXP, recording the pattern, the flags and the
// caller-supplied irregexp_data through Factory::SetRegExpData.
// Returns |re| itself.
Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
                                          Handle<String> pattern,
                                          JSRegExp::Flags flags,
                                          Handle<FixedArray> irregexp_data) {
  Factory::SetRegExpData(re,
                         JSRegExp::IRREGEXP,
                         pattern,
                         flags,
                         irregexp_data);
  return re;
}
static inline Object* DoCompile(String* pattern,
JSRegExp::Flags flags,
unsigned* number_of_captures,
const char** error_message,
v8::jscre::JscreRegExp** code) {
static inline Object* JscreDoCompile(String* pattern,
JSRegExp::Flags flags,
unsigned* number_of_captures,
const char** error_message,
v8::jscre::JscreRegExp** code) {
v8::jscre::JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()
? v8::jscre::JSRegExpIgnoreCase
: v8::jscre::JSRegExpDoNotIgnoreCase;
......@@ -417,16 +484,16 @@ static inline Object* DoCompile(String* pattern,
}
void CompileWithRetryAfterGC(Handle<String> pattern,
JSRegExp::Flags flags,
unsigned* number_of_captures,
const char** error_message,
v8::jscre::JscreRegExp** code) {
CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern,
flags,
number_of_captures,
error_message,
code));
static void JscreCompileWithRetryAfterGC(Handle<String> pattern,
JSRegExp::Flags flags,
unsigned* number_of_captures,
const char** error_message,
v8::jscre::JscreRegExp** code) {
CALL_HEAP_FUNCTION_VOID(JscreDoCompile(*pattern,
flags,
number_of_captures,
error_message,
code));
}
......@@ -445,11 +512,11 @@ Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) {
v8::jscre::JscreRegExp* code = NULL;
FlattenString(pattern);
CompileWithRetryAfterGC(two_byte_pattern,
flags,
&number_of_captures,
&error_message,
&code);
JscreCompileWithRetryAfterGC(two_byte_pattern,
flags,
&number_of_captures,
&error_message,
&code);
if (code == NULL) {
// Throw an exception.
......@@ -476,92 +543,31 @@ Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) {
}
Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
int num_captures,
Handle<String> two_byte_subject,
int previous_index,
int* offsets_vector,
int offsets_vector_length) {
#ifdef DEBUG
if (FLAG_trace_regexp_bytecodes) {
String* pattern = regexp->Pattern();
PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
PrintF("\n\nSubject string: '%s'\n\n", *(two_byte_subject->ToCString()));
}
#endif
ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation());
ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject)));
bool rc;
for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
offsets_vector[i] = -1;
Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
Handle<Object> compile_result = JscreCompile(regexp);
if (compile_result.is_null()) return compile_result;
}
ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject));
FixedArray* irregexp =
FixedArray::cast(regexp->DataAt(JSRegExp::kIrregexpDataIndex));
int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
int num_captures = JscreNumberOfCaptures(regexp);
switch (tag) {
case RegExpMacroAssembler::kIA32Implementation: {
#ifndef ARM
Code* code = Code::cast(irregexp->get(kIrregexpCodeIndex));
Address start_addr =
Handle<SeqTwoByteString>::cast(two_byte_subject)->GetCharsAddress();
int string_offset =
start_addr - reinterpret_cast<Address>(*two_byte_subject);
int start_offset = string_offset + previous_index * sizeof(uc16);
int end_offset =
string_offset + two_byte_subject->length() * sizeof(uc16);
rc = RegExpMacroAssemblerIA32::Execute(code,
two_byte_subject.location(),
start_offset,
end_offset,
offsets_vector,
previous_index == 0);
if (rc) {
// Capture values are relative to start_offset only.
for (int i = 0; i < offsets_vector_length; i++) {
if (offsets_vector[i] >= 0) {
offsets_vector[i] += previous_index;
}
}
}
break;
#else
UNIMPLEMENTED();
rc = false;
break;
#endif
}
case RegExpMacroAssembler::kBytecodeImplementation: {
Handle<ByteArray> byte_codes = IrregexpCode(regexp);
OffsetsVector offsets((num_captures + 1) * 3);
rc = IrregexpInterpreter::Match(byte_codes,
two_byte_subject,
offsets_vector,
previous_index);
break;
}
case RegExpMacroAssembler::kARMImplementation:
default:
UNREACHABLE();
rc = false;
break;
}
int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
if (!rc) {
return Factory::null_value();
}
Handle<String> subject16 = CachedStringToTwoByte(subject);
Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
// The captures come in (start, end+1) pairs.
for (int i = 0; i < 2 * (num_captures+1); i += 2) {
array->set(i, Smi::FromInt(offsets_vector[i]));
array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
}
return Factory::NewJSArrayWithElements(array);
return JscreExecOnce(regexp,
num_captures,
subject,
previous_index,
subject16->GetTwoByteData(),
offsets.vector(),
offsets.length());
}
......@@ -617,76 +623,8 @@ Handle<Object> RegExpImpl::JscreExecOnce(Handle<JSRegExp> regexp,
}
// Scratch buffer of ints sized by a register count.
//
// Requests of at most kStaticOffsetsVectorSize ints are served from one
// static array shared by every instance (so two live small OffsetsVectors
// alias the same storage); larger requests get an array from NewArray<int>
// that the destructor releases with DeleteArray.
class OffsetsVector {
 public:
  // explicit: a plain int must not silently convert into a buffer object.
  inline explicit OffsetsVector(int num_registers)
      : offsets_vector_length_(num_registers) {
    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
      vector_ = NewArray<int>(offsets_vector_length_);
    } else {
      vector_ = static_offsets_vector_;
    }
  }

  inline ~OffsetsVector() {
    // Only the NewArray'd buffer is owned; the static one must not be freed.
    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
      DeleteArray(vector_);
      vector_ = NULL;
    }
  }

  // Start of the buffer; valid for length() ints.
  inline int* vector() {
    return vector_;
  }

  // Number of ints requested at construction.
  inline int length() {
    return offsets_vector_length_;
  }

 private:
  // Declared but intentionally not defined: a copy would share vector_ and
  // both destructors would call DeleteArray on the same allocation.
  OffsetsVector(const OffsetsVector&);
  void operator=(const OffsetsVector&);

  int* vector_;
  int offsets_vector_length_;
  static const int kStaticOffsetsVectorSize = 50;
  static int static_offsets_vector_[kStaticOffsetsVectorSize];
};
// Out-of-class definition providing the storage for the static buffer that
// OffsetsVector declares and hands out for small requests.
int OffsetsVector::static_offsets_vector_[
OffsetsVector::kStaticOffsetsVectorSize];
// Runs one Irregexp match of |regexp| against |subject|, starting at the
// integer value of |index|, and returns whatever IrregexpExecOnce returns.
// The subject is converted with CachedStringToTwoByte before matching.
Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
                                        Handle<String> subject,
                                        Handle<Object> index) {
  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
  ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());

  // Scratch space that receives the match offsets, one int per register.
  OffsetsVector offsets(IrregexpNumberOfRegisters(regexp));

  int capture_count = IrregexpNumberOfCaptures(regexp);
  int start_index = static_cast<int>(DoubleToInteger(index->Number()));
  Handle<String> two_byte_subject = CachedStringToTwoByte(subject);

  return IrregexpExecOnce(regexp,
                          capture_count,
                          two_byte_subject,
                          start_index,
                          offsets.vector(),
                          offsets.length());
}
Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index) {
Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
Handle<Object> compile_result = JscreCompile(regexp);
......@@ -694,35 +632,11 @@ Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
}
ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
// Prepare space for the return values.
int num_captures = JscreNumberOfCaptures(regexp);
OffsetsVector offsets((num_captures + 1) * 3);
int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
Handle<String> subject16 = CachedStringToTwoByte(subject);
Handle<Object> result(JscreExecOnce(regexp,
num_captures,
subject,
previous_index,
subject16->GetTwoByteData(),
offsets.vector(),
offsets.length()));
return result;
}
Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());
// Prepare space for the return values.
int number_of_registers = IrregexpNumberOfRegisters(regexp);
OffsetsVector offsets(number_of_registers);
int previous_index = 0;
Handle<JSArray> result = Factory::NewJSArray(0);
......@@ -737,12 +651,13 @@ Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
// string length, there is no match.
matches = Factory::null_value();
} else {
matches = IrregexpExecOnce(regexp,
IrregexpNumberOfCaptures(regexp),
subject16,
previous_index,
offsets.vector(),
offsets.length());
matches = JscreExecOnce(regexp,
num_captures,
subject,
previous_index,
subject16->GetTwoByteData(),
offsets.vector(),
offsets.length());
if (matches->IsJSArray()) {
SetElement(result, i, matches);
......@@ -766,19 +681,146 @@ Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
}
Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
Handle<Object> compile_result = JscreCompile(regexp);
if (compile_result.is_null()) return compile_result;
// Irregexp implementation.
// Returns the compiled Irregexp entry of |re| for the requested subject
// encoding, compiling and caching it on first use.
//
// The regexp's kIrregexpDataIndex slot holds a two-element FixedArray:
// slot 0 is the ASCII entry, slot 1 the UC16 entry; a null slot means "not
// compiled yet". Returns a null handle when the pattern fails to re-parse
// (after throwing "malformed_regexp") or when RegExpEngine::Compile yields a
// null handle; in the latter case nothing is cached.
static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re,
                                              bool is_ascii) {
  ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
  Handle<FixedArray> alternatives(
      FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)));
  ASSERT_EQ(2, alternatives->length());

  int index = is_ascii ? 0 : 1;
  Object* entry = alternatives->get(index);
  if (!entry->IsNull()) {
    // Already compiled for this encoding.
    return Handle<FixedArray>(FixedArray::cast(entry));
  }

  // Compile the RegExp.
  ZoneScope zone_scope(DELETE_ON_EXIT);

  JSRegExp::Flags flags = re->GetFlags();

  Handle<String> pattern(re->Pattern());
  // Flatten through the handle-based FlattenString helper, as JscreCompile
  // and IrregexpExecOnce do, instead of calling String::Flatten on the raw
  // object and discarding its result.
  if (!pattern->IsFlat(StringShape(*pattern))) {
    FlattenString(pattern);
  }

  RegExpParseResult parse_result;
  FlatStringReader reader(pattern);
  if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
    // Throw an exception if we fail to parse the pattern.
    // THIS SHOULD NOT HAPPEN. We already parsed it successfully once.
    ThrowRegExpException(re,
                         pattern,
                         parse_result.error,
                         "malformed_regexp");
    return Handle<FixedArray>::null();
  }
  Handle<FixedArray> compiled_entry =
      RegExpEngine::Compile(&parse_result,
                            NULL,
                            flags.is_ignore_case(),
                            flags.is_multiline(),
                            pattern,
                            is_ascii);
  if (!compiled_entry.is_null()) {
    // Cache the result so later calls with this encoding skip compilation.
    alternatives->set(index, *compiled_entry);
  }
  return compiled_entry;
}
// Returns the Smi stored at kIrregexpNumberOfCapturesIndex of the given
// Irregexp data array.
int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) {
  Object* capture_count = irre->get(kIrregexpNumberOfCapturesIndex);
  return Smi::cast(capture_count)->value();
}
// Returns the Smi stored at kIrregexpNumberOfRegistersIndex of the given
// Irregexp data array.
int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) {
  Object* register_count = irre->get(kIrregexpNumberOfRegistersIndex);
  return Smi::cast(register_count)->value();
}
// Returns the kIrregexpCodeIndex element of the Irregexp data array as a
// ByteArray handle. Asserts that the array's implementation tag is
// kBytecodeImplementation.
Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) {
  ASSERT(RegExpMacroAssembler::kBytecodeImplementation ==
         Smi::cast(irre->get(kIrregexpImplementationIndex))->value());
  Object* code = irre->get(kIrregexpCodeIndex);
  return Handle<ByteArray>(ByteArray::cast(code));
}
// Returns the kIrregexpCodeIndex element of the Irregexp data array as a
// Code handle. Asserts that the array's implementation tag is anything but
// kBytecodeImplementation.
Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) {
  ASSERT(RegExpMacroAssembler::kBytecodeImplementation !=
         Smi::cast(irre->get(kIrregexpImplementationIndex))->value());
  Object* code = irre->get(kIrregexpCodeIndex);
  return Handle<Code>(Code::cast(code));
}
// Tags |re| as JSRegExp::IRREGEXP without compiling anything: the data
// attached is a fresh two-element array (ASCII and UC16 versions) with both
// slots set to null. Returns |re| itself.
Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
                                          Handle<String> pattern,
                                          JSRegExp::Flags flags) {
  // One slot per subject encoding, both initially empty.
  Handle<FixedArray> per_encoding = Factory::NewFixedArray(2);
  for (int slot = 0; slot < 2; slot++) {
    per_encoding->set_null(slot);
  }
  Factory::SetRegExpData(re,
                         JSRegExp::IRREGEXP,
                         pattern,
                         flags,
                         per_encoding);
  return re;
}
Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
if (irregexp.is_null()) {
// We can't handle the RegExp with IRRegExp.
return Handle<Object>::null();
}
ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
// Prepare space for the return values.
int num_captures = JscreNumberOfCaptures(regexp);
int number_of_registers = IrregexpNumberOfRegisters(irregexp);
OffsetsVector offsets(number_of_registers);
OffsetsVector offsets((num_captures + 1) * 3);
int num_captures = IrregexpNumberOfCaptures(irregexp);
int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
#ifdef DEBUG
if (FLAG_trace_regexp_bytecodes) {
String* pattern = regexp->Pattern();
PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
}
#endif
LOG(RegExpExecEvent(regexp, previous_index, subject));
return IrregexpExecOnce(irregexp,
num_captures,
subject,
previous_index,
offsets.vector(),
offsets.length());
}
Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
StringShape shape(*subject);
bool is_ascii = shape.IsAsciiRepresentation();
Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
if (irregexp.is_null()) {
return Handle<Object>::null();
}
// Prepare space for the return values.
int number_of_registers = IrregexpNumberOfRegisters(irregexp);
OffsetsVector offsets(number_of_registers);
int previous_index = 0;
......@@ -786,7 +828,9 @@ Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
int i = 0;
Handle<Object> matches;
Handle<String> subject16 = CachedStringToTwoByte(subject);
if (!subject->IsFlat(shape)) {
subject->Flatten(shape);
}
do {
if (previous_index > subject->length() || previous_index < 0) {
......@@ -794,13 +838,20 @@ Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
// string length, there is no match.
matches = Factory::null_value();
} else {
matches = JscreExecOnce(regexp,
num_captures,
subject,
previous_index,
subject16->GetTwoByteData(),
offsets.vector(),
offsets.length());
#ifdef DEBUG
if (FLAG_trace_regexp_bytecodes) {
String* pattern = regexp->Pattern();
PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
}
#endif
LOG(RegExpExecEvent(regexp, previous_index, subject));
matches = IrregexpExecOnce(irregexp,
IrregexpNumberOfCaptures(irregexp),
subject,
previous_index,
offsets.vector(),
offsets.length());
if (matches->IsJSArray()) {
SetElement(result, i, matches);
......@@ -824,36 +875,120 @@ Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
}
// Returns the Smi stored at kJscreNumberOfCapturesIndex in the FixedArray
// found at the regexp's kJscreDataIndex data slot.
int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
  FixedArray* jscre_data =
      FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
  Object* capture_count = jscre_data->get(kJscreNumberOfCapturesIndex);
  return Smi::cast(capture_count)->value();
}
Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp,
int num_captures,
Handle<String> subject,
int previous_index,
int* offsets_vector,
int offsets_vector_length) {
bool rc;
int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
return ByteArray::cast(value->get(kJscreInternalIndex));
}
switch (tag) {
case RegExpMacroAssembler::kIA32Implementation: {
#ifndef ARM
if (!subject->IsFlat(StringShape(*subject))) {
FlattenString(subject);
}
Handle<Code> code = IrregexpNativeCode(irregexp);
StringShape shape(*subject);
int RegExpImpl::IrregexpNumberOfCaptures(Handle<JSRegExp> re) {
FixedArray* value =
FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
return Smi::cast(value->get(kIrregexpNumberOfCapturesIndex))->value();
}
// Character offsets into string.
int start_offset = previous_index;
int end_offset = subject->length(shape);
if (shape.IsCons()) {
subject = Handle<String>(ConsString::cast(*subject)->first());
} else if (shape.IsSliced()) {
SlicedString* slice = SlicedString::cast(*subject);
start_offset += slice->start();
end_offset += slice->start();
subject = Handle<String>(slice->buffer());
}
int RegExpImpl::IrregexpNumberOfRegisters(Handle<JSRegExp> re) {
FixedArray* value =
FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
return Smi::cast(value->get(kIrregexpNumberOfRegistersIndex))->value();
}
// String is now either Sequential or External
StringShape flatshape(*subject);
bool is_ascii = flatshape.IsAsciiRepresentation();
int char_size = is_ascii ? sizeof(char) : sizeof(uc16); // NOLINT
if (flatshape.IsExternal()) {
const byte* address;
if (is_ascii) {
ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
address = reinterpret_cast<const byte*>(ext->resource()->data());
} else {
ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
address = reinterpret_cast<const byte*>(ext->resource()->data());
}
rc = RegExpMacroAssemblerIA32::Execute(
*code,
&address,
start_offset * char_size,
end_offset * char_size,
offsets_vector,
previous_index == 0);
} else { // Sequential string
int byte_offset =
is_ascii ? SeqAsciiString::kHeaderSize - kHeapObjectTag:
SeqTwoByteString::kHeaderSize - kHeapObjectTag;
rc = RegExpMacroAssemblerIA32::Execute(
*code,
subject.location(),
byte_offset + start_offset * char_size,
byte_offset + end_offset * char_size,
offsets_vector,
previous_index == 0);
}
Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) {
FixedArray* value =
FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex)));
if (rc) {
// Capture values are relative to start_offset only.
for (int i = 0; i < offsets_vector_length; i++) {
if (offsets_vector[i] >= 0) {
offsets_vector[i] += previous_index;
}
}
}
break;
#else
UNIMPLEMENTED();
rc = false;
break;
#endif
}
case RegExpMacroAssembler::kBytecodeImplementation: {
for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
offsets_vector[i] = -1;
}
Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp);
Handle<String> two_byte_subject = CachedStringToTwoByte(subject);
rc = IrregexpInterpreter::Match(byte_codes,
two_byte_subject,
offsets_vector,
previous_index);
break;
}
case RegExpMacroAssembler::kARMImplementation:
default:
UNREACHABLE();
rc = false;
break;
}
if (!rc) {
return Factory::null_value();
}
Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
// The captures come in (start, end+1) pairs.
for (int i = 0; i < 2 * (num_captures+1); i += 2) {
array->set(i, Smi::FromInt(offsets_vector[i]));
array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
}
return Factory::NewJSArrayWithElements(array);
}
......@@ -3475,7 +3610,8 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
RegExpNode** node_return,
bool ignore_case,
bool is_multiline,
Handle<String> pattern) {
Handle<String> pattern,
bool is_ascii) {
RegExpCompiler compiler(input->capture_count, ignore_case);
// Wrap the body of the regexp in capture #0.
RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
......@@ -3500,10 +3636,6 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
NodeInfo info = *node->info();
node = node->EnsureExpanded(&info);
if (!FLAG_irregexp) {
return Handle<FixedArray>::null();
}
if (is_multiline && !FLAG_attempt_multiline_irregexp) {
return Handle<FixedArray>::null();
}
......@@ -3512,7 +3644,13 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
#ifdef ARM
// Unimplemented, fall-through to bytecode implementation.
#else // IA32
RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16,
RegExpMacroAssemblerIA32::Mode mode;
if (is_ascii) {
mode = RegExpMacroAssemblerIA32::ASCII;
} else {
mode = RegExpMacroAssemblerIA32::UC16;
}
RegExpMacroAssemblerIA32 macro_assembler(mode,
(input->capture_count + 1) * 2);
return compiler.Assemble(&macro_assembler,
node,
......
......@@ -48,6 +48,9 @@ class RegExpImpl {
// This function calls the garbage collector if necessary.
static Handle<String> ToString(Handle<Object> value);
// Parses the RegExp pattern and prepares the JSRegExp object with
// generic data and choice of implementation - as well as what
// the implementation wants to store in the data field.
static Handle<Object> Compile(Handle<JSRegExp> re,
Handle<String> pattern,
Handle<String> flags);
......@@ -71,12 +74,10 @@ class RegExpImpl {
Handle<String> pattern,
JSRegExp::Flags flags);
// Stores a compiled RegExp pattern in the JSRegExp object.
// The pattern is compiled by Irregexp.
// Prepares a JSRegExp object with Irregexp-specific data.
static Handle<Object> IrregexpPrepare(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags,
Handle<FixedArray> irregexp_data);
JSRegExp::Flags flags);
// Compile the pattern using JSCRE and store the result in the
......@@ -140,9 +141,10 @@ class RegExpImpl {
static int JscreNumberOfCaptures(Handle<JSRegExp> re);
static ByteArray* JscreInternal(Handle<JSRegExp> re);
static int IrregexpNumberOfCaptures(Handle<JSRegExp> re);
static int IrregexpNumberOfRegisters(Handle<JSRegExp> re);
static Handle<ByteArray> IrregexpCode(Handle<JSRegExp> re);
static int IrregexpNumberOfCaptures(Handle<FixedArray> re);
static int IrregexpNumberOfRegisters(Handle<FixedArray> re);
static Handle<ByteArray> IrregexpByteCode(Handle<FixedArray> re);
static Handle<Code> IrregexpNativeCode(Handle<FixedArray> re);
// Call jsRegExpExecute once
static Handle<Object> JscreExecOnce(Handle<JSRegExp> regexp,
......@@ -153,7 +155,7 @@ class RegExpImpl {
int* ovector,
int ovector_length);
static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> regexp,
static Handle<Object> IrregexpExecOnce(Handle<FixedArray> regexp,
int num_captures,
Handle<String> subject16,
int previous_index,
......@@ -1082,7 +1084,9 @@ class RegExpEngine: public AllStatic {
RegExpNode** node_return,
bool ignore_case,
bool multiline,
Handle<String> pattern);
Handle<String> pattern,
bool is_ascii);
static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
};
......
......@@ -2924,7 +2924,7 @@ class JSRegExp: public JSObject {
// ATOM: A simple string to match against using an indexOf operation.
// IRREGEXP: Compiled with Irregexp.
// IRREGEXP_NATIVE: Compiled to native code with Irregexp.
enum Type { NOT_COMPILED, JSCRE, ATOM, IRREGEXP, IRREGEXP_NATIVE };
enum Type { NOT_COMPILED, JSCRE, ATOM, IRREGEXP };
enum Flag { NONE = 0, GLOBAL = 1, IGNORE_CASE = 2, MULTILINE = 4 };
class Flags {
......
......@@ -111,9 +111,10 @@ RegExpMacroAssemblerIA32::~RegExpMacroAssemblerIA32() {
void RegExpMacroAssemblerIA32::AdvanceCurrentPosition(int by) {
ASSERT(by > 0);
Label inside_string;
__ add(Operand(edi), Immediate(by * char_size()));
if (by != 0) {
Label inside_string;
__ add(Operand(edi), Immediate(by * char_size()));
}
}
......@@ -138,7 +139,7 @@ void RegExpMacroAssemblerIA32::Bind(Label* label) {
void RegExpMacroAssemblerIA32::CheckBitmap(uc16 start,
Label* bitmap,
Label* on_zero) {
UNREACHABLE();
UNIMPLEMENTED();
__ mov(eax, current_character());
__ sub(Operand(eax), Immediate(start));
__ cmp(eax, 64); // FIXME: 64 = length_of_bitmap_in_bits.
......@@ -683,6 +684,8 @@ int RegExpMacroAssemblerIA32::CaseInsensitiveCompareUC16(uc16** buffer,
int byte_offset1,
int byte_offset2,
size_t byte_length) {
// This function MUST NOT cause a garbage collection. A GC might move
// the calling generated code and invalidate the stacked return address.
ASSERT(byte_length % 2 == 0);
Address buffer_address = reinterpret_cast<Address>(*buffer);
uc16* substring1 = reinterpret_cast<uc16*>(buffer_address + byte_offset1);
......
......@@ -355,7 +355,7 @@ TEST(CharacterClassEscapes) {
}
static RegExpNode* Compile(const char* input, bool multiline) {
static RegExpNode* Compile(const char* input, bool multiline, bool is_ascii) {
V8::Initialize(NULL);
FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
......@@ -363,17 +363,18 @@ static RegExpNode* Compile(const char* input, bool multiline) {
return NULL;
RegExpNode* node = NULL;
Handle<String> pattern = Factory::NewStringFromUtf8(CStrVector(input));
RegExpEngine::Compile(&result, &node, false, multiline, pattern);
RegExpEngine::Compile(&result, &node, false, multiline, pattern, is_ascii);
return node;
}
static void Execute(const char* input,
bool multiline,
bool is_ascii,
bool dot_output = false) {
v8::HandleScope scope;
ZoneScope zone_scope(DELETE_ON_EXIT);
RegExpNode* node = Compile(input, multiline);
RegExpNode* node = Compile(input, multiline, is_ascii);
USE(node);
#ifdef DEBUG
if (dot_output) {
......@@ -1130,7 +1131,7 @@ TEST(LatinCanonicalize) {
TEST(SimplePropagation) {
v8::HandleScope scope;
ZoneScope zone_scope(DELETE_ON_EXIT);
RegExpNode* node = Compile("(a|^b|c)", false);
RegExpNode* node = Compile("(a|^b|c)", false, true);
CHECK(node->info()->follows_start_interest);
}
......@@ -1300,5 +1301,5 @@ TEST(CharClassDifference) {
TEST(Graph) {
V8::Initialize(NULL);
Execute("(?=[d#.])", false, true);
Execute("(?=[d#.])", false, true, true);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment