Commit 6fa2f4f0 authored by lrn@chromium.org's avatar lrn@chromium.org

RegExps now restart if their input string changes representation during preemption.

Cleaned up the handling of strings moving, so strings moved by GC and strings changing representation are handled equivalently.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1562 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent b5284875
......@@ -466,15 +466,9 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
Handle<JSArray> last_match_info) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
return Handle<Object>::null();
}
// Prepare space for the return values.
Handle<FixedArray> re_data(FixedArray::cast(regexp->data()));
int number_of_capture_registers =
(IrregexpNumberOfCaptures(*re_data) + 1) * 2;
(IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
OffsetsVector offsets(number_of_capture_registers);
int previous_index = index;
......@@ -493,7 +487,7 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
return IrregexpExecOnce(re_data,
return IrregexpExecOnce(regexp,
number_of_capture_registers,
last_match_info,
subject,
......@@ -507,16 +501,10 @@ Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<JSArray> last_match_info) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()));
bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
return Handle<Object>::null();
}
// Prepare space for the return values.
int number_of_capture_registers =
(IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
(IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
OffsetsVector offsets(number_of_capture_registers);
int previous_index = 0;
......@@ -545,7 +533,7 @@ Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
}
#endif
HandleScope scope;
matches = IrregexpExecOnce(irregexp,
matches = IrregexpExecOnce(regexp,
number_of_capture_registers,
last_match_info,
subject,
......@@ -587,7 +575,7 @@ Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
}
Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp,
Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> jsregexp,
int number_of_capture_registers,
Handle<JSArray> last_match_info,
Handle<String> subject,
......@@ -595,22 +583,29 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp,
int* offsets_vector,
int offsets_vector_length) {
ASSERT(subject->IsFlat());
bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
bool rc;
Handle<String> original_subject = subject;
Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
if (UseNativeRegexp()) {
#ifdef ARM
UNREACHABLE();
#else
Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
RegExpMacroAssemblerIA32::Result res =
RegExpMacroAssemblerIA32::Match(code,
subject,
offsets_vector,
offsets_vector_length,
previous_index);
RegExpMacroAssemblerIA32::Result res;
do {
bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
return Handle<Object>::null();
}
Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
res = RegExpMacroAssemblerIA32::Match(code,
subject,
offsets_vector,
offsets_vector_length,
previous_index);
// If result is RETRY, the string has changed representation, and we
// must restart from scratch.
} while (res == RegExpMacroAssemblerIA32::RETRY);
if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
ASSERT(Top::has_pending_exception());
return Handle<Object>::null();
......@@ -621,6 +616,7 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp,
rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
#endif
} else {
bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
offsets_vector[i] = -1;
}
......
......@@ -158,7 +158,7 @@ class RegExpImpl {
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
// Returns an empty handle in case of an exception.
static Handle<Object> IrregexpExecOnce(Handle<FixedArray> regexp,
static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> jsregexp,
int num_captures,
Handle<JSArray> lastMatchInfo,
Handle<String> subject16,
......
This diff is collapsed.
// Copyright 2008 the V8 project authors. All rights reserved.
// Copyright 2008-2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
......@@ -34,7 +34,16 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
public:
// Type of input string to generate code for.
enum Mode { ASCII = 1, UC16 = 2 };
enum Result { EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
// Result of calling the generated RegExp code:
// RETRY: Something significant changed during execution, and the matching
// should be retried from scratch.
// EXCEPTION: Something failed during execution. If no exception has been
// thrown, it's an internal out-of-memory, and the caller should
// throw the exception.
// FAILURE: Matching failed.
// SUCCESS: Matching succeeded, and the output array has been filled with
// capture positions.
enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
RegExpMacroAssemblerIA32(Mode mode, int registers_to_save);
virtual ~RegExpMacroAssemblerIA32();
......@@ -120,9 +129,10 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
int previous_index);
static Result Execute(Code* code,
Address* input,
String* input,
int start_offset,
int end_offset,
const byte* input_start,
const byte* input_end,
int* output,
bool at_start);
......@@ -131,10 +141,13 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
static const int kFramePointer = 0;
// Above the frame pointer - function parameters and return address.
static const int kReturn_eip = kFramePointer + kPointerSize;
static const int kInputBuffer = kReturn_eip + kPointerSize;
static const int kInputStartOffset = kInputBuffer + kPointerSize;
static const int kInputEndOffset = kInputStartOffset + kPointerSize;
static const int kRegisterOutput = kInputEndOffset + kPointerSize;
static const int kFrameAlign = kReturn_eip + kPointerSize;
// Parameters.
static const int kInputString = kFrameAlign;
static const int kStartIndex = kInputString + kPointerSize;
static const int kInputStart = kStartIndex + kPointerSize;
static const int kInputEnd = kInputStart + kPointerSize;
static const int kRegisterOutput = kInputEnd + kPointerSize;
static const int kAtStart = kRegisterOutput + kPointerSize;
static const int kStackHighEnd = kAtStart + kPointerSize;
// Below the frame pointer - local stack variables.
......@@ -152,11 +165,12 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
// Initial size of constant buffers allocated during compilation.
static const int kRegExpConstantsSize = 256;
static const byte* StringCharacterPosition(String* subject, int start_index);
// Compares two-byte strings case insensitively.
// Called from generated RegExp code.
static int CaseInsensitiveCompareUC16(uc16** buffer,
int byte_offset1,
int byte_offset2,
static int CaseInsensitiveCompareUC16(Address byte_offset1,
Address byte_offset2,
size_t byte_length);
// Load a number of characters at the given offset from the
......@@ -172,7 +186,12 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
// Called from RegExp if the stack-guard is triggered.
// If the code object is relocated, the return address is fixed before
// returning.
static int CheckStackGuardState(Address* return_address, Code* re_code);
static int CheckStackGuardState(Address* return_address,
Code* re_code,
Address re_frame);
// Generate a call to CheckStackGuardState.
void CallCheckStackGuardState(Register scratch);
// Called from RegExp if the backtrack stack limit is hit.
// Tries to expand the stack. Returns the new stack-pointer if
......
......@@ -5757,6 +5757,8 @@ THREADED_TEST(CrossContextNew) {
class RegExpInterruptTest {
public:
RegExpInterruptTest() : block_(NULL) {}
~RegExpInterruptTest() { delete block_; }
void RunTest() {
block_ = i::OS::CreateSemaphore(0);
gc_count_ = 0;
......@@ -5776,8 +5778,6 @@ class RegExpInterruptTest {
CHECK(regexp_success_);
CHECK(gc_success_);
}
RegExpInterruptTest() : block_(NULL) {}
~RegExpInterruptTest() { delete block_; }
private:
// Number of garbage collections required.
static const int kRequiredGCs = 5;
......@@ -5813,7 +5813,7 @@ class RegExpInterruptTest {
while (gc_during_regexp_ < kRequiredGCs) {
int gc_before = gc_count_;
{
// match 15-30 "a"'s against 14 and a "b".
// Match 15-30 "a"'s against 14 and a "b".
const char* c_source =
"/a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaa/"
".exec('aaaaaaaaaaaaaaab') === null";
......@@ -5826,7 +5826,7 @@ class RegExpInterruptTest {
}
}
{
// match 15-30 "a"'s against 15 and a "b".
// Match 15-30 "a"'s against 15 and a "b".
const char* c_source =
"/a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaa/"
".exec('aaaaaaaaaaaaaaaab')[0] === 'aaaaaaaaaaaaaaaa'";
......@@ -5909,3 +5909,172 @@ TEST(ObjectClone) {
CHECK_EQ(v8::Integer::New(123), obj->Get(v8_str("beta")));
CHECK_EQ(v8::Integer::New(456), clone->Get(v8_str("beta")));
}
// Test fixture that runs a long-running regular expression against a global
// JavaScript variable named "input" while a second thread repeatedly flips the
// underlying external string between its ASCII and two-byte representation.
// RunTest() drives the whole scenario and CHECKs that both the regexp loop and
// the morphing loop ran to completion.
class RegExpStringModificationTest {
public:
// Sets up the semaphore, zeroes the morph counters and wraps the two backing
// buffers in external string resources. Note that uc16_resource_ is built
// around two_byte_content_ here, but the buffer's contents are only written
// later, at the start of RunTest().
RegExpStringModificationTest()
: block_(i::OS::CreateSemaphore(0)),
morphs_(0),
morphs_during_regexp_(0),
ascii_resource_(i::Vector<const char>("aaaaaaaaaaaaaab", 15)),
uc16_resource_(i::Vector<const uint16_t>(two_byte_content_, 15)) {}
// Releases the semaphore allocated in the constructor.
~RegExpStringModificationTest() { delete block_; }
// Runs the scenario: prepares the input string, starts the morphing thread,
// enables preemption, executes the regexp loop on the calling thread, then
// joins the morphing thread and verifies both success flags.
void RunTest() {
regexp_success_ = false;
morph_success_ = false;
// Initialize the contents of two_byte_content_ to be a uc16 representation
// of "aaaaaaaaaaaaaab".
for (int i = 0; i < 14; i++) {
two_byte_content_[i] = 'a';
}
two_byte_content_[14] = 'b';
// Create the input string for the regexp - the one we are going to change
// properties of.
input_ = i::Factory::NewExternalStringFromAscii(&ascii_resource_);
// Inject the input as a global variable.
i::Handle<i::String> input_name =
i::Factory::NewStringFromAscii(i::Vector<const char>("input", 5));
i::Top::global_context()->global()->SetProperty(*input_name, *input_, NONE);
MorphThread morph_thread(this);
morph_thread.Start();
v8::Locker::StartPreemption(1);
LongRunningRegExp();
{
// Give up the V8 lock while waiting for the morph thread to finish;
// MorphString() takes a v8::Locker on every iteration.
v8::Unlocker unlock;
morph_thread.Join();
}
v8::Locker::StopPreemption();
CHECK(regexp_success_);
CHECK(morph_success_);
}
private:
// External ASCII string resource that exposes the characters of a Vector
// it was constructed with.
class AsciiVectorResource : public v8::String::ExternalAsciiStringResource {
public:
explicit AsciiVectorResource(i::Vector<const char> vector)
: data_(vector) {}
virtual ~AsciiVectorResource() {}
virtual size_t length() const { return data_.length(); }
virtual const char* data() const { return data_.start(); }
private:
i::Vector<const char> data_;
};
// External two-byte string resource that exposes the uc16 characters of a
// Vector it was constructed with.
class UC16VectorResource : public v8::String::ExternalStringResource {
public:
explicit UC16VectorResource(i::Vector<const i::uc16> vector)
: data_(vector) {}
virtual ~UC16VectorResource() {}
virtual size_t length() const { return data_.length(); }
virtual const i::uc16* data() const { return data_.start(); }
private:
i::Vector<const i::uc16> data_;
};
// Number of string modifications required.
static const int kRequiredModifications = 5;
// Upper bound on the total number of morphs; both loops below stop once it is
// reached, even if fewer than kRequiredModifications morphs were observed
// during regexp execution.
static const int kMaxModifications = 100;
// Thread whose body simply forwards to the owning test's MorphString().
class MorphThread : public i::Thread {
public:
explicit MorphThread(RegExpStringModificationTest* test)
: test_(test) {}
virtual void Run() {
test_->MorphString();
}
private:
RegExpStringModificationTest* test_;
};
// Body of the morphing thread. Waits until LongRunningRegExp() signals
// block_, then repeatedly takes the V8 lock and switches input_ to the other
// representation, counting each switch in morphs_.
// NOTE(review): the loop condition reads morphs_during_regexp_ and morphs_
// outside the v8::Locker scope - confirm this unsynchronized read is intended.
void MorphString() {
block_->Wait();
while (morphs_during_regexp_ < kRequiredModifications &&
morphs_ < kMaxModifications) {
{
v8::Locker lock;
// Swap string between ascii and two-byte representation.
i::String* string = *input_;
CHECK(i::StringShape(string).IsExternal());
if (i::StringShape(string).IsAsciiRepresentation()) {
// Morph external string to be TwoByte string.
// The object is reinterpreted in place: the instance type is rewritten
// on the string's map and the resource pointer is swapped.
// NOTE(review): set_instance_type() is applied to the map object itself;
// if that map is shared with other strings they would change too - confirm.
i::ExternalAsciiString* ext_string =
i::ExternalAsciiString::cast(string);
i::ExternalTwoByteString* morphed =
reinterpret_cast<i::ExternalTwoByteString*>(ext_string);
morphed->map()->set_instance_type(i::SHORT_EXTERNAL_STRING_TYPE);
morphed->set_resource(&uc16_resource_);
} else {
// Morph external string to be ASCII string.
i::ExternalTwoByteString* ext_string =
i::ExternalTwoByteString::cast(string);
i::ExternalAsciiString* morphed =
reinterpret_cast<i::ExternalAsciiString*>(ext_string);
morphed->map()->set_instance_type(
i::SHORT_EXTERNAL_ASCII_STRING_TYPE);
morphed->set_resource(&ascii_resource_);
}
morphs_++;
}
// Pause between morphs with the V8 lock released (the Locker above has
// gone out of scope by this point).
i::OS::Sleep(1);
}
morph_success_ = true;
}
// Body of the regexp side. Repeatedly compiles and runs a script that
// executes a regexp against the global `input` and expects a null result,
// accumulating in morphs_during_regexp_ how many morphs happened while each
// script was running.
void LongRunningRegExp() {
block_->Signal();  // Enable morphing thread on next preemption.
while (morphs_during_regexp_ < kRequiredModifications &&
morphs_ < kMaxModifications) {
// Snapshot the morph counter so the delta across the script run can be
// attributed to this regexp execution.
int morphs_before = morphs_;
{
// Match 15-30 "a"'s against 14 and a "b".
const char* c_source =
"/a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaa/"
".exec(input) === null";
Local<String> source = String::New(c_source);
Local<Script> script = Script::Compile(source);
Local<Value> result = script->Run();
CHECK(result->IsTrue());
}
int morphs_after = morphs_;
morphs_during_regexp_ += morphs_after - morphs_before;
}
regexp_success_ = true;
}
// Backing store for uc16_resource_; filled in by RunTest().
i::uc16 two_byte_content_[15];
// Semaphore the morph thread waits on before it starts morphing.
i::Semaphore* block_;
// Total number of representation switches performed so far.
int morphs_;
// Number of switches observed between the start and end of script runs.
int morphs_during_regexp_;
// Set once LongRunningRegExp() has left its loop.
bool regexp_success_;
// Set once MorphString() has left its loop.
bool morph_success_;
// The external string exposed to JavaScript as the global `input`.
i::Handle<i::String> input_;
// The two alternative backing resources that input_ is switched between.
AsciiVectorResource ascii_resource_;
UC16VectorResource uc16_resource_;
};
// Checks that running a regular expression keeps working when execution is
// interrupted and the subject string is modified in the meantime.
TEST(RegExpStringModification) {
  v8::Locker v8_lock;
  v8::V8::Initialize();
  v8::HandleScope handle_scope;
  Local<Context> context;
  {
    // The LocalContext helper only lives inside this scope; the handle
    // taken from it is kept for the rest of the test.
    LocalContext scoped_env;
    context = scoped_env.local();
  }
  // The handle must still be non-empty after the helper has gone away.
  CHECK(!context.IsEmpty());
  context->Enter();
  // The fixture performs its own CHECKs; this should run through cleanly.
  RegExpStringModificationTest().RunTest();
  context->Exit();
}
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment