Commit e0954ca5 authored by yangguo@chromium.org

Take advantage of batched results when matching global regexp.

BUG=
TEST=regexp-global.js

Review URL: https://chromiumcodereview.appspot.com/10831126

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12258 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 4e82c3fb
......@@ -4777,7 +4777,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
__ add(r2, r2, Operand(2)); // r2 was a smi.
// Check that the static offsets vector buffer is large enough.
__ cmp(r2, Operand(OffsetsVector::kStaticOffsetsVectorSize));
__ cmp(r2, Operand(Isolate::kJSRegexpStaticOffsetsVectorSize));
__ b(hi, &runtime);
// r2: Number of capture registers
......
......@@ -1092,7 +1092,7 @@ ExternalReference ExternalReference::re_word_character_map() {
ExternalReference ExternalReference::address_of_static_offsets_vector(
Isolate* isolate) {
return ExternalReference(
OffsetsVector::static_offsets_vector_address(isolate));
reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector()));
}
ExternalReference ExternalReference::address_of_regexp_stack_memory_address(
......
......@@ -3748,7 +3748,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
__ add(edx, Immediate(2)); // edx was a smi.
// Check that the static offsets vector buffer is large enough.
__ cmp(edx, OffsetsVector::kStaticOffsetsVectorSize);
__ cmp(edx, Isolate::kJSRegexpStaticOffsetsVectorSize);
__ j(above, &runtime);
// ecx: RegExp data (FixedArray)
......
......@@ -308,7 +308,7 @@ class ThreadLocalTop BASE_EMBEDDED {
#define ISOLATE_INIT_ARRAY_LIST(V) \
/* SerializerDeserializer state. */ \
V(int, jsregexp_static_offsets_vector, kJSRegexpStaticOffsetsVectorSize) \
V(int32_t, jsregexp_static_offsets_vector, kJSRegexpStaticOffsetsVectorSize) \
V(int, bad_char_shift_table, kUC16AlphabetSize) \
V(int, good_suffix_shift_table, (kBMMaxShift + 1)) \
V(int, suffix_table, (kBMMaxShift + 1)) \
......
This diff is collapsed.
......@@ -93,6 +93,14 @@ class RegExpImpl {
JSRegExp::Flags flags,
Handle<String> match_pattern);
static int AtomExecRaw(Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
int32_t* output,
int output_size);
static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
......@@ -105,17 +113,11 @@ class RegExpImpl {
// This ensures that the regexp is compiled for the subject, and that
// the subject is flat.
// Returns the number of integer spaces required by IrregexpExecOnce
// as its "registers" argument. If the regexp cannot be compiled,
// as its "registers" argument. If the regexp cannot be compiled,
// an exception is set as pending, and this function returns negative.
static int IrregexpPrepare(Handle<JSRegExp> regexp,
Handle<String> subject);
// Calculate the size of offsets vector for the case of global regexp
// and the number of matches this vector is able to store.
static int GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
int registers_per_match,
int* max_matches);
// Execute a regular expression on the subject, starting from index.
// If matching succeeds, return the number of matches. This can be larger
// than one in the case of global regular expressions.
......@@ -125,17 +127,57 @@ class RegExpImpl {
static int IrregexpExecRaw(Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
Vector<int> registers);
int32_t* output,
int output_size);
// Execute an Irregexp bytecode pattern.
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
// captured positions. On a failure, the result is the null value.
// Returns an empty handle in case of an exception.
static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
Handle<JSArray> lastMatchInfo);
// Set last match info. If match is NULL, then setting captures is omitted.
static Handle<JSArray> SetLastMatchInfo(Handle<JSArray> last_match_info,
Handle<String> subject,
int capture_count,
int32_t* match);
// Cache of match results for a regexp applied to one subject string; entries
// are handed out one at a time through FetchNext().
// NOTE(review): only the declaration is visible here; how the cache is filled
// is defined by the out-of-line constructor and FetchNext() -- confirm there.
class GlobalCache {
public:
// Binds the cache to |regexp| and |subject|.
// NOTE(review): |is_global| presumably selects between single-match and
// batched global matching, and |isolate| presumably supplies the backing
// storage -- confirm against the definition.
GlobalCache(Handle<JSRegExp> regexp,
Handle<String> subject,
bool is_global,
Isolate* isolate);
~GlobalCache();
// Fetch the next entry in the cache for global regexp match results.
// This does not set the last match info. Upon failure, NULL is returned.
// The previous result is still available in memory when a failure happens.
// NOTE(review): the original comment said the cause of a failure can be
// checked with Result(), which this class does not declare; HasException()
// looks like the intended check -- confirm.
int32_t* FetchNext();
// NOTE(review): presumably returns the captures of the most recent
// successful match -- confirm against the definition.
int32_t* LastSuccessfulMatch();
// True when num_matches_ is negative.
inline bool HasException() { return num_matches_ < 0; }
private:
// A negative value is what HasException() reports as an exception.
int num_matches_;
int max_matches_;
int current_match_index_;
int registers_per_match_;
// Pointer to the last set of captures.
int32_t* register_array_;
int register_array_size_;
Handle<JSRegExp> regexp_;
Handle<String> subject_;
};
// Array index in the lastMatchInfo array.
static const int kLastCaptureCount = 0;
static const int kLastSubject = 1;
......@@ -195,30 +237,10 @@ class RegExpImpl {
static const int kRegWxpCompiledLimit = 1 * MB;
private:
static String* last_ascii_string_;
static String* two_byte_cached_string_;
static bool CompileIrregexp(
Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
static inline bool EnsureCompiledIrregexp(
Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
// Set the subject cache. The previous string buffer is not deleted, so the
// caller should ensure that it doesn't leak.
static void SetSubjectCache(String* subject,
char* utf8_subject,
int uft8_length,
int character_position,
int utf8_position);
// A one element cache of the last utf8_subject string and its length. The
// subject JS String object is cached in the heap. We also cache a
// translation between position and utf8 position.
static char* utf8_subject_cache_;
static int utf8_length_cache_;
static int utf8_position_;
static int character_position_;
};
......@@ -1622,40 +1644,6 @@ class RegExpEngine: public AllStatic {
};
// Scratch buffer of int slots for regexp offsets.
//
// Requests of at most Isolate::kJSRegexpStaticOffsetsVectorSize slots are
// served from the vector returned by isolate->jsregexp_static_offsets_vector();
// larger requests get a NewArray<int> allocation that the destructor releases
// with DeleteArray.
class OffsetsVector {
 public:
  // num_registers: number of int slots requested.
  // isolate: source of the static vector used when the request is small
  //          enough.
  inline OffsetsVector(int num_registers, Isolate* isolate)
      : offsets_vector_length_(num_registers) {
    if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
      vector_ = NewArray<int>(offsets_vector_length_);
    } else {
      vector_ = isolate->jsregexp_static_offsets_vector();
    }
  }

  inline ~OffsetsVector() {
    // Mirror the constructor's test: only a buffer obtained from NewArray is
    // freed here; the vector obtained from the isolate is left alone.
    if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
      DeleteArray(vector_);
      vector_ = NULL;
    }
  }

  // Start of the buffer (either the allocated array or the isolate's vector).
  inline int* vector() { return vector_; }
  // Number of slots that were requested at construction.
  inline int length() { return offsets_vector_length_; }

  static const int kStaticOffsetsVectorSize =
      Isolate::kJSRegexpStaticOffsetsVectorSize;

 private:
  // Not copyable: a copy would share vector_, and both destructors would call
  // DeleteArray on the same allocation. Declared but intentionally never
  // defined, so an accidental copy fails at compile or link time.
  OffsetsVector(const OffsetsVector&);
  OffsetsVector& operator=(const OffsetsVector&);

  // Address of the isolate's static offsets vector; private, reachable by the
  // friend class ExternalReference.
  static Address static_offsets_vector_address(Isolate* isolate) {
    return reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector());
  }

  int* vector_;
  int offsets_vector_length_;

  friend class ExternalReference;
};
} } // namespace v8::internal
#endif // V8_JSREGEXP_H_
......@@ -4977,7 +4977,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
__ Addu(a2, a2, Operand(2)); // a2 was a smi.
// Check that the static offsets vector buffer is large enough.
__ Branch(&runtime, hi, a2, Operand(OffsetsVector::kStaticOffsetsVectorSize));
__ Branch(
&runtime, hi, a2, Operand(Isolate::kJSRegexpStaticOffsetsVectorSize));
// a2: Number of capture registers
// regexp_data: RegExp data (FixedArray)
......
This diff is collapsed.
......@@ -2791,7 +2791,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// Calculate number of capture registers (number_of_captures + 1) * 2.
__ leal(rdx, Operand(rdx, rdx, times_1, 2));
// Check that the static offsets vector buffer is large enough.
__ cmpl(rdx, Immediate(OffsetsVector::kStaticOffsetsVectorSize));
__ cmpl(rdx, Immediate(Isolate::kJSRegexpStaticOffsetsVectorSize));
__ j(above, &runtime);
// rax: RegExp data (FixedArray)
......
......@@ -267,6 +267,7 @@ TEST(Parser) {
CHECK_PARSE_EQ("\\u003z", "'u003z'");
CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");
CHECK_SIMPLE("", false);
CHECK_SIMPLE("a", true);
CHECK_SIMPLE("a|b", false);
CHECK_SIMPLE("a\\n", false);
......@@ -1349,7 +1350,7 @@ TEST(MacroAssembler) {
V8::Initialize(NULL);
byte codes[1024];
RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024),
Isolate::Current()->zone());
Isolate::Current()->runtime_zone());
// ^f(o)o.
Label fail, fail2, start;
uc16 foo_chars[3];
......
......@@ -139,3 +139,104 @@ str = str.replace(/\b(?=u(p))/g, function(match, capture) {
});
assertEquals("1up 1up 1up 1up", str);
// Build the source of a regexp with a *lot* of captures by wrapping "(a)" in
// 500 further capture groups and appending a literal "1":
// re_string = "(((...((a))...)))1"
var re_string = "(a)";
var i = 0;
while (i < 500) {
  re_string = "(" + re_string + ")";
  i++;
}
re_string += "1";

// Regexps under test. Entry k of each expectation array below belongs to
// entry k of this array.
var regexps = [
  /a1/g,                       // Atomic regexp.
  /\w1/g,                      // Small regexp (no capture).
  /(a)1/g,                     // Small regexp (one capture).
  new RegExp(re_string, "g")   // Large regexp (a lot of captures).
];

// Expected RegExp.lastMatch after matching with the corresponding regexp.
var last_match_expectations = ["a1", "a1", "a1", "a1"];

// Expected RegExp["$1"] after matching with the corresponding regexp.
var first_capture_expectations = ["", "", "a", "a"];
// Runs subject.replace(re, replacement) for every entry of the file-level
// regexps array and checks the result and the last match info afterwards.
//
// result_expectation: string every replace call must produce.
// subject:            string being searched.
// regexp:             not read by the body; each entry of regexps is used.
// replacement:        second argument handed to String.prototype.replace.
function test_replace(result_expectation, subject, regexp, replacement) {
  var subject_is_empty = (subject.length == 0);
  for (var k = 0; k < regexps.length; k++) {
    // Overwrite last match info.
    "deadbeef".replace(/(dead)beef/, "$1holeycow");

    // Conduct tests.
    var replaced = subject.replace(regexps[k], replacement);
    assertEquals(result_expectation, replaced);

    // For an empty subject the last match info is expected to be the one
    // left behind by the "deadbeef" replace above.
    var expected_last_match =
        subject_is_empty ? "deadbeef" : last_match_expectations[k];
    var expected_first_capture =
        subject_is_empty ? "dead" : first_capture_expectations[k];
    assertEquals(expected_last_match, RegExp.lastMatch);
    assertEquals(expected_first_capture, RegExp["$1"]);
  }
}
// Runs subject.match(re) for every entry of the file-level regexps array and
// checks the result and the last match info afterwards.
//
// result_expectation: array the match call must produce, or null when the
//                     call is expected to return null.
// subject:            string being searched.
// regexp:             not read by the body; each entry of regexps is used.
function test_match(result_expectation, subject, regexp) {
  var subject_is_empty = (subject.length == 0);
  for (var k = 0; k < regexps.length; k++) {
    // Overwrite last match info.
    "deadbeef".replace(/(dead)beef/, "$1holeycow");

    // Conduct tests.
    var matches = subject.match(regexps[k]);
    if (result_expectation == null) {
      assertNull(matches);
    } else {
      assertArrayEquals(result_expectation, matches);
    }

    // For an empty subject the last match info is expected to be the one
    // left behind by the "deadbeef" replace above.
    var expected_last_match =
        subject_is_empty ? "deadbeef" : last_match_expectations[k];
    var expected_first_capture =
        subject_is_empty ? "dead" : first_capture_expectations[k];
    assertEquals(expected_last_match, RegExp.lastMatch);
    assertEquals(expected_first_capture, RegExp["$1"]);
  }
}
// Run the replace and match tests for subjects containing 0 to 199 matches.
for (var m = 0; m < 200; m++) {
  // Build a subject holding "a11" m times, together with the result each
  // test expects for it.
  var subject = "";
  var test_1_expectation = "";
  var test_2_expectation = "";
  var test_3_expectation = null;
  if (m != 0) {
    test_3_expectation = new Array();
  }
  var i = 0;
  while (i < m) {
    subject += "a11";
    test_1_expectation += "x1";
    test_2_expectation += "1";
    test_3_expectation.push("a1");
    i++;
  }

  // Test 1a: String.replace with string.
  test_replace(test_1_expectation, subject, /a1/g, "x");

  // Test 1b: String.replace with function.
  function f() { return "x"; }
  test_replace(test_1_expectation, subject, /a1/g, f);

  // Test 2a: String.replace with empty string.
  test_replace(test_2_expectation, subject, /a1/g, "");

  // Test 3a: String.match.
  test_match(test_3_expectation, subject, /a1/g);
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment