Commit 7cbca775 authored by yangguo@chromium.org's avatar yangguo@chromium.org

Reland regexp global optimizations.

BUG=

Review URL: https://chromiumcodereview.appspot.com/10872010

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12396 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent ee6ec7e2
......@@ -4818,7 +4818,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
__ add(r2, r2, Operand(2)); // r2 was a smi.
// Check that the static offsets vector buffer is large enough.
__ cmp(r2, Operand(OffsetsVector::kStaticOffsetsVectorSize));
__ cmp(r2, Operand(Isolate::kJSRegexpStaticOffsetsVectorSize));
__ b(hi, &runtime);
// r2: Number of capture registers
......
......@@ -1092,7 +1092,7 @@ ExternalReference ExternalReference::re_word_character_map() {
ExternalReference ExternalReference::address_of_static_offsets_vector(
Isolate* isolate) {
return ExternalReference(
OffsetsVector::static_offsets_vector_address(isolate));
reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector()));
}
ExternalReference ExternalReference::address_of_regexp_stack_memory_address(
......
......@@ -3748,7 +3748,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
__ add(edx, Immediate(2)); // edx was a smi.
// Check that the static offsets vector buffer is large enough.
__ cmp(edx, OffsetsVector::kStaticOffsetsVectorSize);
__ cmp(edx, Isolate::kJSRegexpStaticOffsetsVectorSize);
__ j(above, &runtime);
// ecx: RegExp data (FixedArray)
......
......@@ -308,7 +308,7 @@ class ThreadLocalTop BASE_EMBEDDED {
#define ISOLATE_INIT_ARRAY_LIST(V) \
/* SerializerDeserializer state. */ \
V(int, jsregexp_static_offsets_vector, kJSRegexpStaticOffsetsVectorSize) \
V(int32_t, jsregexp_static_offsets_vector, kJSRegexpStaticOffsetsVectorSize) \
V(int, bad_char_shift_table, kUC16AlphabetSize) \
V(int, good_suffix_shift_table, (kBMMaxShift + 1)) \
V(int, suffix_table, (kBMMaxShift + 1)) \
......
This diff is collapsed.
......@@ -93,6 +93,14 @@ class RegExpImpl {
JSRegExp::Flags flags,
Handle<String> match_pattern);
static int AtomExecRaw(Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
int32_t* output,
int output_size);
static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
......@@ -105,17 +113,11 @@ class RegExpImpl {
// This ensures that the regexp is compiled for the subject, and that
// the subject is flat.
// Returns the number of integer spaces required by IrregexpExecOnce
// as its "registers" argument. If the regexp cannot be compiled,
// as its "registers" argument. If the regexp cannot be compiled,
// an exception is set as pending, and this function returns negative.
static int IrregexpPrepare(Handle<JSRegExp> regexp,
Handle<String> subject);
// Calculate the size of offsets vector for the case of global regexp
// and the number of matches this vector is able to store.
static int GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
int registers_per_match,
int* max_matches);
// Execute a regular expression on the subject, starting from index.
// If matching succeeds, return the number of matches. This can be larger
// than one in the case of global regular expressions.
......@@ -125,17 +127,57 @@ class RegExpImpl {
static int IrregexpExecRaw(Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
Vector<int> registers);
int32_t* output,
int output_size);
// Execute an Irregexp bytecode pattern.
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
// captured positions. On a failure, the result is the null value.
// Returns an empty handle in case of an exception.
static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
Handle<JSArray> lastMatchInfo);
// Set last match info. If match is NULL, then setting captures is omitted.
static Handle<JSArray> SetLastMatchInfo(Handle<JSArray> last_match_info,
Handle<String> subject,
int capture_count,
int32_t* match);
class GlobalCache {
public:
GlobalCache(Handle<JSRegExp> regexp,
Handle<String> subject,
bool is_global,
Isolate* isolate);
~GlobalCache();
// Fetch the next entry in the cache for global regexp match results.
// This does not set the last match info. Upon failure, NULL is returned.
// The cause can be checked with Result(). The previous
// result is still in available in memory when a failure happens.
int32_t* FetchNext();
int32_t* LastSuccessfulMatch();
inline bool HasException() { return num_matches_ < 0; }
private:
int num_matches_;
int max_matches_;
int current_match_index_;
int registers_per_match_;
// Pointer to the last set of captures.
int32_t* register_array_;
int register_array_size_;
Handle<JSRegExp> regexp_;
Handle<String> subject_;
};
// Array index in the lastMatchInfo array.
static const int kLastCaptureCount = 0;
static const int kLastSubject = 1;
......@@ -195,30 +237,10 @@ class RegExpImpl {
static const int kRegWxpCompiledLimit = 1 * MB;
private:
static String* last_ascii_string_;
static String* two_byte_cached_string_;
static bool CompileIrregexp(
Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
static inline bool EnsureCompiledIrregexp(
Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
// Set the subject cache. The previous string buffer is not deleted, so the
// caller should ensure that it doesn't leak.
static void SetSubjectCache(String* subject,
char* utf8_subject,
int uft8_length,
int character_position,
int utf8_position);
// A one element cache of the last utf8_subject string and its length. The
// subject JS String object is cached in the heap. We also cache a
// translation between position and utf8 position.
static char* utf8_subject_cache_;
static int utf8_length_cache_;
static int utf8_position_;
static int character_position_;
};
......@@ -1622,40 +1644,6 @@ class RegExpEngine: public AllStatic {
};
class OffsetsVector {
public:
inline OffsetsVector(int num_registers, Isolate* isolate)
: offsets_vector_length_(num_registers) {
if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
vector_ = NewArray<int>(offsets_vector_length_);
} else {
vector_ = isolate->jsregexp_static_offsets_vector();
}
}
inline ~OffsetsVector() {
if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
DeleteArray(vector_);
vector_ = NULL;
}
}
inline int* vector() { return vector_; }
inline int length() { return offsets_vector_length_; }
static const int kStaticOffsetsVectorSize =
Isolate::kJSRegexpStaticOffsetsVectorSize;
private:
static Address static_offsets_vector_address(Isolate* isolate) {
return reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector());
}
int* vector_;
int offsets_vector_length_;
friend class ExternalReference;
};
} } // namespace v8::internal
#endif // V8_JSREGEXP_H_
......@@ -4977,7 +4977,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
__ Addu(a2, a2, Operand(2)); // a2 was a smi.
// Check that the static offsets vector buffer is large enough.
__ Branch(&runtime, hi, a2, Operand(OffsetsVector::kStaticOffsetsVectorSize));
__ Branch(
&runtime, hi, a2, Operand(Isolate::kJSRegexpStaticOffsetsVectorSize));
// a2: Number of capture registers
// regexp_data: RegExp data (FixedArray)
......
This diff is collapsed.
......@@ -29,6 +29,7 @@
#define V8_UNICODE_INL_H_
#include "unicode.h"
#include "checks.h"
namespace unibrow {
......@@ -144,6 +145,7 @@ uchar CharacterStream::GetNext() {
} else {
remaining_--;
}
ASSERT(BoundsCheck(cursor_));
return result;
}
......
......@@ -201,6 +201,7 @@ class CharacterStream {
protected:
virtual void FillBuffer() = 0;
virtual bool BoundsCheck(unsigned offset) = 0;
// The number of characters left in the current buffer
unsigned remaining_;
// The current offset within the buffer
......@@ -228,6 +229,9 @@ class InputBuffer : public CharacterStream {
InputBuffer() { }
explicit InputBuffer(Input input) { Reset(input); }
virtual void FillBuffer();
virtual bool BoundsCheck(unsigned offset) {
return (buffer_ != util_buffer_) || (offset < kSize);
}
// A custom offset that can be used by the string implementation to
// mark progress within the encoded string.
......
......@@ -2791,7 +2791,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// Calculate number of capture registers (number_of_captures + 1) * 2.
__ leal(rdx, Operand(rdx, rdx, times_1, 2));
// Check that the static offsets vector buffer is large enough.
__ cmpl(rdx, Immediate(OffsetsVector::kStaticOffsetsVectorSize));
__ cmpl(rdx, Immediate(Isolate::kJSRegexpStaticOffsetsVectorSize));
__ j(above, &runtime);
// rax: RegExp data (FixedArray)
......
......@@ -267,6 +267,7 @@ TEST(Parser) {
CHECK_PARSE_EQ("\\u003z", "'u003z'");
CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");
CHECK_SIMPLE("", false);
CHECK_SIMPLE("a", true);
CHECK_SIMPLE("a|b", false);
CHECK_SIMPLE("a\\n", false);
......@@ -1349,7 +1350,7 @@ TEST(MacroAssembler) {
V8::Initialize(NULL);
byte codes[1024];
RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024),
Isolate::Current()->zone());
Isolate::Current()->runtime_zone());
// ^f(o)o.
Label fail, fail2, start;
uc16 foo_chars[3];
......
// Copyright 2011 the V8 project authors. All rights reserved.
// Copyright 2012 the V8 project authors. All rights reserved.
// Check that we can traverse very deep stacks of ConsStrings using
// StringInputBuffer. Check that Get(int) works on very deep stacks
......@@ -691,3 +691,20 @@ TEST(RegExpOverflow) {
CHECK(result.IsEmpty());
CHECK(context->HasOutOfMemoryException());
}
TEST(StringReplaceAtomTwoByteResult) {
InitializeVM();
HandleScope scope;
LocalContext context;
v8::Local<v8::Value> result = CompileRun(
"var subject = 'ascii~only~string~'; "
"var replace = '\x80'; "
"subject.replace(/~/g, replace); ");
CHECK(result->IsString());
Handle<String> string = v8::Utils::OpenHandle(v8::String::Cast(*result));
CHECK(string->IsSeqTwoByteString());
v8::Local<v8::String> expected = v8_str("ascii\x80only\x80string\x80");
CHECK(expected->Equals(result));
}
......@@ -239,4 +239,16 @@ for (var m = 0; m < 200; m++) {
// Test 3a: String.match.
test_match(test_3_expectation, subject, /a1/g);
}
\ No newline at end of file
}
// Test String hashing (compiling regular expression includes hashing).
var crosscheck = "\x80";
for (var i = 0; i < 12; i++) crosscheck += crosscheck;
new RegExp(crosscheck);
var subject = "ascii~only~string~here~";
var replacement = "\x80";
var result = subject.replace(/~/g, replacement);
for (var i = 0; i < 5; i++) result += result;
new RegExp(result);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment