Commit 4254388c authored by lrn@chromium.org

X64: Implement RegExp natively.

Review URL: http://codereview.chromium.org/165443


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@2688 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 7b056783
...@@ -101,6 +101,9 @@ LIBRARY_FLAGS = { ...@@ -101,6 +101,9 @@ LIBRARY_FLAGS = {
'regexp:native': { 'regexp:native': {
'arch:ia32' : { 'arch:ia32' : {
'CPPDEFINES': ['V8_NATIVE_REGEXP'] 'CPPDEFINES': ['V8_NATIVE_REGEXP']
},
'arch:x64' : {
'CPPDEFINES': ['V8_NATIVE_REGEXP']
} }
} }
}, },
...@@ -716,7 +719,11 @@ class BuildContext(object): ...@@ -716,7 +719,11 @@ class BuildContext(object):
result = [] result = []
result += source.get('all', []) result += source.get('all', [])
for (name, value) in self.options.iteritems(): for (name, value) in self.options.iteritems():
result += source.get(name + ':' + value, []) source_value = source.get(name + ':' + value, [])
if type(source_value) == dict:
result += self.GetRelevantSources(source_value)
else:
result += source_value
return sorted(result) return sorted(result)
def AppendFlags(self, options, added): def AppendFlags(self, options, added):
......
...@@ -63,24 +63,32 @@ SOURCES = { ...@@ -63,24 +63,32 @@ SOURCES = {
'arm/register-allocator-arm.cc', 'arm/stub-cache-arm.cc', 'arm/register-allocator-arm.cc', 'arm/stub-cache-arm.cc',
'arm/virtual-frame-arm.cc' 'arm/virtual-frame-arm.cc'
], ],
'arch:ia32': [ 'arch:ia32': {
'ia32/assembler-ia32.cc', 'ia32/builtins-ia32.cc', 'ia32/cfg-ia32.cc', 'all': [
'ia32/codegen-ia32.cc', 'ia32/cpu-ia32.cc', 'ia32/disasm-ia32.cc', 'ia32/assembler-ia32.cc', 'ia32/builtins-ia32.cc', 'ia32/cfg-ia32.cc',
'ia32/debug-ia32.cc', 'ia32/frames-ia32.cc', 'ia32/ic-ia32.cc', 'ia32/codegen-ia32.cc', 'ia32/cpu-ia32.cc', 'ia32/disasm-ia32.cc',
'ia32/jump-target-ia32.cc', 'ia32/macro-assembler-ia32.cc', 'ia32/debug-ia32.cc', 'ia32/frames-ia32.cc', 'ia32/ic-ia32.cc',
'ia32/regexp-macro-assembler-ia32.cc', 'ia32/jump-target-ia32.cc', 'ia32/macro-assembler-ia32.cc',
'ia32/register-allocator-ia32.cc', 'ia32/stub-cache-ia32.cc', 'ia32/register-allocator-ia32.cc', 'ia32/stub-cache-ia32.cc',
'ia32/virtual-frame-ia32.cc' 'ia32/virtual-frame-ia32.cc'
], ],
'arch:x64': [ 'regexp:native': [
'x64/assembler-x64.cc', 'x64/builtins-x64.cc', 'x64/cfg-x64.cc', 'ia32/regexp-macro-assembler-ia32.cc',
'x64/codegen-x64.cc', 'x64/cpu-x64.cc', 'x64/disasm-x64.cc', ]
'x64/debug-x64.cc', 'x64/frames-x64.cc', 'x64/ic-x64.cc', },
'x64/jump-target-x64.cc', 'x64/macro-assembler-x64.cc', 'arch:x64': {
# 'x64/regexp-macro-assembler-x64.cc', 'all': [
'x64/register-allocator-x64.cc', 'x64/assembler-x64.cc', 'x64/builtins-x64.cc', 'x64/cfg-x64.cc',
'x64/stub-cache-x64.cc', 'x64/virtual-frame-x64.cc' 'x64/codegen-x64.cc', 'x64/cpu-x64.cc', 'x64/disasm-x64.cc',
], 'x64/debug-x64.cc', 'x64/frames-x64.cc', 'x64/ic-x64.cc',
'x64/jump-target-x64.cc', 'x64/macro-assembler-x64.cc',
'x64/register-allocator-x64.cc',
'x64/stub-cache-x64.cc', 'x64/virtual-frame-x64.cc'
],
'regexp:native': [
'x64/regexp-macro-assembler-x64.cc'
]
},
'simulator:arm': ['arm/simulator-arm.cc'], 'simulator:arm': ['arm/simulator-arm.cc'],
'os:freebsd': ['platform-freebsd.cc', 'platform-posix.cc'], 'os:freebsd': ['platform-freebsd.cc', 'platform-posix.cc'],
'os:linux': ['platform-linux.cc', 'platform-posix.cc'], 'os:linux': ['platform-linux.cc', 'platform-posix.cc'],
......
...@@ -82,8 +82,8 @@ static void RecordWriteHelper(MacroAssembler* masm, ...@@ -82,8 +82,8 @@ static void RecordWriteHelper(MacroAssembler* masm,
// page_start + kObjectStartOffset + objectSize // page_start + kObjectStartOffset + objectSize
// where objectSize is FixedArray::kHeaderSize + kPointerSize * array_length. // where objectSize is FixedArray::kHeaderSize + kPointerSize * array_length.
// Add the delta between the end of the normal RSet and the start of the // Add the delta between the end of the normal RSet and the start of the
// extra RSet to 'object', so that addressing the bit using 'pointer_offset' // extra RSet to 'page_start', so that addressing the bit using
// hits the extra RSet words. // 'pointer_offset' hits the extra RSet words.
masm->lea(page_start, masm->lea(page_start,
Operand(page_start, array_length, times_pointer_size, Operand(page_start, array_length, times_pointer_size,
Page::kObjectStartOffset + FixedArray::kHeaderSize Page::kObjectStartOffset + FixedArray::kHeaderSize
......
...@@ -54,7 +54,7 @@ namespace internal { ...@@ -54,7 +54,7 @@ namespace internal {
* *
* Each call to a public method should retain this convention. * Each call to a public method should retain this convention.
* The stack will have the following structure: * The stack will have the following structure:
* - stack_area_top (High end of the memory area to use as * - stack_area_base (High end of the memory area to use as
* backtracking stack) * backtracking stack)
* - at_start (if 1, start at start of string, if 0, don't) * - at_start (if 1, start at start of string, if 0, don't)
* - int* capture_array (int[num_saved_registers_], for output). * - int* capture_array (int[num_saved_registers_], for output).
...@@ -78,13 +78,13 @@ namespace internal { ...@@ -78,13 +78,13 @@ namespace internal {
* character of the string). The remaining registers start out as garbage. * character of the string). The remaining registers start out as garbage.
* *
* The data up to the return address must be placed there by the calling * The data up to the return address must be placed there by the calling
* code, e.g., by calling the code entry as cast to: * code, by calling the code entry as cast to a function with the signature:
* int (*match)(String* input_string, * int (*match)(String* input_string,
* Address start, * Address start,
* Address end, * Address end,
* int* capture_output_array, * int* capture_output_array,
* bool at_start, * bool at_start,
* byte* stack_area_top) * byte* stack_area_base)
*/ */
#define __ ACCESS_MASM(masm_) #define __ ACCESS_MASM(masm_)
...@@ -93,7 +93,6 @@ RegExpMacroAssemblerIA32::RegExpMacroAssemblerIA32( ...@@ -93,7 +93,6 @@ RegExpMacroAssemblerIA32::RegExpMacroAssemblerIA32(
Mode mode, Mode mode,
int registers_to_save) int registers_to_save)
: masm_(new MacroAssembler(NULL, kRegExpCodeSize)), : masm_(new MacroAssembler(NULL, kRegExpCodeSize)),
constants_(kRegExpConstantsSize),
mode_(mode), mode_(mode),
num_registers_(registers_to_save), num_registers_(registers_to_save),
num_saved_registers_(registers_to_save), num_saved_registers_(registers_to_save),
...@@ -156,13 +155,6 @@ void RegExpMacroAssemblerIA32::Bind(Label* label) { ...@@ -156,13 +155,6 @@ void RegExpMacroAssemblerIA32::Bind(Label* label) {
} }
void RegExpMacroAssemblerIA32::CheckBitmap(uc16 start,
Label* bitmap,
Label* on_zero) {
UNIMPLEMENTED();
}
void RegExpMacroAssemblerIA32::CheckCharacter(uint32_t c, Label* on_equal) { void RegExpMacroAssemblerIA32::CheckCharacter(uint32_t c, Label* on_equal) {
__ cmp(current_character(), c); __ cmp(current_character(), c);
BranchOrBacktrack(equal, on_equal); BranchOrBacktrack(equal, on_equal);
...@@ -217,15 +209,9 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str, ...@@ -217,15 +209,9 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
BranchOrBacktrack(greater, on_failure); BranchOrBacktrack(greater, on_failure);
} }
Label backtrack;
if (on_failure == NULL) { if (on_failure == NULL) {
// Avoid inlining the Backtrack macro for each test. // Instead of inlining a backtrack, (re)use the global backtrack target.
Label skip_backtrack; on_failure = &backtrack_label_;
__ jmp(&skip_backtrack);
__ bind(&backtrack);
Backtrack();
__ bind(&skip_backtrack);
on_failure = &backtrack;
} }
for (int i = 0; i < str.length(); i++) { for (int i = 0; i < str.length(); i++) {
...@@ -581,34 +567,6 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type, ...@@ -581,34 +567,6 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
} }
} }
void RegExpMacroAssemblerIA32::DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations) {
UNIMPLEMENTED();
}
void RegExpMacroAssemblerIA32::DispatchByteMap(
uc16 start,
Label* byte_map,
const Vector<Label*>& destinations) {
UNIMPLEMENTED();
}
void RegExpMacroAssemblerIA32::DispatchHighByteMap(
byte start,
Label* byte_map,
const Vector<Label*>& destinations) {
UNIMPLEMENTED();
}
void RegExpMacroAssemblerIA32::EmitOrLink(Label* label) {
UNIMPLEMENTED(); // Has no use.
}
void RegExpMacroAssemblerIA32::Fail() { void RegExpMacroAssemblerIA32::Fail() {
ASSERT(FAILURE == 0); // Return value for failure is zero. ASSERT(FAILURE == 0); // Return value for failure is zero.
...@@ -668,17 +626,17 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) { ...@@ -668,17 +626,17 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
__ mov(edi, Operand(ebp, kInputStart)); __ mov(edi, Operand(ebp, kInputStart));
// Set up edi to be negative offset from string end. // Set up edi to be negative offset from string end.
__ sub(edi, Operand(esi)); __ sub(edi, Operand(esi));
if (num_saved_registers_ > 0) { // Set eax to address of char before start of input
// (effectively string position -1).
__ lea(eax, Operand(edi, -char_size()));
// Store this value in a local variable, for use when clearing
// position registers.
__ mov(Operand(ebp, kInputStartMinusOne), eax);
if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
// Fill saved registers with initial value = start offset - 1 // Fill saved registers with initial value = start offset - 1
// Fill in stack push order, to avoid accessing across an unwritten // Fill in stack push order, to avoid accessing across an unwritten
// page (a problem on Windows). // page (a problem on Windows).
__ mov(ecx, kRegisterZero); __ mov(ecx, kRegisterZero);
// Set eax to address of char before start of input
// (effectively string position -1).
__ lea(eax, Operand(edi, -char_size()));
// Store this value in a local variable, for use when clearing
// position registers.
__ mov(Operand(ebp, kInputStartMinusOne), eax);
Label init_loop; Label init_loop;
__ bind(&init_loop); __ bind(&init_loop);
__ mov(Operand(ebp, ecx, times_1, +0), eax); __ mov(Operand(ebp, ecx, times_1, +0), eax);
...@@ -942,139 +900,8 @@ void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) { ...@@ -942,139 +900,8 @@ void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
} }
RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Match(
Handle<Code> regexp_code,
Handle<String> subject,
int* offsets_vector,
int offsets_vector_length,
int previous_index) {
ASSERT(subject->IsFlat());
ASSERT(previous_index >= 0);
ASSERT(previous_index <= subject->length());
// No allocations before calling the regexp, but we can't use
// AssertNoAllocation, since regexps might be preempted, and another thread
// might do allocation anyway.
String* subject_ptr = *subject;
// Character offsets into string.
int start_offset = previous_index;
int end_offset = subject_ptr->length();
bool is_ascii = subject->IsAsciiRepresentation();
if (StringShape(subject_ptr).IsCons()) {
subject_ptr = ConsString::cast(subject_ptr)->first();
} else if (StringShape(subject_ptr).IsSliced()) {
SlicedString* slice = SlicedString::cast(subject_ptr);
start_offset += slice->start();
end_offset += slice->start();
subject_ptr = slice->buffer();
}
// Ensure that an underlying string has the same ascii-ness.
ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
// String is now either Sequential or External
int char_size_shift = is_ascii ? 0 : 1;
int char_length = end_offset - start_offset;
const byte* input_start =
StringCharacterPosition(subject_ptr, start_offset);
int byte_length = char_length << char_size_shift;
const byte* input_end = input_start + byte_length;
RegExpMacroAssemblerIA32::Result res = Execute(*regexp_code,
subject_ptr,
start_offset,
input_start,
input_end,
offsets_vector,
previous_index == 0);
if (res == SUCCESS) {
// Capture values are relative to start_offset only.
// Convert them to be relative to start of string.
for (int i = 0; i < offsets_vector_length; i++) {
if (offsets_vector[i] >= 0) {
offsets_vector[i] += previous_index;
}
}
}
return res;
}
// Private methods: // Private methods:
static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Execute(
Code* code,
String* input,
int start_offset,
const byte* input_start,
const byte* input_end,
int* output,
bool at_start) {
typedef int (*matcher)(String*, int, const byte*,
const byte*, int*, int, Address);
matcher matcher_func = FUNCTION_CAST<matcher>(code->entry());
int at_start_val = at_start ? 1 : 0;
// Ensure that the minimum stack has been allocated.
RegExpStack stack;
Address stack_top = RegExpStack::stack_top();
int result = matcher_func(input,
start_offset,
input_start,
input_end,
output,
at_start_val,
stack_top);
ASSERT(result <= SUCCESS);
ASSERT(result >= RETRY);
if (result == EXCEPTION && !Top::has_pending_exception()) {
// We detected a stack overflow (on the backtrack stack) in RegExp code,
// but haven't created the exception yet.
Top::StackOverflow();
}
return static_cast<Result>(result);
}
int RegExpMacroAssemblerIA32::CaseInsensitiveCompareUC16(Address byte_offset1,
Address byte_offset2,
size_t byte_length) {
// This function is not allowed to cause a garbage collection.
// A GC might move the calling generated code and invalidate the
// return address on the stack.
ASSERT(byte_length % 2 == 0);
uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
size_t length = byte_length >> 1;
for (size_t i = 0; i < length; i++) {
unibrow::uchar c1 = substring1[i];
unibrow::uchar c2 = substring2[i];
if (c1 != c2) {
unibrow::uchar s1[1] = { c1 };
canonicalize.get(c1, '\0', s1);
if (s1[0] != c2) {
unibrow::uchar s2[1] = { c2 };
canonicalize.get(c2, '\0', s2);
if (s1[0] != s2[0]) {
return 0;
}
}
}
}
return 1;
}
void RegExpMacroAssemblerIA32::CallCheckStackGuardState(Register scratch) { void RegExpMacroAssemblerIA32::CallCheckStackGuardState(Register scratch) {
int num_arguments = 3; int num_arguments = 3;
FrameAlign(num_arguments, scratch); FrameAlign(num_arguments, scratch);
...@@ -1096,35 +923,6 @@ static T& frame_entry(Address re_frame, int frame_offset) { ...@@ -1096,35 +923,6 @@ static T& frame_entry(Address re_frame, int frame_offset) {
} }
const byte* RegExpMacroAssemblerIA32::StringCharacterPosition(String* subject,
int start_index) {
// Not just flat, but ultra flat.
ASSERT(subject->IsExternalString() || subject->IsSeqString());
ASSERT(start_index >= 0);
ASSERT(start_index <= subject->length());
if (subject->IsAsciiRepresentation()) {
const byte* address;
if (StringShape(subject).IsExternal()) {
const char* data = ExternalAsciiString::cast(subject)->resource()->data();
address = reinterpret_cast<const byte*>(data);
} else {
ASSERT(subject->IsSeqAsciiString());
char* data = SeqAsciiString::cast(subject)->GetChars();
address = reinterpret_cast<const byte*>(data);
}
return address + start_index;
}
const uc16* data;
if (StringShape(subject).IsExternal()) {
data = ExternalTwoByteString::cast(subject)->resource()->data();
} else {
ASSERT(subject->IsSeqTwoByteString());
data = SeqTwoByteString::cast(subject)->GetChars();
}
return reinterpret_cast<const byte*>(data + start_index);
}
int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address, int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
Code* re_code, Code* re_code,
Address re_frame) { Address re_frame) {
...@@ -1198,18 +996,18 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address, ...@@ -1198,18 +996,18 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
Address RegExpMacroAssemblerIA32::GrowStack(Address stack_pointer, Address RegExpMacroAssemblerIA32::GrowStack(Address stack_pointer,
Address* stack_top) { Address* stack_base) {
size_t size = RegExpStack::stack_capacity(); size_t size = RegExpStack::stack_capacity();
Address old_stack_top = RegExpStack::stack_top(); Address old_stack_base = RegExpStack::stack_base();
ASSERT(old_stack_top == *stack_top); ASSERT(old_stack_base == *stack_base);
ASSERT(stack_pointer <= old_stack_top); ASSERT(stack_pointer <= old_stack_base);
ASSERT(static_cast<size_t>(old_stack_top - stack_pointer) <= size); ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
Address new_stack_top = RegExpStack::EnsureCapacity(size * 2); Address new_stack_base = RegExpStack::EnsureCapacity(size * 2);
if (new_stack_top == NULL) { if (new_stack_base == NULL) {
return NULL; return NULL;
} }
*stack_top = new_stack_top; *stack_base = new_stack_base;
return new_stack_top - (old_stack_top - stack_pointer); return new_stack_base - (old_stack_base - stack_pointer);
} }
...@@ -1373,11 +1171,5 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset, ...@@ -1373,11 +1171,5 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset,
} }
void RegExpMacroAssemblerIA32::LoadConstantBufferAddress(Register reg,
ArraySlice* buffer) {
__ mov(reg, buffer->array());
__ add(Operand(reg), Immediate(buffer->base_offset()));
}
#undef __ #undef __
}} // namespace v8::internal }} // namespace v8::internal
...@@ -31,21 +31,8 @@ ...@@ -31,21 +31,8 @@
namespace v8 { namespace v8 {
namespace internal { namespace internal {
class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler {
public: public:
// Type of input string to generate code for.
enum Mode { ASCII = 1, UC16 = 2 };
// Result of calling the generated RegExp code:
// RETRY: Something significant changed during execution, and the matching
// should be retried from scratch.
// EXCEPTION: Something failed during execution. If no exception has been
// thrown, it's an internal out-of-memory, and the caller should
// throw the exception.
// FAILURE: Matching failed.
// SUCCESS: Matching succeeded, and the output array has been filled with
// capture positions.
enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
RegExpMacroAssemblerIA32(Mode mode, int registers_to_save); RegExpMacroAssemblerIA32(Mode mode, int registers_to_save);
virtual ~RegExpMacroAssemblerIA32(); virtual ~RegExpMacroAssemblerIA32();
virtual int stack_limit_slack(); virtual int stack_limit_slack();
...@@ -54,7 +41,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -54,7 +41,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void Backtrack(); virtual void Backtrack();
virtual void Bind(Label* label); virtual void Bind(Label* label);
virtual void CheckAtStart(Label* on_at_start); virtual void CheckAtStart(Label* on_at_start);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void CheckCharacter(uint32_t c, Label* on_equal); virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c, virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t mask, uint32_t mask,
...@@ -88,16 +74,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -88,16 +74,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
int cp_offset, int cp_offset,
bool check_offset, bool check_offset,
Label* on_no_match); Label* on_no_match);
virtual void DispatchByteMap(uc16 start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void DispatchHalfNibbleMap(uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations);
virtual void DispatchHighByteMap(byte start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void EmitOrLink(Label* label);
virtual void Fail(); virtual void Fail();
virtual Handle<Object> GetCode(Handle<String> source); virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label); virtual void GoTo(Label* label);
...@@ -123,20 +99,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -123,20 +99,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void ClearRegisters(int reg_from, int reg_to); virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg); virtual void WriteStackPointerToRegister(int reg);
static Result Match(Handle<Code> regexp,
Handle<String> subject,
int* offsets_vector,
int offsets_vector_length,
int previous_index);
static Result Execute(Code* code,
String* input,
int start_offset,
const byte* input_start,
const byte* input_end,
int* output,
bool at_start);
private: private:
// Offsets from ebp of function parameters and stored registers. // Offsets from ebp of function parameters and stored registers.
static const int kFramePointer = 0; static const int kFramePointer = 0;
...@@ -163,16 +125,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -163,16 +125,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
// Initial size of code buffer. // Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024; static const size_t kRegExpCodeSize = 1024;
// Initial size of constant buffers allocated during compilation.
static const int kRegExpConstantsSize = 256;
static const byte* StringCharacterPosition(String* subject, int start_index);
// Compares two-byte strings case insensitively.
// Called from generated RegExp code.
static int CaseInsensitiveCompareUC16(Address byte_offset1,
Address byte_offset2,
size_t byte_length);
// Load a number of characters at the given offset from the // Load a number of characters at the given offset from the
// current position, into the current-character register. // current position, into the current-character register.
...@@ -218,11 +170,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -218,11 +170,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
// is NULL, in which case it is a conditional Backtrack. // is NULL, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to, Hint hint = no_hint); void BranchOrBacktrack(Condition condition, Label* to, Hint hint = no_hint);
// Load the address of a "constant buffer" (a slice of a byte array)
// into a register. The address is computed from the ByteArray* address
// and an offset. Uses no extra registers.
void LoadConstantBufferAddress(Register reg, ArraySlice* buffer);
// Call and return internally in the generated code in a way that // Call and return internally in the generated code in a way that
// is GC-safe (i.e., doesn't leave absolute code addresses on the stack) // is GC-safe (i.e., doesn't leave absolute code addresses on the stack)
inline void SafeCall(Label* to); inline void SafeCall(Label* to);
...@@ -258,10 +205,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -258,10 +205,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
MacroAssembler* masm_; MacroAssembler* masm_;
// Constant buffer provider. Allocates external storage for storing
// constants.
ByteArrayProvider constants_;
// Which mode to generate code for (ASCII or UC16). // Which mode to generate code for (ASCII or UC16).
Mode mode_; Mode mode_;
......
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
#include "regexp-macro-assembler-irregexp.h" #include "regexp-macro-assembler-irregexp.h"
#include "regexp-stack.h" #include "regexp-stack.h"
#ifdef V8_NATIVE_REGEXP
#if V8_TARGET_ARCH_IA32 #if V8_TARGET_ARCH_IA32
#include "ia32/macro-assembler-ia32.h" #include "ia32/macro-assembler-ia32.h"
#include "ia32/regexp-macro-assembler-ia32.h" #include "ia32/regexp-macro-assembler-ia32.h"
...@@ -54,6 +55,7 @@ ...@@ -54,6 +55,7 @@
#else #else
#error Unsupported target architecture. #error Unsupported target architecture.
#endif #endif
#endif
#include "interpreter-irregexp.h" #include "interpreter-irregexp.h"
...@@ -270,10 +272,11 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, ...@@ -270,10 +272,11 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
// If compilation fails, an exception is thrown and this function // If compilation fails, an exception is thrown and this function
// returns false. // returns false.
bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) { bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) {
Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));
#ifdef V8_NATIVE_REGEXP #ifdef V8_NATIVE_REGEXP
if (re->DataAt(JSRegExp::code_index(is_ascii))->IsCode()) return true; if (compiled_code->IsCode()) return true;
#else // ! V8_NATIVE_REGEXP (RegExp interpreter code) #else // ! V8_NATIVE_REGEXP (RegExp interpreter code)
if (re->DataAt(JSRegExp::code_index(is_ascii))->IsByteArray()) return true; if (compiled_code->IsByteArray()) return true;
#endif #endif
return CompileIrregexp(re, is_ascii); return CompileIrregexp(re, is_ascii);
} }
...@@ -414,33 +417,36 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, ...@@ -414,33 +417,36 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
// Dispatch to the correct RegExp implementation. // Dispatch to the correct RegExp implementation.
Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data())); Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
#ifdef V8_NATIVE_REGEXP #ifdef V8_NATIVE_REGEXP
#if V8_TARGET_ARCH_IA32 #ifdef V8_TARGET_ARCH_ARM
UNIMPLEMENTED();
#else // Native regexp supported.
OffsetsVector captures(number_of_capture_registers); OffsetsVector captures(number_of_capture_registers);
int* captures_vector = captures.vector(); int* captures_vector = captures.vector();
RegExpMacroAssemblerIA32::Result res; NativeRegExpMacroAssembler::Result res;
do { do {
bool is_ascii = subject->IsAsciiRepresentation(); bool is_ascii = subject->IsAsciiRepresentation();
if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
return Handle<Object>::null(); return Handle<Object>::null();
} }
Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii)); Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
res = RegExpMacroAssemblerIA32::Match(code, res = NativeRegExpMacroAssembler::Match(code,
subject, subject,
captures_vector, captures_vector,
captures.length(), captures.length(),
previous_index); previous_index);
// If result is RETRY, the string has changed representation, and we // If result is RETRY, the string has changed representation, and we
// must restart from scratch. // must restart from scratch.
} while (res == RegExpMacroAssemblerIA32::RETRY); } while (res == NativeRegExpMacroAssembler::RETRY);
if (res == RegExpMacroAssemblerIA32::EXCEPTION) { if (res == NativeRegExpMacroAssembler::EXCEPTION) {
ASSERT(Top::has_pending_exception()); ASSERT(Top::has_pending_exception());
return Handle<Object>::null(); return Handle<Object>::null();
} }
ASSERT(res == RegExpMacroAssemblerIA32::SUCCESS ASSERT(res == NativeRegExpMacroAssembler::SUCCESS
|| res == RegExpMacroAssemblerIA32::FAILURE); || res == NativeRegExpMacroAssembler::FAILURE);
if (res != RegExpMacroAssemblerIA32::SUCCESS) return Factory::null_value(); if (res != NativeRegExpMacroAssembler::SUCCESS) return Factory::null_value();
array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements())); array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements()));
ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
...@@ -449,10 +455,9 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, ...@@ -449,10 +455,9 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
SetCapture(*array, i, captures_vector[i]); SetCapture(*array, i, captures_vector[i]);
SetCapture(*array, i + 1, captures_vector[i + 1]); SetCapture(*array, i + 1, captures_vector[i + 1]);
} }
#else // !V8_TARGET_ARCH_IA32 #endif // Native regexp supported.
UNREACHABLE();
#endif // V8_TARGET_ARCH_IA32 #else // ! V8_NATIVE_REGEXP
#else // !V8_NATIVE_REGEXP
bool is_ascii = subject->IsAsciiRepresentation(); bool is_ascii = subject->IsAsciiRepresentation();
if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
return Handle<Object>::null(); return Handle<Object>::null();
...@@ -4457,38 +4462,36 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data, ...@@ -4457,38 +4462,36 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
NodeInfo info = *node->info(); NodeInfo info = *node->info();
// Create the correct assembler for the architecture.
#ifdef V8_NATIVE_REGEXP #ifdef V8_NATIVE_REGEXP
#ifdef V8_TARGET_ARCH_ARM // Native regexp implementation.
// ARM native regexp not implemented yet.
UNREACHABLE(); NativeRegExpMacroAssembler::Mode mode =
#endif is_ascii ? NativeRegExpMacroAssembler::ASCII
#ifdef V8_TARGET_ARCH_X64 : NativeRegExpMacroAssembler::UC16;
// X64 native regexp not implemented yet.
UNREACHABLE();
#endif
#ifdef V8_TARGET_ARCH_IA32 #ifdef V8_TARGET_ARCH_IA32
RegExpMacroAssemblerIA32::Mode mode;
if (is_ascii) {
mode = RegExpMacroAssemblerIA32::ASCII;
} else {
mode = RegExpMacroAssemblerIA32::UC16;
}
RegExpMacroAssemblerIA32 macro_assembler(mode, RegExpMacroAssemblerIA32 macro_assembler(mode,
(data->capture_count + 1) * 2); (data->capture_count + 1) * 2);
return compiler.Assemble(&macro_assembler,
node,
data->capture_count,
pattern);
#endif #endif
#ifdef V8_TARGET_ARCH_X64
RegExpMacroAssemblerX64 macro_assembler(mode,
(data->capture_count + 1) * 2);
#endif
#ifdef V8_TARGET_ARCH_ARM
UNIMPLEMENTED();
#endif
#else // ! V8_NATIVE_REGEXP #else // ! V8_NATIVE_REGEXP
// Interpreted regexp. // Interpreted regexp implementation.
EmbeddedVector<byte, 1024> codes; EmbeddedVector<byte, 1024> codes;
RegExpMacroAssemblerIrregexp macro_assembler(codes); RegExpMacroAssemblerIrregexp macro_assembler(codes);
#endif
return compiler.Assemble(&macro_assembler, return compiler.Assemble(&macro_assembler,
node, node,
data->capture_count, data->capture_count,
pattern); pattern);
#endif // V8_NATIVE_REGEXP
} }
}} // namespace v8::internal }} // namespace v8::internal
...@@ -375,37 +375,6 @@ void RegExpMacroAssemblerIrregexp::CheckNotRegistersEqual(int reg1, ...@@ -375,37 +375,6 @@ void RegExpMacroAssemblerIrregexp::CheckNotRegistersEqual(int reg1,
} }
void RegExpMacroAssemblerIrregexp::CheckBitmap(uc16 start,
Label* bitmap,
Label* on_zero) {
UNIMPLEMENTED();
}
void RegExpMacroAssemblerIrregexp::DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& table) {
UNIMPLEMENTED();
}
void RegExpMacroAssemblerIrregexp::DispatchByteMap(
uc16 start,
Label* byte_map,
const Vector<Label*>& table) {
UNIMPLEMENTED();
}
void RegExpMacroAssemblerIrregexp::DispatchHighByteMap(
byte start,
Label* byte_map,
const Vector<Label*>& table) {
UNIMPLEMENTED();
}
void RegExpMacroAssemblerIrregexp::CheckCharacters( void RegExpMacroAssemblerIrregexp::CheckCharacters(
Vector<const uc16> str, Vector<const uc16> str,
int cp_offset, int cp_offset,
......
...@@ -52,7 +52,6 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { ...@@ -52,7 +52,6 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
// The byte-code interpreter checks on each push anyway. // The byte-code interpreter checks on each push anyway.
virtual int stack_limit_slack() { return 1; } virtual int stack_limit_slack() { return 1; }
virtual void Bind(Label* label); virtual void Bind(Label* label);
virtual void EmitOrLink(Label* label);
virtual void AdvanceCurrentPosition(int by); // Signed cp change. virtual void AdvanceCurrentPosition(int by); // Signed cp change.
virtual void PopCurrentPosition(); virtual void PopCurrentPosition();
virtual void PushCurrentPosition(); virtual void PushCurrentPosition();
...@@ -100,16 +99,6 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { ...@@ -100,16 +99,6 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
int cp_offset, int cp_offset,
Label* on_failure, Label* on_failure,
bool check_end_of_string); bool check_end_of_string);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void DispatchHalfNibbleMap(uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations);
virtual void DispatchByteMap(uc16 start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void DispatchHighByteMap(byte start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt); virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt);
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge); virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
virtual void IfRegisterEqPos(int register_index, Label* if_eq); virtual void IfRegisterEqPos(int register_index, Label* if_eq);
...@@ -119,6 +108,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { ...@@ -119,6 +108,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
private: private:
void Expand(); void Expand();
// Code and bitmap emission. // Code and bitmap emission.
inline void EmitOrLink(Label* label);
inline void Emit32(uint32_t x); inline void Emit32(uint32_t x);
inline void Emit16(uint32_t x); inline void Emit16(uint32_t x);
inline void Emit(uint32_t bc, uint32_t arg); inline void Emit(uint32_t bc, uint32_t arg);
......
...@@ -53,12 +53,6 @@ void RegExpMacroAssemblerTracer::Bind(Label* label) { ...@@ -53,12 +53,6 @@ void RegExpMacroAssemblerTracer::Bind(Label* label) {
} }
// Prints a trace line for the EmitOrLink call and then forwards the call,
// unchanged, to the wrapped assembler_.
// NOTE(review): label is a pointer but is formatted with %08x; confirm this
// is adequate on targets where pointers are wider than 32 bits.
void RegExpMacroAssemblerTracer::EmitOrLink(Label* label) {
PrintF(" EmitOrLink(label[%08x]);\n", label);
assembler_->EmitOrLink(label);
}
void RegExpMacroAssemblerTracer::AdvanceCurrentPosition(int by) { void RegExpMacroAssemblerTracer::AdvanceCurrentPosition(int by) {
PrintF(" AdvanceCurrentPosition(by=%d);\n", by); PrintF(" AdvanceCurrentPosition(by=%d);\n", by);
assembler_->AdvanceCurrentPosition(by); assembler_->AdvanceCurrentPosition(by);
...@@ -311,13 +305,6 @@ void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str, ...@@ -311,13 +305,6 @@ void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str,
} }
// Prints a trace line for the CheckBitmap call (the start character and the
// on_zero label; the bitmap itself is shown only as "<bitmap>") and then
// forwards the call, unchanged, to the wrapped assembler_.
void RegExpMacroAssemblerTracer::CheckBitmap(uc16 start, Label* bitmap,
Label* on_zero) {
PrintF(" CheckBitmap(start=u%04x, <bitmap>, label[%08x]);\n", start, on_zero);
assembler_->CheckBitmap(start, bitmap, on_zero);
}
bool RegExpMacroAssemblerTracer::CheckSpecialCharacterClass( bool RegExpMacroAssemblerTracer::CheckSpecialCharacterClass(
uc16 type, uc16 type,
int cp_offset, int cp_offset,
...@@ -338,51 +325,6 @@ bool RegExpMacroAssemblerTracer::CheckSpecialCharacterClass( ...@@ -338,51 +325,6 @@ bool RegExpMacroAssemblerTracer::CheckSpecialCharacterClass(
} }
// Prints a trace line for the DispatchHalfNibbleMap call, listing every
// destination label inside a bracketed, comma-separated list, and then
// forwards the call, unchanged, to the wrapped assembler_.
void RegExpMacroAssemblerTracer::DispatchHalfNibbleMap(
    uc16 start,
    Label* half_nibble_map,
    const Vector<Label*>& destinations) {
  PrintF(" DispatchHalfNibbleMap(start=u%04x, <half_nibble_map>, [", start);
  for (int i = 0; i < destinations.length(); i++) {
    if (i > 0)
      PrintF(", ");
    PrintF("label[%08x]", destinations[i]);
  }
  // Close the label list opened by "[" above; previously the bracket was
  // left unbalanced in the trace output.
  PrintF("]);\n");
  assembler_->DispatchHalfNibbleMap(start, half_nibble_map, destinations);
}
// Prints a trace line for the DispatchByteMap call, listing every
// destination label inside a bracketed, comma-separated list, and then
// forwards the call, unchanged, to the wrapped assembler_.
void RegExpMacroAssemblerTracer::DispatchByteMap(
    uc16 start,
    Label* byte_map,
    const Vector<Label*>& destinations) {
  PrintF(" DispatchByteMap(start=u%04x, <byte_map>, [", start);
  for (int i = 0; i < destinations.length(); i++) {
    if (i > 0)
      PrintF(", ");
    PrintF("label[%08x]", destinations[i]);
  }
  // Close the label list opened by "[" above; previously the bracket was
  // left unbalanced in the trace output.
  PrintF("]);\n");
  assembler_->DispatchByteMap(start, byte_map, destinations);
}
// Prints a trace line for the DispatchHighByteMap call, listing every
// destination label inside a bracketed, comma-separated list, and then
// forwards the call, unchanged, to the wrapped assembler_.
void RegExpMacroAssemblerTracer::DispatchHighByteMap(
    byte start,
    Label* byte_map,
    const Vector<Label*>& destinations) {
  PrintF(" DispatchHighByteMap(start=u%04x, <byte_map>, [", start);
  for (int i = 0; i < destinations.length(); i++) {
    if (i > 0)
      PrintF(", ");
    PrintF("label[%08x]", destinations[i]);
  }
  // Close the label list opened by "[" above; previously the bracket was
  // left unbalanced in the trace output.
  PrintF("]);\n");
  assembler_->DispatchHighByteMap(start, byte_map, destinations);
}
void RegExpMacroAssemblerTracer::IfRegisterLT(int register_index, void RegExpMacroAssemblerTracer::IfRegisterLT(int register_index,
int comparand, Label* if_lt) { int comparand, Label* if_lt) {
PrintF(" IfRegisterLT(register=%d, number=%d, label[%08x]);\n", PrintF(" IfRegisterLT(register=%d, number=%d, label[%08x]);\n",
......
...@@ -43,7 +43,6 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { ...@@ -43,7 +43,6 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void Backtrack(); virtual void Backtrack();
virtual void Bind(Label* label); virtual void Bind(Label* label);
virtual void CheckAtStart(Label* on_at_start); virtual void CheckAtStart(Label* on_at_start);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void CheckCharacter(uint32_t c, Label* on_equal); virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c, virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t and_with, uint32_t and_with,
...@@ -73,19 +72,6 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { ...@@ -73,19 +72,6 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
int cp_offset, int cp_offset,
bool check_offset, bool check_offset,
Label* on_no_match); Label* on_no_match);
virtual void DispatchByteMap(
uc16 start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations);
virtual void DispatchHighByteMap(
byte start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void EmitOrLink(Label* label);
virtual void Fail(); virtual void Fail();
virtual Handle<Object> GetCode(Handle<String> source); virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label); virtual void GoTo(Label* label);
......
...@@ -25,10 +25,10 @@ ...@@ -25,10 +25,10 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <string.h>
#include "v8.h" #include "v8.h"
#include "ast.h" #include "ast.h"
#include "assembler.h" #include "assembler.h"
#include "regexp-stack.h"
#include "regexp-macro-assembler.h" #include "regexp-macro-assembler.h"
namespace v8 { namespace v8 {
...@@ -42,38 +42,176 @@ RegExpMacroAssembler::~RegExpMacroAssembler() { ...@@ -42,38 +42,176 @@ RegExpMacroAssembler::~RegExpMacroAssembler() {
} }
ByteArrayProvider::ByteArrayProvider(unsigned int initial_size) #ifdef V8_NATIVE_REGEXP // Avoid unused code, e.g., on ARM.
: byte_array_size_(initial_size),
current_byte_array_(), NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
current_byte_array_free_offset_(initial_size) {} }
ArraySlice ByteArrayProvider::GetBuffer(unsigned int size, NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
unsigned int elem_size) { }
ASSERT(size > 0);
size_t byte_size = size * elem_size;
int free_offset = current_byte_array_free_offset_;
// align elements
free_offset += elem_size - 1;
free_offset = free_offset - (free_offset % elem_size);
if (free_offset + byte_size > byte_array_size_) { const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
if (byte_size > (byte_array_size_ / 2)) { String* subject,
Handle<ByteArray> solo_buffer(Factory::NewByteArray(byte_size, TENURED)); int start_index) {
return ArraySlice(solo_buffer, 0); // Not just flat, but ultra flat.
ASSERT(subject->IsExternalString() || subject->IsSeqString());
ASSERT(start_index >= 0);
ASSERT(start_index <= subject->length());
if (subject->IsAsciiRepresentation()) {
const byte* address;
if (StringShape(subject).IsExternal()) {
const char* data = ExternalAsciiString::cast(subject)->resource()->data();
address = reinterpret_cast<const byte*>(data);
} else {
ASSERT(subject->IsSeqAsciiString());
char* data = SeqAsciiString::cast(subject)->GetChars();
address = reinterpret_cast<const byte*>(data);
} }
current_byte_array_ = Factory::NewByteArray(byte_array_size_, TENURED); return address + start_index;
free_offset = 0; }
const uc16* data;
if (StringShape(subject).IsExternal()) {
data = ExternalTwoByteString::cast(subject)->resource()->data();
} else {
ASSERT(subject->IsSeqTwoByteString());
data = SeqTwoByteString::cast(subject)->GetChars();
} }
current_byte_array_free_offset_ = free_offset + byte_size; return reinterpret_cast<const byte*>(data + start_index);
return ArraySlice(current_byte_array_, free_offset);
} }
template <typename T> NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
ArraySlice ByteArrayProvider::GetBuffer(Vector<T> values) { Handle<Code> regexp_code,
ArraySlice slice = GetBuffer(values.length(), sizeof(T)); Handle<String> subject,
memcpy(slice.location(), values.start(), values.length() * sizeof(T)); int* offsets_vector,
return slice; int offsets_vector_length,
int previous_index) {
ASSERT(subject->IsFlat());
ASSERT(previous_index >= 0);
ASSERT(previous_index <= subject->length());
// No allocations before calling the regexp, but we can't use
// AssertNoAllocation, since regexps might be preempted, and another thread
// might do allocation anyway.
String* subject_ptr = *subject;
// Character offsets into string.
int start_offset = previous_index;
int end_offset = subject_ptr->length();
bool is_ascii = subject->IsAsciiRepresentation();
if (StringShape(subject_ptr).IsCons()) {
subject_ptr = ConsString::cast(subject_ptr)->first();
} else if (StringShape(subject_ptr).IsSliced()) {
SlicedString* slice = SlicedString::cast(subject_ptr);
start_offset += slice->start();
end_offset += slice->start();
subject_ptr = slice->buffer();
}
// Ensure that an underlying string has the same ascii-ness.
ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
// String is now either Sequential or External
int char_size_shift = is_ascii ? 0 : 1;
int char_length = end_offset - start_offset;
const byte* input_start =
StringCharacterPosition(subject_ptr, start_offset);
int byte_length = char_length << char_size_shift;
const byte* input_end = input_start + byte_length;
Result res = Execute(*regexp_code,
subject_ptr,
start_offset,
input_start,
input_end,
offsets_vector,
previous_index == 0);
if (res == SUCCESS) {
// Capture values are relative to start_offset only.
// Convert them to be relative to start of string.
for (int i = 0; i < offsets_vector_length; i++) {
if (offsets_vector[i] >= 0) {
offsets_vector[i] += previous_index;
}
}
}
return res;
} }
// Calls the generated matcher found at code->entry() and returns its result
// as a Result value.
//
// The matcher receives, in order: the input string, the start offset, the
// start and end addresses of the input characters, the output array, the
// at_start flag as 0/1, and the base address of the backtrack stack.
//
// If the matcher reports EXCEPTION while no exception is pending, the failure
// is treated as a backtrack-stack overflow and Top::StackOverflow() is called
// before returning.
NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
    Code* code,
    String* input,
    int start_offset,
    const byte* input_start,
    const byte* input_end,
    int* output,
    bool at_start) {
  typedef int (*NativeMatcher)(String*, int, const byte*,
                               const byte*, int*, int, Address);
  NativeMatcher entry_point = FUNCTION_CAST<NativeMatcher>(code->entry());

  const int at_start_flag = at_start ? 1 : 0;

  // Constructing a RegExpStack makes sure the minimum backtrack stack has
  // been allocated before its base address is read.
  RegExpStack stack_scope;
  Address backtrack_stack_base = RegExpStack::stack_base();

  int status = entry_point(input,
                           start_offset,
                           input_start,
                           input_end,
                           output,
                           at_start_flag,
                           backtrack_stack_base);
  ASSERT(status >= RETRY);
  ASSERT(status <= SUCCESS);

  const bool overflow_without_exception =
      (status == EXCEPTION) && !Top::has_pending_exception();
  if (overflow_without_exception) {
    // The generated code ran out of backtrack stack but has not created the
    // exception object yet, so create it here.
    Top::StackOverflow();
  }
  return static_cast<Result>(status);
}
static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;


// Compares two ranges of two-byte characters, ignoring case.
//
// byte_offset1 and byte_offset2 are the addresses of the first character of
// each range; byte_length is the length of both ranges in bytes and must be
// even. Returns 1 when every character pair is equal either directly or after
// being mapped through the canonicalize table, and 0 otherwise.
//
// This function must not cause a garbage collection: a GC might move the
// generated code that called it and invalidate the return address on the
// stack.
int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
    Address byte_offset1,
    Address byte_offset2,
    size_t byte_length) {
  ASSERT(byte_length % 2 == 0);
  uc16* left_chars = reinterpret_cast<uc16*>(byte_offset1);
  uc16* right_chars = reinterpret_cast<uc16*>(byte_offset2);
  size_t char_count = byte_length >> 1;
  for (size_t pos = 0; pos < char_count; pos++) {
    unibrow::uchar left = left_chars[pos];
    unibrow::uchar right = right_chars[pos];
    if (left == right) continue;

    // Map the left character first; the pair may already agree without
    // having to map the right one.
    unibrow::uchar mapped_left[1] = { left };
    canonicalize.get(left, '\0', mapped_left);
    if (mapped_left[0] == right) continue;

    unibrow::uchar mapped_right[1] = { right };
    canonicalize.get(right, '\0', mapped_right);
    if (mapped_left[0] != mapped_right[0]) {
      return 0;
    }
  }
  return 1;
}
#endif // V8_NATIVE_REGEXP
} } // namespace v8::internal } } // namespace v8::internal
...@@ -46,6 +46,7 @@ class RegExpMacroAssembler { ...@@ -46,6 +46,7 @@ class RegExpMacroAssembler {
enum IrregexpImplementation { enum IrregexpImplementation {
kIA32Implementation, kIA32Implementation,
kARMImplementation, kARMImplementation,
kX64Implementation,
kBytecodeImplementation kBytecodeImplementation
}; };
...@@ -67,12 +68,6 @@ class RegExpMacroAssembler { ...@@ -67,12 +68,6 @@ class RegExpMacroAssembler {
virtual void Backtrack() = 0; virtual void Backtrack() = 0;
virtual void Bind(Label* label) = 0; virtual void Bind(Label* label) = 0;
virtual void CheckAtStart(Label* on_at_start) = 0; virtual void CheckAtStart(Label* on_at_start) = 0;
// Check the current character against a bitmap. The range of the current
// character must be from start to start + length_of_bitmap_in_bits.
virtual void CheckBitmap(
uc16 start, // The bitmap is indexed from this character.
Label* bitmap, // Where the bitmap is emitted.
Label* on_zero) = 0; // Where to go if the bit is 0. Fall through on 1.
// Dispatch after looking the current character up in a 2-bits-per-entry // Dispatch after looking the current character up in a 2-bits-per-entry
// map. The destinations vector has up to 4 labels. // map. The destinations vector has up to 4 labels.
virtual void CheckCharacter(uint32_t c, Label* on_equal) = 0; virtual void CheckCharacter(uint32_t c, Label* on_equal) = 0;
...@@ -132,23 +127,6 @@ class RegExpMacroAssembler { ...@@ -132,23 +127,6 @@ class RegExpMacroAssembler {
Label* on_no_match) { Label* on_no_match) {
return false; return false;
} }
// Dispatch after looking the current character up in a byte map. The
// destinations vector has up to 256 labels.
virtual void DispatchByteMap(
uc16 start,
Label* byte_map,
const Vector<Label*>& destinations) = 0;
virtual void DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations) = 0;
// Dispatch after looking the high byte of the current character up in a byte
// map. The destinations vector has up to 256 labels.
virtual void DispatchHighByteMap(
byte start,
Label* byte_map,
const Vector<Label*>& destinations) = 0;
virtual void EmitOrLink(Label* label) = 0;
virtual void Fail() = 0; virtual void Fail() = 0;
virtual Handle<Object> GetCode(Handle<String> source) = 0; virtual Handle<Object> GetCode(Handle<String> source) = 0;
virtual void GoTo(Label* label) = 0; virtual void GoTo(Label* label) = 0;
...@@ -181,51 +159,53 @@ class RegExpMacroAssembler { ...@@ -181,51 +159,53 @@ class RegExpMacroAssembler {
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0; virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
virtual void ClearRegisters(int reg_from, int reg_to) = 0; virtual void ClearRegisters(int reg_from, int reg_to) = 0;
virtual void WriteStackPointerToRegister(int reg) = 0; virtual void WriteStackPointerToRegister(int reg) = 0;
private:
}; };
struct ArraySlice { #ifdef V8_NATIVE_REGEXP // Avoid compiling unused code.
class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
public: public:
ArraySlice(Handle<ByteArray> array, size_t offset) // Type of input string to generate code for.
: array_(array), offset_(offset) {} enum Mode { ASCII = 1, UC16 = 2 };
Handle<ByteArray> array() { return array_; }
// Offset in the byte array data.
size_t offset() { return offset_; }
// Offset from the ByteArray pointer.
size_t base_offset() {
return ByteArray::kHeaderSize - kHeapObjectTag + offset_;
}
void* location() {
return reinterpret_cast<void*>(array_->GetDataStartAddress() + offset_);
}
template <typename T>
T& at(int idx) {
return reinterpret_cast<T*>(array_->GetDataStartAddress() + offset_)[idx];
}
private:
Handle<ByteArray> array_;
size_t offset_;
};
// Result of calling generated native RegExp code.
// RETRY: Something significant changed during execution, and the matching
// should be retried from scratch.
// EXCEPTION: Something failed during execution. If no exception has been
// thrown, it's an internal out-of-memory, and the caller should
// throw the exception.
// FAILURE: Matching failed.
// SUCCESS: Matching succeeded, and the output array has been filled with
// capture positions.
enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
class ByteArrayProvider { NativeRegExpMacroAssembler();
public: virtual ~NativeRegExpMacroAssembler();
explicit ByteArrayProvider(unsigned int initial_size);
// Provides a place to put "size" elements of size "element_size". static Result Match(Handle<Code> regexp,
// The information can be stored in the provided ByteArray at the "offset". Handle<String> subject,
// The offset is aligned to the element size. int* offsets_vector,
ArraySlice GetBuffer(unsigned int size, int offsets_vector_length,
unsigned int element_size); int previous_index);
template <typename T>
ArraySlice GetBuffer(Vector<T> values);
private:
size_t byte_array_size_;
Handle<ByteArray> current_byte_array_;
int current_byte_array_free_offset_;
};
// Compares two-byte strings case insensitively.
// Called from generated RegExp code.
static int CaseInsensitiveCompareUC16(Address byte_offset1,
Address byte_offset2,
size_t byte_length);
static const byte* StringCharacterPosition(String* subject, int start_index);
static Result Execute(Code* code,
String* input,
int start_offset,
const byte* input_start,
const byte* input_end,
int* output,
bool at_start);
};
#endif // V8_NATIVE_REGEXP
} } // namespace v8::internal } } // namespace v8::internal
#endif // V8_REGEXP_MACRO_ASSEMBLER_H_ #endif // V8_REGEXP_MACRO_ASSEMBLER_H_
...@@ -48,7 +48,7 @@ class RegExpStack { ...@@ -48,7 +48,7 @@ class RegExpStack {
~RegExpStack(); // Releases the stack if it has grown. ~RegExpStack(); // Releases the stack if it has grown.
// Gives the top of the memory used as stack. // Gives the top of the memory used as stack.
static Address stack_top() { static Address stack_base() {
ASSERT(thread_local_.memory_size_ != 0); ASSERT(thread_local_.memory_size_ != 0);
return thread_local_.memory_ + thread_local_.memory_size_; return thread_local_.memory_ + thread_local_.memory_size_;
} }
...@@ -74,7 +74,7 @@ class RegExpStack { ...@@ -74,7 +74,7 @@ class RegExpStack {
private: private:
// Artificial limit used when no memory has been allocated. // Artificial limit used when no memory has been allocated.
static const uint32_t kMemoryTop = 0xffffffff; static const uintptr_t kMemoryTop = -1;
// Minimal size of allocated stack area. // Minimal size of allocated stack area.
static const size_t kMinimumStackSize = 1 * KB; static const size_t kMinimumStackSize = 1 * KB;
......
...@@ -437,21 +437,43 @@ void Assembler::arithmetic_op(byte opcode, Register reg, const Operand& op) { ...@@ -437,21 +437,43 @@ void Assembler::arithmetic_op(byte opcode, Register reg, const Operand& op) {
} }
void Assembler::arithmetic_op(byte opcode, Register dst, Register src) { void Assembler::arithmetic_op(byte opcode, Register reg, Register rm_reg) {
EnsureSpace ensure_space(this); EnsureSpace ensure_space(this);
last_pc_ = pc_; last_pc_ = pc_;
emit_rex_64(dst, src); emit_rex_64(reg, rm_reg);
emit(opcode); emit(opcode);
emit_modrm(dst, src); emit_modrm(reg, rm_reg);
} }
void Assembler::arithmetic_op_32(byte opcode, Register dst, Register src) { void Assembler::arithmetic_op_16(byte opcode, Register reg, Register rm_reg) {
EnsureSpace ensure_space(this); EnsureSpace ensure_space(this);
last_pc_ = pc_; last_pc_ = pc_;
emit_optional_rex_32(dst, src); emit(0x66);
emit_optional_rex_32(reg, rm_reg);
emit(opcode); emit(opcode);
emit_modrm(dst, src); emit_modrm(reg, rm_reg);
}
// Emits a 16-bit arithmetic instruction taking a register and a memory
// operand. The byte sequence is: the 0x66 prefix, an optional REX prefix,
// the opcode, and finally the operand encoding produced by emit_operand
// for reg and rm_reg.
void Assembler::arithmetic_op_16(byte opcode,
Register reg,
const Operand& rm_reg) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
// 0x66 must precede the optional REX prefix emitted on the next line.
emit(0x66);
emit_optional_rex_32(reg, rm_reg);
emit(opcode);
emit_operand(reg, rm_reg);
}
// Emits a register-to-register arithmetic instruction without the 64-bit
// REX form: an optional REX prefix, the opcode, and a ModR/M byte built from
// reg and rm_reg (per the parameter names, reg goes in the reg field and
// rm_reg in the r/m field).
void Assembler::arithmetic_op_32(byte opcode, Register reg, Register rm_reg) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
emit_optional_rex_32(reg, rm_reg);
emit(opcode);
emit_modrm(reg, rm_reg);
}
...@@ -504,6 +526,47 @@ void Assembler::immediate_arithmetic_op(byte subcode, ...@@ -504,6 +526,47 @@ void Assembler::immediate_arithmetic_op(byte subcode,
} }
// Emits a 16-bit arithmetic instruction that combines register dst with the
// immediate src. subcode selects the operation: it is placed in the ModR/M
// byte, or folded into the opcode of the short form used when dst is rax.
//
// Immediates that fit in a signed byte use the 0x83 form with a single
// immediate byte. All other immediates must fit in 16 bits and are emitted
// as exactly two bytes, low byte first.
void Assembler::immediate_arithmetic_op_16(byte subcode,
                                           Register dst,
                                           Immediate src) {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  emit(0x66);  // Operand size override prefix.
  emit_optional_rex_32(dst);
  if (is_int8(src.value_)) {
    emit(0x83);
    emit_modrm(subcode, dst);
    emit(src.value_);
  } else {
    // With the operand size override the instruction carries a 16-bit
    // immediate. Emitting a 32-bit immediate here would leave two stray
    // bytes that are then decoded as the start of the next instruction.
    ASSERT(src.value_ >= -0x8000 && src.value_ <= 0xFFFF);
    if (dst.is(rax)) {
      emit(0x05 | (subcode << 3));
    } else {
      emit(0x81);
      emit_modrm(subcode, dst);
    }
    emit(src.value_ & 0xFF);
    emit((src.value_ >> 8) & 0xFF);
  }
}
// Emits a 16-bit arithmetic instruction that combines the memory operand dst
// with the immediate src. subcode selects the operation and is passed to
// emit_operand together with dst.
//
// Immediates that fit in a signed byte use the 0x83 form with a single
// immediate byte. All other immediates must fit in 16 bits and are emitted
// as exactly two bytes, low byte first.
void Assembler::immediate_arithmetic_op_16(byte subcode,
                                           const Operand& dst,
                                           Immediate src) {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  emit(0x66);  // Operand size override prefix.
  emit_optional_rex_32(dst);
  if (is_int8(src.value_)) {
    emit(0x83);
    emit_operand(subcode, dst);
    emit(src.value_);
  } else {
    // With the operand size override the instruction carries a 16-bit
    // immediate. Emitting a 32-bit immediate here would leave two stray
    // bytes that are then decoded as the start of the next instruction.
    ASSERT(src.value_ >= -0x8000 && src.value_ <= 0xFFFF);
    emit(0x81);
    emit_operand(subcode, dst);
    emit(src.value_ & 0xFF);
    emit((src.value_ >> 8) & 0xFF);
  }
}
void Assembler::immediate_arithmetic_op_32(byte subcode, void Assembler::immediate_arithmetic_op_32(byte subcode,
Register dst, Register dst,
Immediate src) { Immediate src) {
...@@ -744,6 +807,14 @@ void Assembler::cmovl(Condition cc, Register dst, const Operand& src) { ...@@ -744,6 +807,14 @@ void Assembler::cmovl(Condition cc, Register dst, const Operand& src) {
} }
// Emits a two-byte instruction: opcode 0x3c followed by the immediate byte
// (per the function name, a byte compare against al). The immediate must fit
// in 8 bits, either as a signed or as an unsigned value.
void Assembler::cmpb_al(Immediate imm8) {
ASSERT(is_int8(imm8.value_) || is_uint8(imm8.value_));
EnsureSpace ensure_space(this);
last_pc_ = pc_;
emit(0x3c);
emit(imm8.value_);
}
void Assembler::cpuid() { void Assembler::cpuid() {
ASSERT(CpuFeatures::IsEnabled(CpuFeatures::CPUID)); ASSERT(CpuFeatures::IsEnabled(CpuFeatures::CPUID));
...@@ -1193,6 +1264,32 @@ void Assembler::movq(const Operand& dst, Immediate value) { ...@@ -1193,6 +1264,32 @@ void Assembler::movq(const Operand& dst, Immediate value) {
} }
/*
 * Stores the location of the src label into the dst memory operand, as a
 * 32-bit offset relative to the end of this instruction.
 *
 * If the label is already bound the offset is emitted directly; it is never
 * positive because the label lies at or before the current position.
 * Otherwise the immediate field is used as an entry in the label's link
 * chain.
 * NOTE(review): presumably the chain entry is replaced by the real offset
 * when the label is bound -- confirm against the label binding code.
 * NOTE(review): the store itself appears to be 32 bits wide (no 64-bit REX
 * prefix is emitted); any sign extension to 64 bits would have to happen when
 * the value is read back -- confirm against the callers.
 */
void Assembler::movl(const Operand& dst, Label* src) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
emit_optional_rex_32(dst);
emit(0xC7);
emit_operand(0, dst);
if (src->is_bound()) {
// The offset is measured from the byte after the 4-byte immediate.
int offset = src->pos() - pc_offset() - sizeof(int32_t);
ASSERT(offset <= 0);
emitl(offset);
} else if (src->is_linked()) {
// Emit the label's current link position, then link the label to the
// immediate field that was just written.
emitl(src->pos());
src->link_to(pc_offset() - sizeof(int32_t));
} else {
ASSERT(src->is_unused());
// First use of the label: the immediate field holds its own position and
// the label is linked to it.
int32_t current = pc_offset();
emitl(current);
src->link_to(current);
}
}
void Assembler::movq(Register dst, Handle<Object> value, RelocInfo::Mode mode) { void Assembler::movq(Register dst, Handle<Object> value, RelocInfo::Mode mode) {
// If there is no relocation info, emit the value of the handle efficiently // If there is no relocation info, emit the value of the handle efficiently
// (possibly using less that 8 bytes for the value). // (possibly using less that 8 bytes for the value).
......
...@@ -496,13 +496,17 @@ class Assembler : public Malloced { ...@@ -496,13 +496,17 @@ class Assembler : public Malloced {
// Load a 32-bit immediate value, zero-extended to 64 bits. // Load a 32-bit immediate value, zero-extended to 64 bits.
void movl(Register dst, Immediate imm32); void movl(Register dst, Immediate imm32);
// Move 64 bit register value to 64-bit memory location.
void movq(const Operand& dst, Register src);
// Move 64 bit memory location to 64-bit register value.
void movq(Register dst, const Operand& src); void movq(Register dst, const Operand& src);
void movq(Register dst, Register src);
// Sign extends immediate 32-bit value to 64 bits. // Sign extends immediate 32-bit value to 64 bits.
void movq(Register dst, Immediate x); void movq(Register dst, Immediate x);
void movq(Register dst, Register src); // Move the offset of the label location relative to the current
// position (after the move) to the destination.
void movl(const Operand& dst, Label* src);
// Move 64 bit register value to 64-bit memory location.
void movq(const Operand& dst, Register src);
// Move sign extended immediate to memory location. // Move sign extended immediate to memory location.
void movq(const Operand& dst, Immediate value); void movq(const Operand& dst, Immediate value);
// New x64 instructions to load a 64-bit immediate into a register. // New x64 instructions to load a 64-bit immediate into a register.
...@@ -535,7 +539,11 @@ class Assembler : public Malloced { ...@@ -535,7 +539,11 @@ class Assembler : public Malloced {
// Arithmetics // Arithmetics
void addl(Register dst, Register src) { void addl(Register dst, Register src) {
arithmetic_op_32(0x03, dst, src); if (dst.low_bits() == 4) { // Forces SIB byte.
arithmetic_op_32(0x01, src, dst);
} else {
arithmetic_op_32(0x03, dst, src);
}
} }
void addl(Register dst, Immediate src) { void addl(Register dst, Immediate src) {
...@@ -574,10 +582,44 @@ class Assembler : public Malloced { ...@@ -574,10 +582,44 @@ class Assembler : public Malloced {
immediate_arithmetic_op_8(0x7, dst, src); immediate_arithmetic_op_8(0x7, dst, src);
} }
void cmpb_al(Immediate src);
// Byte compare, register with register: opcode 0x3A with dst passed as the
// reg argument and src as the rm_reg argument of arithmetic_op.
void cmpb(Register dst, Register src) {
arithmetic_op(0x3A, dst, src);
}
// Byte compare, register with memory operand: opcode 0x3A with dst as the
// register argument and src as the operand argument.
void cmpb(Register dst, const Operand& src) {
arithmetic_op(0x3A, dst, src);
}
// Byte compare, memory operand with register: uses opcode 0x38 and swaps the
// arguments, because arithmetic_op takes the register first.
void cmpb(const Operand& dst, Register src) {
arithmetic_op(0x38, src, dst);
}
void cmpb(const Operand& dst, Immediate src) { void cmpb(const Operand& dst, Immediate src) {
immediate_arithmetic_op_8(0x7, dst, src); immediate_arithmetic_op_8(0x7, dst, src);
} }
// 16-bit compare of a memory operand with an immediate (subcode 0x7).
void cmpw(const Operand& dst, Immediate src) {
immediate_arithmetic_op_16(0x7, dst, src);
}
// 16-bit compare of a register with an immediate (subcode 0x7).
void cmpw(Register dst, Immediate src) {
immediate_arithmetic_op_16(0x7, dst, src);
}
// 16-bit compare of a register with a memory operand (opcode 0x3B).
void cmpw(Register dst, const Operand& src) {
arithmetic_op_16(0x3B, dst, src);
}
// 16-bit compare of two registers (opcode 0x3B).
void cmpw(Register dst, Register src) {
arithmetic_op_16(0x3B, dst, src);
}
// 16-bit compare of a memory operand with a register: uses opcode 0x39 and
// swaps the arguments, because arithmetic_op_16 takes the register first.
void cmpw(const Operand& dst, Register src) {
arithmetic_op_16(0x39, src, dst);
}
void cmpl(Register dst, Register src) { void cmpl(Register dst, Register src) {
arithmetic_op_32(0x3B, dst, src); arithmetic_op_32(0x3B, dst, src);
} }
...@@ -794,6 +836,10 @@ class Assembler : public Malloced { ...@@ -794,6 +836,10 @@ class Assembler : public Malloced {
immediate_arithmetic_op_32(0x5, dst, src); immediate_arithmetic_op_32(0x5, dst, src);
} }
// Byte-sized subtract of an immediate from a register: delegates to
// immediate_arithmetic_op_8 with subcode 0x5, the same subcode the 32-bit
// immediate subtract just above uses.
void subb(Register dst, Immediate src) {
immediate_arithmetic_op_8(0x5, dst, src);
}
void testb(Register reg, Immediate mask); void testb(Register reg, Immediate mask);
void testb(const Operand& op, Immediate mask); void testb(const Operand& op, Immediate mask);
void testl(Register dst, Register src); void testl(Register dst, Register src);
...@@ -1141,26 +1187,36 @@ class Assembler : public Malloced { ...@@ -1141,26 +1187,36 @@ class Assembler : public Malloced {
// AND, OR, XOR, or CMP. The encodings of these operations are all // AND, OR, XOR, or CMP. The encodings of these operations are all
// similar, differing just in the opcode or in the reg field of the // similar, differing just in the opcode or in the reg field of the
// ModR/M byte. // ModR/M byte.
void arithmetic_op(byte opcode, Register dst, Register src); void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
void arithmetic_op_32(byte opcode, Register dst, Register src); void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
void arithmetic_op_32(byte opcode, Register reg, Register rm_reg);
void arithmetic_op_32(byte opcode, Register reg, const Operand& rm_reg); void arithmetic_op_32(byte opcode, Register reg, const Operand& rm_reg);
void arithmetic_op(byte opcode, Register reg, Register rm_reg);
void arithmetic_op(byte opcode, Register reg, const Operand& rm_reg); void arithmetic_op(byte opcode, Register reg, const Operand& rm_reg);
void immediate_arithmetic_op(byte subcode, Register dst, Immediate src); void immediate_arithmetic_op(byte subcode, Register dst, Immediate src);
void immediate_arithmetic_op(byte subcode, const Operand& dst, Immediate src); void immediate_arithmetic_op(byte subcode, const Operand& dst, Immediate src);
// Operate on a 32-bit word in memory or register.
void immediate_arithmetic_op_32(byte subcode,
const Operand& dst,
Immediate src);
void immediate_arithmetic_op_32(byte subcode,
Register dst,
Immediate src);
// Operate on a byte in memory or register. // Operate on a byte in memory or register.
void immediate_arithmetic_op_8(byte subcode, void immediate_arithmetic_op_8(byte subcode,
const Operand& dst, Register dst,
Immediate src); Immediate src);
void immediate_arithmetic_op_8(byte subcode, void immediate_arithmetic_op_8(byte subcode,
Register dst, const Operand& dst,
Immediate src); Immediate src);
// Operate on a word in memory or register.
void immediate_arithmetic_op_16(byte subcode,
Register dst,
Immediate src);
void immediate_arithmetic_op_16(byte subcode,
const Operand& dst,
Immediate src);
// Operate on a 32-bit word in memory or register.
void immediate_arithmetic_op_32(byte subcode,
Register dst,
Immediate src);
void immediate_arithmetic_op_32(byte subcode,
const Operand& dst,
Immediate src);
// Emit machine code for a shift operation. // Emit machine code for a shift operation.
void shift(Register dst, Immediate shift_amount, int subcode); void shift(Register dst, Immediate shift_amount, int subcode);
void shift_32(Register dst, Immediate shift_amount, int subcode); void shift_32(Register dst, Immediate shift_amount, int subcode);
...@@ -1180,6 +1236,7 @@ class Assembler : public Malloced { ...@@ -1180,6 +1236,7 @@ class Assembler : public Malloced {
friend class CodePatcher; friend class CodePatcher;
friend class EnsureSpace; friend class EnsureSpace;
friend class RegExpMacroAssemblerX64;
// Code buffer: // Code buffer:
// The buffer into which code and relocation info are generated. // The buffer into which code and relocation info are generated.
......
...@@ -25,3 +25,1277 @@ ...@@ -25,3 +25,1277 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "v8.h"
#include "serialize.h"
#include "unicode.h"
#include "log.h"
#include "ast.h"
#include "regexp-stack.h"
#include "macro-assembler.h"
#include "regexp-macro-assembler.h"
#include "x64/macro-assembler-x64.h"
#include "x64/regexp-macro-assembler-x64.h"
namespace v8 {
namespace internal {
/*
* This assembler uses the following register assignment convention
* - rdx : currently loaded character(s) as ASCII or UC16. Must be loaded using
* LoadCurrentCharacter before using any of the dispatch methods.
* - rdi : current position in input, as negative offset from end of string.
* Please notice that this is the byte offset, not the character
* offset! Is always a 32-bit signed (negative) offset, but must be
* maintained sign-extended to 64 bits, since it is used as index.
* - rsi : end of input (points to byte after last character in input),
* so that rsi+rdi points to the current character.
* - rbp : frame pointer. Used to access arguments, local variables and
* RegExp registers.
* - rsp : points to tip of C stack.
* - rcx : points to tip of backtrack stack. The backtrack stack contains
* only 32-bit values. Most are offsets from some base (e.g., character
* positions from end of string or code location from Code* pointer).
* - r8 : code object pointer. Used to convert between absolute and
* code-object-relative addresses.
*
* The registers rax, rbx, r9 and r11 are free to use for computations.
* If changed to use r12+, they should be saved as callee-save registers.
*
* Each call to a C++ method should retain these registers.
*
* The stack will have the following content, in some order, indexable from the
* frame pointer (see, e.g., kStackHighEnd):
* - stack_area_base (High end of the memory area to use as
* backtracking stack)
* - at_start (if 1, start at start of string, if 0, don't)
* - int* capture_array (int[num_saved_registers_], for output).
* - end of input (Address of end of string)
* - start of input (Address of first character in string)
* - start index (character index of start)
* - String** input_string (location of a handle containing the string)
* - return address
* - backup of callee save registers (rbx, possibly rsi and rdi).
* - Offset of location before start of input (effectively character
* position -1). Used to initialize capture registers to a non-position.
* - register 0 rbp[-n] (Only positions must be stored in the first
* - register 1 rbp[-n-8] num_saved_registers_ registers)
* - ...
*
* The first num_saved_registers_ registers are initialized to point to
* "character -1" in the string (i.e., char_size() bytes before the first
* character of the string). The remaining registers starts out uninitialized.
*
* The first seven values must be provided by the calling code by
* calling the code's entry address cast to a function pointer with the
* following signature:
* int (*match)(String* input_string,
* int start_index,
* Address start,
* Address end,
* int* capture_output_array,
* bool at_start,
* byte* stack_area_base)
*/
#define __ ACCESS_MASM(masm_)
// Creates a RegExp macro assembler for the given mode.
// registers_to_save is used both as the initial register count and as the
// number of saved (capture) registers.
// The constructor immediately emits a jump to entry_label_ (which is bound
// later, once the entry code can be written) and binds start_label_ right
// after that jump, so the code generated next begins at start_label_.
RegExpMacroAssemblerX64::RegExpMacroAssemblerX64(
Mode mode,
int registers_to_save)
: masm_(new MacroAssembler(NULL, kRegExpCodeSize)),
code_relative_fixup_positions_(4),
mode_(mode),
num_registers_(registers_to_save),
num_saved_registers_(registers_to_save),
entry_label_(),
start_label_(),
success_label_(),
backtrack_label_(),
exit_label_() {
__ jmp(&entry_label_); // We'll write the entry code when we know more.
__ bind(&start_label_); // And then continue from here.
}
// Deletes the owned MacroAssembler and marks every label member as unused.
RegExpMacroAssemblerX64::~RegExpMacroAssemblerX64() {
delete masm_;
// Unuse labels in case we throw away the assembler without calling GetCode.
entry_label_.Unuse();
start_label_.Unuse();
success_label_.Unuse();
backtrack_label_.Unuse();
exit_label_.Unuse();
check_preempt_label_.Unuse();
stack_overflow_label_.Unuse();
}
// Returns the slack constant defined by RegExpStack (kStackLimitSlack).
int RegExpMacroAssemblerX64::stack_limit_slack() {
return RegExpStack::kStackLimitSlack;
}
// Emits code that advances the current input position (rdi) by |by|
// characters. The position is a byte offset, so the character count is
// scaled by char_size(). Nothing is emitted when |by| is zero.
void RegExpMacroAssemblerX64::AdvanceCurrentPosition(int by) {
  if (by != 0) {
    __ addq(rdi, Immediate(by * char_size()));
  }
}
// Emits code that adds the constant |by| to RegExp register |reg|.
// |reg| must be a valid, already known register index. No code is emitted
// for a zero increment.
void RegExpMacroAssemblerX64::AdvanceRegister(int reg, int by) {
  ASSERT(reg >= 0);
  ASSERT(reg < num_registers_);
  if (by == 0) {
    return;  // Adding zero is a no-op; emit nothing.
  }
  __ addq(register_location(reg), Immediate(by));
}
// Emits a backtrack: after a preemption check, the top entry of the backtrack
// stack (a code-object-relative offset) is popped into rbx, converted to an
// absolute address by adding the code object pointer, and jumped to.
void RegExpMacroAssemblerX64::Backtrack() {
CheckPreemption();
// Pop Code* offset from backtrack stack, add Code* and jump to location.
Pop(rbx);
__ addq(rbx, code_object_pointer());
__ jmp(rbx);
}
// Binds |label| to the current position in the generated code.
void RegExpMacroAssemblerX64::Bind(Label* label) {
__ bind(label);
}
// Compares the current character with |c| and branches to |on_equal| if they
// are equal. A NULL label means "backtrack" (see BranchOrBacktrack).
void RegExpMacroAssemblerX64::CheckCharacter(uint32_t c, Label* on_equal) {
__ cmpl(current_character(), Immediate(c));
BranchOrBacktrack(equal, on_equal);
}
// Branches to |on_greater| (or backtracks if it is NULL) when the current
// character compares greater than |limit|. The comparison is a 32-bit
// signed compare (condition "greater").
void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) {
__ cmpl(current_character(), Immediate(limit));
BranchOrBacktrack(greater, on_greater);
}
// Branches to |on_at_start| if the match was started at the start of the
// string (at_start frame slot is non-zero) and the current position
// (rsi + rdi) still equals the input start address stored in the frame.
// Otherwise execution falls through.
void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) {
Label not_at_start;
// Did we start the match at the start of the string at all?
__ cmpb(Operand(rbp, kAtStart), Immediate(0));
BranchOrBacktrack(equal, &not_at_start);
// If we did, are we still at the start of the input?
__ lea(rax, Operand(rsi, rdi, times_1, 0));
__ cmpq(rax, Operand(rbp, kInputStart));
BranchOrBacktrack(equal, on_at_start);
__ bind(&not_at_start);
}
// Branches to |on_not_at_start| if either the match was not started at the
// start of the string (at_start frame slot is zero), or the current position
// (rsi + rdi) differs from the input start address stored in the frame.
void RegExpMacroAssemblerX64::CheckNotAtStart(Label* on_not_at_start) {
// Did we start the match at the start of the string at all?
__ cmpb(Operand(rbp, kAtStart), Immediate(0));
BranchOrBacktrack(equal, on_not_at_start);
// If we did, are we still at the start of the input?
__ lea(rax, Operand(rsi, rdi, times_1, 0));
__ cmpq(rax, Operand(rbp, kInputStart));
BranchOrBacktrack(not_equal, on_not_at_start);
}
// Branches to |on_less| (or backtracks if it is NULL) when the current
// character compares less than |limit| (32-bit signed compare).
void RegExpMacroAssemblerX64::CheckCharacterLT(uc16 limit, Label* on_less) {
__ cmpl(current_character(), Immediate(limit));
BranchOrBacktrack(less, on_less);
}
// Emits code comparing the input at character offset |cp_offset| from the
// current position against the literal |str|, one character at a time.
// Branches to |on_failure| on the first mismatch; a NULL |on_failure| means
// backtrack. When |check_end_of_string| is set, a check that the whole
// literal fits in the remaining input is emitted first.
void RegExpMacroAssemblerX64::CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure,
bool check_end_of_string) {
// Offsets are in bytes, since rdi is a byte offset.
int byte_length = str.length() * char_size();
int byte_offset = cp_offset * char_size();
if (check_end_of_string) {
// Check that there are at least str.length() characters left in the input.
__ cmpl(rdi, Immediate(-(byte_offset + byte_length)));
BranchOrBacktrack(greater, on_failure);
}
if (on_failure == NULL) {
// Instead of inlining a backtrack, (re)use the global backtrack target.
on_failure = &backtrack_label_;
}
// TODO(lrn): Test multiple characters at a time by loading 4 or 8 bytes
// at a time.
for (int i = 0; i < str.length(); i++) {
if (mode_ == ASCII) {
__ cmpb(Operand(rsi, rdi, times_1, byte_offset + i),
Immediate(static_cast<int8_t>(str[i])));
} else {
ASSERT(mode_ == UC16);
__ cmpw(Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)),
Immediate(str[i]));
}
BranchOrBacktrack(not_equal, on_failure);
}
}
// Compares the current position (rdi) with the 32-bit value on top of the
// backtrack stack. If they are equal, Drop() is called (presumably discarding
// that stack entry - confirm against its definition) and control goes to
// |on_equal| (or backtracks if it is NULL). Otherwise falls through.
void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
Label fallthrough;
__ cmpl(rdi, Operand(backtrack_stackpointer(), 0));
__ j(not_equal, &fallthrough);
Drop();
BranchOrBacktrack(no_condition, on_equal);
__ bind(&fallthrough);
}
void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
int start_reg,
Label* on_no_match) {
Label fallthrough;
__ movq(rdx, register_location(start_reg)); // Offset of start of capture
__ movq(rbx, register_location(start_reg + 1)); // Offset of end of capture
__ subq(rbx, rdx); // Length of capture.
// -----------------------
// rdx = Start offset of capture.
// rbx = Length of capture
// If length is negative, this code will fail (it's a symptom of a partial or
// illegal capture where start of capture after end of capture).
// This must not happen (no back-reference can reference a capture that wasn't
// closed before in the reg-exp, and we must not generate code that can cause
// this condition).
// If length is zero, either the capture is empty or it is nonparticipating.
// In either case succeed immediately.
__ j(equal, &fallthrough);
if (mode_ == ASCII) {
Label loop_increment;
if (on_no_match == NULL) {
on_no_match = &backtrack_label_;
}
__ lea(r9, Operand(rsi, rdx, times_1, 0));
__ lea(r11, Operand(rsi, rdi, times_1, 0));
__ addq(rbx, r9); // End of capture
// ---------------------
// r11 - current input character address
// r9 - current capture character address
// rbx - end of capture
Label loop;
__ bind(&loop);
__ movzxbl(rdx, Operand(r9, 0));
__ movzxbl(rax, Operand(r11, 0));
// al - input character
// dl - capture character
__ cmpb(rax, rdx);
__ j(equal, &loop_increment);
// Mismatch, try case-insensitive match (converting letters to lower-case).
// I.e., if or-ing with 0x20 makes values equal and in range 'a'-'z', it's
// a match.
__ or_(rax, Immediate(0x20)); // Convert match character to lower-case.
__ or_(rdx, Immediate(0x20)); // Convert capture character to lower-case.
__ cmpb(rax, rdx);
__ j(not_equal, on_no_match); // Definitely not equal.
__ subb(rax, Immediate('a'));
__ cmpb(rax, Immediate('z' - 'a'));
__ j(above, on_no_match); // Weren't letters anyway.
__ bind(&loop_increment);
// Increment pointers into match and capture strings.
__ addq(r11, Immediate(1));
__ addq(r9, Immediate(1));
// Compare to end of capture, and loop if not done.
__ cmpq(r9, rbx);
__ j(below, &loop);
// Compute new value of character position after the matched part.
__ movq(rdi, r11);
__ subq(rdi, rsi);
} else {
ASSERT(mode_ == UC16);
// Save important/volatile registers before calling C function.
#ifndef __MSVC__
// Callee save on Win64
__ push(rsi);
__ push(rdi);
#endif
__ push(backtrack_stackpointer());
int num_arguments = 3;
FrameAlign(num_arguments);
// Put arguments into parameter registers. Parameters are
// Address byte_offset1 - Address captured substring's start.
// Address byte_offset2 - Address of current character position.
// size_t byte_length - length of capture in bytes(!)
#ifdef __MSVC__
// Compute and set byte_offset1 (start of capture).
__ lea(rcx, Operand(rsi, rdx, times_1, 0));
// Set byte_offset2.
__ lea(rdx, Operand(rsi, rdi, times_1, 0));
// Set byte_length.
__ movq(r8, rbx);
#else // AMD64 calling convention
// Compute byte_offset2 (current position = rsi+rdi).
__ lea(rax, Operand(rsi, rdi, times_1, 0));
// Compute and set byte_offset1 (start of capture).
__ lea(rdi, Operand(rsi, rdx, times_1, 0));
// Set byte_offset2.
__ movq(rsi, rax);
// Set byte_length.
__ movq(rdx, rbx);
#endif
Address function_address = FUNCTION_ADDR(&CaseInsensitiveCompareUC16);
CallCFunction(function_address, num_arguments);
// Restore original values before reacting on result value.
__ Move(code_object_pointer(), masm_->CodeObject());
__ pop(backtrack_stackpointer());
#ifndef __MSVC__
__ pop(rdi);
__ pop(rsi);
#endif
// Check if function returned non-zero for success or zero for failure.
__ testq(rax, rax);
BranchOrBacktrack(zero, on_no_match);
// On success, increment position by length of capture.
// Requires that rbx is callee save (true for both Win64 and AMD64 ABIs).
__ addq(rdi, rbx);
}
__ bind(&fallthrough);
}
// Emits a case-sensitive back-reference check: the capture delimited by
// registers |start_reg| and |start_reg + 1| must match the input at the
// current position. On a match the current position (rdi) is advanced past
// the matched text; on a mismatch, or if too little input remains, control
// goes to |on_no_match| (or backtracks if it is NULL). An empty capture
// matches immediately.
// Clobbers rax, rbx, rdx (the current character) and r9.
void RegExpMacroAssemblerX64::CheckNotBackReference(
int start_reg,
Label* on_no_match) {
Label fallthrough;
// Find length of back-referenced capture.
__ movq(rdx, register_location(start_reg));
__ movq(rax, register_location(start_reg + 1));
__ subq(rax, rdx); // Length to check.
// Fail on partial or illegal capture (start of capture after end of capture).
// This must not happen (no back-reference can reference a capture that wasn't
// closed before in the reg-exp).
__ Check(greater_equal, "Invalid capture referenced");
// Succeed on empty capture (including non-participating capture)
__ j(equal, &fallthrough);
// -----------------------
// rdx - Start of capture
// rax - length of capture
// Check that there are sufficient characters left in the input.
__ movl(rbx, rdi);
__ addl(rbx, rax);
BranchOrBacktrack(greater, on_no_match);
// Compute pointers to match string and capture string
__ lea(rbx, Operand(rsi, rdi, times_1, 0)); // Start of match.
__ addq(rdx, rsi); // Start of capture.
__ lea(r9, Operand(rdx, rax, times_1, 0)); // End of capture
// -----------------------
// rdx - current capture character address.
// rbx - current input character address.
// r9 - end of capture (start of capture + capture length).
Label loop;
__ bind(&loop);
if (mode_ == ASCII) {
__ movzxbl(rax, Operand(rdx, 0));
__ cmpb(rax, Operand(rbx, 0));
} else {
ASSERT(mode_ == UC16);
__ movzxwl(rax, Operand(rdx, 0));
__ cmpw(rax, Operand(rbx, 0));
}
BranchOrBacktrack(not_equal, on_no_match);
// Increment pointers into capture and match string.
__ addq(rbx, Immediate(char_size()));
__ addq(rdx, Immediate(char_size()));
// Check if we have reached end of match area.
__ cmpq(rdx, r9);
__ j(below, &loop);
// Success.
// Set current character position to position after match.
__ movq(rdi, rbx);
__ subq(rdi, rsi);
__ bind(&fallthrough);
}
// Branches to |on_not_equal| (or backtracks if it is NULL) when RegExp
// registers |reg1| and |reg2| hold different values. Uses rax as scratch.
void RegExpMacroAssemblerX64::CheckNotRegistersEqual(int reg1,
int reg2,
Label* on_not_equal) {
__ movq(rax, register_location(reg1));
__ cmpq(rax, register_location(reg2));
BranchOrBacktrack(not_equal, on_not_equal);
}
// Branches to |on_not_equal| (or backtracks if it is NULL) when the current
// character differs from |c|.
void RegExpMacroAssemblerX64::CheckNotCharacter(uint32_t c,
Label* on_not_equal) {
__ cmpl(current_character(), Immediate(c));
BranchOrBacktrack(not_equal, on_not_equal);
}
// Branches to |on_equal| (or backtracks if it is NULL) when
// (current character & mask) == c. The current character itself is left
// untouched; the masked copy lives in rax.
void RegExpMacroAssemblerX64::CheckCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_equal) {
__ movl(rax, current_character());
__ and_(rax, Immediate(mask));
__ cmpl(rax, Immediate(c));
BranchOrBacktrack(equal, on_equal);
}
// Branches to |on_not_equal| (or backtracks if it is NULL) when
// (current character & mask) != c. The masked copy is computed in rax.
void RegExpMacroAssemblerX64::CheckNotCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_not_equal) {
__ movl(rax, current_character());
__ and_(rax, Immediate(mask));
__ cmpl(rax, Immediate(c));
BranchOrBacktrack(not_equal, on_not_equal);
}
// Branches to |on_not_equal| (or backtracks if it is NULL) when
// ((current character - minus) & mask) != c. The subtraction is done with
// lea into rax, so the current character register is not modified.
void RegExpMacroAssemblerX64::CheckNotCharacterAfterMinusAnd(
uc16 c,
uc16 minus,
uc16 mask,
Label* on_not_equal) {
ASSERT(minus < String::kMaxUC16CharCode);
__ lea(rax, Operand(current_character(), -minus));
__ and_(rax, Immediate(mask));
__ cmpl(rax, Immediate(c));
BranchOrBacktrack(not_equal, on_not_equal);
}
// Tries to emit a specialized check for the character class named by |type|
// ('s', 'S', 'd', 'D', '.', '*') applied to the character at |cp_offset|.
// When |check_offset| is set, the character load includes a bounds check that
// goes to |on_no_match| at end of input. Control goes to |on_no_match| (or
// backtracks if it is NULL) when the character is not in the class.
// Returns true if the check was emitted, false if there is no custom
// implementation for this type/mode combination.
// Note: several cases modify the current character register in place, and the
// 'S' case has already emitted the character load when it returns false for
// UC16 mode.
bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
int cp_offset,
bool check_offset,
Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned
// (c - min) <= (max - min) check
switch (type) {
case 's':
// Match space-characters
if (mode_ == ASCII) {
// ASCII space characters are '\t'..'\r' and ' '.
if (check_offset) {
LoadCurrentCharacter(cp_offset, on_no_match);
} else {
LoadCurrentCharacterUnchecked(cp_offset, 1);
}
Label success;
__ cmpl(current_character(), Immediate(' '));
__ j(equal, &success);
// Check range 0x09..0x0d
__ subl(current_character(), Immediate('\t'));
__ cmpl(current_character(), Immediate('\r' - '\t'));
BranchOrBacktrack(above, on_no_match);
__ bind(&success);
return true;
}
return false;
case 'S':
// Match non-space characters.
if (check_offset) {
LoadCurrentCharacter(cp_offset, on_no_match, 1);
} else {
LoadCurrentCharacterUnchecked(cp_offset, 1);
}
if (mode_ == ASCII) {
// ASCII space characters are '\t'..'\r' and ' '.
__ cmpl(current_character(), Immediate(' '));
BranchOrBacktrack(equal, on_no_match);
__ subl(current_character(), Immediate('\t'));
__ cmpl(current_character(), Immediate('\r' - '\t'));
BranchOrBacktrack(below_equal, on_no_match);
return true;
}
return false;
case 'd':
// Match ASCII digits ('0'..'9')
if (check_offset) {
LoadCurrentCharacter(cp_offset, on_no_match, 1);
} else {
LoadCurrentCharacterUnchecked(cp_offset, 1);
}
__ subl(current_character(), Immediate('0'));
__ cmpl(current_character(), Immediate('9' - '0'));
BranchOrBacktrack(above, on_no_match);
return true;
case 'D':
// Match non ASCII-digits
if (check_offset) {
LoadCurrentCharacter(cp_offset, on_no_match, 1);
} else {
LoadCurrentCharacterUnchecked(cp_offset, 1);
}
__ subl(current_character(), Immediate('0'));
__ cmpl(current_character(), Immediate('9' - '0'));
BranchOrBacktrack(below_equal, on_no_match);
return true;
case '.': {
// Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
if (check_offset) {
LoadCurrentCharacter(cp_offset, on_no_match, 1);
} else {
LoadCurrentCharacterUnchecked(cp_offset, 1);
}
__ xor_(current_character(), Immediate(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
__ subl(current_character(), Immediate(0x0b));
__ cmpl(current_character(), Immediate(0x0c - 0x0b));
BranchOrBacktrack(below_equal, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
// computed (current_char ^ 0x01 - 0x0b). I.e., check for
// 0x201d (0x2028 - 0x0b) or 0x201e.
__ subl(current_character(), Immediate(0x2028 - 0x0b));
__ cmpl(current_character(), Immediate(1));
BranchOrBacktrack(below_equal, on_no_match);
}
return true;
}
case '*':
// Match any character.
if (check_offset) {
CheckPosition(cp_offset, on_no_match);
}
return true;
// No custom implementation (yet): w, W, s(UC16), S(UC16).
default:
return false;
}
}
// Emits code that ends the match with a failure result: rax is set to zero
// (FAILURE) and control jumps to the common exit code.
void RegExpMacroAssemblerX64::Fail() {
ASSERT(FAILURE == 0); // Return value for failure is zero.
__ xor_(rax, rax); // zero rax.
__ jmp(&exit_label_);
}
// Finalizes code generation and returns the resulting Code object.
//
// Emits, in order: the entry code (frame setup, stack-limit check, register
// initialization, jump to start_label_), the success/exit code, and - only
// if the corresponding labels were used - the shared backtrack code, the
// preemption check, the backtrack-stack overflow handler and the
// exit-with-exception stub. Finally the code-relative label offsets are
// fixed up and a Code object is allocated from the assembled buffer.
//
// |source| is only used for logging the code creation event.
Handle<Object> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
  // Finalize code - write the entry point code now we know how many
  // registers we need.

  // Entry code:
  __ bind(&entry_label_);
  // Start new stack frame.
  __ push(rbp);
  __ movq(rbp, rsp);
  // Save parameters and callee-save registers. Order here should correspond
  // to order of kBackup_rbx etc.
#ifdef __MSVC__
  // MSVC passes arguments in rcx, rdx, r8, r9, with backing stack slots.
  // Store register parameters in pre-allocated stack slots,
  __ movq(Operand(rbp, kInputString), rcx);
  __ movq(Operand(rbp, kStartIndex), rdx);
  __ movq(Operand(rbp, kInputStart), r8);
  __ movq(Operand(rbp, kInputEnd), r9);
  // Callee-save on Win64.
  __ push(rsi);
  __ push(rdi);
  __ push(rbx);
#else
  // GCC passes arguments in rdi, rsi, rdx, rcx, r8, r9 (and then on stack).
  // Push register parameters on stack for reference.
  ASSERT_EQ(kInputString, -1 * kPointerSize);
  ASSERT_EQ(kStartIndex, -2 * kPointerSize);
  ASSERT_EQ(kInputStart, -3 * kPointerSize);
  ASSERT_EQ(kInputEnd, -4 * kPointerSize);
  ASSERT_EQ(kRegisterOutput, -5 * kPointerSize);
  ASSERT_EQ(kAtStart, -6 * kPointerSize);
  __ push(rdi);
  __ push(rsi);
  __ push(rdx);
  __ push(rcx);
  __ push(r8);
  __ push(r9);

  __ push(rbx);  // Callee-save
#endif
  __ push(Immediate(0));  // Make room for "input start - 1" constant.

  // Check if we have space on the stack for registers.
  Label stack_limit_hit;
  Label stack_ok;

  ExternalReference stack_guard_limit =
      ExternalReference::address_of_stack_guard_limit();
  __ movq(rcx, rsp);
  __ movq(kScratchRegister, stack_guard_limit);
  __ subq(rcx, Operand(kScratchRegister, 0));
  // Handle it if the stack pointer is already below the stack limit.
  __ j(below_equal, &stack_limit_hit);
  // Check if there is room for the variable number of registers above
  // the stack limit.
  __ cmpq(rcx, Immediate(num_registers_ * kPointerSize));
  __ j(above_equal, &stack_ok);
  // Exit with OutOfMemory exception. There is not enough space on the stack
  // for our working registers.
  __ movq(rax, Immediate(EXCEPTION));
  __ jmp(&exit_label_);

  __ bind(&stack_limit_hit);
  __ Move(code_object_pointer(), masm_->CodeObject());
  CallCheckStackGuardState();  // Preserves no registers beside rbp and rsp.
  __ testq(rax, rax);
  // If returned value is non-zero, we exit with the returned value as result.
  __ j(not_zero, &exit_label_);

  __ bind(&stack_ok);

  // Allocate space on stack for registers.
  __ subq(rsp, Immediate(num_registers_ * kPointerSize));
  // Load string length.
  __ movq(rsi, Operand(rbp, kInputEnd));
  // Load input position.
  __ movq(rdi, Operand(rbp, kInputStart));
  // Set up rdi to be negative offset from string end.
  __ subq(rdi, rsi);
  // Set rax to address of char before start of input
  // (effectively string position -1).
  __ lea(rax, Operand(rdi, -char_size()));
  // Store this value in a local variable, for use when clearing
  // position registers.
  __ movq(Operand(rbp, kInputStartMinusOne), rax);
  if (num_saved_registers_ > 0) {
    // Fill saved registers with initial value = start offset - 1
    // Fill in stack push order, to avoid accessing across an unwritten
    // page (a problem on Windows).
    __ movq(rcx, Immediate(kRegisterZero));
    Label init_loop;
    __ bind(&init_loop);
    __ movq(Operand(rbp, rcx, times_1, 0), rax);
    __ subq(rcx, Immediate(kPointerSize));
    __ cmpq(rcx,
            Immediate(kRegisterZero - num_saved_registers_ * kPointerSize));
    __ j(greater, &init_loop);
  }
  // Ensure that we have written to each stack page, in order. Skipping a page
  // on Windows can cause segmentation faults. Assuming page size is 4k.
  const int kPageSize = 4096;
  const int kRegistersPerPage = kPageSize / kPointerSize;
  for (int i = num_saved_registers_ + kRegistersPerPage - 1;
       i < num_registers_;
       i += kRegistersPerPage) {
    __ movq(register_location(i), rax);  // One write every page.
  }

  // Initialize backtrack stack pointer.
  __ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
  // Initialize code object pointer.
  __ Move(code_object_pointer(), masm_->CodeObject());
  // Load previous char as initial value of current-character.
  Label at_start;
  // The at_start slot holds a bool argument, so only its low byte is
  // guaranteed to be meaningful. Compare a single byte, as CheckAtStart and
  // CheckNotAtStart do, instead of the whole 64-bit slot.
  __ cmpb(Operand(rbp, kAtStart), Immediate(0));
  __ j(not_equal, &at_start);
  LoadCurrentCharacterUnchecked(-1, 1);  // Load previous char.
  __ jmp(&start_label_);
  __ bind(&at_start);
  __ movq(current_character(), Immediate('\n'));
  __ jmp(&start_label_);

  // Exit code:
  if (success_label_.is_linked()) {
    // Save captures when successful.
    __ bind(&success_label_);
    if (num_saved_registers_ > 0) {
      // copy captures to output
      __ movq(rbx, Operand(rbp, kRegisterOutput));
      __ movq(rcx, Operand(rbp, kInputEnd));
      __ subq(rcx, Operand(rbp, kInputStart));
      for (int i = 0; i < num_saved_registers_; i++) {
        __ movq(rax, register_location(i));
        __ addq(rax, rcx);  // Convert to index from start, not end.
        if (mode_ == UC16) {
          __ sar(rax, Immediate(1));  // Convert byte index to character index.
        }
        __ movl(Operand(rbx, i * kIntSize), rax);
      }
    }
    __ movq(rax, Immediate(SUCCESS));
  }

  // Exit and return rax
  __ bind(&exit_label_);

#ifdef __MSVC__
  // Restore callee save registers.
  __ lea(rsp, Operand(rbp, kLastCalleeSaveRegister));
  __ pop(rbx);
  __ pop(rdi);
  __ pop(rsi);
  // Stack now at rbp.
#else
  // Restore callee save register.
  __ movq(rbx, Operand(rbp, kBackup_rbx));
  // Skip rsp to rbp.
  __ movq(rsp, rbp);
#endif
  // Exit function frame, restore previous one.
  __ pop(rbp);
  __ ret(0);

  // Backtrack code (branch target for conditional backtracks).
  if (backtrack_label_.is_linked()) {
    __ bind(&backtrack_label_);
    Backtrack();
  }

  Label exit_with_exception;

  // Preempt-code
  if (check_preempt_label_.is_linked()) {
    SafeCallTarget(&check_preempt_label_);

    __ push(backtrack_stackpointer());
    __ push(rdi);

    CallCheckStackGuardState();
    __ testq(rax, rax);
    // If returning non-zero, we should end execution with the given
    // result as return value.
    __ j(not_zero, &exit_label_);

    // Restore registers.
    __ Move(code_object_pointer(), masm_->CodeObject());
    __ pop(rdi);
    __ pop(backtrack_stackpointer());
    // String might have moved: Reload rsi from frame.
    __ movq(rsi, Operand(rbp, kInputEnd));
    SafeReturn();
  }

  // Backtrack stack overflow code.
  if (stack_overflow_label_.is_linked()) {
    SafeCallTarget(&stack_overflow_label_);
    // Reached if the backtrack-stack limit has been hit.

    Label grow_failed;
    // Save registers before calling C function
#ifndef __MSVC__
    // Callee-save in Microsoft 64-bit ABI, but not in AMD64 ABI.
    __ push(rsi);
    __ push(rdi);
#endif

    // Call GrowStack(backtrack_stackpointer())
    int num_arguments = 2;
    FrameAlign(num_arguments);
#ifdef __MSVC__
    // Microsoft passes parameters in rcx, rdx.
    // First argument, backtrack stackpointer, is already in rcx.
    __ lea(rdx, Operand(rbp, kStackHighEnd));  // Second argument
#else
    // AMD64 ABI passes parameters in rdi, rsi.
    __ movq(rdi, backtrack_stackpointer());  // First argument.
    __ lea(rsi, Operand(rbp, kStackHighEnd));  // Second argument.
#endif
    CallCFunction(FUNCTION_ADDR(&GrowStack), num_arguments);
    // If return NULL, we have failed to grow the stack, and
    // must exit with a stack-overflow exception.
    __ testq(rax, rax);
    __ j(equal, &exit_with_exception);
    // Otherwise use return value as new stack pointer.
    __ movq(backtrack_stackpointer(), rax);
    // Restore saved registers and continue.
    __ Move(code_object_pointer(), masm_->CodeObject());
#ifndef __MSVC__
    __ pop(rdi);
    __ pop(rsi);
#endif
    SafeReturn();
  }

  if (exit_with_exception.is_linked()) {
    // If any of the code above needed to exit with an exception.
    __ bind(&exit_with_exception);
    // Exit with Result EXCEPTION(-1) to signal thrown exception.
    __ movq(rax, Immediate(EXCEPTION));
    __ jmp(&exit_label_);
  }

  FixupCodeRelativePositions();

  CodeDesc code_desc;
  masm_->GetCode(&code_desc);
  Handle<Code> code = Factory::NewCode(code_desc,
                                       NULL,
                                       Code::ComputeFlags(Code::REGEXP),
                                       masm_->CodeObject());
  LOG(RegExpCodeCreateEvent(*code, *source));
  return Handle<Object>::cast(code);
}
// Emits an unconditional jump to |to|, or a backtrack if |to| is NULL.
void RegExpMacroAssemblerX64::GoTo(Label* to) {
BranchOrBacktrack(no_condition, to);
}
// Branches to |if_ge| (or backtracks if it is NULL) when RegExp register
// |reg| is greater than or equal to |comparand| (signed 64-bit compare).
void RegExpMacroAssemblerX64::IfRegisterGE(int reg,
int comparand,
Label* if_ge) {
__ cmpq(register_location(reg), Immediate(comparand));
BranchOrBacktrack(greater_equal, if_ge);
}
// Branches to |if_lt| (or backtracks if it is NULL) when RegExp register
// |reg| is less than |comparand| (signed 64-bit compare).
void RegExpMacroAssemblerX64::IfRegisterLT(int reg,
int comparand,
Label* if_lt) {
__ cmpq(register_location(reg), Immediate(comparand));
BranchOrBacktrack(less, if_lt);
}
// Branches to |if_eq| (or backtracks if it is NULL) when RegExp register
// |reg| equals the current input position (rdi).
void RegExpMacroAssemblerX64::IfRegisterEqPos(int reg,
Label* if_eq) {
__ cmpq(rdi, register_location(reg));
BranchOrBacktrack(equal, if_eq);
}
// Identifies this macro assembler as the x64 implementation.
RegExpMacroAssembler::IrregexpImplementation
RegExpMacroAssemblerX64::Implementation() {
return kX64Implementation;
}
// Emits code that loads |characters| characters, starting |cp_offset|
// characters from the current position, into the current-character register.
// When |check_bounds| is true, a position check is emitted first that goes
// to |on_end_of_input| (or backtracks if it is NULL) if the last requested
// character lies outside the input. When |check_bounds| is false the caller
// vouches for the position and no check is emitted.
void RegExpMacroAssemblerX64::LoadCurrentCharacter(int cp_offset,
                                                   Label* on_end_of_input,
                                                   bool check_bounds,
                                                   int characters) {
  ASSERT(cp_offset >= -1);      // ^ and \b can look behind one character.
  ASSERT(cp_offset < (1<<30));  // Be sane! (And ensure negation works)
  if (check_bounds) {
    CheckPosition(cp_offset + characters - 1, on_end_of_input);
  }
  LoadCurrentCharacterUnchecked(cp_offset, characters);
}
// Pops the top of the backtrack stack into the current position (rdi).
void RegExpMacroAssemblerX64::PopCurrentPosition() {
Pop(rdi);
}
// Pops the top of the backtrack stack into RegExp register |register_index|,
// using rax as scratch.
void RegExpMacroAssemblerX64::PopRegister(int register_index) {
Pop(rax);
__ movq(register_location(register_index), rax);
}
// Pushes |label| as a backtrack target on the backtrack stack and then
// emits a backtrack-stack limit check.
void RegExpMacroAssemblerX64::PushBacktrack(Label* label) {
Push(label);
CheckStackLimit();
}
// Pushes the current position (rdi) on the backtrack stack. No stack limit
// check is emitted here.
void RegExpMacroAssemblerX64::PushCurrentPosition() {
Push(rdi);
}
// Pushes the value of RegExp register |register_index| on the backtrack
// stack (via rax). A backtrack-stack limit check is emitted afterwards only
// if |check_stack_limit| is set.
void RegExpMacroAssemblerX64::PushRegister(int register_index,
StackCheckFlag check_stack_limit) {
__ movq(rax, register_location(register_index));
Push(rax);
if (check_stack_limit) CheckStackLimit();
}
// Sets the current position (rdi) to the value of RegExp register |reg|.
void RegExpMacroAssemblerX64::ReadCurrentPositionFromRegister(int reg) {
__ movq(rdi, register_location(reg));
}
// Restores the backtrack stack pointer from RegExp register |reg|. The
// register holds an offset relative to the stack high end (as written by
// WriteStackPointerToRegister), so the frame's kStackHighEnd value is added
// back to obtain an absolute pointer.
void RegExpMacroAssemblerX64::ReadStackPointerFromRegister(int reg) {
__ movq(backtrack_stackpointer(), register_location(reg));
__ addq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
}
// Stores the constant |to| in RegExp register |register_index|. Only
// registers beyond the saved (position) registers may be set this way.
void RegExpMacroAssemblerX64::SetRegister(int register_index, int to) {
ASSERT(register_index >= num_saved_registers_); // Reserved for positions!
__ movq(register_location(register_index), Immediate(to));
}
// Emits a jump to the success code (bound in GetCode).
void RegExpMacroAssemblerX64::Succeed() {
__ jmp(&success_label_);
}
// Stores the current position, displaced by |cp_offset| characters, in
// RegExp register |reg|. With a zero offset rdi is stored directly;
// otherwise the displaced byte offset is first computed in rax.
void RegExpMacroAssemblerX64::WriteCurrentPositionToRegister(int reg,
                                                             int cp_offset) {
  if (cp_offset == 0) {
    // No displacement: the position register can be written as is.
    __ movq(register_location(reg), rdi);
    return;
  }
  // The position is a byte offset, so scale the character offset.
  __ lea(rax, Operand(rdi, cp_offset * char_size()));
  __ movq(register_location(reg), rax);
}
// Resets RegExp registers reg_from..reg_to (inclusive) to the
// "input start minus one" value stored in the frame, using rax as scratch.
void RegExpMacroAssemblerX64::ClearRegisters(int reg_from, int reg_to) {
ASSERT(reg_from <= reg_to);
__ movq(rax, Operand(rbp, kInputStartMinusOne));
for (int reg = reg_from; reg <= reg_to; reg++) {
__ movq(register_location(reg), rax);
}
}
// Saves the backtrack stack pointer in RegExp register |reg| as an offset
// relative to the frame's kStackHighEnd value, using rax as scratch.
void RegExpMacroAssemblerX64::WriteStackPointerToRegister(int reg) {
__ movq(rax, backtrack_stackpointer());
__ subq(rax, Operand(rbp, kStackHighEnd));
__ movq(register_location(reg), rax);
}
// Private methods:
// Emits a call to the C++ function CheckStackGuardState with three
// arguments: the address where the call's return address will be stored,
// the code object pointer, and the RegExp frame pointer (rbp). The argument
// registers depend on the calling convention (Win64 vs. AMD64).
void RegExpMacroAssemblerX64::CallCheckStackGuardState() {
// This function call preserves no register values. Caller should
// store anything volatile in a C call or overwritten by this function.
int num_arguments = 3;
FrameAlign(num_arguments);
#ifdef __MSVC__
// Second argument: Code* of self. (Do this before overwriting r8).
__ movq(rdx, code_object_pointer());
// Third argument: RegExp code frame pointer.
__ movq(r8, rbp);
// First argument: Next address on the stack (will be address of
// return address).
__ lea(rcx, Operand(rsp, -kPointerSize));
#else
// Third argument: RegExp code frame pointer.
__ movq(rdx, rbp);
// Second argument: Code* of self.
__ movq(rsi, code_object_pointer());
// First argument: Next address on the stack (will be address of
// return address).
__ lea(rdi, Operand(rsp, -kPointerSize));
#endif
CallCFunction(FUNCTION_ADDR(&CheckStackGuardState), num_arguments);
}
// Helper function for reading a value out of a stack frame.
// Returns a reference of type T to the memory at re_frame + frame_offset, so
// the slot can be both read and assigned through the result.
// NOTE(review): the slot is obtained through Memory::int32_at and then
// reinterpreted as T&; callers in this file also use it with pointer-sized T.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
}
// C++ function called from generated code when the stack guard triggers.
//
// return_address: location of the return address into the RegExp code; it is
//                 patched if the code object moved during the interrupt.
// re_code:        the RegExp code object that made the call.
// re_frame:       frame pointer of the RegExp code, used to read and update
//                 the input string slots.
//
// Returns EXCEPTION on a real stack overflow or if handling the interrupt
// produced an exception, RETRY if the subject string changed between ASCII
// and UC16 representation, and 0 if matching can continue.
int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
Code* re_code,
Address re_frame) {
if (StackGuard::IsStackOverflow()) {
Top::StackOverflow();
return EXCEPTION;
}
// If not real stack overflow the stack guard was used to interrupt
// execution for another purpose.
// Prepare for possible GC.
HandleScope handles;
Handle<Code> code_handle(re_code);
Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
// Current string.
bool is_ascii = subject->IsAsciiRepresentation();
ASSERT(re_code->instruction_start() <= *return_address);
ASSERT(*return_address <=
re_code->instruction_start() + re_code->instruction_size());
Object* result = Execution::HandleStackGuardInterrupt();
// The handle tracks the code object; if it no longer equals the raw pointer,
// the code was moved and the return address must be shifted accordingly.
if (*code_handle != re_code) { // Return address no longer valid
intptr_t delta = *code_handle - re_code;
// Overwrite the return address on the stack.
*return_address += delta;
}
if (result->IsException()) {
return EXCEPTION;
}
// String might have changed.
if (subject->IsAsciiRepresentation() != is_ascii) {
// If we changed between an ASCII and an UC16 string, the specialized
// code cannot be used, and we need to restart regexp matching from
// scratch (including, potentially, compiling a new version of the code).
return RETRY;
}
// Otherwise, the content of the string might have moved. It must still
// be a sequential or external string with the same content.
// Update the start and end pointers in the stack frame to the current
// location (whether it has actually moved or not).
ASSERT(StringShape(*subject).IsSequential() ||
StringShape(*subject).IsExternal());
// The original start address of the characters to match.
const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
// Find the current start address of the same character at the current string
// position.
int start_index = frame_entry<int>(re_frame, kStartIndex);
const byte* new_address = StringCharacterPosition(*subject, start_index);
if (start_address != new_address) {
// If there is a difference, update the object pointer and start and end
// addresses in the RegExp stack frame to match the new value.
const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd);
int byte_length = end_address - start_address;
frame_entry<const String*>(re_frame, kInputString) = *subject;
frame_entry<const byte*>(re_frame, kInputStart) = new_address;
frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length;
}
return 0;
}
// C++ function called from generated code when the backtrack stack is full.
// Requests a backtrack stack of twice the current capacity.
//
// stack_pointer: current backtrack stack pointer (at or below the old base).
// stack_base:    in/out location of the stack base; updated on success.
//
// Returns the stack pointer to use with the new stack, placed the same
// distance below the new base as |stack_pointer| was below the old one, or
// NULL if the stack could not be grown.
Address RegExpMacroAssemblerX64::GrowStack(Address stack_pointer,
                                           Address* stack_base) {
  size_t capacity = RegExpStack::stack_capacity();
  Address previous_base = RegExpStack::stack_base();
  ASSERT(previous_base == *stack_base);
  ASSERT(stack_pointer <= previous_base);
  ASSERT(static_cast<size_t>(previous_base - stack_pointer) <= capacity);

  Address grown_base = RegExpStack::EnsureCapacity(capacity * 2);
  if (grown_base != NULL) {
    *stack_base = grown_base;
    // Keep the same amount of content below the (new) base.
    intptr_t used_bytes = previous_base - stack_pointer;
    return grown_base - used_bytes;
  }
  return NULL;
}
// Returns the rbp-relative operand addressing RegExp register
// |register_index|. As a side effect, num_registers_ is raised so that it
// always covers the highest register index requested so far.
Operand RegExpMacroAssemblerX64::register_location(int register_index) {
  ASSERT(register_index < (1<<30));
  const bool beyond_known_registers = register_index >= num_registers_;
  if (beyond_known_registers) {
    num_registers_ = register_index + 1;
  }
  // Registers are laid out downwards from kRegisterZero, one pointer each.
  const int frame_offset = kRegisterZero - register_index * kPointerSize;
  return Operand(rbp, frame_offset);
}
// Branches to |on_outside_input| (or backtracks if it is NULL) when the
// character at |cp_offset| from the current position lies at or beyond the
// end of the input. rdi is a negative byte offset from the end of the input,
// so the position is outside when rdi >= -cp_offset * char_size().
void RegExpMacroAssemblerX64::CheckPosition(int cp_offset,
Label* on_outside_input) {
__ cmpl(rdi, Immediate(-cp_offset * char_size()));
BranchOrBacktrack(greater_equal, on_outside_input);
}
// Emits a branch to `to` taken when `condition` holds. A negative condition
// value means "unconditional". A NULL target means "backtrack": the
// unconditional form emits a full Backtrack(), the conditional form jumps to
// the shared backtrack_label_.
void RegExpMacroAssemblerX64::BranchOrBacktrack(Condition condition,
                                                Label* to) {
  const bool unconditional = condition < 0;
  if (to == NULL) {
    if (unconditional) {
      Backtrack();
    } else {
      __ j(condition, &backtrack_label_);
    }
    return;
  }
  if (unconditional) {
    __ jmp(to);
  } else {
    __ j(condition, to);
  }
}
// Emits a call to a label inside the generated code. The callee is expected
// to have been bound with SafeCallTarget, which rewrites the return address
// pushed by this call.
void RegExpMacroAssemblerX64::SafeCall(Label* to) {
__ call(to);
}
// Binds `label` as the entry point of an internal subroutine and emits code
// that subtracts the Code object pointer from the word at the top of the
// machine stack (the slot at rsp[0]), so that the stored value is relative to
// the Code object rather than an absolute address.
void RegExpMacroAssemblerX64::SafeCallTarget(Label* label) {
__ bind(label);
__ subq(Operand(rsp, 0), code_object_pointer());
}
// Emits the return matching SafeCallTarget: adds the Code object pointer back
// onto the word at rsp[0] and then returns through it.
void RegExpMacroAssemblerX64::SafeReturn() {
__ addq(Operand(rsp, 0), code_object_pointer());
__ ret(0);
}
// Pushes the low 32 bits of `source` onto the backtrack stack: the backtrack
// stack pointer is lowered by kIntSize and the value is stored with a 32-bit
// move. `source` must not be the backtrack stack pointer itself.
void RegExpMacroAssemblerX64::Push(Register source) {
ASSERT(!source.is(backtrack_stackpointer()));
// Notice: This updates flags, unlike normal Push.
__ subq(backtrack_stackpointer(), Immediate(kIntSize));
__ movl(Operand(backtrack_stackpointer(), 0), source);
}
// Pushes an immediate value onto the backtrack stack: the backtrack stack
// pointer is lowered by kIntSize and the value is stored with a 32-bit move.
void RegExpMacroAssemblerX64::Push(Immediate value) {
// Notice: This updates flags, unlike normal Push.
__ subq(backtrack_stackpointer(), Immediate(kIntSize));
__ movl(Operand(backtrack_stackpointer(), 0), value);
}
// Patches every position recorded by MarkPositionForCodeRelativeFixup. Each
// recorded position is the offset just past a 32-bit operand that currently
// holds a label offset relative to that position; the operand is rewritten
// to be relative to the (tagged) Code object pointer instead. The list of
// recorded positions is emptied afterwards.
void RegExpMacroAssemblerX64::FixupCodeRelativePositions() {
  // Distance from the tagged Code object pointer to the first instruction.
  const int code_start_delta = Code::kHeaderSize - kHeapObjectTag;
  const int fixup_count = code_relative_fixup_positions_.length();
  for (int index = 0; index < fixup_count; index++) {
    const int operand_end = code_relative_fixup_positions_[index];
    // The 32-bit operand to patch immediately precedes the recorded position.
    const int operand_start = operand_end - kIntSize;
    const int label_relative = masm_->long_at(operand_start);
    masm_->long_at_put(operand_start,
                       label_relative + operand_end + code_start_delta);
  }
  code_relative_fixup_positions_.Clear();
}
// Pushes a backtrack target onto the backtrack stack. The stored 32-bit
// operand is recorded for FixupCodeRelativePositions, which later rewrites it
// to be relative to the Code object pointer.
void RegExpMacroAssemblerX64::Push(Label* backtrack_target) {
__ subq(backtrack_stackpointer(), Immediate(kIntSize));
__ movl(Operand(backtrack_stackpointer(), 0), backtrack_target);
// Must directly follow the store: the fixup expects the recorded position to
// be the offset just past the operand.
MarkPositionForCodeRelativeFixup();
}
// Pops the top 32-bit entry of the backtrack stack into `target`,
// sign-extending it to 64 bits, and raises the backtrack stack pointer by
// kIntSize. `target` must not be the backtrack stack pointer itself.
void RegExpMacroAssemblerX64::Pop(Register target) {
ASSERT(!target.is(backtrack_stackpointer()));
__ movsxlq(target, Operand(backtrack_stackpointer(), 0));
// Notice: This updates flags, unlike normal Pop.
__ addq(backtrack_stackpointer(), Immediate(kIntSize));
}
// Discards the top entry of the backtrack stack without reading it by raising
// the backtrack stack pointer by kIntSize.
void RegExpMacroAssemblerX64::Drop() {
__ addq(backtrack_stackpointer(), Immediate(kIntSize));
}
// Emits a preemption check: the value stored at the stack guard limit address
// is loaded into rax and compared with rsp. When rsp is not above it, the
// shared check_preempt_label_ routine is called via SafeCall. Clobbers rax.
void RegExpMacroAssemblerX64::CheckPreemption() {
// Check for preemption.
Label no_preempt;
ExternalReference stack_guard_limit =
ExternalReference::address_of_stack_guard_limit();
__ load_rax(stack_guard_limit);
__ cmpq(rsp, rax);
// Unsigned comparison: skip the call while rsp is strictly above the limit.
__ j(above, &no_preempt);
SafeCall(&check_preempt_label_);
__ bind(&no_preempt);
}
// Emits a check that the backtrack stack pointer is still above the regexp
// stack limit; if it is not, the shared stack_overflow_label_ routine is
// called via SafeCall. Nothing is emitted unless FLAG_check_stack is set.
// The emitted code clobbers rax.
void RegExpMacroAssemblerX64::CheckStackLimit() {
  if (!FLAG_check_stack) return;

  Label within_limit;
  ExternalReference limit_address =
      ExternalReference::address_of_regexp_stack_limit();
  __ load_rax(limit_address);
  __ cmpq(backtrack_stackpointer(), rax);
  // Unsigned comparison: skip the call while strictly above the limit.
  __ j(above, &within_limit);
  SafeCall(&stack_overflow_label_);
  __ bind(&within_limit);
}
// Emits code that aligns rsp to the platform's activation frame alignment
// before a call to C++ code, and saves the original rsp on the stack so that
// CallCFunction can restore it afterwards.
//
// num_arguments: number of pointer-sized arguments of the upcoming call. It
//                only affects the emitted code in the __MSVC__ configuration,
//                where stack room is reserved for the arguments and the old
//                rsp is stored just above them.
// Clobbers kScratchRegister.
void RegExpMacroAssemblerX64::FrameAlign(int num_arguments) {
  // TODO(lrn): Since we no longer use the system stack arbitrarily (but we do
  // use it, e.g., for SafeCall), we know the number of elements on the stack
  // since the last frame alignment. We might be able to do this simpler then.
  int frameAlignment = OS::ActivationFrameAlignment();
  ASSERT(frameAlignment != 0);
  // Masking with -frameAlignment only rounds down correctly for powers of 2.
  ASSERT(IsPowerOf2(frameAlignment));
  // Make stack end at alignment and make room for num_arguments pointers
  // (on Win64 only) and the original value of rsp.
  __ movq(kScratchRegister, rsp);
#ifdef __MSVC__
  // Allocate space for parameters and old rsp.
  __ subq(rsp, Immediate((num_arguments + 1) * kPointerSize));
  // Wrap the mask in Immediate, exactly as the non-MSVC branch does.
  __ and_(rsp, Immediate(-frameAlignment));
  __ movq(Operand(rsp, num_arguments * kPointerSize), kScratchRegister);
#else
  // Allocate space for old rsp.
  __ subq(rsp, Immediate(kPointerSize));
  __ and_(rsp, Immediate(-frameAlignment));
  __ movq(Operand(rsp, 0), kScratchRegister);
#endif
}
// Emits a call to the C++ function at function_address and then restores the
// rsp value that FrameAlign saved. FrameAlign must have been emitted first
// with the same num_arguments, because the saved rsp is read back from the
// slot FrameAlign wrote it to. Clobbers rax.
void RegExpMacroAssemblerX64::CallCFunction(Address function_address,
int num_arguments) {
// Don't compile regexps with serialization enabled. The addresses of the C++
// function being called isn't relocatable.
ASSERT(!Serializer::enabled());
// The absolute address is embedded without relocation information.
__ movq(rax, reinterpret_cast<intptr_t>(function_address), RelocInfo::NONE);
__ call(rax);
ASSERT(OS::ActivationFrameAlignment() != 0);
#ifdef __MSVC__
// The old rsp was stored just above the reserved argument slots.
__ movq(rsp, Operand(rsp, num_arguments * kPointerSize));
#else
// The old rsp was stored at the top of the aligned stack.
__ pop(rsp);
#endif
}
void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset,
int characters) {
if (mode_ == ASCII) {
if (characters == 4) {
__ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
} else if (characters == 2) {
__ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
} else {
ASSERT(characters == 1);
__ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
}
} else {
ASSERT(mode_ == UC16);
if (characters == 2) {
__ movl(current_character(),
Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
} else {
ASSERT(characters == 1);
__ movzxwl(current_character(),
Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
}
}
}
#undef __
}} // namespace v8::internal
...@@ -25,3 +25,271 @@ ...@@ -25,3 +25,271 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef V8_X64_REGEXP_MACRO_ASSEMBLER_X64_H_
#define V8_X64_REGEXP_MACRO_ASSEMBLER_X64_H_
namespace v8 {
namespace internal {
// x64 implementation of the native regexp macro assembler. The virtual
// methods below emit machine code through masm_; GetCode produces the
// finished code object and the static Match/Execute entry points run
// compiled code against a subject string.
class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler {
public:
// mode selects ASCII or UC16 code generation; registers_to_save is the
// number of registers copied to the output on success (see
// num_saved_registers_ below).
RegExpMacroAssemblerX64(Mode mode, int registers_to_save);
virtual ~RegExpMacroAssemblerX64();
virtual int stack_limit_slack();
virtual void AdvanceCurrentPosition(int by);
virtual void AdvanceRegister(int reg, int by);
virtual void Backtrack();
virtual void Bind(Label* label);
virtual void CheckAtStart(Label* on_at_start);
virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure,
bool check_end_of_string);
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
uc16 minus,
uc16 mask,
Label* on_not_equal);
// Checks whether the given offset from the current position is before
// the end of the string.
virtual void CheckPosition(int cp_offset, Label* on_outside_input);
virtual bool CheckSpecialCharacterClass(uc16 type,
int cp_offset,
bool check_offset,
Label* on_no_match);
virtual void Fail();
virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual void IfRegisterEqPos(int reg, Label* if_eq);
virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
bool check_bounds = true,
int characters = 1);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
virtual void PushCurrentPosition();
virtual void PushRegister(int register_index,
StackCheckFlag check_stack_limit);
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
// Static entry points for running compiled regexp code. Their parameter
// lists mirror the frame slots declared below (input string, start index,
// input start/end, output registers, at-start flag).
static Result Match(Handle<Code> regexp,
Handle<String> subject,
int* offsets_vector,
int offsets_vector_length,
int previous_index);
static Result Execute(Code* code,
String* input,
int start_offset,
const byte* input_start,
const byte* input_end,
int* output,
bool at_start);
private:
// Offsets from rbp of function parameters and stored registers.
static const int kFramePointer = 0;
// Above the frame pointer - function parameters and return address.
// NOTE(review): the "eip" suffix looks inherited from the ia32 port; on x64
// the slot one pointer above the frame pointer holds the return address.
static const int kReturn_eip = kFramePointer + kPointerSize;
static const int kFrameAlign = kReturn_eip + kPointerSize;
#ifdef __MSVC__
// Parameters (first four passed as registers, but with room on stack).
// In Microsoft 64-bit Calling Convention, there is room on the callers
// stack (before the return address) to spill parameter registers. We
// use this space to store the register passed parameters.
static const int kInputString = kFrameAlign;
static const int kStartIndex = kInputString + kPointerSize;
static const int kInputStart = kStartIndex + kPointerSize;
static const int kInputEnd = kInputStart + kPointerSize;
static const int kRegisterOutput = kInputEnd + kPointerSize;
static const int kAtStart = kRegisterOutput + kPointerSize;
static const int kStackHighEnd = kAtStart + kPointerSize;
#else
// In AMD64 ABI Calling Convention, the first six integer parameters
// are passed as registers, and caller must allocate space on the stack
// if it wants them stored. We push the parameters after the frame pointer.
// Only kStackHighEnd stays above the frame pointer in this configuration.
static const int kInputString = kFramePointer - kPointerSize;
static const int kStartIndex = kInputString - kPointerSize;
static const int kInputStart = kStartIndex - kPointerSize;
static const int kInputEnd = kInputStart - kPointerSize;
static const int kRegisterOutput = kInputEnd - kPointerSize;
static const int kAtStart = kRegisterOutput - kPointerSize;
static const int kStackHighEnd = kFrameAlign;
#endif
#ifdef __MSVC__
// Microsoft calling convention has three callee-saved registers
// (that we are using). We push these after the frame pointer.
static const int kBackup_rsi = kFramePointer - kPointerSize;
static const int kBackup_rdi = kBackup_rsi - kPointerSize;
static const int kBackup_rbx = kBackup_rdi - kPointerSize;
static const int kLastCalleeSaveRegister = kBackup_rbx;
#else
// AMD64 Calling Convention has only one callee-save register that
// we use. We push this after the frame pointer (and after the
// parameters).
static const int kBackup_rbx = kAtStart - kPointerSize;
static const int kLastCalleeSaveRegister = kBackup_rbx;
#endif
// When adding local variables remember to push space for them in
// the frame in GetCode.
static const int kInputStartMinusOne =
kLastCalleeSaveRegister - kPointerSize;
// First register address. Following registers are below it on the stack.
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
// Load a number of characters at the given offset from the
// current position, into the current-character register.
void LoadCurrentCharacterUnchecked(int cp_offset, int character_count);
// Check whether preemption has been requested.
void CheckPreemption();
// Check whether we are exceeding the stack limit on the backtrack stack.
void CheckStackLimit();
// Called from RegExp if the stack-guard is triggered.
// If the code object is relocated, the return address is fixed before
// returning.
static int CheckStackGuardState(Address* return_address,
Code* re_code,
Address re_frame);
// Generate a call to CheckStackGuardState.
void CallCheckStackGuardState();
// Called from RegExp if the backtrack stack limit is hit.
// Tries to expand the stack. Returns the new stack-pointer if
// successful, and updates the stack_top address, or returns 0 if unable
// to grow the stack.
// This function must not trigger a garbage collection.
static Address GrowStack(Address stack_pointer, Address* stack_top);
// The rbp-relative location of a regexp register.
Operand register_location(int register_index);
// The register containing the current character after LoadCurrentCharacter.
inline Register current_character() { return rdx; }
// The register containing the backtrack stack top. Provides a meaningful
// name to the register.
inline Register backtrack_stackpointer() { return rcx; }
// The register containing a self pointer to this code's Code object.
inline Register code_object_pointer() { return r8; }
// Byte size of chars in the string to match (decided by the Mode argument)
inline int char_size() { return static_cast<int>(mode_); }
// Equivalent to a conditional branch to the label, unless the label
// is NULL, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to);
// Records the current code offset so that FixupCodeRelativePositions can
// later patch the operand that ends at this offset.
void MarkPositionForCodeRelativeFixup() {
code_relative_fixup_positions_.Add(masm_->pc_offset());
}
// Patches every position recorded by MarkPositionForCodeRelativeFixup.
void FixupCodeRelativePositions();
// Call and return internally in the generated code in a way that
// is GC-safe (i.e., doesn't leave absolute code addresses on the stack)
inline void SafeCall(Label* to);
inline void SafeCallTarget(Label* label);
inline void SafeReturn();
// Pushes the value of a register on the backtrack stack. Decrements the
// stack pointer (rcx) by one backtrack stack slot and stores the register's
// value there.
// NOTE(review): earlier wording said "a word size"; the implementations of
// Push/Pop/Drop step rcx by kIntSize and use 32-bit moves.
inline void Push(Register source);
// Pushes a value on the backtrack stack. Decrements the stack pointer (rcx)
// by one backtrack stack slot and stores the value there.
inline void Push(Immediate value);
// Pushes the Code object relative offset of a label on the backtrack stack
// (i.e., a backtrack target). Decrements the stack pointer (rcx)
// by one backtrack stack slot and stores the value there.
inline void Push(Label* label);
// Pops a value from the backtrack stack. Reads the entry at the stack pointer
// (rcx) and increments it by one backtrack stack slot.
inline void Pop(Register target);
// Drops the top value from the backtrack stack without reading it.
// Increments the stack pointer (rcx) by one backtrack stack slot.
inline void Drop();
// Before calling a C-function from generated code, align the stack.
// Some compilers/platforms require the stack to be aligned when calling
// C++ code. The argument count assumes all arguments are pointer sized.
// NOTE(review): earlier wording (inherited from ia32) said arguments must be
// stored in esp[0], esp[4], etc.; on x64 the stack pointer is rsp, and stack
// room for arguments is only reserved in the __MSVC__ configuration.
// Needs a scratch register to do some arithmetic. This register will be
// trashed.
inline void FrameAlign(int num_arguments);
// Calls a C function and cleans up the space for arguments allocated
// by FrameAlign. The called function is not allowed to trigger a garbage
// collection, since that might move the code and invalidate the return
// address (unless this is somehow accounted for by the called function).
inline void CallCFunction(Address function_address, int num_arguments);
// The underlying assembler that all code is emitted through.
MacroAssembler* masm_;
// Code offsets recorded by MarkPositionForCodeRelativeFixup.
ZoneList<int> code_relative_fixup_positions_;
// Which mode to generate code for (ASCII or UC16).
Mode mode_;
// One greater than maximal register index actually used.
int num_registers_;
// Number of registers to output at the end (the saved registers
// are always 0..num_saved_registers_-1)
int num_saved_registers_;
// Labels used internally.
Label entry_label_;
Label start_label_;
Label success_label_;
Label backtrack_label_;
Label exit_label_;
Label check_preempt_label_;
Label stack_overflow_label_;
};
}} // namespace v8::internal
#endif // V8_X64_REGEXP_MACRO_ASSEMBLER_X64_H_
...@@ -113,6 +113,7 @@ test-debug/DebuggerUnload: CRASH || FAIL ...@@ -113,6 +113,7 @@ test-debug/DebuggerUnload: CRASH || FAIL
test-debug/DebuggerHostDispatch: CRASH || FAIL test-debug/DebuggerHostDispatch: CRASH || FAIL
test-debug/DebugBreakInMessageHandler: CRASH || FAIL test-debug/DebugBreakInMessageHandler: CRASH || FAIL
test-debug/NoDebugBreakInAfterCompileMessageHandler: CRASH || FAIL test-debug/NoDebugBreakInAfterCompileMessageHandler: CRASH || FAIL
test-debug/RegExpDebugBreak: FAIL
test-api/Threading: CRASH || FAIL test-api/Threading: CRASH || FAIL
test-api/Threading2: PASS || TIMEOUT test-api/Threading2: PASS || TIMEOUT
test-api/TryCatchSourceInfo: CRASH || FAIL test-api/TryCatchSourceInfo: CRASH || FAIL
......
...@@ -38,18 +38,21 @@ ...@@ -38,18 +38,21 @@
#include "jsregexp.h" #include "jsregexp.h"
#include "regexp-macro-assembler.h" #include "regexp-macro-assembler.h"
#include "regexp-macro-assembler-irregexp.h" #include "regexp-macro-assembler-irregexp.h"
#ifdef V8_NATIVE_REGEXP
#ifdef V8_TARGET_ARCH_ARM #ifdef V8_TARGET_ARCH_ARM
#include "arm/regexp-macro-assembler-arm.h" #include "arm/regexp-macro-assembler-arm.h"
#endif #endif
#ifdef V8_TARGET_ARCH_X64 #ifdef V8_TARGET_ARCH_X64
// No X64-implementation yet. #include "x64/macro-assembler-x64.h"
#include "x64/regexp-macro-assembler-x64.h"
#endif #endif
#ifdef V8_TARGET_ARCH_IA32 #ifdef V8_TARGET_ARCH_IA32
#include "ia32/macro-assembler-ia32.h" #include "ia32/macro-assembler-ia32.h"
#include "ia32/regexp-macro-assembler-ia32.h" #include "ia32/regexp-macro-assembler-ia32.h"
#endif #endif
#else
#include "interpreter-irregexp.h" #include "interpreter-irregexp.h"
#endif
using namespace v8::internal; using namespace v8::internal;
...@@ -599,75 +602,20 @@ TEST(DispatchTableConstruction) { ...@@ -599,75 +602,20 @@ TEST(DispatchTableConstruction) {
// Tests of interpreter. // Tests of interpreter.
// Exercises the bytecode macro assembler (RegExpMacroAssemblerIrregexp)
// together with the interpreter: hand-assembles a program for ^f(o)o, runs
// it on a matching and a non-matching two-byte string, and checks the
// resulting register contents through the captures array.
TEST(MacroAssembler) {
V8::Initialize(NULL);
byte codes[1024];
RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024));
// ^f(o)o.
Label fail, fail2, start;
uc16 foo_chars[3];
foo_chars[0] = 'f';
foo_chars[1] = 'o';
foo_chars[2] = 'o';
Vector<const uc16> foo(foo_chars, 3);
// Register 4 is set to 42, pushed, then advanced by 42 (so it holds 84).
m.SetRegister(4, 42);
m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
m.AdvanceRegister(4, 42);
// The Fail() below is jumped over by the GoTo.
m.GoTo(&start);
m.Fail();
m.Bind(&start);
m.PushBacktrack(&fail2);
m.CheckCharacters(foo, 0, &fail, true);
// Record positions 0, 3, 1 and 2 in registers 0..3.
m.WriteCurrentPositionToRegister(0, 0);
m.PushCurrentPosition();
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1, 0);
m.PopCurrentPosition();
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(3, 0);
m.Succeed();
// Mismatch path: backtrack to the target pushed earlier (fail2).
m.Bind(&fail);
m.Backtrack();
m.Succeed();
// Pops the value pushed from register 4 (42) into register 0, then fails.
m.Bind(&fail2);
m.PopRegister(0);
m.Fail();
v8::HandleScope scope;
Handle<String> source = Factory::NewStringFromAscii(CStrVector("^f(o)o"));
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
int captures[5];
// Matching input: expect the positions recorded above and 84 in register 4.
const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
Handle<String> f1_16 =
Factory::NewStringFromTwoByte(Vector<const uc16>(str1, 6));
CHECK(IrregexpInterpreter::Match(array, f1_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]);
CHECK_EQ(1, captures[2]);
CHECK_EQ(2, captures[3]);
CHECK_EQ(84, captures[4]);
// Non-matching input: the match fails and the test expects the popped 42 to
// be visible in captures[0].
const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
Handle<String> f2_16 =
Factory::NewStringFromTwoByte(Vector<const uc16>(str2, 6));
CHECK(!IrregexpInterpreter::Match(array, f2_16, captures, 0));
CHECK_EQ(42, captures[0]);
}
#ifdef V8_TARGET_ARCH_IA32 // IA32 Native Regexp only tests.
#ifdef V8_NATIVE_REGEXP #ifdef V8_NATIVE_REGEXP
#ifdef V8_TARGET_ARCH_IA32
typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
#endif
#ifdef V8_TARGET_ARCH_X64
typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
#endif
class ContextInitializer { class ContextInitializer {
public: public:
ContextInitializer() : env_(), scope_(), stack_guard_() { ContextInitializer()
: env_(), scope_(), zone_(DELETE_ON_EXIT), stack_guard_() {
env_ = v8::Context::New(); env_ = v8::Context::New();
env_->Enter(); env_->Enter();
} }
...@@ -678,18 +626,19 @@ class ContextInitializer { ...@@ -678,18 +626,19 @@ class ContextInitializer {
private: private:
v8::Persistent<v8::Context> env_; v8::Persistent<v8::Context> env_;
v8::HandleScope scope_; v8::HandleScope scope_;
v8::internal::ZoneScope zone_;
v8::internal::StackGuard stack_guard_; v8::internal::StackGuard stack_guard_;
}; };
static RegExpMacroAssemblerIA32::Result ExecuteIA32(Code* code, static ArchRegExpMacroAssembler::Result Execute(Code* code,
String* input, String* input,
int start_offset, int start_offset,
const byte* input_start, const byte* input_start,
const byte* input_end, const byte* input_end,
int* captures, int* captures,
bool at_start) { bool at_start) {
return RegExpMacroAssemblerIA32::Execute( return NativeRegExpMacroAssembler::Execute(
code, code,
input, input,
start_offset, start_offset,
...@@ -700,11 +649,11 @@ static RegExpMacroAssemblerIA32::Result ExecuteIA32(Code* code, ...@@ -700,11 +649,11 @@ static RegExpMacroAssemblerIA32::Result ExecuteIA32(Code* code,
} }
TEST(MacroAssemblerIA32Success) { TEST(MacroAssemblerNativeSuccess) {
v8::V8::Initialize(); v8::V8::Initialize();
ContextInitializer initializer; ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4); ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4);
m.Succeed(); m.Succeed();
...@@ -718,16 +667,16 @@ TEST(MacroAssemblerIA32Success) { ...@@ -718,16 +667,16 @@ TEST(MacroAssemblerIA32Success) {
const byte* start_adr = const byte* start_adr =
reinterpret_cast<const byte*>(seq_input->GetCharsAddress()); reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
RegExpMacroAssemblerIA32::Result result = NativeRegExpMacroAssembler::Result result =
ExecuteIA32(*code, Execute(*code,
*input, *input,
0, 0,
start_adr, start_adr,
start_adr + seq_input->length(), start_adr + seq_input->length(),
captures, captures,
true); true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result); CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(-1, captures[0]); CHECK_EQ(-1, captures[0]);
CHECK_EQ(-1, captures[1]); CHECK_EQ(-1, captures[1]);
CHECK_EQ(-1, captures[2]); CHECK_EQ(-1, captures[2]);
...@@ -735,11 +684,11 @@ TEST(MacroAssemblerIA32Success) { ...@@ -735,11 +684,11 @@ TEST(MacroAssemblerIA32Success) {
} }
TEST(MacroAssemblerIA32Simple) { TEST(MacroAssemblerNativeSimple) {
v8::V8::Initialize(); v8::V8::Initialize();
ContextInitializer initializer; ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4); ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4);
uc16 foo_chars[3] = {'f', 'o', 'o'}; uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3); Vector<const uc16> foo(foo_chars, 3);
...@@ -762,16 +711,16 @@ TEST(MacroAssemblerIA32Simple) { ...@@ -762,16 +711,16 @@ TEST(MacroAssemblerIA32Simple) {
Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input); Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
Address start_adr = seq_input->GetCharsAddress(); Address start_adr = seq_input->GetCharsAddress();
RegExpMacroAssemblerIA32::Result result = NativeRegExpMacroAssembler::Result result =
ExecuteIA32(*code, Execute(*code,
*input, *input,
0, 0,
start_adr, start_adr,
start_adr + input->length(), start_adr + input->length(),
captures, captures,
true); true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result); CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, captures[0]); CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]); CHECK_EQ(3, captures[1]);
CHECK_EQ(-1, captures[2]); CHECK_EQ(-1, captures[2]);
...@@ -781,23 +730,23 @@ TEST(MacroAssemblerIA32Simple) { ...@@ -781,23 +730,23 @@ TEST(MacroAssemblerIA32Simple) {
seq_input = Handle<SeqAsciiString>::cast(input); seq_input = Handle<SeqAsciiString>::cast(input);
start_adr = seq_input->GetCharsAddress(); start_adr = seq_input->GetCharsAddress();
result = ExecuteIA32(*code, result = Execute(*code,
*input, *input,
0, 0,
start_adr, start_adr,
start_adr + input->length(), start_adr + input->length(),
captures, captures,
true); true);
CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result); CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
} }
TEST(MacroAssemblerIA32SimpleUC16) { TEST(MacroAssemblerNativeSimpleUC16) {
v8::V8::Initialize(); v8::V8::Initialize();
ContextInitializer initializer; ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::UC16, 4); ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4);
uc16 foo_chars[3] = {'f', 'o', 'o'}; uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3); Vector<const uc16> foo(foo_chars, 3);
...@@ -822,16 +771,16 @@ TEST(MacroAssemblerIA32SimpleUC16) { ...@@ -822,16 +771,16 @@ TEST(MacroAssemblerIA32SimpleUC16) {
Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input); Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
Address start_adr = seq_input->GetCharsAddress(); Address start_adr = seq_input->GetCharsAddress();
RegExpMacroAssemblerIA32::Result result = NativeRegExpMacroAssembler::Result result =
ExecuteIA32(*code, Execute(*code,
*input, *input,
0, 0,
start_adr, start_adr,
start_adr + input->length(), start_adr + input->length(),
captures, captures,
true); true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result); CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, captures[0]); CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]); CHECK_EQ(3, captures[1]);
CHECK_EQ(-1, captures[2]); CHECK_EQ(-1, captures[2]);
...@@ -842,23 +791,23 @@ TEST(MacroAssemblerIA32SimpleUC16) { ...@@ -842,23 +791,23 @@ TEST(MacroAssemblerIA32SimpleUC16) {
seq_input = Handle<SeqTwoByteString>::cast(input); seq_input = Handle<SeqTwoByteString>::cast(input);
start_adr = seq_input->GetCharsAddress(); start_adr = seq_input->GetCharsAddress();
result = ExecuteIA32(*code, result = Execute(*code,
*input, *input,
0, 0,
start_adr, start_adr,
start_adr + input->length() * 2, start_adr + input->length() * 2,
captures, captures,
true); true);
CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result); CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
} }
TEST(MacroAssemblerIA32Backtrack) { TEST(MacroAssemblerNativeBacktrack) {
v8::V8::Initialize(); v8::V8::Initialize();
ContextInitializer initializer; ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0); ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0);
Label fail; Label fail;
Label backtrack; Label backtrack;
...@@ -879,24 +828,24 @@ TEST(MacroAssemblerIA32Backtrack) { ...@@ -879,24 +828,24 @@ TEST(MacroAssemblerIA32Backtrack) {
Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input); Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
Address start_adr = seq_input->GetCharsAddress(); Address start_adr = seq_input->GetCharsAddress();
RegExpMacroAssemblerIA32::Result result = NativeRegExpMacroAssembler::Result result =
ExecuteIA32(*code, Execute(*code,
*input, *input,
0, 0,
start_adr, start_adr,
start_adr + input->length(), start_adr + input->length(),
NULL, NULL,
true); true);
CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result); CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
} }
TEST(MacroAssemblerIA32BackReferenceASCII) { TEST(MacroAssemblerNativeBackReferenceASCII) {
v8::V8::Initialize(); v8::V8::Initialize();
ContextInitializer initializer; ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 3); ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 3);
m.WriteCurrentPositionToRegister(0, 0); m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(2); m.AdvanceCurrentPosition(2);
...@@ -922,27 +871,27 @@ TEST(MacroAssemblerIA32BackReferenceASCII) { ...@@ -922,27 +871,27 @@ TEST(MacroAssemblerIA32BackReferenceASCII) {
Address start_adr = seq_input->GetCharsAddress(); Address start_adr = seq_input->GetCharsAddress();
int output[3]; int output[3];
RegExpMacroAssemblerIA32::Result result = NativeRegExpMacroAssembler::Result result =
ExecuteIA32(*code, Execute(*code,
*input, *input,
0, 0,
start_adr, start_adr,
start_adr + input->length(), start_adr + input->length(),
output, output,
true); true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result); CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, output[0]); CHECK_EQ(0, output[0]);
CHECK_EQ(2, output[1]); CHECK_EQ(2, output[1]);
CHECK_EQ(6, output[2]); CHECK_EQ(6, output[2]);
} }
TEST(MacroAssemblerIA32BackReferenceUC16) { TEST(MacroAssemblerNativeBackReferenceUC16) {
v8::V8::Initialize(); v8::V8::Initialize();
ContextInitializer initializer; ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::UC16, 3); ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 3);
m.WriteCurrentPositionToRegister(0, 0); m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(2); m.AdvanceCurrentPosition(2);
...@@ -970,8 +919,8 @@ TEST(MacroAssemblerIA32BackReferenceUC16) { ...@@ -970,8 +919,8 @@ TEST(MacroAssemblerIA32BackReferenceUC16) {
Address start_adr = seq_input->GetCharsAddress(); Address start_adr = seq_input->GetCharsAddress();
int output[3]; int output[3];
RegExpMacroAssemblerIA32::Result result = NativeRegExpMacroAssembler::Result result =
ExecuteIA32(*code, Execute(*code,
*input, *input,
0, 0,
start_adr, start_adr,
...@@ -979,7 +928,7 @@ TEST(MacroAssemblerIA32BackReferenceUC16) { ...@@ -979,7 +928,7 @@ TEST(MacroAssemblerIA32BackReferenceUC16) {
output, output,
true); true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result); CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, output[0]); CHECK_EQ(0, output[0]);
CHECK_EQ(2, output[1]); CHECK_EQ(2, output[1]);
CHECK_EQ(6, output[2]); CHECK_EQ(6, output[2]);
...@@ -987,11 +936,11 @@ TEST(MacroAssemblerIA32BackReferenceUC16) { ...@@ -987,11 +936,11 @@ TEST(MacroAssemblerIA32BackReferenceUC16) {
TEST(MacroAssemblerIA32AtStart) { TEST(MacroAssemblernativeAtStart) {
v8::V8::Initialize(); v8::V8::Initialize();
ContextInitializer initializer; ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0); ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0);
Label not_at_start, newline, fail; Label not_at_start, newline, fail;
m.CheckNotAtStart(&not_at_start); m.CheckNotAtStart(&not_at_start);
...@@ -1022,34 +971,34 @@ TEST(MacroAssemblerIA32AtStart) { ...@@ -1022,34 +971,34 @@ TEST(MacroAssemblerIA32AtStart) {
Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input); Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
Address start_adr = seq_input->GetCharsAddress(); Address start_adr = seq_input->GetCharsAddress();
RegExpMacroAssemblerIA32::Result result = NativeRegExpMacroAssembler::Result result =
ExecuteIA32(*code, Execute(*code,
*input, *input,
0, 0,
start_adr, start_adr,
start_adr + input->length(), start_adr + input->length(),
NULL, NULL,
true); true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result); CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
result = ExecuteIA32(*code, result = Execute(*code,
*input, *input,
3, 3,
start_adr + 3, start_adr + 3,
start_adr + input->length(), start_adr + input->length(),
NULL, NULL,
false); false);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result); CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
} }
TEST(MacroAssemblerIA32BackRefNoCase) { TEST(MacroAssemblerNativeBackRefNoCase) {
v8::V8::Initialize(); v8::V8::Initialize();
ContextInitializer initializer; ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4); ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4);
Label fail, succ; Label fail, succ;
...@@ -1084,16 +1033,16 @@ TEST(MacroAssemblerIA32BackRefNoCase) { ...@@ -1084,16 +1033,16 @@ TEST(MacroAssemblerIA32BackRefNoCase) {
Address start_adr = seq_input->GetCharsAddress(); Address start_adr = seq_input->GetCharsAddress();
int output[4]; int output[4];
RegExpMacroAssemblerIA32::Result result = NativeRegExpMacroAssembler::Result result =
ExecuteIA32(*code, Execute(*code,
*input, *input,
0, 0,
start_adr, start_adr,
start_adr + input->length(), start_adr + input->length(),
output, output,
true); true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result); CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, output[0]); CHECK_EQ(0, output[0]);
CHECK_EQ(12, output[1]); CHECK_EQ(12, output[1]);
CHECK_EQ(0, output[2]); CHECK_EQ(0, output[2]);
...@@ -1102,11 +1051,11 @@ TEST(MacroAssemblerIA32BackRefNoCase) { ...@@ -1102,11 +1051,11 @@ TEST(MacroAssemblerIA32BackRefNoCase) {
TEST(MacroAssemblerIA32Registers) { TEST(MacroAssemblerNativeRegisters) {
v8::V8::Initialize(); v8::V8::Initialize();
ContextInitializer initializer; ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 5); ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 5);
uc16 foo_chars[3] = {'f', 'o', 'o'}; uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3); Vector<const uc16> foo(foo_chars, 3);
...@@ -1184,8 +1133,8 @@ TEST(MacroAssemblerIA32Registers) { ...@@ -1184,8 +1133,8 @@ TEST(MacroAssemblerIA32Registers) {
Address start_adr = seq_input->GetCharsAddress(); Address start_adr = seq_input->GetCharsAddress();
int output[5]; int output[5];
RegExpMacroAssemblerIA32::Result result = NativeRegExpMacroAssembler::Result result =
ExecuteIA32(*code, Execute(*code,
*input, *input,
0, 0,
start_adr, start_adr,
...@@ -1193,7 +1142,7 @@ TEST(MacroAssemblerIA32Registers) { ...@@ -1193,7 +1142,7 @@ TEST(MacroAssemblerIA32Registers) {
output, output,
true); true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result); CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, output[0]); CHECK_EQ(0, output[0]);
CHECK_EQ(3, output[1]); CHECK_EQ(3, output[1]);
CHECK_EQ(6, output[2]); CHECK_EQ(6, output[2]);
...@@ -1202,11 +1151,11 @@ TEST(MacroAssemblerIA32Registers) { ...@@ -1202,11 +1151,11 @@ TEST(MacroAssemblerIA32Registers) {
} }
TEST(MacroAssemblerIA32StackOverflow) { TEST(MacroAssemblerStackOverflow) {
v8::V8::Initialize(); v8::V8::Initialize();
ContextInitializer initializer; ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0); ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0);
Label loop; Label loop;
m.Bind(&loop); m.Bind(&loop);
...@@ -1224,26 +1173,26 @@ TEST(MacroAssemblerIA32StackOverflow) { ...@@ -1224,26 +1173,26 @@ TEST(MacroAssemblerIA32StackOverflow) {
Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input); Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
Address start_adr = seq_input->GetCharsAddress(); Address start_adr = seq_input->GetCharsAddress();
RegExpMacroAssemblerIA32::Result result = NativeRegExpMacroAssembler::Result result =
ExecuteIA32(*code, Execute(*code,
*input, *input,
0, 0,
start_adr, start_adr,
start_adr + input->length(), start_adr + input->length(),
NULL, NULL,
true); true);
CHECK_EQ(RegExpMacroAssemblerIA32::EXCEPTION, result); CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
CHECK(Top::has_pending_exception()); CHECK(Top::has_pending_exception());
Top::clear_pending_exception(); Top::clear_pending_exception();
} }
TEST(MacroAssemblerIA32LotsOfRegisters) { TEST(MacroAssemblerNativeLotsOfRegisters) {
v8::V8::Initialize(); v8::V8::Initialize();
ContextInitializer initializer; ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 2); ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 2);
// At least 2048, to ensure the allocated space for registers // At least 2048, to ensure the allocated space for registers
// span one full page. // span one full page.
...@@ -1270,24 +1219,88 @@ TEST(MacroAssemblerIA32LotsOfRegisters) { ...@@ -1270,24 +1219,88 @@ TEST(MacroAssemblerIA32LotsOfRegisters) {
Address start_adr = seq_input->GetCharsAddress(); Address start_adr = seq_input->GetCharsAddress();
int captures[2]; int captures[2];
RegExpMacroAssemblerIA32::Result result = NativeRegExpMacroAssembler::Result result =
ExecuteIA32(*code, Execute(*code,
*input, *input,
0, 0,
start_adr, start_adr,
start_adr + input->length(), start_adr + input->length(),
captures, captures,
true); true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result); CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, captures[0]); CHECK_EQ(0, captures[0]);
CHECK_EQ(42, captures[1]); CHECK_EQ(42, captures[1]);
Top::clear_pending_exception(); Top::clear_pending_exception();
} }
#endif // V8_REGEXP_NATIVE #else // ! V8_NATIVE_REGEXP
#endif // V8_TARGET_ARCH_IA32
// Exercises RegExpMacroAssemblerIrregexp: hand-assembles a program for the
// pattern ^f(o)o, obtains its code as a ByteArray via GetCode(), and runs it
// through IrregexpInterpreter::Match against two two-byte subject strings,
// checking the capture array after each run.
TEST(MacroAssembler) {
V8::Initialize(NULL);
// Fixed-size byte buffer handed to the assembler's constructor.
byte codes[1024];
RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024));
// ^f(o)o.
Label fail, fail2, start;
// The literal "foo" as 16-bit characters, used by CheckCharacters below.
uc16 foo_chars[3];
foo_chars[0] = 'f';
foo_chars[1] = 'o';
foo_chars[2] = 'o';
Vector<const uc16> foo(foo_chars, 3);
// Register 4 is set to 42, pushed, then advanced by 42; the successful match
// below expects 84 in captures[4].
m.SetRegister(4, 42);
m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
m.AdvanceRegister(4, 42);
m.GoTo(&start);
// NOTE(review): presumably never executed, since the GoTo above targets the
// label bound on the next line - confirm.
m.Fail();
m.Bind(&start);
// fail2 is registered as a backtrack target before the character check;
// a mismatch in CheckCharacters branches to fail instead.
m.PushBacktrack(&fail2);
m.CheckCharacters(foo, 0, &fail, true);
// Registers 0..3 are written at current-position offsets 0, 3, 1 and 2
// (relative to the match start); these are the values the CHECK_EQs expect
// in captures[0..3] after the successful match.
m.WriteCurrentPositionToRegister(0, 0);
m.PushCurrentPosition();
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1, 0);
m.PopCurrentPosition();
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(3, 0);
m.Succeed();
// Mismatch path: backtrack (fail2 was pushed as the backtrack target above).
m.Bind(&fail);
m.Backtrack();
m.Succeed();
// Failure path: a value is popped into register 0 before failing. The failing
// match below expects captures[0] == 42.
// NOTE(review): presumably this is the 42 pushed from register 4 - confirm.
m.Bind(&fail2);
m.PopRegister(0);
m.Fail();
v8::HandleScope scope;
Handle<String> source = Factory::NewStringFromAscii(CStrVector("^f(o)o"));
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
// Five slots: registers 0..3 hold positions, register 4 the counter value.
int captures[5];
// "foobar" begins with "foo", so the match must succeed.
const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
Handle<String> f1_16 =
Factory::NewStringFromTwoByte(Vector<const uc16>(str1, 6));
CHECK(IrregexpInterpreter::Match(array, f1_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]);
CHECK_EQ(1, captures[2]);
CHECK_EQ(2, captures[3]);
CHECK_EQ(84, captures[4]);
// "barfoo" does not begin with "foo", so the match must fail; captures[0]
// is then expected to hold 42.
const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
Handle<String> f2_16 =
Factory::NewStringFromTwoByte(Vector<const uc16>(str2, 6));
CHECK(!IrregexpInterpreter::Match(array, f2_16, captures, 0));
CHECK_EQ(42, captures[0]);
}
#endif // ! V8_NATIVE_REGEXP
TEST(AddInverseToTable) { TEST(AddInverseToTable) {
......
...@@ -103,22 +103,24 @@ function testStrippedCustomError() { ...@@ -103,22 +103,24 @@ function testStrippedCustomError() {
// Utility function for testing that the expected strings occur // Utility function for testing that the expected strings occur
// in the stack trace produced when running the given function. // in the stack trace produced when running the given function.
function testTrace(fun, expected, unexpected) { function testTrace(name, fun, expected, unexpected) {
var threw = false; var threw = false;
try { try {
fun(); fun();
} catch (e) { } catch (e) {
for (var i = 0; i < expected.length; i++) { for (var i = 0; i < expected.length; i++) {
assertTrue(e.stack.indexOf(expected[i]) != -1); assertTrue(e.stack.indexOf(expected[i]) != -1,
name + " doesn't contain expected[" + i + "]");
} }
if (unexpected) { if (unexpected) {
for (var i = 0; i < unexpected.length; i++) { for (var i = 0; i < unexpected.length; i++) {
assertEquals(e.stack.indexOf(unexpected[i]), -1); assertEquals(e.stack.indexOf(unexpected[i]), -1,
name + " contains unexpected[" + i + "]");
} }
} }
threw = true; threw = true;
} }
assertTrue(threw); assertTrue(threw, name + " didn't throw");
} }
// Test that the error constructor is not shown in the trace // Test that the error constructor is not shown in the trace
...@@ -127,10 +129,11 @@ function testCallerCensorship() { ...@@ -127,10 +129,11 @@ function testCallerCensorship() {
try { try {
FAIL; FAIL;
} catch (e) { } catch (e) {
assertEquals(-1, e.stack.indexOf('at new ReferenceError')); assertEquals(-1, e.stack.indexOf('at new ReferenceError'),
"CallerCensorship contained new ReferenceError");
threw = true; threw = true;
} }
assertTrue(threw); assertTrue(threw, "CallerCensorship didn't throw");
} }
// Test that the explicit constructor call is shown in the trace // Test that the explicit constructor call is shown in the trace
...@@ -143,10 +146,11 @@ function testUnintendedCallerCensorship() { ...@@ -143,10 +146,11 @@ function testUnintendedCallerCensorship() {
} }
}); });
} catch (e) { } catch (e) {
assertTrue(e.stack.indexOf('at new ReferenceError') != -1); assertTrue(e.stack.indexOf('at new ReferenceError') != -1,
"UnintendedCallerCensorship didn't contain new ReferenceError");
threw = true; threw = true;
} }
assertTrue(threw); assertTrue(threw, "UnintendedCallerCensorship didn't throw");
} }
// If an error occurs while the stack trace is being formatted it should // If an error occurs while the stack trace is being formatted it should
...@@ -161,9 +165,10 @@ function testErrorsDuringFormatting() { ...@@ -161,9 +165,10 @@ function testErrorsDuringFormatting() {
n.foo(); n.foo();
} catch (e) { } catch (e) {
threw = true; threw = true;
assertTrue(e.stack.indexOf('<error: ReferenceError') != -1); assertTrue(e.stack.indexOf('<error: ReferenceError') != -1,
"ErrorsDuringFormatting didn't contain error: ReferenceError");
} }
assertTrue(threw); assertTrue(threw, "ErrorsDuringFormatting didn't throw");
threw = false; threw = false;
// Now we can't even format the message saying that we couldn't format // Now we can't even format the message saying that we couldn't format
// the stack frame. Put that in your pipe and smoke it! // the stack frame. Put that in your pipe and smoke it!
...@@ -172,26 +177,28 @@ function testErrorsDuringFormatting() { ...@@ -172,26 +177,28 @@ function testErrorsDuringFormatting() {
n.foo(); n.foo();
} catch (e) { } catch (e) {
threw = true; threw = true;
assertTrue(e.stack.indexOf('<error>') != -1); assertTrue(e.stack.indexOf('<error>') != -1,
"ErrorsDuringFormatting didn't contain <error>");
} }
assertTrue(threw); assertTrue(threw, "ErrorsDuringFormatting didnt' throw (2)");
} }
testTrace(testArrayNative, ["Array.map (native)"]);
testTrace(testNested, ["at one", "at two", "at three"]); testTrace("testArrayNative", testArrayNative, ["Array.map (native)"]);
testTrace(testMethodNameInference, ["at Foo.bar"]); testTrace("testNested", testNested, ["at one", "at two", "at three"]);
testTrace(testImplicitConversion, ["at Nirk.valueOf"]); testTrace("testMethodNameInference", testMethodNameInference, ["at Foo.bar"]);
testTrace(testEval, ["at Doo (eval at testEval"]); testTrace("testImplicitConversion", testImplicitConversion, ["at Nirk.valueOf"]);
testTrace(testNestedEval, ["eval at Inner (eval at Outer"]); testTrace("testEval", testEval, ["at Doo (eval at testEval"]);
testTrace(testValue, ["at Number.causeError"]); testTrace("testNestedEval", testNestedEval, ["eval at Inner (eval at Outer"]);
testTrace(testConstructor, ["new Plonk"]); testTrace("testValue", testValue, ["at Number.causeError"]);
testTrace(testRenamedMethod, ["Wookie.a$b$c$d [as d]"]); testTrace("testConstructor", testConstructor, ["new Plonk"]);
testTrace(testAnonymousMethod, ["Array.<anonymous>"]); testTrace("testRenamedMethod", testRenamedMethod, ["Wookie.a$b$c$d [as d]"]);
testTrace(testDefaultCustomError, ["hep-hey", "new CustomError"], testTrace("testAnonymousMethod", testAnonymousMethod, ["Array.<anonymous>"]);
testTrace("testDefaultCustomError", testDefaultCustomError,
["hep-hey", "new CustomError"],
["collectStackTrace"]); ["collectStackTrace"]);
testTrace(testStrippedCustomError, ["hep-hey"], ["new CustomError", testTrace("testStrippedCustomError", testStrippedCustomError, ["hep-hey"],
"collectStackTrace"]); ["new CustomError", "collectStackTrace"]);
testCallerCensorship(); testCallerCensorship();
testUnintendedCallerCensorship(); testUnintendedCallerCensorship();
testErrorsDuringFormatting(); testErrorsDuringFormatting();
...@@ -803,10 +803,3 @@ ecma/Expressions/11.7.3: SKIP ...@@ -803,10 +803,3 @@ ecma/Expressions/11.7.3: SKIP
ecma/Expressions/11.10-3: SKIP ecma/Expressions/11.10-3: SKIP
ecma/Expressions/11.7.1: SKIP ecma/Expressions/11.7.1: SKIP
ecma_3/RegExp/regress-209067: SKIP ecma_3/RegExp/regress-209067: SKIP
[ $ARCH == x64 ]
# Tests that fail on the 64-bit port. This section should be empty
# when the 64-bit port is fully debugged.
js1_2/regexp/regress-9141: FAIL
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
'gcc_version%': 'unknown', 'gcc_version%': 'unknown',
'target_arch%': 'ia32', 'target_arch%': 'ia32',
'v8_use_snapshot%': 'true', 'v8_use_snapshot%': 'true',
'v8_regexp%': 'native',
}, },
'includes': [ 'includes': [
'../../../build/common.gypi', '../../../build/common.gypi',
...@@ -55,6 +56,7 @@ ...@@ -55,6 +56,7 @@
['target_arch=="x64"', { ['target_arch=="x64"', {
'defines': [ 'defines': [
'V8_TARGET_ARCH_X64', 'V8_TARGET_ARCH_X64',
'V8_NATIVE_REGEXP',
], ],
}], }],
], ],
...@@ -428,14 +430,18 @@ ...@@ -428,14 +430,18 @@
'../../src/ia32/jump-target-ia32.cc', '../../src/ia32/jump-target-ia32.cc',
'../../src/ia32/macro-assembler-ia32.cc', '../../src/ia32/macro-assembler-ia32.cc',
'../../src/ia32/macro-assembler-ia32.h', '../../src/ia32/macro-assembler-ia32.h',
'../../src/ia32/regexp-macro-assembler-ia32.cc',
'../../src/ia32/regexp-macro-assembler-ia32.h',
'../../src/ia32/register-allocator-ia32.cc', '../../src/ia32/register-allocator-ia32.cc',
'../../src/ia32/stub-cache-ia32.cc', '../../src/ia32/stub-cache-ia32.cc',
'../../src/ia32/virtual-frame-ia32.cc', '../../src/ia32/virtual-frame-ia32.cc',
'../../src/ia32/virtual-frame-ia32.h', '../../src/ia32/virtual-frame-ia32.h',
], ],
}], }],
# Build the ia32 native RegExp macro assembler only when v8_regexp is
# "native". The architecture value must be "ia32" (the 'target_arch%'
# default); the previous "x32" matched no configured architecture, so these
# sources were never added to the build.
['target_arch=="ia32" and v8_regexp=="native"', {
  'sources': [
    '../../src/ia32/regexp-macro-assembler-ia32.cc',
    '../../src/ia32/regexp-macro-assembler-ia32.h',
  ],
}],
['target_arch=="x64"', { ['target_arch=="x64"', {
'include_dirs+': [ 'include_dirs+': [
'../../src/x64', '../../src/x64',
...@@ -457,14 +463,18 @@ ...@@ -457,14 +463,18 @@
'../../src/x64/jump-target-x64.cc', '../../src/x64/jump-target-x64.cc',
'../../src/x64/macro-assembler-x64.cc', '../../src/x64/macro-assembler-x64.cc',
'../../src/x64/macro-assembler-x64.h', '../../src/x64/macro-assembler-x64.h',
#'../../src/x64/regexp-macro-assembler-x64.cc',
#'../../src/x64/regexp-macro-assembler-x64.h',
'../../src/x64/register-allocator-x64.cc', '../../src/x64/register-allocator-x64.cc',
'../../src/x64/stub-cache-x64.cc', '../../src/x64/stub-cache-x64.cc',
'../../src/x64/virtual-frame-x64.cc', '../../src/x64/virtual-frame-x64.cc',
'../../src/x64/virtual-frame-x64.h', '../../src/x64/virtual-frame-x64.h',
], ],
}], }],
# Add the x64 native RegExp macro assembler sources only when building for
# x64 with v8_regexp set to "native".
['target_arch=="x64" and v8_regexp=="native"', {
'sources': [
'../../src/x64/regexp-macro-assembler-x64.cc',
'../../src/x64/regexp-macro-assembler-x64.h',
],
}],
['OS=="linux"', { ['OS=="linux"', {
'link_settings': { 'link_settings': {
'libraries': [ 'libraries': [
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment