Commit 4254388c authored by lrn@chromium.org

X64: Implement RegExp natively.

Review URL: http://codereview.chromium.org/165443


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@2688 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 7b056783
......@@ -101,6 +101,9 @@ LIBRARY_FLAGS = {
'regexp:native': {
'arch:ia32' : {
'CPPDEFINES': ['V8_NATIVE_REGEXP']
},
'arch:x64' : {
'CPPDEFINES': ['V8_NATIVE_REGEXP']
}
}
},
......@@ -716,7 +719,11 @@ class BuildContext(object):
result = []
result += source.get('all', [])
for (name, value) in self.options.iteritems():
result += source.get(name + ':' + value, [])
source_value = source.get(name + ':' + value, [])
if type(source_value) == dict:
result += self.GetRelevantSources(source_value)
else:
result += source_value
return sorted(result)
def AppendFlags(self, options, added):
......
......@@ -63,24 +63,32 @@ SOURCES = {
'arm/register-allocator-arm.cc', 'arm/stub-cache-arm.cc',
'arm/virtual-frame-arm.cc'
],
'arch:ia32': [
'ia32/assembler-ia32.cc', 'ia32/builtins-ia32.cc', 'ia32/cfg-ia32.cc',
'ia32/codegen-ia32.cc', 'ia32/cpu-ia32.cc', 'ia32/disasm-ia32.cc',
'ia32/debug-ia32.cc', 'ia32/frames-ia32.cc', 'ia32/ic-ia32.cc',
'ia32/jump-target-ia32.cc', 'ia32/macro-assembler-ia32.cc',
'ia32/regexp-macro-assembler-ia32.cc',
'ia32/register-allocator-ia32.cc', 'ia32/stub-cache-ia32.cc',
'ia32/virtual-frame-ia32.cc'
],
'arch:x64': [
'x64/assembler-x64.cc', 'x64/builtins-x64.cc', 'x64/cfg-x64.cc',
'x64/codegen-x64.cc', 'x64/cpu-x64.cc', 'x64/disasm-x64.cc',
'x64/debug-x64.cc', 'x64/frames-x64.cc', 'x64/ic-x64.cc',
'x64/jump-target-x64.cc', 'x64/macro-assembler-x64.cc',
# 'x64/regexp-macro-assembler-x64.cc',
'x64/register-allocator-x64.cc',
'x64/stub-cache-x64.cc', 'x64/virtual-frame-x64.cc'
],
'arch:ia32': {
'all': [
'ia32/assembler-ia32.cc', 'ia32/builtins-ia32.cc', 'ia32/cfg-ia32.cc',
'ia32/codegen-ia32.cc', 'ia32/cpu-ia32.cc', 'ia32/disasm-ia32.cc',
'ia32/debug-ia32.cc', 'ia32/frames-ia32.cc', 'ia32/ic-ia32.cc',
'ia32/jump-target-ia32.cc', 'ia32/macro-assembler-ia32.cc',
'ia32/register-allocator-ia32.cc', 'ia32/stub-cache-ia32.cc',
'ia32/virtual-frame-ia32.cc'
],
'regexp:native': [
'ia32/regexp-macro-assembler-ia32.cc',
]
},
'arch:x64': {
'all': [
'x64/assembler-x64.cc', 'x64/builtins-x64.cc', 'x64/cfg-x64.cc',
'x64/codegen-x64.cc', 'x64/cpu-x64.cc', 'x64/disasm-x64.cc',
'x64/debug-x64.cc', 'x64/frames-x64.cc', 'x64/ic-x64.cc',
'x64/jump-target-x64.cc', 'x64/macro-assembler-x64.cc',
'x64/register-allocator-x64.cc',
'x64/stub-cache-x64.cc', 'x64/virtual-frame-x64.cc'
],
'regexp:native': [
'x64/regexp-macro-assembler-x64.cc'
]
},
'simulator:arm': ['arm/simulator-arm.cc'],
'os:freebsd': ['platform-freebsd.cc', 'platform-posix.cc'],
'os:linux': ['platform-linux.cc', 'platform-posix.cc'],
......
......@@ -82,8 +82,8 @@ static void RecordWriteHelper(MacroAssembler* masm,
// page_start + kObjectStartOffset + objectSize
// where objectSize is FixedArray::kHeaderSize + kPointerSize * array_length.
// Add the delta between the end of the normal RSet and the start of the
// extra RSet to 'object', so that addressing the bit using 'pointer_offset'
// hits the extra RSet words.
// extra RSet to 'page_start', so that addressing the bit using
// 'pointer_offset' hits the extra RSet words.
masm->lea(page_start,
Operand(page_start, array_length, times_pointer_size,
Page::kObjectStartOffset + FixedArray::kHeaderSize
......
......@@ -54,7 +54,7 @@ namespace internal {
*
* Each call to a public method should retain this convention.
* The stack will have the following structure:
* - stack_area_top (High end of the memory area to use as
* - stack_area_base (High end of the memory area to use as
* backtracking stack)
* - at_start (if 1, start at start of string, if 0, don't)
* - int* capture_array (int[num_saved_registers_], for output).
......@@ -78,13 +78,13 @@ namespace internal {
* character of the string). The remaining registers start out as garbage.
*
* The data up to the return address must be placed there by the calling
* code, e.g., by calling the code entry as cast to:
* code, by calling the code entry as cast to a function with the signature:
* int (*match)(String* input_string,
* Address start,
* Address end,
* int* capture_output_array,
* bool at_start,
* byte* stack_area_top)
* byte* stack_area_base)
*/
#define __ ACCESS_MASM(masm_)
......@@ -93,7 +93,6 @@ RegExpMacroAssemblerIA32::RegExpMacroAssemblerIA32(
Mode mode,
int registers_to_save)
: masm_(new MacroAssembler(NULL, kRegExpCodeSize)),
constants_(kRegExpConstantsSize),
mode_(mode),
num_registers_(registers_to_save),
num_saved_registers_(registers_to_save),
......@@ -156,13 +155,6 @@ void RegExpMacroAssemblerIA32::Bind(Label* label) {
}
// Placeholder: the body consists solely of the UNIMPLEMENTED() macro and none
// of the parameters are used.
void RegExpMacroAssemblerIA32::CheckBitmap(uc16 start,
Label* bitmap,
Label* on_zero) {
UNIMPLEMENTED();
}
void RegExpMacroAssemblerIA32::CheckCharacter(uint32_t c, Label* on_equal) {
__ cmp(current_character(), c);
BranchOrBacktrack(equal, on_equal);
......@@ -217,15 +209,9 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
BranchOrBacktrack(greater, on_failure);
}
Label backtrack;
if (on_failure == NULL) {
// Avoid inlining the Backtrack macro for each test.
Label skip_backtrack;
__ jmp(&skip_backtrack);
__ bind(&backtrack);
Backtrack();
__ bind(&skip_backtrack);
on_failure = &backtrack;
// Instead of inlining a backtrack, (re)use the global backtrack target.
on_failure = &backtrack_label_;
}
for (int i = 0; i < str.length(); i++) {
......@@ -581,34 +567,6 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
}
}
// Placeholder: the body consists solely of the UNIMPLEMENTED() macro and none
// of the parameters are used.
void RegExpMacroAssemblerIA32::DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations) {
UNIMPLEMENTED();
}
// Placeholder: the body consists solely of the UNIMPLEMENTED() macro and none
// of the parameters are used.
void RegExpMacroAssemblerIA32::DispatchByteMap(
uc16 start,
Label* byte_map,
const Vector<Label*>& destinations) {
UNIMPLEMENTED();
}
// Placeholder: the body consists solely of the UNIMPLEMENTED() macro and none
// of the parameters are used.
void RegExpMacroAssemblerIA32::DispatchHighByteMap(
byte start,
Label* byte_map,
const Vector<Label*>& destinations) {
UNIMPLEMENTED();
}
// Placeholder: the body only invokes UNIMPLEMENTED(); the inline remark states
// that this operation has no use in this assembler. |label| is not used.
void RegExpMacroAssemblerIA32::EmitOrLink(Label* label) {
UNIMPLEMENTED(); // Has no use.
}
void RegExpMacroAssemblerIA32::Fail() {
ASSERT(FAILURE == 0); // Return value for failure is zero.
......@@ -668,17 +626,17 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
__ mov(edi, Operand(ebp, kInputStart));
// Set up edi to be negative offset from string end.
__ sub(edi, Operand(esi));
if (num_saved_registers_ > 0) {
// Set eax to address of char before start of input
// (effectively string position -1).
__ lea(eax, Operand(edi, -char_size()));
// Store this value in a local variable, for use when clearing
// position registers.
__ mov(Operand(ebp, kInputStartMinusOne), eax);
if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
// Fill saved registers with initial value = start offset - 1
// Fill in stack push order, to avoid accessing across an unwritten
// page (a problem on Windows).
__ mov(ecx, kRegisterZero);
// Set eax to address of char before start of input
// (effectively string position -1).
__ lea(eax, Operand(edi, -char_size()));
// Store this value in a local variable, for use when clearing
// position registers.
__ mov(Operand(ebp, kInputStartMinusOne), eax);
Label init_loop;
__ bind(&init_loop);
__ mov(Operand(ebp, ecx, times_1, +0), eax);
......@@ -942,139 +900,8 @@ void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
}
// Runs the compiled regexp code in |regexp_code| against |subject|, starting
// the match at character index |previous_index|.
//
// |subject| must already be flat (asserted). A cons string is replaced by its
// first part and a sliced string by its underlying buffer (with both offsets
// shifted by the slice start), after which raw start/end byte pointers into
// the character data are computed and handed to Execute().
//
// On SUCCESS, every non-negative entry among the first
// |offsets_vector_length| slots of |offsets_vector| is incremented by
// |previous_index|. The Result produced by Execute() is returned unchanged.
RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Match(
Handle<Code> regexp_code,
Handle<String> subject,
int* offsets_vector,
int offsets_vector_length,
int previous_index) {
ASSERT(subject->IsFlat());
ASSERT(previous_index >= 0);
ASSERT(previous_index <= subject->length());
// No allocations before calling the regexp, but we can't use
// AssertNoAllocation, since regexps might be preempted, and another thread
// might do allocation anyway.
String* subject_ptr = *subject;
// Character offsets into string.
int start_offset = previous_index;
int end_offset = subject_ptr->length();
bool is_ascii = subject->IsAsciiRepresentation();
// Unwrap one level of cons/slice wrapping to reach the string that is
// asserted below to be sequential or external.
if (StringShape(subject_ptr).IsCons()) {
subject_ptr = ConsString::cast(subject_ptr)->first();
} else if (StringShape(subject_ptr).IsSliced()) {
SlicedString* slice = SlicedString::cast(subject_ptr);
start_offset += slice->start();
end_offset += slice->start();
subject_ptr = slice->buffer();
}
// Ensure that an underlying string has the same ascii-ness.
ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
// String is now either Sequential or External.
// Shift used to convert a character count into a byte count: 0 for ASCII
// input, 1 otherwise.
int char_size_shift = is_ascii ? 0 : 1;
int char_length = end_offset - start_offset;
const byte* input_start =
StringCharacterPosition(subject_ptr, start_offset);
int byte_length = char_length << char_size_shift;
const byte* input_end = input_start + byte_length;
// The final argument (at_start) is true only when matching begins at
// index 0 of the subject.
RegExpMacroAssemblerIA32::Result res = Execute(*regexp_code,
subject_ptr,
start_offset,
input_start,
input_end,
offsets_vector,
previous_index == 0);
if (res == SUCCESS) {
// Capture values are relative to start_offset only.
// Convert them to be relative to start of string.
for (int i = 0; i < offsets_vector_length; i++) {
if (offsets_vector[i] >= 0) {
offsets_vector[i] += previous_index;
}
}
}
return res;
}
// Private methods:
static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
// Calls the generated matcher code in |code| and returns its result.
//
// The code entry point is cast to a function pointer and invoked with, in
// order: the input string, the start offset, the input start pointer, the
// input end pointer, the output array, the at_start flag (as 0/1) and the
// value of RegExpStack::stack_top(). The returned integer is asserted to lie
// between RETRY and SUCCESS and is converted to a Result.
RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Execute(
Code* code,
String* input,
int start_offset,
const byte* input_start,
const byte* input_end,
int* output,
bool at_start) {
typedef int (*matcher)(String*, int, const byte*,
const byte*, int*, int, Address);
matcher matcher_func = FUNCTION_CAST<matcher>(code->entry());
// The generated code receives the flag as an int rather than a bool.
int at_start_val = at_start ? 1 : 0;
// Ensure that the minimum stack has been allocated.
// |stack| is not referenced again in this function; it is declared before
// the stack address is read below.
RegExpStack stack;
Address stack_top = RegExpStack::stack_top();
int result = matcher_func(input,
start_offset,
input_start,
input_end,
output,
at_start_val,
stack_top);
ASSERT(result <= SUCCESS);
ASSERT(result >= RETRY);
if (result == EXCEPTION && !Top::has_pending_exception()) {
// We detected a stack overflow (on the backtrack stack) in RegExp code,
// but haven't created the exception yet.
Top::StackOverflow();
}
return static_cast<Result>(result);
}
// Compares two equally long two-byte character sequences, ignoring case.
//
// |byte_offset1| and |byte_offset2| are the addresses of the two sequences and
// |byte_length| is their common length in bytes (asserted to be even).
// Returns 1 if every character pair is identical or agrees after
// canonicalization, and 0 at the first pair that does not.
int RegExpMacroAssemblerIA32::CaseInsensitiveCompareUC16(Address byte_offset1,
                                                         Address byte_offset2,
                                                         size_t byte_length) {
  // This function must never trigger a garbage collection: a GC might move
  // the calling generated code and invalidate the return address on the stack.
  ASSERT(byte_length % 2 == 0);
  uc16* lhs = reinterpret_cast<uc16*>(byte_offset1);
  uc16* rhs = reinterpret_cast<uc16*>(byte_offset2);
  size_t char_count = byte_length >> 1;
  for (size_t pos = 0; pos < char_count; pos++) {
    unibrow::uchar left = lhs[pos];
    unibrow::uchar right = rhs[pos];
    // Identical characters need no canonicalization.
    if (left == right) continue;
    // Canonicalize the left character first; it may already equal the right.
    unibrow::uchar left_canon[1] = { left };
    canonicalize.get(left, '\0', left_canon);
    if (left_canon[0] == right) continue;
    // Otherwise both canonical forms have to agree.
    unibrow::uchar right_canon[1] = { right };
    canonicalize.get(right, '\0', right_canon);
    if (left_canon[0] != right_canon[0]) return 0;
  }
  return 1;
}
void RegExpMacroAssemblerIA32::CallCheckStackGuardState(Register scratch) {
int num_arguments = 3;
FrameAlign(num_arguments, scratch);
......@@ -1096,35 +923,6 @@ static T& frame_entry(Address re_frame, int frame_offset) {
}
// Returns the address of the character at |start_index| inside |subject|.
//
// |subject| has to be an external or sequential string (asserted), and
// |start_index| has to lie within [0, subject->length()] (asserted). The
// result is a byte pointer regardless of whether the characters are one or
// two bytes wide.
const byte* RegExpMacroAssemblerIA32::StringCharacterPosition(String* subject,
                                                              int start_index) {
  // Not just flat, but ultra flat.
  ASSERT(subject->IsExternalString() || subject->IsSeqString());
  ASSERT(start_index >= 0);
  ASSERT(start_index <= subject->length());
  if (!subject->IsAsciiRepresentation()) {
    // Two-byte string: index in uc16 units, then reinterpret as bytes.
    const uc16* wide_chars;
    if (!StringShape(subject).IsExternal()) {
      ASSERT(subject->IsSeqTwoByteString());
      wide_chars = SeqTwoByteString::cast(subject)->GetChars();
    } else {
      wide_chars = ExternalTwoByteString::cast(subject)->resource()->data();
    }
    return reinterpret_cast<const byte*>(wide_chars + start_index);
  }
  // ASCII string: one byte per character.
  const char* narrow_chars;
  if (!StringShape(subject).IsExternal()) {
    ASSERT(subject->IsSeqAsciiString());
    narrow_chars = SeqAsciiString::cast(subject)->GetChars();
  } else {
    narrow_chars = ExternalAsciiString::cast(subject)->resource()->data();
  }
  return reinterpret_cast<const byte*>(narrow_chars) + start_index;
}
int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
Code* re_code,
Address re_frame) {
......@@ -1198,18 +996,18 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
Address RegExpMacroAssemblerIA32::GrowStack(Address stack_pointer,
Address* stack_top) {
Address* stack_base) {
size_t size = RegExpStack::stack_capacity();
Address old_stack_top = RegExpStack::stack_top();
ASSERT(old_stack_top == *stack_top);
ASSERT(stack_pointer <= old_stack_top);
ASSERT(static_cast<size_t>(old_stack_top - stack_pointer) <= size);
Address new_stack_top = RegExpStack::EnsureCapacity(size * 2);
if (new_stack_top == NULL) {
Address old_stack_base = RegExpStack::stack_base();
ASSERT(old_stack_base == *stack_base);
ASSERT(stack_pointer <= old_stack_base);
ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
Address new_stack_base = RegExpStack::EnsureCapacity(size * 2);
if (new_stack_base == NULL) {
return NULL;
}
*stack_top = new_stack_top;
return new_stack_top - (old_stack_top - stack_pointer);
*stack_base = new_stack_base;
return new_stack_base - (old_stack_base - stack_pointer);
}
......@@ -1373,11 +1171,5 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset,
}
// Emits code that leaves the address of |buffer|'s data in |reg|: the value of
// buffer->array() is first moved into |reg|, then buffer->base_offset() is
// added to it as an immediate. No register other than |reg| is named.
void RegExpMacroAssemblerIA32::LoadConstantBufferAddress(Register reg,
ArraySlice* buffer) {
__ mov(reg, buffer->array());
__ add(Operand(reg), Immediate(buffer->base_offset()));
}
#undef __
}} // namespace v8::internal
......@@ -31,21 +31,8 @@
namespace v8 {
namespace internal {
class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler {
public:
// Type of input string to generate code for.
enum Mode { ASCII = 1, UC16 = 2 };
// Result of calling the generated RegExp code:
// RETRY: Something significant changed during execution, and the matching
// should be retried from scratch.
// EXCEPTION: Something failed during execution. If no exception has been
// thrown, it's an internal out-of-memory, and the caller should
// throw the exception.
// FAILURE: Matching failed.
// SUCCESS: Matching succeeded, and the output array has been filled with
// capture positions.
enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
RegExpMacroAssemblerIA32(Mode mode, int registers_to_save);
virtual ~RegExpMacroAssemblerIA32();
virtual int stack_limit_slack();
......@@ -54,7 +41,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void Backtrack();
virtual void Bind(Label* label);
virtual void CheckAtStart(Label* on_at_start);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t mask,
......@@ -88,16 +74,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
int cp_offset,
bool check_offset,
Label* on_no_match);
virtual void DispatchByteMap(uc16 start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void DispatchHalfNibbleMap(uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations);
virtual void DispatchHighByteMap(byte start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void EmitOrLink(Label* label);
virtual void Fail();
virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label);
......@@ -123,20 +99,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
static Result Match(Handle<Code> regexp,
Handle<String> subject,
int* offsets_vector,
int offsets_vector_length,
int previous_index);
static Result Execute(Code* code,
String* input,
int start_offset,
const byte* input_start,
const byte* input_end,
int* output,
bool at_start);
private:
// Offsets from ebp of function parameters and stored registers.
static const int kFramePointer = 0;
......@@ -163,16 +125,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
// Initial size of constant buffers allocated during compilation.
static const int kRegExpConstantsSize = 256;
static const byte* StringCharacterPosition(String* subject, int start_index);
// Compares two-byte strings case insensitively.
// Called from generated RegExp code.
static int CaseInsensitiveCompareUC16(Address byte_offset1,
Address byte_offset2,
size_t byte_length);
// Load a number of characters at the given offset from the
// current position, into the current-character register.
......@@ -218,11 +170,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
// is NULL, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to, Hint hint = no_hint);
// Load the address of a "constant buffer" (a slice of a byte array)
// into a register. The address is computed from the ByteArray* address
// and an offset. Uses no extra registers.
void LoadConstantBufferAddress(Register reg, ArraySlice* buffer);
// Call and return internally in the generated code in a way that
// is GC-safe (i.e., doesn't leave absolute code addresses on the stack)
inline void SafeCall(Label* to);
......@@ -258,10 +205,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
MacroAssembler* masm_;
// Constant buffer provider. Allocates external storage for storing
// constants.
ByteArrayProvider constants_;
// Which mode to generate code for (ASCII or UC16).
Mode mode_;
......
......@@ -43,6 +43,7 @@
#include "regexp-macro-assembler-irregexp.h"
#include "regexp-stack.h"
#ifdef V8_NATIVE_REGEXP
#if V8_TARGET_ARCH_IA32
#include "ia32/macro-assembler-ia32.h"
#include "ia32/regexp-macro-assembler-ia32.h"
......@@ -54,6 +55,7 @@
#else
#error Unsupported target architecture.
#endif
#endif
#include "interpreter-irregexp.h"
......@@ -270,10 +272,11 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
// If compilation fails, an exception is thrown and this function
// returns false.
bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) {
Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));
#ifdef V8_NATIVE_REGEXP
if (re->DataAt(JSRegExp::code_index(is_ascii))->IsCode()) return true;
if (compiled_code->IsCode()) return true;
#else // ! V8_NATIVE_REGEXP (RegExp interpreter code)
if (re->DataAt(JSRegExp::code_index(is_ascii))->IsByteArray()) return true;
if (compiled_code->IsByteArray()) return true;
#endif
return CompileIrregexp(re, is_ascii);
}
......@@ -414,33 +417,36 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
// Dispatch to the correct RegExp implementation.
Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
#ifdef V8_NATIVE_REGEXP
#if V8_TARGET_ARCH_IA32
#ifdef V8_TARGET_ARCH_ARM
UNIMPLEMENTED();
#else // Native regexp supported.
OffsetsVector captures(number_of_capture_registers);
int* captures_vector = captures.vector();
RegExpMacroAssemblerIA32::Result res;
NativeRegExpMacroAssembler::Result res;
do {
bool is_ascii = subject->IsAsciiRepresentation();
if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
return Handle<Object>::null();
}
Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
res = RegExpMacroAssemblerIA32::Match(code,
subject,
captures_vector,
captures.length(),
previous_index);
res = NativeRegExpMacroAssembler::Match(code,
subject,
captures_vector,
captures.length(),
previous_index);
// If result is RETRY, the string has changed representation, and we
// must restart from scratch.
} while (res == RegExpMacroAssemblerIA32::RETRY);
if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
} while (res == NativeRegExpMacroAssembler::RETRY);
if (res == NativeRegExpMacroAssembler::EXCEPTION) {
ASSERT(Top::has_pending_exception());
return Handle<Object>::null();
}
ASSERT(res == RegExpMacroAssemblerIA32::SUCCESS
|| res == RegExpMacroAssemblerIA32::FAILURE);
ASSERT(res == NativeRegExpMacroAssembler::SUCCESS
|| res == NativeRegExpMacroAssembler::FAILURE);
if (res != RegExpMacroAssemblerIA32::SUCCESS) return Factory::null_value();
if (res != NativeRegExpMacroAssembler::SUCCESS) return Factory::null_value();
array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements()));
ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
......@@ -449,10 +455,9 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
SetCapture(*array, i, captures_vector[i]);
SetCapture(*array, i + 1, captures_vector[i + 1]);
}
#else // !V8_TARGET_ARCH_IA32
UNREACHABLE();
#endif // V8_TARGET_ARCH_IA32
#else // !V8_NATIVE_REGEXP
#endif // Native regexp supported.
#else // ! V8_NATIVE_REGEXP
bool is_ascii = subject->IsAsciiRepresentation();
if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
return Handle<Object>::null();
......@@ -4457,38 +4462,36 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
NodeInfo info = *node->info();
// Create the correct assembler for the architecture.
#ifdef V8_NATIVE_REGEXP
#ifdef V8_TARGET_ARCH_ARM
// ARM native regexp not implemented yet.
UNREACHABLE();
#endif
#ifdef V8_TARGET_ARCH_X64
// X64 native regexp not implemented yet.
UNREACHABLE();
#endif
// Native regexp implementation.
NativeRegExpMacroAssembler::Mode mode =
is_ascii ? NativeRegExpMacroAssembler::ASCII
: NativeRegExpMacroAssembler::UC16;
#ifdef V8_TARGET_ARCH_IA32
RegExpMacroAssemblerIA32::Mode mode;
if (is_ascii) {
mode = RegExpMacroAssemblerIA32::ASCII;
} else {
mode = RegExpMacroAssemblerIA32::UC16;
}
RegExpMacroAssemblerIA32 macro_assembler(mode,
(data->capture_count + 1) * 2);
return compiler.Assemble(&macro_assembler,
node,
data->capture_count,
pattern);
#endif
#ifdef V8_TARGET_ARCH_X64
RegExpMacroAssemblerX64 macro_assembler(mode,
(data->capture_count + 1) * 2);
#endif
#ifdef V8_TARGET_ARCH_ARM
UNIMPLEMENTED();
#endif
#else // ! V8_NATIVE_REGEXP
// Interpreted regexp.
// Interpreted regexp implementation.
EmbeddedVector<byte, 1024> codes;
RegExpMacroAssemblerIrregexp macro_assembler(codes);
#endif
return compiler.Assemble(&macro_assembler,
node,
data->capture_count,
pattern);
#endif // V8_NATIVE_REGEXP
}
}} // namespace v8::internal
......@@ -375,37 +375,6 @@ void RegExpMacroAssemblerIrregexp::CheckNotRegistersEqual(int reg1,
}
// Placeholder: the body consists solely of the UNIMPLEMENTED() macro and none
// of the parameters are used.
void RegExpMacroAssemblerIrregexp::CheckBitmap(uc16 start,
Label* bitmap,
Label* on_zero) {
UNIMPLEMENTED();
}
// Placeholder: the body consists solely of the UNIMPLEMENTED() macro and none
// of the parameters are used.
void RegExpMacroAssemblerIrregexp::DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& table) {
UNIMPLEMENTED();
}
// Placeholder: the body consists solely of the UNIMPLEMENTED() macro and none
// of the parameters are used.
void RegExpMacroAssemblerIrregexp::DispatchByteMap(
uc16 start,
Label* byte_map,
const Vector<Label*>& table) {
UNIMPLEMENTED();
}
// Placeholder: the body consists solely of the UNIMPLEMENTED() macro and none
// of the parameters are used.
void RegExpMacroAssemblerIrregexp::DispatchHighByteMap(
byte start,
Label* byte_map,
const Vector<Label*>& table) {
UNIMPLEMENTED();
}
void RegExpMacroAssemblerIrregexp::CheckCharacters(
Vector<const uc16> str,
int cp_offset,
......
......@@ -52,7 +52,6 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
// The byte-code interpreter checks on each push anyway.
virtual int stack_limit_slack() { return 1; }
virtual void Bind(Label* label);
virtual void EmitOrLink(Label* label);
virtual void AdvanceCurrentPosition(int by); // Signed cp change.
virtual void PopCurrentPosition();
virtual void PushCurrentPosition();
......@@ -100,16 +99,6 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
int cp_offset,
Label* on_failure,
bool check_end_of_string);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void DispatchHalfNibbleMap(uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations);
virtual void DispatchByteMap(uc16 start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void DispatchHighByteMap(byte start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt);
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
virtual void IfRegisterEqPos(int register_index, Label* if_eq);
......@@ -119,6 +108,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
private:
void Expand();
// Code and bitmap emission.
inline void EmitOrLink(Label* label);
inline void Emit32(uint32_t x);
inline void Emit16(uint32_t x);
inline void Emit(uint32_t bc, uint32_t arg);
......
......@@ -53,12 +53,6 @@ void RegExpMacroAssemblerTracer::Bind(Label* label) {
}
// Prints a trace line for the call, then forwards it to assembler_.
// NOTE(review): |label| is a pointer but is formatted with %08x, which expects
// an unsigned int — confirm this is acceptable on 64-bit targets.
void RegExpMacroAssemblerTracer::EmitOrLink(Label* label) {
PrintF(" EmitOrLink(label[%08x]);\n", label);
assembler_->EmitOrLink(label);
}
void RegExpMacroAssemblerTracer::AdvanceCurrentPosition(int by) {
PrintF(" AdvanceCurrentPosition(by=%d);\n", by);
assembler_->AdvanceCurrentPosition(by);
......@@ -311,13 +305,6 @@ void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str,
}
// Prints a trace line for the call (the bitmap itself is shown only as the
// literal text "<bitmap>"), then forwards the call to assembler_.
// NOTE(review): |on_zero| is a pointer but is formatted with %08x, which
// expects an unsigned int — confirm this is acceptable on 64-bit targets.
void RegExpMacroAssemblerTracer::CheckBitmap(uc16 start, Label* bitmap,
Label* on_zero) {
PrintF(" CheckBitmap(start=u%04x, <bitmap>, label[%08x]);\n", start, on_zero);
assembler_->CheckBitmap(start, bitmap, on_zero);
}
bool RegExpMacroAssemblerTracer::CheckSpecialCharacterClass(
uc16 type,
int cp_offset,
......@@ -338,51 +325,6 @@ bool RegExpMacroAssemblerTracer::CheckSpecialCharacterClass(
}
// Prints a trace line for the call, listing every destination label between
// square brackets, then forwards the call to assembler_. The map itself is
// shown only as the literal text "<half_nibble_map>".
// NOTE(review): the labels are pointers but are formatted with %08x, which
// expects an unsigned int — confirm this is acceptable on 64-bit targets.
void RegExpMacroAssemblerTracer::DispatchHalfNibbleMap(
    uc16 start,
    Label* half_nibble_map,
    const Vector<Label*>& destinations) {
  PrintF(" DispatchHalfNibbleMap(start=u%04x, <half_nibble_map>, [", start);
  for (int i = 0; i < destinations.length(); i++) {
    if (i > 0)
      PrintF(", ");
    PrintF("label[%08x]", destinations[i]);
  }
  // Close the destination list opened above before ending the call.
  PrintF("]);\n");
  assembler_->DispatchHalfNibbleMap(start, half_nibble_map, destinations);
}
// Prints a trace line for the call, listing every destination label between
// square brackets, then forwards the call to assembler_. The map itself is
// shown only as the literal text "<byte_map>".
// NOTE(review): the labels are pointers but are formatted with %08x, which
// expects an unsigned int — confirm this is acceptable on 64-bit targets.
void RegExpMacroAssemblerTracer::DispatchByteMap(
    uc16 start,
    Label* byte_map,
    const Vector<Label*>& destinations) {
  PrintF(" DispatchByteMap(start=u%04x, <byte_map>, [", start);
  for (int i = 0; i < destinations.length(); i++) {
    if (i > 0)
      PrintF(", ");
    PrintF("label[%08x]", destinations[i]);
  }
  // Close the destination list opened above before ending the call.
  PrintF("]);\n");
  assembler_->DispatchByteMap(start, byte_map, destinations);
}
// Prints a trace line for the call, listing every destination label between
// square brackets, then forwards the call to assembler_. The map itself is
// shown only as the literal text "<byte_map>".
// NOTE(review): the labels are pointers but are formatted with %08x, which
// expects an unsigned int — confirm this is acceptable on 64-bit targets.
void RegExpMacroAssemblerTracer::DispatchHighByteMap(
    byte start,
    Label* byte_map,
    const Vector<Label*>& destinations) {
  PrintF(" DispatchHighByteMap(start=u%04x, <byte_map>, [", start);
  for (int i = 0; i < destinations.length(); i++) {
    if (i > 0)
      PrintF(", ");
    PrintF("label[%08x]", destinations[i]);
  }
  // Close the destination list opened above before ending the call.
  PrintF("]);\n");
  assembler_->DispatchHighByteMap(start, byte_map, destinations);
}
void RegExpMacroAssemblerTracer::IfRegisterLT(int register_index,
int comparand, Label* if_lt) {
PrintF(" IfRegisterLT(register=%d, number=%d, label[%08x]);\n",
......
......@@ -43,7 +43,6 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void Backtrack();
virtual void Bind(Label* label);
virtual void CheckAtStart(Label* on_at_start);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t and_with,
......@@ -73,19 +72,6 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
int cp_offset,
bool check_offset,
Label* on_no_match);
virtual void DispatchByteMap(
uc16 start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations);
virtual void DispatchHighByteMap(
byte start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void EmitOrLink(Label* label);
virtual void Fail();
virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label);
......
......@@ -25,10 +25,10 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <string.h>
#include "v8.h"
#include "ast.h"
#include "assembler.h"
#include "regexp-stack.h"
#include "regexp-macro-assembler.h"
namespace v8 {
......@@ -42,38 +42,176 @@ RegExpMacroAssembler::~RegExpMacroAssembler() {
}
ByteArrayProvider::ByteArrayProvider(unsigned int initial_size)
: byte_array_size_(initial_size),
current_byte_array_(),
current_byte_array_free_offset_(initial_size) {}
#ifdef V8_NATIVE_REGEXP // Avoid unused code, e.g., on ARM.
NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
}
ArraySlice ByteArrayProvider::GetBuffer(unsigned int size,
unsigned int elem_size) {
ASSERT(size > 0);
size_t byte_size = size * elem_size;
int free_offset = current_byte_array_free_offset_;
// align elements
free_offset += elem_size - 1;
free_offset = free_offset - (free_offset % elem_size);
NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
}
if (free_offset + byte_size > byte_array_size_) {
if (byte_size > (byte_array_size_ / 2)) {
Handle<ByteArray> solo_buffer(Factory::NewByteArray(byte_size, TENURED));
return ArraySlice(solo_buffer, 0);
const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
String* subject,
int start_index) {
// Not just flat, but ultra flat.
ASSERT(subject->IsExternalString() || subject->IsSeqString());
ASSERT(start_index >= 0);
ASSERT(start_index <= subject->length());
if (subject->IsAsciiRepresentation()) {
const byte* address;
if (StringShape(subject).IsExternal()) {
const char* data = ExternalAsciiString::cast(subject)->resource()->data();
address = reinterpret_cast<const byte*>(data);
} else {
ASSERT(subject->IsSeqAsciiString());
char* data = SeqAsciiString::cast(subject)->GetChars();
address = reinterpret_cast<const byte*>(data);
}
current_byte_array_ = Factory::NewByteArray(byte_array_size_, TENURED);
free_offset = 0;
return address + start_index;
}
const uc16* data;
if (StringShape(subject).IsExternal()) {
data = ExternalTwoByteString::cast(subject)->resource()->data();
} else {
ASSERT(subject->IsSeqTwoByteString());
data = SeqTwoByteString::cast(subject)->GetChars();
}
current_byte_array_free_offset_ = free_offset + byte_size;
return ArraySlice(current_byte_array_, free_offset);
return reinterpret_cast<const byte*>(data + start_index);
}
template <typename T>
ArraySlice ByteArrayProvider::GetBuffer(Vector<T> values) {
ArraySlice slice = GetBuffer(values.length(), sizeof(T));
memcpy(slice.location(), values.start(), values.length() * sizeof(T));
return slice;
// Runs the compiled regexp code in |regexp_code| against |subject|, starting
// the match at character index |previous_index|.
//
// |subject| must already be flat (asserted). A cons string is replaced by its
// first part and a sliced string by its underlying buffer (with both offsets
// shifted by the slice start), after which raw start/end byte pointers into
// the character data are computed and handed to Execute().
//
// On SUCCESS, every non-negative entry among the first
// |offsets_vector_length| slots of |offsets_vector| is incremented by
// |previous_index|. The Result produced by Execute() is returned unchanged.
NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
Handle<Code> regexp_code,
Handle<String> subject,
int* offsets_vector,
int offsets_vector_length,
int previous_index) {
ASSERT(subject->IsFlat());
ASSERT(previous_index >= 0);
ASSERT(previous_index <= subject->length());
// No allocations before calling the regexp, but we can't use
// AssertNoAllocation, since regexps might be preempted, and another thread
// might do allocation anyway.
String* subject_ptr = *subject;
// Character offsets into string.
int start_offset = previous_index;
int end_offset = subject_ptr->length();
bool is_ascii = subject->IsAsciiRepresentation();
// Unwrap one level of cons/slice wrapping to reach the string that is
// asserted below to be sequential or external.
if (StringShape(subject_ptr).IsCons()) {
subject_ptr = ConsString::cast(subject_ptr)->first();
} else if (StringShape(subject_ptr).IsSliced()) {
SlicedString* slice = SlicedString::cast(subject_ptr);
start_offset += slice->start();
end_offset += slice->start();
subject_ptr = slice->buffer();
}
// Ensure that an underlying string has the same ascii-ness.
ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
// String is now either Sequential or External.
// Shift used to convert a character count into a byte count: 0 for ASCII
// input, 1 otherwise.
int char_size_shift = is_ascii ? 0 : 1;
int char_length = end_offset - start_offset;
const byte* input_start =
StringCharacterPosition(subject_ptr, start_offset);
int byte_length = char_length << char_size_shift;
const byte* input_end = input_start + byte_length;
// The final argument (at_start) is true only when matching begins at
// index 0 of the subject.
Result res = Execute(*regexp_code,
subject_ptr,
start_offset,
input_start,
input_end,
offsets_vector,
previous_index == 0);
if (res == SUCCESS) {
// Capture values are relative to start_offset only.
// Convert them to be relative to start of string.
for (int i = 0; i < offsets_vector_length; i++) {
if (offsets_vector[i] >= 0) {
offsets_vector[i] += previous_index;
}
}
}
return res;
}
// Calls the generated matcher code in `code` with the given input window and
// converts its integer return value to a Result.
//
// If the generated code reports EXCEPTION without an exception pending, the
// failure was a backtrack-stack overflow detected inside the RegExp code, and
// the stack overflow exception is created here.
NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
    Code* code,
    String* input,
    int start_offset,
    const byte* input_start,
    const byte* input_end,
    int* output,
    bool at_start) {
  // Calling signature of the generated code.
  typedef int (*NativeMatcher)(String*, int, const byte*,
                               const byte*, int*, int, Address);
  NativeMatcher entry_point = FUNCTION_CAST<NativeMatcher>(code->entry());

  // The flag is passed to the generated code as an int.
  int at_start_flag = at_start ? 1 : 0;

  // Constructing a RegExpStack ensures that the minimum stack has been
  // allocated before its base address is read.
  RegExpStack stack;
  Address backtrack_stack_base = RegExpStack::stack_base();

  int raw_result = entry_point(input,
                               start_offset,
                               input_start,
                               input_end,
                               output,
                               at_start_flag,
                               backtrack_stack_base);
  ASSERT(raw_result <= SUCCESS);
  ASSERT(raw_result >= RETRY);

  if (raw_result == EXCEPTION && !Top::has_pending_exception()) {
    // The RegExp code detected an overflow of the backtrack stack but has
    // not created the exception yet.
    Top::StackOverflow();
  }
  return static_cast<Result>(raw_result);
}
static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;


// Compares two two-byte character sequences of byte_length bytes each,
// ignoring case. Returns 1 when every character pair is equal either
// directly or after canonicalization, and 0 otherwise.
int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
    Address byte_offset1,
    Address byte_offset2,
    size_t byte_length) {
  // This function is not allowed to cause a garbage collection.
  // A GC might move the calling generated code and invalidate the
  // return address on the stack.
  ASSERT(byte_length % 2 == 0);
  uc16* lhs = reinterpret_cast<uc16*>(byte_offset1);
  uc16* rhs = reinterpret_cast<uc16*>(byte_offset2);
  for (size_t remaining = byte_length >> 1;
       remaining > 0;
       remaining--, lhs++, rhs++) {
    unibrow::uchar left_char = *lhs;
    unibrow::uchar right_char = *rhs;
    if (left_char == right_char) continue;

    // Canonicalize the left character first; that alone may make it match.
    unibrow::uchar left_canonical[1] = { left_char };
    canonicalize.get(left_char, '\0', left_canonical);
    if (left_canonical[0] == right_char) continue;

    // Otherwise both canonical forms must agree.
    unibrow::uchar right_canonical[1] = { right_char };
    canonicalize.get(right_char, '\0', right_canonical);
    if (left_canonical[0] != right_canonical[0]) {
      return 0;
    }
  }
  return 1;
}
#endif // V8_NATIVE_REGEXP
} } // namespace v8::internal
......@@ -46,6 +46,7 @@ class RegExpMacroAssembler {
enum IrregexpImplementation {
kIA32Implementation,
kARMImplementation,
kX64Implementation,
kBytecodeImplementation
};
......@@ -67,12 +68,6 @@ class RegExpMacroAssembler {
virtual void Backtrack() = 0;
virtual void Bind(Label* label) = 0;
virtual void CheckAtStart(Label* on_at_start) = 0;
// Check the current character against a bitmap. The range of the current
// character must be from start to start + length_of_bitmap_in_bits.
virtual void CheckBitmap(
uc16 start, // The bitmap is indexed from this character.
Label* bitmap, // Where the bitmap is emitted.
Label* on_zero) = 0; // Where to go if the bit is 0. Fall through on 1.
// Dispatch after looking the current character up in a 2-bits-per-entry
// map. The destinations vector has up to 4 labels.
virtual void CheckCharacter(uint32_t c, Label* on_equal) = 0;
......@@ -132,23 +127,6 @@ class RegExpMacroAssembler {
Label* on_no_match) {
return false;
}
// Dispatch after looking the current character up in a byte map. The
// destinations vector has up to 256 labels.
virtual void DispatchByteMap(
uc16 start,
Label* byte_map,
const Vector<Label*>& destinations) = 0;
virtual void DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations) = 0;
// Dispatch after looking the high byte of the current character up in a byte
// map. The destinations vector has up to 256 labels.
virtual void DispatchHighByteMap(
byte start,
Label* byte_map,
const Vector<Label*>& destinations) = 0;
virtual void EmitOrLink(Label* label) = 0;
virtual void Fail() = 0;
virtual Handle<Object> GetCode(Handle<String> source) = 0;
virtual void GoTo(Label* label) = 0;
......@@ -181,51 +159,53 @@ class RegExpMacroAssembler {
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
virtual void ClearRegisters(int reg_from, int reg_to) = 0;
virtual void WriteStackPointerToRegister(int reg) = 0;
private:
};
struct ArraySlice {
#ifdef V8_NATIVE_REGEXP // Avoid compiling unused code.
class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
public:
ArraySlice(Handle<ByteArray> array, size_t offset)
: array_(array), offset_(offset) {}
Handle<ByteArray> array() { return array_; }
// Offset in the byte array data.
size_t offset() { return offset_; }
// Offset from the ByteArray pointer.
size_t base_offset() {
return ByteArray::kHeaderSize - kHeapObjectTag + offset_;
}
void* location() {
return reinterpret_cast<void*>(array_->GetDataStartAddress() + offset_);
}
template <typename T>
T& at(int idx) {
return reinterpret_cast<T*>(array_->GetDataStartAddress() + offset_)[idx];
}
private:
Handle<ByteArray> array_;
size_t offset_;
};
// Type of input string to generate code for.
enum Mode { ASCII = 1, UC16 = 2 };
// Result of calling generated native RegExp code.
// RETRY: Something significant changed during execution, and the matching
// should be retried from scratch.
// EXCEPTION: Something failed during execution. If no exception has been
// thrown, it's an internal out-of-memory, and the caller should
// throw the exception.
// FAILURE: Matching failed.
// SUCCESS: Matching succeeded, and the output array has been filled with
// capture positions.
enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
class ByteArrayProvider {
public:
explicit ByteArrayProvider(unsigned int initial_size);
// Provides a place to put "size" elements of size "element_size".
// The information can be stored in the provided ByteArray at the "offset".
// The offset is aligned to the element size.
ArraySlice GetBuffer(unsigned int size,
unsigned int element_size);
template <typename T>
ArraySlice GetBuffer(Vector<T> values);
private:
size_t byte_array_size_;
Handle<ByteArray> current_byte_array_;
int current_byte_array_free_offset_;
};
NativeRegExpMacroAssembler();
virtual ~NativeRegExpMacroAssembler();
static Result Match(Handle<Code> regexp,
Handle<String> subject,
int* offsets_vector,
int offsets_vector_length,
int previous_index);
// Compares two-byte strings case insensitively.
// Called from generated RegExp code.
static int CaseInsensitiveCompareUC16(Address byte_offset1,
Address byte_offset2,
size_t byte_length);
static const byte* StringCharacterPosition(String* subject, int start_index);
static Result Execute(Code* code,
String* input,
int start_offset,
const byte* input_start,
const byte* input_end,
int* output,
bool at_start);
};
#endif // V8_NATIVE_REGEXP
} } // namespace v8::internal
#endif // V8_REGEXP_MACRO_ASSEMBLER_H_
......@@ -48,7 +48,7 @@ class RegExpStack {
~RegExpStack(); // Releases the stack if it has grown.
// Gives the top of the memory used as stack.
static Address stack_top() {
static Address stack_base() {
ASSERT(thread_local_.memory_size_ != 0);
return thread_local_.memory_ + thread_local_.memory_size_;
}
......@@ -74,7 +74,7 @@ class RegExpStack {
private:
// Artificial limit used when no memory has been allocated.
static const uint32_t kMemoryTop = 0xffffffff;
static const uintptr_t kMemoryTop = -1;
// Minimal size of allocated stack area.
static const size_t kMinimumStackSize = 1 * KB;
......
......@@ -437,21 +437,43 @@ void Assembler::arithmetic_op(byte opcode, Register reg, const Operand& op) {
}
// Emits a register-register arithmetic instruction with a REX.W prefix:
// REX, opcode, then a ModR/M byte with `reg` in the reg field and `rm_reg`
// in the r/m field.
void Assembler::arithmetic_op(byte opcode, Register reg, Register rm_reg) {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  emit_rex_64(reg, rm_reg);
  emit(opcode);
  emit_modrm(reg, rm_reg);
}
void Assembler::arithmetic_op_32(byte opcode, Register dst, Register src) {
void Assembler::arithmetic_op_16(byte opcode, Register reg, Register rm_reg) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
emit_optional_rex_32(dst, src);
emit(0x66);
emit_optional_rex_32(reg, rm_reg);
emit(opcode);
emit_modrm(dst, src);
emit_modrm(reg, rm_reg);
}
// Emits a 16-bit arithmetic instruction whose r/m operand is the memory
// operand rm_reg and whose reg operand is `reg`.
void Assembler::arithmetic_op_16(byte opcode,
Register reg,
const Operand& rm_reg) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
// Operand size override prefix; it is emitted before the optional REX prefix.
emit(0x66);
emit_optional_rex_32(reg, rm_reg);
emit(opcode);
emit_operand(reg, rm_reg);
}
// Emits a register-register arithmetic instruction with only an optional
// (non-REX.W) prefix: opcode, then a ModR/M byte with `reg` in the reg field
// and `rm_reg` in the r/m field.
void Assembler::arithmetic_op_32(byte opcode, Register reg, Register rm_reg) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
emit_optional_rex_32(reg, rm_reg);
emit(opcode);
emit_modrm(reg, rm_reg);
}
......@@ -504,6 +526,47 @@ void Assembler::immediate_arithmetic_op(byte subcode,
}
// Emits a 16-bit arithmetic instruction with an immediate source and a
// register destination. `subcode` selects the operation (it goes in the reg
// field of the ModR/M byte, or is folded into the short accumulator opcode).
//
// Because of the operand size override prefix, the non-byte immediate forms
// (0x81 and the accumulator short form) take a 16-bit immediate, so exactly
// two immediate bytes are emitted, low byte first. Emitting four bytes would
// leave two stray bytes that get decoded as the next instruction.
void Assembler::immediate_arithmetic_op_16(byte subcode,
                                           Register dst,
                                           Immediate src) {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  emit(0x66);  // Operand size override prefix.
  emit_optional_rex_32(dst);
  if (is_int8(src.value_)) {
    // Sign-extended 8-bit immediate form.
    emit(0x83);
    emit_modrm(subcode, dst);
    emit(src.value_);
  } else if (dst.is(rax)) {
    // Short form operating on the accumulator with a 16-bit immediate.
    emit(0x05 | (subcode << 3));
    emit(src.value_ & 0xFF);
    emit((src.value_ >> 8) & 0xFF);
  } else {
    // General form with a 16-bit immediate.
    emit(0x81);
    emit_modrm(subcode, dst);
    emit(src.value_ & 0xFF);
    emit((src.value_ >> 8) & 0xFF);
  }
}
// Emits a 16-bit arithmetic instruction with an immediate source and a
// memory destination. `subcode` selects the operation and goes in the reg
// field of the ModR/M byte.
//
// Because of the operand size override prefix, the 0x81 form takes a 16-bit
// immediate, so exactly two immediate bytes are emitted, low byte first.
// Emitting four bytes would leave two stray bytes that get decoded as the
// next instruction.
void Assembler::immediate_arithmetic_op_16(byte subcode,
                                           const Operand& dst,
                                           Immediate src) {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  emit(0x66);  // Operand size override prefix.
  emit_optional_rex_32(dst);
  if (is_int8(src.value_)) {
    // Sign-extended 8-bit immediate form.
    emit(0x83);
    emit_operand(subcode, dst);
    emit(src.value_);
  } else {
    // General form with a 16-bit immediate.
    emit(0x81);
    emit_operand(subcode, dst);
    emit(src.value_ & 0xFF);
    emit((src.value_ >> 8) & 0xFF);
  }
}
void Assembler::immediate_arithmetic_op_32(byte subcode,
Register dst,
Immediate src) {
......@@ -744,6 +807,14 @@ void Assembler::cmovl(Condition cc, Register dst, const Operand& src) {
}
// Emits the two-byte compare of al with an 8-bit immediate: opcode 0x3c
// followed by the immediate byte. The immediate must fit in eight bits,
// either as a signed or as an unsigned value.
void Assembler::cmpb_al(Immediate imm8) {
ASSERT(is_int8(imm8.value_) || is_uint8(imm8.value_));
EnsureSpace ensure_space(this);
last_pc_ = pc_;
emit(0x3c);
emit(imm8.value_);
}
void Assembler::cpuid() {
ASSERT(CpuFeatures::IsEnabled(CpuFeatures::CPUID));
......@@ -1193,6 +1264,32 @@ void Assembler::movq(const Operand& dst, Immediate value) {
}
/*
 * Stores a 32-bit reference to the src label into the dst memory operand
 * using the C7 /0 move-immediate encoding.
 *
 * - Bound label: the immediate is the label position relative to the end of
 *   the 4-byte immediate field (asserted to be non-positive).
 * - Linked label: the immediate holds the label's previous link position and
 *   the label is re-linked to this immediate field.
 * - Unused label: the immediate holds its own position and the label is
 *   linked to it.
 *
 * NOTE(review): the original comment described the stored value as "a 32-bit
 * offset sign extended to 64-bit", but only an optional 32-bit REX prefix is
 * emitted here, so presumably just 32 bits are written - confirm.
 */
void Assembler::movl(const Operand& dst, Label* src) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
emit_optional_rex_32(dst);
emit(0xC7);
emit_operand(0, dst);
if (src->is_bound()) {
// Offset is measured from the end of the immediate that is about to be
// emitted, hence the extra sizeof(int32_t).
int offset = src->pos() - pc_offset() - sizeof(int32_t);
ASSERT(offset <= 0);
emitl(offset);
} else if (src->is_linked()) {
// Chain this use in front of the existing uses of the label.
emitl(src->pos());
src->link_to(pc_offset() - sizeof(int32_t));
} else {
ASSERT(src->is_unused());
// First use of the label: the field refers to itself.
int32_t current = pc_offset();
emitl(current);
src->link_to(current);
}
}
void Assembler::movq(Register dst, Handle<Object> value, RelocInfo::Mode mode) {
// If there is no relocation info, emit the value of the handle efficiently
// (possibly using less that 8 bytes for the value).
......
......@@ -496,13 +496,17 @@ class Assembler : public Malloced {
// Load a 32-bit immediate value, zero-extended to 64 bits.
void movl(Register dst, Immediate imm32);
// Move 64 bit register value to 64-bit memory location.
void movq(const Operand& dst, Register src);
// Move 64 bit memory location to 64-bit register value.
void movq(Register dst, const Operand& src);
void movq(Register dst, Register src);
// Sign extends immediate 32-bit value to 64 bits.
void movq(Register dst, Immediate x);
void movq(Register dst, Register src);
// Move the offset of the label location relative to the current
// position (after the move) to the destination.
void movl(const Operand& dst, Label* src);
// Move 64 bit register value to 64-bit memory location.
void movq(const Operand& dst, Register src);
// Move sign extended immediate to memory location.
void movq(const Operand& dst, Immediate value);
// New x64 instructions to load a 64-bit immediate into a register.
......@@ -535,7 +539,11 @@ class Assembler : public Malloced {
// Arithmetics
// 32-bit register-register add: dst += src. Exactly one instruction is
// emitted. When the low bits of dst are 4, the store form (opcode 0x01, with
// the operand roles swapped) is used instead of the load form (opcode 0x03);
// both encode the same addition.
void addl(Register dst, Register src) {
  if (dst.low_bits() == 4) {  // Forces SIB byte.
    arithmetic_op_32(0x01, src, dst);
  } else {
    arithmetic_op_32(0x03, dst, src);
  }
}
void addl(Register dst, Immediate src) {
......@@ -574,10 +582,44 @@ class Assembler : public Malloced {
immediate_arithmetic_op_8(0x7, dst, src);
}
void cmpb_al(Immediate src);
// Byte compares. Each overload forwards to the generic arithmetic emitter
// with the opcode (0x3A: register with register/memory, 0x38: memory with
// register) or, for immediates, with subcode 0x7.

// Compares two registers.
void cmpb(Register dst, Register src) {
arithmetic_op(0x3A, dst, src);
}
// Compares a register with a memory operand.
void cmpb(Register dst, const Operand& src) {
arithmetic_op(0x3A, dst, src);
}
// Compares a memory operand with a register; note that the operands are
// passed to the emitter in swapped order for the 0x38 form.
void cmpb(const Operand& dst, Register src) {
arithmetic_op(0x38, src, dst);
}
// Compares a memory operand with an immediate.
void cmpb(const Operand& dst, Immediate src) {
immediate_arithmetic_op_8(0x7, dst, src);
}
// 16-bit compares. Each overload forwards to the 16-bit arithmetic emitters
// with the opcode (0x3B: register with register/memory, 0x39: memory with
// register) or, for immediates, with subcode 0x7.

// Compares a memory operand with an immediate.
void cmpw(const Operand& dst, Immediate src) {
immediate_arithmetic_op_16(0x7, dst, src);
}
// Compares a register with an immediate.
void cmpw(Register dst, Immediate src) {
immediate_arithmetic_op_16(0x7, dst, src);
}
// Compares a register with a memory operand.
void cmpw(Register dst, const Operand& src) {
arithmetic_op_16(0x3B, dst, src);
}
// Compares two registers.
void cmpw(Register dst, Register src) {
arithmetic_op_16(0x3B, dst, src);
}
// Compares a memory operand with a register; note that the operands are
// passed to the emitter in swapped order for the 0x39 form.
void cmpw(const Operand& dst, Register src) {
arithmetic_op_16(0x39, src, dst);
}
void cmpl(Register dst, Register src) {
arithmetic_op_32(0x3B, dst, src);
}
......@@ -794,6 +836,10 @@ class Assembler : public Malloced {
immediate_arithmetic_op_32(0x5, dst, src);
}
// Byte subtract of an immediate from a register; forwards to the 8-bit
// immediate emitter with subcode 0x5.
void subb(Register dst, Immediate src) {
immediate_arithmetic_op_8(0x5, dst, src);
}
void testb(Register reg, Immediate mask);
void testb(const Operand& op, Immediate mask);
void testl(Register dst, Register src);
......@@ -1141,26 +1187,36 @@ class Assembler : public Malloced {
// AND, OR, XOR, or CMP. The encodings of these operations are all
// similar, differing just in the opcode or in the reg field of the
// ModR/M byte.
void arithmetic_op(byte opcode, Register dst, Register src);
void arithmetic_op_32(byte opcode, Register dst, Register src);
void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
void arithmetic_op_32(byte opcode, Register reg, Register rm_reg);
void arithmetic_op_32(byte opcode, Register reg, const Operand& rm_reg);
void arithmetic_op(byte opcode, Register reg, Register rm_reg);
void arithmetic_op(byte opcode, Register reg, const Operand& rm_reg);
void immediate_arithmetic_op(byte subcode, Register dst, Immediate src);
void immediate_arithmetic_op(byte subcode, const Operand& dst, Immediate src);
// Operate on a 32-bit word in memory or register.
void immediate_arithmetic_op_32(byte subcode,
const Operand& dst,
Immediate src);
void immediate_arithmetic_op_32(byte subcode,
Register dst,
Immediate src);
// Operate on a byte in memory or register.
void immediate_arithmetic_op_8(byte subcode,
const Operand& dst,
Register dst,
Immediate src);
void immediate_arithmetic_op_8(byte subcode,
Register dst,
const Operand& dst,
Immediate src);
// Operate on a word in memory or register.
void immediate_arithmetic_op_16(byte subcode,
Register dst,
Immediate src);
void immediate_arithmetic_op_16(byte subcode,
const Operand& dst,
Immediate src);
// Operate on a 32-bit word in memory or register.
void immediate_arithmetic_op_32(byte subcode,
Register dst,
Immediate src);
void immediate_arithmetic_op_32(byte subcode,
const Operand& dst,
Immediate src);
// Emit machine code for a shift operation.
void shift(Register dst, Immediate shift_amount, int subcode);
void shift_32(Register dst, Immediate shift_amount, int subcode);
......@@ -1180,6 +1236,7 @@ class Assembler : public Malloced {
friend class CodePatcher;
friend class EnsureSpace;
friend class RegExpMacroAssemblerX64;
// Code buffer:
// The buffer into which code and relocation info are generated.
......
......@@ -25,3 +25,1277 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "v8.h"
#include "serialize.h"
#include "unicode.h"
#include "log.h"
#include "ast.h"
#include "regexp-stack.h"
#include "macro-assembler.h"
#include "regexp-macro-assembler.h"
#include "x64/macro-assembler-x64.h"
#include "x64/regexp-macro-assembler-x64.h"
namespace v8 {
namespace internal {
/*
* This assembler uses the following register assignment convention
* - rdx : currently loaded character(s) as ASCII or UC16. Must be loaded using
* LoadCurrentCharacter before using any of the dispatch methods.
* - rdi : current position in input, as negative offset from end of string.
* Please notice that this is the byte offset, not the character
* offset! Is always a 32-bit signed (negative) offset, but must be
* maintained sign-extended to 64 bits, since it is used as index.
* - rsi : end of input (points to byte after last character in input),
* so that rsi+rdi points to the current character.
* - rbp : frame pointer. Used to access arguments, local variables and
* RegExp registers.
* - rsp : points to tip of C stack.
* - rcx : points to tip of backtrack stack. The backtrack stack contains
* only 32-bit values. Most are offsets from some base (e.g., character
* positions from end of string or code location from Code* pointer).
* - r8 : code object pointer. Used to convert between absolute and
* code-object-relative addresses.
*
* The registers rax, rbx, r9 and r11 are free to use for computations
* (rcx is not: it holds the tip of the backtrack stack, see above).
* If changed to use r12+, they should be saved as callee-save registers.
*
* Each call to a C++ method should retain these registers.
*
* The stack will have the following content, in some order, indexable from the
* frame pointer (see, e.g., kStackHighEnd):
* - stack_area_base (High end of the memory area to use as
* backtracking stack)
* - at_start (if 1, start at start of string, if 0, don't)
* - int* capture_array (int[num_saved_registers_], for output).
* - end of input (Address of end of string)
* - start of input (Address of first character in string)
* - String** input_string (location of a handle containing the string)
* - return address
* - backup of callee save registers (rbx, possibly rsi and rdi).
* - Offset of location before start of input (effectively character
* position -1). Used to initialize capture registers to a non-position.
* - register 0 rbp[-n] (Only positions must be stored in the first
* - register 1 rbp[-n-8] num_saved_registers_ registers)
* - ...
*
* The first num_saved_registers_ registers are initialized to point to
* "character -1" in the string (i.e., char_size() bytes before the first
* character of the string). The remaining registers starts out uninitialized.
*
* The first seven values must be provided by the calling code by
* calling the code's entry address cast to a function pointer with the
* following signature:
* int (*match)(String* input_string,
* int start_index,
* Address start,
* Address end,
* int* capture_output_array,
* bool at_start,
* byte* stack_area_base)
*/
#define __ ACCESS_MASM(masm_)
// Creates an assembler generating code for the given input mode.
// registers_to_save is used both as the initial number of RegExp registers
// and as the number of saved registers. A new MacroAssembler with a buffer
// of kRegExpCodeSize is allocated to hold the generated code.
RegExpMacroAssemblerX64::RegExpMacroAssemblerX64(
Mode mode,
int registers_to_save)
: masm_(new MacroAssembler(NULL, kRegExpCodeSize)),
code_relative_fixup_positions_(4),
mode_(mode),
num_registers_(registers_to_save),
num_saved_registers_(registers_to_save),
entry_label_(),
start_label_(),
success_label_(),
backtrack_label_(),
exit_label_() {
// The generated code begins with a jump to the (not yet bound) entry label;
// the code emitted after this constructor starts at start_label_.
__ jmp(&entry_label_); // We'll write the entry code when we know more.
__ bind(&start_label_); // And then continue from here.
}
// Releases the MacroAssembler and resets every label owned by this object.
RegExpMacroAssemblerX64::~RegExpMacroAssemblerX64() {
delete masm_;
// Unuse labels in case we throw away the assembler without calling GetCode.
entry_label_.Unuse();
start_label_.Unuse();
success_label_.Unuse();
backtrack_label_.Unuse();
exit_label_.Unuse();
check_preempt_label_.Unuse();
stack_overflow_label_.Unuse();
}
// Returns the backtrack stack slack, RegExpStack::kStackLimitSlack.
int RegExpMacroAssemblerX64::stack_limit_slack() {
return RegExpStack::kStackLimitSlack;
}
// Advances the current input position by `by` characters. The position
// register (rdi) holds a byte offset, so the character count is scaled by
// char_size(). Nothing is emitted when `by` is zero.
void RegExpMacroAssemblerX64::AdvanceCurrentPosition(int by) {
  if (by != 0) {
    __ addq(rdi, Immediate(by * char_size()));
  }
}
// Adds `by` to RegExp register `reg`, which must be a valid register index.
// Nothing is emitted when `by` is zero.
void RegExpMacroAssemblerX64::AdvanceRegister(int reg, int by) {
ASSERT(reg >= 0);
ASSERT(reg < num_registers_);
if (by != 0) {
__ addq(register_location(reg), Immediate(by));
}
}
// Emits a preemption check followed by a jump to the location on top of the
// backtrack stack. Stack entries are offsets relative to the code object, so
// the code object pointer is added back before jumping.
void RegExpMacroAssemblerX64::Backtrack() {
CheckPreemption();
// Pop Code* offset from backtrack stack, add Code* and jump to location.
Pop(rbx);
__ addq(rbx, code_object_pointer());
__ jmp(rbx);
}
// Binds `label` in the underlying macro assembler.
void RegExpMacroAssemblerX64::Bind(Label* label) {
__ bind(label);
}
// Compares the current character with `c` and branches (or backtracks, see
// BranchOrBacktrack) to on_equal when they are equal.
void RegExpMacroAssemblerX64::CheckCharacter(uint32_t c, Label* on_equal) {
__ cmpl(current_character(), Immediate(c));
BranchOrBacktrack(equal, on_equal);
}
// Compares the current character with `limit` and branches (or backtracks,
// see BranchOrBacktrack) to on_greater on the `greater` condition.
void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) {
__ cmpl(current_character(), Immediate(limit));
BranchOrBacktrack(greater, on_greater);
}
// Branches to on_at_start when the match was started at the start of the
// string (at_start flag is non-zero) and the current position (rsi + rdi)
// still equals the saved input start address. Otherwise falls through.
void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) {
Label not_at_start;
// Did we start the match at the start of the string at all?
__ cmpb(Operand(rbp, kAtStart), Immediate(0));
BranchOrBacktrack(equal, &not_at_start);
// If we did, are we still at the start of the input?
__ lea(rax, Operand(rsi, rdi, times_1, 0));
__ cmpq(rax, Operand(rbp, kInputStart));
BranchOrBacktrack(equal, on_at_start);
__ bind(&not_at_start);
}
// Branches to on_not_at_start when either the match was not started at the
// start of the string (at_start flag is zero) or the current position
// (rsi + rdi) differs from the saved input start address.
void RegExpMacroAssemblerX64::CheckNotAtStart(Label* on_not_at_start) {
// Did we start the match at the start of the string at all?
__ cmpb(Operand(rbp, kAtStart), Immediate(0));
BranchOrBacktrack(equal, on_not_at_start);
// If we did, are we still at the start of the input?
__ lea(rax, Operand(rsi, rdi, times_1, 0));
__ cmpq(rax, Operand(rbp, kInputStart));
BranchOrBacktrack(not_equal, on_not_at_start);
}
// Compares the current character with `limit` and branches (or backtracks,
// see BranchOrBacktrack) to on_less on the `less` condition.
void RegExpMacroAssemblerX64::CheckCharacterLT(uc16 limit, Label* on_less) {
__ cmpl(current_character(), Immediate(limit));
BranchOrBacktrack(less, on_less);
}
// Emits code comparing the input at character offset cp_offset from the
// current position against every character of `str`, one character at a
// time, leaving to on_failure on the first mismatch.
//
// When check_end_of_string is set, a bounds check is emitted first that
// leaves to on_failure if fewer than cp_offset + str.length() characters
// remain. A NULL on_failure means "backtrack".
void RegExpMacroAssemblerX64::CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure,
bool check_end_of_string) {
int byte_length = str.length() * char_size();
int byte_offset = cp_offset * char_size();
if (check_end_of_string) {
// Check that there are at least str.length() characters left in the input.
// rdi is a negative byte offset from the end of the input, so the compared
// region fits only if rdi <= -(byte_offset + byte_length).
__ cmpl(rdi, Immediate(-(byte_offset + byte_length)));
BranchOrBacktrack(greater, on_failure);
}
if (on_failure == NULL) {
// Instead of inlining a backtrack, (re)use the global backtrack target.
on_failure = &backtrack_label_;
}
// TODO(lrn): Test multiple characters at a time by loading 4 or 8 bytes
// at a time.
for (int i = 0; i < str.length(); i++) {
if (mode_ == ASCII) {
// One byte per character; the expected character is passed as int8_t.
__ cmpb(Operand(rsi, rdi, times_1, byte_offset + i),
Immediate(static_cast<int8_t>(str[i])));
} else {
ASSERT(mode_ == UC16);
// Two bytes per character.
__ cmpw(Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)),
Immediate(str[i]));
}
BranchOrBacktrack(not_equal, on_failure);
}
}
// If the current position (rdi) equals the value on top of the backtrack
// stack, drops that stack entry and continues at on_equal; otherwise falls
// through leaving the backtrack stack untouched.
void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
Label fallthrough;
__ cmpl(rdi, Operand(backtrack_stackpointer(), 0));
__ j(not_equal, &fallthrough);
Drop();
BranchOrBacktrack(no_condition, on_equal);
__ bind(&fallthrough);
}
void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
int start_reg,
Label* on_no_match) {
Label fallthrough;
__ movq(rdx, register_location(start_reg)); // Offset of start of capture
__ movq(rbx, register_location(start_reg + 1)); // Offset of end of capture
__ subq(rbx, rdx); // Length of capture.
// -----------------------
// rdx = Start offset of capture.
// rbx = Length of capture
// If length is negative, this code will fail (it's a symptom of a partial or
// illegal capture where start of capture after end of capture).
// This must not happen (no back-reference can reference a capture that wasn't
// closed before in the reg-exp, and we must not generate code that can cause
// this condition).
// If length is zero, either the capture is empty or it is nonparticipating.
// In either case succeed immediately.
__ j(equal, &fallthrough);
if (mode_ == ASCII) {
Label loop_increment;
if (on_no_match == NULL) {
on_no_match = &backtrack_label_;
}
__ lea(r9, Operand(rsi, rdx, times_1, 0));
__ lea(r11, Operand(rsi, rdi, times_1, 0));
__ addq(rbx, r9); // End of capture
// ---------------------
// r11 - current input character address
// r9 - current capture character address
// rbx - end of capture
Label loop;
__ bind(&loop);
__ movzxbl(rdx, Operand(r9, 0));
__ movzxbl(rax, Operand(r11, 0));
// al - input character
// dl - capture character
__ cmpb(rax, rdx);
__ j(equal, &loop_increment);
// Mismatch, try case-insensitive match (converting letters to lower-case).
// I.e., if or-ing with 0x20 makes values equal and in range 'a'-'z', it's
// a match.
__ or_(rax, Immediate(0x20)); // Convert match character to lower-case.
__ or_(rdx, Immediate(0x20)); // Convert capture character to lower-case.
__ cmpb(rax, rdx);
__ j(not_equal, on_no_match); // Definitely not equal.
__ subb(rax, Immediate('a'));
__ cmpb(rax, Immediate('z' - 'a'));
__ j(above, on_no_match); // Weren't letters anyway.
__ bind(&loop_increment);
// Increment pointers into match and capture strings.
__ addq(r11, Immediate(1));
__ addq(r9, Immediate(1));
// Compare to end of capture, and loop if not done.
__ cmpq(r9, rbx);
__ j(below, &loop);
// Compute new value of character position after the matched part.
__ movq(rdi, r11);
__ subq(rdi, rsi);
} else {
ASSERT(mode_ == UC16);
// Save important/volatile registers before calling C function.
#ifndef __MSVC__
// Callee save on Win64
__ push(rsi);
__ push(rdi);
#endif
__ push(backtrack_stackpointer());
int num_arguments = 3;
FrameAlign(num_arguments);
// Put arguments into parameter registers. Parameters are
// Address byte_offset1 - Address captured substring's start.
// Address byte_offset2 - Address of current character position.
// size_t byte_length - length of capture in bytes(!)
#ifdef __MSVC__
// Compute and set byte_offset1 (start of capture).
__ lea(rcx, Operand(rsi, rdx, times_1, 0));
// Set byte_offset2.
__ lea(rdx, Operand(rsi, rdi, times_1, 0));
// Set byte_length.
__ movq(r8, rbx);
#else // AMD64 calling convention
// Compute byte_offset2 (current position = rsi+rdi).
__ lea(rax, Operand(rsi, rdi, times_1, 0));
// Compute and set byte_offset1 (start of capture).
__ lea(rdi, Operand(rsi, rdx, times_1, 0));
// Set byte_offset2.
__ movq(rsi, rax);
// Set byte_length.
__ movq(rdx, rbx);
#endif
Address function_address = FUNCTION_ADDR(&CaseInsensitiveCompareUC16);
CallCFunction(function_address, num_arguments);
// Restore original values before reacting on result value.
__ Move(code_object_pointer(), masm_->CodeObject());
__ pop(backtrack_stackpointer());
#ifndef __MSVC__
__ pop(rdi);
__ pop(rsi);
#endif
// Check if function returned non-zero for success or zero for failure.
__ testq(rax, rax);
BranchOrBacktrack(zero, on_no_match);
// On success, increment position by length of capture.
// Requires that rbx is callee save (true for both Win64 and AMD64 ABIs).
__ addq(rdi, rbx);
}
__ bind(&fallthrough);
}
// Emits code that matches the input at the current position against the
// capture stored in registers start_reg (start) and start_reg + 1 (end),
// comparing characters exactly. On a match the current position is advanced
// past the matched text; otherwise control goes to on_no_match via
// BranchOrBacktrack. An empty or non-participating capture always matches.
void RegExpMacroAssemblerX64::CheckNotBackReference(
int start_reg,
Label* on_no_match) {
Label fallthrough;
// Find length of back-referenced capture.
__ movq(rdx, register_location(start_reg));
__ movq(rax, register_location(start_reg + 1));
__ subq(rax, rdx); // Length to check.
// Fail on partial or illegal capture (start of capture after end of capture).
// This must not happen (no back-reference can reference a capture that wasn't
// closed before in the reg-exp).
__ Check(greater_equal, "Invalid capture referenced");
// Succeed on empty capture (including non-participating capture)
__ j(equal, &fallthrough);
// -----------------------
// rdx - Start of capture
// rax - length of capture
// Check that there are sufficient characters left in the input.
// rdi is the negative byte offset from the end of the input, so the capture
// fits only if rdi + length <= 0.
__ movl(rbx, rdi);
__ addl(rbx, rax);
BranchOrBacktrack(greater, on_no_match);
// Compute pointers to match string and capture string
__ lea(rbx, Operand(rsi, rdi, times_1, 0)); // Start of match.
__ addq(rdx, rsi); // Start of capture.
__ lea(r9, Operand(rdx, rax, times_1, 0)); // End of capture
// -----------------------
// rdx - current capture character address.
// rbx - current input character address.
// r9 - end of capture (capture length after rdx); the loop ends when rdx
// reaches it.
Label loop;
__ bind(&loop);
if (mode_ == ASCII) {
__ movzxbl(rax, Operand(rdx, 0));
__ cmpb(rax, Operand(rbx, 0));
} else {
ASSERT(mode_ == UC16);
__ movzxwl(rax, Operand(rdx, 0));
__ cmpw(rax, Operand(rbx, 0));
}
BranchOrBacktrack(not_equal, on_no_match);
// Increment pointers into capture and match string.
__ addq(rbx, Immediate(char_size()));
__ addq(rdx, Immediate(char_size()));
// Check if we have reached end of match area.
__ cmpq(rdx, r9);
__ j(below, &loop);
// Success.
// Set current character position to position after match.
// (rdi is kept as an offset relative to the end of input held in rsi.)
__ movq(rdi, rbx);
__ subq(rdi, rsi);
__ bind(&fallthrough);
}
// Compares RegExp registers reg1 and reg2 and branches (or backtracks, see
// BranchOrBacktrack) to on_not_equal when their values differ.
void RegExpMacroAssemblerX64::CheckNotRegistersEqual(int reg1,
int reg2,
Label* on_not_equal) {
__ movq(rax, register_location(reg1));
__ cmpq(rax, register_location(reg2));
BranchOrBacktrack(not_equal, on_not_equal);
}
// Compares the current character with `c` and branches (or backtracks, see
// BranchOrBacktrack) to on_not_equal when they differ.
void RegExpMacroAssemblerX64::CheckNotCharacter(uint32_t c,
Label* on_not_equal) {
__ cmpl(current_character(), Immediate(c));
BranchOrBacktrack(not_equal, on_not_equal);
}
// Branches (or backtracks, see BranchOrBacktrack) to on_equal when
// (current character & mask) == c. The masking is done on a copy in rax, so
// the current character register is left unchanged.
void RegExpMacroAssemblerX64::CheckCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_equal) {
__ movl(rax, current_character());
__ and_(rax, Immediate(mask));
__ cmpl(rax, Immediate(c));
BranchOrBacktrack(equal, on_equal);
}
// Branches (or backtracks, see BranchOrBacktrack) to on_not_equal when
// (current character & mask) != c. The masking is done on a copy in rax, so
// the current character register is left unchanged.
void RegExpMacroAssemblerX64::CheckNotCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_not_equal) {
__ movl(rax, current_character());
__ and_(rax, Immediate(mask));
__ cmpl(rax, Immediate(c));
BranchOrBacktrack(not_equal, on_not_equal);
}
// Branches (or backtracks, see BranchOrBacktrack) to on_not_equal when
// ((current character - minus) & mask) != c. The subtraction is done with
// lea into rax, so the current character register is left unchanged.
void RegExpMacroAssemblerX64::CheckNotCharacterAfterMinusAnd(
uc16 c,
uc16 minus,
uc16 mask,
Label* on_not_equal) {
ASSERT(minus < String::kMaxUC16CharCode);
__ lea(rax, Operand(current_character(), -minus));
__ and_(rax, Immediate(mask));
__ cmpl(rax, Immediate(c));
BranchOrBacktrack(not_equal, on_not_equal);
}
// Tries to emit a specialized test for the character class named by `type`
// ('s', 'S', 'd', 'D', '.', '*') applied to the character at cp_offset.
//
// Returns true when specialized code was emitted; that code leaves to
// on_no_match when the character is not in the class. Returns false when
// there is no custom implementation for `type` in the current mode, in
// which case the caller has to generate the test some other way.
//
// When check_offset is set the character is loaded with a bounds check that
// leaves to on_no_match; otherwise it is loaded unchecked.
//
// Note that the range tests below modify the current character register
// (subl/xor_), so it no longer holds the loaded character afterwards.
bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
int cp_offset,
bool check_offset,
Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned
// (c - min) <= (max - min) check
switch (type) {
case 's':
// Match space-characters
if (mode_ == ASCII) {
// ASCII space characters are '\t'..'\r' and ' '.
if (check_offset) {
LoadCurrentCharacter(cp_offset, on_no_match);
} else {
LoadCurrentCharacterUnchecked(cp_offset, 1);
}
Label success;
__ cmpl(current_character(), Immediate(' '));
__ j(equal, &success);
// Check range 0x09..0x0d
__ subl(current_character(), Immediate('\t'));
__ cmpl(current_character(), Immediate('\r' - '\t'));
BranchOrBacktrack(above, on_no_match);
__ bind(&success);
return true;
}
// No custom implementation for UC16 input; nothing has been emitted.
return false;
case 'S':
// Match non-space characters.
// NOTE(review): the character load is emitted before the mode check, so in
// UC16 mode a load has already been generated when false is returned below.
if (check_offset) {
LoadCurrentCharacter(cp_offset, on_no_match, 1);
} else {
LoadCurrentCharacterUnchecked(cp_offset, 1);
}
if (mode_ == ASCII) {
// ASCII space characters are '\t'..'\r' and ' '.
__ cmpl(current_character(), Immediate(' '));
BranchOrBacktrack(equal, on_no_match);
__ subl(current_character(), Immediate('\t'));
__ cmpl(current_character(), Immediate('\r' - '\t'));
BranchOrBacktrack(below_equal, on_no_match);
return true;
}
return false;
case 'd':
// Match ASCII digits ('0'..'9')
if (check_offset) {
LoadCurrentCharacter(cp_offset, on_no_match, 1);
} else {
LoadCurrentCharacterUnchecked(cp_offset, 1);
}
__ subl(current_character(), Immediate('0'));
__ cmpl(current_character(), Immediate('9' - '0'));
BranchOrBacktrack(above, on_no_match);
return true;
case 'D':
// Match non ASCII-digits
if (check_offset) {
LoadCurrentCharacter(cp_offset, on_no_match, 1);
} else {
LoadCurrentCharacterUnchecked(cp_offset, 1);
}
__ subl(current_character(), Immediate('0'));
__ cmpl(current_character(), Immediate('9' - '0'));
BranchOrBacktrack(below_equal, on_no_match);
return true;
case '.': {
// Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
if (check_offset) {
LoadCurrentCharacter(cp_offset, on_no_match, 1);
} else {
LoadCurrentCharacterUnchecked(cp_offset, 1);
}
// Flipping the low bit maps 0x0a and 0x0d onto the adjacent values 0x0b and
// 0x0c, so both can be tested with a single range check.
__ xor_(current_character(), Immediate(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
__ subl(current_character(), Immediate(0x0b));
__ cmpl(current_character(), Immediate(0x0c - 0x0b));
BranchOrBacktrack(below_equal, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
// computed (current_char ^ 0x01 - 0x0b). I.e., check for
// 0x201d (0x2028 - 0x0b) or 0x201e.
__ subl(current_character(), Immediate(0x2028 - 0x0b));
__ cmpl(current_character(), Immediate(1));
BranchOrBacktrack(below_equal, on_no_match);
}
return true;
}
case '*':
// Match any character.
// Only the position needs checking; no character is loaded.
if (check_offset) {
CheckPosition(cp_offset, on_no_match);
}
return true;
// No custom implementation (yet): w, W, s(UC16), S(UC16).
default:
return false;
}
}
// Emits code that ends matching with a failure result: rax is zeroed
// (FAILURE is asserted to be 0) and control jumps to the common exit
// sequence bound at exit_label_.
void RegExpMacroAssemblerX64::Fail() {
ASSERT(FAILURE == 0); // Return value for failure is zero.
__ xor_(rax, rax); // zero rax.
__ jmp(&exit_label_);
}
// Finalizes the generated regexp code and wraps it in a Code object.
//
// Emits, in order: the entry sequence (frame setup, parameter spilling,
// system-stack check, regexp register allocation and initialization), the
// success/exit sequence, the shared backtrack target, the preemption handler,
// the backtrack-stack overflow handler and the exception exit. Afterwards the
// code-relative label positions are patched and a Code object is created.
//
// source: the regexp source string; only used here for the
// RegExpCodeCreateEvent log entry.
// Returns the new Code object as a Handle<Object>.
//
// NOTE(review): the Win64 paths are guarded by __MSVC__, which is not a macro
// that MSVC predefines (it defines _MSC_VER / _WIN64) - confirm the build
// system defines it, otherwise those paths are never compiled.
Handle<Object> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
// Finalize code - write the entry point code now we know how many
// registers we need.
// Entry code:
__ bind(&entry_label_);
// Start new stack frame.
__ push(rbp);
__ movq(rbp, rsp);
// Save parameters and callee-save registers. Order here should correspond
// to the order of the frame offsets (kInputString ... kBackup_rbx) declared
// in the header.
#ifdef __MSVC__
// MSVC passes arguments in rcx, rdx, r8, r9, with backing stack slots.
// Store register parameters in pre-allocated stack slots.
__ movq(Operand(rbp, kInputString), rcx);
__ movq(Operand(rbp, kStartIndex), rdx);
__ movq(Operand(rbp, kInputStart), r8);
__ movq(Operand(rbp, kInputEnd), r9);
// Callee-save on Win64.
__ push(rsi);
__ push(rdi);
__ push(rbx);
#else
// GCC passes arguments in rdi, rsi, rdx, rcx, r8, r9 (and then on stack).
// Push register parameters on stack for reference.
// The asserts pin the frame offsets to the push order below.
ASSERT_EQ(kInputString, -1 * kPointerSize);
ASSERT_EQ(kStartIndex, -2 * kPointerSize);
ASSERT_EQ(kInputStart, -3 * kPointerSize);
ASSERT_EQ(kInputEnd, -4 * kPointerSize);
ASSERT_EQ(kRegisterOutput, -5 * kPointerSize);
ASSERT_EQ(kAtStart, -6 * kPointerSize);
__ push(rdi);
__ push(rsi);
__ push(rdx);
__ push(rcx);
__ push(r8);
__ push(r9);
__ push(rbx); // Callee-save
#endif
__ push(Immediate(0)); // Make room for "input start - 1" constant.
// Check if we have space on the stack for registers.
Label stack_limit_hit;
Label stack_ok;
ExternalReference stack_guard_limit =
ExternalReference::address_of_stack_guard_limit();
// rcx = rsp - stack guard limit, i.e. the space left above the limit.
__ movq(rcx, rsp);
__ movq(kScratchRegister, stack_guard_limit);
__ subq(rcx, Operand(kScratchRegister, 0));
// Handle it if the stack pointer is already below the stack limit.
__ j(below_equal, &stack_limit_hit);
// Check if there is room for the variable number of registers above
// the stack limit.
__ cmpq(rcx, Immediate(num_registers_ * kPointerSize));
__ j(above_equal, &stack_ok);
// Exit with OutOfMemory exception. There is not enough space on the stack
// for our working registers.
__ movq(rax, Immediate(EXCEPTION));
__ jmp(&exit_label_);
__ bind(&stack_limit_hit);
__ Move(code_object_pointer(), masm_->CodeObject());
CallCheckStackGuardState(); // Preserves no registers beside rbp and rsp.
__ testq(rax, rax);
// If returned value is non-zero, we exit with the returned value as result.
__ j(not_zero, &exit_label_);
__ bind(&stack_ok);
// Allocate space on stack for registers.
__ subq(rsp, Immediate(num_registers_ * kPointerSize));
// Load the end-of-input address into rsi.
__ movq(rsi, Operand(rbp, kInputEnd));
// Load input position.
__ movq(rdi, Operand(rbp, kInputStart));
// Set up rdi to be negative offset from string end.
__ subq(rdi, rsi);
// Set rax to address of char before start of input
// (effectively string position -1).
__ lea(rax, Operand(rdi, -char_size()));
// Store this value in a local variable, for use when clearing
// position registers.
__ movq(Operand(rbp, kInputStartMinusOne), rax);
if (num_saved_registers_ > 0) {
// Fill saved registers with initial value = start offset - 1
// Fill in stack push order, to avoid accessing across an unwritten
// page (a problem on Windows).
// rcx walks downwards from kRegisterZero as an rbp-relative byte offset.
__ movq(rcx, Immediate(kRegisterZero));
Label init_loop;
__ bind(&init_loop);
__ movq(Operand(rbp, rcx, times_1, 0), rax);
__ subq(rcx, Immediate(kPointerSize));
__ cmpq(rcx,
Immediate(kRegisterZero - num_saved_registers_ * kPointerSize));
__ j(greater, &init_loop);
}
// Ensure that we have written to each stack page, in order. Skipping a page
// on Windows can cause segmentation faults. Assuming page size is 4k.
const int kPageSize = 4096;
const int kRegistersPerPage = kPageSize / kPointerSize;
for (int i = num_saved_registers_ + kRegistersPerPage - 1;
i < num_registers_;
i += kRegistersPerPage) {
__ movq(register_location(i), rax); // One write every page.
}
// Initialize backtrack stack pointer.
__ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
// Initialize code object pointer.
__ Move(code_object_pointer(), masm_->CodeObject());
// Load previous char as initial value of current-character.
// When the kAtStart parameter is non-zero there is no previous character,
// so '\n' is used instead.
Label at_start;
__ cmpq(Operand(rbp, kAtStart), Immediate(0));
__ j(not_equal, &at_start);
LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
__ jmp(&start_label_);
__ bind(&at_start);
__ movq(current_character(), Immediate('\n'));
__ jmp(&start_label_);
// Exit code:
if (success_label_.is_linked()) {
// Save captures when successful.
__ bind(&success_label_);
if (num_saved_registers_ > 0) {
// copy captures to output
__ movq(rbx, Operand(rbp, kRegisterOutput));
// rcx = input end - input start, the byte length of the matched range.
__ movq(rcx, Operand(rbp, kInputEnd));
__ subq(rcx, Operand(rbp, kInputStart));
for (int i = 0; i < num_saved_registers_; i++) {
__ movq(rax, register_location(i));
__ addq(rax, rcx); // Convert to index from start, not end.
if (mode_ == UC16) {
__ sar(rax, Immediate(1)); // Convert byte index to character index.
}
// The output vector holds 32-bit ints, hence movl and kIntSize stride.
__ movl(Operand(rbx, i * kIntSize), rax);
}
}
__ movq(rax, Immediate(SUCCESS));
}
// Exit and return rax
__ bind(&exit_label_);
#ifdef __MSVC__
// Restore callee save registers.
__ lea(rsp, Operand(rbp, kLastCalleeSaveRegister));
__ pop(rbx);
__ pop(rdi);
__ pop(rsi);
// Stack now at rbp.
#else
// Restore callee save register.
__ movq(rbx, Operand(rbp, kBackup_rbx));
// Skip rsp to rbp.
__ movq(rsp, rbp);
#endif
// Exit function frame, restore previous one.
__ pop(rbp);
__ ret(0);
// Backtrack code (branch target for conditional backtracks).
if (backtrack_label_.is_linked()) {
__ bind(&backtrack_label_);
Backtrack();
}
Label exit_with_exception;
// Preempt-code
if (check_preempt_label_.is_linked()) {
SafeCallTarget(&check_preempt_label_);
// Save the registers that must survive the C call.
__ push(backtrack_stackpointer());
__ push(rdi);
CallCheckStackGuardState();
__ testq(rax, rax);
// If returning non-zero, we should end execution with the given
// result as return value.
__ j(not_zero, &exit_label_);
// Restore registers.
__ Move(code_object_pointer(), masm_->CodeObject());
__ pop(rdi);
__ pop(backtrack_stackpointer());
// String might have moved: Reload rsi from frame.
__ movq(rsi, Operand(rbp, kInputEnd));
SafeReturn();
}
// Backtrack stack overflow code.
if (stack_overflow_label_.is_linked()) {
SafeCallTarget(&stack_overflow_label_);
// Reached if the backtrack-stack limit has been hit.
Label grow_failed;
// Save registers before calling C function
#ifndef __MSVC__
// Callee-save in Microsoft 64-bit ABI, but not in AMD64 ABI.
__ push(rsi);
__ push(rdi);
#endif
// Call GrowStack(backtrack_stackpointer())
int num_arguments = 2;
FrameAlign(num_arguments);
#ifdef __MSVC__
// Microsoft passes parameters in rcx, rdx.
// First argument, backtrack stackpointer, is already in rcx.
__ lea(rdx, Operand(rbp, kStackHighEnd)); // Second argument
#else
// AMD64 ABI passes parameters in rdi, rsi.
__ movq(rdi, backtrack_stackpointer()); // First argument.
__ lea(rsi, Operand(rbp, kStackHighEnd)); // Second argument.
#endif
CallCFunction(FUNCTION_ADDR(&GrowStack), num_arguments);
// If return NULL, we have failed to grow the stack, and
// must exit with a stack-overflow exception.
__ testq(rax, rax);
__ j(equal, &exit_with_exception);
// Otherwise use return value as new stack pointer.
__ movq(backtrack_stackpointer(), rax);
// Restore saved registers and continue.
__ Move(code_object_pointer(), masm_->CodeObject());
#ifndef __MSVC__
__ pop(rdi);
__ pop(rsi);
#endif
SafeReturn();
}
if (exit_with_exception.is_linked()) {
// If any of the code above needed to exit with an exception.
__ bind(&exit_with_exception);
// Exit with Result EXCEPTION(-1) to signal thrown exception.
__ movq(rax, Immediate(EXCEPTION));
__ jmp(&exit_label_);
}
// Patch the label offsets recorded by Push(Label*) so they are relative to
// the Code object pointer.
FixupCodeRelativePositions();
CodeDesc code_desc;
masm_->GetCode(&code_desc);
Handle<Code> code = Factory::NewCode(code_desc,
NULL,
Code::ComputeFlags(Code::REGEXP),
masm_->CodeObject());
LOG(RegExpCodeCreateEvent(*code, *source));
return Handle<Object>::cast(code);
}
// Emits an unconditional transfer of control to `to` by delegating to
// BranchOrBacktrack with no_condition; a NULL label there means backtrack.
void RegExpMacroAssemblerX64::GoTo(Label* to) {
BranchOrBacktrack(no_condition, to);
}
void RegExpMacroAssemblerX64::IfRegisterGE(int reg,
int comparand,
Label* if_ge) {
__ cmpq(register_location(reg), Immediate(comparand));
BranchOrBacktrack(greater_equal, if_ge);
}
void RegExpMacroAssemblerX64::IfRegisterLT(int reg,
int comparand,
Label* if_lt) {
__ cmpq(register_location(reg), Immediate(comparand));
BranchOrBacktrack(less, if_lt);
}
// Emits code that goes to if_eq (or backtracks when if_eq is NULL) if the
// current position (rdi) equals the value stored in regexp register `reg`.
void RegExpMacroAssemblerX64::IfRegisterEqPos(int reg,
Label* if_eq) {
__ cmpq(rdi, register_location(reg));
BranchOrBacktrack(equal, if_eq);
}
// Identifies this macro assembler: always returns kX64Implementation.
RegExpMacroAssembler::IrregexpImplementation
RegExpMacroAssemblerX64::Implementation() {
return kX64Implementation;
}
// Emits code that loads `characters` characters, starting cp_offset
// characters from the current position, into the current-character register.
//
// cp_offset:       offset (in characters) from the current position; may be
//                  -1 because ^ and \b look behind one character.
// on_end_of_input: target taken when the last requested character lies
//                  outside the input (NULL means backtrack); only used when
//                  check_bounds is true.
// check_bounds:    when false the caller guarantees the characters are
//                  inside the input and no bounds check is emitted.
// characters:      number of characters to load at once.
void RegExpMacroAssemblerX64::LoadCurrentCharacter(int cp_offset,
                                                   Label* on_end_of_input,
                                                   bool check_bounds,
                                                   int characters) {
  ASSERT(cp_offset >= -1);      // ^ and \b can look behind one character.
  ASSERT(cp_offset < (1<<30));  // Be sane! (And ensure negation works)
  // Honor check_bounds: previously the check was emitted unconditionally,
  // ignoring the parameter and generating redundant compare/branch pairs.
  if (check_bounds) {
    CheckPosition(cp_offset + characters - 1, on_end_of_input);
  }
  LoadCurrentCharacterUnchecked(cp_offset, characters);
}
// Emits code that pops the top backtrack-stack entry into rdi, the register
// holding the current position.
void RegExpMacroAssemblerX64::PopCurrentPosition() {
Pop(rdi);
}
// Emits code that pops the top backtrack-stack entry (via rax) into the
// regexp register `register_index`.
void RegExpMacroAssemblerX64::PopRegister(int register_index) {
Pop(rax);
__ movq(register_location(register_index), rax);
}
// Emits code that pushes `label` as a backtrack target on the backtrack
// stack and then checks the backtrack-stack limit.
void RegExpMacroAssemblerX64::PushBacktrack(Label* label) {
Push(label);
CheckStackLimit();
}
// Emits code that pushes the current position (rdi) on the backtrack stack.
void RegExpMacroAssemblerX64::PushCurrentPosition() {
Push(rdi);
}
// Emits code that pushes the value of regexp register `register_index` on
// the backtrack stack (going through rax). A backtrack-stack limit check is
// emitted afterwards only when check_stack_limit requests it.
void RegExpMacroAssemblerX64::PushRegister(int register_index,
                                           StackCheckFlag check_stack_limit) {
  __ movq(rax, register_location(register_index));
  Push(rax);
  if (!check_stack_limit) {
    return;
  }
  CheckStackLimit();
}
// Emits code that sets the current position (rdi) to the value stored in
// regexp register `reg`.
void RegExpMacroAssemblerX64::ReadCurrentPositionFromRegister(int reg) {
__ movq(rdi, register_location(reg));
}
// Emits code that restores the backtrack stack pointer from regexp register
// `reg`. The register holds an offset relative to the value stored at
// kStackHighEnd (the inverse of WriteStackPointerToRegister), so that value
// is added back to form the pointer.
void RegExpMacroAssemblerX64::ReadStackPointerFromRegister(int reg) {
__ movq(backtrack_stackpointer(), register_location(reg));
__ addq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
}
void RegExpMacroAssemblerX64::SetRegister(int register_index, int to) {
ASSERT(register_index >= num_saved_registers_); // Reserved for positions!
__ movq(register_location(register_index), Immediate(to));
}
// Emits a jump to success_label_, whose code (emitted by GetCode) copies the
// captures to the output and returns SUCCESS.
void RegExpMacroAssemblerX64::Succeed() {
__ jmp(&success_label_);
}
// Emits code that stores the current position, displaced by cp_offset
// characters, in regexp register `reg`. A zero offset stores rdi directly;
// otherwise the displaced position is computed in rax first.
void RegExpMacroAssemblerX64::WriteCurrentPositionToRegister(int reg,
                                                             int cp_offset) {
  if (cp_offset != 0) {
    // Character offsets are scaled to bytes by the character size.
    __ lea(rax, Operand(rdi, cp_offset * char_size()));
    __ movq(register_location(reg), rax);
    return;
  }
  __ movq(register_location(reg), rdi);
}
// Emits code that resets regexp registers reg_from..reg_to (inclusive) to
// the "input start - 1" value kept in the frame slot kInputStartMinusOne.
void RegExpMacroAssemblerX64::ClearRegisters(int reg_from, int reg_to) {
  ASSERT(reg_from <= reg_to);
  // Load the reset value once; every store below reuses rax.
  __ movq(rax, Operand(rbp, kInputStartMinusOne));
  int index = reg_from;
  while (index <= reg_to) {
    __ movq(register_location(index), rax);
    index++;
  }
}
// Emits code that saves the backtrack stack pointer in regexp register `reg`
// as an offset from the value stored at kStackHighEnd, rather than as an
// absolute address.
void RegExpMacroAssemblerX64::WriteStackPointerToRegister(int reg) {
__ movq(rax, backtrack_stackpointer());
__ subq(rax, Operand(rbp, kStackHighEnd));
__ movq(register_location(reg), rax);
}
// Private methods:
// Emits a call to the static C++ function CheckStackGuardState, passing
// (address of the return address slot, Code* of this code, frame pointer).
// The stack is aligned first with FrameAlign; CallCFunction restores rsp.
// The argument registers differ between the two calling conventions, and the
// order of the moves matters where a source register is also a destination.
void RegExpMacroAssemblerX64::CallCheckStackGuardState() {
// This function call preserves no register values. Caller should
// store anything volatile in a C call or overwritten by this function.
int num_arguments = 3;
FrameAlign(num_arguments);
#ifdef __MSVC__
// Second argument: Code* of self. (Do this before overwriting r8).
// code_object_pointer() is r8, which the third argument overwrites below.
__ movq(rdx, code_object_pointer());
// Third argument: RegExp code frame pointer.
__ movq(r8, rbp);
// First argument: Next address on the stack (will be address of
// return address).
__ lea(rcx, Operand(rsp, -kPointerSize));
#else
// Third argument: RegExp code frame pointer.
__ movq(rdx, rbp);
// Second argument: Code* of self.
__ movq(rsi, code_object_pointer());
// First argument: Next address on the stack (will be address of
// return address).
__ lea(rdi, Operand(rsp, -kPointerSize));
#endif
CallCFunction(FUNCTION_ADDR(&CheckStackGuardState), num_arguments);
}
// Gives typed access to a slot in a regexp stack frame: returns a reference
// of type T to the memory located frame_offset bytes from re_frame, so the
// slot can be both read and assigned through the result.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
  Address slot = re_frame + frame_offset;
  return reinterpret_cast<T&>(Memory::int32_at(slot));
}
// Called from generated regexp code when the stack guard has been triggered.
//
// return_address: address of the stack slot holding the return address into
//                 the regexp code; patched if the code object moves.
// re_code:        the Code object of the running regexp.
// re_frame:       frame pointer (rbp) of the regexp code's stack frame.
//
// Returns EXCEPTION on a real stack overflow or when handling the interrupt
// produced an exception, RETRY when the subject string changed between ASCII
// and UC16 representation, and 0 when matching can continue. In the last
// case the frame's string pointer and input start/end addresses are updated
// if the string's characters have moved.
int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
Code* re_code,
Address re_frame) {
if (StackGuard::IsStackOverflow()) {
Top::StackOverflow();
return EXCEPTION;
}
// If not real stack overflow the stack guard was used to interrupt
// execution for another purpose.
// Prepare for possible GC.
// Handles keep the code object and subject string reachable and let us
// observe their new locations after the interrupt has been handled.
HandleScope handles;
Handle<Code> code_handle(re_code);
Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
// Current string.
bool is_ascii = subject->IsAsciiRepresentation();
ASSERT(re_code->instruction_start() <= *return_address);
ASSERT(*return_address <=
re_code->instruction_start() + re_code->instruction_size());
Object* result = Execution::HandleStackGuardInterrupt();
if (*code_handle != re_code) { // Return address no longer valid
// Shift the return address by the distance the code object has moved.
intptr_t delta = *code_handle - re_code;
// Overwrite the return address on the stack.
*return_address += delta;
}
if (result->IsException()) {
return EXCEPTION;
}
// String might have changed.
if (subject->IsAsciiRepresentation() != is_ascii) {
// If we changed between an ASCII and an UC16 string, the specialized
// code cannot be used, and we need to restart regexp matching from
// scratch (including, potentially, compiling a new version of the code).
return RETRY;
}
// Otherwise, the content of the string might have moved. It must still
// be a sequential or external string with the same content.
// Update the start and end pointers in the stack frame to the current
// location (whether it has actually moved or not).
ASSERT(StringShape(*subject).IsSequential() ||
StringShape(*subject).IsExternal());
// The original start address of the characters to match.
const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
// Find the current start address of the same character at the current string
// position.
int start_index = frame_entry<int>(re_frame, kStartIndex);
const byte* new_address = StringCharacterPosition(*subject, start_index);
if (start_address != new_address) {
// If there is a difference, update the object pointer and start and end
// addresses in the RegExp stack frame to match the new value.
const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd);
// The byte length of the input is unchanged; only its location moved.
int byte_length = end_address - start_address;
frame_entry<const String*>(re_frame, kInputString) = *subject;
frame_entry<const byte*>(re_frame, kInputStart) = new_address;
frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length;
}
return 0;
}
// Called from generated code when the backtrack stack limit is hit. Asks
// RegExpStack for twice the current capacity.
//
// stack_pointer: current backtrack stack pointer.
// stack_base:    in/out; must equal the current stack base on entry and is
//                overwritten with the new base on success.
//
// Returns the backtrack stack pointer relocated into the new stack (same
// distance below the new base as before), or NULL if the stack could not be
// grown, in which case *stack_base is left untouched.
Address RegExpMacroAssemblerX64::GrowStack(Address stack_pointer,
                                           Address* stack_base) {
  size_t capacity = RegExpStack::stack_capacity();
  Address previous_base = RegExpStack::stack_base();
  ASSERT(previous_base == *stack_base);
  ASSERT(stack_pointer <= previous_base);
  ASSERT(static_cast<size_t>(previous_base - stack_pointer) <= capacity);
  Address grown_base = RegExpStack::EnsureCapacity(capacity * 2);
  if (grown_base != NULL) {
    *stack_base = grown_base;
    // Keep the same amount of live content below the new base.
    intptr_t live_bytes = previous_base - stack_pointer;
    return grown_base - live_bytes;
  }
  return NULL;
}
// Returns the rbp-relative operand addressing regexp register
// `register_index`. Registers live below kRegisterZero, one pointer-sized
// slot each. As a side effect, num_registers_ is raised so that it always
// exceeds the highest register index requested so far.
Operand RegExpMacroAssemblerX64::register_location(int register_index) {
  ASSERT(register_index < (1<<30));
  const int registers_needed = register_index + 1;
  if (num_registers_ < registers_needed) {
    num_registers_ = registers_needed;
  }
  const int frame_offset = kRegisterZero - register_index * kPointerSize;
  return Operand(rbp, frame_offset);
}
void RegExpMacroAssemblerX64::CheckPosition(int cp_offset,
Label* on_outside_input) {
__ cmpl(rdi, Immediate(-cp_offset * char_size()));
BranchOrBacktrack(greater_equal, on_outside_input);
}
// Emits a branch to `to` taken under `condition`. A negative condition means
// "always". A NULL target means backtrack: unconditionally via Backtrack(),
// conditionally via a jump to the shared backtrack_label_.
void RegExpMacroAssemblerX64::BranchOrBacktrack(Condition condition,
                                                Label* to) {
  const bool unconditional = condition < 0;
  if (to != NULL) {
    if (unconditional) {
      __ jmp(to);
    } else {
      __ j(condition, to);
    }
    return;
  }
  // No explicit target: the branch is a backtrack.
  if (unconditional) {
    Backtrack();
  } else {
    __ j(condition, &backtrack_label_);
  }
}
// Emits a call to `to`. The target is expected to have been bound with
// SafeCallTarget and to return with SafeReturn, which together keep the
// return address on the stack relative to the code object.
void RegExpMacroAssemblerX64::SafeCall(Label* to) {
__ call(to);
}
// Binds `label` as the entry of a SafeCall target and emits code that
// subtracts the code object pointer from the return address on top of the
// stack, turning it into a code-relative value until SafeReturn undoes it.
void RegExpMacroAssemblerX64::SafeCallTarget(Label* label) {
__ bind(label);
__ subq(Operand(rsp, 0), code_object_pointer());
}
// Emits the return matching SafeCallTarget: adds the (current) code object
// pointer back to the code-relative return address on top of the stack and
// returns through it.
void RegExpMacroAssemblerX64::SafeReturn() {
__ addq(Operand(rsp, 0), code_object_pointer());
__ ret(0);
}
// Emits code that pushes the low 32 bits of `source` on the backtrack stack:
// the backtrack stack pointer is lowered by kIntSize and the value is stored
// with a 32-bit move. `source` must not be the backtrack stack pointer.
void RegExpMacroAssemblerX64::Push(Register source) {
ASSERT(!source.is(backtrack_stackpointer()));
// Notice: This updates flags, unlike normal Push.
__ subq(backtrack_stackpointer(), Immediate(kIntSize));
__ movl(Operand(backtrack_stackpointer(), 0), source);
}
// Emits code that pushes the constant `value` on the backtrack stack: the
// backtrack stack pointer is lowered by kIntSize and the value is stored
// with a 32-bit move.
void RegExpMacroAssemblerX64::Push(Immediate value) {
// Notice: This updates flags, unlike normal Push.
__ subq(backtrack_stackpointer(), Immediate(kIntSize));
__ movl(Operand(backtrack_stackpointer(), 0), value);
}
// Rewrites every label offset recorded via MarkPositionForCodeRelativeFixup.
// Each recorded position immediately follows a 32-bit label offset that is
// relative to that position; the offset is patched in place so that it is
// relative to the Code object pointer instead. The list of recorded
// positions is emptied afterwards.
void RegExpMacroAssemblerX64::FixupCodeRelativePositions() {
  const int fixup_count = code_relative_fixup_positions_.length();
  for (int index = 0; index < fixup_count; index++) {
    int position = code_relative_fixup_positions_[index];
    // The 32-bit value to patch sits directly before the recorded position.
    int patch_position = position - kIntSize;
    int position_relative = masm_->long_at(patch_position);
    int code_relative = position_relative
                        + position
                        + Code::kHeaderSize
                        - kHeapObjectTag;
    masm_->long_at_put(patch_position, code_relative);
  }
  code_relative_fixup_positions_.Clear();
}
// Emits code that pushes a backtrack target on the backtrack stack. The
// 32-bit value stored by movl is a label offset; the position right after
// the instruction is recorded so FixupCodeRelativePositions can later make
// that offset relative to the Code object pointer.
void RegExpMacroAssemblerX64::Push(Label* backtrack_target) {
__ subq(backtrack_stackpointer(), Immediate(kIntSize));
__ movl(Operand(backtrack_stackpointer(), 0), backtrack_target);
MarkPositionForCodeRelativeFixup();
}
// Emits code that pops the top backtrack-stack entry into `target`: the
// 32-bit entry is loaded with sign extension to 64 bits, then the backtrack
// stack pointer is raised by kIntSize. `target` must not be the backtrack
// stack pointer.
void RegExpMacroAssemblerX64::Pop(Register target) {
ASSERT(!target.is(backtrack_stackpointer()));
__ movsxlq(target, Operand(backtrack_stackpointer(), 0));
// Notice: This updates flags, unlike normal Pop.
__ addq(backtrack_stackpointer(), Immediate(kIntSize));
}
// Emits code that discards the top backtrack-stack entry without reading it
// by raising the backtrack stack pointer by kIntSize.
void RegExpMacroAssemblerX64::Drop() {
__ addq(backtrack_stackpointer(), Immediate(kIntSize));
}
// Emits a preemption check: the stack guard limit is loaded into rax and
// compared against rsp. Unless rsp is above the limit, the preemption
// handler at check_preempt_label_ is invoked through SafeCall.
void RegExpMacroAssemblerX64::CheckPreemption() {
// Check for preemption.
Label no_preempt;
ExternalReference stack_guard_limit =
ExternalReference::address_of_stack_guard_limit();
__ load_rax(stack_guard_limit);
__ cmpq(rsp, rax);
__ j(above, &no_preempt);
SafeCall(&check_preempt_label_);
__ bind(&no_preempt);
}
// Emits a backtrack-stack limit check, but only when FLAG_check_stack is
// set. The regexp stack limit is loaded into rax and compared against the
// backtrack stack pointer; unless the pointer is above the limit, the
// overflow handler at stack_overflow_label_ is invoked through SafeCall.
void RegExpMacroAssemblerX64::CheckStackLimit() {
  if (!FLAG_check_stack) {
    return;
  }
  Label within_limit;
  ExternalReference limit_address =
      ExternalReference::address_of_regexp_stack_limit();
  __ load_rax(limit_address);
  __ cmpq(backtrack_stackpointer(), rax);
  __ j(above, &within_limit);
  SafeCall(&stack_overflow_label_);
  __ bind(&within_limit);
}
// Emits code that prepares the system stack for a call to a C function:
// rsp is aligned to OS::ActivationFrameAlignment() and its original value is
// saved on the stack so that CallCFunction can restore it after the call.
// Under __MSVC__, room for num_arguments pointer-sized slots is reserved
// below the saved rsp as well.
//
// num_arguments: number of (pointer-sized) arguments of the upcoming call.
// Clobbers kScratchRegister.
void RegExpMacroAssemblerX64::FrameAlign(int num_arguments) {
  // TODO(lrn): Since we no longer use the system stack arbitrarily (but we do
  // use it, e.g., for SafeCall), we know the number of elements on the stack
  // since the last frame alignment. We might be able to do this simpler then.
  int frameAlignment = OS::ActivationFrameAlignment();
  ASSERT(frameAlignment != 0);
  // Make stack end at alignment and make room for num_arguments pointers
  // (on Win64 only) and the original value of rsp.
  __ movq(kScratchRegister, rsp);
  ASSERT(IsPowerOf2(frameAlignment));
#ifdef __MSVC__
  // Allocate space for parameters and old rsp.
  __ subq(rsp, Immediate((num_arguments + 1) * kPointerSize));
  // The mask must be wrapped in an Immediate, exactly as in the branch
  // below; a bare int is not an assembler operand.
  __ and_(rsp, Immediate(-frameAlignment));
  __ movq(Operand(rsp, num_arguments * kPointerSize), kScratchRegister);
#else
  // Allocate space for old rsp.
  __ subq(rsp, Immediate(kPointerSize));
  __ and_(rsp, Immediate(-frameAlignment));
  __ movq(Operand(rsp, 0), kScratchRegister);
#endif
}
// Emits a call to the C function at function_address and then restores rsp
// to the value FrameAlign saved before aligning the stack. Must be preceded
// by FrameAlign(num_arguments) with the same argument count, since that
// determines where the saved rsp lives. The function address is loaded into
// rax, so rax is clobbered before the call.
void RegExpMacroAssemblerX64::CallCFunction(Address function_address,
int num_arguments) {
// Don't compile regexps with serialization enabled. The addresses of the C++
// function being called isn't relocatable.
ASSERT(!Serializer::enabled());
__ movq(rax, reinterpret_cast<intptr_t>(function_address), RelocInfo::NONE);
__ call(rax);
ASSERT(OS::ActivationFrameAlignment() != 0);
#ifdef __MSVC__
// The saved rsp sits just above the reserved argument slots.
__ movq(rsp, Operand(rsp, num_arguments * kPointerSize));
#else
// The saved rsp is the only thing FrameAlign left on top of the stack.
__ pop(rsp);
#endif
}
void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset,
int characters) {
if (mode_ == ASCII) {
if (characters == 4) {
__ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
} else if (characters == 2) {
__ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
} else {
ASSERT(characters == 1);
__ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
}
} else {
ASSERT(mode_ == UC16);
if (characters == 2) {
__ movl(current_character(),
Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
} else {
ASSERT(characters == 1);
__ movzxwl(current_character(),
Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
}
}
}
#undef __
}} // namespace v8::internal
......@@ -25,3 +25,271 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef V8_X64_REGEXP_MACRO_ASSEMBLER_X64_H_
#define V8_X64_REGEXP_MACRO_ASSEMBLER_X64_H_
namespace v8 {
namespace internal {
// RegExp macro assembler that generates native x64 code. It implements the
// NativeRegExpMacroAssembler interface on top of a MacroAssembler (masm_).
//
// Register usage in the generated code, as established by the accessors
// below and by GetCode:
// - rdx: current character (current_character()).
// - rcx: backtrack stack pointer (backtrack_stackpointer()).
// - r8:  pointer to this code's Code object (code_object_pointer()).
// - rdi: current position, as a negative byte offset from the input end.
// - rsi: address of the end of the input.
// - rbp: frame pointer; parameters, saved registers and the regexp
//        registers are addressed relative to it (see the k* offsets below).
//
// NOTE(review): the Win64 layout is selected with __MSVC__, which is not a
// macro that MSVC predefines (it defines _MSC_VER / _WIN64) - confirm the
// build system defines it.
class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler {
public:
// mode selects ASCII or UC16 code generation; registers_to_save is the
// number of registers (captures) written to the output on success.
RegExpMacroAssemblerX64(Mode mode, int registers_to_save);
virtual ~RegExpMacroAssemblerX64();
virtual int stack_limit_slack();
virtual void AdvanceCurrentPosition(int by);
virtual void AdvanceRegister(int reg, int by);
virtual void Backtrack();
virtual void Bind(Label* label);
virtual void CheckAtStart(Label* on_at_start);
virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure,
bool check_end_of_string);
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
uint32_t mask,
Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
uc16 minus,
uc16 mask,
Label* on_not_equal);
// Checks whether the given offset from the current position is before
// the end of the string.
virtual void CheckPosition(int cp_offset, Label* on_outside_input);
// Returns true if code for the special character class `type` was emitted,
// false if there is no custom implementation for it.
virtual bool CheckSpecialCharacterClass(uc16 type,
int cp_offset,
bool check_offset,
Label* on_no_match);
virtual void Fail();
// Finalizes the generated code and returns it as a Code object.
virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual void IfRegisterEqPos(int reg, Label* if_eq);
virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
bool check_bounds = true,
int characters = 1);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
virtual void PushCurrentPosition();
virtual void PushRegister(int register_index,
StackCheckFlag check_stack_limit);
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
static Result Match(Handle<Code> regexp,
Handle<String> subject,
int* offsets_vector,
int offsets_vector_length,
int previous_index);
// The parameter order matches the frame slots declared below
// (kInputString, kStartIndex, kInputStart, kInputEnd, kRegisterOutput,
// kAtStart).
static Result Execute(Code* code,
String* input,
int start_offset,
const byte* input_start,
const byte* input_end,
int* output,
bool at_start);
private:
// Offsets from rbp of function parameters and stored registers.
static const int kFramePointer = 0;
// Above the frame pointer - function parameters and return address.
// (The name kReturn_eip is inherited from ia32; on x64 this is the slot of
// the return address.)
static const int kReturn_eip = kFramePointer + kPointerSize;
static const int kFrameAlign = kReturn_eip + kPointerSize;
#ifdef __MSVC__
// Parameters (first four passed as registers, but with room on stack).
// In Microsoft 64-bit Calling Convention, there is room on the callers
// stack (before the return address) to spill parameter registers. We
// use this space to store the register passed parameters.
static const int kInputString = kFrameAlign;
static const int kStartIndex = kInputString + kPointerSize;
static const int kInputStart = kStartIndex + kPointerSize;
static const int kInputEnd = kInputStart + kPointerSize;
static const int kRegisterOutput = kInputEnd + kPointerSize;
static const int kAtStart = kRegisterOutput + kPointerSize;
static const int kStackHighEnd = kAtStart + kPointerSize;
#else
// In AMD64 ABI Calling Convention, the first six integer parameters
// are passed as registers, and caller must allocate space on the stack
// if it wants them stored. We push the parameters after the frame pointer.
static const int kInputString = kFramePointer - kPointerSize;
static const int kStartIndex = kInputString - kPointerSize;
static const int kInputStart = kStartIndex - kPointerSize;
static const int kInputEnd = kInputStart - kPointerSize;
static const int kRegisterOutput = kInputEnd - kPointerSize;
static const int kAtStart = kRegisterOutput - kPointerSize;
// The seventh parameter is passed on the stack, above the return address.
static const int kStackHighEnd = kFrameAlign;
#endif
#ifdef __MSVC__
// Microsoft calling convention has three callee-saved registers
// (that we are using). We push these after the frame pointer.
static const int kBackup_rsi = kFramePointer - kPointerSize;
static const int kBackup_rdi = kBackup_rsi - kPointerSize;
static const int kBackup_rbx = kBackup_rdi - kPointerSize;
static const int kLastCalleeSaveRegister = kBackup_rbx;
#else
// AMD64 Calling Convention has only one callee-save register that
// we use. We push this after the frame pointer (and after the
// parameters).
static const int kBackup_rbx = kAtStart - kPointerSize;
static const int kLastCalleeSaveRegister = kBackup_rbx;
#endif
// When adding local variables remember to push space for them in
// the frame in GetCode.
static const int kInputStartMinusOne =
kLastCalleeSaveRegister - kPointerSize;
// First register address. Following registers are below it on the stack.
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
// Load a number of characters at the given offset from the
// current position, into the current-character register.
void LoadCurrentCharacterUnchecked(int cp_offset, int character_count);
// Check whether preemption has been requested.
void CheckPreemption();
// Check whether we are exceeding the stack limit on the backtrack stack.
void CheckStackLimit();
// Called from RegExp if the stack-guard is triggered.
// If the code object is relocated, the return address is fixed before
// returning.
static int CheckStackGuardState(Address* return_address,
Code* re_code,
Address re_frame);
// Generate a call to CheckStackGuardState.
void CallCheckStackGuardState();
// Called from RegExp if the backtrack stack limit is hit.
// Tries to expand the stack. Returns the new stack-pointer if
// successful, and updates the stack_top address, or returns 0 if unable
// to grow the stack.
// This function must not trigger a garbage collection.
static Address GrowStack(Address stack_pointer, Address* stack_top);
// The rbp-relative location of a regexp register.
Operand register_location(int register_index);
// The register containing the current character after LoadCurrentCharacter.
inline Register current_character() { return rdx; }
// The register containing the backtrack stack top. Provides a meaningful
// name to the register.
inline Register backtrack_stackpointer() { return rcx; }
// The register containing a self pointer to this code's Code object.
inline Register code_object_pointer() { return r8; }
// Byte size of chars in the string to match (decided by the Mode argument)
inline int char_size() { return static_cast<int>(mode_); }
// Equivalent to a conditional branch to the label, unless the label
// is NULL, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to);
// Records the current code offset so that the label offset emitted just
// before it is patched by FixupCodeRelativePositions.
void MarkPositionForCodeRelativeFixup() {
code_relative_fixup_positions_.Add(masm_->pc_offset());
}
// Patches all recorded label offsets to be relative to the Code object.
void FixupCodeRelativePositions();
// Call and return internally in the generated code in a way that
// is GC-safe (i.e., doesn't leave absolute code addresses on the stack)
inline void SafeCall(Label* to);
inline void SafeCallTarget(Label* label);
inline void SafeReturn();
// Pushes the value of a register on the backtrack stack. Decrements the
// stack pointer (rcx) by kIntSize and stores the register's low 32 bits
// there.
inline void Push(Register source);
// Pushes a value on the backtrack stack. Decrements the stack pointer (rcx)
// by kIntSize and stores the value there.
inline void Push(Immediate value);
// Pushes the Code object relative offset of a label on the backtrack stack
// (i.e., a backtrack target). Decrements the stack pointer (rcx)
// by kIntSize and stores the value there.
inline void Push(Label* label);
// Pops a value from the backtrack stack. Reads the 32-bit entry at the
// stack pointer (rcx), sign-extended, and increments the pointer by
// kIntSize.
inline void Pop(Register target);
// Drops the top value from the backtrack stack without reading it.
// Increments the stack pointer (rcx) by kIntSize.
inline void Drop();
// Before calling a C-function from generated code, align the stack.
// Some compilers/platforms require the stack to be aligned when calling
// C++ code. The original rsp is saved on the stack so that CallCFunction
// can restore it. Arguments are not pushed afterwards; the callers in the
// .cc file place them in the argument registers of the calling convention.
// The argument count assumes all arguments are pointer sized.
// Needs a scratch register to do some arithmetic. This register will be
// trashed.
inline void FrameAlign(int num_arguments);
// Calls a C function and cleans up the space for arguments allocated
// by FrameAlign. The called function is not allowed to trigger a garbage
// collection, since that might move the code and invalidate the return
// address (unless this is somehow accounted for by the called function).
inline void CallCFunction(Address function_address, int num_arguments);
// The assembler the code is emitted through.
MacroAssembler* masm_;
// Code offsets recorded by MarkPositionForCodeRelativeFixup.
ZoneList<int> code_relative_fixup_positions_;
// Which mode to generate code for (ASCII or UC16).
Mode mode_;
// One greater than maximal register index actually used.
int num_registers_;
// Number of registers to output at the end (the saved registers
// are always 0..num_saved_registers_-1)
int num_saved_registers_;
// Labels used internally.
Label entry_label_;
Label start_label_;
Label success_label_;
Label backtrack_label_;
Label exit_label_;
Label check_preempt_label_;
Label stack_overflow_label_;
};
}} // namespace v8::internal
#endif // V8_X64_REGEXP_MACRO_ASSEMBLER_X64_H_
......@@ -113,6 +113,7 @@ test-debug/DebuggerUnload: CRASH || FAIL
test-debug/DebuggerHostDispatch: CRASH || FAIL
test-debug/DebugBreakInMessageHandler: CRASH || FAIL
test-debug/NoDebugBreakInAfterCompileMessageHandler: CRASH || FAIL
test-debug/RegExpDebugBreak: FAIL
test-api/Threading: CRASH || FAIL
test-api/Threading2: PASS || TIMEOUT
test-api/TryCatchSourceInfo: CRASH || FAIL
......
......@@ -38,18 +38,21 @@
#include "jsregexp.h"
#include "regexp-macro-assembler.h"
#include "regexp-macro-assembler-irregexp.h"
#ifdef V8_NATIVE_REGEXP
#ifdef V8_TARGET_ARCH_ARM
#include "arm/regexp-macro-assembler-arm.h"
#endif
#ifdef V8_TARGET_ARCH_X64
// No X64-implementation yet.
#include "x64/macro-assembler-x64.h"
#include "x64/regexp-macro-assembler-x64.h"
#endif
#ifdef V8_TARGET_ARCH_IA32
#include "ia32/macro-assembler-ia32.h"
#include "ia32/regexp-macro-assembler-ia32.h"
#endif
#else
#include "interpreter-irregexp.h"
#endif
using namespace v8::internal;
......@@ -599,75 +602,20 @@ TEST(DispatchTableConstruction) {
// Tests of interpreter.
// Hand-assembles a program for the pattern ^f(o)o with the bytecode macro
// assembler (RegExpMacroAssemblerIrregexp), then runs it through
// IrregexpInterpreter::Match against one matching and one non-matching
// two-byte string and checks the resulting register contents.
TEST(MacroAssembler) {
V8::Initialize(NULL);
// Backing store that the assembler writes into.
byte codes[1024];
RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024));
// Program for ^f(o)o.
Label fail, fail2, start;
uc16 foo_chars[3];
foo_chars[0] = 'f';
foo_chars[1] = 'o';
foo_chars[2] = 'o';
Vector<const uc16> foo(foo_chars, 3);
// Register 4 is scratch state checked at the end: it is set to 42, pushed,
// and then advanced by 42, so a successful match is expected to report 84.
m.SetRegister(4, 42);
// Pushed before the advance, so the pushed value is 42. The fail2 path below
// pops into register 0, and the non-matching case expects captures[0] == 42
// (presumably this pushed value).
m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
m.AdvanceRegister(4, 42);
m.GoTo(&start);
// Jumped over by the GoTo above.
m.Fail();
m.Bind(&start);
m.PushBacktrack(&fail2);
m.CheckCharacters(foo, 0, &fail, true);
// Record positions: register 0 at the current position, register 1 three
// characters further on, then (after restoring the pushed position)
// register 2 one character on and register 3 one more. For "foobar" the
// checks below expect 0, 3, 1 and 2 respectively.
m.WriteCurrentPositionToRegister(0, 0);
m.PushCurrentPosition();
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1, 0);
m.PopCurrentPosition();
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(3, 0);
m.Succeed();
// Target of CheckCharacters when the input is not "foo".
m.Bind(&fail);
// Presumably resumes at fail2, the only backtrack target pushed above.
m.Backtrack();
m.Succeed();
m.Bind(&fail2);
m.PopRegister(0);
m.Fail();
v8::HandleScope scope;
Handle<String> source = Factory::NewStringFromAscii(CStrVector("^f(o)o"));
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
int captures[5];
// "foobar" starts with "foo": the match is expected to succeed.
const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
Handle<String> f1_16 =
Factory::NewStringFromTwoByte(Vector<const uc16>(str1, 6));
CHECK(IrregexpInterpreter::Match(array, f1_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]);
CHECK_EQ(1, captures[2]);
CHECK_EQ(2, captures[3]);
CHECK_EQ(84, captures[4]);
// "barfoo" does not start with "foo": the match is expected to fail, and
// register 0 is expected to hold the value popped on the fail2 path.
const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
Handle<String> f2_16 =
Factory::NewStringFromTwoByte(Vector<const uc16>(str2, 6));
CHECK(!IrregexpInterpreter::Match(array, f2_16, captures, 0));
CHECK_EQ(42, captures[0]);
}
#ifdef V8_TARGET_ARCH_IA32 // IA32 Native Regexp only tests.
#ifdef V8_NATIVE_REGEXP
#ifdef V8_TARGET_ARCH_IA32
typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
#endif
#ifdef V8_TARGET_ARCH_X64
typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
#endif
class ContextInitializer {
public:
ContextInitializer() : env_(), scope_(), stack_guard_() {
ContextInitializer()
: env_(), scope_(), zone_(DELETE_ON_EXIT), stack_guard_() {
env_ = v8::Context::New();
env_->Enter();
}
......@@ -678,18 +626,19 @@ class ContextInitializer {
private:
v8::Persistent<v8::Context> env_;
v8::HandleScope scope_;
v8::internal::ZoneScope zone_;
v8::internal::StackGuard stack_guard_;
};
static RegExpMacroAssemblerIA32::Result ExecuteIA32(Code* code,
String* input,
int start_offset,
const byte* input_start,
const byte* input_end,
int* captures,
bool at_start) {
return RegExpMacroAssemblerIA32::Execute(
static ArchRegExpMacroAssembler::Result Execute(Code* code,
String* input,
int start_offset,
const byte* input_start,
const byte* input_end,
int* captures,
bool at_start) {
return NativeRegExpMacroAssembler::Execute(
code,
input,
start_offset,
......@@ -700,11 +649,11 @@ static RegExpMacroAssemblerIA32::Result ExecuteIA32(Code* code,
}
TEST(MacroAssemblerIA32Success) {
TEST(MacroAssemblerNativeSuccess) {
v8::V8::Initialize();
ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4);
m.Succeed();
......@@ -718,16 +667,16 @@ TEST(MacroAssemblerIA32Success) {
const byte* start_adr =
reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
RegExpMacroAssemblerIA32::Result result =
ExecuteIA32(*code,
*input,
0,
start_adr,
start_adr + seq_input->length(),
captures,
true);
NativeRegExpMacroAssembler::Result result =
Execute(*code,
*input,
0,
start_adr,
start_adr + seq_input->length(),
captures,
true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(-1, captures[0]);
CHECK_EQ(-1, captures[1]);
CHECK_EQ(-1, captures[2]);
......@@ -735,11 +684,11 @@ TEST(MacroAssemblerIA32Success) {
}
TEST(MacroAssemblerIA32Simple) {
TEST(MacroAssemblerNativeSimple) {
v8::V8::Initialize();
ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4);
uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3);
......@@ -762,16 +711,16 @@ TEST(MacroAssemblerIA32Simple) {
Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
Address start_adr = seq_input->GetCharsAddress();
RegExpMacroAssemblerIA32::Result result =
ExecuteIA32(*code,
*input,
0,
start_adr,
start_adr + input->length(),
captures,
true);
NativeRegExpMacroAssembler::Result result =
Execute(*code,
*input,
0,
start_adr,
start_adr + input->length(),
captures,
true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]);
CHECK_EQ(-1, captures[2]);
......@@ -781,23 +730,23 @@ TEST(MacroAssemblerIA32Simple) {
seq_input = Handle<SeqAsciiString>::cast(input);
start_adr = seq_input->GetCharsAddress();
result = ExecuteIA32(*code,
*input,
0,
start_adr,
start_adr + input->length(),
captures,
true);
result = Execute(*code,
*input,
0,
start_adr,
start_adr + input->length(),
captures,
true);
CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result);
CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
TEST(MacroAssemblerIA32SimpleUC16) {
TEST(MacroAssemblerNativeSimpleUC16) {
v8::V8::Initialize();
ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::UC16, 4);
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4);
uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3);
......@@ -822,16 +771,16 @@ TEST(MacroAssemblerIA32SimpleUC16) {
Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
Address start_adr = seq_input->GetCharsAddress();
RegExpMacroAssemblerIA32::Result result =
ExecuteIA32(*code,
*input,
0,
start_adr,
start_adr + input->length(),
captures,
true);
NativeRegExpMacroAssembler::Result result =
Execute(*code,
*input,
0,
start_adr,
start_adr + input->length(),
captures,
true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]);
CHECK_EQ(-1, captures[2]);
......@@ -842,23 +791,23 @@ TEST(MacroAssemblerIA32SimpleUC16) {
seq_input = Handle<SeqTwoByteString>::cast(input);
start_adr = seq_input->GetCharsAddress();
result = ExecuteIA32(*code,
*input,
0,
start_adr,
start_adr + input->length() * 2,
captures,
true);
result = Execute(*code,
*input,
0,
start_adr,
start_adr + input->length() * 2,
captures,
true);
CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result);
CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
TEST(MacroAssemblerIA32Backtrack) {
TEST(MacroAssemblerNativeBacktrack) {
v8::V8::Initialize();
ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0);
Label fail;
Label backtrack;
......@@ -879,24 +828,24 @@ TEST(MacroAssemblerIA32Backtrack) {
Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
Address start_adr = seq_input->GetCharsAddress();
RegExpMacroAssemblerIA32::Result result =
ExecuteIA32(*code,
*input,
0,
start_adr,
start_adr + input->length(),
NULL,
true);
NativeRegExpMacroAssembler::Result result =
Execute(*code,
*input,
0,
start_adr,
start_adr + input->length(),
NULL,
true);
CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result);
CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
TEST(MacroAssemblerIA32BackReferenceASCII) {
TEST(MacroAssemblerNativeBackReferenceASCII) {
v8::V8::Initialize();
ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 3);
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 3);
m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(2);
......@@ -922,27 +871,27 @@ TEST(MacroAssemblerIA32BackReferenceASCII) {
Address start_adr = seq_input->GetCharsAddress();
int output[3];
RegExpMacroAssemblerIA32::Result result =
ExecuteIA32(*code,
*input,
0,
start_adr,
start_adr + input->length(),
output,
true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
NativeRegExpMacroAssembler::Result result =
Execute(*code,
*input,
0,
start_adr,
start_adr + input->length(),
output,
true);
CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, output[0]);
CHECK_EQ(2, output[1]);
CHECK_EQ(6, output[2]);
}
TEST(MacroAssemblerIA32BackReferenceUC16) {
TEST(MacroAssemblerNativeBackReferenceUC16) {
v8::V8::Initialize();
ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::UC16, 3);
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 3);
m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(2);
......@@ -970,8 +919,8 @@ TEST(MacroAssemblerIA32BackReferenceUC16) {
Address start_adr = seq_input->GetCharsAddress();
int output[3];
RegExpMacroAssemblerIA32::Result result =
ExecuteIA32(*code,
NativeRegExpMacroAssembler::Result result =
Execute(*code,
*input,
0,
start_adr,
......@@ -979,7 +928,7 @@ TEST(MacroAssemblerIA32BackReferenceUC16) {
output,
true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, output[0]);
CHECK_EQ(2, output[1]);
CHECK_EQ(6, output[2]);
......@@ -987,11 +936,11 @@ TEST(MacroAssemblerIA32BackReferenceUC16) {
TEST(MacroAssemblerIA32AtStart) {
TEST(MacroAssemblernativeAtStart) {
v8::V8::Initialize();
ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0);
Label not_at_start, newline, fail;
m.CheckNotAtStart(&not_at_start);
......@@ -1022,34 +971,34 @@ TEST(MacroAssemblerIA32AtStart) {
Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
Address start_adr = seq_input->GetCharsAddress();
RegExpMacroAssemblerIA32::Result result =
ExecuteIA32(*code,
*input,
0,
start_adr,
start_adr + input->length(),
NULL,
true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
result = ExecuteIA32(*code,
*input,
3,
start_adr + 3,
start_adr + input->length(),
NULL,
false);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
NativeRegExpMacroAssembler::Result result =
Execute(*code,
*input,
0,
start_adr,
start_adr + input->length(),
NULL,
true);
CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
result = Execute(*code,
*input,
3,
start_adr + 3,
start_adr + input->length(),
NULL,
false);
CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
}
TEST(MacroAssemblerIA32BackRefNoCase) {
TEST(MacroAssemblerNativeBackRefNoCase) {
v8::V8::Initialize();
ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4);
Label fail, succ;
......@@ -1084,16 +1033,16 @@ TEST(MacroAssemblerIA32BackRefNoCase) {
Address start_adr = seq_input->GetCharsAddress();
int output[4];
RegExpMacroAssemblerIA32::Result result =
ExecuteIA32(*code,
*input,
0,
start_adr,
start_adr + input->length(),
output,
true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
NativeRegExpMacroAssembler::Result result =
Execute(*code,
*input,
0,
start_adr,
start_adr + input->length(),
output,
true);
CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, output[0]);
CHECK_EQ(12, output[1]);
CHECK_EQ(0, output[2]);
......@@ -1102,11 +1051,11 @@ TEST(MacroAssemblerIA32BackRefNoCase) {
TEST(MacroAssemblerIA32Registers) {
TEST(MacroAssemblerNativeRegisters) {
v8::V8::Initialize();
ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 5);
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 5);
uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3);
......@@ -1184,8 +1133,8 @@ TEST(MacroAssemblerIA32Registers) {
Address start_adr = seq_input->GetCharsAddress();
int output[5];
RegExpMacroAssemblerIA32::Result result =
ExecuteIA32(*code,
NativeRegExpMacroAssembler::Result result =
Execute(*code,
*input,
0,
start_adr,
......@@ -1193,7 +1142,7 @@ TEST(MacroAssemblerIA32Registers) {
output,
true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, output[0]);
CHECK_EQ(3, output[1]);
CHECK_EQ(6, output[2]);
......@@ -1202,11 +1151,11 @@ TEST(MacroAssemblerIA32Registers) {
}
TEST(MacroAssemblerIA32StackOverflow) {
TEST(MacroAssemblerStackOverflow) {
v8::V8::Initialize();
ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0);
Label loop;
m.Bind(&loop);
......@@ -1224,26 +1173,26 @@ TEST(MacroAssemblerIA32StackOverflow) {
Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
Address start_adr = seq_input->GetCharsAddress();
RegExpMacroAssemblerIA32::Result result =
ExecuteIA32(*code,
*input,
0,
start_adr,
start_adr + input->length(),
NULL,
true);
NativeRegExpMacroAssembler::Result result =
Execute(*code,
*input,
0,
start_adr,
start_adr + input->length(),
NULL,
true);
CHECK_EQ(RegExpMacroAssemblerIA32::EXCEPTION, result);
CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
CHECK(Top::has_pending_exception());
Top::clear_pending_exception();
}
TEST(MacroAssemblerIA32LotsOfRegisters) {
TEST(MacroAssemblerNativeLotsOfRegisters) {
v8::V8::Initialize();
ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 2);
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 2);
// At least 2048, to ensure the allocated space for registers
// span one full page.
......@@ -1270,24 +1219,88 @@ TEST(MacroAssemblerIA32LotsOfRegisters) {
Address start_adr = seq_input->GetCharsAddress();
int captures[2];
RegExpMacroAssemblerIA32::Result result =
ExecuteIA32(*code,
*input,
0,
start_adr,
start_adr + input->length(),
captures,
true);
CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
NativeRegExpMacroAssembler::Result result =
Execute(*code,
*input,
0,
start_adr,
start_adr + input->length(),
captures,
true);
CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, captures[0]);
CHECK_EQ(42, captures[1]);
Top::clear_pending_exception();
}
#endif // V8_REGEXP_NATIVE
#endif // V8_TARGET_ARCH_IA32
#else // ! V8_NATIVE_REGEXP
// Hand-assembles a program for the pattern ^f(o)o with the bytecode macro
// assembler (RegExpMacroAssemblerIrregexp), then runs it through
// IrregexpInterpreter::Match against one matching and one non-matching
// two-byte string and checks the resulting register contents.
TEST(MacroAssembler) {
V8::Initialize(NULL);
// Backing store that the assembler writes into.
byte codes[1024];
RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024));
// Program for ^f(o)o.
Label fail, fail2, start;
uc16 foo_chars[3];
foo_chars[0] = 'f';
foo_chars[1] = 'o';
foo_chars[2] = 'o';
Vector<const uc16> foo(foo_chars, 3);
// Register 4 is scratch state checked at the end: it is set to 42, pushed,
// and then advanced by 42, so a successful match is expected to report 84.
m.SetRegister(4, 42);
// Pushed before the advance, so the pushed value is 42. The fail2 path below
// pops into register 0, and the non-matching case expects captures[0] == 42
// (presumably this pushed value).
m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
m.AdvanceRegister(4, 42);
m.GoTo(&start);
// Jumped over by the GoTo above.
m.Fail();
m.Bind(&start);
m.PushBacktrack(&fail2);
m.CheckCharacters(foo, 0, &fail, true);
// Record positions: register 0 at the current position, register 1 three
// characters further on, then (after restoring the pushed position)
// register 2 one character on and register 3 one more. For "foobar" the
// checks below expect 0, 3, 1 and 2 respectively.
m.WriteCurrentPositionToRegister(0, 0);
m.PushCurrentPosition();
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1, 0);
m.PopCurrentPosition();
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(3, 0);
m.Succeed();
// Target of CheckCharacters when the input is not "foo".
m.Bind(&fail);
// Presumably resumes at fail2, the only backtrack target pushed above.
m.Backtrack();
m.Succeed();
m.Bind(&fail2);
m.PopRegister(0);
m.Fail();
v8::HandleScope scope;
Handle<String> source = Factory::NewStringFromAscii(CStrVector("^f(o)o"));
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
int captures[5];
// "foobar" starts with "foo": the match is expected to succeed.
const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
Handle<String> f1_16 =
Factory::NewStringFromTwoByte(Vector<const uc16>(str1, 6));
CHECK(IrregexpInterpreter::Match(array, f1_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]);
CHECK_EQ(1, captures[2]);
CHECK_EQ(2, captures[3]);
CHECK_EQ(84, captures[4]);
// "barfoo" does not start with "foo": the match is expected to fail, and
// register 0 is expected to hold the value popped on the fail2 path.
const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
Handle<String> f2_16 =
Factory::NewStringFromTwoByte(Vector<const uc16>(str2, 6));
CHECK(!IrregexpInterpreter::Match(array, f2_16, captures, 0));
CHECK_EQ(42, captures[0]);
}
#endif // ! V8_NATIVE_REGEXP
TEST(AddInverseToTable) {
......
......@@ -103,22 +103,24 @@ function testStrippedCustomError() {
// Utility function for testing that the expected strings occur
// in the stack trace produced when running the given function.
function testTrace(fun, expected, unexpected) {
function testTrace(name, fun, expected, unexpected) {
var threw = false;
try {
fun();
} catch (e) {
for (var i = 0; i < expected.length; i++) {
assertTrue(e.stack.indexOf(expected[i]) != -1);
assertTrue(e.stack.indexOf(expected[i]) != -1,
name + " doesn't contain expected[" + i + "]");
}
if (unexpected) {
for (var i = 0; i < unexpected.length; i++) {
assertEquals(e.stack.indexOf(unexpected[i]), -1);
assertEquals(e.stack.indexOf(unexpected[i]), -1,
name + " contains unexpected[" + i + "]");
}
}
threw = true;
}
assertTrue(threw);
assertTrue(threw, name + " didn't throw");
}
// Test that the error constructor is not shown in the trace
......@@ -127,10 +129,11 @@ function testCallerCensorship() {
try {
FAIL;
} catch (e) {
assertEquals(-1, e.stack.indexOf('at new ReferenceError'));
assertEquals(-1, e.stack.indexOf('at new ReferenceError'),
"CallerCensorship contained new ReferenceError");
threw = true;
}
assertTrue(threw);
assertTrue(threw, "CallerCensorship didn't throw");
}
// Test that the explicit constructor call is shown in the trace
......@@ -143,10 +146,11 @@ function testUnintendedCallerCensorship() {
}
});
} catch (e) {
assertTrue(e.stack.indexOf('at new ReferenceError') != -1);
assertTrue(e.stack.indexOf('at new ReferenceError') != -1,
"UnintendedCallerCensorship didn't contain new ReferenceError");
threw = true;
}
assertTrue(threw);
assertTrue(threw, "UnintendedCallerCensorship didn't throw");
}
// If an error occurs while the stack trace is being formatted it should
......@@ -161,9 +165,10 @@ function testErrorsDuringFormatting() {
n.foo();
} catch (e) {
threw = true;
assertTrue(e.stack.indexOf('<error: ReferenceError') != -1);
assertTrue(e.stack.indexOf('<error: ReferenceError') != -1,
"ErrorsDuringFormatting didn't contain error: ReferenceError");
}
assertTrue(threw);
assertTrue(threw, "ErrorsDuringFormatting didn't throw");
threw = false;
// Now we can't even format the message saying that we couldn't format
// the stack frame. Put that in your pipe and smoke it!
......@@ -172,26 +177,28 @@ function testErrorsDuringFormatting() {
n.foo();
} catch (e) {
threw = true;
assertTrue(e.stack.indexOf('<error>') != -1);
assertTrue(e.stack.indexOf('<error>') != -1,
"ErrorsDuringFormatting didn't contain <error>");
}
assertTrue(threw);
}
testTrace(testArrayNative, ["Array.map (native)"]);
testTrace(testNested, ["at one", "at two", "at three"]);
testTrace(testMethodNameInference, ["at Foo.bar"]);
testTrace(testImplicitConversion, ["at Nirk.valueOf"]);
testTrace(testEval, ["at Doo (eval at testEval"]);
testTrace(testNestedEval, ["eval at Inner (eval at Outer"]);
testTrace(testValue, ["at Number.causeError"]);
testTrace(testConstructor, ["new Plonk"]);
testTrace(testRenamedMethod, ["Wookie.a$b$c$d [as d]"]);
testTrace(testAnonymousMethod, ["Array.<anonymous>"]);
testTrace(testDefaultCustomError, ["hep-hey", "new CustomError"],
assertTrue(threw, "ErrorsDuringFormatting didnt' throw (2)");
}
testTrace("testArrayNative", testArrayNative, ["Array.map (native)"]);
testTrace("testNested", testNested, ["at one", "at two", "at three"]);
testTrace("testMethodNameInference", testMethodNameInference, ["at Foo.bar"]);
testTrace("testImplicitConversion", testImplicitConversion, ["at Nirk.valueOf"]);
testTrace("testEval", testEval, ["at Doo (eval at testEval"]);
testTrace("testNestedEval", testNestedEval, ["eval at Inner (eval at Outer"]);
testTrace("testValue", testValue, ["at Number.causeError"]);
testTrace("testConstructor", testConstructor, ["new Plonk"]);
testTrace("testRenamedMethod", testRenamedMethod, ["Wookie.a$b$c$d [as d]"]);
testTrace("testAnonymousMethod", testAnonymousMethod, ["Array.<anonymous>"]);
testTrace("testDefaultCustomError", testDefaultCustomError,
["hep-hey", "new CustomError"],
["collectStackTrace"]);
testTrace(testStrippedCustomError, ["hep-hey"], ["new CustomError",
"collectStackTrace"]);
testTrace("testStrippedCustomError", testStrippedCustomError, ["hep-hey"],
["new CustomError", "collectStackTrace"]);
testCallerCensorship();
testUnintendedCallerCensorship();
testErrorsDuringFormatting();
......@@ -803,10 +803,3 @@ ecma/Expressions/11.7.3: SKIP
ecma/Expressions/11.10-3: SKIP
ecma/Expressions/11.7.1: SKIP
ecma_3/RegExp/regress-209067: SKIP
[ $ARCH == x64 ]
# Tests that fail on the 64-bit port. This section should be empty
# when the 64-bit port is fully debugged.
js1_2/regexp/regress-9141: FAIL
......@@ -32,6 +32,7 @@
'gcc_version%': 'unknown',
'target_arch%': 'ia32',
'v8_use_snapshot%': 'true',
'v8_regexp%': 'native',
},
'includes': [
'../../../build/common.gypi',
......@@ -55,6 +56,7 @@
['target_arch=="x64"', {
'defines': [
'V8_TARGET_ARCH_X64',
'V8_NATIVE_REGEXP',
],
}],
],
......@@ -428,14 +430,18 @@
'../../src/ia32/jump-target-ia32.cc',
'../../src/ia32/macro-assembler-ia32.cc',
'../../src/ia32/macro-assembler-ia32.h',
'../../src/ia32/regexp-macro-assembler-ia32.cc',
'../../src/ia32/regexp-macro-assembler-ia32.h',
'../../src/ia32/register-allocator-ia32.cc',
'../../src/ia32/stub-cache-ia32.cc',
'../../src/ia32/virtual-frame-ia32.cc',
'../../src/ia32/virtual-frame-ia32.h',
],
}],
# Native RegExp sources for 32-bit x86. The architecture name used by this
# file is 'ia32' (it is the target_arch% default); a test against "x32" can
# never match, which would leave these sources out of the build.
['target_arch=="ia32" and v8_regexp=="native"', {
  'sources': [
    '../../src/ia32/regexp-macro-assembler-ia32.cc',
    '../../src/ia32/regexp-macro-assembler-ia32.h',
  ],
}],
['target_arch=="x64"', {
'include_dirs+': [
'../../src/x64',
......@@ -457,14 +463,18 @@
'../../src/x64/jump-target-x64.cc',
'../../src/x64/macro-assembler-x64.cc',
'../../src/x64/macro-assembler-x64.h',
#'../../src/x64/regexp-macro-assembler-x64.cc',
#'../../src/x64/regexp-macro-assembler-x64.h',
'../../src/x64/register-allocator-x64.cc',
'../../src/x64/stub-cache-x64.cc',
'../../src/x64/virtual-frame-x64.cc',
'../../src/x64/virtual-frame-x64.h',
],
}],
['target_arch=="x64" and v8_regexp=="native"', {
'sources': [
'../../src/x64/regexp-macro-assembler-x64.cc',
'../../src/x64/regexp-macro-assembler-x64.h',
],
}],
['OS=="linux"', {
'link_settings': {
'libraries': [
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment