Commit 49d05495 authored by erik.corry@gmail.com's avatar erik.corry@gmail.com

* Remove an unused layer of abstraction by not having both a macro assembler and
  an assembler when compiling to bytecode.  This fixes
  http://code.google.com/p/v8/issues/detail?id=165
* Preload the 'current character' register when starting a match (byte code only
  at the moment).
Review URL: http://codereview.chromium.org/10995

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@865 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 54f8a1ea
......@@ -35,20 +35,20 @@ Import('context')
SOURCES = {
'all': [
'accessors.cc', 'allocation.cc', 'api.cc', 'assembler.cc',
'assembler-irregexp.cc', 'ast.cc', 'bootstrapper.cc', 'builtins.cc',
'checks.cc', 'code-stubs.cc', 'codegen.cc', 'compilation-cache.cc',
'compiler.cc', 'contexts.cc', 'conversions.cc', 'counters.cc',
'dateparser.cc', 'debug.cc', 'disassembler.cc', 'execution.cc',
'factory.cc', 'flags.cc', 'frames.cc', 'global-handles.cc',
'handles.cc', 'hashmap.cc', 'heap.cc', 'ic.cc', 'interpreter-irregexp.cc',
'jsregexp.cc', 'log.cc', 'mark-compact.cc', 'messages.cc',
'objects.cc', 'parser.cc', 'property.cc', 'regexp-macro-assembler.cc',
'regexp-macro-assembler-irregexp.cc', 'rewriter.cc', 'runtime.cc', 'scanner.cc',
'scopeinfo.cc', 'scopes.cc', 'serialize.cc', 'snapshot-common.cc',
'spaces.cc', 'string-stream.cc', 'stub-cache.cc', 'token.cc', 'top.cc',
'unicode.cc', 'usage-analyzer.cc', 'utils.cc', 'v8-counters.cc',
'v8.cc', 'v8threads.cc', 'variables.cc', 'zone.cc'
'accessors.cc', 'allocation.cc', 'api.cc', 'assembler.cc', 'ast.cc',
'bootstrapper.cc', 'builtins.cc', 'checks.cc', 'code-stubs.cc',
'codegen.cc', 'compilation-cache.cc', 'compiler.cc', 'contexts.cc',
'conversions.cc', 'counters.cc', 'dateparser.cc', 'debug.cc',
'disassembler.cc', 'execution.cc', 'factory.cc', 'flags.cc', 'frames.cc',
'global-handles.cc', 'handles.cc', 'hashmap.cc', 'heap.cc', 'ic.cc',
'interpreter-irregexp.cc', 'jsregexp.cc', 'log.cc', 'mark-compact.cc',
'messages.cc', 'objects.cc', 'parser.cc', 'property.cc',
'regexp-macro-assembler.cc', 'regexp-macro-assembler-irregexp.cc',
'rewriter.cc', 'runtime.cc', 'scanner.cc', 'scopeinfo.cc', 'scopes.cc',
'serialize.cc', 'snapshot-common.cc', 'spaces.cc', 'string-stream.cc',
'stub-cache.cc', 'token.cc', 'top.cc', 'unicode.cc', 'usage-analyzer.cc',
'utils.cc', 'v8-counters.cc', 'v8.cc', 'v8threads.cc', 'variables.cc',
'zone.cc'
],
'arch:arm': ['assembler-arm.cc', 'builtins-arm.cc', 'codegen-arm.cc',
'cpu-arm.cc', 'disasm-arm.cc', 'frames-arm.cc', 'ic-arm.cc',
......
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// A light-weight assembler for the Irregexp byte code.
#include "v8.h"
#include "ast.h"
#include "bytecodes-irregexp.h"
#include "assembler-irregexp.h"
#include "assembler-irregexp-inl.h"
namespace v8 { namespace internal {
// Starts assembling into the caller-supplied |buffer| at offset zero.  The
// buffer is recorded as not owned by the assembler.
IrregexpAssembler::IrregexpAssembler(Vector<byte> buffer)
    : buffer_(buffer), pc_(0), own_buffer_(false) {}
// Disposes of the buffer, but only when the assembler is marked as its owner.
IrregexpAssembler::~IrregexpAssembler() {
  if (!own_buffer_) return;
  buffer_.Dispose();
}
// Emits BC_PUSH_CP with |cp_offset| as its operand (written with Emit32).
// The offset must not be negative.
void IrregexpAssembler::PushCurrentPosition(int cp_offset) {
  ASSERT(cp_offset >= 0);

  Emit(BC_PUSH_CP);
  Emit32(cp_offset);
}
// Emits BC_PUSH_BT followed by a reference to label |l| (resolved or linked
// by EmitOrLink).
void IrregexpAssembler::PushBacktrack(Label* l) {
  Emit(BC_PUSH_BT);
  EmitOrLink(l);
}
// Emits BC_PUSH_REGISTER followed by the register |index|, which must not be
// negative.
void IrregexpAssembler::PushRegister(int index) {
  ASSERT(index >= 0);

  Emit(BC_PUSH_REGISTER);
  Emit(index);
}
// Emits BC_SET_REGISTER_TO_CP with two operands: the register |index| and the
// |cp_offset| (written with Emit32).  Neither may be negative.
void IrregexpAssembler::WriteCurrentPositionToRegister(int index,
                                                       int cp_offset) {
  ASSERT(cp_offset >= 0);
  ASSERT(index >= 0);

  Emit(BC_SET_REGISTER_TO_CP);
  Emit(index);
  Emit32(cp_offset);
}
// Emits BC_SET_CP_TO_REGISTER followed by the register |index|, which must
// not be negative.
void IrregexpAssembler::ReadCurrentPositionFromRegister(int index) {
  ASSERT(index >= 0);

  Emit(BC_SET_CP_TO_REGISTER);
  Emit(index);
}
// Emits BC_SET_REGISTER_TO_SP followed by the register |index|, which must
// not be negative.
void IrregexpAssembler::WriteStackPointerToRegister(int index) {
  ASSERT(index >= 0);

  Emit(BC_SET_REGISTER_TO_SP);
  Emit(index);
}
// Emits BC_SET_SP_TO_REGISTER followed by the register |index|, which must
// not be negative.
void IrregexpAssembler::ReadStackPointerFromRegister(int index) {
  ASSERT(index >= 0);

  Emit(BC_SET_SP_TO_REGISTER);
  Emit(index);
}
// Emits BC_SET_REGISTER with two operands: the register |index| (must not be
// negative) and the |value| (written with Emit32).
void IrregexpAssembler::SetRegister(int index, int value) {
  ASSERT(index >= 0);

  Emit(BC_SET_REGISTER);
  Emit(index);
  Emit32(value);
}
// Emits BC_ADVANCE_REGISTER with two operands: the register |index| (must not
// be negative) and the amount |by| (written with Emit32).
void IrregexpAssembler::AdvanceRegister(int index, int by) {
  ASSERT(index >= 0);

  Emit(BC_ADVANCE_REGISTER);
  Emit(index);
  Emit32(by);
}
// Emits the operand-less BC_POP_CP byte code.
void IrregexpAssembler::PopCurrentPosition() {
  Emit(BC_POP_CP);
}
// Emits the operand-less BC_POP_BT byte code.
void IrregexpAssembler::PopBacktrack() {
  Emit(BC_POP_BT);
}
void IrregexpAssembler::PopRegister(int index) {
Emit(BC_POP_REGISTER);
Emit(index);
}
// Emits the operand-less BC_FAIL byte code.
void IrregexpAssembler::Fail() {
  Emit(BC_FAIL);
}
// Emits the operand-less BC_BREAK byte code.
void IrregexpAssembler::Break() {
  Emit(BC_BREAK);
}
// Emits the operand-less BC_SUCCEED byte code.
void IrregexpAssembler::Succeed() {
  Emit(BC_SUCCEED);
}
// Binds the not-yet-bound label |l| to the current program counter.
//
// If the label has pending uses, they form a chain through the buffer: the
// label's position names the most recent use, and the 32-bit value stored at
// each use names the previous one, with zero terminating the chain.  Every
// slot on the chain is overwritten with the now-known target.
void IrregexpAssembler::Bind(Label* l) {
  ASSERT(!l->is_bound());
  if (l->is_linked()) {
    for (int next_use = l->pos(); next_use != 0; ) {
      byte* slot = buffer_.start() + next_use;
      // Read the link to the previous use before patching over it.
      next_use = Load32(slot);
      Store32(slot, pc_);
    }
  }
  l->bind_to(pc_);
}
// Emits BC_ADVANCE_CP with |cp_offset| as its operand (written with Emit32).
void IrregexpAssembler::AdvanceCP(int cp_offset) {
  Emit(BC_ADVANCE_CP);
  Emit32(cp_offset);
}
// Emits BC_GOTO followed by a reference to label |l| (resolved or linked by
// EmitOrLink).
void IrregexpAssembler::GoTo(Label* l) {
  Emit(BC_GOTO);
  EmitOrLink(l);
}
// Emits BC_LOAD_CURRENT_CHAR with two operands: |cp_offset| (written with
// Emit32) and a reference to the |on_end| label.
void IrregexpAssembler::LoadCurrentChar(int cp_offset, Label* on_end) {
  Emit(BC_LOAD_CURRENT_CHAR);
  Emit32(cp_offset);
  EmitOrLink(on_end);
}
// Emits BC_CHECK_CHAR with two operands: the character |c| (written with
// Emit16) and a reference to the |on_match| label.
void IrregexpAssembler::CheckCharacter(uc16 c, Label* on_match) {
  Emit(BC_CHECK_CHAR);
  Emit16(c);
  EmitOrLink(on_match);
}
// Emits BC_CHECK_NOT_CHAR with two operands: the character |c| (written with
// Emit16) and a reference to the |on_mismatch| label.
void IrregexpAssembler::CheckNotCharacter(uc16 c, Label* on_mismatch) {
  Emit(BC_CHECK_NOT_CHAR);
  Emit16(c);
  EmitOrLink(on_mismatch);
}
// Emits BC_OR_CHECK_NOT_CHAR with three operands: the character |c| and the
// |mask| (both written with Emit16), then a reference to the |on_mismatch|
// label.
void IrregexpAssembler::OrThenCheckNotCharacter(uc16 c,
                                                uc16 mask,
                                                Label* on_mismatch) {
  Emit(BC_OR_CHECK_NOT_CHAR);
  Emit16(c);
  Emit16(mask);
  EmitOrLink(on_mismatch);
}
// Emits BC_MINUS_OR_CHECK_NOT_CHAR with three operands: the character |c| and
// the |mask| (both written with Emit16), then a reference to the
// |on_mismatch| label.
void IrregexpAssembler::MinusOrThenCheckNotCharacter(uc16 c,
                                                     uc16 mask,
                                                     Label* on_mismatch) {
  Emit(BC_MINUS_OR_CHECK_NOT_CHAR);
  Emit16(c);
  Emit16(mask);
  EmitOrLink(on_mismatch);
}
// Emits BC_CHECK_LT with two operands: the |limit| (written with Emit16) and
// a reference to the |on_less| label.
void IrregexpAssembler::CheckCharacterLT(uc16 limit, Label* on_less) {
  Emit(BC_CHECK_LT);
  Emit16(limit);
  EmitOrLink(on_less);
}
// Emits BC_CHECK_GT with two operands: the |limit| (written with Emit16) and
// a reference to the |on_greater| label.
void IrregexpAssembler::CheckCharacterGT(uc16 limit, Label* on_greater) {
  Emit(BC_CHECK_GT);
  Emit16(limit);
  EmitOrLink(on_greater);
}
// Emits BC_CHECK_NOT_BACK_REF with two operands: the |capture_index| and a
// reference to the |on_mismatch| label.
void IrregexpAssembler::CheckNotBackReference(int capture_index,
                                              Label* on_mismatch) {
  Emit(BC_CHECK_NOT_BACK_REF);
  Emit(capture_index);
  EmitOrLink(on_mismatch);
}
// Emits BC_CHECK_NOT_BACK_REF_NO_CASE with two operands: the |capture_index|
// and a reference to the |on_mismatch| label.
void IrregexpAssembler::CheckNotBackReferenceNoCase(int capture_index,
                                                    Label* on_mismatch) {
  Emit(BC_CHECK_NOT_BACK_REF_NO_CASE);
  Emit(capture_index);
  EmitOrLink(on_mismatch);
}
// Emits BC_CHECK_NOT_REGS_EQUAL with three operands: the two register indices
// |reg1| and |reg2|, then a reference to the |on_not_equal| label.
void IrregexpAssembler::CheckNotRegistersEqual(int reg1,
                                               int reg2,
                                               Label* on_not_equal) {
  Emit(BC_CHECK_NOT_REGS_EQUAL);
  Emit(reg1);
  Emit(reg2);
  EmitOrLink(on_not_equal);
}
// Shared emitter behind CheckRegisterLT and CheckRegisterGE.  Writes the
// given |byte_code|, then the register |reg_index|, the comparison value |vs|
// (with Emit16) and a reference to the |on_true| label.
void IrregexpAssembler::CheckRegister(int byte_code,
                                      int reg_index,
                                      uint16_t vs,
                                      Label* on_true) {
  Emit(byte_code);
  Emit(reg_index);
  Emit16(vs);
  EmitOrLink(on_true);
}
// Emits a register check using the BC_CHECK_REGISTER_LT byte code; the
// operands are laid out by CheckRegister.
void IrregexpAssembler::CheckRegisterLT(int reg_index,
                                        uint16_t vs,
                                        Label* on_less_than) {
  CheckRegister(BC_CHECK_REGISTER_LT, reg_index, vs, on_less_than);
}
// Emits a register check using the BC_CHECK_REGISTER_GE byte code; the
// operands are laid out by CheckRegister.
void IrregexpAssembler::CheckRegisterGE(int reg_index,
                                        uint16_t vs,
                                        Label* on_greater_than_equal) {
  CheckRegister(BC_CHECK_REGISTER_GE, reg_index, vs, on_greater_than_equal);
}
// Emits BC_LOOKUP_MAP1 with three operands: |start| (written with Emit16),
// a reference to the |bit_map| label and a reference to the |on_zero| label.
void IrregexpAssembler::LookupMap1(uc16 start,
                                   Label* bit_map,
                                   Label* on_zero) {
  Emit(BC_LOOKUP_MAP1);
  Emit16(start);
  EmitOrLink(bit_map);
  EmitOrLink(on_zero);
}
// Emits BC_LOOKUP_MAP2 followed by |start| (written with Emit16), a reference
// to the |half_nibble_map| label and then one label reference per entry of
// |table|, in order.  The table must hold between one and four labels.
void IrregexpAssembler::LookupMap2(uc16 start,
                                   Label* half_nibble_map,
                                   const Vector<Label*>& table) {
  Emit(BC_LOOKUP_MAP2);
  Emit16(start);
  EmitOrLink(half_nibble_map);
  ASSERT(table.length() > 0);
  ASSERT(table.length() <= 4);
  const int target_count = table.length();
  int slot = 0;
  while (slot < target_count) {
    EmitOrLink(table[slot]);
    slot++;
  }
}
// Emits BC_LOOKUP_MAP8 followed by |start| (written with Emit16), a reference
// to the |byte_map| label and then one label reference per entry of |table|,
// in order.  The table must hold between one and 256 labels.
void IrregexpAssembler::LookupMap8(uc16 start,
                                   Label* byte_map,
                                   const Vector<Label*>& table) {
  Emit(BC_LOOKUP_MAP8);
  Emit16(start);
  EmitOrLink(byte_map);
  ASSERT(table.length() > 0);
  ASSERT(table.length() <= 256);
  const int target_count = table.length();
  int slot = 0;
  while (slot < target_count) {
    EmitOrLink(table[slot]);
    slot++;
  }
}
// Emits BC_LOOKUP_HI_MAP8 followed by the |start| byte, a reference to the
// |byte_map| label and then one label reference per entry of |table|, in
// order.  The table must hold between one and 256 labels.
void IrregexpAssembler::LookupHighMap8(byte start,
                                       Label* byte_map,
                                       const Vector<Label*>& table) {
  Emit(BC_LOOKUP_HI_MAP8);
  Emit(start);
  EmitOrLink(byte_map);
  ASSERT(table.length() > 0);
  ASSERT(table.length() <= 256);
  const int target_count = table.length();
  int slot = 0;
  while (slot < target_count) {
    EmitOrLink(table[slot]);
    slot++;
  }
}
// Returns the current value of the program counter, which Copy() uses as the
// number of bytes to copy out of the buffer.
int IrregexpAssembler::length() {
  const int current_pc = pc_;
  return current_pc;
}
// Copies the first length() bytes of the buffer to the memory at |a|.  The
// destination must have room for that many bytes.
void IrregexpAssembler::Copy(Address a) {
  const int byte_count = length();
  memcpy(a, buffer_.start(), byte_count);
}
// Replaces the current buffer with a larger one that is owned by the
// assembler, preserving everything that was in the old buffer.  The old
// buffer is disposed of only if the assembler owned it; a caller-supplied
// buffer is left untouched.
//
// The new length is twice the old one, but never less than
// kMinimumExpandedLength.  Plain doubling would leave a zero-length buffer at
// zero length and would grow a one to three byte buffer by fewer than four
// bytes.
// NOTE(review): the visible emitters call Expand() once (in an `if`, not a
// loop) before writing up to four bytes, so a single call has to make enough
// room - confirm against the full inline emitter bodies.
void IrregexpAssembler::Expand() {
  static const int kMinimumExpandedLength = 16;

  const bool old_buffer_was_our_own = own_buffer_;
  Vector<byte> old_buffer = buffer_;

  int new_length = old_buffer.length() * 2;
  if (new_length < kMinimumExpandedLength) {
    new_length = kMinimumExpandedLength;
  }

  buffer_ = Vector<byte>::New(new_length);
  own_buffer_ = true;
  // Skip the copy for an empty old buffer, whose start pointer may not be
  // valid to pass to memcpy even with a zero length.
  if (old_buffer.length() > 0) {
    memcpy(buffer_.start(), old_buffer.start(), old_buffer.length());
  }
  if (old_buffer_was_our_own) {
    old_buffer.Dispose();
  }
}
} } // namespace v8::internal
// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// A light-weight assembler for the Irregexp byte code.
#ifndef V8_ASSEMBLER_IRREGEXP_H_
#define V8_ASSEMBLER_IRREGEXP_H_
namespace v8 { namespace internal {
// A light-weight assembler that writes Irregexp byte code into a byte buffer.
// Each public emitter below writes one byte code together with its operands
// at the current program counter.
class IrregexpAssembler {
public:
// Create an assembler that emits Irregexp byte code into the given buffer,
// starting at the beginning of the buffer.
//
// The assembler does not take ownership of the buffer passed in here and
// never deallocates it.  When the buffer fills up, Expand() allocates a new
// buffer of twice the length, copies the contents of the old buffer into it
// and continues there.  That replacement buffer is owned by the assembler
// and is disposed of upon destruction of the assembler.
explicit IrregexpAssembler(Vector<byte>);
~IrregexpAssembler();
// CP = current position in source.
// BT = backtrack label.
// Stack.
void PushCurrentPosition(int cp_offset = 0);
void PushBacktrack(Label* l);
void PushRegister(int index);
void WriteCurrentPositionToRegister(int index, int cp_offset = 0);
void ReadCurrentPositionFromRegister(int index);
void WriteStackPointerToRegister(int index);
void ReadStackPointerFromRegister(int index);
void SetRegister(int index, int value);
void AdvanceRegister(int index, int by);
void PopCurrentPosition();
void PopBacktrack();
void PopRegister(int index);
void Fail();
void Succeed();
// This instruction will cause a fatal VM error if hit.
void Break();
// Binds an unbound label L to the current code position.
void Bind(Label* l);
void AdvanceCP(int by);
void GoTo(Label* l);
// Loads current char into a machine register. Jumps to the label if we
// reached the end of the subject string. Fall through otherwise.
void LoadCurrentChar(int cp_offset, Label* on_end);
// Checks current char register against a singleton.
void CheckCharacter(uc16 c, Label* on_match);
void CheckNotCharacter(uc16 c, Label* on_mismatch);
void OrThenCheckNotCharacter(uc16 c, uc16 mask, Label* on_mismatch);
void MinusOrThenCheckNotCharacter(uc16 c, uc16 mask, Label* on_mismatch);
// Used to check current char register against a range.
void CheckCharacterLT(uc16 limit, Label* on_less);
void CheckCharacterGT(uc16 limit, Label* on_greater);
// Checks current position for a match against a previous capture. Advances
// current position by the length of the capture iff it matches. The capture
// is stored in a given register and the register after. If a register
// contains -1 then the other register must always contain -1 and the
// on_mismatch label will never be called.
void CheckNotBackReference(int capture_index, Label* on_mismatch);
void CheckNotBackReferenceNoCase(int capture_index, Label* on_mismatch);
void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
// Checks a register for strictly-less-than or greater-than-or-equal.
void CheckRegisterLT(int reg_index, uint16_t vs, Label* on_less_than);
void CheckRegisterGE(int reg_index, uint16_t vs, Label* on_greater_equal);
// Subtracts a 16 bit value from the current character, uses the result to
// look up in a bit array, uses the result of that to decide whether to fall
// through (on 1) or jump to the on_zero label (on 0).
void LookupMap1(uc16 start, Label* bit_map, Label* on_zero);
// Subtracts a 16 bit value from the current character, uses the result to
// look up in a 2-bit array, uses the result of that to look up in a label
// table and jumps to the label.
void LookupMap2(uc16 start,
Label* half_nibble_map,
const Vector<Label*>& table);
// Subtracts a 16 bit value from the current character, uses the result to
// look up in a byte array, uses the result of that to look up in a label
// array and jumps to the label.
void LookupMap8(uc16 start, Label* byte_map, const Vector<Label*>& table);
// Takes the high byte of the current character, uses the result to
// look up in a byte array, uses the result of that to look up in a label
// array and jumps to the label.
void LookupHighMap8(byte start, Label* byte_map, const Vector<Label*>& table);
// Code and bitmap emission.
inline void Emit32(uint32_t x);
inline void Emit16(uint32_t x);
inline void Emit(uint32_t x);
// Bytecode buffer.
// length() returns the current program counter; Copy() copies that many
// bytes from the start of the buffer to the given address.
int length();
void Copy(Address a);
// Emits the position of a bound label, or threads the current code position
// onto the chain of pending uses of a label that is not bound yet.
inline void EmitOrLink(Label* l);
private:
// Common emitter used by CheckRegisterLT and CheckRegisterGE.
inline void CheckRegister(int byte_code,
int reg_index,
uint16_t vs,
Label* on_true);
// Switches to a larger, assembler-owned buffer.
void Expand();
// The buffer into which the byte code is generated.
Vector<byte> buffer_;
// The program counter.
int pc_;
// True if the assembler owns the buffer, false if buffer is external.
bool own_buffer_;
DISALLOW_IMPLICIT_CONSTRUCTORS(IrregexpAssembler);
};
} } // namespace v8::internal
#endif // V8_ASSEMBLER_IRREGEXP_H_
......@@ -85,7 +85,7 @@ class Label : public ZoneObject { // LabelShadows are dynamically allocated.
friend class RegexpAssembler;
friend class Displacement;
friend class LabelShadow;
friend class IrregexpAssembler;
friend class RegExpMacroAssemblerIrregexp;
};
......
......@@ -99,13 +99,13 @@ static void TraceInterpreter(const byte* code_base,
static bool RawMatch(const byte* code_base,
Vector<const uc16> subject,
int* registers,
int current) {
int current,
int current_char) {
const byte* pc = code_base;
static const int kBacktrackStackSize = 10000;
int backtrack_stack[kBacktrackStackSize];
int backtrack_stack_space = kBacktrackStackSize;
int* backtrack_sp = backtrack_stack;
int current_char = -1;
#ifdef DEBUG
if (FLAG_trace_regexp_bytecodes) {
PrintF("\n\nStart bytecode interpreter\n\n");
......@@ -382,11 +382,15 @@ bool IrregexpInterpreter::Match(Handle<ByteArray> code_array,
AssertNoAllocation a;
const byte* code_base = code_array->GetDataStartAddress();
uc16 previous_char = '\n';
Vector<const uc16> subject_vector =
Vector<const uc16>(subject16->GetTwoByteData(), subject16->length());
if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(code_base,
Vector<const uc16>(subject16->GetTwoByteData(),
subject16->length()),
subject_vector,
registers,
start_position);
start_position,
previous_char);
}
} } // namespace v8::internal
......@@ -40,7 +40,6 @@
#include "compilation-cache.h"
#include "string-stream.h"
#include "parser.h"
#include "assembler-irregexp.h"
#include "regexp-macro-assembler.h"
#include "regexp-macro-assembler-tracer.h"
#include "regexp-macro-assembler-irregexp.h"
......@@ -2730,9 +2729,8 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
input->capture_count);
#endif
}
byte codes[1024];
IrregexpAssembler assembler(Vector<byte>(codes, 1024));
RegExpMacroAssemblerIrregexp macro_assembler(&assembler);
EmbeddedVector<byte, 1024> codes;
RegExpMacroAssemblerIrregexp macro_assembler(codes);
return compiler.Assemble(&macro_assembler,
node,
input->capture_count);
......
......@@ -31,13 +31,12 @@
#include "v8.h"
#include "ast.h"
#include "bytecodes-irregexp.h"
#include "assembler-irregexp.h"
namespace v8 { namespace internal {
void IrregexpAssembler::Emit(uint32_t byte) {
void RegExpMacroAssemblerIrregexp::Emit(uint32_t byte) {
ASSERT(pc_ <= buffer_.length());
if (pc_ == buffer_.length()) {
Expand();
......@@ -46,7 +45,7 @@ void IrregexpAssembler::Emit(uint32_t byte) {
}
void IrregexpAssembler::Emit16(uint32_t word) {
void RegExpMacroAssemblerIrregexp::Emit16(uint32_t word) {
ASSERT(pc_ <= buffer_.length());
if (pc_ + 1 >= buffer_.length()) {
Expand();
......@@ -56,7 +55,7 @@ void IrregexpAssembler::Emit16(uint32_t word) {
}
void IrregexpAssembler::Emit32(uint32_t word) {
void RegExpMacroAssemblerIrregexp::Emit32(uint32_t word) {
ASSERT(pc_ <= buffer_.length());
if (pc_ + 3 >= buffer_.length()) {
Expand();
......@@ -66,17 +65,4 @@ void IrregexpAssembler::Emit32(uint32_t word) {
}
// Emits a 32-bit reference to label |l|.
//
// For a bound label this is simply the label's position.  Otherwise the slot
// is threaded onto the label's chain of pending uses: the label is linked to
// the current code position and the slot receives the position it was linked
// to before, or zero if this is the first use.
void IrregexpAssembler::EmitOrLink(Label* l) {
  if (l->is_bound()) {
    Emit32(l->pos());
    return;
  }
  const int previous_use = l->is_linked() ? l->pos() : 0;
  l->link_to(pc_);
  Emit32(previous_use);
}
} } // namespace v8::internal
This diff is collapsed.
......@@ -33,9 +33,20 @@ namespace v8 { namespace internal {
class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
public:
explicit RegExpMacroAssemblerIrregexp(IrregexpAssembler* assembler)
: assembler_(assembler) {
}
// Create an assembler. Instructions and relocation information are emitted
// into a buffer, with the instructions starting from the beginning and the
// relocation information starting from the end of the buffer. See CodeDesc
// for a detailed comment on the layout (globals.h).
//
// If the provided buffer is NULL, the assembler allocates and grows its own
// buffer, and buffer_size determines the initial buffer size. The buffer is
// owned by the assembler and deallocated upon destruction of the assembler.
//
// If the provided buffer is not NULL, the assembler uses the provided buffer
// for code generation and assumes its size to be buffer_size. If the buffer
// is too small, a fatal error occurs. No deallocation of the buffer is done
// upon destruction of the assembler.
explicit RegExpMacroAssemblerIrregexp(Vector<byte>);
virtual ~RegExpMacroAssemblerIrregexp();
virtual void Bind(Label* label);
virtual void EmitOrLink(Label* label);
......@@ -88,7 +99,25 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual IrregexpImplementation Implementation();
virtual Handle<Object> GetCode();
private:
IrregexpAssembler* assembler_;
void Expand();
// Code and bitmap emission.
inline void Emit32(uint32_t x);
inline void Emit16(uint32_t x);
inline void Emit(uint32_t x);
// Bytecode buffer.
int length();
void Copy(Address a);
// The buffer into which code and relocation info are generated.
Vector<byte> buffer_;
// The program counter.
int pc_;
// True if the assembler owns the buffer, false if buffer is external.
bool own_buffer_;
DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpMacroAssemblerIrregexp);
};
} } // namespace v8::internal
......
......@@ -36,7 +36,6 @@
#include "parser.h"
#include "ast.h"
#include "jsregexp-inl.h"
#include "assembler-irregexp.h"
#include "regexp-macro-assembler.h"
#include "regexp-macro-assembler-irregexp.h"
#ifdef ARM
......@@ -519,146 +518,10 @@ TEST(DispatchTableConstruction) {
}
// Hand-assembles a byte code program that scans forward through the subject
// for the substring "foo" and runs it in the interpreter.  On a match,
// register 0 is written with offset 0 and register 1 with offset 2 relative
// to the current position; the checks below expect the position of the 'f'
// and of the last 'o' respectively.
TEST(Assembler) {
V8::Initialize(NULL);
byte codes[1024];
IrregexpAssembler assembler(Vector<byte>(codes, 1024));
#define __ assembler.
Label advance;
Label look_for_foo;
Label fail;
// Skip the initial advance so that the search starts at the start position.
__ GoTo(&look_for_foo);
// Any mismatch below comes back here to retry one character further on.
__ Bind(&advance);
__ AdvanceCP(1);
__ Bind(&look_for_foo);
// Each load jumps to |fail| when the end of the subject has been reached.
__ LoadCurrentChar(0, &fail);
__ CheckNotCharacter('f', &advance);
__ LoadCurrentChar(1, &fail);
__ CheckNotCharacter('o', &advance);
__ LoadCurrentChar(2, &fail);
__ CheckNotCharacter('o', &advance);
// Record the match in registers 0 and 1.
__ WriteCurrentPositionToRegister(0);
__ WriteCurrentPositionToRegister(1, 2);
__ Succeed();
__ Bind(&fail);
__ Fail();
v8::HandleScope scope;
// Move the assembled code into a heap byte array for the interpreter.
Handle<ByteArray> array = Factory::NewByteArray(assembler.length());
assembler.Copy(array->GetDataStartAddress());
int captures[2];
// No "foo" anywhere in the subject: the match must fail.
Handle<String> f1 =
Factory::NewStringFromAscii(CStrVector("Now is the time"));
Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);
CHECK(!IrregexpInterpreter::Match(array, f1_16, captures, 0));
// "foo" at the very start of the subject.
Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz"));
Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);
CHECK(IrregexpInterpreter::Match(array, f2_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(2, captures[1]);
// "foo" in the middle of the subject.
Handle<String> f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery"));
Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3);
CHECK(IrregexpInterpreter::Match(array, f3_16, captures, 0));
CHECK_EQ(3, captures[0]);
CHECK_EQ(5, captures[1]);
}
// Hand-assembles a backtracking byte code program for /^.*foo/ and runs it in
// the interpreter.  Register 0 receives the start position and register 1
// the current position plus 2 at the point where "foo" has been matched.
TEST(Assembler2) {
V8::Initialize(NULL);
byte codes[1024];
IrregexpAssembler assembler(Vector<byte>(codes, 1024));
#define __ assembler.
// /^.*foo/
Label more_dots;
Label unwind_dot;
Label failure;
Label foo;
Label foo_failed;
Label dot_match;
// ^
// The entries pushed here are popped again at |failure|, in reverse order.
__ PushCurrentPosition();
__ PushRegister(0);
__ WriteCurrentPositionToRegister(0);
__ PushBacktrack(&failure);
__ GoTo(&dot_match);
// .*
__ Bind(&more_dots);
__ AdvanceCP(1);
__ Bind(&dot_match);
// Before each character, push the position and the |unwind_dot| backtrack
// label.
__ PushCurrentPosition();
__ PushBacktrack(&unwind_dot);
// Go on to try "foo" at the end of the subject or at a newline; any other
// character loops back to |more_dots|.
__ LoadCurrentChar(0, &foo);
__ CheckNotCharacter('\n', &more_dots);
// foo
__ Bind(&foo);
__ CheckNotCharacter('f', &foo_failed);
__ LoadCurrentChar(1, &foo_failed);
__ CheckNotCharacter('o', &foo_failed);
__ LoadCurrentChar(2, &foo_failed);
__ CheckNotCharacter('o', &foo_failed);
__ WriteCurrentPositionToRegister(1, 2);
__ Succeed();
// Break causes a fatal VM error if hit, so it marks code that is expected to
// be unreachable.
__ Break();
__ Bind(&foo_failed);
__ PopBacktrack();
__ Break();
__ Bind(&unwind_dot);
__ PopCurrentPosition();
__ LoadCurrentChar(0, &foo_failed);
__ GoTo(&foo);
__ Bind(&failure);
__ PopRegister(0);
__ PopCurrentPosition();
__ Fail();
v8::HandleScope scope;
// Move the assembled code into a heap byte array for the interpreter.
Handle<ByteArray> array = Factory::NewByteArray(assembler.length());
assembler.Copy(array->GetDataStartAddress());
int captures[2];
// No "foo" anywhere in the subject: the match must fail.
Handle<String> f1 =
Factory::NewStringFromAscii(CStrVector("Now is the time"));
Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);
CHECK(!IrregexpInterpreter::Match(array, f1_16, captures, 0));
Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz"));
Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);
CHECK(IrregexpInterpreter::Match(array, f2_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(2, captures[1]);
// The match is anchored, so it starts at 0 even though "foo" starts at 3.
Handle<String> f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery"));
Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3);
CHECK(IrregexpInterpreter::Match(array, f3_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(5, captures[1]);
// With two occurrences of "foo" the expected end is that of the later one.
Handle<String> f4 =
Factory::NewStringFromAscii(CStrVector("football buffoonery"));
Handle<String> f4_16 = RegExpImpl::StringToTwoByte(f4);
CHECK(IrregexpInterpreter::Match(array, f4_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(14, captures[1]);
// The only "foo" comes after a newline, so the match must fail.
Handle<String> f5 =
Factory::NewStringFromAscii(CStrVector("walking\nbarefoot"));
Handle<String> f5_16 = RegExpImpl::StringToTwoByte(f5);
CHECK(!IrregexpInterpreter::Match(array, f5_16, captures, 0));
}
TEST(MacroAssembler) {
V8::Initialize(NULL);
byte codes[1024];
IrregexpAssembler assembler(Vector<byte>(codes, 1024));
RegExpMacroAssemblerIrregexp m(&assembler);
RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024));
// ^f(o)o.
Label fail, fail2, start;
uc16 foo_chars[3];
......@@ -695,8 +558,7 @@ TEST(MacroAssembler) {
v8::HandleScope scope;
Handle<ByteArray> array = Factory::NewByteArray(assembler.length());
assembler.Copy(array->GetDataStartAddress());
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode());
int captures[5];
Handle<String> f1 =
......
......@@ -297,15 +297,7 @@
>
</File>
<File
RelativePath="..\..\src\assembler-irregexp-inl.h"
>
</File>
<File
RelativePath="..\..\src\assembler-irregexp.cc"
>
</File>
<File
RelativePath="..\..\src\assembler-irregexp.h"
RelativePath="..\..\src\regexp-macro-assembler-irregexp-inl.h"
>
</File>
<File
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment