Commit 263bac50 authored by erik.corry@gmail.com

* Irregexp: Move from a byte-oriented bytecode format to a 32-bit oriented
bytecode format.  This provides a nice speedup on Intel and probably an
even better one on ARM.  Also removes the 256-register limitation on the
interpreter.
Review URL: http://codereview.chromium.org/18363

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1111 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent ad3fdf16
......@@ -31,50 +31,59 @@
namespace v8 { namespace internal {
// Mask covering the low 8 bits of a word, where Emit places the bytecode.
// NOTE(review): its use is not visible in this listing — presumably the
// interpreter applies it to recover the opcode; confirm.
static const int BYTECODE_MASK = 0xff;
// Largest value that fits in the 24-bit argument packed beside the bytecode.
// The CheckCharacter family switches to the *_4_CHARS bytecodes (operand in
// its own 32-bit slot) when the character value exceeds this.
static const unsigned int MAX_FIRST_ARG = 0xffffffu;
// Left shift applied to the 24-bit argument when Emit combines it with the
// bytecode into one 32-bit word.
static const int BYTECODE_SHIFT = 8;
// Bytecode table. Each entry is V(name, code, length): the bytecode's name,
// its opcode number and its encoded length, with the operand layout sketched
// in the trailing comment. This listing shows two generations back to back:
// the byte-oriented entries (codes 0-42) followed by the 32-bit oriented
// entries (codes 0-46), where every instruction starts with a word holding
// the bytecode (bc8) plus a 24-bit argument or padding.
#define BYTECODE_ITERATOR(V) \
V(BREAK, 0, 1) /* break */ \
V(PUSH_CP, 1, 5) /* push_cp offset32 */ \
V(PUSH_BT, 2, 5) /* push_bt addr32 */ \
V(PUSH_REGISTER, 3, 2) /* push_register register_index */ \
V(SET_REGISTER_TO_CP, 4, 6) /* set_register_to_cp register_index offset32 */ \
V(SET_CP_TO_REGISTER, 5, 2) /* set_cp_to_register register_index */ \
V(SET_REGISTER_TO_SP, 6, 2) /* set_register_to_sp register_index */ \
V(SET_SP_TO_REGISTER, 7, 2) /* set_sp_to_register register_index */ \
V(SET_REGISTER, 8, 6) /* set_register register_index value32 */ \
V(ADVANCE_REGISTER, 9, 6) /* advance_register register_index value32 */ \
V(POP_CP, 10, 1) /* pop_cp */ \
V(POP_BT, 11, 1) /* pop_bt */ \
V(POP_REGISTER, 12, 2) /* pop_register register_index */ \
V(FAIL, 13, 1) /* fail */ \
V(SUCCEED, 14, 1) /* succeed */ \
V(ADVANCE_CP, 15, 5) /* advance_cp offset32 */ \
V(GOTO, 16, 5) /* goto addr32 */ \
V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32 addr32 */ \
V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 5) /* load offset32 */ \
V(LOAD_2_CURRENT_CHARS, 19, 9) /* load offset32 addr32 */ \
V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 5) /* load offset32 */ \
V(LOAD_4_CURRENT_CHARS, 21, 9) /* load offset32 addr32 */ \
V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 5) /* load offset32 */ \
V(CHECK_CHAR, 23, 9) /* check_char uint32 addr32 */ \
V(CHECK_NOT_CHAR, 24, 9) /* check_not_char uint32 addr32 */ \
V(AND_CHECK_CHAR, 25, 13) /* and_check_char uint32 uint32 addr32 */ \
V(AND_CHECK_NOT_CHAR, 26, 13) /* and_check_not_char uint32 uint32 addr32 */ \
V(MINUS_AND_CHECK_NOT_CHAR, 27, 11) /* minus_and_check_not_char uc16 uc16...*/ \
V(CHECK_LT, 28, 7) /* check_lt uc16 addr32 */ \
V(CHECK_GT, 29, 7) /* check_gt uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 30, 6) /* check_not_back_ref capture_idx addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 31, 6) /* check_not_back_ref_no_case captu... */ \
V(CHECK_NOT_REGS_EQUAL, 32, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \
V(LOOKUP_MAP1, 33, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2, 34, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 35, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 36, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 37, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 38, 8) /* check_reg_ge register_index value16 addr32 */ \
V(CHECK_REGISTER_EQ_POS, 39, 6) /* check_register_eq_pos index addr32 */ \
V(CHECK_AT_START, 40, 5) /* check_at_start addr32 */ \
V(CHECK_NOT_AT_START, 41, 5) /* check_not_at_start addr32 */ \
V(CHECK_GREEDY, 42, 5) /* check_greedy addr32 */
V(BREAK, 0, 4) /* bc8 */ \
V(PUSH_CP, 1, 4) /* bc8 pad24 */ \
V(PUSH_BT, 2, 8) /* bc8 pad24 addr32 */ \
V(PUSH_REGISTER, 3, 4) /* bc8 reg_idx24 */ \
V(SET_REGISTER_TO_CP, 4, 8) /* bc8 reg_idx24 offset32 */ \
V(SET_CP_TO_REGISTER, 5, 4) /* bc8 reg_idx24 */ \
V(SET_REGISTER_TO_SP, 6, 4) /* bc8 reg_idx24 */ \
V(SET_SP_TO_REGISTER, 7, 4) /* bc8 reg_idx24 */ \
V(SET_REGISTER, 8, 8) /* bc8 reg_idx24 value32 */ \
V(ADVANCE_REGISTER, 9, 8) /* bc8 reg_idx24 value32 */ \
V(POP_CP, 10, 4) /* bc8 pad24 */ \
V(POP_BT, 11, 4) /* bc8 pad24 */ \
V(POP_REGISTER, 12, 4) /* bc8 reg_idx24 */ \
V(FAIL, 13, 4) /* bc8 pad24 */ \
V(SUCCEED, 14, 4) /* bc8 pad24 */ \
V(ADVANCE_CP, 15, 4) /* bc8 offset24 */ \
V(GOTO, 16, 8) /* bc8 pad24 addr32 */ \
V(LOAD_CURRENT_CHAR, 17, 8) /* bc8 offset24 addr32 */ \
V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 4) /* bc8 offset24 */ \
V(LOAD_2_CURRENT_CHARS, 19, 8) /* bc8 offset24 addr32 */ \
V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 4) /* bc8 offset24 */ \
V(LOAD_4_CURRENT_CHARS, 21, 8) /* bc8 offset24 addr32 */ \
V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 4) /* bc8 offset24 */ \
V(CHECK_4_CHARS, 23, 12) /* bc8 pad24 uint32 addr32 */ \
V(CHECK_CHAR, 24, 8) /* bc8 pad8 uint16 addr32 */ \
V(CHECK_NOT_4_CHARS, 25, 12) /* bc8 pad24 uint32 addr32 */ \
V(CHECK_NOT_CHAR, 26, 8) /* bc8 pad8 uint16 addr32 */ \
V(AND_CHECK_4_CHARS, 27, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
V(AND_CHECK_NOT_4_CHARS, 29, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
V(AND_CHECK_NOT_CHAR, 30, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
V(MINUS_AND_CHECK_NOT_CHAR, 31, 12) /* bc8 pad8 uc16 uc16 uc16 addr32 */ \
V(CHECK_LT, 32, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_GT, 33, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 34, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 35, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_REGS_EQUAL, 36, 12) /* bc8 reg_idx24 reg_idx32 addr32 */ \
V(LOOKUP_MAP1, 37, 12) /* bc8 pad8 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2, 38, 96) /* bc8 pad8 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 39, 96) /* bc8 pad8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 40, 96) /* bc8 start24 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 41, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_GE, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_EQ_POS, 43, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_AT_START, 44, 8) /* bc8 pad24 addr32 */ \
V(CHECK_NOT_AT_START, 45, 8) /* bc8 pad24 addr32 */ \
V(CHECK_GREEDY, 46, 8) /* bc8 pad24 addr32 */
// Expands one bytecode table entry into a constant BC_<name> holding its
// opcode number. The length argument is accepted but not used by this
// expansion.
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
......
This diff is collapsed.
......@@ -36,12 +36,14 @@
namespace v8 { namespace internal {
// Appends one 32-bit instruction word to the bytecode buffer: `byte` (the
// bytecode) occupies the low 8 bits and `twenty_four_bits` is shifted up by
// BYTECODE_SHIFT to fill the rest.
// This listing interleaves both sides of the change: the one-argument
// signature, the `pc_ == buffer_.length()` test and the single-byte store
// belong to the byte-oriented version that the two-argument form replaces.
void RegExpMacroAssemblerIrregexp::Emit(uint32_t byte) {
void RegExpMacroAssemblerIrregexp::Emit(uint32_t byte, uint32_t twenty_four_bits) {
uint32_t word = ((twenty_four_bits << BYTECODE_SHIFT) | byte);
ASSERT(pc_ <= buffer_.length());
if (pc_ == buffer_.length()) {
// Calls Expand() when fewer than four bytes remain, since a whole word is
// about to be stored.
if (pc_ + 3 >= buffer_.length()) {
Expand();
}
buffer_[pc_++] = byte;
Store32(buffer_.start() + pc_, word);
pc_ += 4;
}
......
......@@ -84,8 +84,7 @@ void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) {
// Emits POP_REGISTER as a single word with register_index in the 24-bit
// argument field. The two one-argument Emit calls are the earlier byte-wise
// encoding that the final call replaces.
void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) {
Emit(BC_POP_REGISTER);
Emit(register_index);
Emit(BC_POP_REGISTER, register_index);
}
......@@ -93,16 +92,14 @@ void RegExpMacroAssemblerIrregexp::PushRegister(
int register_index,
StackCheckFlag check_stack_limit) {
ASSERT(register_index >= 0);
Emit(BC_PUSH_REGISTER);
Emit(register_index);
Emit(BC_PUSH_REGISTER, register_index);
}
// Emits SET_REGISTER_TO_CP: register_index travels in the opcode word's
// 24-bit argument and cp_offset follows as a separate 32-bit value.
// The two one-argument Emit calls are the earlier byte-wise encoding that the
// two-argument call replaces.
void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
int register_index, int cp_offset) {
ASSERT(register_index >= 0);
Emit(BC_SET_REGISTER_TO_CP);
Emit(register_index);
Emit(BC_SET_REGISTER_TO_CP, register_index);
Emit32(cp_offset); // Current position offset.
}
......@@ -115,90 +112,83 @@ void RegExpMacroAssemblerIrregexp::ClearRegister(int reg) {
// Emits SET_CP_TO_REGISTER as one word carrying register_index in its 24-bit
// argument. The pair of one-argument Emit calls is the previous byte-wise
// form, superseded by the last call.
void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister(
int register_index) {
ASSERT(register_index >= 0);
Emit(BC_SET_CP_TO_REGISTER);
Emit(register_index);
Emit(BC_SET_CP_TO_REGISTER, register_index);
}
// Emits SET_REGISTER_TO_SP as one word carrying register_index in its 24-bit
// argument. The pair of one-argument Emit calls is the previous byte-wise
// form, superseded by the last call.
void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister(
int register_index) {
ASSERT(register_index >= 0);
Emit(BC_SET_REGISTER_TO_SP);
Emit(register_index);
Emit(BC_SET_REGISTER_TO_SP, register_index);
}
// Emits SET_SP_TO_REGISTER as one word carrying register_index in its 24-bit
// argument. The pair of one-argument Emit calls is the previous byte-wise
// form, superseded by the last call.
void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister(
int register_index) {
ASSERT(register_index >= 0);
Emit(BC_SET_SP_TO_REGISTER);
Emit(register_index);
Emit(BC_SET_SP_TO_REGISTER, register_index);
}
// Emits SET_REGISTER: register_index goes in the opcode word's 24-bit
// argument and the value `to` follows as its own 32-bit slot.
// The two one-argument Emit calls are the earlier byte-wise encoding.
void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) {
ASSERT(register_index >= 0);
Emit(BC_SET_REGISTER);
Emit(register_index);
Emit(BC_SET_REGISTER, register_index);
Emit32(to);
}
// Emits ADVANCE_REGISTER: register_index goes in the opcode word's 24-bit
// argument and the increment `by` follows as its own 32-bit slot.
// The two one-argument Emit calls are the earlier byte-wise encoding.
void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) {
ASSERT(register_index >= 0);
Emit(BC_ADVANCE_REGISTER);
Emit(register_index);
Emit(BC_ADVANCE_REGISTER, register_index);
Emit32(by);
}
// Emits POP_CP with a zero (padding) argument. The one-argument Emit call is
// the earlier single-byte form it replaces.
void RegExpMacroAssemblerIrregexp::PopCurrentPosition() {
Emit(BC_POP_CP);
Emit(BC_POP_CP, 0);
}
// Emits PUSH_CP with a zero (padding) argument. The first two calls are the
// earlier encoding, which spent a separate 32-bit slot on an offset that was
// always 0; the two-argument call replaces both.
void RegExpMacroAssemblerIrregexp::PushCurrentPosition() {
Emit(BC_PUSH_CP);
Emit32(0); // Current position offset.
Emit(BC_PUSH_CP, 0);
}
// Emits POP_BT with a zero (padding) argument. The one-argument Emit call is
// the earlier single-byte form it replaces.
void RegExpMacroAssemblerIrregexp::Backtrack() {
Emit(BC_POP_BT);
Emit(BC_POP_BT, 0);
}
// Emits GOTO with a zero (padding) argument, then hands the target label to
// EmitOrLink, which supplies the addr32 operand listed in the bytecode table.
// The one-argument Emit call is the earlier single-byte form.
void RegExpMacroAssemblerIrregexp::GoTo(Label* l) {
Emit(BC_GOTO);
Emit(BC_GOTO, 0);
EmitOrLink(l);
}
// Emits PUSH_BT with a zero (padding) argument, then hands the label to
// EmitOrLink for the following 32-bit operand slot.
// The one-argument Emit call is the earlier single-byte form.
void RegExpMacroAssemblerIrregexp::PushBacktrack(Label* l) {
Emit(BC_PUSH_BT);
Emit(BC_PUSH_BT, 0);
EmitOrLink(l);
}
// Emits SUCCEED with a zero (padding) argument. The one-argument Emit call
// is the earlier single-byte form it replaces.
void RegExpMacroAssemblerIrregexp::Succeed() {
Emit(BC_SUCCEED);
Emit(BC_SUCCEED, 0);
}
// Emits FAIL with a zero (padding) argument. The one-argument Emit call is
// the earlier single-byte form it replaces.
void RegExpMacroAssemblerIrregexp::Fail() {
Emit(BC_FAIL);
Emit(BC_FAIL, 0);
}
// Emits ADVANCE_CP with `by` packed into the opcode word's 24-bit argument
// (offset24 in the bytecode table). The first two calls are the earlier
// encoding, which stored `by` in a separate 32-bit slot.
// NOTE(review): nothing in this function checks that `by` fits in 24 bits.
void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) {
Emit(BC_ADVANCE_CP);
Emit32(by);
Emit(BC_ADVANCE_CP, by);
}
// Emits CHECK_GREEDY with a zero (padding) argument, then hands the label to
// EmitOrLink for the following 32-bit operand slot.
// The one-argument Emit call is the earlier single-byte form.
void RegExpMacroAssemblerIrregexp::CheckGreedyLoop(
Label* on_tos_equals_current_position) {
Emit(BC_CHECK_GREEDY);
Emit(BC_CHECK_GREEDY, 0);
EmitOrLink(on_tos_equals_current_position);
}
......@@ -227,51 +217,56 @@ void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset,
bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
}
}
Emit(bytecode);
Emit32(cp_offset);
Emit(bytecode, cp_offset);
if (check_bounds) EmitOrLink(on_failure);
}
// Emits CHECK_LT with the 16-bit `limit` packed into the opcode word's
// argument field, followed by the on_less label via EmitOrLink.
// The Emit(BC_CHECK_LT) / Emit16(limit) pair is the earlier byte-wise form.
void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit,
Label* on_less) {
Emit(BC_CHECK_LT);
Emit16(limit);
Emit(BC_CHECK_LT, limit);
EmitOrLink(on_less);
}
// Emits CHECK_GT with the 16-bit `limit` packed into the opcode word's
// argument field, followed by the on_greater label via EmitOrLink.
// The Emit(BC_CHECK_GT) / Emit16(limit) pair is the earlier byte-wise form.
void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit,
Label* on_greater) {
Emit(BC_CHECK_GT);
Emit16(limit);
Emit(BC_CHECK_GT, limit);
EmitOrLink(on_greater);
}
// Emits a character-equality check followed by the on_equal label.
// Values that fit the 24-bit argument (c <= MAX_FIRST_ARG) are packed into a
// CHECK_CHAR word; larger values use CHECK_4_CHARS with `c` in its own
// 32-bit slot. The leading one-argument Emit(BC_CHECK_CHAR) is the earlier
// form that this branch replaces.
void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) {
Emit(BC_CHECK_CHAR);
if (c > MAX_FIRST_ARG) {
Emit(BC_CHECK_4_CHARS, 0);
Emit32(c);
} else {
Emit(BC_CHECK_CHAR, c);
}
EmitOrLink(on_equal);
}
// Emits CHECK_AT_START with a zero (padding) argument, then hands the label
// to EmitOrLink for the following 32-bit operand slot.
// The one-argument Emit call is the earlier single-byte form.
void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) {
Emit(BC_CHECK_AT_START);
Emit(BC_CHECK_AT_START, 0);
EmitOrLink(on_at_start);
}
// Emits CHECK_NOT_AT_START with a zero (padding) argument, then hands the
// label to EmitOrLink for the following 32-bit operand slot.
// The one-argument Emit call is the earlier single-byte form.
void RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) {
Emit(BC_CHECK_NOT_AT_START);
Emit(BC_CHECK_NOT_AT_START, 0);
EmitOrLink(on_not_at_start);
}
// Emits a character-inequality check followed by the on_not_equal label.
// Values that fit the 24-bit argument (c <= MAX_FIRST_ARG) are packed into a
// CHECK_NOT_CHAR word; larger values use CHECK_NOT_4_CHARS with `c` in its
// own 32-bit slot. The leading one-argument Emit(BC_CHECK_NOT_CHAR) is the
// earlier form that this branch replaces.
void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uint32_t c,
Label* on_not_equal) {
Emit(BC_CHECK_NOT_CHAR);
if (c > MAX_FIRST_ARG) {
Emit(BC_CHECK_NOT_4_CHARS, 0);
Emit32(c);
} else {
Emit(BC_CHECK_NOT_CHAR, c);
}
EmitOrLink(on_not_equal);
}
......@@ -280,8 +275,12 @@ void RegExpMacroAssemblerIrregexp::CheckCharacterAfterAnd(
uint32_t c,
uint32_t mask,
Label* on_equal) {
Emit(BC_AND_CHECK_CHAR);
if (c > MAX_FIRST_ARG) {
Emit(BC_AND_CHECK_4_CHARS, 0);
Emit32(c);
} else {
Emit(BC_AND_CHECK_CHAR, c);
}
Emit32(mask);
EmitOrLink(on_equal);
}
......@@ -291,8 +290,12 @@ void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterAnd(
uint32_t c,
uint32_t mask,
Label* on_not_equal) {
Emit(BC_AND_CHECK_NOT_CHAR);
if (c > MAX_FIRST_ARG) {
Emit(BC_AND_CHECK_NOT_4_CHARS, 0);
Emit32(c);
} else {
Emit(BC_AND_CHECK_NOT_CHAR, c);
}
Emit32(mask);
EmitOrLink(on_not_equal);
}
......@@ -303,8 +306,7 @@ void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd(
uc16 minus,
uc16 mask,
Label* on_not_equal) {
Emit(BC_MINUS_AND_CHECK_NOT_CHAR);
Emit16(c);
Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c);
Emit16(minus);
Emit16(mask);
EmitOrLink(on_not_equal);
......@@ -313,8 +315,7 @@ void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd(
// Emits CHECK_NOT_BACK_REF with start_reg in the opcode word's 24-bit
// argument, followed by the on_not_equal label via EmitOrLink.
// The two one-argument Emit calls are the earlier byte-wise encoding.
void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
Label* on_not_equal) {
Emit(BC_CHECK_NOT_BACK_REF);
Emit(start_reg);
Emit(BC_CHECK_NOT_BACK_REF, start_reg);
EmitOrLink(on_not_equal);
}
......@@ -322,8 +323,7 @@ void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
// Emits CHECK_NOT_BACK_REF_NO_CASE with start_reg in the opcode word's
// 24-bit argument, followed by the on_not_equal label via EmitOrLink.
// The two one-argument Emit calls are the earlier byte-wise encoding.
void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
int start_reg,
Label* on_not_equal) {
Emit(BC_CHECK_NOT_BACK_REF_NO_CASE);
Emit(start_reg);
Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
EmitOrLink(on_not_equal);
}
......@@ -331,9 +331,8 @@ void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
// Emits CHECK_NOT_REGS_EQUAL: reg1 goes in the opcode word's 24-bit
// argument, reg2 follows as its own 32-bit slot, then the on_not_equal label
// via EmitOrLink. The three one-argument Emit calls are the earlier encoding
// that stored each register index in a single byte.
void RegExpMacroAssemblerIrregexp::CheckNotRegistersEqual(int reg1,
int reg2,
Label* on_not_equal) {
Emit(BC_CHECK_NOT_REGS_EQUAL);
Emit(reg1);
Emit(reg2);
Emit(BC_CHECK_NOT_REGS_EQUAL, reg1);
Emit32(reg2);
EmitOrLink(on_not_equal);
}
......@@ -378,15 +377,12 @@ void RegExpMacroAssemblerIrregexp::CheckCharacters(
// load below.
for (int i = str.length() - 1; i >= 0; i--) {
if (check_end_of_string && i == str.length() - 1) {
Emit(BC_LOAD_CURRENT_CHAR);
Emit32(cp_offset + i);
Emit(BC_LOAD_CURRENT_CHAR, cp_offset + i);
EmitOrLink(on_failure);
} else {
Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED);
Emit32(cp_offset + i);
Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED, cp_offset + i);
}
Emit(BC_CHECK_NOT_CHAR);
Emit32(str[i]);
Emit(BC_CHECK_NOT_CHAR, str[i]);
EmitOrLink(on_failure);
}
}
......@@ -396,9 +392,8 @@ void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index,
int comparand,
Label* on_less_than) {
ASSERT(comparand >= 0 && comparand <= 65535);
Emit(BC_CHECK_REGISTER_LT);
Emit(register_index);
Emit16(comparand);
Emit(BC_CHECK_REGISTER_LT, register_index);
Emit32(comparand);
EmitOrLink(on_less_than);
}
......@@ -407,24 +402,22 @@ void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index,
int comparand,
Label* on_greater_or_equal) {
ASSERT(comparand >= 0 && comparand <= 65535);
Emit(BC_CHECK_REGISTER_GE);
Emit(register_index);
Emit16(comparand);
Emit(BC_CHECK_REGISTER_GE, register_index);
Emit32(comparand);
EmitOrLink(on_greater_or_equal);
}
// Emits CHECK_REGISTER_EQ_POS with register_index in the opcode word's
// 24-bit argument, followed by the on_eq label via EmitOrLink.
// The two one-argument Emit calls are the earlier byte-wise encoding.
void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index,
Label* on_eq) {
Emit(BC_CHECK_REGISTER_EQ_POS);
Emit(register_index);
Emit(BC_CHECK_REGISTER_EQ_POS, register_index);
EmitOrLink(on_eq);
}
Handle<Object> RegExpMacroAssemblerIrregexp::GetCode(Handle<String> source) {
Bind(&backtrack_);
Emit(BC_POP_BT);
Emit(BC_POP_BT, 0);
Handle<ByteArray> array = Factory::NewByteArray(length());
Copy(array->GetDataStartAddress());
return array;
......
......@@ -120,7 +120,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
// Code and bitmap emission.
inline void Emit32(uint32_t x);
inline void Emit16(uint32_t x);
inline void Emit(uint32_t x);
inline void Emit(uint32_t bc, uint32_t arg);
// Bytecode buffer.
int length();
void Copy(Address a);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment