Commit 1a9e303e authored by lrn@chromium.org

Made char comparisons work

Tracer for regexp macro-assembler instructions


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@859 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 319fddf6
......@@ -65,7 +65,9 @@ SOURCES = {
'os:nullos': ['platform-nullos.cc'],
'os:win32': ['platform-win32.cc'],
'mode:release': [],
'mode:debug': ['objects-debug.cc', 'prettyprinter.cc']
'mode:debug': [
'objects-debug.cc', 'prettyprinter.cc', 'regexp-macro-assembler-tracer.cc'
]
}
......
......@@ -877,7 +877,7 @@ void Assembler::rep_cmpsb() {
EMIT(0xA6); // CMPSB
}
void Assembler::rep_cmpsw() {
void Assembler::rep_cmpsl() {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
EMIT(0xFC); // CLD to ensure forward operation
......
......@@ -499,7 +499,7 @@ class Assembler : public Malloced {
void cmp(const Operand& op, const Immediate& imm);
void rep_cmpsb();
void rep_cmpsw();
void rep_cmpsl();
void dec_b(Register dst);
......
......@@ -202,7 +202,6 @@ DEFINE_bool(preemption, false,
// irregexp
DEFINE_bool(irregexp, false, "new regular expression code")
DEFINE_bool(trace_regexps, false, "trace Irregexp execution")
DEFINE_bool(trace_regexp_bytecodes, false, "trace Irregexp bytecode execution")
DEFINE_bool(irregexp_native, false, "use native code Irregexp implementation (IA32 only)")
DEFINE_bool(disable_jscre, false, "abort if JSCRE is used. Only useful with --irregexp")
......@@ -296,6 +295,11 @@ DEFINE_bool(collect_heap_spill_statistics, false,
"report heap spill statistics along with heap_stats "
"(requires heap_stats)")
// irregexp
DEFINE_bool(trace_regexp_bytecodes, false, "trace Irregexp bytecode execution")
DEFINE_bool(trace_regexp_assembler, false,
"trace Irregexp macro assembler calls.")
//
// Logging and profiling only flags
//
......
......@@ -42,6 +42,7 @@
#include "parser.h"
#include "assembler-irregexp.h"
#include "regexp-macro-assembler.h"
#include "regexp-macro-assembler-tracer.h"
#include "regexp-macro-assembler-irregexp.h"
#ifdef ARM
......@@ -945,11 +946,16 @@ Handle<FixedArray> RegExpCompiler::Assemble(
RegExpMacroAssembler* macro_assembler,
RegExpNode* start,
int capture_count) {
macro_assembler_ = macro_assembler;
#ifdef DEBUG
if (FLAG_trace_regexp_assembler)
macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
else
#endif
macro_assembler_ = macro_assembler;
List <RegExpNode*> work_list(0);
work_list_ = &work_list;
Label fail;
macro_assembler->PushBacktrack(&fail);
macro_assembler_->PushBacktrack(&fail);
if (!start->GoTo(this)) {
fail.Unuse();
return Handle<FixedArray>::null();
......@@ -960,19 +966,24 @@ Handle<FixedArray> RegExpCompiler::Assemble(
return Handle<FixedArray>::null();
}
}
macro_assembler->Bind(&fail);
macro_assembler->Fail();
macro_assembler_->Bind(&fail);
macro_assembler_->Fail();
Handle<FixedArray> array =
Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);
array->set(RegExpImpl::kIrregexpImplementationIndex,
Smi::FromInt(macro_assembler->Implementation()));
Smi::FromInt(macro_assembler_->Implementation()));
array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex,
Smi::FromInt(next_register_));
array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex,
Smi::FromInt(capture_count));
Handle<Object> code = macro_assembler->GetCode();
Handle<Object> code = macro_assembler_->GetCode();
array->set(RegExpImpl::kIrregexpCodeIndex, *code);
work_list_ = NULL;
#ifdef DEBUG
if (FLAG_trace_regexp_assembler) {
delete macro_assembler_;
}
#endif
return array;
}
......
......@@ -192,13 +192,8 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
__ mov(ebx, esi);
__ lea(edi, Operand(esi, edi, times_1, byte_offset));
LoadConstantBufferAddress(esi, &constant_buffer);
__ mov(ecx, str.length());
if (mode_ == ASCII) {
__ rep_cmpsb();
} else {
ASSERT(mode_ == UC16);
__ rep_cmpsw();
}
__ mov(ecx, str.length() * char_size());
__ rep_cmpsb();
__ mov(esi, ebx);
__ mov(edi, eax);
BranchOrBacktrack(not_equal, on_failure);
......@@ -229,11 +224,11 @@ void RegExpMacroAssemblerIA32::CheckNotBackReference(
__ push(esi);
__ add(edi, Operand(esi));
__ add(esi, Operand(eax));
if (mode_ == ASCII) {
__ rep_cmpsb();
} else {
__ rep_cmpsw();
if (char_size() > 0) {
ASSERT(char_size() == 2);
__ add(ecx, Operand(ecx));
}
__ rep_cmpsb();
__ pop(esi);
__ mov(edi, Operand(ebx));
BranchOrBacktrack(not_equal, on_no_match);
......@@ -397,8 +392,9 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode() {
for (int i = 0; i < num_saved_registers_; i++) {
__ mov(eax, register_location(i));
__ add(eax, Operand(ecx)); // Convert to index from start, not end.
if (char_size() == 2) {
__ shr(eax);
if (char_size() > 1) {
ASSERT(char_size() == 2);
__ sar(eax, 1); // Convert to character index, not byte.
}
__ mov(Operand(ebx, i * kPointerSize), eax);
}
......
This diff is collapsed.
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef REGEXP_MACRO_ASSEMBLER_TRACER_H_
#define REGEXP_MACRO_ASSEMBLER_TRACER_H_
namespace v8 { namespace internal {
// Decorator on a RegExpMacroAssembler that writes out all calls made to it.
//
// The class derives from RegExpMacroAssembler and declares the macro-assembler
// operations below as virtual methods. Only the declarations are visible in
// this header; the method bodies live elsewhere.
// NOTE(review): presumably each method reports the call and then forwards it
// to the wrapped assembler -- confirm against the implementation file.
class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
public:
// Creates a tracer around |assembler|. The constructor is explicit so that a
// plain RegExpMacroAssembler* is never silently converted into a tracer.
// NOTE(review): this header does not show whether the tracer takes ownership
// of |assembler|; do not rely on the destructor to free it without checking.
explicit RegExpMacroAssemblerTracer(RegExpMacroAssembler* assembler);
virtual ~RegExpMacroAssemblerTracer();
// Macro-assembler operations, listed in alphabetical order.
virtual void AdvanceCurrentPosition(int by); // Signed cp change.
virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
virtual void Backtrack();
virtual void Bind(Label* label);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void CheckCharacter(uc16 c, Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(
Vector<const uc16> str,
int cp_offset,
Label* on_failure);
virtual void CheckCurrentPosition(
int register_index,
Label* on_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c,
uc16 or_with,
Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
uc16 minus_then_or_with,
Label* on_not_equal);
virtual void DispatchByteMap(
uc16 start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations);
// Unlike the two dispatch methods above, |start| is a byte here, not a uc16.
virtual void DispatchHighByteMap(
byte start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void EmitOrLink(Label* label);
virtual void Fail();
virtual Handle<Object> GetCode();
virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
virtual void PushCurrentPosition();
virtual void PushRegister(int register_index);
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg);
virtual void WriteStackPointerToRegister(int reg);
private:
// NOTE(review): presumably the assembler handed to the constructor, i.e. the
// object being decorated -- the assignment is not visible in this header.
RegExpMacroAssembler* assembler_;
};
}} // namespace v8::internal
#endif // REGEXP_MACRO_ASSEMBLER_TRACER_H_
......@@ -811,6 +811,65 @@ TEST(MacroAssemblerIA32Simple) {
}
// Generates IA32 code in UC16 mode for a program that matches the literal
// "foo" at the current position and writes the positions before and after
// the match to registers 0 and 1. The generated code is then run against one
// two-byte input that starts with "foo" and one that does not.
TEST(MacroAssemblerIA32SimpleUC16) {
  // Signature used to call the generated code: a handle location for the
  // input string, the start and end of the character data as byte offsets
  // from the string object, and the output capture array.
  typedef bool (*UC16Test) (
      SeqTwoByteString** base, int start_index, int end_index, int* captures);

  V8::Initialize(NULL);

  // regexp-macro-assembler-ia32 needs a handle scope to allocate
  // byte-arrays for constants.
  v8::HandleScope scope;

  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::UC16, 4);

  uc16 foo_chars[3] = {'f', 'o', 'o'};
  Vector<const uc16> foo(foo_chars, 3);

  // Program under test: match "foo", record the position before and after
  // the three matched characters, then succeed; otherwise fail.
  Label fail;
  m.CheckCharacters(foo, 0, &fail);
  m.WriteCurrentPositionToRegister(0);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(1);
  m.Succeed();
  m.Bind(&fail);
  m.Fail();

  Handle<Object> code_object = m.GetCode();
  Handle<Code> code = Handle<Code>::cast(code_object);
  UC16Test test = FUNCTION_CAST<UC16Test>(code->entry());

  // Arbitrary non-result values so that stale entries are detectable.
  int captures[4] = {42, 37, 87, 117};

  // The last character is U+00A0. It is spelled as an integer literal rather
  // than the char literal '\xa0': where plain char is signed, '\xa0' is
  // negative and would be converted to 0xFFA0 (and is rejected as a narrowing
  // conversion in brace-initialization by C++11 and later).
  const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o', 0xa0};
  Handle<String> input =
      Factory::NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
  Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();
  // Offsets are in bytes, measured from the string object itself.
  int start_offset = start_adr - reinterpret_cast<Address>(*seq_input);
  int end_offset = start_offset + seq_input->length() * sizeof(uc16);

  bool success =
      test(seq_input.location(), start_offset, end_offset, captures);

  CHECK(success);
  // Captures are reported as character indices, not byte offsets.
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  // Registers 2 and 3 are never written by the program above and are
  // expected to read back as -1.
  CHECK_EQ(-1, captures[2]);
  CHECK_EQ(-1, captures[3]);

  // Second input does not start with "foo", so the match must fail.
  const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a', 0xa0};
  input = Factory::NewStringFromTwoByte(Vector<const uc16>(input_data2, 9));
  seq_input = Handle<SeqTwoByteString>::cast(input);
  start_adr = seq_input->GetCharsAddress();
  start_offset = start_adr - reinterpret_cast<Address>(*seq_input);
  end_offset = start_offset + seq_input->length() * sizeof(uc16);

  success = test(seq_input.location(), start_offset, end_offset, captures);

  CHECK(!success);
}
TEST(MacroAssemblerIA32Backtrack) {
typedef bool (*AsciiTest) (
SeqAsciiString** base, int start_index, int end_index, int* captures);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment