Commit eebb18d3 authored by Jakob Gruber, committed by Commit Bot

[regexp] Add dedicated flags for printing regexp code and bytecode

Printing regexp code used to be behind the generic --print-code flag, but
there was no way to distinguish irregexp-generated code from other code; and
printing regexp bytecode was not supported at all (the
--trace-regexp-bytecodes flag *did* exist, but prints the execution
trace at runtime and not the generated bytecode sequence).

This CL adds two new flags:

--print-regexp-code
--print-regexp-bytecode

Regexp code is no longer printed as part of --print-code.

Example output for --print-regexp-bytecode:

generated bytecode for regexp pattern: .(?<!^.)
0x1ddcc614cbd0     0  PUSH_BT, 02, 00, 00, 00, c0, 00, 00, 00 .......
0x1ddcc614cbd8     8  LOAD_CURRENT_CHAR, 11, 00, 00, 00, b0, 00, 00, 00 .......
0x1ddcc614cbe0    10  CHECK_CHAR, 18, 0a, 00, 00, b0, 00, 00, 00 .......
0x1ddcc614cbe8    18  CHECK_CHAR, 18, 0d, 00, 00, b0, 00, 00, 00 .......
0x1ddcc614cbf0    20  PUSH_CP, 01, 00, 00, 00 ...

Bug: chromium:996391
Change-Id: I731defbd7cf9ed29753a39bb1d7205dc136ca950
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1773249
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Auto-Submit: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63442}
parent e39c7019
......@@ -1514,7 +1514,6 @@ DEFINE_BOOL(trace_elements_transitions, false, "trace elements transitions")
DEFINE_BOOL(trace_creation_allocation_sites, false,
"trace the creation of allocation sites")
// codegen-ia32.cc / codegen-arm.cc
DEFINE_BOOL(print_code, false, "print generated code")
DEFINE_BOOL(print_opt_code, false, "print optimized code")
DEFINE_STRING(print_opt_code_filter, "*", "filter for printing optimized code")
......@@ -1522,6 +1521,8 @@ DEFINE_BOOL(print_code_verbose, false, "print more information for code")
DEFINE_BOOL(print_builtin_code, false, "print generated code for builtins")
DEFINE_STRING(print_builtin_code_filter, "*",
"filter for printing builtin code")
DEFINE_BOOL(print_regexp_code, false, "print generated regexp code")
DEFINE_BOOL(print_regexp_bytecode, false, "print generated regexp bytecode")
DEFINE_BOOL(print_builtin_size, false, "print code size for builtins")
#ifdef ENABLE_DISASSEMBLER
......@@ -1538,6 +1539,7 @@ DEFINE_IMPLICATION(print_all_code, print_code)
DEFINE_IMPLICATION(print_all_code, print_opt_code)
DEFINE_IMPLICATION(print_all_code, print_code_verbose)
DEFINE_IMPLICATION(print_all_code, print_builtin_code)
DEFINE_IMPLICATION(print_all_code, print_regexp_code)
DEFINE_IMPLICATION(print_all_code, code_comments)
#endif
......
......@@ -5,6 +5,8 @@
#ifndef V8_REGEXP_REGEXP_BYTECODES_H_
#define V8_REGEXP_REGEXP_BYTECODES_H_
#include "src/base/macros.h"
namespace v8 {
namespace internal {
......@@ -70,14 +72,40 @@ const int BYTECODE_SHIFT = 8;
V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */ \
V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32 */
#define DECLARE_BYTECODES(name, code, length) static const int BC_##name = code;
#define COUNT(...) +1
static constexpr int kRegExpBytecodeCount = BYTECODE_ITERATOR(COUNT);
#undef COUNT
// Just making sure we assigned values above properly. They should be
// contiguous, strictly increasing, and start at 0.
// TODO(jgruber): Do not explicitly assign values, instead generate them
// implicitly from the list order.
STATIC_ASSERT(kRegExpBytecodeCount == 53);
#define DECLARE_BYTECODES(name, code, length) \
static constexpr int BC_##name = code;
BYTECODE_ITERATOR(DECLARE_BYTECODES)
#undef DECLARE_BYTECODES
#define DECLARE_BYTECODE_LENGTH(name, code, length) \
static const int BC_##name##_LENGTH = length;
BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH)
// Length of every bytecode, taken from the third argument of each V(...)
// entry in BYTECODE_ITERATOR and stored in list order. The table is indexed
// by bytecode value (see RegExpBytecodeLength below).
static constexpr int kRegExpBytecodeLengths[] = {
#define DECLARE_BYTECODE_LENGTH(name, code, length) length,
BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH)
#undef DECLARE_BYTECODE_LENGTH
};
// Returns the length stored in kRegExpBytecodeLengths for |bytecode|.
// The index is not range-checked; an out-of-range value reads outside the
// table.
inline constexpr int RegExpBytecodeLength(int bytecode) {
return kRegExpBytecodeLengths[bytecode];
}
// Name of every bytecode as a string literal: the first argument of each
// V(...) entry in BYTECODE_ITERATOR, stringified and stored in list order.
// The table is indexed by bytecode value (see RegExpBytecodeName below).
static const char* const kRegExpBytecodeNames[] = {
#define DECLARE_BYTECODE_NAME(name, ...) #name,
BYTECODE_ITERATOR(DECLARE_BYTECODE_NAME)
#undef DECLARE_BYTECODE_NAME
};
// Returns the name stored in kRegExpBytecodeNames for |bytecode|.
// The index is not range-checked; an out-of-range value reads outside the
// table.
inline const char* RegExpBytecodeName(int bytecode) {
return kRegExpBytecodeNames[bytecode];
}
} // namespace internal
} // namespace v8
......
......@@ -5,13 +5,11 @@
#include "src/regexp/regexp-compiler.h"
#include "src/base/safe_conversions.h"
#include "src/diagnostics/code-tracer.h"
#include "src/execution/isolate.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/regexp-macro-assembler-arch.h"
#include "src/regexp/regexp-macro-assembler-tracer.h"
#include "src/strings/unicode-inl.h"
#include "src/utils/ostreams.h"
#include "src/zone/zone-list-inl.h"
#ifdef V8_INTL_SUPPORT
......@@ -273,13 +271,7 @@ RegExpCompiler::CompilationResult RegExpCompiler::Assemble(
Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
isolate->IncreaseTotalRegexpCodeGenerated(code->Size());
work_list_ = nullptr;
#ifdef ENABLE_DISASSEMBLER
if (FLAG_print_code && code->IsCode()) {
CodeTracer::Scope trace_scope(isolate->GetCodeTracer());
OFStream os(trace_scope.file());
Handle<Code>::cast(code)->Disassemble(pattern->ToCString().get(), os);
}
#endif
#ifdef DEBUG
if (FLAG_trace_regexp_assembler) {
delete macro_assembler_;
......
......@@ -30,9 +30,10 @@
namespace v8 {
namespace internal {
static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
int len, Vector<const uc16> subject,
bool unicode) {
namespace {
bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
Vector<const uc16> subject, bool unicode) {
Address offset_a =
reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
Address offset_b =
......@@ -42,9 +43,8 @@ static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
offset_a, offset_b, length, unicode ? nullptr : isolate) == 1;
}
static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
int len, Vector<const uint8_t> subject,
bool unicode) {
bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
Vector<const uint8_t> subject, bool unicode) {
// For Latin1 characters the unicode flag makes no difference.
for (int i = 0; i < len; i++) {
unsigned int old_char = subject[from++];
......@@ -63,42 +63,48 @@ static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
return true;
}
void DisassembleSingleBytecode(const byte* code_base, const byte* pc) {
PrintF("%s", RegExpBytecodeName(*pc));
// Args and the bytecode as hex.
for (int i = 0; i < RegExpBytecodeLength(*pc); i++) {
PrintF(", %02x", pc[i]);
}
PrintF(" ");
// Args as ascii.
for (int i = 1; i < RegExpBytecodeLength(*pc); i++) {
unsigned char b = pc[i];
PrintF("%c", std::isprint(b) ? b : '.');
}
PrintF("\n");
}
#ifdef DEBUG
static void TraceInterpreter(const byte* code_base, const byte* pc,
int stack_depth, int current_position,
uint32_t current_char, int bytecode_length,
const char* bytecode_name) {
void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
int stack_depth, int current_position,
uint32_t current_char, int bytecode_length,
const char* bytecode_name) {
if (FLAG_trace_regexp_bytecodes) {
bool printable = (current_char < 127 && current_char >= 32);
const bool printable = std::isprint(current_char);
const char* format =
printable
? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s"
: "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = "
: "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = ";
PrintF(format, pc - code_base, stack_depth, current_position, current_char,
printable ? current_char : '.', bytecode_name);
for (int i = 0; i < bytecode_length; i++) {
printf(", %02x", pc[i]);
}
printf(" ");
for (int i = 1; i < bytecode_length; i++) {
unsigned char b = pc[i];
if (b < 127 && b >= 32) {
printf("%c", b);
} else {
printf(".");
}
}
printf("\n");
printable ? current_char : '.');
DisassembleSingleBytecode(code_base, pc);
}
}
#endif // DEBUG
static int32_t Load32Aligned(const byte* pc) {
int32_t Load32Aligned(const byte* pc) {
DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3);
return *reinterpret_cast<const int32_t*>(pc);
}
static int32_t Load16Aligned(const byte* pc) {
int32_t Load16Aligned(const byte* pc) {
DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
return *reinterpret_cast<const uint16_t*>(pc);
}
......@@ -140,8 +146,6 @@ class BacktrackStack {
DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
};
namespace {
IrregexpInterpreter::Result StackOverflow(Isolate* isolate,
RegExp::CallOrigin call_origin) {
CHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
......@@ -268,18 +272,18 @@ IrregexpInterpreter::Result HandleInterrupts(
// don't hit the cache and have to fetch the next handler address from physical
// memory, instructions between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH can
// potentially be executed unconditionally, reducing memory stall.
#define ADVANCE(name) \
next_pc = pc + BC_##name##_LENGTH; \
#define ADVANCE(name) \
next_pc = pc + RegExpBytecodeLength(BC_##name); \
DECODE()
#define SET_PC_FROM_OFFSET(offset) \
next_pc = code_base + offset; \
DECODE()
#ifdef DEBUG
#define BYTECODE(name) \
BC_LABEL(name) \
TraceInterpreter(code_base, pc, backtrack_stack.sp(), current, current_char, \
BC_##name##_LENGTH, #name);
#define BYTECODE(name) \
BC_LABEL(name) \
MaybeTraceInterpreter(code_base, pc, backtrack_stack.sp(), current, \
current_char, RegExpBytecodeLength(BC_##name), #name);
#else
#define BYTECODE(name) BC_LABEL(name)
#endif // DEBUG
......@@ -779,6 +783,25 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
} // namespace
// static
// Prints a header naming |pattern|, then walks |byte_array| from its first
// byte and prints one line per bytecode instruction: the instruction's
// address, its offset from the start (in hex), and its disassembly.
void IrregexpInterpreter::Disassemble(ByteArray byte_array,
                                      const std::string& pattern) {
  // NOTE(review): presumably required because raw pointers into |byte_array|
  // are held for the duration of the walk — confirm.
  DisallowHeapAllocation no_gc;

  PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern.c_str());

  const byte* const bytecode_start = byte_array.GetDataStartAddress();
  const int total_length = byte_array.length();

  // The step size depends on the instruction just printed, so the loop
  // advances inside its body rather than in the for-header.
  for (ptrdiff_t offset = 0; offset < total_length;) {
    const byte* const instruction = bytecode_start + offset;
    PrintF("%p %4" V8PRIxPTRDIFF " ", instruction, offset);
    DisassembleSingleBytecode(bytecode_start, instruction);
    offset += RegExpBytecodeLength(*instruction);
  }
}
// static
IrregexpInterpreter::Result IrregexpInterpreter::Match(
Isolate* isolate, JSRegExp regexp, String subject_string, int* registers,
......
......@@ -41,6 +41,8 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
int registers_length, int start_position,
RegExp::CallOrigin call_origin);
static void Disassemble(ByteArray byte_array, const std::string& pattern);
private:
static Result Match(Isolate* isolate, JSRegExp regexp, String subject_string,
int* registers, int registers_length, int start_position,
......
......@@ -5,6 +5,7 @@
#include "src/regexp/regexp.h"
#include "src/codegen/compilation-cache.h"
#include "src/diagnostics/code-tracer.h"
#include "src/heap/heap-inl.h"
#include "src/objects/js-regexp-inl.h"
#include "src/regexp/regexp-bytecode-generator.h"
......@@ -14,6 +15,7 @@
#include "src/regexp/regexp-macro-assembler-arch.h"
#include "src/regexp/regexp-parser.h"
#include "src/strings/string-search.h"
#include "src/utils/ostreams.h"
namespace v8 {
namespace internal {
......@@ -572,14 +574,15 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec(
subject = String::Flatten(isolate, subject);
// Prepare space for the return values.
#ifdef DEBUG
if (FLAG_regexp_interpret_all && FLAG_trace_regexp_bytecodes) {
if (FLAG_trace_regexp_bytecodes && regexp->ShouldProduceBytecode()) {
String pattern = regexp->Pattern();
PrintF("\n\nRegexp match: /%s/\n\n", pattern.ToCString().get());
PrintF("\n\nSubject string: '%s'\n\n", subject->ToCString().get());
}
#endif
// Prepare space for the return values.
int required_registers = RegExp::IrregexpPrepare(isolate, regexp, subject);
if (required_registers < 0) {
// Compiling failed with an exception.
......@@ -830,6 +833,26 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
RegExpCompiler::CompilationResult result = compiler.Assemble(
isolate, macro_assembler.get(), node, data->capture_count, pattern);
// Code / bytecode printing.
{
#ifdef ENABLE_DISASSEMBLER
if (FLAG_print_regexp_code &&
data->compilation_target == RegExpCompilationTarget::kNative) {
CodeTracer::Scope trace_scope(isolate->GetCodeTracer());
OFStream os(trace_scope.file());
Handle<Code> c(Code::cast(result.code), isolate);
auto pattern_cstring = pattern->ToCString();
c->Disassemble(pattern_cstring.get(), os);
}
#endif
if (FLAG_print_regexp_bytecode &&
data->compilation_target == RegExpCompilationTarget::kBytecode) {
Handle<ByteArray> bytecode(ByteArray::cast(result.code), isolate);
auto pattern_cstring = pattern->ToCString();
IrregexpInterpreter::Disassemble(*bytecode, pattern_cstring.get());
}
}
if (FLAG_correctness_fuzzer_suppressions &&
strncmp(result.error_message, "Stack overflow", 15) == 0) {
FATAL("Aborting on stack overflow");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment