Commit 282a74c7 authored by Jakob Gruber's avatar Jakob Gruber Committed by Commit Bot

Reland "[regexp] Bytecode peephole optimization"

This is a reland of 66129430

Fixed: Unaligned reads, unspecified evaluation order.

Original change's description:
> [regexp] Bytecode peephole optimization
>
> Bytecodes used by the regular expression interpreter often occur in
> specific sequences. The number of dispatches in the interpreter can be
> reduced if those sequences are combined into a single bytecode.
>
> This CL adds a peephole optimization pass for regexp bytecodes.
> This pass checks the generated bytecode for pre-defined sequences that
> can be merged into a single bytecode.
>
> With the currently implemented bytecode sequences a speedup of 1.12x on
> regex-dna and octane-regexp is achieved.
>
> Bug: v8:9330
> Change-Id: I827f93273a5848e5963c7e3329daeb898995d151
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1813743
> Commit-Queue: Patrick Thier <pthier@google.com>
> Reviewed-by: Peter Marshall <petermarshall@chromium.org>
> Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#63992}

Cq-Include-Trybots: luci.v8.try:v8_linux64_ubsan_rel_ng
Cq-Include-Trybots: luci.v8.try:v8_linux_gcc_rel
Bug: v8:9330,chromium:1008502,chromium:1008631
Change-Id: Ib9fc395b6809aa1debdb54d9fba5b7f09a235e5b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1828917Reviewed-by: 's avatarPeter Marshall <petermarshall@chromium.org>
Reviewed-by: 's avatarJakob Gruber <jgruber@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64064}
parent 14ffd21d
......@@ -2778,6 +2778,9 @@ v8_source_set("v8_base_without_compiler") {
"src/regexp/regexp-bytecode-generator-inl.h",
"src/regexp/regexp-bytecode-generator.cc",
"src/regexp/regexp-bytecode-generator.h",
"src/regexp/regexp-bytecode-peephole.cc",
"src/regexp/regexp-bytecode-peephole.h",
"src/regexp/regexp-bytecodes.cc",
"src/regexp/regexp-bytecodes.h",
"src/regexp/regexp-compiler-tonode.cc",
"src/regexp/regexp-compiler.cc",
......
......@@ -1274,6 +1274,15 @@ DEFINE_BOOL(regexp_tier_up, true,
DEFINE_INT(regexp_tier_up_ticks, 1,
"set the number of executions for the regexp interpreter before "
"tiering-up to the compiler")
DEFINE_BOOL(regexp_peephole_optimization, true,
"enable peephole optimization for regexp bytecode")
DEFINE_BOOL(trace_regexp_peephole_optimization, false,
"trace regexp bytecode peephole optimization")
DEFINE_BOOL(trace_regexp_bytecodes, false, "trace regexp bytecode execution")
DEFINE_BOOL(trace_regexp_assembler, false,
"trace regexp macro assembler calls.")
DEFINE_BOOL(trace_regexp_parser, false, "trace regexp parsing")
DEFINE_BOOL(trace_regexp_tier_up, false, "trace regexp tiering up execution")
// Testing flags test/cctest/test-{flags,api,serialization}.cc
DEFINE_BOOL(testing_bool_flag, true, "testing_bool_flag")
......@@ -1408,11 +1417,6 @@ DEFINE_BOOL(trace_isolates, false, "trace isolate state changes")
// Regexp
DEFINE_BOOL(regexp_possessive_quantifier, false,
"enable possessive quantifier syntax for testing")
DEFINE_BOOL(trace_regexp_bytecodes, false, "trace regexp bytecode execution")
DEFINE_BOOL(trace_regexp_assembler, false,
"trace regexp macro assembler calls.")
DEFINE_BOOL(trace_regexp_parser, false, "trace regexp parsing")
DEFINE_BOOL(trace_regexp_tier_up, false, "trace regexp tiering up execution")
// Debugger
DEFINE_BOOL(print_break_location, false, "print source location on debug break")
......
......@@ -7,6 +7,7 @@
#include "src/ast/ast.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/regexp-bytecode-generator-inl.h"
#include "src/regexp/regexp-bytecode-peephole.h"
#include "src/regexp/regexp-bytecodes.h"
#include "src/regexp/regexp-macro-assembler.h"
......@@ -18,6 +19,7 @@ RegExpBytecodeGenerator::RegExpBytecodeGenerator(Isolate* isolate, Zone* zone)
buffer_(Vector<byte>::New(1024)),
pc_(0),
advance_current_end_(kInvalidPC),
jump_edges_(zone),
isolate_(isolate) {}
RegExpBytecodeGenerator::~RegExpBytecodeGenerator() {
......@@ -39,6 +41,7 @@ void RegExpBytecodeGenerator::Bind(Label* l) {
int fixup = pos;
pos = *reinterpret_cast<int32_t*>(buffer_.begin() + fixup);
*reinterpret_cast<uint32_t*>(buffer_.begin() + fixup) = pc_;
jump_edges_.emplace(fixup, pc_);
}
}
l->bind_to(pc_);
......@@ -46,16 +49,17 @@ void RegExpBytecodeGenerator::Bind(Label* l) {
void RegExpBytecodeGenerator::EmitOrLink(Label* l) {
if (l == nullptr) l = &backtrack_;
int pos = 0;
if (l->is_bound()) {
Emit32(l->pos());
pos = l->pos();
jump_edges_.emplace(pc_, pos);
} else {
int pos = 0;
if (l->is_linked()) {
pos = l->pos();
}
l->link_to(pc_);
Emit32(pos);
}
Emit32(pos);
}
void RegExpBytecodeGenerator::PopRegister(int register_index) {
......@@ -365,8 +369,16 @@ void RegExpBytecodeGenerator::IfRegisterEqPos(int register_index,
Handle<HeapObject> RegExpBytecodeGenerator::GetCode(Handle<String> source) {
Bind(&backtrack_);
Emit(BC_POP_BT, 0);
Handle<ByteArray> array = isolate_->factory()->NewByteArray(length());
Copy(array->GetDataStartAddress());
Handle<ByteArray> array;
if (FLAG_regexp_peephole_optimization) {
array = RegExpBytecodePeepholeOptimization::OptimizeBytecode(
isolate_, zone(), source, buffer_.begin(), length(), jump_edges_);
} else {
array = isolate_->factory()->NewByteArray(length());
Copy(array->GetDataStartAddress());
}
return array;
}
......
......@@ -100,6 +100,12 @@ class V8_EXPORT_PRIVATE RegExpBytecodeGenerator : public RegExpMacroAssembler {
int advance_current_offset_;
int advance_current_end_;
// Stores jump edges emitted for the bytecode (used by
// RegExpBytecodePeepholeOptimization).
// Key: jump source (offset in buffer_ where jump destination is stored).
// Value: jump destination (offset in buffer_ to jump to).
ZoneUnorderedMap<int, int> jump_edges_;
Isolate* isolate_;
static const int kInvalidPC = -1;
......
This diff is collapsed.
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
#define V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
#include "src/common/globals.h"
#include "src/zone/zone-containers.h"
namespace v8 {
namespace internal {
class ByteArray;
// Peephole optimization for regexp interpreter bytecode.
// Pre-defined bytecode sequences occuring in the bytecode generated by the
// RegExpBytecodeGenerator can be optimized into a single bytecode.
class RegExpBytecodePeepholeOptimization : public AllStatic {
public:
// Performs peephole optimization on the given bytecode and returns the
// optimized bytecode.
static Handle<ByteArray> OptimizeBytecode(
Isolate* isolate, Zone* zone, Handle<String> source, const byte* bytecode,
int length, const ZoneUnorderedMap<int, int>& jump_edges);
};
} // namespace internal
} // namespace v8
#endif // V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/regexp/regexp-bytecodes.h"
#include <cctype>
#include "src/utils/utils.h"
namespace v8 {
namespace internal {
void RegExpBytecodeDisassembleSingle(const byte* code_base, const byte* pc) {
PrintF("%s", RegExpBytecodeName(*pc));
// Args and the bytecode as hex.
for (int i = 0; i < RegExpBytecodeLength(*pc); i++) {
PrintF(", %02x", pc[i]);
}
PrintF(" ");
// Args as ascii.
for (int i = 1; i < RegExpBytecodeLength(*pc); i++) {
unsigned char b = pc[i];
PrintF("%c", std::isprint(b) ? b : '.');
}
PrintF("\n");
}
void RegExpBytecodeDisassemble(const byte* code_base, int length,
const char* pattern) {
PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern);
ptrdiff_t offset = 0;
while (offset < length) {
const byte* const pc = code_base + offset;
PrintF("%p %4" V8PRIxPTRDIFF " ", pc, offset);
RegExpBytecodeDisassembleSingle(code_base, pc);
offset += RegExpBytecodeLength(*pc);
}
}
} // namespace internal
} // namespace v8
This diff is collapsed.
......@@ -64,23 +64,6 @@ bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
return true;
}
void DisassembleSingleBytecode(const byte* code_base, const byte* pc) {
PrintF("%s", RegExpBytecodeName(*pc));
// Args and the bytecode as hex.
for (int i = 0; i < RegExpBytecodeLength(*pc); i++) {
PrintF(", %02x", pc[i]);
}
PrintF(" ");
// Args as ascii.
for (int i = 1; i < RegExpBytecodeLength(*pc); i++) {
unsigned char b = pc[i];
PrintF("%c", std::isprint(b) ? b : '.');
}
PrintF("\n");
}
#ifdef DEBUG
void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
int stack_depth, int current_position,
......@@ -95,7 +78,7 @@ void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
PrintF(format, pc - code_base, stack_depth, current_position, current_char,
printable ? current_char : '.');
DisassembleSingleBytecode(code_base, pc);
RegExpBytecodeDisassembleSingle(code_base, pc);
}
}
#endif // DEBUG
......@@ -257,6 +240,13 @@ IrregexpInterpreter::Result HandleInterrupts(
return IrregexpInterpreter::SUCCESS;
}
bool CheckBitInTable(const uint32_t current_char, const byte* const table) {
int mask = RegExpMacroAssembler::kTableMask;
int b = table[(current_char & mask) >> kBitsPerByteLog2];
int bit = (current_char & (kBitsPerByte - 1));
return (b & (1 << bit)) != 0;
}
// If computed gotos are supported by the compiler, we can get addresses to
// labels directly in C/C++. Every bytecode handler has its own label and we
// store the addresses in a dispatch table indexed by bytecode. To execute the
......@@ -281,7 +271,7 @@ IrregexpInterpreter::Result HandleInterrupts(
#define DISPATCH() \
pc = next_pc; \
insn = next_insn; \
break
goto switch_dispatch_continuation
#endif // V8_USE_COMPUTED_GOTO
// ADVANCE/SET_PC_FROM_OFFSET are separated from DISPATCH, because ideally some
......@@ -331,19 +321,13 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
// Fill dispatch table from last defined bytecode up to the next power of two
// with BREAK (invalid operation).
// TODO(pthier): Find a way to fill up automatically (at compile time)
// 53 real bytecodes -> 11 fillers
// 59 real bytecodes -> 5 fillers
#define BYTECODE_FILLER_ITERATOR(V) \
V(BREAK) /* 1 */ \
V(BREAK) /* 2 */ \
V(BREAK) /* 3 */ \
V(BREAK) /* 4 */ \
V(BREAK) /* 5 */ \
V(BREAK) /* 6 */ \
V(BREAK) /* 7 */ \
V(BREAK) /* 8 */ \
V(BREAK) /* 9 */ \
V(BREAK) /* 10 */ \
V(BREAK) /* 11 */
V(BREAK) /* 5 */
#define COUNT(...) +1
static constexpr int kRegExpBytecodeFillerCount =
......@@ -652,10 +636,7 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
DISPATCH();
}
BYTECODE(CHECK_BIT_IN_TABLE) {
int mask = RegExpMacroAssembler::kTableMask;
byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
int bit = (current_char & (kBitsPerByte - 1));
if ((b & (1 << bit)) != 0) {
if (CheckBitInTable(current_char, pc + 8)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
ADVANCE(CHECK_BIT_IN_TABLE);
......@@ -834,6 +815,118 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
}
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint32_t advance = Load16Aligned(pc + 4);
uint32_t c = Load16Aligned(pc + 6);
while (static_cast<uintptr_t>(current + load_offset) <
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
if (c == current_char) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
DISPATCH();
}
current += advance;
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR_AND) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint16_t advance = Load16Aligned(pc + 4);
uint16_t c = Load16Aligned(pc + 6);
uint32_t mask = Load32Aligned(pc + 8);
int32_t maximum_offset = Load32Aligned(pc + 12);
while (static_cast<uintptr_t>(current + maximum_offset) <=
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
if (c == (current_char & mask)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
DISPATCH();
}
current += advance;
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 20));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR_POS_CHECKED) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint16_t advance = Load16Aligned(pc + 4);
uint16_t c = Load16Aligned(pc + 6);
int32_t maximum_offset = Load32Aligned(pc + 8);
while (static_cast<uintptr_t>(current + maximum_offset) <=
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
if (c == current_char) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
DISPATCH();
}
current += advance;
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_BIT_IN_TABLE) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint32_t advance = Load16Aligned(pc + 4);
const byte* table = pc + 8;
while (static_cast<uintptr_t>(current + load_offset) <
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
if (CheckBitInTable(current_char, table)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
DISPATCH();
}
current += advance;
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint16_t advance = Load16Aligned(pc + 4);
uint16_t limit = Load16Aligned(pc + 6);
const byte* table = pc + 8;
while (static_cast<uintptr_t>(current + load_offset) <
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
if (current_char > limit) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
DISPATCH();
}
if (!CheckBitInTable(current_char, table)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
DISPATCH();
}
current += advance;
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR_OR_CHAR) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint32_t advance = Load32Aligned(pc + 4);
uint16_t c = Load16Aligned(pc + 8);
uint16_t c2 = Load16Aligned(pc + 10);
while (static_cast<uintptr_t>(current + load_offset) <
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
// The two if-statements below are split up intentionally, as combining
// them seems to result in register allocation behaving quite
// differently and slowing down the resulting code.
if (c == current_char) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
DISPATCH();
}
if (c2 == current_char) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
DISPATCH();
}
current += advance;
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
DISPATCH();
}
#if V8_USE_COMPUTED_GOTO
// Lint gets confused a lot if we just use !V8_USE_COMPUTED_GOTO or ifndef
// V8_USE_COMPUTED_GOTO here.
......@@ -841,6 +934,9 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
default:
UNREACHABLE();
}
// Label we jump to in DISPATCH(). There must be no instructions between the
// end of the switch, this label and the end of the loop.
switch_dispatch_continuation : {}
#endif // V8_USE_COMPUTED_GOTO
}
}
......@@ -855,25 +951,6 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
} // namespace
// static
void IrregexpInterpreter::Disassemble(ByteArray byte_array,
const std::string& pattern) {
DisallowHeapAllocation no_gc;
PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern.c_str());
const byte* const code_base = byte_array.GetDataStartAddress();
const int byte_array_length = byte_array.length();
ptrdiff_t offset = 0;
while (offset < byte_array_length) {
const byte* const pc = code_base + offset;
PrintF("%p %4" V8PRIxPTRDIFF " ", pc, offset);
DisassembleSingleBytecode(code_base, pc);
offset += RegExpBytecodeLength(*pc);
}
}
// static
IrregexpInterpreter::Result IrregexpInterpreter::Match(
Isolate* isolate, JSRegExp regexp, String subject_string, int* registers,
......
......@@ -46,8 +46,6 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
int registers_length, int start_position,
RegExp::CallOrigin call_origin);
static void Disassemble(ByteArray byte_array, const std::string& pattern);
private:
static Result Match(Isolate* isolate, JSRegExp regexp, String subject_string,
int* registers, int registers_length, int start_position,
......
......@@ -9,6 +9,7 @@
#include "src/heap/heap-inl.h"
#include "src/objects/js-regexp-inl.h"
#include "src/regexp/regexp-bytecode-generator.h"
#include "src/regexp/regexp-bytecodes.h"
#include "src/regexp/regexp-compiler.h"
#include "src/regexp/regexp-dotprinter.h"
#include "src/regexp/regexp-interpreter.h"
......@@ -881,7 +882,8 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
data->compilation_target == RegExpCompilationTarget::kBytecode) {
Handle<ByteArray> bytecode(ByteArray::cast(result.code), isolate);
auto pattern_cstring = pattern->ToCString();
IrregexpInterpreter::Disassemble(*bytecode, pattern_cstring.get());
RegExpBytecodeDisassemble(bytecode->GetDataStartAddress(),
bytecode->length(), pattern_cstring.get());
}
}
......
This diff is collapsed.
#!/usr/bin/env python
# Copyright 2019 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
python %prog trace-file
Parses output generated by v8 with flag --trace-regexp-bytecodes and generates
a list of the most common sequences.
"""
from __future__ import print_function
import sys
import re
import collections
def parse(file, seqlen):
# example:
# pc = 00, sp = 0, curpos = 0, curchar = 0000000a ..., bc = PUSH_BT, 02, 00, 00, 00, e8, 00, 00, 00 .......
rx = re.compile(r'pc = (?P<pc>[0-9a-f]+), sp = (?P<sp>\d+), '
r'curpos = (?P<curpos>\d+), curchar = (?P<char_hex>[0-9a-f]+) '
r'(:?\.|\()(?P<char>\.|\w)(:?\.|\)), bc = (?P<bc>\w+), .*')
total = 0
bc_cnt = [None] * seqlen
for i in xrange(seqlen):
bc_cnt[i] = {}
last = [None] * seqlen
with open(file) as f:
l = f.readline()
while l:
l = l.strip()
if l.startswith("Start bytecode interpreter"):
for i in xrange(seqlen):
last[i] = collections.deque(maxlen=i+1)
match = rx.search(l)
if match:
total += 1
bc = match.group('bc')
for i in xrange(seqlen):
last[i].append(bc)
key = ' --> '.join(last[i])
bc_cnt[i][key] = bc_cnt[i].get(key,0) + 1
l = f.readline()
return bc_cnt, total
def print_most_common(d, seqlen, total):
sorted_d = sorted(d.items(), key=lambda kv: kv[1], reverse=True)
for (k,v) in sorted_d:
if v*100/total < 1.0:
return
print("{}: {} ({} %)".format(k,v,(v*100/total)))
def main(argv):
max_seq = 7
bc_cnt, total = parse(argv[1],max_seq)
for i in xrange(max_seq):
print()
print("Most common of length {}".format(i+1))
print()
print_most_common(bc_cnt[i], i, total)
if __name__ == '__main__':
main(sys.argv)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment