Commit 05eda1ac authored by Clemens Backes [né Hammacher]'s avatar Clemens Backes [né Hammacher] Committed by Commit Bot

Revert "[regexp] Bytecode peephole optimization"

This reverts commit 66129430.

Reason for revert: Fails on gcc: https://ci.chromium.org/p/v8/builders/ci/V8%20Linux%20gcc/3394

Original change's description:
> [regexp] Bytecode peephole optimization
> 
> Bytecodes used by the regular expression interpreter often occur in
> specific sequences. The number of dispatches in the interpreter can be
> reduced if those sequences are combined into a single bytecode.
> 
> This CL adds a peephole optimization pass for regexp bytecodes.
> This pass checks the generated bytecode for pre-defined sequences that
> can be merged into a single bytecode.
> 
> With the currently implemented bytecode sequences a speedup of 1.12x on
> regex-dna and octane-regexp is achieved.
> 
> Bug: v8:9330
> Change-Id: I827f93273a5848e5963c7e3329daeb898995d151
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1813743
> Commit-Queue: Patrick Thier <pthier@google.com>
> Reviewed-by: Peter Marshall <petermarshall@chromium.org>
> Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#63992}

TBR=jgruber@chromium.org,petermarshall@chromium.org,pthier@google.com

Change-Id: Ie526fe3691f6abdd16b51979000fdafb7afce8ef
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: v8:9330
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1826727Reviewed-by: 's avatarClemens Backes [né Hammacher] <clemensb@chromium.org>
Commit-Queue: Clemens Backes [né Hammacher] <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63998}
parent 70879048
......@@ -2776,9 +2776,6 @@ v8_source_set("v8_base_without_compiler") {
"src/regexp/regexp-bytecode-generator-inl.h",
"src/regexp/regexp-bytecode-generator.cc",
"src/regexp/regexp-bytecode-generator.h",
"src/regexp/regexp-bytecode-peephole.cc",
"src/regexp/regexp-bytecode-peephole.h",
"src/regexp/regexp-bytecodes.cc",
"src/regexp/regexp-bytecodes.h",
"src/regexp/regexp-compiler-tonode.cc",
"src/regexp/regexp-compiler.cc",
......
......@@ -1274,15 +1274,6 @@ DEFINE_BOOL(regexp_tier_up, true,
DEFINE_INT(regexp_tier_up_ticks, 1,
"set the number of executions for the regexp interpreter before "
"tiering-up to the compiler")
DEFINE_BOOL(regexp_peephole_optimization, true,
"enable peephole optimization for regexp bytecode")
DEFINE_BOOL(trace_regexp_peephole_optimization, false,
"trace regexp bytecode peephole optimization")
DEFINE_BOOL(trace_regexp_bytecodes, false, "trace regexp bytecode execution")
DEFINE_BOOL(trace_regexp_assembler, false,
"trace regexp macro assembler calls.")
DEFINE_BOOL(trace_regexp_parser, false, "trace regexp parsing")
DEFINE_BOOL(trace_regexp_tier_up, false, "trace regexp tiering up execution")
// Testing flags test/cctest/test-{flags,api,serialization}.cc
DEFINE_BOOL(testing_bool_flag, true, "testing_bool_flag")
......@@ -1417,6 +1408,11 @@ DEFINE_BOOL(trace_isolates, false, "trace isolate state changes")
// Regexp
DEFINE_BOOL(regexp_possessive_quantifier, false,
"enable possessive quantifier syntax for testing")
DEFINE_BOOL(trace_regexp_bytecodes, false, "trace regexp bytecode execution")
DEFINE_BOOL(trace_regexp_assembler, false,
"trace regexp macro assembler calls.")
DEFINE_BOOL(trace_regexp_parser, false, "trace regexp parsing")
DEFINE_BOOL(trace_regexp_tier_up, false, "trace regexp tiering up execution")
// Debugger
DEFINE_BOOL(print_break_location, false, "print source location on debug break")
......
......@@ -7,7 +7,6 @@
#include "src/ast/ast.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/regexp-bytecode-generator-inl.h"
#include "src/regexp/regexp-bytecode-peephole.h"
#include "src/regexp/regexp-bytecodes.h"
#include "src/regexp/regexp-macro-assembler.h"
......@@ -19,7 +18,6 @@ RegExpBytecodeGenerator::RegExpBytecodeGenerator(Isolate* isolate, Zone* zone)
buffer_(Vector<byte>::New(1024)),
pc_(0),
advance_current_end_(kInvalidPC),
jump_edges_(zone),
isolate_(isolate) {}
RegExpBytecodeGenerator::~RegExpBytecodeGenerator() {
......@@ -41,7 +39,6 @@ void RegExpBytecodeGenerator::Bind(Label* l) {
int fixup = pos;
pos = *reinterpret_cast<int32_t*>(buffer_.begin() + fixup);
*reinterpret_cast<uint32_t*>(buffer_.begin() + fixup) = pc_;
jump_edges_.emplace(fixup, pc_);
}
}
l->bind_to(pc_);
......@@ -49,17 +46,16 @@ void RegExpBytecodeGenerator::Bind(Label* l) {
void RegExpBytecodeGenerator::EmitOrLink(Label* l) {
if (l == nullptr) l = &backtrack_;
int pos = 0;
if (l->is_bound()) {
pos = l->pos();
jump_edges_.emplace(pc_, pos);
Emit32(l->pos());
} else {
int pos = 0;
if (l->is_linked()) {
pos = l->pos();
}
l->link_to(pc_);
Emit32(pos);
}
Emit32(pos);
}
void RegExpBytecodeGenerator::PopRegister(int register_index) {
......@@ -369,16 +365,8 @@ void RegExpBytecodeGenerator::IfRegisterEqPos(int register_index,
Handle<HeapObject> RegExpBytecodeGenerator::GetCode(Handle<String> source) {
Bind(&backtrack_);
Emit(BC_POP_BT, 0);
Handle<ByteArray> array;
if (FLAG_regexp_peephole_optimization) {
array = RegExpBytecodePeepholeOptimization::OptimizeBytecode(
isolate_, zone(), source, buffer_.begin(), length(), jump_edges_);
} else {
array = isolate_->factory()->NewByteArray(length());
Copy(array->GetDataStartAddress());
}
Handle<ByteArray> array = isolate_->factory()->NewByteArray(length());
Copy(array->GetDataStartAddress());
return array;
}
......
......@@ -100,12 +100,6 @@ class V8_EXPORT_PRIVATE RegExpBytecodeGenerator : public RegExpMacroAssembler {
int advance_current_offset_;
int advance_current_end_;
// Stores jump edges emitted for the bytecode (used by
// RegExpBytecodePeepholeOptimization).
// Key: jump source (offset in buffer_ where jump destination is stored).
// Value: jump destination (offset in buffer_ to jump to).
ZoneUnorderedMap<int, int> jump_edges_;
Isolate* isolate_;
static const int kInvalidPC = -1;
......
This diff is collapsed.
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
#define V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
#include "src/common/globals.h"
#include "src/zone/zone-containers.h"
namespace v8 {
namespace internal {
class ByteArray;
// Peephole optimization for regexp interpreter bytecode.
// Pre-defined bytecode sequences occuring in the bytecode generated by the
// RegExpBytecodeGenerator can be optimized into a single bytecode.
class RegExpBytecodePeepholeOptimization : public AllStatic {
public:
// Performs peephole optimization on the given bytecode and returns the
// optimized bytecode.
static Handle<ByteArray> OptimizeBytecode(
Isolate* isolate, Zone* zone, Handle<String> source, const byte* bytecode,
int length, const ZoneUnorderedMap<int, int>& jump_edges);
};
} // namespace internal
} // namespace v8
#endif // V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/regexp/regexp-bytecodes.h"
#include <cctype>
#include "src/utils/utils.h"
namespace v8 {
namespace internal {
void RegExpBytecodeDisassembleSingle(const byte* code_base, const byte* pc) {
PrintF("%s", RegExpBytecodeName(*pc));
// Args and the bytecode as hex.
for (int i = 0; i < RegExpBytecodeLength(*pc); i++) {
PrintF(", %02x", pc[i]);
}
PrintF(" ");
// Args as ascii.
for (int i = 1; i < RegExpBytecodeLength(*pc); i++) {
unsigned char b = pc[i];
PrintF("%c", std::isprint(b) ? b : '.');
}
PrintF("\n");
}
void RegExpBytecodeDisassemble(const byte* code_base, int length,
const char* pattern) {
PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern);
ptrdiff_t offset = 0;
while (offset < length) {
const byte* const pc = code_base + offset;
PrintF("%p %4" V8PRIxPTRDIFF " ", pc, offset);
RegExpBytecodeDisassembleSingle(code_base, pc);
offset += RegExpBytecodeLength(*pc);
}
}
} // namespace internal
} // namespace v8
This diff is collapsed.
......@@ -64,6 +64,23 @@ bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
return true;
}
void DisassembleSingleBytecode(const byte* code_base, const byte* pc) {
PrintF("%s", RegExpBytecodeName(*pc));
// Args and the bytecode as hex.
for (int i = 0; i < RegExpBytecodeLength(*pc); i++) {
PrintF(", %02x", pc[i]);
}
PrintF(" ");
// Args as ascii.
for (int i = 1; i < RegExpBytecodeLength(*pc); i++) {
unsigned char b = pc[i];
PrintF("%c", std::isprint(b) ? b : '.');
}
PrintF("\n");
}
#ifdef DEBUG
void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
int stack_depth, int current_position,
......@@ -78,7 +95,7 @@ void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
PrintF(format, pc - code_base, stack_depth, current_position, current_char,
printable ? current_char : '.');
RegExpBytecodeDisassembleSingle(code_base, pc);
DisassembleSingleBytecode(code_base, pc);
}
}
#endif // DEBUG
......@@ -240,13 +257,6 @@ IrregexpInterpreter::Result HandleInterrupts(
return IrregexpInterpreter::SUCCESS;
}
bool CheckBitInTable(const uint32_t current_char, const byte* const table) {
int mask = RegExpMacroAssembler::kTableMask;
int b = table[(current_char & mask) >> kBitsPerByteLog2];
int bit = (current_char & (kBitsPerByte - 1));
return (b & (1 << bit)) != 0;
}
// If computed gotos are supported by the compiler, we can get addresses to
// labels directly in C/C++. Every bytecode handler has its own label and we
// store the addresses in a dispatch table indexed by bytecode. To execute the
......@@ -271,7 +281,7 @@ bool CheckBitInTable(const uint32_t current_char, const byte* const table) {
#define DISPATCH() \
pc = next_pc; \
insn = next_insn; \
goto switch_dispatch_continuation
break
#endif // V8_USE_COMPUTED_GOTO
// ADVANCE/SET_PC_FROM_OFFSET are separated from DISPATCH, because ideally some
......@@ -321,13 +331,19 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
// Fill dispatch table from last defined bytecode up to the next power of two
// with BREAK (invalid operation).
// TODO(pthier): Find a way to fill up automatically (at compile time)
// 59 real bytecodes -> 5 fillers
// 53 real bytecodes -> 11 fillers
#define BYTECODE_FILLER_ITERATOR(V) \
V(BREAK) /* 1 */ \
V(BREAK) /* 2 */ \
V(BREAK) /* 3 */ \
V(BREAK) /* 4 */ \
V(BREAK) /* 5 */
V(BREAK) /* 5 */ \
V(BREAK) /* 6 */ \
V(BREAK) /* 7 */ \
V(BREAK) /* 8 */ \
V(BREAK) /* 9 */ \
V(BREAK) /* 10 */ \
V(BREAK) /* 11 */
#define COUNT(...) +1
static constexpr int kRegExpBytecodeFillerCount =
......@@ -636,7 +652,10 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
DISPATCH();
}
BYTECODE(CHECK_BIT_IN_TABLE) {
if (CheckBitInTable(current_char, pc + 8)) {
int mask = RegExpMacroAssembler::kTableMask;
byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
int bit = (current_char & (kBitsPerByte - 1));
if ((b & (1 << bit)) != 0) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
ADVANCE(CHECK_BIT_IN_TABLE);
......@@ -815,118 +834,6 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
}
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint32_t advance = Load16Aligned(pc + 4);
uint32_t c = Load16Aligned(pc + 6);
while (static_cast<uintptr_t>(current + load_offset) <
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
if (c == current_char) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
DISPATCH();
}
current += advance;
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR_AND) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint16_t advance = Load16Aligned(pc + 4);
uint16_t c = Load16Aligned(pc + 6);
uint32_t mask = Load32Aligned(pc + 8);
int32_t maximum_offset = Load32Aligned(pc + 12);
while (static_cast<uintptr_t>(current + maximum_offset) <=
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
if (c == (current_char & mask)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
DISPATCH();
}
current += advance;
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 20));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR_POS_CHECKED) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint16_t advance = Load16Aligned(pc + 4);
uint16_t c = Load16Aligned(pc + 6);
int32_t maximum_offset = Load32Aligned(pc + 8);
while (static_cast<uintptr_t>(current + maximum_offset) <=
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
if (c == current_char) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
DISPATCH();
}
current += advance;
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_BIT_IN_TABLE) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint32_t advance = Load16Aligned(pc + 4);
const byte* table = pc + 8;
while (static_cast<uintptr_t>(current + load_offset) <
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
if (CheckBitInTable(current_char, table)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
DISPATCH();
}
current += advance;
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint16_t advance = Load16Aligned(pc + 4);
uint16_t limit = Load16Aligned(pc + 6);
const byte* table = pc + 8;
while (static_cast<uintptr_t>(current + load_offset) <
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
if (current_char > limit) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
DISPATCH();
}
if (!CheckBitInTable(current_char, table)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
DISPATCH();
}
current += advance;
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR_OR_CHAR) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint32_t advance = Load16Aligned(pc + 4);
uint16_t c = Load16Aligned(pc + 8);
uint16_t c2 = Load16Aligned(pc + 10);
while (static_cast<uintptr_t>(current + load_offset) <
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
// The two if-statements below are split up intentionally, as combining
// them seems to result in register allocation behaving quite
// differently and slowing down the resulting code.
if (c == current_char) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
DISPATCH();
}
if (c2 == current_char) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
DISPATCH();
}
current += advance;
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
DISPATCH();
}
#if V8_USE_COMPUTED_GOTO
// Lint gets confused a lot if we just use !V8_USE_COMPUTED_GOTO or ifndef
// V8_USE_COMPUTED_GOTO here.
......@@ -934,9 +841,6 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
default:
UNREACHABLE();
}
// Label we jump to in DISPATCH(). There must be no instructions between the
// end of the switch, this label and the end of the loop.
switch_dispatch_continuation : {}
#endif // V8_USE_COMPUTED_GOTO
}
}
......@@ -951,6 +855,25 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
} // namespace
// static
void IrregexpInterpreter::Disassemble(ByteArray byte_array,
const std::string& pattern) {
DisallowHeapAllocation no_gc;
PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern.c_str());
const byte* const code_base = byte_array.GetDataStartAddress();
const int byte_array_length = byte_array.length();
ptrdiff_t offset = 0;
while (offset < byte_array_length) {
const byte* const pc = code_base + offset;
PrintF("%p %4" V8PRIxPTRDIFF " ", pc, offset);
DisassembleSingleBytecode(code_base, pc);
offset += RegExpBytecodeLength(*pc);
}
}
// static
IrregexpInterpreter::Result IrregexpInterpreter::Match(
Isolate* isolate, JSRegExp regexp, String subject_string, int* registers,
......
......@@ -46,6 +46,8 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
int registers_length, int start_position,
RegExp::CallOrigin call_origin);
static void Disassemble(ByteArray byte_array, const std::string& pattern);
private:
static Result Match(Isolate* isolate, JSRegExp regexp, String subject_string,
int* registers, int registers_length, int start_position,
......
......@@ -9,7 +9,6 @@
#include "src/heap/heap-inl.h"
#include "src/objects/js-regexp-inl.h"
#include "src/regexp/regexp-bytecode-generator.h"
#include "src/regexp/regexp-bytecodes.h"
#include "src/regexp/regexp-compiler.h"
#include "src/regexp/regexp-dotprinter.h"
#include "src/regexp/regexp-interpreter.h"
......@@ -868,8 +867,7 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
data->compilation_target == RegExpCompilationTarget::kBytecode) {
Handle<ByteArray> bytecode(ByteArray::cast(result.code), isolate);
auto pattern_cstring = pattern->ToCString();
RegExpBytecodeDisassemble(bytecode->GetDataStartAddress(),
bytecode->length(), pattern_cstring.get());
IrregexpInterpreter::Disassemble(*bytecode, pattern_cstring.get());
}
}
......
This diff is collapsed.
#!/usr/bin/env python
# Copyright 2019 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
python %prog trace-file
Parses output generated by v8 with flag --trace-regexp-bytecodes and generates
a list of the most common sequences.
"""
from __future__ import print_function
import sys
import re
import collections
def parse(file, seqlen):
# example:
# pc = 00, sp = 0, curpos = 0, curchar = 0000000a ..., bc = PUSH_BT, 02, 00, 00, 00, e8, 00, 00, 00 .......
rx = re.compile(r'pc = (?P<pc>[0-9a-f]+), sp = (?P<sp>\d+), '
r'curpos = (?P<curpos>\d+), curchar = (?P<char_hex>[0-9a-f]+) '
r'(:?\.|\()(?P<char>\.|\w)(:?\.|\)), bc = (?P<bc>\w+), .*')
total = 0
bc_cnt = [None] * seqlen
for i in xrange(seqlen):
bc_cnt[i] = {}
last = [None] * seqlen
with open(file) as f:
l = f.readline()
while l:
l = l.strip()
if l.startswith("Start bytecode interpreter"):
for i in xrange(seqlen):
last[i] = collections.deque(maxlen=i+1)
match = rx.search(l)
if match:
total += 1
bc = match.group('bc')
for i in xrange(seqlen):
last[i].append(bc)
key = ' --> '.join(last[i])
bc_cnt[i][key] = bc_cnt[i].get(key,0) + 1
l = f.readline()
return bc_cnt, total
def print_most_common(d, seqlen, total):
sorted_d = sorted(d.items(), key=lambda kv: kv[1], reverse=True)
for (k,v) in sorted_d:
if v*100/total < 1.0:
return
print("{}: {} ({} %)".format(k,v,(v*100/total)))
def main(argv):
max_seq = 7
bc_cnt, total = parse(argv[1],max_seq)
for i in xrange(max_seq):
print()
print("Most common of length {}".format(i+1))
print()
print_most_common(bc_cnt[i], i, total)
if __name__ == '__main__':
main(sys.argv)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment