Commit 943f78a4 authored by Martin Bidlingmaier's avatar Martin Bidlingmaier Committed by Commit Bot

[regexp] Fix usage of {Is,Mark}PcProcessed in NfaInterpreter

Previously we checked whether a thread's pc IsPcProcessed before pushing
to the stack of (postponed) active_threads_.  This commit moves the
IsPcProcessed check and corresponding MarkPcProcessed call to when the
thread is actually processed, i.e. when it is popped from the
active_threads_ stack again.

This fixes two issues:
- Consider what used to happen in the following scenario:
1. An active thread t is postponed (e.g. because it is a fork) and
 pushed on active_threads_.  IsPcProcessed(t.pc) is false, so t is
 not discarded and does actually end up on active_threads_.
2. Some other thread s is executed, and at some point s.pc == t.pc,
 i.e. t.pc is marked as processed.
3. t is popped from active_threads_ for processing.

In 3 we don't want to continue execution of t: After all, its pc is
already marked as processed.  But because previously we only checked
for IsPcProcessed in step 1 before pushing to active_threads_, we used
to continue execution in 3.  I don't think this is a correctness
issue, but possibly a performance problem.  In any case, this commit
moves the IsPcProcessed check from 1 to 3 and so fixes this.
- After flushing blocked_threads_, we push them to active_threads_
again.  While doing so, we used to mark these thread's pcs as processed.
This meant that sometimes a (fork of a) high priority thread was
cancelled by the IsPcProcessed check even though its pc was only
marked as processed by a thread with lower priority during flushing.
We need it to be the other way round:  The low priority thread should
be cancelled after its pc is processed by a thread with higher
priority.
With this commit we don't MarkPcProcessed during flushing, it's
postponed to when we're actually processing.  This was a correctness
issue, and there's a new corresponding test case.


Bug: v8:10765
Change-Id: Ie12682cf3f8a04222d907edd8a3ad25baa69465a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2388112
Commit-Queue: Martin Bidlingmaier <mbid@google.com>
Reviewed-by: 's avatarJakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69668}
parent aaff9d37
......@@ -138,7 +138,7 @@ class NfaInterpreter {
DCHECK_EQ(best_match_, base::nullopt);
// All threads start at bytecode 0.
PushActiveThreadUnchecked(InterpreterThread{0, input_index_});
active_threads_.emplace_back(InterpreterThread{0, input_index_});
// Run the initial thread, potentially forking new threads, until every
// thread is blocked without further input.
RunActiveThreads();
......@@ -186,6 +186,9 @@ class NfaInterpreter {
// the current input index. All remaining `active_threads_` are discarded.
void RunActiveThread(InterpreterThread t) {
while (true) {
if (IsPcProcessed(t.pc)) return;
MarkPcProcessed(t.pc);
RegExpInstruction inst = bytecode_[t.pc];
switch (inst.opcode) {
case RegExpInstruction::CONSUME_RANGE: {
......@@ -195,26 +198,12 @@ class NfaInterpreter {
case RegExpInstruction::FORK: {
InterpreterThread fork = t;
fork.pc = inst.payload.pc;
active_threads_.emplace_back(fork);
++t.pc;
// t has higher priority than fork. If t.pc hasn't been processed,we
// push fork on the active_thread_ stack and continue directly with
// t. Otherwise we continue directly with fork if possible.
if (!IsPcProcessed(t.pc)) {
MarkPcProcessed(t.pc);
PushActiveThread(fork);
break;
} else if (!IsPcProcessed(fork.pc)) {
t = fork;
MarkPcProcessed(t.pc);
break;
}
return;
break;
}
case RegExpInstruction::JMP:
t.pc = inst.payload.pc;
if (IsPcProcessed(t.pc)) return;
MarkPcProcessed(t.pc);
break;
case RegExpInstruction::ACCEPT:
best_match_ = MatchRange{t.match_begin, input_index_};
......@@ -254,7 +243,7 @@ class NfaInterpreter {
RegExpInstruction::Uc16Range range = inst.payload.consume_range;
if (input_char >= range.min && input_char <= range.max) {
++t.pc;
PushActiveThreadUnchecked(t);
active_threads_.emplace_back(t);
}
}
blocked_threads_.clear();
......@@ -284,22 +273,6 @@ class NfaInterpreter {
pc_last_input_index_[pc] = input_index_;
}
// Functions to push a thread `t` onto the list of active threads, but only
// if `t.pc` was not already the pc of some other thread at the current
// subject index.
void PushActiveThreadUnchecked(InterpreterThread t) {
DCHECK(!IsPcProcessed(t.pc));
MarkPcProcessed(t.pc);
active_threads_.emplace_back(t);
}
void PushActiveThread(InterpreterThread t) {
if (IsPcProcessed(t.pc)) {
return;
}
PushActiveThreadUnchecked(t);
}
Vector<const RegExpInstruction> bytecode_;
Vector<const Character> input_;
int input_index_;
......
......@@ -5,10 +5,10 @@
// Flags: --allow-natives-syntax --enable-experimental-regexp-engine
function Test(regexp, subject, expectedResult, expectedLastIndex) {
//assertEquals(%RegexpTypeTag(regexp), "EXPERIMENTAL");
assertEquals(%RegexpTypeTag(regexp), "EXPERIMENTAL");
var result = regexp.exec(subject);
assertArrayEquals(result, expectedResult);
assertEquals(regexp.lastIndex, expectedLastIndex);
assertArrayEquals(expectedResult, result);
assertEquals(expectedLastIndex, regexp.lastIndex);
}
// The empty regexp.
......@@ -39,6 +39,7 @@ Test(/[💩]/, "f💩", [String.fromCodePoint(0xD83D)], 0);
// Greedy quantifier for 0 or more matches.
Test(/x*/, "asdfxk", [""], 0);
Test(/xx*a/, "xxa", ["xxa"], 0);
Test(/asdf*/, "aasdfffk", ["asdfff"], 0);
// Non-capturing groups and nested operators.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment