Commit 2f8361d4 authored by Ana Peško, committed by Commit Bot

[regexp] Multiple interpreter executions tier-up

This CL implements the tier-up strategy where the interpreter can be used for
an arbitrary number of executions for every regex, before tiering-up to the
compiler. The only exception is for functional global replaces, where we
eagerly tier-up to native code right away.

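To use the tier-up logic, --regexp-tier-up needs to be set; it is currently
off by default. The number of interpreter executions before tiering up is
controlled by --regexp-tier-up-ticks=value, which defaults to 1.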

Change-Id: I770857e5eae710a952fe47661cb42957c53848b4
Bug: v8:9566
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1789299
Commit-Queue: Ana Pesko <anapesko@google.com>
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63623}
parent af063685
......@@ -522,7 +522,7 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
GotoIf(TaggedIsSmi(var_code.value()), &runtime);
TNode<Code> code = CAST(var_code.value());
// Tier-up in runtime if ticks are non-zero and tier-up hasn't happened yet
// Tier-up in runtime if ticks are zero and tier-up hasn't happened yet
// and ensure that a RegExp stack is allocated when using compiled Irregexp.
{
Label next(this), check_tier_up(this);
......@@ -538,9 +538,9 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
// Check if tier-up is requested.
BIND(&check_tier_up);
TNode<Smi> ticks = CAST(
UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpTierUpTicksIndex));
GotoIf(SmiToInt32(ticks), &runtime);
TNode<Smi> ticks = CAST(UnsafeLoadFixedArrayElement(
data, JSRegExp::kIrregexpTicksUntilTierUpIndex));
GotoIfNot(SmiToInt32(ticks), &runtime);
Goto(&next);
BIND(&next);
......
......@@ -1449,7 +1449,7 @@ void JSRegExp::JSRegExpVerify(Isolate* isolate) {
CHECK(arr.get(JSRegExp::kIrregexpCaptureCountIndex).IsSmi());
CHECK(arr.get(JSRegExp::kIrregexpMaxRegisterCountIndex).IsSmi());
CHECK(arr.get(JSRegExp::kIrregexpTierUpTicksIndex).IsSmi());
CHECK(arr.get(JSRegExp::kIrregexpTicksUntilTierUpIndex).IsSmi());
break;
}
default:
......
......@@ -1265,8 +1265,11 @@ DEFINE_BOOL(regexp_optimization, true, "generate optimized regexp code")
DEFINE_BOOL(regexp_mode_modifiers, false, "enable inline flags in regexp.")
DEFINE_BOOL(regexp_interpret_all, false, "interpret all regexp code")
DEFINE_BOOL(regexp_tier_up, false,
"enable regexp interpreter and tier up to the compiler")
DEFINE_NEG_IMPLICATION(regexp_interpret_all, regexp_tier_up)
"enable regexp interpreter and tier up to the compiler after the "
"number of executions set by the tier up ticks flag")
DEFINE_INT(regexp_tier_up_ticks, 1,
"set the number of executions for the regexp interpreter before "
"tiering-up to the compiler")
// Testing flags test/cctest/test-{flags,api,serialization}.cc
DEFINE_BOOL(testing_bool_flag, true, "testing_bool_flag")
......
......@@ -3930,6 +3930,9 @@ void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp,
JSRegExp::Flags flags, int capture_count) {
Handle<FixedArray> store = NewFixedArray(JSRegExp::kIrregexpDataSize);
Smi uninitialized = Smi::FromInt(JSRegExp::kUninitializedValue);
Smi ticks_until_tier_up = FLAG_regexp_tier_up
? Smi::FromInt(FLAG_regexp_tier_up_ticks)
: uninitialized;
store->set(JSRegExp::kTagIndex, Smi::FromInt(type));
store->set(JSRegExp::kSourceIndex, *source);
store->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags));
......@@ -3940,7 +3943,7 @@ void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp,
store->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::kZero);
store->set(JSRegExp::kIrregexpCaptureCountIndex, Smi::FromInt(capture_count));
store->set(JSRegExp::kIrregexpCaptureNameMapIndex, uninitialized);
store->set(JSRegExp::kIrregexpTierUpTicksIndex, Smi::kZero);
store->set(JSRegExp::kIrregexpTicksUntilTierUpIndex, ticks_until_tier_up);
regexp->set_data(*store);
}
......
......@@ -95,7 +95,11 @@ void V8::InitializeOncePerProcessImpl() {
// generation.
CHECK_WITH_MSG(!FLAG_interpreted_frames_native_stack || !FLAG_jitless,
"The --jitless and --interpreted-frames-native-stack flags "
"are incompatible.");
"are incompatible");
CHECK_WITH_MSG(
!FLAG_regexp_interpret_all || !FLAG_regexp_tier_up,
"The --regexp-interpret-all and --regexp-tier-up flags are incompatible");
base::OS::Initialize(FLAG_hard_abort, FLAG_gc_fake_mmap);
......
......@@ -96,7 +96,8 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
Handle<String> flags_string);
bool MarkedForTierUp();
void ResetTierUp();
void ResetLastTierUpTick();
void TierUpTick();
void MarkTierUpForNextExec();
inline Type TypeTag() const;
......@@ -176,9 +177,13 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
// Maps names of named capture groups (at indices 2i) to their corresponding
// (1-based) capture group indices (at indices 2i + 1).
static const int kIrregexpCaptureNameMapIndex = kDataIndex + 6;
static const int kIrregexpTierUpTicksIndex = kDataIndex + 7;
// Tier-up ticks are set to the value of the tier-up ticks flag. The value is
// decremented on each execution of the bytecode, so that the tier-up
// happens once the ticks reach zero.
// This value is ignored if the regexp-tier-up flag isn't turned on.
static const int kIrregexpTicksUntilTierUpIndex = kDataIndex + 7;
static const int kIrregexpDataSize = kIrregexpTierUpTicksIndex + 1;
static const int kIrregexpDataSize = kIrregexpTicksUntilTierUpIndex + 1;
// In-object fields.
static const int kLastIndexFieldIndex = 0;
......
......@@ -6153,27 +6153,40 @@ bool JSRegExp::ShouldProduceBytecode() {
}
// An irregexp is considered to be marked for tier up if the tier-up ticks value
// is not zero. An atom is not subject to tier-up implementation, so the tier-up
// ticks value is not set.
// reaches zero. An atom is not subject to tier-up implementation, so the
// tier-up ticks value is not set.
bool JSRegExp::MarkedForTierUp() {
DCHECK(data().IsFixedArray());
if (TypeTag() == JSRegExp::ATOM) {
if (TypeTag() == JSRegExp::ATOM || !FLAG_regexp_tier_up) {
return false;
}
return Smi::ToInt(DataAt(kIrregexpTierUpTicksIndex)) != 0;
return Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex)) == 0;
}
void JSRegExp::ResetTierUp() {
void JSRegExp::ResetLastTierUpTick() {
DCHECK(FLAG_regexp_tier_up);
DCHECK_EQ(TypeTag(), JSRegExp::IRREGEXP);
FixedArray::cast(data()).set(JSRegExp::kIrregexpTierUpTicksIndex, Smi::kZero);
int tier_up_ticks = Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex)) + 1;
FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex,
Smi::FromInt(tier_up_ticks));
}
// Counts down one tick on the tier-up counter stored in the regexp data
// array. The counter is never decremented below zero; once it is zero the
// call leaves the stored value untouched.
void JSRegExp::TierUpTick() {
  DCHECK(FLAG_regexp_tier_up);
  DCHECK_EQ(TypeTag(), JSRegExp::IRREGEXP);
  const int remaining_ticks =
      Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex));
  // Only write back while there is still something left to count down.
  if (remaining_ticks != 0) {
    FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex,
                                 Smi::FromInt(remaining_ticks - 1));
  }
}
void JSRegExp::MarkTierUpForNextExec() {
DCHECK(FLAG_regexp_tier_up);
DCHECK_EQ(TypeTag(), JSRegExp::IRREGEXP);
FixedArray::cast(data()).set(JSRegExp::kIrregexpTierUpTicksIndex,
Smi::FromInt(1));
FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex,
Smi::kZero);
}
namespace {
......
......@@ -807,7 +807,7 @@ IrregexpInterpreter::Result IrregexpInterpreter::Match(
Isolate* isolate, JSRegExp regexp, String subject_string, int* registers,
int registers_length, int start_position, RegExp::CallOrigin call_origin) {
if (FLAG_regexp_tier_up) {
regexp.MarkTierUpForNextExec();
regexp.TierUpTick();
}
bool is_one_byte = String::IsOneByteRepresentationUnderneath(subject_string);
......
......@@ -574,7 +574,7 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
// match.
// We need to reset the tier up to start over with compilation.
if (FLAG_regexp_tier_up) {
regexp->ResetTierUp();
regexp->ResetLastTierUpTick();
}
is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
......
......@@ -55,10 +55,7 @@ struct RegExpCompileData {
class RegExp final : public AllStatic {
public:
// Whether the irregexp engine generates native code or interpreter bytecode.
static bool CanGenerateNativeCode() {
return !FLAG_regexp_interpret_all || FLAG_regexp_tier_up;
}
// Whether the irregexp engine generates interpreter bytecode.
static bool CanGenerateBytecode() {
// True when either the --regexp-interpret-all or the --regexp-tier-up flag
// is enabled.
return FLAG_regexp_interpret_all || FLAG_regexp_tier_up;
}
......
......@@ -209,11 +209,5 @@ TEST(FlagsJitlessImplications) {
}
}
TEST(FlagsRegexpInterpretAllImplications) {
if (FLAG_regexp_interpret_all) {
CHECK(!FLAG_regexp_tier_up);
}
}
} // namespace internal
} // namespace v8
......@@ -396,6 +396,8 @@
'regress/regress-crbug-759327': [SKIP],
'regress/regress-crbug-898974': [SKIP],
'regexp-tier-up': [SKIP],
'regexp-tier-up-multiple': [SKIP],
'regress/regress-996234': [SKIP],
# These tests check that we can trace the compiler.
'tools/compiler-trace-flags': [SKIP],
......@@ -983,6 +985,7 @@
# The RegExp code cache means running this test multiple times is invalid.
'regexp-tier-up': [SKIP],
'regexp-tier-up-multiple': [SKIP],
# Flaky crash on Odroid devices: https://crbug.com/v8/7678
'regress/regress-336820': [PASS, ['arch == arm and not simulator_run', SKIP]],
......
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Tier-up behavior differs between slow and fast paths in
// RegExp.prototype.replace with a function as an argument.
// Flags: --regexp-tier-up --regexp-tier-up-ticks=5
// Flags: --allow-natives-syntax --no-force-slow-path --no-regexp-interpret-all
// Values passed as the second argument of %RegexpHasBytecode and
// %RegexpHasNativeCode below. NOTE(review): presumably `true` queries the
// code compiled for one-byte (Latin1) subjects and `false` the code compiled
// for two-byte subjects -- confirm against the runtime functions.
const kLatin1 = true;
const kUnicode = false;
function CheckRegexpNotYetCompiled(regexp) {
assertFalse(%RegexpHasBytecode(regexp, kLatin1) &&
%RegexpHasNativeCode(regexp, kLatin1));
assertFalse(%RegexpHasBytecode(regexp, kUnicode) &&
%RegexpHasNativeCode(regexp, kUnicode));
}
// Testing RegExp.test method which calls into Runtime_RegExpExec.
let re = new RegExp('^.$');
CheckRegexpNotYetCompiled(re);
// Testing first five executions of regexp with one-byte string subject.
// The test runs with --regexp-tier-up-ticks=5, so each of these executions is
// expected to leave bytecode in place for the one-byte variant and nothing at
// all for the two-byte variant.
for (var i = 0; i < 5; i++) {
re.test("a");
assertTrue(%RegexpHasBytecode(re, kLatin1));
assertTrue(!%RegexpHasBytecode(re, kUnicode) &&
!%RegexpHasNativeCode(re, kUnicode));
}
// Testing the tier-up to native code.
// The sixth execution is expected to replace the one-byte bytecode with
// native code; the two-byte variant is still untouched.
re.test("a");
assertTrue(!%RegexpHasBytecode(re, kLatin1) &&
%RegexpHasNativeCode(re,kLatin1));
assertTrue(!%RegexpHasBytecode(re, kUnicode) &&
!%RegexpHasNativeCode(re,kUnicode));
// A further execution must not change the compiled state.
re.test("a");
assertTrue(!%RegexpHasBytecode(re, kLatin1) &&
%RegexpHasNativeCode(re,kLatin1));
assertTrue(!%RegexpHasBytecode(re, kUnicode) &&
!%RegexpHasNativeCode(re,kUnicode));
// Testing that the regexp will compile to native code for two-byte string
// subject as well, because we have a single tick counter for both string
// representations.
re.test("π");
assertTrue(!%RegexpHasBytecode(re, kLatin1) &&
%RegexpHasNativeCode(re,kLatin1));
assertTrue(!%RegexpHasBytecode(re, kUnicode) &&
%RegexpHasNativeCode(re,kUnicode));
// Testing String.replace method for non-global regexps.
// `subject` is a one-byte string and is reused by the replace tests below.
var subject = "a1111";
re = /\w1/;
CheckRegexpNotYetCompiled(re);
// Each of the first five replace calls is expected to leave one-byte bytecode
// in place (the test runs with --regexp-tier-up-ticks=5).
for (var i = 0; i < 5; i++) {
subject.replace(re, "x");
assertTrue(%RegexpHasBytecode(re, kLatin1));
assertTrue(!%RegexpHasBytecode(re, kUnicode) &&
!%RegexpHasNativeCode(re, kUnicode));
}
// The sixth replace call is expected to tier up to native code for the
// one-byte variant only.
subject.replace(re, "x");
assertTrue(!%RegexpHasBytecode(re, kLatin1) &&
%RegexpHasNativeCode(re, kLatin1));
assertTrue(!%RegexpHasBytecode(re, kUnicode) &&
!%RegexpHasNativeCode(re, kUnicode));
// Testing String.replace method for global regexps.
let re_g = /\w11111/g;
CheckRegexpNotYetCompiled(re_g);
// This regexp does not match the subject, so each invocation of the replace
// method executes the bytecode only once. Five invocations therefore do not
// yet tier up and recompile to native code.
for (var i = 0; i < 5; i++) {
subject.replace(re_g, "x");
assertTrue(%RegexpHasBytecode(re_g, kLatin1));
assertTrue(!%RegexpHasBytecode(re_g, kUnicode) &&
!%RegexpHasNativeCode(re_g, kUnicode));
}
// This regexp matches every character of the five-character subject, so a
// single replace call executes it repeatedly and is expected to tier up
// before it returns.
re_g = /\w/g;
CheckRegexpNotYetCompiled(re_g);
subject.replace(re_g, "x");
assertTrue(!%RegexpHasBytecode(re_g, kLatin1) &&
%RegexpHasNativeCode(re_g, kLatin1));
assertTrue(!%RegexpHasBytecode(re_g, kUnicode) &&
!%RegexpHasNativeCode(re_g, kUnicode));
// Testing String.replace method for global regexps with a function as the
// replacement argument. This tiers up eagerly and compiles to native code
// right away, even though the regexp is only executed once.
function f() { return "x"; }
re_g = /\w2/g;
CheckRegexpNotYetCompiled(re_g);
subject.replace(re_g, f);
// Native code must already exist for the one-byte variant after a single
// call; the two-byte variant stays uncompiled.
assertTrue(!%RegexpHasBytecode(re_g, kLatin1) &&
%RegexpHasNativeCode(re_g, kLatin1));
assertTrue(!%RegexpHasBytecode(re_g, kUnicode) &&
!%RegexpHasNativeCode(re_g, kUnicode));
......@@ -2,9 +2,10 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Tier-up behavior differs between slow and fast paths in functional
// RegExp.prototype.replace.
// Flags: --regexp-tier-up --allow-natives-syntax --no-force-slow-path
// Tier-up behavior differs between slow and fast paths in
// RegExp.prototype.replace with a function as an argument.
// Flags: --regexp-tier-up --regexp-tier-up-ticks=1
// Flags: --allow-natives-syntax --no-force-slow-path --no-regexp-interpret-all
const kLatin1 = true;
const kUnicode = false;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment