Commit 2f8361d4 authored by Ana Peško, committed by Commit Bot

[regexp] Multiple interpreter executions tier-up

This CL implements the tier-up strategy where the interpreter can be used for
an arbitrary number of executions for every regex, before tiering-up to the
compiler. The only exception is for functional global replaces, where we
eagerly tier-up to native code right away.

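To use the tier-up logic, --regexp-tier-up needs to be set; it is currently
off by default. The number of interpreter executions before tiering up is
set with --regexp-tier-up-ticks=value, which defaults to 1.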

Change-Id: I770857e5eae710a952fe47661cb42957c53848b4
Bug: v8:9566
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1789299
Commit-Queue: Ana Pesko <anapesko@google.com>
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63623}
parent af063685
...@@ -522,7 +522,7 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal( ...@@ -522,7 +522,7 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
GotoIf(TaggedIsSmi(var_code.value()), &runtime); GotoIf(TaggedIsSmi(var_code.value()), &runtime);
TNode<Code> code = CAST(var_code.value()); TNode<Code> code = CAST(var_code.value());
// Tier-up in runtime if ticks are non-zero and tier-up hasn't happened yet // Tier-up in runtime if ticks are zero and tier-up hasn't happened yet
// and ensure that a RegExp stack is allocated when using compiled Irregexp. // and ensure that a RegExp stack is allocated when using compiled Irregexp.
{ {
Label next(this), check_tier_up(this); Label next(this), check_tier_up(this);
...@@ -538,9 +538,9 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal( ...@@ -538,9 +538,9 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
// Check if tier-up is requested. // Check if tier-up is requested.
BIND(&check_tier_up); BIND(&check_tier_up);
TNode<Smi> ticks = CAST( TNode<Smi> ticks = CAST(UnsafeLoadFixedArrayElement(
UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpTierUpTicksIndex)); data, JSRegExp::kIrregexpTicksUntilTierUpIndex));
GotoIf(SmiToInt32(ticks), &runtime); GotoIfNot(SmiToInt32(ticks), &runtime);
Goto(&next); Goto(&next);
BIND(&next); BIND(&next);
......
...@@ -1449,7 +1449,7 @@ void JSRegExp::JSRegExpVerify(Isolate* isolate) { ...@@ -1449,7 +1449,7 @@ void JSRegExp::JSRegExpVerify(Isolate* isolate) {
CHECK(arr.get(JSRegExp::kIrregexpCaptureCountIndex).IsSmi()); CHECK(arr.get(JSRegExp::kIrregexpCaptureCountIndex).IsSmi());
CHECK(arr.get(JSRegExp::kIrregexpMaxRegisterCountIndex).IsSmi()); CHECK(arr.get(JSRegExp::kIrregexpMaxRegisterCountIndex).IsSmi());
CHECK(arr.get(JSRegExp::kIrregexpTierUpTicksIndex).IsSmi()); CHECK(arr.get(JSRegExp::kIrregexpTicksUntilTierUpIndex).IsSmi());
break; break;
} }
default: default:
......
...@@ -1265,8 +1265,11 @@ DEFINE_BOOL(regexp_optimization, true, "generate optimized regexp code") ...@@ -1265,8 +1265,11 @@ DEFINE_BOOL(regexp_optimization, true, "generate optimized regexp code")
DEFINE_BOOL(regexp_mode_modifiers, false, "enable inline flags in regexp.") DEFINE_BOOL(regexp_mode_modifiers, false, "enable inline flags in regexp.")
DEFINE_BOOL(regexp_interpret_all, false, "interpret all regexp code") DEFINE_BOOL(regexp_interpret_all, false, "interpret all regexp code")
DEFINE_BOOL(regexp_tier_up, false, DEFINE_BOOL(regexp_tier_up, false,
"enable regexp interpreter and tier up to the compiler") "enable regexp interpreter and tier up to the compiler after the "
DEFINE_NEG_IMPLICATION(regexp_interpret_all, regexp_tier_up) "number of executions set by the tier up ticks flag")
DEFINE_INT(regexp_tier_up_ticks, 1,
"set the number of executions for the regexp interpreter before "
"tiering-up to the compiler")
// Testing flags test/cctest/test-{flags,api,serialization}.cc // Testing flags test/cctest/test-{flags,api,serialization}.cc
DEFINE_BOOL(testing_bool_flag, true, "testing_bool_flag") DEFINE_BOOL(testing_bool_flag, true, "testing_bool_flag")
......
...@@ -3930,6 +3930,9 @@ void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp, ...@@ -3930,6 +3930,9 @@ void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp,
JSRegExp::Flags flags, int capture_count) { JSRegExp::Flags flags, int capture_count) {
Handle<FixedArray> store = NewFixedArray(JSRegExp::kIrregexpDataSize); Handle<FixedArray> store = NewFixedArray(JSRegExp::kIrregexpDataSize);
Smi uninitialized = Smi::FromInt(JSRegExp::kUninitializedValue); Smi uninitialized = Smi::FromInt(JSRegExp::kUninitializedValue);
Smi ticks_until_tier_up = FLAG_regexp_tier_up
? Smi::FromInt(FLAG_regexp_tier_up_ticks)
: uninitialized;
store->set(JSRegExp::kTagIndex, Smi::FromInt(type)); store->set(JSRegExp::kTagIndex, Smi::FromInt(type));
store->set(JSRegExp::kSourceIndex, *source); store->set(JSRegExp::kSourceIndex, *source);
store->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags)); store->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags));
...@@ -3940,7 +3943,7 @@ void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp, ...@@ -3940,7 +3943,7 @@ void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp,
store->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::kZero); store->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::kZero);
store->set(JSRegExp::kIrregexpCaptureCountIndex, Smi::FromInt(capture_count)); store->set(JSRegExp::kIrregexpCaptureCountIndex, Smi::FromInt(capture_count));
store->set(JSRegExp::kIrregexpCaptureNameMapIndex, uninitialized); store->set(JSRegExp::kIrregexpCaptureNameMapIndex, uninitialized);
store->set(JSRegExp::kIrregexpTierUpTicksIndex, Smi::kZero); store->set(JSRegExp::kIrregexpTicksUntilTierUpIndex, ticks_until_tier_up);
regexp->set_data(*store); regexp->set_data(*store);
} }
......
...@@ -95,7 +95,11 @@ void V8::InitializeOncePerProcessImpl() { ...@@ -95,7 +95,11 @@ void V8::InitializeOncePerProcessImpl() {
// generation. // generation.
CHECK_WITH_MSG(!FLAG_interpreted_frames_native_stack || !FLAG_jitless, CHECK_WITH_MSG(!FLAG_interpreted_frames_native_stack || !FLAG_jitless,
"The --jitless and --interpreted-frames-native-stack flags " "The --jitless and --interpreted-frames-native-stack flags "
"are incompatible."); "are incompatible");
CHECK_WITH_MSG(
!FLAG_regexp_interpret_all || !FLAG_regexp_tier_up,
"The --regexp-interpret-all and --regexp-tier-up flags are incompatible");
base::OS::Initialize(FLAG_hard_abort, FLAG_gc_fake_mmap); base::OS::Initialize(FLAG_hard_abort, FLAG_gc_fake_mmap);
......
...@@ -96,7 +96,8 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> { ...@@ -96,7 +96,8 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
Handle<String> flags_string); Handle<String> flags_string);
bool MarkedForTierUp(); bool MarkedForTierUp();
void ResetTierUp(); void ResetLastTierUpTick();
void TierUpTick();
void MarkTierUpForNextExec(); void MarkTierUpForNextExec();
inline Type TypeTag() const; inline Type TypeTag() const;
...@@ -176,9 +177,13 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> { ...@@ -176,9 +177,13 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
// Maps names of named capture groups (at indices 2i) to their corresponding // Maps names of named capture groups (at indices 2i) to their corresponding
// (1-based) capture group indices (at indices 2i + 1). // (1-based) capture group indices (at indices 2i + 1).
static const int kIrregexpCaptureNameMapIndex = kDataIndex + 6; static const int kIrregexpCaptureNameMapIndex = kDataIndex + 6;
static const int kIrregexpTierUpTicksIndex = kDataIndex + 7; // Tier-up ticks are set to the value of the tier-up ticks flag. The value is
// decremented on each execution of the bytecode, so that the tier-up
// happens once the ticks reach zero.
// This value is ignored if the regexp-tier-up flag isn't turned on.
static const int kIrregexpTicksUntilTierUpIndex = kDataIndex + 7;
static const int kIrregexpDataSize = kIrregexpTierUpTicksIndex + 1; static const int kIrregexpDataSize = kIrregexpTicksUntilTierUpIndex + 1;
// In-object fields. // In-object fields.
static const int kLastIndexFieldIndex = 0; static const int kLastIndexFieldIndex = 0;
......
...@@ -6153,27 +6153,40 @@ bool JSRegExp::ShouldProduceBytecode() { ...@@ -6153,27 +6153,40 @@ bool JSRegExp::ShouldProduceBytecode() {
} }
// An irregexp is considered to be marked for tier up if the tier-up ticks value // An irregexp is considered to be marked for tier up if the tier-up ticks value
// is not zero. An atom is not subject to tier-up implementation, so the tier-up // reaches zero. An atom is not subject to tier-up implementation, so the
// ticks value is not set. // tier-up ticks value is not set.
bool JSRegExp::MarkedForTierUp() { bool JSRegExp::MarkedForTierUp() {
DCHECK(data().IsFixedArray()); DCHECK(data().IsFixedArray());
if (TypeTag() == JSRegExp::ATOM) { if (TypeTag() == JSRegExp::ATOM || !FLAG_regexp_tier_up) {
return false; return false;
} }
return Smi::ToInt(DataAt(kIrregexpTierUpTicksIndex)) != 0; return Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex)) == 0;
} }
void JSRegExp::ResetTierUp() { void JSRegExp::ResetLastTierUpTick() {
DCHECK(FLAG_regexp_tier_up); DCHECK(FLAG_regexp_tier_up);
DCHECK_EQ(TypeTag(), JSRegExp::IRREGEXP); DCHECK_EQ(TypeTag(), JSRegExp::IRREGEXP);
FixedArray::cast(data()).set(JSRegExp::kIrregexpTierUpTicksIndex, Smi::kZero); int tier_up_ticks = Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex)) + 1;
FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex,
Smi::FromInt(tier_up_ticks));
}
void JSRegExp::TierUpTick() {
DCHECK(FLAG_regexp_tier_up);
DCHECK_EQ(TypeTag(), JSRegExp::IRREGEXP);
int tier_up_ticks = Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex));
if (tier_up_ticks == 0) {
return;
}
FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex,
Smi::FromInt(tier_up_ticks - 1));
} }
void JSRegExp::MarkTierUpForNextExec() { void JSRegExp::MarkTierUpForNextExec() {
DCHECK(FLAG_regexp_tier_up); DCHECK(FLAG_regexp_tier_up);
DCHECK_EQ(TypeTag(), JSRegExp::IRREGEXP); DCHECK_EQ(TypeTag(), JSRegExp::IRREGEXP);
FixedArray::cast(data()).set(JSRegExp::kIrregexpTierUpTicksIndex, FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex,
Smi::FromInt(1)); Smi::kZero);
} }
namespace { namespace {
......
...@@ -807,7 +807,7 @@ IrregexpInterpreter::Result IrregexpInterpreter::Match( ...@@ -807,7 +807,7 @@ IrregexpInterpreter::Result IrregexpInterpreter::Match(
Isolate* isolate, JSRegExp regexp, String subject_string, int* registers, Isolate* isolate, JSRegExp regexp, String subject_string, int* registers,
int registers_length, int start_position, RegExp::CallOrigin call_origin) { int registers_length, int start_position, RegExp::CallOrigin call_origin) {
if (FLAG_regexp_tier_up) { if (FLAG_regexp_tier_up) {
regexp.MarkTierUpForNextExec(); regexp.TierUpTick();
} }
bool is_one_byte = String::IsOneByteRepresentationUnderneath(subject_string); bool is_one_byte = String::IsOneByteRepresentationUnderneath(subject_string);
......
...@@ -574,7 +574,7 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, ...@@ -574,7 +574,7 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
// match. // match.
// We need to reset the tier up to start over with compilation. // We need to reset the tier up to start over with compilation.
if (FLAG_regexp_tier_up) { if (FLAG_regexp_tier_up) {
regexp->ResetTierUp(); regexp->ResetLastTierUpTick();
} }
is_one_byte = String::IsOneByteRepresentationUnderneath(*subject); is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte); EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
......
...@@ -55,10 +55,7 @@ struct RegExpCompileData { ...@@ -55,10 +55,7 @@ struct RegExpCompileData {
class RegExp final : public AllStatic { class RegExp final : public AllStatic {
public: public:
// Whether the irregexp engine generates native code or interpreter bytecode. // Whether the irregexp engine generates interpreter bytecode.
static bool CanGenerateNativeCode() {
return !FLAG_regexp_interpret_all || FLAG_regexp_tier_up;
}
static bool CanGenerateBytecode() { static bool CanGenerateBytecode() {
return FLAG_regexp_interpret_all || FLAG_regexp_tier_up; return FLAG_regexp_interpret_all || FLAG_regexp_tier_up;
} }
......
...@@ -209,11 +209,5 @@ TEST(FlagsJitlessImplications) { ...@@ -209,11 +209,5 @@ TEST(FlagsJitlessImplications) {
} }
} }
TEST(FlagsRegexpInterpretAllImplications) {
if (FLAG_regexp_interpret_all) {
CHECK(!FLAG_regexp_tier_up);
}
}
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8
...@@ -396,6 +396,8 @@ ...@@ -396,6 +396,8 @@
'regress/regress-crbug-759327': [SKIP], 'regress/regress-crbug-759327': [SKIP],
'regress/regress-crbug-898974': [SKIP], 'regress/regress-crbug-898974': [SKIP],
'regexp-tier-up': [SKIP], 'regexp-tier-up': [SKIP],
'regexp-tier-up-multiple': [SKIP],
'regress/regress-996234': [SKIP],
# These tests check that we can trace the compiler. # These tests check that we can trace the compiler.
'tools/compiler-trace-flags': [SKIP], 'tools/compiler-trace-flags': [SKIP],
...@@ -983,6 +985,7 @@ ...@@ -983,6 +985,7 @@
# The RegExp code cache means running this test multiple times is invalid. # The RegExp code cache means running this test multiple times is invalid.
'regexp-tier-up': [SKIP], 'regexp-tier-up': [SKIP],
'regexp-tier-up-multiple': [SKIP],
# Flaky crash on Odroid devices: https://crbug.com/v8/7678 # Flaky crash on Odroid devices: https://crbug.com/v8/7678
'regress/regress-336820': [PASS, ['arch == arm and not simulator_run', SKIP]], 'regress/regress-336820': [PASS, ['arch == arm and not simulator_run', SKIP]],
......
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Tier-up behavior differs between slow and fast paths in
// RegExp.prototype.replace with a function as an argument.
// Flags: --regexp-tier-up --regexp-tier-up-ticks=5
// Flags: --allow-natives-syntax --no-force-slow-path --no-regexp-interpret-all
const kLatin1 = true;
const kUnicode = false;
function CheckRegexpNotYetCompiled(regexp) {
assertFalse(%RegexpHasBytecode(regexp, kLatin1) &&
%RegexpHasNativeCode(regexp, kLatin1));
assertFalse(%RegexpHasBytecode(regexp, kUnicode) &&
%RegexpHasNativeCode(regexp, kUnicode));
}
// Testing RegExp.test method which calls into Runtime_RegExpExec.
// Sanity check on the freshly created regexp before its first execution.
let re = new RegExp('^.$');
CheckRegexpNotYetCompiled(re);
// Testing first five executions of regexp with one-byte string subject.
// The iteration count matches --regexp-tier-up-ticks=5 from the Flags line:
// during these executions the one-byte code must be bytecode, and nothing may
// have been compiled for two-byte subjects.
for (var i = 0; i < 5; i++) {
re.test("a");
assertTrue(%RegexpHasBytecode(re, kLatin1));
assertTrue(!%RegexpHasBytecode(re, kUnicode) &&
!%RegexpHasNativeCode(re, kUnicode));
}
// Testing the tier-up to native code.
// This is the sixth execution: the bytecode is expected to be gone and
// replaced by native code, still only for one-byte subjects.
re.test("a");
assertTrue(!%RegexpHasBytecode(re, kLatin1) &&
%RegexpHasNativeCode(re,kLatin1));
assertTrue(!%RegexpHasBytecode(re, kUnicode) &&
!%RegexpHasNativeCode(re,kUnicode));
// One more execution after tier-up: the state must stay the same (native code
// for one-byte subjects, no bytecode, nothing for two-byte subjects).
re.test("a");
assertTrue(!%RegexpHasBytecode(re, kLatin1) &&
%RegexpHasNativeCode(re,kLatin1));
assertTrue(!%RegexpHasBytecode(re, kUnicode) &&
!%RegexpHasNativeCode(re,kUnicode));
// Testing that the regexp will compile to native code for two-byte string
// subject as well, because we have a single tick counter for both string
// representations.
// "π" is outside Latin1, so this is the first execution on a two-byte subject;
// it must go straight to native code without ever producing bytecode.
re.test("π");
assertTrue(!%RegexpHasBytecode(re, kLatin1) &&
%RegexpHasNativeCode(re,kLatin1));
assertTrue(!%RegexpHasBytecode(re, kUnicode) &&
%RegexpHasNativeCode(re,kUnicode));
// Testing String.replace method for non-global regexps.
// A non-global replace with a string replacement is done once per call below,
// so five calls are expected to leave the regexp in bytecode (ticks set to 5
// by the Flags line) and the sixth call is expected to tier up.
var subject = "a1111";
re = /\w1/;
CheckRegexpNotYetCompiled(re);
for (var i = 0; i < 5; i++) {
subject.replace(re, "x");
assertTrue(%RegexpHasBytecode(re, kLatin1));
assertTrue(!%RegexpHasBytecode(re, kUnicode) &&
!%RegexpHasNativeCode(re, kUnicode));
}
// Sixth call: bytecode must have been replaced by native code for one-byte
// subjects; two-byte code must still be absent.
subject.replace(re, "x");
assertTrue(!%RegexpHasBytecode(re, kLatin1) &&
%RegexpHasNativeCode(re, kLatin1));
assertTrue(!%RegexpHasBytecode(re, kUnicode) &&
!%RegexpHasNativeCode(re, kUnicode));
// Testing String.replace method for global regexps.
// `subject` is "a1111" (declared earlier in this file): it has only four '1'
// characters, so a pattern requiring five of them cannot match.
let re_g = /\w11111/g;
CheckRegexpNotYetCompiled(re_g);
// This regexp will not match, so it will only execute the bytecode once,
// each time the replace method is invoked, without tiering-up and
// recompiling to native code.
for (var i = 0; i < 5; i++) {
subject.replace(re_g, "x");
assertTrue(%RegexpHasBytecode(re_g, kLatin1));
assertTrue(!%RegexpHasBytecode(re_g, kUnicode) &&
!%RegexpHasNativeCode(re_g, kUnicode));
}
// This regexp will match, so it will execute five times, and tier-up.
// Each of the five characters of "a1111" matches \w, so a single global
// replace call is expected to end with native code and no bytecode.
re_g = /\w/g;
CheckRegexpNotYetCompiled(re_g);
subject.replace(re_g, "x");
assertTrue(!%RegexpHasBytecode(re_g, kLatin1) &&
%RegexpHasNativeCode(re_g, kLatin1));
assertTrue(!%RegexpHasBytecode(re_g, kUnicode) &&
!%RegexpHasNativeCode(re_g, kUnicode));
// Testing String.replace method for global regexps with a function as a
// parameter. This will tier-up eagerly and compile to native code right
// away, even though the regexp is only executed once.
// Replacement callback; its return value is irrelevant to the checks below,
// which only look at how the regexp was compiled.
function f() { return "x"; }
// `subject` ("a1111", declared earlier in this file) contains no '2', so this
// pattern does not match it.
re_g = /\w2/g;
CheckRegexpNotYetCompiled(re_g);
subject.replace(re_g, f);
// Native code must exist for one-byte subjects after this single call, with no
// bytecode left behind and nothing compiled for two-byte subjects.
assertTrue(!%RegexpHasBytecode(re_g, kLatin1) &&
%RegexpHasNativeCode(re_g, kLatin1));
assertTrue(!%RegexpHasBytecode(re_g, kUnicode) &&
!%RegexpHasNativeCode(re_g, kUnicode));
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
// Tier-up behavior differs between slow and fast paths in functional // Tier-up behavior differs between slow and fast paths in
// RegExp.prototype.replace. // RegExp.prototype.replace with a function as an argument.
// Flags: --regexp-tier-up --allow-natives-syntax --no-force-slow-path // Flags: --regexp-tier-up --regexp-tier-up-ticks=1
// Flags: --allow-natives-syntax --no-force-slow-path --no-regexp-interpret-all
const kLatin1 = true; const kLatin1 = true;
const kUnicode = false; const kUnicode = false;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment