Commit a2b17a72 authored by Iain Ireland, committed by Commit Bot

[regexp] Upstream small changes

This is a grab-bag of small compatibility fixes to make it easier to
import irregexp into SpiderMonkey. For changes where the commit
message was longer than the change itself, it didn't seem worth
opening a separate review.

[regexp] Use uc16 in FilterOneByte

SpiderMonkey uses char16_t instead of uint16_t for its two-byte
strings. (This matches ICU. It looks like V8 considered making the
same change, but decided against it: see
https://bugs.chromium.org/p/v8/issues/detail?id=6487.) Fortunately,
irregexp is careful about only using uc16, so SpiderMonkey can just
define uc16 = char16_t and *almost* everything works out. This patch
fixes the single place in irregexp where that is not true.

[regexp] Remove unreachable return

The return statement at the end of
RegExpParser::ParseClassCharacterEscape is unreachable, because every
branch of the switch returns. This triggered static analysis errors in
SpiderMonkey.

[regexp] Remove trivial assertion

The assertion in BytecodeSequenceNode::ArgumentMapping cannot fail,
because size_t is an unsigned type. This triggered static analysis
warnings in SpiderMonkey.

[regexp] Make RegExpStack constructor public

In V8, the RegExpStack's private constructor is called from Isolate,
which is a friend class. In SpiderMonkey, we use a wrapper around new
to control where memory is allocated, so we need the RegExpStack
constructor to be visible outside of Isolate.

[regexp] Refactor Isolate::IncreaseTotalRegexpCodeGenerated

The call-site of Isolate::IncreaseTotalRegexpCodeGenerated is the only
place inside irregexp where HeapObject::Size is called. SpiderMonkey's
heap-allocated objects live in arenas, and don't have a standardized
way of finding the size. In this particular case it would be safe to
hardcode a size of 0, but leaving HeapObject::Size undefined will
ensure that SpiderMonkey doesn't silently do the wrong thing if
somebody in V8 adds a new, more meaningful call to HeapObject::Size.

R=jgruber@chromium.org

Bug: v8:10303
Change-Id: I5b81e1a261fec8c85a63f71f34cd12d68f638334
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2090191
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66676}
parent e35b048c
......@@ -3705,6 +3705,11 @@ bool Isolate::use_optimizer() {
!is_precise_count_code_coverage();
}
void Isolate::IncreaseTotalRegexpCodeGenerated(Handle<HeapObject> code) {
DCHECK(code->IsCode() || code->IsByteArray());
total_regexp_code_generated_ += code->Size();
}
bool Isolate::NeedsDetailedOptimizedCodeLineInfo() const {
return NeedsSourcePositionsForProfiling() ||
detailed_source_positions_for_profiling();
......
......@@ -1040,9 +1040,7 @@ class V8_EXPORT_PRIVATE Isolate final : private HiddenFactory {
RegExpStack* regexp_stack() { return regexp_stack_; }
size_t total_regexp_code_generated() { return total_regexp_code_generated_; }
void IncreaseTotalRegexpCodeGenerated(int size) {
total_regexp_code_generated_ += size;
}
void IncreaseTotalRegexpCodeGenerated(Handle<HeapObject> code);
std::vector<int>* regexp_indices() { return &regexp_indices_; }
......
......@@ -436,7 +436,6 @@ BytecodeArgumentMapping BytecodeSequenceNode::ArgumentMapping(
size_t index) const {
DCHECK(IsSequence());
DCHECK(argument_mapping_ != nullptr);
DCHECK_GE(index, 0);
DCHECK_LT(index, argument_mapping_->size());
return argument_mapping_->at(index);
......
......@@ -272,7 +272,7 @@ RegExpCompiler::CompilationResult RegExpCompiler::Assemble(
}
Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
isolate->IncreaseTotalRegexpCodeGenerated(code->Size());
isolate->IncreaseTotalRegexpCodeGenerated(code);
work_list_ = nullptr;
#ifdef DEBUG
......@@ -1849,13 +1849,13 @@ RegExpNode* TextNode::FilterOneByte(int depth) {
if (elm.text_type() == TextElement::ATOM) {
Vector<const uc16> quarks = elm.atom()->data();
for (int j = 0; j < quarks.length(); j++) {
uint16_t c = quarks[j];
uc16 c = quarks[j];
if (elm.atom()->ignore_case()) {
c = unibrow::Latin1::TryConvertToLatin1(c);
}
if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr);
// Replace quark in case we converted to Latin-1.
uint16_t* writable_quarks = const_cast<uint16_t*>(quarks.begin());
uc16* writable_quarks = const_cast<uc16*>(quarks.begin());
writable_quarks[j] = c;
}
} else {
......
......@@ -1681,7 +1681,7 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
return 0;
}
}
return 0;
UNREACHABLE();
}
void RegExpParser::ParseClassEscape(ZoneList<CharacterRange>* ranges,
......
......@@ -38,6 +38,9 @@ class RegExpStackScope {
class RegExpStack {
public:
RegExpStack();
~RegExpStack();
// Number of allocated locations on the stack below the limit.
// No sequence of pushes must be longer than this without doing a stack-limit
// check.
......@@ -77,9 +80,6 @@ class RegExpStack {
static constexpr size_t kMaximumStackSize = 64 * MB;
private:
RegExpStack();
~RegExpStack();
// Artificial limit used when the thread-local state has been destroyed.
static const Address kMemoryTop =
static_cast<Address>(static_cast<uintptr_t>(-1));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment