// Copyright 2020 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "src/regexp/experimental/experimental.h" #include "src/objects/js-regexp-inl.h" #include "src/regexp/experimental/experimental-compiler.h" #include "src/regexp/experimental/experimental-interpreter.h" #include "src/regexp/regexp-parser.h" #include "src/utils/ostreams.h" namespace v8 { namespace internal { bool ExperimentalRegExp::CanBeHandled(RegExpTree* tree, JSRegExp::Flags flags, int capture_count, Zone* zone) { return ExperimentalRegExpCompiler::CanBeHandled(tree, flags, capture_count, zone); } void ExperimentalRegExp::Initialize(Isolate* isolate, Handle<JSRegExp> re, Handle<String> source, JSRegExp::Flags flags, int capture_count) { DCHECK(FLAG_enable_experimental_regexp_engine); if (FLAG_trace_experimental_regexp_engine) { StdoutStream{} << "Initializing experimental regexp " << *source << std::endl; } isolate->factory()->SetRegExpExperimentalData(re, source, flags, capture_count); } bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) { DCHECK(FLAG_enable_experimental_regexp_engine); DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL); #ifdef VERIFY_HEAP re->JSRegExpVerify(isolate); #endif return re->DataAt(JSRegExp::kIrregexpLatin1BytecodeIndex) != Smi::FromInt(JSRegExp::kUninitializedValue); } bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) { DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL); #ifdef VERIFY_HEAP re->JSRegExpVerify(isolate); #endif Handle<String> source(re->Pattern(), isolate); if (FLAG_trace_experimental_regexp_engine) { StdoutStream{} << "Compiling experimental regexp " << *source << std::endl; } Zone zone(isolate->allocator(), ZONE_NAME); // Parse and compile the regexp source. RegExpCompileData parse_result; JSRegExp::Flags flags = re->GetFlags(); FlatStringReader reader(isolate, source); DCHECK(!isolate->has_pending_exception()); bool parse_success = RegExpParser::ParseRegExp(isolate, &zone, &reader, flags, &parse_result); if (!parse_success) { // The pattern was already parsed successfully during initialization, so // the only way parsing can fail now is because of stack overflow. CHECK_EQ(parse_result.error, RegExpError::kStackOverflow); USE(RegExp::ThrowRegExpException(isolate, re, source, parse_result.error)); return false; } ZoneList<RegExpInstruction> bytecode = ExperimentalRegExpCompiler::Compile(parse_result.tree, flags, &zone); int byte_length = sizeof(RegExpInstruction) * bytecode.length(); Handle<ByteArray> bytecode_byte_array = isolate->factory()->NewByteArray(byte_length); MemCopy(bytecode_byte_array->GetDataStartAddress(), bytecode.begin(), byte_length); re->SetDataAt(JSRegExp::kIrregexpLatin1BytecodeIndex, *bytecode_byte_array); re->SetDataAt(JSRegExp::kIrregexpUC16BytecodeIndex, *bytecode_byte_array); Handle<Code> trampoline = BUILTIN_CODE(isolate, RegExpExperimentalTrampoline); re->SetDataAt(JSRegExp::kIrregexpLatin1CodeIndex, *trampoline); re->SetDataAt(JSRegExp::kIrregexpUC16CodeIndex, *trampoline); re->SetCaptureNameMap(parse_result.capture_name_map); return true; } Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) { RegExpInstruction* inst_begin = reinterpret_cast<RegExpInstruction*>(raw_bytes.GetDataStartAddress()); int inst_num = raw_bytes.length() / sizeof(RegExpInstruction); DCHECK_EQ(sizeof(RegExpInstruction) * inst_num, raw_bytes.length()); return Vector<RegExpInstruction>(inst_begin, inst_num); } // Returns the number of matches. int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate, JSRegExp regexp, String subject, int32_t* output_registers, int32_t output_register_count, int32_t subject_index) { DisallowHeapAllocation no_gc; DCHECK(FLAG_enable_experimental_regexp_engine); if (FLAG_trace_experimental_regexp_engine) { String source = String::cast(regexp.DataAt(JSRegExp::kSourceIndex)); StdoutStream{} << "Executing experimental regexp " << source << std::endl; } Vector<RegExpInstruction> bytecode = AsInstructionSequence( ByteArray::cast(regexp.DataAt(JSRegExp::kIrregexpLatin1BytecodeIndex))); if (FLAG_print_regexp_bytecode) { StdoutStream{} << "Bytecode:" << std::endl; StdoutStream{} << bytecode << std::endl; } int register_count_per_match = JSRegExp::RegistersForCaptureCount(regexp.CaptureCount()); DCHECK(subject.IsFlat()); String::FlatContent subject_content = subject.GetFlatContent(no_gc); Zone zone(isolate->allocator(), ZONE_NAME); if (subject_content.IsOneByte()) { return ExperimentalRegExpInterpreter::FindMatchesNfaOneByte( bytecode, register_count_per_match, subject_content.ToOneByteVector(), subject_index, output_registers, output_register_count, &zone); } else { return ExperimentalRegExpInterpreter::FindMatchesNfaTwoByte( bytecode, register_count_per_match, subject_content.ToUC16Vector(), subject_index, output_registers, output_register_count, &zone); } } int32_t ExperimentalRegExp::MatchForCallFromJs( Address subject, int32_t start_position, Address input_start, Address input_end, int* output_registers, int32_t output_register_count, Address backtrack_stack, RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp) { DCHECK(FLAG_enable_experimental_regexp_engine); DCHECK_NOT_NULL(isolate); DCHECK_NOT_NULL(output_registers); DCHECK(call_origin == RegExp::CallOrigin::kFromJs); DisallowHeapAllocation no_gc; DisallowJavascriptExecution no_js(isolate); DisallowHandleAllocation no_handles; DisallowHandleDereference no_deref; String subject_string = String::cast(Object(subject)); JSRegExp regexp_obj = JSRegExp::cast(Object(regexp)); return ExecRaw(isolate, regexp_obj, subject_string, output_registers, output_register_count, start_position); } MaybeHandle<Object> ExperimentalRegExp::Exec( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int subject_index, Handle<RegExpMatchInfo> last_match_info) { DCHECK(FLAG_enable_experimental_regexp_engine); DCHECK_EQ(regexp->TypeTag(), JSRegExp::EXPERIMENTAL); #ifdef VERIFY_HEAP regexp->JSRegExpVerify(isolate); #endif if (!IsCompiled(regexp, isolate) && !Compile(isolate, regexp)) { DCHECK(isolate->has_pending_exception()); return MaybeHandle<Object>(); } DCHECK(IsCompiled(regexp, isolate)); subject = String::Flatten(isolate, subject); int capture_count = regexp->CaptureCount(); int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count); int32_t* output_registers; std::unique_ptr<int32_t[]> output_registers_release; if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) { output_registers = isolate->jsregexp_static_offsets_vector(); } else { output_registers = NewArray<int32_t>(output_register_count); output_registers_release.reset(output_registers); } int num_matches = ExecRaw(isolate, *regexp, *subject, output_registers, output_register_count, subject_index); if (num_matches == 0) { return isolate->factory()->null_value(); } else { DCHECK_EQ(num_matches, 1); return RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count, output_registers); return last_match_info; } } } // namespace internal } // namespace v8