mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-11 09:18:05 +03:00
0f0ac37b56
If the sticky flag is set, the regex execution loop should break immediately even if the execution was a failure. The specification for several RegExp.prototype methods (e.g. exec and @@split) rely on this behavior.
281 lines
10 KiB
C++
281 lines
10 KiB
C++
/*
|
|
* Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "RegexByteCode.h"
|
|
#include "RegexMatch.h"
|
|
#include "RegexOptions.h"
|
|
#include "RegexParser.h"
|
|
|
|
#include <AK/Forward.h>
|
|
#include <AK/HashMap.h>
|
|
#include <AK/NonnullOwnPtrVector.h>
|
|
#include <AK/Types.h>
|
|
#include <AK/Utf32View.h>
|
|
#include <AK/Vector.h>
|
|
#include <ctype.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
namespace regex {
|
|
|
|
static const constexpr size_t c_max_recursion = 5000;
|
|
static const constexpr size_t c_match_preallocation_count = 0;
|
|
|
|
struct RegexResult final {
|
|
bool success { false };
|
|
size_t count { 0 };
|
|
Vector<Match> matches;
|
|
Vector<Vector<Match>> capture_group_matches;
|
|
Vector<HashMap<String, Match>> named_capture_group_matches;
|
|
size_t n_operations { 0 };
|
|
size_t n_capture_groups { 0 };
|
|
size_t n_named_capture_groups { 0 };
|
|
};
|
|
|
|
template<class Parser>
|
|
class Regex;
|
|
|
|
template<class Parser>
|
|
class Matcher final {
|
|
|
|
public:
|
|
Matcher(const Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
: m_pattern(pattern)
|
|
, m_regex_options(regex_options.value_or({}))
|
|
{
|
|
}
|
|
~Matcher() = default;
|
|
|
|
RegexResult match(const RegexStringView&, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
|
|
RegexResult match(const Vector<RegexStringView>, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
|
|
|
|
typename ParserTraits<Parser>::OptionsType options() const
|
|
{
|
|
return m_regex_options;
|
|
}
|
|
|
|
private:
|
|
Optional<bool> execute(const MatchInput& input, MatchState& state, MatchOutput& output, size_t recursion_level) const;
|
|
ALWAYS_INLINE Optional<bool> execute_low_prio_forks(const MatchInput& input, MatchState& original_state, MatchOutput& output, Vector<MatchState> states, size_t recursion_level) const;
|
|
|
|
const Regex<Parser>& m_pattern;
|
|
const typename ParserTraits<Parser>::OptionsType m_regex_options;
|
|
};
|
|
|
|
template<class Parser>
|
|
class Regex final {
|
|
public:
|
|
String pattern_value;
|
|
regex::Parser::Result parser_result;
|
|
OwnPtr<Matcher<Parser>> matcher { nullptr };
|
|
mutable size_t start_offset { 0 };
|
|
|
|
explicit Regex(StringView pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
|
|
~Regex() = default;
|
|
Regex(Regex&&) = default;
|
|
Regex& operator=(Regex&&) = default;
|
|
|
|
typename ParserTraits<Parser>::OptionsType options() const;
|
|
void print_bytecode(FILE* f = stdout) const;
|
|
String error_string(Optional<String> message = {}) const;
|
|
|
|
RegexResult match(const RegexStringView view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
return matcher->match(view, regex_options);
|
|
}
|
|
|
|
RegexResult match(const Vector<RegexStringView> views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
return matcher->match(views, regex_options);
|
|
}
|
|
|
|
String replace(const RegexStringView view, const StringView& replacement_pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
|
|
StringBuilder builder;
|
|
size_t start_offset = 0;
|
|
RegexResult result = matcher->match(view, regex_options);
|
|
if (!result.success)
|
|
return view.to_string();
|
|
|
|
for (size_t i = 0; i < result.matches.size(); ++i) {
|
|
auto& match = result.matches[i];
|
|
builder.append(view.substring_view(start_offset, match.global_offset - start_offset).to_string());
|
|
start_offset = match.global_offset + match.view.length();
|
|
GenericLexer lexer(replacement_pattern);
|
|
while (!lexer.is_eof()) {
|
|
if (lexer.consume_specific('\\')) {
|
|
if (lexer.consume_specific('\\')) {
|
|
builder.append('\\');
|
|
continue;
|
|
}
|
|
auto number = lexer.consume_while(isdigit);
|
|
if (auto index = number.to_uint(); index.has_value() && result.n_capture_groups >= index.value()) {
|
|
builder.append(result.capture_group_matches[i][index.value() - 1].view.to_string());
|
|
} else {
|
|
builder.appendff("\\{}", number);
|
|
}
|
|
} else {
|
|
builder.append(lexer.consume_while([](auto ch) { return ch != '\\'; }));
|
|
}
|
|
}
|
|
}
|
|
|
|
builder.append(view.substring_view(start_offset, view.length() - start_offset).to_string());
|
|
|
|
return builder.to_string();
|
|
}
|
|
|
|
// FIXME: replace(const Vector<RegexStringView>, ...)
|
|
|
|
RegexResult search(const RegexStringView view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
|
|
AllOptions options = (AllOptions)regex_options.value_or({});
|
|
if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
|
|
options.reset_flag(AllFlags::MatchNotEndOfLine);
|
|
options.reset_flag(AllFlags::MatchNotBeginOfLine);
|
|
}
|
|
options.reset_flag(AllFlags::Internal_Stateful);
|
|
options |= AllFlags::Global;
|
|
|
|
return matcher->match(view, options);
|
|
}
|
|
|
|
RegexResult search(const Vector<RegexStringView> views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
|
|
AllOptions options = (AllOptions)regex_options.value_or({});
|
|
if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
|
|
options.reset_flag(AllFlags::MatchNotEndOfLine);
|
|
options.reset_flag(AllFlags::MatchNotBeginOfLine);
|
|
}
|
|
options.reset_flag(AllFlags::Internal_Stateful);
|
|
options |= AllFlags::Global;
|
|
|
|
return matcher->match(views, options);
|
|
}
|
|
|
|
bool match(const RegexStringView view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = match(view, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool match(const Vector<RegexStringView> views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = match(views, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool search(const RegexStringView view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = search(view, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool search(const Vector<RegexStringView> views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = search(views, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool has_match(const RegexStringView view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return false;
|
|
RegexResult result = matcher->match(view, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
|
|
return result.success;
|
|
}
|
|
|
|
bool has_match(const Vector<RegexStringView> views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return false;
|
|
RegexResult result = matcher->match(views, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
|
|
return result.success;
|
|
}
|
|
};
|
|
|
|
// free standing functions for match, search and has_match
|
|
template<class Parser>
|
|
RegexResult match(const RegexStringView view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
RegexResult match(const Vector<RegexStringView> view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool match(const RegexStringView view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool match(const Vector<RegexStringView> view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
RegexResult search(const RegexStringView view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
RegexResult search(const Vector<RegexStringView> views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(views, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool search(const RegexStringView view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool search(const Vector<RegexStringView> views, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(views, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool has_match(const RegexStringView view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.has_match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool has_match(const Vector<RegexStringView> views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.has_match(views, regex_options);
|
|
}
|
|
}
|
|
|
|
using regex::has_match;
|
|
using regex::match;
|
|
using regex::Regex;
|
|
using regex::RegexResult;
|