1
1
mirror of https://github.com/rui314/mold.git synced 2024-09-11 21:17:28 +03:00

Move glob pattern matchers out of elf directory

Because we want to use them in macho too.
This commit is contained in:
Rui Ueyama 2022-06-05 17:14:14 +08:00
parent c786f55370
commit d2d73cbcbc
6 changed files with 95 additions and 87 deletions

View File

@ -544,7 +544,7 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
} else if (read_arg("sysroot")) {
ctx.arg.sysroot = arg;
} else if (read_arg("unique")) {
std::optional<GlobPattern> pat = GlobPattern::compile(arg);
std::optional<Glob> pat = Glob::compile(arg);
if (!pat)
Fatal(ctx) << "-unique: invalid glob pattern: " << arg;
ctx.arg.unique = std::move(*pat);

View File

@ -1233,60 +1233,6 @@ void parse_version_script(Context<E> &ctx, std::string path);
template <typename E>
void parse_dynamic_list(Context<E> &ctx, std::string path);
//
// glob.cc
//
// A single compiled glob pattern.
//
// Instances are obtained only through compile(), which returns an empty
// optional when it cannot build a pattern (callers report that as an
// invalid glob pattern). The pattern is stored as a sequence of Elements
// that do_match() consumes from front to back.
class GlobPattern {
  // Kind of one pattern element.
  // NOTE(review): presumably literal text, `*`, `?` and `[...]` — confirm
  // against compile() in glob.cc.
  enum Kind { STRING, STAR, QUESTION, BRACKET };

  // One parsed piece of the pattern. `str` and `bitset` are payloads whose
  // use depends on `kind` (see glob.cc).
  struct Element {
    Element(Kind k) : kind(k) {}
    Kind kind;
    std::string str;
    std::bitset<256> bitset;
  };

public:
  // Parses `pat`; returns std::nullopt-like empty optional on failure.
  static std::optional<GlobPattern> compile(std::string_view pat);

  // Returns the result of do_match() for `str` against the stored elements.
  bool match(std::string_view str);

private:
  // `vec` is a named rvalue reference, i.e. an lvalue inside this
  // initializer; it must be std::move'd or the whole vector gets copied.
  GlobPattern(std::vector<Element> &&vec) : elements(std::move(vec)) {}

  static bool do_match(std::string_view str, std::span<Element> elements);

  std::vector<Element> elements;
};
//
// version-matcher.cc
//
// Matches strings against a set of patterns, each registered with a u16
// version via add(). Member functions other than empty() are defined in
// version-matcher.cc.
class VersionMatcher {
public:
// Registers pattern `pat` with version `ver`.
bool add(std::string_view pat, u16 ver);
// True when there is neither a trie root nor any entry in `globs`.
bool empty() const { return !root && globs.empty(); }
// Looks up `str`; an empty optional means no pattern matched.
std::optional<u16> find(std::string_view str);
private:
// Node of the byte-indexed trie built from the patterns.
struct TrieNode {
// -1 converts to the maximum u32 value.
// NOTE(review): presumably the "no value" marker — confirm in version-matcher.cc.
u32 value = -1;
TrieNode *suffix_link = nullptr;
// One child slot per possible byte value.
std::unique_ptr<TrieNode> children[256];
};
// Runs fix_suffix_links() on the root and then fix_values().
void compile();
void fix_suffix_links(TrieNode &node);
void fix_values();
// Patterns in the order they were added.
std::vector<std::string> strings;
std::unique_ptr<TrieNode> root;
// Compiled glob patterns, each paired with a u32 index.
std::vector<std::pair<GlobPattern, u32>> globs;
// Versions in the order they were added.
std::vector<u16> versions;
// Lets find() run compile() exactly once via std::call_once.
std::once_flag once_flag;
bool compiled = false;
};
//
// lto.cc
//
@ -1598,7 +1544,7 @@ struct Context {
i64 print_dependencies = 0;
i64 spare_dynamic_tags = 5;
i64 thread_count = 0;
std::optional<GlobPattern> unique;
std::optional<Glob> unique;
std::optional<u64> shuffle_sections_seed;
std::string Map;
std::string chroot;

View File

@ -1064,8 +1064,8 @@ void apply_version_script(Context<E> &ctx) {
}
// Otherwise, use glob pattern matchers.
VersionMatcher matcher;
VersionMatcher cpp_matcher;
MultiGlob matcher;
MultiGlob cpp_matcher;
for (VersionPattern &v : ctx.version_patterns) {
if (v.is_cpp) {
@ -1077,6 +1077,9 @@ void apply_version_script(Context<E> &ctx) {
}
}
matcher.compile();
cpp_matcher.compile();
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
for (Symbol<E> *sym : file->get_global_syms()) {
if (sym->file != file)

View File

@ -2,9 +2,9 @@
#include <cstring>
namespace mold::elf {
namespace mold {
std::optional<GlobPattern> GlobPattern::compile(std::string_view pat) {
std::optional<Glob> Glob::compile(std::string_view pat) {
std::vector<Element> vec;
while (!pat.empty()) {
@ -87,14 +87,14 @@ std::optional<GlobPattern> GlobPattern::compile(std::string_view pat) {
}
}
return {GlobPattern{std::move(vec)}};
return {Glob{std::move(vec)}};
}
bool GlobPattern::match(std::string_view str) {
bool Glob::match(std::string_view str) {
return do_match(str, elements);
}
bool GlobPattern::do_match(std::string_view str, std::span<Element> elements) {
bool Glob::do_match(std::string_view str, std::span<Element> elements) {
while (!elements.empty()) {
Element &e = elements[0];
elements = elements.subspan(1);
@ -146,4 +146,4 @@ bool GlobPattern::do_match(std::string_view str, std::span<Element> elements) {
return str.empty();
}
} // namespace mold::elf
} // namespace mold

55
mold.h
View File

@ -5,6 +5,7 @@
#include <array>
#include <atomic>
#include <bit>
#include <bitset>
#include <cassert>
#include <cstdio>
#include <cstring>
@ -12,6 +13,7 @@
#include <filesystem>
#include <iostream>
#include <mutex>
#include <optional>
#include <span>
#include <sstream>
#include <string>
@ -465,6 +467,59 @@ private:
std::vector<std::atomic_uint8_t> buckets;
};
//
// glob.cc
//
// A single compiled glob pattern.
//
// Instances are obtained only through compile(), which returns an empty
// optional when it cannot build a pattern (callers report that as an
// invalid glob pattern). The pattern is stored as a sequence of Elements
// that do_match() consumes from front to back.
class Glob {
  // Kind of one pattern element.
  // NOTE(review): presumably literal text, `*`, `?` and `[...]` — confirm
  // against compile() in glob.cc.
  enum Kind { STRING, STAR, QUESTION, BRACKET };

  // One parsed piece of the pattern. `str` and `bitset` are payloads whose
  // use depends on `kind` (see glob.cc).
  struct Element {
    Element(Kind k) : kind(k) {}
    Kind kind;
    std::string str;
    std::bitset<256> bitset;
  };

public:
  // Parses `pat`; returns an empty optional on failure.
  static std::optional<Glob> compile(std::string_view pat);

  // Returns the result of do_match() for `str` against the stored elements.
  bool match(std::string_view str);

private:
  // `vec` is a named rvalue reference, i.e. an lvalue inside this
  // initializer; it must be std::move'd or the whole vector gets copied.
  Glob(std::vector<Element> &&vec) : elements(std::move(vec)) {}

  static bool do_match(std::string_view str, std::span<Element> elements);

  std::vector<Element> elements;
};
//
// multi-glob.cc
//
// Matches a string against many glob patterns at once. Each pattern is
// registered together with a u32 value via add().
//
// Usage order is enforced by asserts in multi-glob.cc: call add() for every
// pattern, then compile() exactly once, then find().
class MultiGlob {
public:
// Registers pattern `pat` with associated value `val`. Must be called
// before compile(); `pat` must not be empty.
bool add(std::string_view pat, u32 val);
// Finalizes the matcher; must be called once before find().
void compile();
// True when no pattern has been added yet.
bool empty() const { return strings.empty(); }
// Returns the value registered for a matching pattern, or an empty
// optional if nothing matches. Among matching complex glob patterns the
// one added earliest (smallest index) wins.
std::optional<u32> find(std::string_view str);
private:
// Node of the byte-indexed trie used for the Aho-Corasick matcher.
struct TrieNode {
// -1 converts to the maximum u32 value.
// NOTE(review): presumably the "no value" marker, matching the
// UINT32_MAX sentinel used in find() — confirm.
u32 value = -1;
TrieNode *suffix_link = nullptr;
// One child slot per possible byte value.
std::unique_ptr<TrieNode> children[256];
};
void fix_suffix_links(TrieNode &node);
void fix_values();
// Every added pattern, in insertion order; a pattern's position is its index.
std::vector<std::string> strings;
// Trie root; may be null (compile() and find() both check for that).
std::unique_ptr<TrieNode> root;
// Patterns that are not "simple", compiled with Glob and paired with
// their index into `values`.
std::vector<std::pair<Glob, u32>> globs;
// Values in insertion order, parallel to `strings`.
std::vector<u32> values;
// Set by compile(); checked by asserts in add(), compile() and find().
bool compiled = false;
};
//
// uuid.cc
//

View File

@ -1,34 +1,34 @@
// This file implements the Aho-Corasick algorithm to match version
// script patterns to symbol strings as quickly as possible.
// This file implements the Aho-Corasick algorithm to match multiple
// glob patterns to symbol strings as quickly as possible.
//
// Here are some examples of version script patterns:
// Here are some examples of glob patterns:
//
// qt_private_api_tag*
// *16QAccessibleCache*
// *32QAbstractFileIconProviderPrivate*
// *17QPixmapIconEngine*
//
// The pattern is a glob pattern, so `*` is a wildcard that matches
// any substring. We sometimes have hundreds of version script
// patterns and have to match them against millions of symbol strings.
// `*` is a wildcard that matches any substring. We sometimes have
// hundreds of glob patterns and have to match them against millions
// of symbol strings.
//
// Aho-Corasick cannot handle complex patterns such as `*foo*bar*`.
// We handle such patterns with GlobPattern. GlobPattern is relatively
// slow, but complex patterns are rare in practice, so it should be OK.
// We handle such patterns with the Glob class. Glob is relatively
// slow, but complex patterns are rare in practice, so it should be
// OK.
#include "mold.h"
#include <queue>
#include <regex>
namespace mold::elf {
namespace mold {
std::optional<u16> VersionMatcher::find(std::string_view str) {
std::optional<u32> MultiGlob::find(std::string_view str) {
assert(compiled);
u32 idx = UINT32_MAX;
if (root) {
std::call_once(once_flag, [&] { compile(); });
// Match against simple glob patterns
TrieNode *node = root.get();
@ -53,13 +53,13 @@ std::optional<u16> VersionMatcher::find(std::string_view str) {
}
// Match against complex glob patterns
for (std::pair<GlobPattern, u32> &glob : globs)
for (std::pair<Glob, u32> &glob : globs)
if (glob.first.match(str))
idx = std::min(idx, glob.second);
if (idx == UINT32_MAX)
return {};
return versions[idx];
return values[idx];
}
static bool is_simple_pattern(std::string_view pat) {
@ -82,17 +82,17 @@ static std::string handle_stars(std::string_view pat) {
return "\0"s + str + "\0"s;
}
bool VersionMatcher::add(std::string_view pat, u16 ver) {
bool MultiGlob::add(std::string_view pat, u32 val) {
assert(!compiled);
assert(!pat.empty());
u32 idx = strings.size();
strings.push_back(std::string(pat));
versions.push_back(ver);
values.push_back(val);
// Complex glob pattern
if (!is_simple_pattern(pat)) {
if (std::optional<GlobPattern> glob = GlobPattern::compile(pat)) {
if (std::optional<Glob> glob = Glob::compile(pat)) {
globs.push_back({std::move(*glob), idx});
return true;
}
@ -114,13 +114,17 @@ bool VersionMatcher::add(std::string_view pat, u16 ver) {
return true;
}
void VersionMatcher::compile() {
fix_suffix_links(*root);
fix_values();
// Finalizes the matcher. Marks it as compiled, so a second compile() or a
// later add() trips an assert, and find() is allowed from now on. If a trie
// exists, its suffix links and node values are fixed up.
void MultiGlob::compile() {
assert(!compiled);
compiled = true;
// root can be null, in which case there is no trie to fix up.
// NOTE(review): presumably when no simple pattern was added — confirm in add().
if (root) {
fix_suffix_links(*root);
fix_values();
}
}
void VersionMatcher::fix_suffix_links(TrieNode &node) {
void MultiGlob::fix_suffix_links(TrieNode &node) {
for (i64 i = 0; i < 256; i++) {
if (!node.children[i])
continue;
@ -146,7 +150,7 @@ void VersionMatcher::fix_suffix_links(TrieNode &node) {
}
}
void VersionMatcher::fix_values() {
void MultiGlob::fix_values() {
std::queue<TrieNode *> queue;
queue.push(root.get());
@ -163,4 +167,4 @@ void VersionMatcher::fix_values() {
} while (!queue.empty());
}
} // namespace mold::elf
} // namespace mold