// Mirror of https://github.com/rui314/mold.git (synced 2024-11-13 09:39:13 +03:00)
// mold/macho/yaml.cc, 2021-10-16 19:59:21 +09:00
// 243 lines, 5.7 KiB, C++

#include "mold.h"
namespace mold::macho {
// One lexical unit produced by the YAML tokenizer.
struct Token {
  // Named token kinds. Punctuation tokens do not appear here: the tokenizer
  // stores the punctuation character itself ('[', ',', '-') in `kind`.
  enum {
    STRING = 1,
    LABEL = 2,
    INDENT = 3,
    DEDENT = 4,
    RESET = 5,
    END = 6,
  };

  // Either one of the enumerators above or a punctuation character code.
  u8 kind{0};

  // The slice of the input text this token was created from.
  std::string_view str;
};
// Tokenizer/parser for the YAML text handed to parse_yaml().
//
// The parser only keeps a view of the text; the caller must keep the
// underlying buffer alive for as long as the parser and the tokens it
// produced are in use.
class YamlParser {
public:
  // `explicit` so that a string_view is never silently converted into a
  // parser object.
  explicit YamlParser(std::string_view input) : input(input) {}

  // Tokenizes the input and builds the resulting node.
  YamlNode parse(Context &ctx);

private:
  // Splits the whole input into a flat list of tokens.
  std::vector<Token> tokenize(Context &ctx);

  // Returns the indentation associated with `str`, which must be a view
  // into `input`.
  i64 get_indent(std::string_view str);

  // The helpers below each consume one construct from the front of `str`,
  // append the resulting token(s) to `tokens` and return the unconsumed
  // remainder of `str`.
  std::string_view
  tokenize_bare_string(Context &ctx, std::vector<Token> &tokens,
                       std::string_view str);

  std::string_view
  tokenize_list(Context &ctx, std::vector<Token> &tokens, std::string_view str);

  // `end` is the quote character that terminates the literal.
  std::string_view
  tokenize_string(Context &ctx, std::vector<Token> &tokens,
                  std::string_view str, char end);

  // The complete text being parsed.
  std::string_view input;
};
// Splits `input` into a flat token stream that always ends with a
// Token::END token.
//
// Recognized constructs: "---" document markers (RESET), "- " list item
// markers, '[' ... lists, single- and double-quoted strings, ',' separators,
// '#' comments, leading-space indentation (INDENT/DEDENT) and bare strings.
//
// NOTE(review): a line that starts in column 0 never produces DEDENT tokens
// because indentation is only examined for lines that begin with a space,
// and no LABEL tokens are produced yet. Both look like unfinished work
// rather than something to patch here -- confirm the intended grammar.
std::vector<Token> YamlParser::tokenize(Context &ctx) {
  std::vector<Token> tokens;
  std::vector<i64> indents;
  std::string_view str = input;

  // Discards the rest of the current line including its newline, or
  // everything that is left when the input has no further newline.
  // (Passing find()'s npos straight to substr() would throw.)
  auto skip_line = [&] {
    size_t nl = str.find('\n');
    str = str.substr(nl == str.npos ? str.size() : nl + 1);
  };

  while (!str.empty()) {
    if (str.starts_with("---")) {
      tokens.push_back({Token::RESET, str.substr(0, 3)});
      skip_line();
      continue;
    }

    // Line terminators carry no token of their own. They must be consumed
    // here; otherwise they would reach the bare-string fallback below, which
    // cannot make progress on them.
    if (str.starts_with('\n')) {
      str = str.substr(1);
      continue;
    }

    if (str.starts_with("- ")) {
      tokens.push_back({'-', str.substr(0, 1)});

      // Skip the dash and the spaces that follow it. The search starts at
      // index 1 so that the result is an offset into `str` itself.
      size_t pos = str.find_first_not_of(' ', 1);
      str = str.substr(pos == str.npos ? str.size() : pos);

      // Remember where the item's content starts so that continuation lines
      // aligned with it are treated as being on the same level.
      if (!str.empty() && !str.starts_with('\n'))
        indents.push_back(get_indent(str));
      continue;
    }

    if (str.starts_with('[')) {
      str = tokenize_list(ctx, tokens, str);
      continue;
    }

    if (str.starts_with('\'')) {
      str = tokenize_string(ctx, tokens, str, '\'');
      continue;
    }

    if (str.starts_with('"')) {
      str = tokenize_string(ctx, tokens, str, '"');
      continue;
    }

    if (str.starts_with(',')) {
      tokens.push_back({',', str.substr(0, 1)});
      str = str.substr(1);
      continue;
    }

    if (str.starts_with('#')) {
      skip_line();
      continue;
    }

    if (str.starts_with(' ')) {
      // `str` is always a suffix of `input`, so looking one character back
      // is valid whenever we are not at the very beginning.
      const bool at_line_start =
        str.data() == input.data() || str.data()[-1] == '\n';

      size_t pos = str.find_first_not_of(' ');

      // Nothing but spaces until the end of the input.
      if (pos == str.npos) {
        str = str.substr(str.size());
        continue;
      }

      // Comment-only and blank lines do not affect indentation.
      if (str[pos] == '#') {
        skip_line();
        continue;
      }

      if (str[pos] == '\n') {
        str = str.substr(pos + 1);
        continue;
      }

      std::string_view spaces = str.substr(0, pos);
      str = str.substr(pos);

      // Spaces between two tokens on the same line are just separators.
      if (!at_line_start)
        continue;

      const i64 depth = static_cast<i64>(pos);

      if (indents.empty() || indents.back() < depth) {
        tokens.push_back({Token::INDENT, spaces});
        // Record the new level so that following lines at the same depth
        // are not reported as another INDENT.
        indents.push_back(depth);
        continue;
      }

      if (indents.back() == depth)
        continue;

      // Close every level that is deeper than the current line, emitting
      // one DEDENT per closed level.
      while (!indents.empty() && indents.back() > depth) {
        indents.pop_back();
        tokens.push_back({Token::DEDENT, spaces});
      }

      // The line must land exactly on an enclosing level (or below all of
      // them).
      if (!indents.empty() && indents.back() < depth)
        Fatal(ctx) << "bad indentation";
      continue;
    }

    std::string_view rest = tokenize_bare_string(ctx, tokens, str);
    if (rest.size() == str.size()) {
      // The character is not part of any construct we know about. Fatal
      // presumably terminates the process (TODO confirm); skipping the
      // character regardless guarantees that this loop always advances.
      Fatal(ctx) << "unexpected character";
      rest = str.substr(1);
    }
    str = rest;
  }

  tokens.push_back({Token::END, str});
  return tokens;
}
// Returns the zero-based column of `str` within its line, i.e. the number of
// characters between the start of that line and the start of `str`.
//
// `str` must be a view into `input` (a position at the very end of the input
// is allowed).
i64 YamlParser::get_indent(std::string_view str) {
  const char *begin = input.data();
  const char *p = str.data();
  assert(begin <= p && p <= begin + input.size());

  // Everything in the input that precedes `str`.
  std::string_view prefix = input.substr(0, p - begin);

  size_t nl = prefix.rfind('\n');
  if (nl == prefix.npos)
    return prefix.size(); // first line: the whole prefix is on this line

  // Exclude the newline itself; otherwise every line after the first would
  // report a column that is one too large.
  return prefix.size() - nl - 1;
}
// Tokenizes a flow-style list such as `[ a, 'b', "c" ]`.
//
// `str` must start with '['. Emits '[', the element and ',' tokens and the
// closing ']' token, and returns the text that follows the closing bracket.
// If the input ends before the list is closed, "unclosed list" is reported
// and an empty view is returned.
std::string_view
YamlParser::tokenize_list(Context &ctx, std::vector<Token> &tokens,
                          std::string_view str) {
  tokens.push_back({'[', str.substr(0, 1)});
  str = str.substr(1);

  while (!str.empty() && str[0] != ']') {
    if (str[0] == ' ' || str[0] == '\n') {
      // Whitespace may run to the end of the input; substr(npos) would throw.
      size_t pos = str.find_first_not_of(" \n");
      str = str.substr(pos == str.npos ? str.size() : pos);
      continue;
    }

    if (str.starts_with('\'')) {
      str = tokenize_string(ctx, tokens, str, '\'');
      continue;
    }

    if (str.starts_with('"')) {
      str = tokenize_string(ctx, tokens, str, '"');
      continue;
    }

    if (str.starts_with(',')) {
      tokens.push_back({',', str.substr(0, 1)});
      str = str.substr(1);
      continue;
    }

    // Anything else has to be an unquoted element. Without this case the
    // loop would never advance past it.
    std::string_view rest = tokenize_bare_string(ctx, tokens, str);
    if (rest.size() == str.size()) {
      // Fatal presumably terminates the process (TODO confirm). Should it
      // return, skip the offending character so that the caller still makes
      // progress.
      Fatal(ctx) << "unexpected character in list";
      return str.substr(1);
    }
    str = rest;
  }

  if (str.empty()) {
    Error(ctx) << "unclosed list";
    return str; // nothing left to skip; substr(1) would throw here
  }

  tokens.push_back({']', str.substr(0, 1)});
  return str.substr(1);
}
// Tokenizes a quoted string literal.
//
// `str` must start with the opening quote and `end` is the matching closing
// quote character. Emits a STRING token holding the text between the quotes
// (no escape processing is done) and returns the text that follows the
// closing quote.
std::string_view
YamlParser::tokenize_string(Context &ctx, std::vector<Token> &tokens,
                            std::string_view str, char end) {
  // Search from index 1 so that `pos` is an offset into `str` itself and
  // the opening quote is not mistaken for the closing one.
  size_t pos = str.find(end, 1);
  if (pos == str.npos) {
    // Fatal presumably terminates the process (TODO confirm). Should it
    // return, consume the rest of the input instead of indexing with npos.
    Fatal(ctx) << "unterminated string literal";
    return str.substr(str.size());
  }

  tokens.push_back({Token::STRING, str.substr(1, pos - 1)});

  // Resume right after the closing quote.
  return str.substr(pos + 1);
}
// Tokenizes an unquoted string made of ASCII letters, digits and the
// characters "_-/.".
//
// Emits a STRING token for the longest such prefix of `str` and returns the
// remainder.
std::string_view
YamlParser::tokenize_bare_string(Context &ctx, std::vector<Token> &tokens,
                                 std::string_view str) {
  size_t pos = str.find_first_not_of(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-/.");

  // The bare string may extend to the very end of the input; substr(npos)
  // would throw in that case.
  if (pos == str.npos)
    pos = str.size();

  // A zero-length match means `str` does not start with a bare string at
  // all. Returning it untouched without complaint would leave the caller
  // stuck on the same character. Fatal presumably terminates the process
  // (TODO confirm).
  if (pos == 0)
    Fatal(ctx) << "unexpected character";

  tokens.push_back({Token::STRING, str.substr(0, pos)});
  return str.substr(pos);
}
// Tokenizes the input and prints one line per token (kind label followed by
// the token text) through SyncOut. The returned node is a fixed placeholder.
YamlNode YamlParser::parse(Context &ctx) {
  // Label printed in front of a token's text, or nullptr for kinds that
  // are not part of the dump.
  auto label_of = [](u8 kind) -> const char * {
    switch (kind) {
    case Token::STRING:
      return "STRING ";
    case Token::LABEL:
      return "LABEL ";
    case Token::INDENT:
      return "INDENT ";
    case Token::DEDENT:
      return "DEDENT ";
    case Token::RESET:
      return "RESET ";
    case Token::END:
      return "END ";
    case '[':
      return "'[' ";
    case ']':
      return "']' ";
    case ',':
      return "',' ";
    case '-':
      return "'-' ";
    }
    return nullptr;
  };

  const std::vector<Token> toks = tokenize(ctx);

  for (const Token &t : toks)
    if (const char *label = label_of(t.kind))
      SyncOut(ctx) << label << t.str;

  return {"foo"};
}
// Entry point: parses the YAML text in `str`.
//
// The text must be non-empty and end with a newline; both conditions are
// checked with assert only.
YamlNode parse_yaml(Context &ctx, std::string_view str) {
  assert(!str.empty());
  assert(str.back() == '\n');

  YamlParser parser(str);
  return parser.parse(ctx);
}
} // namespace mold::macho