ladybird/Userland/Libraries/LibCMake/Lexer.cpp
2023-03-04 05:43:43 -07:00

372 lines
9.7 KiB
C++

/*
* Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "Lexer.h"
#include <AK/CharacterTypes.h>
#include <AK/Debug.h>
#include <AK/Format.h>
#include <AK/ScopeLogger.h>
namespace CMake {
static bool is_valid_identifier_initial_char(char c)
{
return is_ascii_alpha(c) || c == '_';
}
static bool is_valid_identifier_char(char c)
{
return is_ascii_alphanumeric(c) || c == '_';
}
ErrorOr<Vector<Token>> Lexer::lex(StringView input)
{
Lexer lexer { input };
return lexer.lex_file();
}
Lexer::Lexer(StringView input)
: GenericLexer(input)
{
}
ErrorOr<Vector<Token>> Lexer::lex_file()
{
m_tokens.clear_with_capacity();
while (!is_eof()) {
consume_whitespace_or_comments();
if (is_eof())
break;
if (is_valid_identifier_initial_char(peek())) {
consume_command_invocation();
} else {
consume_garbage();
}
}
return m_tokens;
}
void Lexer::skip_whitespace()
{
while (!is_eof()) {
if (next_is('\n')) {
next_line();
continue;
}
auto consumed = consume_while([&](char c) {
return c == ' ' || c == '\t';
});
if (consumed.is_empty())
break;
}
}
void Lexer::consume_whitespace_or_comments()
{
ScopeLogger<CMAKE_DEBUG> log;
while (!is_eof()) {
skip_whitespace();
if (next_is('#')) {
consume_comment();
} else {
break;
}
}
}
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#command-invocations
void Lexer::consume_command_invocation()
{
ScopeLogger<CMAKE_DEBUG> log;
auto identifier_start = position();
auto identifier = consume_while(is_valid_identifier_char);
auto control_keyword = control_keyword_from_string(identifier);
if (control_keyword.has_value()) {
emit_token(Token::Type::ControlKeyword, identifier, identifier_start, position(), control_keyword.release_value());
} else {
emit_token(Token::Type::Identifier, identifier, identifier_start, position());
}
consume_whitespace_or_comments();
if (next_is('('))
consume_open_paren();
consume_arguments();
if (next_is(')'))
consume_close_paren();
}
void Lexer::consume_arguments()
{
ScopeLogger<CMAKE_DEBUG> log;
while (!is_eof()) {
consume_whitespace_or_comments();
if (next_is('(')) {
consume_open_paren();
consume_whitespace_or_comments();
consume_arguments();
consume_whitespace_or_comments();
if (next_is(')'))
consume_close_paren();
continue;
}
if (next_is(')'))
return;
consume_argument();
}
}
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#command-arguments
void Lexer::consume_argument()
{
ScopeLogger<CMAKE_DEBUG> log;
consume_whitespace_or_comments();
if (next_is('[')) {
consume_bracket_argument();
return;
}
if (next_is('"')) {
consume_quoted_argument();
return;
}
consume_unquoted_argument();
}
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#bracket-argument
void Lexer::consume_bracket_argument()
{
ScopeLogger<CMAKE_DEBUG> log;
auto start = position();
auto value = read_bracket_argument();
emit_token(Token::Type::BracketArgument, value, start, position());
}
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#quoted-argument
void Lexer::consume_quoted_argument()
{
ScopeLogger<CMAKE_DEBUG> log;
auto start = position();
auto start_offset = tell();
VERIFY(consume_specific('"'));
while (!is_eof()) {
if (next_is('"')) {
ignore();
break;
}
if (next_is("\\\""sv)) {
ignore(2);
continue;
}
if (next_is('\n')) {
next_line();
continue;
}
ignore();
}
auto whole_token = m_input.substring_view(start_offset, tell() - start_offset);
auto value = whole_token.substring_view(1, whole_token.length() - 2);
auto variable_references = parse_variable_references_from_argument(whole_token, start);
emit_token(Token::Type::QuotedArgument, value, start, position(), {}, move(variable_references));
}
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#unquoted-argument
void Lexer::consume_unquoted_argument()
{
ScopeLogger<CMAKE_DEBUG> log;
auto start_offset = tell();
auto start = position();
while (!is_eof()) {
if (next_is('\\')) {
consume_escaped_character('\\');
continue;
}
auto consumed = consume_until([](char c) { return is_ascii_space(c) || "()#\"\\'"sv.contains(c); });
if (consumed.is_empty())
break;
// FIXME: `unquoted_legacy`
}
auto value = m_input.substring_view(start_offset, tell() - start_offset);
auto variable_references = parse_variable_references_from_argument(value, start);
emit_token(Token::Type::UnquotedArgument, value, start, position(), {}, move(variable_references));
}
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#comments
void Lexer::consume_comment()
{
ScopeLogger<CMAKE_DEBUG> log;
auto start = position();
VERIFY(consume_specific('#'));
if (next_is('[')) {
// Bracket comment
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#bracket-comment
auto comment = read_bracket_argument();
emit_token(Token::Type::BracketComment, comment, start, position());
return;
}
// Line comment
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#line-comment
auto comment = consume_until('\n');
emit_token(Token::Type::LineComment, comment, start, position());
}
void Lexer::consume_open_paren()
{
auto start = position();
VERIFY(consume_specific('('));
emit_token(Token::Type::OpenParen, "("sv, start, position());
}
void Lexer::consume_close_paren()
{
auto start = position();
VERIFY(consume_specific(')'));
emit_token(Token::Type::CloseParen, ")"sv, start, position());
}
void Lexer::consume_garbage()
{
ScopeLogger<CMAKE_DEBUG> log;
auto start = position();
auto contents = consume_until(is_ascii_space);
if (!contents.is_empty())
emit_token(Token::Type::Garbage, contents, start, position());
}
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#bracket-argument
// Used by both bracket arguments and bracket comments.
StringView Lexer::read_bracket_argument()
{
VERIFY(consume_specific('['));
auto leading_equals_signs = consume_while([](char c) { return c == '='; });
consume_specific('[');
auto start = tell();
auto end = start;
while (!is_eof()) {
// Read everything until we see `]={len}]`.
ignore_until(']');
end = tell();
ignore();
if (next_is(leading_equals_signs))
ignore(leading_equals_signs.length());
if (consume_specific(']'))
break;
}
return m_input.substring_view(start, end - start);
}
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#variable-references
Vector<VariableReference> Lexer::parse_variable_references_from_argument(StringView argument_value, Position argument_start)
{
auto position = argument_start;
GenericLexer lexer { argument_value };
Vector<VariableReference> variable_references;
while (!lexer.is_eof()) {
if (lexer.next_is('\n')) {
lexer.ignore();
position.column = 0;
position.line++;
continue;
}
if (lexer.next_is('\\')) {
lexer.ignore();
if (lexer.next_is('\n')) {
lexer.ignore();
position.column = 0;
position.line++;
continue;
}
lexer.ignore();
position.column += 2;
}
if (lexer.next_is('$')) {
auto start = position;
lexer.ignore();
position.column++;
if (lexer.next_is("ENV{"sv)) {
lexer.ignore(4);
position.column += 4;
} else if (lexer.next_is('{')) {
lexer.ignore();
position.column++;
} else {
auto skipped = lexer.consume_until(is_any_of("$ \n"sv));
position.column += skipped.length();
continue;
}
auto variable_name = lexer.consume_until(is_any_of("} \n"sv));
position.column += variable_name.length();
if (lexer.next_is('}')) {
lexer.ignore();
position.column++;
variable_references.empend(variable_name, start, position);
}
continue;
}
lexer.ignore();
position.column++;
}
return variable_references;
}
Position Lexer::position() const
{
return Position {
.line = m_line,
.column = tell() - m_string_offset_after_previous_newline,
};
}
void Lexer::next_line()
{
VERIFY(consume_specific('\n'));
m_string_offset_after_previous_newline = tell();
m_line++;
}
void Lexer::emit_token(Token::Type type, StringView value, Position start, Position end, Optional<ControlKeywordType> control_keyword, Vector<VariableReference> variable_references)
{
dbgln_if(CMAKE_DEBUG, "Emitting {} token: `{}` ({}:{} to {}:{})", to_string(type), value, start.line, start.column, end.line, end.column);
m_tokens.empend(type, value, start, end, move(control_keyword), move(variable_references));
}
}