ladybird/Userland/Shell/PosixLexer.h
Ali Mohammad Pur 7b031138fc Shell: Correctly handle commands after heredoc contents
Previously we did not emit a newline after the ending heredoc key, which
wreaked havoc on the parser logic, leading to parse errors.
2023-05-05 09:35:24 +03:30

444 lines
12 KiB
C++

/*
* Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/GenericLexer.h>
#include <AK/Queue.h>
#include <AK/String.h>
#include <AK/Variant.h>
#include <AK/Vector.h>
#include <Shell/AST.h>
namespace Shell::Posix {
enum class Reduction {
None,
End,
Operator,
Comment,
SingleQuotedString,
DoubleQuotedString,
Expansion,
CommandExpansion,
Start,
ArithmeticExpansion,
SpecialParameterExpansion,
ParameterExpansion,
CommandOrArithmeticSubstitutionExpansion,
ExtendedParameterExpansion,
// Separate rule, not used by the main flow.
HeredocContents,
};
struct ExpansionRange {
size_t start;
size_t length;
};
struct ParameterExpansion {
StringBuilder parameter;
ExpansionRange range;
};
struct CommandExpansion {
StringBuilder command;
ExpansionRange range;
};
struct ArithmeticExpansion {
String expression;
StringBuilder value;
ExpansionRange range;
};
using Expansion = Variant<ParameterExpansion, CommandExpansion, ArithmeticExpansion>;
struct ResolvedParameterExpansion {
String parameter;
String argument;
ExpansionRange range;
enum class Op {
UseDefaultValue, // ${parameter:-word}
AssignDefaultValue, // ${parameter:=word}
IndicateErrorIfEmpty, // ${parameter:?word}
UseAlternativeValue, // ${parameter:+word}
UseDefaultValueIfUnset, // ${parameter-default}
AssignDefaultValueIfUnset, // ${parameter=default}
IndicateErrorIfUnset, // ${parameter?default}
UseAlternativeValueIfUnset, // ${parameter+default}
RemoveLargestSuffixByPattern, // ${parameter%%pattern}
RemoveLargestPrefixByPattern, // ${parameter##pattern}
RemoveSmallestSuffixByPattern, // ${parameter%pattern}
RemoveSmallestPrefixByPattern, // ${parameter#pattern}
StringLength, // ${#parameter}
GetPositionalParameter, // ${parameter}
GetVariable, // ${parameter}
GetLastBackgroundPid, // $!
GetPositionalParameterList, // $*
GetCurrentOptionFlags, // $-
GetPositionalParameterCount, // $#
GetLastExitStatus, // $?
GetPositionalParameterListAsString, // $@
GetShellProcessId, // $$
} op;
enum class Expand {
Nothing,
Word,
} expand { Expand::Nothing };
DeprecatedString to_deprecated_string() const
{
StringBuilder builder;
builder.append("{"sv);
switch (op) {
case Op::UseDefaultValue:
builder.append("UseDefaultValue"sv);
break;
case Op::AssignDefaultValue:
builder.append("AssignDefaultValue"sv);
break;
case Op::IndicateErrorIfEmpty:
builder.append("IndicateErrorIfEmpty"sv);
break;
case Op::UseAlternativeValue:
builder.append("UseAlternativeValue"sv);
break;
case Op::UseDefaultValueIfUnset:
builder.append("UseDefaultValueIfUnset"sv);
break;
case Op::AssignDefaultValueIfUnset:
builder.append("AssignDefaultValueIfUnset"sv);
break;
case Op::IndicateErrorIfUnset:
builder.append("IndicateErrorIfUnset"sv);
break;
case Op::UseAlternativeValueIfUnset:
builder.append("UseAlternativeValueIfUnset"sv);
break;
case Op::RemoveLargestSuffixByPattern:
builder.append("RemoveLargestSuffixByPattern"sv);
break;
case Op::RemoveLargestPrefixByPattern:
builder.append("RemoveLargestPrefixByPattern"sv);
break;
case Op::RemoveSmallestSuffixByPattern:
builder.append("RemoveSmallestSuffixByPattern"sv);
break;
case Op::RemoveSmallestPrefixByPattern:
builder.append("RemoveSmallestPrefixByPattern"sv);
break;
case Op::StringLength:
builder.append("StringLength"sv);
break;
case Op::GetPositionalParameter:
builder.append("GetPositionalParameter"sv);
break;
case Op::GetLastBackgroundPid:
builder.append("GetLastBackgroundPid"sv);
break;
case Op::GetPositionalParameterList:
builder.append("GetPositionalParameterList"sv);
break;
case Op::GetCurrentOptionFlags:
builder.append("GetCurrentOptionFlags"sv);
break;
case Op::GetPositionalParameterCount:
builder.append("GetPositionalParameterCount"sv);
break;
case Op::GetLastExitStatus:
builder.append("GetLastExitStatus"sv);
break;
case Op::GetPositionalParameterListAsString:
builder.append("GetPositionalParameterListAsString"sv);
break;
case Op::GetShellProcessId:
builder.append("GetShellProcessId"sv);
break;
case Op::GetVariable:
builder.append("GetVariable"sv);
break;
}
builder.append(" "sv);
builder.append(parameter);
builder.append(" ("sv);
builder.append(argument);
builder.append(")"sv);
builder.append("}"sv);
return builder.to_deprecated_string();
}
};
struct ResolvedCommandExpansion {
RefPtr<AST::Node> command;
ExpansionRange range;
};
struct ResolvedArithmeticExpansion {
String source_expression;
ExpansionRange range;
};
using ResolvedExpansion = Variant<ResolvedParameterExpansion, ResolvedCommandExpansion, ResolvedArithmeticExpansion>;
struct HeredocEntry {
String key;
bool allow_interpolation;
bool dedent;
};
struct State {
StringBuilder buffer {};
Reduction previous_reduction { Reduction::Start };
bool escaping { false };
AST::Position position {
.start_offset = 0,
.end_offset = 0,
.start_line = {
.line_number = 0,
.line_column = 0,
},
.end_line = {
.line_number = 0,
.line_column = 0,
},
};
Vector<Expansion> expansions {};
Queue<HeredocEntry> heredoc_entries {};
bool on_new_line { true };
};
struct Token {
enum class Type {
Eof,
Newline,
Continuation,
Token,
And,
Pipe,
OpenParen,
CloseParen,
Great,
Less,
AndIf,
OrIf,
DoubleSemicolon,
DoubleLess,
DoubleGreat,
LessAnd,
GreatAnd,
LessGreat,
DoubleLessDash,
Clobber,
Semicolon,
HeredocContents,
// Not produced by this lexer, but generated in later stages.
AssignmentWord,
Bang,
Case,
CloseBrace,
Do,
Done,
Elif,
Else,
Esac,
Fi,
For,
If,
In,
IoNumber,
OpenBrace,
Then,
Until,
VariableName,
While,
Word,
};
Type type;
String value;
Optional<AST::Position> position;
Vector<Expansion> expansions;
Vector<ResolvedExpansion> resolved_expansions {};
StringView original_text;
Optional<String> relevant_heredoc_key {};
bool could_be_start_of_a_simple_command { false };
static ErrorOr<Vector<Token>> maybe_from_state(State const& state)
{
if (state.buffer.is_empty() || state.buffer.string_view().trim_whitespace().is_empty())
return Vector<Token> {};
auto token = Token {
.type = Type::Token,
.value = TRY(state.buffer.to_string()),
.position = state.position,
.expansions = state.expansions,
.original_text = {},
};
return Vector<Token> { move(token) };
}
static Optional<Token::Type> operator_from_name(StringView name)
{
if (name == "&&"sv)
return Token::Type::AndIf;
if (name == "||"sv)
return Token::Type::OrIf;
if (name == ";;"sv)
return Token::Type::DoubleSemicolon;
if (name == "<<"sv)
return Token::Type::DoubleLess;
if (name == ">>"sv)
return Token::Type::DoubleGreat;
if (name == "<&"sv)
return Token::Type::LessAnd;
if (name == ">&"sv)
return Token::Type::GreatAnd;
if (name == "<>"sv)
return Token::Type::LessGreat;
if (name == "<<-"sv)
return Token::Type::DoubleLessDash;
if (name == ">|"sv)
return Token::Type::Clobber;
if (name == ";"sv)
return Token::Type::Semicolon;
if (name == "&"sv)
return Token::Type::And;
if (name == "|"sv)
return Token::Type::Pipe;
if (name == "("sv)
return Token::Type::OpenParen;
if (name == ")"sv)
return Token::Type::CloseParen;
if (name == ">"sv)
return Token::Type::Great;
if (name == "<"sv)
return Token::Type::Less;
if (name == "\n"sv)
return Token::Type::Newline;
return {};
}
static ErrorOr<Vector<Token>> operators_from(State const& state)
{
auto name = TRY(state.buffer.to_string());
auto type = operator_from_name(name);
if (!type.has_value())
return Vector<Token> {};
return Vector {
Token {
.type = *type,
.value = name,
.position = state.position,
.expansions = {},
.original_text = {},
}
};
}
static Token eof()
{
return {
.type = Type::Eof,
.value = {},
.position = {},
.expansions = {},
.original_text = {},
};
}
static Token newline()
{
return {
.type = Type::Newline,
.value = "\n"_short_string,
.position = {},
.expansions = {},
.original_text = {},
};
}
static Token continuation(char expected)
{
return {
.type = Type::Continuation,
.value = String::from_code_point(expected),
.position = {},
.expansions = {},
.original_text = {},
};
}
static Token continuation(String expected)
{
return {
.type = Type::Continuation,
.value = move(expected),
.position = {},
.expansions = {},
.original_text = {},
};
}
StringView type_name() const;
};
class Lexer {
public:
explicit Lexer(StringView input)
: m_lexer(input)
{
}
ErrorOr<Vector<Token>> batch_next(Optional<Reduction> starting_reduction = {});
struct HeredocKeyResult {
String key;
bool allow_interpolation;
};
static HeredocKeyResult process_heredoc_key(Token const&);
private:
struct ReductionResult {
Vector<Token> tokens;
Reduction next_reduction { Reduction::None };
};
ErrorOr<ReductionResult> reduce(Reduction);
ErrorOr<ReductionResult> reduce_end();
ErrorOr<ReductionResult> reduce_operator();
ErrorOr<ReductionResult> reduce_comment();
ErrorOr<ReductionResult> reduce_single_quoted_string();
ErrorOr<ReductionResult> reduce_double_quoted_string();
ErrorOr<ReductionResult> reduce_expansion();
ErrorOr<ReductionResult> reduce_command_expansion();
ErrorOr<ReductionResult> reduce_start();
ErrorOr<ReductionResult> reduce_arithmetic_expansion();
ErrorOr<ReductionResult> reduce_special_parameter_expansion();
ErrorOr<ReductionResult> reduce_parameter_expansion();
ErrorOr<ReductionResult> reduce_command_or_arithmetic_substitution_expansion();
ErrorOr<ReductionResult> reduce_extended_parameter_expansion();
ErrorOr<ReductionResult> reduce_heredoc_contents();
char consume();
bool consume_specific(char);
void reconsume(StringView);
ExpansionRange range(ssize_t offset = 0) const;
GenericLexer m_lexer;
State m_state;
Reduction m_next_reduction { Reduction::Start };
};
}