LibCpp: Add the beginning of a C++ parser

This parser will be used by the C++ language server to provide better
auto-complete (& maybe also other things in the future).

It is designed to be error tolerant, and keeps track of the position
spans of the AST nodes, which should be useful later for incremental
parsing.
This commit is contained in:
Itamar 2021-01-23 16:47:20 +02:00 committed by Andreas Kling
parent aec9658b4f
commit c96b6987c4
Notes: sideshowbarker 2024-07-18 22:49:40 +09:00
11 changed files with 2298 additions and 9 deletions

65
AK/ScopeLogger.h Normal file
View File

@ -0,0 +1,65 @@
/*
* Copyright (c) 2020, Denis Campredon <deni_@hotmail.fr>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/StringBuilder.h>
#ifdef DEBUG_SPAM
namespace AK {
class ScopeLogger {
public:
ScopeLogger(StringView&& fun)
: m_fun(fun)
{
StringBuilder sb;
for (auto indent = m_depth++; indent > 0; indent--)
sb.append(' ');
dbgln("\033[1;{}m{}entering {}\033[0m", m_depth % 8 + 30, sb.to_string(), m_fun);
}
~ScopeLogger()
{
StringBuilder sb;
for (auto indent = --m_depth; indent > 0; indent--)
sb.append(' ');
dbgln("\033[1;{}m{}leaving {}\033[0m", (m_depth + 1) % 8 + 30, sb.to_string(), m_fun);
}
private:
static inline size_t m_depth = 0;
StringView m_fun;
};
}
using AK::ScopeLogger;
// Declares a uniquely named ScopeLogger for the enclosing function.
// The operands of ## are not macro-expanded, so `tmp##__COUNTER__` always yielded
// the identifier `tmp__COUNTER__` and two uses in one scope clashed. Routing the
// paste through a second macro level lets __COUNTER__ expand first.
# define SCOPE_LOGGER_CONCAT_IMPL(a, b) a##b
# define SCOPE_LOGGER_CONCAT(a, b) SCOPE_LOGGER_CONCAT_IMPL(a, b)
# define SCOPE_LOGGER() auto SCOPE_LOGGER_CONCAT(tmp, __COUNTER__) = ScopeLogger(__PRETTY_FUNCTION__);
#else
# define SCOPE_LOGGER()
#endif

View File

@ -159,6 +159,7 @@ set(UPDATE_COALESCING_DEBUG ON)
set(VOLATILE_PAGE_RANGES_DEBUG ON)
set(WSMESSAGELOOP_DEBUG ON)
set(GPT_DEBUG ON)
set(CPP_DEBUG ON)
# False positive: DEBUG is a flag but it works differently.
# set(DEBUG ON)

View File

@ -0,0 +1,382 @@
/*
* Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "AST.h"
#include "AK/LogStream.h"
namespace Cpp {
// Writes two spaces per indentation level to the debug output.
static void print_indent(int indent)
{
    const int space_count = indent * 2;
    int written = 0;
    while (written < space_count) {
        dbgprintf(" ");
        ++written;
    }
}
// Prints the indented header line for this node: its class name followed by
// its source span as [start_line:start_column->end_line:end_column].
void ASTNode::dump(size_t indent) const
{
    print_indent(indent);
    // Position::line and Position::column are size_t, so the matching length modifier is 'z'.
    dbgprintf("%s[%zu:%zu->%zu:%zu]\n", class_name(), start().line, start().column, end().line, end().column);
}
// Prints this node's header, then every child declaration one level deeper.
void TranslationUnit::dump(size_t indent) const
{
    ASTNode::dump(indent);
    const auto child_indent = indent + 1;
    for (const auto& declaration : m_children)
        declaration.dump(child_indent);
}
// Prints the header, then (one level deeper) the return type, the name if set,
// the parenthesised parameter list and, when present, the definition.
void FunctionDeclaration::dump(size_t indent) const
{
    const auto inner = indent + 1;

    ASTNode::dump(indent);
    m_return_type->dump(inner);

    if (!m_name.is_null()) {
        print_indent(inner);
        dbgprintf("%s\n", m_name.to_string().characters());
    }

    print_indent(inner);
    dbgprintf("(\n");
    for (const auto& parameter : m_parameters)
        parameter.dump(inner);
    print_indent(inner);
    dbgprintf(")\n");

    if (m_definition.is_null())
        return;
    m_definition->dump(inner);
}
// The declarations introduced by a function declaration are its parameters.
NonnullRefPtrVector<Declaration> FunctionDeclaration::declarations() const
{
    NonnullRefPtrVector<Declaration> result;
    for (auto& parameter : m_parameters)
        result.append(parameter);
    return result;
}
// Prints the header and then the type name one level deeper.
void Type::dump(size_t indent) const
{
    ASTNode::dump(indent);
    const auto name_indent = indent + 1;
    print_indent(name_indent);
    dbgprintf("%s\n", m_name.to_string().characters());
}
// Prints the header, the parameter name (when one is set) at the same level,
// and then the parameter's type one level deeper.
void Parameter::dump(size_t indent) const
{
    ASTNode::dump(indent);
    const bool has_name = !m_name.is_null();
    if (has_name) {
        print_indent(indent);
        dbgprintf("%s\n", m_name.to_string().characters());
    }
    m_type->dump(indent + 1);
}
// Prints the header and the body statements wrapped in a pair of braces.
void FunctionDefinition::dump(size_t indent) const
{
    ASTNode::dump(indent);

    print_indent(indent);
    dbgprintf("{\n");

    const auto body_indent = indent + 1;
    for (const auto& body_statement : m_statements)
        body_statement.dump(body_indent);

    print_indent(indent);
    dbgprintf("}\n");
}
// Collects the declarations reported by every statement of the body, in order.
NonnullRefPtrVector<Declaration> FunctionDefinition::declarations() const
{
    NonnullRefPtrVector<Declaration> collected;
    for (auto& body_statement : m_statements)
        collected.append(body_statement.declarations());
    return collected;
}
// Prints the header, then the type, the variable name and (if present) the
// initial value, all one level deeper.
void VariableDeclaration::dump(size_t indent) const
{
    const auto inner = indent + 1;
    ASTNode::dump(indent);
    m_type->dump(inner);
    print_indent(inner);
    dbgprintf("%s\n", m_name.to_string().characters());
    if (!m_initial_value)
        return;
    m_initial_value->dump(inner);
}
// Prints the header followed by the identifier's name at the same level.
void Identifier::dump(size_t indent) const
{
    ASTNode::dump(indent);
    print_indent(indent);
    const auto name = m_name.to_string();
    dbgprintf("%s\n", name.characters());
}
// Prints the header followed by the literal's source text at the same level.
void NumericLiteral::dump(size_t indent) const
{
    ASTNode::dump(indent);
    print_indent(indent);
    const auto text = m_value.to_string();
    dbgprintf("%s\n", text.characters());
}
void BinaryExpression::dump(size_t indent) const
{
ASTNode::dump(indent);
const char* op_string = nullptr;
switch (m_op) {
case BinaryOp::Addition:
op_string = "+";
break;
case BinaryOp::Subtraction:
op_string = "-";
break;
case BinaryOp::Multiplication:
op_string = "*";
break;
case BinaryOp::Division:
op_string = "/";
break;
case BinaryOp::Modulo:
op_string = "%";
break;
case BinaryOp::GreaterThan:
op_string = ">";
break;
case BinaryOp::GreaterThanEquals:
op_string = ">=";
break;
case BinaryOp::LessThan:
op_string = "<";
break;
case BinaryOp::LessThanEquals:
op_string = "<=";
break;
case BinaryOp::BitwiseAnd:
op_string = "&";
break;
case BinaryOp::BitwiseOr:
op_string = "|";
break;
case BinaryOp::BitwiseXor:
op_string = "^";
break;
case BinaryOp::LeftShift:
op_string = "<<";
break;
case BinaryOp::RightShift:
op_string = ">>";
break;
}
m_lhs->dump(indent + 1);
print_indent(indent + 1);
ASSERT(op_string);
dbgprintf("%s\n", op_string);
m_rhs->dump(indent + 1);
}
void AssignmentExpression::dump(size_t indent) const
{
ASTNode::dump(indent);
const char* op_string = nullptr;
switch (m_op) {
case AssignmentOp::Assignment:
op_string = "=";
break;
case AssignmentOp::AdditionAssignment:
op_string = "+=";
break;
case AssignmentOp::SubtractionAssignment:
op_string = "-=";
break;
}
m_lhs->dump(indent + 1);
print_indent(indent + 1);
ASSERT(op_string);
dbgprintf("%s\n", op_string);
m_rhs->dump(indent + 1);
}
// Prints the header, the callee name at the same level, and each argument one
// level deeper.
void FunctionCall::dump(size_t indent) const
{
    ASTNode::dump(indent);
    print_indent(indent);
    dbgprintf("%s\n", m_name.to_string().characters());
    const auto argument_indent = indent + 1;
    for (const auto& argument : m_arguments)
        argument.dump(argument_indent);
}
// Prints the header and then the literal's text one level deeper.
void StringLiteral::dump(size_t indent) const
{
    ASTNode::dump(indent);
    print_indent(indent + 1);
    const auto text = m_value.to_string();
    dbgprintf("%s\n", text.characters());
}
// Prints the header and, when a return value is attached, that expression one
// level deeper.
void ReturnStatement::dump(size_t indent) const
{
    ASTNode::dump(indent);
    // m_value is a nullable RefPtr; guard it like the other optional children
    // (e.g. VariableDeclaration::m_initial_value) instead of dereferencing null.
    if (m_value)
        m_value->dump(indent + 1);
}
// Prints the header, the enum name at the same level, and each entry name one
// level deeper.
void EnumDeclaration::dump(size_t indent) const
{
    ASTNode::dump(indent);
    print_indent(indent);
    dbgprintf("%s\n", m_name.to_string().characters());
    const auto entry_indent = indent + 1;
    for (auto& entry_name : m_entries) {
        print_indent(entry_indent);
        dbgprintf("%s\n", entry_name.to_string().characters());
    }
}
// Prints the header, the struct/class name at the same level, and each member
// one level deeper.
void StructOrClassDeclaration::dump(size_t indent) const
{
    ASTNode::dump(indent);
    print_indent(indent);
    dbgprintf("%s\n", m_name.to_string().characters());
    const auto member_indent = indent + 1;
    for (auto& member_declaration : m_members)
        member_declaration.dump(member_indent);
}
// Prints the header, then the member's type and name one level deeper and, if
// present, its initial value two levels deeper.
void MemberDeclaration::dump(size_t indent) const
{
    const auto inner = indent + 1;
    ASTNode::dump(indent);
    m_type->dump(inner);
    print_indent(inner);
    dbgprintf("%s\n", m_name.to_string().characters());
    if (!m_initial_value)
        return;
    m_initial_value->dump(indent + 2);
}
void UnaryExpression::dump(size_t indent) const
{
ASTNode::dump(indent);
const char* op_string = nullptr;
switch (m_op) {
case UnaryOp::BitwiseNot:
op_string = "~";
break;
case UnaryOp::Not:
op_string = "!";
break;
case UnaryOp::Plus:
op_string = "+";
break;
case UnaryOp::Minus:
op_string = "-";
break;
case UnaryOp::PlusPlus:
op_string = "++";
break;
default:
op_string = "<invalid>";
}
ASSERT(op_string);
print_indent(indent + 1);
dbgprintf("%s\n", op_string);
m_lhs->dump(indent + 1);
}
// Prints the header and then "true" or "false" one level deeper.
void BooleanLiteral::dump(size_t indent) const
{
    ASTNode::dump(indent);
    print_indent(indent + 1);
    const char* text = "false";
    if (m_value)
        text = "true";
    dbgprintf("%s\n", text);
}
// Prints the header and, when a pointee type is set, that type one level deeper.
void Pointer::dump(size_t indent) const
{
    ASTNode::dump(indent);
    if (m_pointee.is_null())
        return;
    m_pointee->dump(indent + 1);
}
// Prints the header, then the object expression and the accessed property one
// level deeper.
void MemberExpression::dump(size_t indent) const
{
    ASTNode::dump(indent);
    const auto inner = indent + 1;
    m_object->dump(inner);
    m_property->dump(inner);
}
// Prints the header and then each contained statement one level deeper.
void BlockStatement::dump(size_t indent) const
{
    ASTNode::dump(indent);
    const auto inner = indent + 1;
    for (auto& inner_statement : m_statements)
        inner_statement.dump(inner);
}
// Prints the header, then whichever of init, test, update and body are present
// (in that order) one level deeper.
void ForStatement::dump(size_t indent) const
{
    ASTNode::dump(indent);
    const auto inner = indent + 1;
    auto dump_if_present = [inner](const auto& part) {
        if (part)
            part->dump(inner);
    };
    dump_if_present(m_init);
    dump_if_present(m_test);
    dump_if_present(m_update);
    dump_if_present(m_body);
}
// A statement that is itself a declaration reports exactly itself; any other
// statement reports nothing.
NonnullRefPtrVector<Declaration> Statement::declarations() const
{
    if (!is_declaration())
        return {};

    // The vector holds non-const references, hence the const_cast on this node.
    auto& self = const_cast<Declaration&>(static_cast<const Declaration&>(*this));
    NonnullRefPtrVector<Declaration> only_self;
    only_self.empend(self);
    return only_self;
}
// Collects the declarations of the loop's init clause followed by those of its
// body. Either part may be absent (ForStatement::dump treats both as optional),
// in which case it contributes nothing instead of dereferencing a null RefPtr.
NonnullRefPtrVector<Declaration> ForStatement::declarations() const
{
    NonnullRefPtrVector<Declaration> declarations;
    if (m_init)
        declarations.append(m_init->declarations());
    if (m_body)
        declarations.append(m_body->declarations());
    return declarations;
}
// Collects the declarations reported by every statement in the block, in order.
NonnullRefPtrVector<Declaration> BlockStatement::declarations() const
{
    NonnullRefPtrVector<Declaration> collected;
    for (auto& inner_statement : m_statements)
        collected.append(inner_statement.declarations());
    return collected;
}
}

View File

@ -0,0 +1,585 @@
/*
* Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "Applications/Piano/Music.h"
#include <AK/NonnullRefPtrVector.h>
#include <AK/Optional.h>
#include <AK/RefCounted.h>
#include <AK/String.h>
#include <AK/Vector.h>
#include <LibCpp/Lexer.h>
namespace Cpp {
class ASTNode;
class TranslationUnit;
class Declaration;
class FunctionDefinition;
class Type;
class Parameter;
class Statement;
// Abstract, reference-counted base class of all AST nodes. Stores a raw pointer
// to the parent node and optional start/end positions of the node's span.
class ASTNode : public RefCounted<ASTNode> {
public:
virtual ~ASTNode() = default;
// Name of the concrete node type; ASTNode::dump() prints it.
virtual const char* class_name() const = 0;
virtual void dump(size_t indent) const;
ASTNode* parent() const { return m_parent; }
// Returns the start position; asserts that one has been set.
Position start() const
{
ASSERT(m_start.has_value());
return m_start.value();
}
// Returns the end position; asserts that one has been set.
Position end() const
{
ASSERT(m_end.has_value());
return m_end.value();
}
void set_end(const Position& end) { m_end = end; }
void set_parent(ASTNode& parent) { m_parent = &parent; }
// Declarations introduced by this node; empty unless overridden.
virtual NonnullRefPtrVector<Declaration> declarations() const { return {}; }
// Type queries; each returns false here and is overridden by the matching subclass.
virtual bool is_identifier() const { return false; }
virtual bool is_member_expression() const { return false; }
virtual bool is_variable_or_parameter_declaration() const { return false; }
protected:
// Protected: nodes are only constructed through derived classes.
ASTNode(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: m_parent(parent)
, m_start(start)
, m_end(end)
{
}
private:
ASTNode* m_parent { nullptr };
Optional<Position> m_start;
Optional<Position> m_end;
};
// Node that owns the list of top-level declarations appended to it.
class TranslationUnit : public ASTNode {
public:
    TranslationUnit(ASTNode* parent, Optional<Position> start, Optional<Position> end)
        : ASTNode(parent, start, end)
    {
    }

    virtual ~TranslationUnit() override = default;

    // Read-only view of the child declarations.
    const NonnullRefPtrVector<Declaration>& children() const { return m_children; }

    virtual const char* class_name() const override { return "TranslationUnit"; }
    virtual void dump(size_t indent) const override;

    // Takes ownership of one more child declaration.
    void append(NonnullRefPtr<Declaration> child)
    {
        m_children.append(move(child));
    }

    // Returns a copy of the child list.
    virtual NonnullRefPtrVector<Declaration> declarations() const override { return m_children; }

private:
    NonnullRefPtrVector<Declaration> m_children;
};
// Base class for statement nodes.
class Statement : public ASTNode {
public:
virtual ~Statement() override = default;
virtual const char* class_name() const override { return "Statement"; }
// False here; Declaration overrides this to return true.
virtual bool is_declaration() const { return false; }
virtual NonnullRefPtrVector<Declaration> declarations() const override;
protected:
// Protected: only constructible through derived classes.
Statement(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: ASTNode(parent, start, end)
{
}
};
// Base class for declaration nodes; a Declaration is also a Statement.
class Declaration : public Statement {
public:
virtual bool is_declaration() const override { return true; }
// Kind queries; each returns false here and is overridden by the matching subclass.
virtual bool is_variable_declaration() const { return false; }
virtual bool is_parameter() const { return false; }
virtual bool is_struct_or_class() const { return false; }
protected:
// Protected: only constructible through derived classes.
Declaration(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Statement(parent, start, end)
{
}
};
// Declaration node with no data of its own.
// NOTE(review): presumably a placeholder for input that failed to parse as a declaration — confirm in Parser.
class InvalidDeclaration : public Declaration {
public:
virtual ~InvalidDeclaration() override = default;
virtual const char* class_name() const override { return "InvalidDeclaration"; }
InvalidDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Declaration(parent, start, end)
{
}
};
// Function declaration: name, return type, parameters and an optional definition.
class FunctionDeclaration : public Declaration {
public:
virtual ~FunctionDeclaration() override = default;
virtual const char* class_name() const override { return "FunctionDeclaration"; }
virtual void dump(size_t indent) const override;
const StringView& name() const { return m_name; }
// May be null; dump() checks for that before printing the definition.
RefPtr<FunctionDefinition> definition() { return m_definition; }
FunctionDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Declaration(parent, start, end)
{
}
// Returns the parameters (see the out-of-line definition).
virtual NonnullRefPtrVector<Declaration> declarations() const override;
// Public data members; the constructor leaves them default-initialized.
StringView m_name;
RefPtr<Type> m_return_type;
NonnullRefPtrVector<Parameter> m_parameters;
RefPtr<FunctionDefinition> m_definition;
};
// Common base of VariableDeclaration and Parameter: a name plus a type.
class VariableOrParameterDeclaration : public Declaration {
public:
virtual ~VariableOrParameterDeclaration() override = default;
virtual bool is_variable_or_parameter_declaration() const override { return true; }
// Public data members; m_type starts out null.
StringView m_name;
RefPtr<Type> m_type;
protected:
// Protected: only constructible through derived classes.
VariableOrParameterDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Declaration(parent, start, end)
{
}
};
// A single function parameter.
class Parameter : public VariableOrParameterDeclaration {
public:
virtual ~Parameter() override = default;
virtual const char* class_name() const override { return "Parameter"; }
virtual void dump(size_t indent) const override;
// Stores `name` in the inherited m_name; the type is left for the caller to set.
Parameter(ASTNode* parent, Optional<Position> start, Optional<Position> end, StringView name)
: VariableOrParameterDeclaration(parent, start, end)
{
m_name = name;
}
virtual bool is_parameter() const override { return true; }
};
// A type reference, identified by its name.
class Type : public ASTNode {
public:
virtual ~Type() override = default;
virtual const char* class_name() const override { return "Type"; }
const StringView& name() const { return m_name; }
virtual void dump(size_t indent) const override;
Type(ASTNode* parent, Optional<Position> start, Optional<Position> end, StringView name)
: ASTNode(parent, start, end)
, m_name(name)
{
}
StringView m_name;
};
// Pointer type: a Type constructed with an empty name that refers to its pointee type.
class Pointer : public Type {
public:
virtual ~Pointer() override = default;
virtual const char* class_name() const override { return "Pointer"; }
virtual void dump(size_t indent) const override;
Pointer(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Type(parent, start, end, {})
{
}
// May be null; dump() checks before printing it.
RefPtr<Type> m_pointee;
};
// Body of a function: an ordered list of statements.
class FunctionDefinition : public ASTNode {
public:
virtual ~FunctionDefinition() override = default;
virtual const char* class_name() const override { return "FunctionDefinition"; }
// Mutable access to the statement list.
NonnullRefPtrVector<Statement>& statements() { return m_statements; }
virtual void dump(size_t indent) const override;
FunctionDefinition(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: ASTNode(parent, start, end)
{
}
// Gathers the declarations of all body statements (see the out-of-line definition).
virtual NonnullRefPtrVector<Declaration> declarations() const override;
NonnullRefPtrVector<Statement> m_statements;
};
// Statement node with no data of its own.
// NOTE(review): presumably a placeholder for input that failed to parse as a statement — confirm in Parser.
class InvalidStatement : public Statement {
public:
virtual ~InvalidStatement() override = default;
virtual const char* class_name() const override { return "InvalidStatement"; }
InvalidStatement(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Statement(parent, start, end)
{
}
};
// Base class for expression nodes; an Expression is also a Statement.
class Expression : public Statement {
public:
virtual ~Expression() override = default;
virtual const char* class_name() const override { return "Expression"; }
protected:
// Protected: only constructible through derived classes.
Expression(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Statement(parent, start, end)
{
}
};
// Expression node with no data of its own.
// NOTE(review): presumably a placeholder for input that failed to parse as an expression — confirm in Parser.
class InvalidExpression : public Expression {
public:
virtual ~InvalidExpression() override = default;
virtual const char* class_name() const override { return "InvalidExpression"; }
InvalidExpression(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Expression(parent, start, end)
{
}
};
// Variable declaration: inherited name and type plus an optional initial value.
class VariableDeclaration : public VariableOrParameterDeclaration {
public:
virtual ~VariableDeclaration() override = default;
virtual const char* class_name() const override { return "VariableDeclaration"; }
virtual void dump(size_t indent) const override;
VariableDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: VariableOrParameterDeclaration(parent, start, end)
{
}
virtual bool is_variable_declaration() const override { return true; }
// Null when there is no initializer; dump() checks before printing it.
RefPtr<Expression> m_initial_value;
};
// Expression consisting of a single name.
class Identifier : public Expression {
public:
virtual ~Identifier() override = default;
virtual const char* class_name() const override { return "Identifier"; }
virtual void dump(size_t indent) const override;
Identifier(ASTNode* parent, Optional<Position> start, Optional<Position> end, StringView name)
: Expression(parent, start, end)
, m_name(name)
{
}
// Delegates to the constructor above with an empty name.
Identifier(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Identifier(parent, start, end, {})
{
}
virtual bool is_identifier() const override { return true; }
StringView m_name;
};
// Numeric literal expression; keeps the literal's source text unparsed.
class NumericLiteral : public Expression {
public:
    virtual ~NumericLiteral() override = default;

    // Node type name printed by dump(); previously misspelled "NumricLiteral".
    virtual const char* class_name() const override { return "NumericLiteral"; }
    virtual void dump(size_t indent) const override;

    NumericLiteral(ASTNode* parent, Optional<Position> start, Optional<Position> end, StringView value)
        : Expression(parent, start, end)
        , m_value(value)
    {
    }

    // Source text of the literal.
    StringView m_value;
};
// Boolean literal expression; dump() prints it as "true" or "false".
class BooleanLiteral : public Expression {
public:
virtual ~BooleanLiteral() override = default;
virtual const char* class_name() const override { return "BooleanLiteral"; }
virtual void dump(size_t indent) const override;
BooleanLiteral(ASTNode* parent, Optional<Position> start, Optional<Position> end, bool value)
: Expression(parent, start, end)
, m_value(value)
{
}
bool m_value;
};
// Operators a BinaryExpression can carry; BinaryExpression::dump() maps each to its symbol.
enum class BinaryOp {
Addition,
Subtraction,
Multiplication,
Division,
Modulo,
GreaterThan,
GreaterThanEquals,
LessThan,
LessThanEquals,
BitwiseAnd,
BitwiseOr,
BitwiseXor,
LeftShift,
RightShift,
};
// Binary expression: an operator with left and right operands.
class BinaryExpression : public Expression {
public:
BinaryExpression(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Expression(parent, start, end)
{
}
virtual ~BinaryExpression() override = default;
virtual const char* class_name() const override { return "BinaryExpression"; }
virtual void dump(size_t indent) const override;
// NOTE(review): m_op has no initializer and the constructor does not set it — the creator must assign it before dump().
BinaryOp m_op;
// dump() dereferences both operands without a null check.
RefPtr<Expression> m_lhs;
RefPtr<Expression> m_rhs;
};
// Operators an AssignmentExpression can carry: "=", "+=" and "-=" (per AssignmentExpression::dump()).
enum class AssignmentOp {
Assignment,
AdditionAssignment,
SubtractionAssignment,
};
// Assignment expression: an assignment operator with a target (lhs) and a value (rhs).
class AssignmentExpression : public Expression {
public:
AssignmentExpression(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Expression(parent, start, end)
{
}
virtual ~AssignmentExpression() override = default;
virtual const char* class_name() const override { return "AssignmentExpression"; }
virtual void dump(size_t indent) const override;
// NOTE(review): m_op has no initializer and the constructor does not set it — the creator must assign it before dump().
AssignmentOp m_op;
// dump() dereferences both operands without a null check.
RefPtr<Expression> m_lhs;
RefPtr<Expression> m_rhs;
};
// Function call expression: the callee's name and the argument expressions.
class FunctionCall final : public Expression {
public:
FunctionCall(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Expression(parent, start, end)
{
}
~FunctionCall() override = default;
virtual const char* class_name() const override { return "FunctionCall"; }
virtual void dump(size_t indent) const override;
StringView m_name;
NonnullRefPtrVector<Expression> m_arguments;
};
// String literal expression; m_value is not set by the constructor.
class StringLiteral final : public Expression {
public:
StringLiteral(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Expression(parent, start, end)
{
}
~StringLiteral() override = default;
virtual const char* class_name() const override { return "StringLiteral"; }
virtual void dump(size_t indent) const override;
StringView m_value;
};
// Return statement holding the returned expression.
class ReturnStatement : public Statement {
public:
virtual ~ReturnStatement() override = default;
virtual const char* class_name() const override { return "ReturnStatement"; }
ReturnStatement(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Statement(parent, start, end)
{
}
virtual void dump(size_t indent) const override;
// Starts out null; the constructor does not set it.
RefPtr<Expression> m_value;
};
// Enum declaration: the enum's name and the names of its entries.
class EnumDeclaration : public Declaration {
public:
virtual ~EnumDeclaration() override = default;
virtual const char* class_name() const override { return "EnumDeclaration"; }
virtual void dump(size_t indent) const override;
EnumDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Declaration(parent, start, end)
{
}
StringView m_name;
Vector<StringView> m_entries;
};
// Member of a struct or class: type, name and an optional initial value.
class MemberDeclaration : public Declaration {
public:
virtual ~MemberDeclaration() override = default;
virtual const char* class_name() const override { return "MemberDeclaration"; }
virtual void dump(size_t indent) const override;
MemberDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Declaration(parent, start, end)
{
}
// dump() dereferences m_type unconditionally.
RefPtr<Type> m_type;
StringView m_name;
// Null when there is no initializer; dump() checks before printing it.
RefPtr<Expression> m_initial_value;
};
// Declaration of a struct or a class: which of the two it is, its name and its members.
class StructOrClassDeclaration : public Declaration {
public:
virtual ~StructOrClassDeclaration() override = default;
virtual const char* class_name() const override { return "StructOrClassDeclaration"; }
virtual void dump(size_t indent) const override;
virtual bool is_struct_or_class() const override { return true; }
// Nested enum; inside this class it hides the AST node class Cpp::Type.
enum class Type {
Struct,
Class
};
StructOrClassDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end, StructOrClassDeclaration::Type type)
: Declaration(parent, start, end)
, m_type(type)
{
}
StructOrClassDeclaration::Type m_type;
StringView m_name;
NonnullRefPtrVector<MemberDeclaration> m_members;
};
// Operators a UnaryExpression can carry. Invalid has no case in
// UnaryExpression::dump() and is printed there as "<invalid>".
enum class UnaryOp {
Invalid,
BitwiseNot,
Not,
Plus,
Minus,
PlusPlus,
};
// Unary expression: an operator applied to a single operand.
class UnaryExpression : public Expression {
public:
    UnaryExpression(ASTNode* parent, Optional<Position> start, Optional<Position> end)
        : Expression(parent, start, end)
    {
    }

    virtual ~UnaryExpression() override = default;
    virtual const char* class_name() const override { return "UnaryExpression"; }
    virtual void dump(size_t indent) const override;

    // Starts as Invalid so a node whose operator was never assigned holds a
    // well-defined value instead of an indeterminate one.
    UnaryOp m_op { UnaryOp::Invalid };
    // Operand; dump() dereferences it without a null check.
    RefPtr<Expression> m_lhs;
};
// Member access expression: an object expression and the identifier of the accessed property.
class MemberExpression : public Expression {
public:
MemberExpression(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Expression(parent, start, end)
{
}
virtual ~MemberExpression() override = default;
virtual const char* class_name() const override { return "MemberExpression"; }
virtual void dump(size_t indent) const override;
virtual bool is_member_expression() const override { return true; }
// dump() dereferences both without a null check.
RefPtr<Expression> m_object;
RefPtr<Identifier> m_property;
};
// For loop: init declaration, test expression, update expression and body.
class ForStatement : public Statement {
public:
ForStatement(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Statement(parent, start, end)
{
}
virtual ~ForStatement() override = default;
virtual const char* class_name() const override { return "ForStatement"; }
virtual void dump(size_t indent) const override;
virtual NonnullRefPtrVector<Declaration> declarations() const override;
// All four parts start out null; dump() skips the ones that are null.
RefPtr<VariableDeclaration> m_init;
RefPtr<Expression> m_test;
RefPtr<Expression> m_update;
RefPtr<Statement> m_body;
};
// Block statement: an ordered list of nested statements.
class BlockStatement final : public Statement {
public:
BlockStatement(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Statement(parent, start, end)
{
}
virtual ~BlockStatement() override = default;
virtual const char* class_name() const override { return "BlockStatement"; }
virtual void dump(size_t indent) const override;
// Gathers the declarations of all nested statements (see the out-of-line definition).
virtual NonnullRefPtrVector<Declaration> declarations() const override;
NonnullRefPtrVector<Statement> m_statements;
};
// Statement node representing a comment; stores nothing beyond its position span.
class Comment final : public Statement {
public:
Comment(ASTNode* parent, Optional<Position> start, Optional<Position> end)
: Statement(parent, start, end)
{
}
virtual ~Comment() override = default;
virtual const char* class_name() const override { return "Comment"; }
};
}

View File

@ -1,5 +1,7 @@
# Source files for the LibCpp library declared below.
# NOTE(review): serenity_lib presumably reads SOURCES; it is defined outside this file — confirm.
set(SOURCES
AST.cpp
Lexer.cpp
Parser.cpp
)
serenity_lib(LibCpp cpp)

View File

@ -581,12 +581,13 @@ Vector<Token> Lexer::lex()
commit_token(Token::Type::IncludePath);
begin_token();
}
}
} else {
while (peek() && peek() != '\n')
consume();
commit_token(Token::Type::PreprocessorStatement);
}
continue;
}
if (ch == '/' && peek(1) == '/') {
@ -786,4 +787,17 @@ Vector<Token> Lexer::lex()
return tokens;
}
// Orders positions by line first and by column within the same line.
bool Position::operator<(const Position& other) const
{
    if (line != other.line)
        return line < other.line;
    return column < other.column;
}
// True when this position is neither before nor equal to `other`.
bool Position::operator>(const Position& other) const
{
    if (*this < other)
        return false;
    if (*this == other)
        return false;
    return true;
}
// Two positions are equal when both line and column match.
bool Position::operator==(const Position& other) const
{
    if (line != other.line)
        return false;
    return column == other.column;
}
}

View File

@ -96,11 +96,16 @@ namespace Cpp {
__TOKEN(Float) \
__TOKEN(Keyword) \
__TOKEN(KnownType) \
__TOKEN(Identifier)
__TOKEN(Identifier) \
__TOKEN(EOF_TOKEN)
struct Position {
size_t line;
size_t column;
size_t line { 0 };
size_t column { 0 };
bool operator<(const Position&) const;
bool operator>(const Position&) const;
bool operator==(const Position&) const;
};
struct Token {
@ -110,9 +115,9 @@ struct Token {
#undef __TOKEN
};
const char* to_string() const
static const char* type_to_string(Type t)
{
switch (m_type) {
switch (t) {
#define __TOKEN(x) \
case Type::x: \
return #x;
@ -122,6 +127,14 @@ struct Token {
ASSERT_NOT_REACHED();
}
// Name of this token's type, obtained from type_to_string().
const char* to_string() const
{
return type_to_string(m_type);
}
// Accessors returning copies of the token's start position, end position and type.
Position start() const { return m_start; }
Position end() const { return m_end; }
Type type() const { return m_type; }
Type m_type { Type::Unknown };
Position m_start;
Position m_end;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,162 @@
/*
* Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "AK/NonnullRefPtr.h"
#include "AST.h"
#include <LibCpp/Lexer.h>
namespace Cpp {
// Parser for C++ source text that produces an AST rooted at a TranslationUnit.
// This header only declares the interface; apart from the two inline node
// factories near the bottom, every member function is defined elsewhere.
// Per the introducing commit, the parser is meant to be error tolerant and to
// record position spans for AST nodes.
class Parser final {
public:
// NOTE(review): m_program is a non-owning StringView, so presumably the text passed
// here must outlive the Parser -- confirm against the constructor definition.
explicit Parser(const StringView&);
~Parser() = default;
// Returns the TranslationUnit for the program; the NonnullRefPtr return type means
// callers never receive null. Presumably this is the node root_node() exposes -- confirm.
NonnullRefPtr<TranslationUnit> parse();
bool eof() const;
// The lookups below return RefPtr/Optional, so each of them can report "nothing found".
RefPtr<ASTNode> eof_node() const;
RefPtr<ASTNode> node_at(Position) const;
Optional<Token> token_at(Position) const;
// Hands out m_root_node, which is only ever assigned by create_root_ast_node().
RefPtr<const TranslationUnit> root_node() const { return m_root_node; }
StringView text_of_node(const ASTNode&) const;
void print_tokens() const;
// Returns a copy of m_errors (the vector is returned by value).
Vector<String> errors() const { return m_errors; }
private:
// Declaration kinds reported by the match_declaration*() helpers and accepted by
// parse_declaration().
enum class DeclarationType {
Function,
Variable,
Enum,
Struct,
};
bool done();
// match_* helpers: predicates over the parser state. NOTE(review): presumably they
// inspect upcoming tokens without permanently consuming them (see save_state() /
// load_state()) -- confirm in the implementation file.
Optional<DeclarationType> match_declaration();
Optional<DeclarationType> match_declaration_in_translation_unit();
Optional<DeclarationType> match_declaration_in_function_definition();
bool match_function_declaration();
bool match_comment();
bool match_preprocessor();
bool match_whitespace();
bool match_variable_declaration();
bool match_expression();
bool match_function_call();
bool match_secondary_expression();
bool match_enum_declaration();
bool match_struct_declaration();
bool match_literal();
bool match_unary_expression();
bool match_boolean_literal();
bool match_keyword(const String&);
bool match_block_statement();
// Optional return: a parameter list may not be produced at all.
Optional<NonnullRefPtrVector<Parameter>> parse_parameter_list(ASTNode& parent);
Optional<Token> consume_whitespace();
void consume_preprocessor();
// parse_* helpers: each takes the node that becomes the parent of the result and
// returns a NonnullRefPtr, i.e. a node is always produced.
NonnullRefPtr<Declaration> parse_declaration(ASTNode& parent, DeclarationType);
NonnullRefPtr<FunctionDeclaration> parse_function_declaration(ASTNode& parent);
NonnullRefPtr<FunctionDefinition> parse_function_definition(ASTNode& parent);
NonnullRefPtr<Statement> parse_statement(ASTNode& parent);
NonnullRefPtr<VariableDeclaration> parse_variable_declaration(ASTNode& parent);
NonnullRefPtr<Expression> parse_expression(ASTNode& parent);
NonnullRefPtr<Expression> parse_primary_expression(ASTNode& parent);
// Takes the already-parsed left-hand side and continues the expression from it.
NonnullRefPtr<Expression> parse_secondary_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs);
NonnullRefPtr<FunctionCall> parse_function_call(ASTNode& parent);
NonnullRefPtr<StringLiteral> parse_string_literal(ASTNode& parent);
NonnullRefPtr<ReturnStatement> parse_return_statement(ASTNode& parent);
NonnullRefPtr<EnumDeclaration> parse_enum_declaration(ASTNode& parent);
NonnullRefPtr<StructOrClassDeclaration> parse_struct_or_class_declaration(ASTNode& parent, StructOrClassDeclaration::Type);
NonnullRefPtr<MemberDeclaration> parse_member_declaration(ASTNode& parent);
NonnullRefPtr<Expression> parse_literal(ASTNode& parent);
NonnullRefPtr<UnaryExpression> parse_unary_expression(ASTNode& parent);
NonnullRefPtr<BooleanLiteral> parse_boolean_literal(ASTNode& parent);
NonnullRefPtr<Type> parse_type(ASTNode& parent);
NonnullRefPtr<BinaryExpression> parse_binary_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs, BinaryOp);
NonnullRefPtr<AssignmentExpression> parse_assignment_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs, AssignmentOp);
NonnullRefPtr<ForStatement> parse_for_statement(ASTNode& parent);
NonnullRefPtr<BlockStatement> parse_block_statement(ASTNode& parent);
NonnullRefPtr<Comment> parse_comment(ASTNode& parent);
// Token-level primitives operating on m_tokens.
bool match(Token::Type);
Token consume(Token::Type);
Token consume();
Token consume_keyword(const String&);
Token peek() const;
Optional<Token> peek(Token::Type) const;
Position position() const;
// NOTE(review): presumably these push/pop m_state on m_saved_states to allow
// backtracking -- confirm in the implementation file.
void save_state();
void load_state();
// Syntactic context stored in State; a fresh State starts in InTranslationUnit.
enum class Context {
InTranslationUnit,
InFunctionDefinition,
};
// Parser state: the current context plus an index into m_tokens, starting at 0.
struct State {
Context context { Context::InTranslationUnit };
size_t token_index { 0 };
};
StringView text_of_token(const Cpp::Token& token);
// The message is optional and defaults to an empty StringView.
void error(StringView message = {});
size_t node_span_size(const ASTNode& node) const;
// Allocates an AST node of type T whose parent is `parent`, forwarding `start`, `end`
// and any extra constructor arguments, records it in m_nodes and returns it.
// `end` is Optional, so a node can be created without an end position.
template<class T, class... Args>
NonnullRefPtr<T>
create_ast_node(ASTNode& parent, const Position& start, Optional<Position> end, Args&&... args)
{
auto node = adopt(*new T(&parent, start, end, forward<Args>(args)...));
m_nodes.append(node);
return node;
}
// Creates the parentless TranslationUnit, records it in m_nodes and also stores it
// as m_root_node so root_node() can return it.
NonnullRefPtr<TranslationUnit>
create_root_ast_node(const Position& start, Position end)
{
auto node = adopt(*new TranslationUnit(nullptr, start, end));
m_nodes.append(node);
m_root_node = node;
return node;
}
// Non-owning view type; see the lifetime note on the constructor.
StringView m_program;
Vector<StringView> m_lines;
Vector<Token> m_tokens;
State m_state;
Vector<State> m_saved_states;
RefPtr<TranslationUnit> m_root_node;
// Every node created through the two factories above is appended here.
NonnullRefPtrVector<ASTNode> m_nodes;
// Error messages exposed (by copy) through errors().
Vector<String> m_errors;
};
}

View File

@ -47,3 +47,4 @@ target_link_libraries(test-web LibWeb)
target_link_libraries(tt LibPthread)
target_link_libraries(grep LibRegex)
target_link_libraries(gunzip LibCompress)
target_link_libraries(CppParserTest LibCpp)

View File

@ -0,0 +1,64 @@
/*
* Copyright (c) 2021, the SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "AK/Forward.h"
#include "LibCpp/AST.h"
#include <LibCore/ArgsParser.h>
#include <LibCore/File.h>
#include <LibCpp/Parser.h>
int main(int argc, char** argv)
{
Core::ArgsParser args_parser;
const char* path = nullptr;
bool tokens_mode = false;
args_parser.add_option(tokens_mode, "Print Tokens", "tokens", 'T');
args_parser.add_positional_argument(path, "Cpp File", "cpp-file", Core::ArgsParser::Required::No);
args_parser.parse(argc, argv);
if (!path)
path = "Source/little/main.cpp";
auto file = Core::File::construct(path);
if (!file->open(Core::IODevice::ReadOnly)) {
perror("open");
exit(1);
}
auto content = file->read_all();
StringView content_view(content);
::Cpp::Parser parser(content_view);
if (tokens_mode) {
parser.print_tokens();
return 0;
}
auto root = parser.parse();
dbgln("Parser errors:");
for (auto& error : parser.errors()) {
dbgln("{}", error);
}
root->dump(0);
}