Integrate new error resilient parser into waspc (#684)

* Add CST parsing code into waspc

* Implement CST -> AST conversion

* Implement parseExpression & isValidWaspIdentifier

* Implement golden tests for Parser

* Configure CI git checkout to always use LF line endings
Craig McIlwrath 2022-08-11 12:26:12 -04:00 committed by GitHub
parent 485f92ca13
commit 0de0855787
71 changed files with 2410 additions and 891 deletions

View File

@ -42,6 +42,15 @@ jobs:
- windows-latest
steps:
- name: Configure git
working-directory: ~
# For waspc parser tests, we need to make sure git doesn't convert line
# endings to CRLF during checkout on Windows because that would cause
# the test cases to fail.
run: |
git config --global core.autocrlf input
git config --global core.eol lf
- name: Checkout the repo
uses: actions/checkout@v2

View File

@ -1,17 +1,17 @@
module Wasp.Analyzer.Parser
( -- * Overview
-- | The "Analyzer.Parser" module is built of two parts:
-- | The "Analyzer.Parser" module is built of three parts:
--
-- - The lexer, generated with Alex, which creates tokens from wasp source.
-- - The parser, generated with Happy, which builds an abstract syntax
-- tree from the tokens.
-- - The concrete syntax parser, which takes tokens and builds a loosely
-- structured concrete syntax tree. This step is included because it can
-- handle syntax error recovery and allows for analysis on source that has
-- errors in it (used in the language server).
-- - The abstract syntax parser, which takes a concrete syntax tree and converts
-- it into an abstract tree.
--
-- Lexing and parsing are not implemented as two separate phases that happen one after another.
-- Instead, parser controls and uses lexer internally to produce tokens as needed, on the go.
--
-- Both lexer and parser are operating in a "Parser" monad, which manages state and exceptions for the parser,
-- and therefore also for the lexer, which functions as a part of and is controlled by the parser.
-- The phases are run in sequence, one after another.
parseStatements,
parseExpression,
AST (..),
@ -29,31 +29,42 @@ module Wasp.Analyzer.Parser
ExtImportName (..),
ParseError (..),
SourcePosition (..),
SourceRegion (..),
Token (..),
TokenType (..),
TokenKind (..),
)
where
import Control.Monad.Except (runExcept)
import Control.Monad.State (evalStateT)
import Wasp.Analyzer.Parser.AST
import qualified Wasp.Analyzer.Parser.AbstractParser as P
import qualified Wasp.Analyzer.Parser.ConcreteParser as CST
import Wasp.Analyzer.Parser.Ctx (Ctx (..), WithCtx (..), ctxFromPos, ctxFromRgn, fromWithCtx, getCtxRgn, withCtx)
import Wasp.Analyzer.Parser.Monad (Parser, makeInitialState)
import qualified Wasp.Analyzer.Parser.Lexer as L
import Wasp.Analyzer.Parser.ParseError
import qualified Wasp.Analyzer.Parser.Parser as P
import Wasp.Analyzer.Parser.SourcePosition (SourcePosition (..))
import Wasp.Analyzer.Parser.SourceRegion (SourceRegion (..))
import Wasp.Analyzer.Parser.Token
-- | Checks if a string is a valid wasp identifier.
isValidWaspIdentifier :: String -> Bool
isValidWaspIdentifier str = case parseExpression str of
Right (Var name) -> noCharsSkipped where noCharsSkipped = length name == length str
isValidWaspIdentifier str = case L.lex str of
-- Lex the string and check that it only contains an Identifier token
[tok] | tokenKind tok == Identifier -> True
_ -> False
parseStatements :: String -> Either ParseError AST
parseStatements = runParser P.parseStatements
parseStatements = runParser CST.parseCST P.parseStatements
parseExpression :: String -> Either ParseError Expr
parseExpression = runParser P.parseExpression
parseExpression = runParser CST.parseCSTExpression P.parseExpression
runParser :: Parser a -> String -> Either ParseError a
runParser parser = runExcept . evalStateT parser . makeInitialState
-- TODO: report multiple errors. Have to hunt down everywhere this return is
-- used and figure out best way to handle list of ParseError
runParser ::
([Token] -> ([CST.ParseError], [CST.SyntaxNode])) ->
(String -> [CST.SyntaxNode] -> Either ParseError a) ->
(String -> Either ParseError a)
runParser cstParser astParser source =
case cstParser (L.lex source) of
([], cst) -> astParser source cst
(cstErrors, _) -> Left $ head $ map (parseErrorFromCSTParseError source) cstErrors
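-- A minimal usage sketch of the new two-phase pipeline (the Wasp source string
-- below is illustrative): lexing, CST parsing, and the CST -> AST conversion
-- all happen behind 'parseStatements'.
exampleParse :: Either ParseError AST
exampleParse = parseStatements "app Todo { title: \"Todo app\" }"

-- The identifier check, by contrast, now only needs the lexer:
-- isValidWaspIdentifier "page" evaluates to True, while
-- isValidWaspIdentifier "2page" evaluates to False.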

View File

@ -0,0 +1,294 @@
module Wasp.Analyzer.Parser.AbstractParser
( -- * CST to AST conversion
-- | This module takes @["SyntaxNode"]@ produced by 'Wasp.Analyzer.Parser.ConcreteParser'
-- and converts it into an abstract syntax tree.
parseStatements,
parseExpression,
)
where
import Control.Arrow (Arrow (first))
import Control.Monad.Except (throwError)
import Control.Monad.State.Strict (gets)
import Data.Maybe (catMaybes)
import Wasp.Analyzer.Parser.AST (AST, Expr, ExtImportName, Identifier, Stmt)
import qualified Wasp.Analyzer.Parser.AST as AST
import Wasp.Analyzer.Parser.AbstractParser.Monad
import Wasp.Analyzer.Parser.ConcreteParser.CST (SyntaxKind, SyntaxNode (SyntaxNode))
import qualified Wasp.Analyzer.Parser.ConcreteParser.CST as S
import Wasp.Analyzer.Parser.Ctx (Ctx (Ctx), WithCtx (WithCtx), ctxFromRgn)
import Wasp.Analyzer.Parser.ParseError
import Wasp.Analyzer.Parser.SourcePosition (SourcePosition (SourcePosition))
import Wasp.Analyzer.Parser.SourceRegion (SourceRegion (SourceRegion))
import qualified Wasp.Analyzer.Parser.Token as T
-- In the below type definitions, "using" a syntax node means that its width
-- is used to adjust the source position in the "ParseState".
-- | A parser that uses a single syntax node.
type NodeParser a = SyntaxNode -> ParserM a
-- | A parser that uses an entire list of syntax nodes.
type NodesParser a = [SyntaxNode] -> ParserM a
-- | A parser that may use only part of a list of syntax nodes. Returns the
-- nodes it does not use.
type NodesPartialParser a = [SyntaxNode] -> ParserM (a, [SyntaxNode])
-- | @parseStatements sourceString syntax@ tries to convert a concrete syntax
-- tree into an AST.
parseStatements :: String -> [SyntaxNode] -> Either ParseError AST
parseStatements source syntax = runParserM source $ coerceProgram syntax
-- | @parseExpression sourceString syntax@ tries to convert a concrete syntax
-- tree into an AST representing a single expression. It ignores any extra
-- syntax nodes after the expression. For example, for an input list of nodes
-- @[Integer, Whitespace, String]@ it would return an @IntLiteral@ AST node,
-- ignoring the @Whitespace@ and @String@ that follow.
--
-- This should never cause any issues: correct output from the CST parser will
-- not have any extra nodes (besides trivia nodes).
parseExpression :: String -> [SyntaxNode] -> Either ParseError Expr
parseExpression source syntax = case runParserM source $ coerceExpr syntax of
Left err -> Left err
Right (WithCtx _ expr, _) -> Right expr
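-- A small illustration of that behaviour (a sketch reusing this module's
-- qualified imports; the node widths must line up with the source string):
exampleExprNodes :: [SyntaxNode]
exampleExprNodes =
  [ SyntaxNode S.Int 2 [],             -- "42"
    SyntaxNode (S.Token T.White) 1 [], -- " "
    SyntaxNode S.String 3 []           -- "\"a\""
  ]

-- parseExpression "42 \"a\"" exampleExprNodes evaluates to
-- Right (AST.IntegerLiteral 42), ignoring the trailing whitespace and string nodes.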
-- | Try to turn CST into top-level AST.
coerceProgram :: NodesParser AST
coerceProgram [] = return $ AST.AST []
coerceProgram (SyntaxNode S.Program _ children : _) = AST.AST . catMaybes <$> mapM coerceStmt children
coerceProgram (SyntaxNode kind width _ : remaining)
| S.syntaxKindIsTrivia kind = advance width >> coerceProgram remaining
| otherwise = unexpectedNode kind "in root"
-- | Try to turn CST into Stmt AST. Returns @Nothing@ if the given node is a
-- trivia node.
coerceStmt :: NodeParser (Maybe (WithCtx Stmt))
coerceStmt (SyntaxNode S.Decl _ children) = do
startPos <- gets pstateStartPos
((declType, declName, expr), remaining) <-
sequence3
( coerceLexeme S.DeclType "declaration type",
coerceLexeme S.DeclName "declaration name",
coerceExpr
)
children
endPos <- gets pstateEndPos
-- This is a node parser, so we have to use all remaining nodes
mapM_ (advance . S.snodeWidth) remaining
return $ Just $ WithCtx (ctxFromRgn startPos endPos) (AST.Decl declType declName expr)
coerceStmt (SyntaxNode kind width _)
| S.syntaxKindIsTrivia kind = advance width >> return Nothing
| otherwise = unexpectedNode kind "in Program"
-- | Try to turn CST into Expr AST. Returns @(expr, remainingNodesFromInput)@
-- when successful.
coerceExpr :: NodesPartialParser (WithCtx Expr)
coerceExpr [] = throwMissingSyntax "expression"
coerceExpr (SyntaxNode kind width children : remaining)
| S.syntaxKindIsTrivia kind = advance width >> coerceExpr remaining
| otherwise = do
startPos <- gets pstateStartPos
expr <- case kind of
S.String -> AST.StringLiteral . tail . init <$> consume width
S.Int -> AST.IntegerLiteral . read <$> consume width
S.Double -> AST.DoubleLiteral . read <$> consume width
S.BoolTrue -> advance width >> return (AST.BoolLiteral True)
S.BoolFalse -> advance width >> return (AST.BoolLiteral False)
S.Var -> AST.Var <$> consume width
S.Dict -> coerceDict children
S.List -> coerceList children
S.Tuple -> coerceTuple children
S.ExtImport -> coerceExtImport children
S.Quoter -> coerceQuoter children
_ -> unexpectedNode kind "where an expression was expected"
endPos <- gets pstateEndPos
return (WithCtx (ctxFromRgn startPos endPos) expr, remaining)
coerceDict :: NodesParser Expr
coerceDict syntax = do
(_, entries, _) <-
runNodesPartialParser syntax $
sequence3
( coerceLexeme (S.Token T.LCurly) "{",
coerceEntries,
coerceLexeme (S.Token T.RCurly) "}"
)
return $ AST.Dict entries
coerceEntries :: NodesPartialParser [(Identifier, WithCtx Expr)]
coerceEntries [] = return ([], [])
coerceEntries (n@(SyntaxNode kind width children) : remaining)
| kind == S.DictEntry = (first . (:)) <$> coerceEntry children <*> coerceEntries remaining
| kind == S.Token T.Comma || S.syntaxKindIsTrivia kind = advance width >> coerceEntries remaining
| otherwise = return ([], n : remaining)
coerceEntry :: NodesParser (Identifier, WithCtx Expr)
coerceEntry syntax = do
(dictKey, _, expr) <-
runNodesPartialParser syntax $
sequence3
( coerceLexeme S.DictKey "dictionary key",
coerceLexeme (S.Token T.Colon) ":",
coerceExpr
)
return (dictKey, expr)
coerceList :: NodesParser Expr
coerceList syntax = do
(_, values, _) <-
runNodesPartialParser syntax $
sequence3
( coerceLexeme (S.Token T.LSquare) "[",
coerceValues,
coerceLexeme (S.Token T.RSquare) "]"
)
return $ AST.List values
coerceTuple :: NodesParser Expr
coerceTuple syntax = do
startPos <- gets pstateStartPos
(_, values, _) <-
runNodesPartialParser syntax $
sequence3
( coerceLexeme (S.Token T.LParen) "(",
coerceValues,
coerceLexeme (S.Token T.RParen) ")"
)
endPos <- gets pstateEndPos
case values of
(x1 : x2 : xs) -> return $ AST.Tuple (x1, x2, xs)
_ -> throwError $ TupleTooFewValues (SourceRegion startPos endPos) (length values)
coerceValues :: NodesPartialParser [WithCtx Expr]
coerceValues [] = return ([], [])
coerceValues (n@(SyntaxNode kind width _) : remaining)
| kind == S.Token T.Comma || S.syntaxKindIsTrivia kind = advance width >> coerceValues remaining
| S.syntaxKindIsExpr kind = do
expr <- runNodesPartialParser [n] coerceExpr
first (expr :) <$> coerceValues remaining
| otherwise = return ([], n : remaining)
coerceExtImport :: NodesParser Expr
coerceExtImport syntax = do
(_, name, _, from) <-
runNodesPartialParser syntax $
sequence4
( coerceLexeme (S.Token T.KwImport) "import",
coerceExtImportName,
coerceLexeme (S.Token T.KwFrom) "from",
coerceLexeme S.ExtImportPath "a string"
)
return $ AST.ExtImport name (tail $ init from)
coerceExtImportName :: NodesPartialParser ExtImportName
coerceExtImportName [] = throwMissingSyntax "external import name"
coerceExtImportName (SyntaxNode kind width _ : remaining)
| kind == S.ExtImportModule = do
name <- AST.ExtImportModule <$> consume width
return (name, remaining)
| kind == S.Token T.LCurly = do
advance width
((name, _), syntax') <-
sequence2
( coerceLexeme S.ExtImportField "external import field",
coerceLexeme (S.Token T.RCurly) "}"
)
remaining
return (AST.ExtImportField name, syntax')
| S.syntaxKindIsTrivia kind = advance width >> coerceExtImportName remaining
| otherwise = unexpectedNode kind "in external import name"
coerceQuoter :: NodesParser Expr
coerceQuoter syntax = do
((lquote, lquoteRgn), contents, (rquote, rquoteRgn)) <-
runNodesPartialParser syntax $
sequence3
( withRegion $ coerceLexeme (S.Token T.LQuote) "{=tag",
(first concat <$>) . collectQuoted,
withRegion $ coerceLexeme (S.Token T.RQuote) "tag=}"
)
let ltag = drop 2 lquote
let rtag = take (length rquote - 2) rquote
if ltag /= rtag
then
throwError $
QuoterDifferentTags
(WithCtx (Ctx lquoteRgn) lquote)
(WithCtx (Ctx rquoteRgn) rquote)
else return $ AST.Quoter ltag contents
collectQuoted :: NodesPartialParser [String]
collectQuoted [] = return ([], [])
collectQuoted (n@(SyntaxNode kind width _) : remaining)
| kind == S.Token T.Quoted = do
lexeme <- consume width
first (lexeme :) <$> collectQuoted remaining
| kind == S.Token T.RQuote = return ([], n : remaining)
| otherwise = unexpectedNode kind "inside quoter"
-- | Run 2 NodesPartialParsers, using the remaining nodes from each NodesPartialParser for the next
sequence2 :: (NodesPartialParser a, NodesPartialParser b) -> NodesPartialParser (a, b)
sequence2 (fa, fb) syntax = do
(a, syntax') <- fa syntax
(b, syntax'') <- fb syntax'
return ((a, b), syntax'')
-- | Run 3 NodesPartialParsers, using the remaining nodes from each NodesPartialParser for the next
sequence3 :: (NodesPartialParser a, NodesPartialParser b, NodesPartialParser c) -> NodesPartialParser (a, b, c)
sequence3 (fa, fb, fc) syntax = do
(a, syntax') <- fa syntax
(b, syntax'') <- fb syntax'
(c, syntax''') <- fc syntax''
return ((a, b, c), syntax''')
-- | Run 4 NodesPartialParsers, using the remaining nodes from each NodesPartialParser for the next
sequence4 :: (NodesPartialParser a, NodesPartialParser b, NodesPartialParser c, NodesPartialParser d) -> NodesPartialParser (a, b, c, d)
sequence4 (fa, fb, fc, fd) syntax = do
(a, syntax') <- fa syntax
(b, syntax'') <- fb syntax'
(c, syntax''') <- fc syntax''
(d, syntax'''') <- fd syntax'''
return ((a, b, c, d), syntax'''')
-- | Run a NodesPartialParser and advance past all remaining nodes. Essentially,
-- this converts a 'NodesPartialParser' into a 'NodesParser'.
--
-- This function should only be used when, assuming we start with a valid CST,
-- there are only trivia nodes remaining.
runNodesPartialParser :: [SyntaxNode] -> NodesPartialParser a -> ParserM a
runNodesPartialParser syntax fa = do
(a, syntax') <- fa syntax
mapM_ (advance . S.snodeWidth) syntax'
return a
-- | Run a NodesPartialParser and track the region surrounding it
withRegion :: NodesPartialParser a -> NodesPartialParser (a, SourceRegion)
withRegion fa syntax = do
start <- gets pstateStartPos
(a, syntax') <- fa syntax
end <- gets pstateEndPos
return ((a, SourceRegion start end), syntax')
coerceLexeme :: SyntaxKind -> String -> NodesPartialParser String
coerceLexeme _ description [] = throwMissingSyntax description
coerceLexeme wantedKind description (SyntaxNode kind width _ : remaining)
| kind == wantedKind = do
lexeme <- consume width
return (lexeme, remaining)
| S.syntaxKindIsTrivia kind = advance width >> coerceLexeme wantedKind description remaining
| otherwise = unexpectedNode kind $ "instead of " ++ description
-- | Alias for 'pstatePos'
pstateStartPos :: ParseState -> SourcePosition
pstateStartPos = pstatePos
-- | Get the position of the previous character. This can be used for getting
-- the (inclusive) end position for a region.
pstateEndPos :: ParseState -> SourcePosition
pstateEndPos state = case pstatePos state of
-- Move back a character, moving up a line if necessary
SourcePosition 1 1 -> SourcePosition 1 1
SourcePosition l 1 -> SourcePosition (l - 1) (pstateLastLineLength state)
SourcePosition l c -> SourcePosition l (c - 1)

View File

@ -0,0 +1,69 @@
{-# LANGUAGE LambdaCase #-}
module Wasp.Analyzer.Parser.AbstractParser.Monad
( ParseState (..),
pstatePos,
ParserM,
runParserM,
consume,
advance,
unexpectedNode,
throwMissingSyntax,
)
where
import Control.Monad.Except (Except, runExcept, throwError)
import Control.Monad.State.Strict (StateT, evalStateT, gets, modify)
import Wasp.Analyzer.Parser.ConcreteParser.CST (SyntaxKind)
import Wasp.Analyzer.Parser.ParseError
import Wasp.Analyzer.Parser.SourcePosition (SourcePosition (SourcePosition))
data ParseState = ParseState
{ pstateLine :: !Int,
pstateColumn :: !Int,
pstateLastLineLength :: !Int,
pstateRemainingSource :: String
}
pstatePos :: ParseState -> SourcePosition
pstatePos s = SourcePosition (pstateLine s) (pstateColumn s)
type ParserM a = StateT ParseState (Except ParseError) a
runParserM :: String -> ParserM a -> Either ParseError a
runParserM source parser = runExcept $ evalStateT parser initialState
where
initialState =
ParseState
{ pstateLine = 1,
pstateColumn = 1,
pstateLastLineLength = 1,
pstateRemainingSource = source
}
consume :: Int -> ParserM String
consume amount = do
lexeme <- gets (take amount . pstateRemainingSource)
advance amount
return lexeme
advance :: Int -> ParserM ()
advance 0 = return ()
advance amount = do
gets (head . pstateRemainingSource) >>= \case
'\n' -> modify (\s -> s {pstateLine = pstateLine s + 1, pstateColumn = 1, pstateLastLineLength = pstateColumn s})
_ -> modify (\s -> s {pstateColumn = pstateColumn s + 1})
modify (\s -> s {pstateRemainingSource = tail (pstateRemainingSource s)})
advance (amount - 1)
-- | Raises a GHC 'error'. Use this when a node is found in the CST that should
-- not be in that position. This scenario is a bug in the parser, which is why
-- it crashes waspc.
unexpectedNode :: SyntaxKind -> String -> ParserM a
unexpectedNode unexpectedKind locationDescription =
error $ "Unexpected syntax " ++ show unexpectedKind ++ " " ++ locationDescription ++ " created by CST"
throwMissingSyntax :: String -> ParserM a
throwMissingSyntax reason = do
pos <- SourcePosition <$> gets pstateLine <*> gets pstateColumn
throwError $ MissingSyntax pos reason
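-- A small sketch of how these helpers thread the position state ('gets' comes
-- from Control.Monad.State.Strict, which is already imported above):
examplePositions :: Either ParseError (String, SourcePosition)
examplePositions = runParserM "foo bar" $ do
  word <- consume 3 -- returns "foo" and moves the position to column 4
  advance 1         -- step over the space
  pos <- gets pstatePos
  return (word, pos) -- evaluates to ("foo", SourcePosition 1 5)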

View File

@ -0,0 +1,106 @@
module Wasp.Analyzer.Parser.ConcreteParser
( -- * Concrete parser
-- | This module contains functions for converting lists of "Token"s into
-- concrete syntax trees. These trees represent any Wasp source file completely,
-- including whitespace and comments, even if the source file is invalid.
parseCST,
parseCSTExpression,
-- * Types
SyntaxNode (..),
SyntaxKind (..),
ParseError (..),
)
where
import Wasp.Analyzer.Parser.ConcreteParser.CST (SyntaxKind (..), SyntaxNode (..))
import Wasp.Analyzer.Parser.ConcreteParser.ParseError
import Wasp.Analyzer.Parser.ConcreteParser.ParserLib
import Wasp.Analyzer.Parser.Token (Token)
import qualified Wasp.Analyzer.Parser.Token as T
-- | Parse a list of tokens representing a Wasp program into a concrete syntax
-- tree.
--
-- See "SyntaxNode" for a description of what a concrete syntax tree contains.
parseCST :: [Token] -> ([ParseError], [SyntaxNode])
parseCST tokens = parse tokens (root <> eof)
-- | Parse a list of tokens into a concrete syntax tree for a wasp expression.
-- This is mainly used for testing.
--
-- See "SyntaxNode" for a description of what a concrete syntax tree contains.
parseCSTExpression :: [Token] -> ([ParseError], [SyntaxNode])
parseCSTExpression tokens = parse tokens (expr <> eof)
root :: GrammarRule
root = Program <$$> stmts
stmts :: GrammarRule
stmts = eof <|> (stmt <> stmts)
stmt :: GrammarRule
stmt =
Decl
<$$> (T.Identifier `as` DeclType)
<> (T.Identifier `as` DeclName)
<> expr
{- ORMOLU_DISABLE -}
expr :: GrammarRule
expr =
Dict <$$> listLike lcurly dictEntry comma rcurly
<|> List <$$> listLike lsquare expr comma rsquare
-- Note that we don't check number of tuple element here: this is left to
-- the next phase of parsing.
<|> Tuple <$$> listLike lparen expr comma rparen
<|> quoter
<|> int
<|> double
<|> string
<|> (T.Identifier `as` Var)
<|> kwTrue
<|> kwFalse
<|> extImport
{- ORMOLU_ENABLE -}
dictEntry :: GrammarRule
dictEntry = DictEntry <$$> (T.Identifier `as` DictKey) <> colon <> expr
quoter :: GrammarRule
quoter = Quoter <$$> lquote <> quoterTail
where
quoterTail = rquote <|> (quoted <> quoterTail)
extImport :: GrammarRule
extImport =
ExtImport
<$$> kwImport <> extImportName <> kwFrom <> (T.String `as` ExtImportPath)
where
extImportName :: GrammarRule
extImportName =
lcurly <> (T.Identifier `as` ExtImportField) <> rcurly
<|> (T.Identifier `as` ExtImportModule)
-- | @listLike open value sep close@ parses list-like structures of the form:
--
-- @open ((value sep)* (value sep?))? close@
--
-- In other words, a possibly empty list with an optional trailing separator.
listLike ::
-- | start grammar rule
GrammarRule ->
-- | value grammar rule
GrammarRule ->
-- | separator grammar rule
GrammarRule ->
-- | end grammar rule
GrammarRule ->
GrammarRule
listLike open value sep close =
open <> listLikeTail
where
listLikeTail :: GrammarRule
listLikeTail =
close <|> (value <> ((sep <> listLikeTail) <|> close))
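-- For reference, a minimal sketch of calling the concrete parser directly.
-- The token widths are chosen to match a hypothetical source "app Todo {}":
exampleCst :: ([ParseError], [SyntaxNode])
exampleCst =
  parseCST
    [ T.Token T.Identifier 3, -- "app"
      T.Token T.White 1,
      T.Token T.Identifier 4, -- "Todo"
      T.Token T.White 1,
      T.Token T.LCurly 1,
      T.Token T.RCurly 1
    ]

-- The error list is empty and the nodes form a Program with a single Decl
-- whose expression is an empty Dict; the whitespace tokens are kept in the
-- tree as trivia nodes.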

View File

@ -0,0 +1,103 @@
{-# LANGUAGE DeriveGeneric #-}
module Wasp.Analyzer.Parser.ConcreteParser.CST
( -- * Concrete Syntax
-- | This data structure for storing syntax information is inspired by
-- rust-analyzer's parsing library, rowan (https://github.com/rust-analyzer/rowan).
SyntaxNode (..),
SyntaxKind (..),
cstPrettyPrint,
syntaxKindIsTrivia,
syntaxKindIsExpr,
)
where
import Control.DeepSeq (NFData)
import Data.Aeson (ToJSON)
import Data.List (foldl', intercalate)
import GHC.Generics (Generic)
import Wasp.Analyzer.Parser.Token (TokenKind, tokenKindIsTrivia)
-- | The kind of a "SyntaxNode".
data SyntaxKind
= Token !TokenKind
| Program
| Decl
| DeclType
| DeclName
| Dict
| DictEntry
| DictKey
| List
| Tuple
| ExtImport
| ExtImportModule
| ExtImportField
| ExtImportPath
| String
| Int
| Double
| BoolTrue
| BoolFalse
| Var
| Quoter
| Error
deriving (Eq, Ord, Show, Generic)
instance NFData SyntaxKind
instance ToJSON SyntaxKind
-- | A node in a concrete syntax tree. We use such a loosely typed system so
-- that we can store all pieces of source syntax in the tree, including
-- comments, whitespace, and unexpected tokens/characters.
--
-- Later, this CST is processed into an AST, a more strongly typed format that
-- leaves out all of the trivia tokens.
data SyntaxNode = SyntaxNode
{ snodeKind :: !SyntaxKind,
-- | Width of this "SyntaxNode". For nodes with children, this is the sum
-- of their children's widths. For nodes without children, this is the
-- width of the source text covered by the node.
snodeWidth :: !Int,
snodeChildren :: [SyntaxNode]
}
deriving (Eq, Ord, Show, Generic)
instance NFData SyntaxNode
instance ToJSON SyntaxNode
-- | Pretty print a concrete syntax tree. Shows tree of syntax kinds and their
-- offsets in the source file.
cstPrettyPrint :: SyntaxNode -> String
cstPrettyPrint node = go 0 "" node
where
go :: Int -> String -> SyntaxNode -> String
go offset indent pnode =
let nodeTxt = indent ++ show (snodeKind pnode) ++ "@" ++ show offset ++ ".." ++ show (offset + snodeWidth pnode)
childrenTxt =
fst $
foldl'
(\(strs, o) child -> (strs ++ [go o (indent ++ " ") child], o + snodeWidth child))
([], offset)
(snodeChildren pnode)
in intercalate "\n" (nodeTxt : childrenTxt)
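-- An illustrative sketch: pretty printing a small hand-built tree yields one
-- line per node with its offset range, e.g. "Decl@0..5" followed by indented
-- "DeclType@0..3" and "DeclName@3..5" lines.
examplePrettyPrint :: String
examplePrettyPrint =
  cstPrettyPrint $
    SyntaxNode Decl 5 [SyntaxNode DeclType 3 [], SyntaxNode DeclName 2 []]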
syntaxKindIsTrivia :: SyntaxKind -> Bool
syntaxKindIsTrivia (Token k) = tokenKindIsTrivia k
syntaxKindIsTrivia _ = False
syntaxKindIsExpr :: SyntaxKind -> Bool
syntaxKindIsExpr Dict = True
syntaxKindIsExpr List = True
syntaxKindIsExpr Tuple = True
syntaxKindIsExpr ExtImport = True
syntaxKindIsExpr String = True
syntaxKindIsExpr Int = True
syntaxKindIsExpr Double = True
syntaxKindIsExpr BoolTrue = True
syntaxKindIsExpr BoolFalse = True
syntaxKindIsExpr Var = True
syntaxKindIsExpr _ = False

View File

@ -0,0 +1,30 @@
{-# LANGUAGE DeriveGeneric #-}
module Wasp.Analyzer.Parser.ConcreteParser.ParseError
( -- * Parse error
ParseError (..),
-- * Source positions
Region (..),
)
where
import Control.DeepSeq (NFData)
import GHC.Generics (Generic)
import Wasp.Analyzer.Parser.Token (TokenKind)
import Wasp.Analyzer.Parser.TokenSet (TokenSet)
data ParseError
= UnexpectedToken !Region !TokenKind TokenSet
| UnexpectedEOF !Int TokenSet
deriving (Eq, Ord, Show, Generic)
instance NFData ParseError
-- | @Region start end@ where @start@ is the offset of the first character in
-- the region and @end@ is the offset of the first character after the region.
-- In other words, it's the region of characters with offsets from @start@ to
-- @end@, including @start@ but not including @end@.
data Region = Region !Int !Int deriving (Eq, Ord, Show, Generic)
instance NFData Region

View File

@ -0,0 +1,774 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
module Wasp.Analyzer.Parser.ConcreteParser.ParserLib
( -- * Internal parsing library
GrammarRule,
parse,
-- * Combinators
-- | The fixity of the operators ('(<>)', '`as`', '(<$$>)', and '(<|>)') was
-- chosen to make using the operators require as few parentheses as possible:
--
-- >>> Program <$$> lparen <> rparen
-- <|> string
-- Alternative (Group Program (Chain (Token LParen) (Token RParen))) (Token String)
as,
(<$$>),
perhaps,
(<|>),
-- * Primitive grammar rules
-- | These grammar rules (besides `eof`) consume a single token of the kind
-- matching their name. The syntax node they produce has an "S.Token" kind,
-- unless otherwise specified.
lparen,
rparen,
lsquare,
rsquare,
lcurly,
rcurly,
comma,
colon,
kwImport,
kwFrom,
kwTrue,
kwFalse,
string,
int,
double,
lquote,
rquote,
quoted,
-- | A primitive for "T.Identifier" is not included: this should always be
-- done with an `as`, e.g. @"T.Identifier" ``as`` "S.DictKey"@
eof,
-- * Error Recovery
-- | This parser has built-in automatic error recovery. Currently, error
-- recovery occurs when an error is produced inside of a group rule. Recovery
-- is based on the possible tokens that can follow the current rule.
--
-- First, we define the follow set of a grammar rule @R@ in the grammar
-- @R <> S@ to be the set of token kinds that @S@ can accept as its first
-- token. For example, @"let"@ in @"let" <> ("=" <|> "+=")@ has the follow
-- set @["=", "+="]@.
--
-- We can then describe the basic mechanism as follows:
--
-- For a "GrammarRule" @label '<$$>' first '<>' next@, if an error occurs
-- while parsing @first@, check if the token it errored on is in the follow
-- set for the group.
--
-- If yes, then there is a following rule that can accept this token. So
-- just continue parsing. Otherwise, no following rule has a use for the
-- token, so discard it.
--
-- In either case, we log an error to report at the end of parsing.
--
-- As an example, consider the grammar rules:
--
-- @
-- stmts = eof <|> (stmt <> stmts)
-- stmt = Stmt <$$> "let" <> identifier <> "=" <> expr <> ";"
-- @
--
-- With the example source text:
--
-- @
-- let x = 2
-- let y = 4;
-- @
--
-- A parse error will occur at the beginning of line 2, when @let@ is found
-- instead of the expected @;@.
--
-- First, we find the follow set of the @Stmt@ group. From the definition of
-- @stmts@, we can see @stmt@ is followed by @stmts@. @stmts@ can start with
-- either @eof@ or @stmt@ (which in turn can start with @"let"@). Hence, the
-- follow set is @[eof, "let"]@.
--
-- Since the "let" token the parser erroed on is in this set, it does not
-- get discarded by the group. This leaves it available for the call to
-- @stmt@, which can parse successfully because of this, resulting in the
-- following parse tree and error:
--
-- @
-- [ Stmt ["let", "x", "=", "2", error]
-- , Stmt ["let", "y", "=", "4", ";"]
-- ]
--
-- [Unexpected "let" at line 2 column 1: expected ";"]
-- @
--
-- But if we look at a more complex situation, with nested groups, we need
-- to introduce another concept to produce correct parses. For example:
--
-- @
-- list = List <$$> '[' <> expr <> ']'
-- tuple = Tuple <$$> '(' <> expr <> ')'
-- expr = list <|> tuple <|> number
-- @
--
-- with source:
--
-- @
-- [(2 ]
-- @
--
-- If we were to apply the same logic as before, we would find the follow
-- set of the @2@ to be @[")"]@. Since @]@ is not in this set, we discard
-- the right square bracket. But what we want to do is leave it so that
-- the @list@ rule can use it as its closing bracket.
--
-- To handle this scenario, we introduce an unwinding behavior to travel up
-- through group rules until an appropriate point is found to recover. We
-- do this first by defining the extended follow set of a rule @R@ as
-- the union of the follow sets of @R@ and its ancestor rules.
--
-- For example, we find the follow set and extended follow set of the @2@ in
-- the example code. To start, we write out the tree for grammar rules that
-- are used:
--
-- @
-- List <$$> '[' <> (Tuple <$$> '(' <> number <> ')') <> ']'
-- @
--
-- Now, we see the follow set of the @2@ is just @[')']@. Then, the follow
-- sets of its ancestors (@Tuple<$$>@ and @List<$$>@) are @[']']@ and @[]@,
-- respectively. So, the extended follow set is @[')', ']']@.
--
-- Unwinding starts when:
--
-- (1) The next token __is not__ in the follow set of the current rule
-- (2) The next token __is__ in the extended follow set of the current rule
--
-- While unwinding, the parser will stop at each group ancestor of the
-- original rule to determine whether to continue unwinding. Unwinding stops
-- when a group rule @S@ is reached such that the follow set of @S@ includes
-- the next token. At this point, the same thing happens as in the first
-- description of error recovery.
--
-- The example rules and source would produce the following parse tree and
-- errors:
--
-- @
-- [List ["[", Tuple ["(", "2"], "]"]]
--
-- [Unexpected "]" at line 1 column 9: expected expression]
-- @
--
-- ** Error Recovery Rules
--
-- A description of the full error recovery process:
--
-- 1. While parsing @first@ in @label '<$$>' first '<>' next@, an
-- unexpected token is encountered.
-- 2. The follow set and extended follow set for the group node are
-- calculated.
-- 3. If the unexpected token is in the follow set, continue parsing normally.
-- 4. If the unexpected token is not in the follow set, but is in the extended
-- follow set, begin unwinding:
-- a. Stop at each group rule ancestor @S@ and compute the follow set of
-- @S@.
-- b. If the follow set of @S@ contains the unexpected token, stop
-- unwinding and resume parsing.
-- c. Otherwise, keep unwinding.
-- 5. If the unexpected token is not in the extended follow set, discard the
-- token and continue parsing.
-- * Future improvements
-- |
-- __TODO #1:__
--
-- Move error recovery from the group rule to the '<>' rules.
--
-- __TODO #2:__
--
-- The current error recovery and the upgraded version described in TODO #1
-- can do what feels like the wrong thing in some cases. For example,
-- consider the following invalid wasp code:
--
-- @
-- route TestRoute { path: "/", to:
--
-- page TestPage { fn: import TestPage from "@ext/TestPage.js" }
-- @
--
-- To a human, this should parse as two declaration statements. But the
-- current system identifies @page@ as a variable for the dictionary entry
-- @to:@. Then it treats @TestPage@ as the key for another entry, and
-- complains that it found @{@ instead of @:@.
--
-- This is due to the unwinding behavior on errors being very short-sighted:
-- it only cares whether the next non-white token will succeed.
--
-- A possible fix to this is to explore all possible (or many possible)
-- unwinding stopping points and figuring out which one results in the
-- fewest future parse errors.
--
-- In case this has performance issues with having to explore lots of possible
-- parse trees, we could limit how many branches we check, similar to optimizing
-- DFS for state tree searches (like a chess solver).
--
-- __TODO #3:__
--
-- Change wasp language a bit to make the start of a declaration unambiguous.
-- This would make it much easier for the parser to recover from an unfinished
-- declaration.
--
-- Ideas for this:
--
-- (1) Make all type names reserved keywords in the grammar
-- (2) Distinguish between type names and variable names based on the case
-- of the first letter (lowercase = type, uppercase = variable)
--
-- __TODO #4:__
--
-- When the next token is not in the follow set of the rule, try discarding
-- multiple tokens, stopping when we do find a token in the follow set.
)
where
import Control.Monad (unless)
import Control.Monad.Except (ExceptT, MonadError (catchError, throwError), runExceptT)
import Control.Monad.State.Strict (State, gets, modify, runState)
import Data.Maybe (fromMaybe)
import Wasp.Analyzer.Parser.ConcreteParser.CST (SyntaxKind, SyntaxNode (SyntaxNode, snodeChildren, snodeKind, snodeWidth))
import qualified Wasp.Analyzer.Parser.ConcreteParser.CST as S
import Wasp.Analyzer.Parser.ConcreteParser.ParseError
import Wasp.Analyzer.Parser.Token (Token (tokenKind, tokenWidth), TokenKind, tokenKindIsTrivia)
import qualified Wasp.Analyzer.Parser.Token as T
import Wasp.Analyzer.Parser.TokenSet (TokenSet)
import qualified Wasp.Analyzer.Parser.TokenSet as TokenSet
-- | The type of a grammar rule. Use combinators and primitives to build more
-- complex grammars. Run with `parse`, which supports reasonable automatic error
-- recovery.
--
-- Every rule begins parsing by consuming trivia tokens at the beginning of the
-- input and creating a syntax node for each trivia token.
data GrammarRule
= -- | Always succeeds, consuming only trivia tokens.
Succeed
| -- | Checks if the first non-trivia token is the expected token
-- kind. If it is, it consumes the token and outputs a syntax node. Otherwise,
-- it does not consume the token and throws a parse error exception.
--
-- If a "SyntaxKind" is specified, use it to label the syntax node created.
Token TokenKind (Maybe SyntaxKind)
| -- | Succeeds if the input is empty after consuming trivia tokens at the
-- beginning of the input.
--
-- If it fails, it will report an error at the first non-trivia token while
-- also consuming all remaining input.
EOF
| -- | Run the inner grammar rule and wrap its resulting nodes in a node
-- labeled by the given syntax kind. If it fails, apply the error recovery
-- strategy (this is currently the only place where error recovery is
-- performed).
--
-- This is the only rule that adds structure and depth into the CST.
Group SyntaxKind GrammarRule
| -- | Run the first grammar rule. If it succeeds, run the second rule.
Chain GrammarRule GrammarRule
| -- | Run the grammar rule that can accept the next token in the input as its
-- first token.
--
-- Invariant: There can be no overlap in which token kinds each parser can
-- accept. The parser will use a Haskell 'error' in this case.
--
-- TODO: Remove the invariant and use backtracking to try each rule
Alternative GrammarRule GrammarRule
deriving (Eq, Ord, Show)
-- | @rule1 '<>' rule2@ is a parser that runs first @rule1@, then @rule2@.
-- Equivalent to @Chain rule1 rule2@.
instance Semigroup GrammarRule where
-- "Succeed" is identity of <>. The second pattern does affect the result tree,
-- but only by changing where trivia tokens are placed in the tree, which is
-- not an issue.
Succeed <> rule = rule
rule <> Succeed = rule
first <> second = Chain first second
-- | @mempty@ is a parser that matches an empty string, or a string of only
-- trivia tokens.
instance Monoid GrammarRule where
mempty = Succeed
-- | Run a "GrammarRule" on an input of tokens. Returns a tuple @(errors, nodes)@.
parse :: [Token] -> GrammarRule -> ([ParseError], [SyntaxNode])
parse tokens grammar =
let initialState =
ParseState
{ pstateInput = tokens,
pstateCurrentLevelNodes = [],
pstateErrors = [],
pstateNextTokenOffset = 0,
pstatePrevTokenOffset = 0,
pstateAcceptableNextTokens = []
}
in case runState (runExceptT (parseRule grammar)) initialState of
(Left err, state) -> case err of
Unwind -> error "Unwind at top-level GrammarRule (impossible, this is a bug in Wasp.Analyzer.Parser.ConcreteParser.ParserLib)"
ParseError perr ->
let errs = pstateErrors state ++ [perr]
newNode = makeErrorNode perr False
nodes = pstateCurrentLevelNodes state ++ [newNode]
in (errs, nodes)
(_, state) -> (pstateErrors state, pstateCurrentLevelNodes state)
data ParseState = ParseState
{ -- | Remaining tokens to be parsed.
pstateInput :: [Token],
-- | "SyntaxNode"s processed so far at the current level of the CST. An
-- initial top-level is created when the parser is started and each "Group"
-- rule creates a new level.
pstateCurrentLevelNodes :: [SyntaxNode],
-- | Errors collected that will be reported.
pstateErrors :: [ParseError],
-- | 0-based offset from the beginning of the source file, in characters,
-- of the start of the next token (@head . `pstateInput`@).
pstateNextTokenOffset :: !Int,
-- | 0-based offset from the beginning of the source file, in characters,
-- of the start of the last consumed token
pstatePrevTokenOffset :: !Int,
-- | Stack of acceptable next tokens, where entries to the stack are added
-- when we enter a group rule for parsing.
--
-- The first token set in this list is the current follow set and the union
-- of all of the token sets is the extended follow set.
pstateAcceptableNextTokens :: [TokenSet]
}
deriving (Eq, Show, Ord)
data ParseException
= -- | @ParseError parseError@ is used as a signal when an unexpected token is
-- encountered. This kind of exception is caught by error recovery points
-- and added to the output list of errors.
--
-- This may be converted into "Unwind" if error recovery determines unwinding
-- should occur.
ParseError ParseError
| -- | @Unwind@ is used as a signal during error recovery. It is used
-- to implement the unwinding behavior described at the top of the module.
--
-- Essentially, it means "I don't know how to parse this, but I know someone
-- above me can. Can you? If not, pass it along."
Unwind
type ParserM a = ExceptT ParseException (State ParseState) a
parseRule :: GrammarRule -> ParserM ()
parseRule Succeed = parseSucceed
parseRule (Token expectedKind maybeLabel) = parseToken expectedKind maybeLabel
parseRule EOF = parseEOF
parseRule (Group label inner) = parseGroup label inner
parseRule (Chain first second) = parseChain first second
parseRule (Alternative left right) = parseAlternative left right
parseSucceed :: ParserM ()
parseSucceed = consumeTrivia
parseToken :: TokenKind -> Maybe SyntaxKind -> ParserM ()
parseToken expectedKind maybeSyntaxKind =
(consumeTrivia >> peek) >>= \case
Just nextToken | tokenKind nextToken == expectedKind -> do
advance
let newNodeKind = fromMaybe (S.Token expectedKind) maybeSyntaxKind
let newNode =
SyntaxNode
{ snodeKind = newNodeKind,
snodeWidth = tokenWidth nextToken,
snodeChildren = []
}
pushNode newNode
nextToken -> throwParseError nextToken (TokenSet.fromKind expectedKind)
parseEOF :: ParserM ()
parseEOF =
(consumeTrivia >> peek) >>= \case
Just nextToken -> do
nextTokenOffset <- gets pstateNextTokenOffset
advance
let nextTokenKind = tokenKind nextToken
-- TODO: consider reporting all token kinds here? not sure
_tokenKinds <- collectUntilEOF
throwError $
ParseError $
UnexpectedToken
(Region nextTokenOffset (nextTokenOffset + tokenWidth nextToken))
nextTokenKind
TokenSet.fromEOF
where
collectUntilEOF :: ParserM [TokenKind]
collectUntilEOF =
peek >>= \case
Just next -> advance >> ((tokenKind next :) <$> collectUntilEOF)
Nothing -> pure []
Nothing -> do
return ()
parseGroup :: SyntaxKind -> GrammarRule -> ParserM ()
parseGroup label inner = do
consumeTrivia
currentLevelNodes <- gets pstateCurrentLevelNodes
modify (\s -> s {pstateCurrentLevelNodes = []})
maybeInnerParserError <- tryError $ parseRule inner
childNodes <- gets pstateCurrentLevelNodes
modify (\s -> s {pstateCurrentLevelNodes = currentLevelNodes})
case maybeInnerParserError of
Nothing -> pushGroupNode childNodes
Just (ParseError parseError) -> handleInnerParseError childNodes parseError
Just Unwind -> handleInnerUnwind childNodes
where
handleInnerParseError :: [SyntaxNode] -> ParseError -> ParserM ()
handleInnerParseError childNodes parseError = do
-- ASSUMPTION: The token errored on has not been consumed
--
-- (1) Check whether the token kind we errored on can be accepted by any
-- following rule.
--
-- (2) Report the error and create the group node containing the error and
-- what it parsed before the error.
--
-- (3) Handle the error:
-- (3a) If it can not be accepted by a following rule, discard the error
-- token.
-- (3b) If unwinding is required to accept the token, begin unwinding.
-- (3c) Otherwise, the error can be handled without unwinding, so do
-- nothing and continue parsing.
-- (1)
let errorTokenKind = case parseError of
UnexpectedEOF _ _ -> Nothing
UnexpectedToken _ k _ -> Just k
errorCanBeHandled <- canAnyFollowingRuleAcceptTokenKind errorTokenKind
errorCanBeHandledWithoutUnwind <- canImmediateFollowingRuleAcceptTokenKind errorTokenKind
-- (2)
logError parseError
let nodeForErr = makeErrorNode parseError errorCanBeHandled
pushGroupNode $ childNodes ++ [nodeForErr]
-- (3)
if not errorCanBeHandled
then -- (3a)
advance
else -- (3b)
unless errorCanBeHandledWithoutUnwind $ throwError Unwind
handleInnerUnwind :: [SyntaxNode] -> ParserM ()
handleInnerUnwind childNodes = do
errorTokenKind <- fmap tokenKind <$> peek
-- When unwinding, check if this is the level where we can parse the error
-- token. If it is, then stop unwinding. Otherwise, we continue unwinding.
errorCanBeHandledWithoutUnwind <- canAnyFollowingRuleAcceptTokenKind errorTokenKind
pushGroupNode childNodes
unless errorCanBeHandledWithoutUnwind $ throwError Unwind
pushGroupNode :: [SyntaxNode] -> ParserM ()
pushGroupNode children =
let width = sum $ map snodeWidth children
in pushNode $
SyntaxNode
{ snodeKind = label,
snodeWidth = width,
snodeChildren = children
}
parseChain :: GrammarRule -> GrammarRule -> ParserM ()
parseChain first second = do
consumeTrivia
-- First, figure out following set for @second@
pushFollowingTokens $ getValidFirstTokens second
maybeError <- tryError (parseRule first)
-- Make sure to remove following set now that we're leaving this "Chain"
popFollowingTokens
-- Run second (or throw error if first failed)
case maybeError of
Nothing -> parseRule second
Just err -> throwError err
parseAlternative :: GrammarRule -> GrammarRule -> ParserM ()
parseAlternative left right
| leftFirstTokens `TokenSet.intersection` rightFirstTokens /= TokenSet.empty =
error $
unlines
[ "[WARNING] Alternative grammar rule has two rules that can accept the same first token."
++ "This will result in the second rule never running on these tokens.",
"\tLeft Tokens:",
"\t" ++ show leftFirstTokens,
"\n\tRight Tokens:",
"\t" ++ show rightFirstTokens
]
| otherwise = do
consumeTrivia
nextToken <- peek
let nextKind = tokenKind <$> nextToken
if nextKind `willSucceedIn` left
then parseRule left
else
if nextKind `willSucceedIn` right
then parseRule right
else throwParseError nextToken (leftFirstTokens `TokenSet.union` rightFirstTokens)
where
leftFirstTokens = getValidFirstTokens left
rightFirstTokens = getValidFirstTokens right
-- | Advance past trivia tokens, making syntax nodes for each token
consumeTrivia :: ParserM ()
consumeTrivia =
peek >>= \case
Nothing -> pure ()
Just nextToken
-- Consume any whitespace
| tokenKindIsTrivia (tokenKind nextToken) -> do
advance
let newNode =
SyntaxNode
{ snodeKind = S.Token (tokenKind nextToken),
snodeWidth = tokenWidth nextToken,
snodeChildren = []
}
pushNode newNode
consumeTrivia
| otherwise -> pure ()
-- | Peek the immediate next token in input (including trivia)
peek :: ParserM (Maybe Token)
peek =
gets pstateInput >>= \case
[] -> return Nothing
tok : _ -> return (Just tok)
-- | Advance past the next token in the input, updating offsets.
advance :: ParserM ()
advance =
gets pstateInput >>= \case
[] -> modify (\s -> s {pstatePrevTokenOffset = pstateNextTokenOffset s})
(consumed : remaining) -> do
newLastOffset <- gets pstateNextTokenOffset
let newNextOffset = newLastOffset + tokenWidth consumed
modify (\s -> s {pstateInput = remaining, pstatePrevTokenOffset = newLastOffset, pstateNextTokenOffset = newNextOffset})
-- | @throwParseError actual expected@ throws an error.
--
-- Assumes @actual@ has not been `advance`d past yet (and does not advance past
-- it).
throwParseError :: Maybe Token -> TokenSet -> ParserM ()
throwParseError Nothing eset = do
offset <- gets pstateNextTokenOffset
throwError $ ParseError $ UnexpectedEOF offset eset
throwParseError (Just actual) expectedTokens = do
offset <- gets pstateNextTokenOffset
throwError $ ParseError $ UnexpectedToken (Region offset (offset + tokenWidth actual)) (tokenKind actual) expectedTokens
-- | Make an error node. An error node should only be produced when catching and
-- pushing errors to the state.
makeErrorNode ::
-- | Parse error that will be used to calculate the width of the error node.
ParseError ->
-- | If "True", give the error node a width of 0. Otherwise, use the
-- width of the error item.
Bool ->
SyntaxNode
makeErrorNode (UnexpectedEOF _ _) _ =
SyntaxNode
{ snodeKind = S.Error,
snodeWidth = 0,
snodeChildren = []
}
makeErrorNode (UnexpectedToken (Region so eo) _ _) zeroWidth =
SyntaxNode
{ snodeKind = S.Error,
snodeWidth = if zeroWidth then 0 else eo - so,
snodeChildren = []
}
-- | Add a node to the output.
pushNode :: SyntaxNode -> ParserM ()
pushNode node = modify (\s -> s {pstateCurrentLevelNodes = pstateCurrentLevelNodes s ++ [node]})
-- | Add an error to the output (does not throw the error).
logError :: ParseError -> ParserM ()
logError err = modify (\s -> s {pstateErrors = pstateErrors s ++ [err]})
-- | Add a "TokenSet" to the following stack.
pushFollowingTokens :: TokenSet -> ParserM ()
pushFollowingTokens eset = modify (\s -> s {pstateAcceptableNextTokens = eset : pstateAcceptableNextTokens s})
-- | Remove the last added "TokenSet" from the following stack.
popFollowingTokens :: ParserM ()
popFollowingTokens = modify (\s -> s {pstateAcceptableNextTokens = drop 1 $ pstateAcceptableNextTokens s})
-- | Check if a "TokenKind" (or end of file) is one of the first possible tokens
-- in a "GrammarRule".
willSucceedIn :: Maybe TokenKind -> GrammarRule -> Bool
_ `willSucceedIn` Succeed = True
k `willSucceedIn` grammarRule = k `TokenSet.member` getValidFirstTokens grammarRule
-- | Find the "TokenSet" of "TokenKind"s that the "GrammarRule" would succeed on
-- as its first input.
getValidFirstTokens :: GrammarRule -> TokenSet
getValidFirstTokens grammarRule = fst $ go grammarRule TokenSet.empty
where
-- Returns (firstTokens, can succeed with no input?)
go :: GrammarRule -> TokenSet -> (TokenSet, Bool)
go Succeed eset = (eset, True)
go (Token tkind _) eset = (tkind `TokenSet.insertKind` eset, False)
go EOF eset = (TokenSet.insertEof eset, False)
go (Group _ p) eset = go p eset
go (Chain first second) eset =
let (eset', firstCanBeEmpty) = go first eset
in if firstCanBeEmpty
then go second eset'
else (eset', False)
go (Alternative left right) eset =
let (eset', leftCanBeEmpty) = go left eset
(eset'', rightCanBeEmpty) = go right eset'
in (eset'', leftCanBeEmpty || rightCanBeEmpty)
-- | Check if any following rule can accept the given "TokenKind". If this is
-- true, then unwinding at the current location will succeed in recovering from
-- an error.
canAnyFollowingRuleAcceptTokenKind :: Maybe TokenKind -> ParserM Bool
canAnyFollowingRuleAcceptTokenKind nextTokenKind = do
followingTokens <- gets pstateAcceptableNextTokens
return $ any (TokenSet.member nextTokenKind) followingTokens
-- | Check if the next rule can accept the given "TokenKind". If this is true,
-- then an error can be recovered from at the current context with no unwinding.
--
-- Pre-condition: @pstateAcceptableNextTokens@ is not empty.
canImmediateFollowingRuleAcceptTokenKind :: Maybe TokenKind -> ParserM Bool
canImmediateFollowingRuleAcceptTokenKind nextTokenKind = do
followingTokens <- gets pstateAcceptableNextTokens
return $ case followingTokens of
[] -> error "canImmediateFollowingRuleAcceptTokenKind called with empty pstateAcceptableNextTokens. This is a parser library error."
(nextTokens : _) -> nextTokenKind `TokenSet.member` nextTokens
infix 9 `as`
-- | Matches a single token of the specified "TokenKind" and produces an output
-- node with the given "SyntaxKind".
--
-- __Example:__
--
-- >>> parse [T.Token T.Identifier 3] $ T.Identifier `as` S.DeclName
-- ([],[SyntaxNode {snodeKind = DeclName, snodeWidth = 3, snodeChildren = []}])
as :: TokenKind -> SyntaxKind -> GrammarRule
tkind `as` skind = Token tkind (Just skind)
infixr 5 <$$>
-- | @syntaxKind '<$$>' rule@ (pronounced group) runs a grammar rule and places the
-- nodes that grammar rule adds to the
-- output in a parent node.
--
-- __Example:__
--
-- >>> parse [T.Token T.Identifier 3, T.Token T.Int 2] $ S.Decl <$$> (T.Identifier `as` S.DeclName) <> int
-- ([],[SyntaxNode {snodeKind = Decl, snodeWidth = 5, snodeChildren = [SyntaxNode {snodeKind = DeclName, snodeWidth = 3, snodeChildren = []},SyntaxNode {snodeKind = Int, snodeWidth = 2, snodeChildren = []}]}])
(<$$>) :: SyntaxKind -> GrammarRule -> GrammarRule
label <$$> grammarRule = Group label grammarRule
-- | Run a grammar rule or consume no input.
--
-- __Example:__
--
-- >>> parse [T.Token T.Int 1, T.Token T.Int 1] $ int <> perhaps comma <> int
-- ([],[SyntaxNode {snodeKind = Int, snodeWidth = 1, snodeChildren = []},SyntaxNode {snodeKind = Int, snodeWidth = 1, snodeChildren = []}])
perhaps :: GrammarRule -> GrammarRule
perhaps grammarRule = grammarRule <|> Succeed
infixr 4 <|>
-- | Run a grammar, choosing either the left or the right rule.
--
-- __Example:__
--
-- >>> let number = int <|> double
-- >>> parse [T.Token T.Int 2] number
-- ([],[SyntaxNode Int 2 []])
-- >>> parse [T.Token T.Double 3] number
-- ([],[SyntaxNode Double 3 []])
(<|>) :: GrammarRule -> GrammarRule -> GrammarRule
left <|> right = Alternative left right
-- Primitive grammar rules
lparen :: GrammarRule
lparen = Token T.LParen Nothing
rparen :: GrammarRule
rparen = Token T.RParen Nothing
lsquare :: GrammarRule
lsquare = Token T.LSquare Nothing
rsquare :: GrammarRule
rsquare = Token T.RSquare Nothing
lcurly :: GrammarRule
lcurly = Token T.LCurly Nothing
rcurly :: GrammarRule
rcurly = Token T.RCurly Nothing
comma :: GrammarRule
comma = Token T.Comma Nothing
colon :: GrammarRule
colon = Token T.Colon Nothing
kwImport :: GrammarRule
kwImport = Token T.KwImport Nothing
kwFrom :: GrammarRule
kwFrom = Token T.KwFrom Nothing
-- | Produces a "S.BoolTrue" syntax node
kwTrue :: GrammarRule
kwTrue = T.KwTrue `as` S.BoolTrue
-- | Produces a "S.BoolFalse" syntax node
kwFalse :: GrammarRule
kwFalse = T.KwFalse `as` S.BoolFalse
-- | Produces a "S.String" syntax node
string :: GrammarRule
string = T.String `as` S.String
-- | Produces a "S.Int" syntax node
int :: GrammarRule
int = T.Int `as` S.Int
-- | Produces a "S.Double" syntax node
double :: GrammarRule
double = T.Double `as` S.Double
lquote :: GrammarRule
lquote = Token T.LQuote Nothing
rquote :: GrammarRule
rquote = Token T.RQuote Nothing
quoted :: GrammarRule
quoted = Token T.Quoted Nothing
-- | Succeeds when the parser is at the end of the file.
eof :: GrammarRule
eof = EOF
-- UTILITIES:
tryError :: MonadError e m => m () -> m (Maybe e)
tryError m = (m >> return Nothing) `catchError` (return . Just)
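-- A rough end-to-end illustration of the recovery behaviour described in the
-- module documentation (an illustrative sketch mirroring the "[(2 ]" example,
-- using this module's primitives with token widths for a source like "[(2]"):
exampleRecovery :: ([ParseError], [SyntaxNode])
exampleRecovery = parse exampleTokens exampleGrammar
  where
    exampleTokens =
      [ T.Token T.LSquare 1,
        T.Token T.LParen 1,
        T.Token T.Int 1,
        T.Token T.RSquare 1
      ]
    exampleGrammar = S.List <$$> lsquare <> exampleTuple <> rsquare
    exampleTuple = S.Tuple <$$> lparen <> int <> rparen

-- The result contains one UnexpectedToken error (a "]" where ")" was expected),
-- yet the List node still claims the "]" as its closing bracket: the Tuple
-- child is closed early with a zero-width Error node.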

View File

@ -0,0 +1,6 @@
module Wasp.Analyzer.Parser.Lexer
( Wasp.Analyzer.Parser.Lexer.Lexer.lex,
)
where
import Wasp.Analyzer.Parser.Lexer.Lexer (lex)

View File

@ -1,111 +0,0 @@
{
-- This file is processed by Alex (https://www.haskell.org/alex/) and generates
-- the module `Analyzer.Parser.Lexer`
{-# LANGUAGE NamedFieldPuns #-}
module Wasp.Analyzer.Parser.Lexer
( lexer
) where
import Wasp.Analyzer.Parser.LexerUtils
import Wasp.Analyzer.Parser.Monad
import Wasp.Analyzer.Parser.Token (Token (..), TokenType (..))
import Wasp.Analyzer.Parser.ParseError (ParseError (..))
import Control.Monad.State.Lazy (gets)
import Control.Monad.Except (throwError)
}
-- Character set aliases
$digit = 0-9
$alpha = [a-zA-Z]
$identstart = [_$alpha]
$ident = [_$alpha$digit]
$any = [.$white]
-- Regular expression aliases
@string = \"([^\\\"]|\\.)*\" -- matches string-literal on a single line, from https://stackoverflow.com/a/9260547/3902376
@double = "-"? $digit+ "." $digit+
@integer = "-"? $digit+
@ident = $identstart $ident* "'"*
@linecomment = "//" [^\n\r]*
@blockcomment = "/*" (("*"[^\/]) | [^\*] | $white)* "*/" -- Based on https://stackoverflow.com/a/16165598/1509394 .
-- Tokenization rules (regex -> token)
tokens :-
-- Skips whitespace and comments
<0> $white+ ;
<0> @linecomment ;
<0> @blockcomment ;
-- Quoter rules:
-- Uses Alex start codes to lex quoted characters with different rules:
-- - On "{=tag", enter <quoter> start code and make a TLQuote token
-- - While in <quoter>, if "tag=}" is seen
-- - If this closing tag matches the opening, enter <0> and make a TRQuote token
-- - Otherwise, stay in <quoter> and make a TQuoted token
-- - Otherwise, take one character at a time and make a TQuoted token
<0> "{=" @ident { beginQuoter }
<quoter> @ident "=}" { lexQuoterEndTag }
<quoter> $any { createValueToken TQuoted }
-- Simple tokens
<0> "(" { createConstToken TLParen }
<0> ")" { createConstToken TRParen }
<0> "[" { createConstToken TLSquare }
<0> "]" { createConstToken TRSquare }
<0> "{" { createConstToken TLCurly }
<0> "}" { createConstToken TRCurly }
<0> "," { createConstToken TComma }
<0> ":" { createConstToken TColon }
<0> "import" { createConstToken TImport }
<0> "from" { createConstToken TFrom }
<0> "true" { createConstToken TTrue }
<0> "false" { createConstToken TFalse }
-- Strings, numbers, identifiers
<0> @string { createValueToken $ \s -> TString $ read s }
<0> @double { createValueToken $ \s -> TDouble $ read s }
<0> @integer { createValueToken $ \s -> TInt $ read s }
<0> @ident { createValueToken $ \s -> TIdentifier s }
{
-- | Lexes a single token from the input.
--
-- This function is designed for use with the Happy monadic parser that uses threaded/monadic lexer.
-- This means that parser, as it is building an AST, asks for a single token at a time from the lexer, on the go.
-- This is done in "continuation" style -> parser calls lexer while passing it the function ('parseToken') via which
-- lexer gives control back to the parser.
-- In such setup both lexer and parser are operating in the same 'Parser' monad.
-- Check https://www.haskell.org/happy/doc/html/sec-monads.html#sec-lexers for more details.
--
-- This function internally calls `alexScan`, which is a function generated by Alex responsible for doing actual lexing/scanning.
lexer :: (Token -> Parser a) -> Parser a
lexer parseToken = do
input@(_, _, remainingSource) <- gets parserRemainingInput
startCodeInt <- gets $ startCodeToInt quoter . parserLexerStartCode
case alexScan input startCodeInt of
AlexError _input'@(_, _, c:_) -> do
-- NOTE(martin): @_input'@ is actually the same as @input@ before the scan,
-- that is how AlexError works -> it returns last AlexInput before Alex
-- failed. Therefore, the character it failed on is actually the first
-- character of the remaining source.
pos <- gets parserSourcePosition
throwError $ UnexpectedChar c pos
AlexError (_, _, []) -> error "impossible"
AlexSkip _input' numCharsSkipped -> do
updateParserStateWithSkippedChars numCharsSkipped
lexer parseToken
AlexToken _input' tokenLength mkToken -> do
let lexeme = take tokenLength remainingSource
token <- mkToken lexeme
updateParserStateWithScannedToken token
parseToken token
AlexEOF -> do
token <- createConstToken TEOF ""
updateParserStateWithScannedToken token
parseToken token
}

View File

@ -0,0 +1,142 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
module Wasp.Analyzer.Parser.Lexer.Internal
( -- * Lexer monad code
Lexer,
runLexer,
initialLexState,
LexState,
LexerStartCode (..),
LexInput (..),
getInput,
updateInput,
setStartCode,
getStartCode,
-- * Alex utilities
AlexInput,
startCodeToInt,
alexGetByte,
alexInputPrevChar,
beginQuoter,
lexQuoterEndTag,
createToken,
)
where
import Codec.Binary.UTF8.String (encodeChar)
import Control.Monad.State.Strict (MonadState, State, evalState, get, modify)
import Data.Word (Word8)
import Wasp.Analyzer.Parser.Token (Token (..), TokenKind)
import qualified Wasp.Analyzer.Parser.Token as T
-- LEXER MONAD CODE
newtype Lexer a = Lexer {unLexer :: State LexState a}
deriving (Functor, Applicative, Monad, MonadState LexState)
runLexer :: Lexer a -> LexState -> a
runLexer lexer state = evalState (unLexer lexer) state
initialLexState :: String -> LexState
initialLexState source =
LexState
{ lstateInput = LexInput '\n' [] source,
lstateStartCode = DefaultStartCode
}
data LexState = LexState
{ lstateInput :: LexInput,
lstateStartCode :: LexerStartCode
}
getInput :: Lexer LexInput
getInput = lstateInput <$> get
updateInput :: Int -> Lexer ()
updateInput !consumed = do
(LexInput _ _ remaining) <- getInput
let newInput =
let (prevChar : remaining') = drop (consumed - 1) remaining
in LexInput prevChar [] remaining'
modify $ \s -> s {lstateInput = newInput}
getStartCode :: Lexer LexerStartCode
getStartCode = lstateStartCode <$> get
setStartCode :: LexerStartCode -> Lexer ()
setStartCode startCode = modify $ \s -> s {lstateStartCode = startCode}
-- | A representation of the lexer's start code: https://www.haskell.org/alex/doc/html/alex-files.html#startcodes
data LexerStartCode
= -- | For a start code @DefaultStartCode@, the lexer is in start code <0>
DefaultStartCode
| -- | For a start code @QuoterStartCode tag@, the lexer is in start code <quoter> with opening tag @tag@
QuoterStartCode String
deriving (Show)
-- | The type of the input given to the lexer.
--
-- An input @(prevChar, bs, remainingSource)@ represents:
-- - @prevChar@ The previous character, successfully consumed by the lexer.
-- - @bs@ The yet unconsumed UTF8 bytes of the current character being lexed.
-- - @remainingSource@ The remaining source to be lexed and parsed
-- (including the character currently being lexed as the first char in it).
data LexInput = LexInput Char [Word8] String
-- ALEX UTILITIES
type AlexInput = LexInput
-- | Convert the lexer's start code to an int for Alex to use
startCodeToInt :: Int -> LexerStartCode -> Int
startCodeToInt _ DefaultStartCode = 0
startCodeToInt quoter (QuoterStartCode _) = quoter
-- | Required by Alex.
--
-- This function is taken from the Alex basic wrapper.
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (LexInput prevChar (b : bs) remainingSource) = Just (b, LexInput prevChar bs remainingSource)
alexGetByte (LexInput _ [] []) = Nothing
alexGetByte (LexInput _ [] (currChar : remainingSource)) = case encodeChar currChar of
(b : bs) -> Just (b, LexInput currChar bs remainingSource)
[] -> Nothing
-- | Required by Alex.
--
-- This function is taken from the Alex basic wrapper.
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (LexInput prevChar _ _) = prevChar
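-- For illustration: with input @LexInput '\n' [] "č"@ (where @'č'@ is a
-- two-byte UTF-8 character), 'alexGetByte' yields byte 196 together with
-- @LexInput 'č' [141] ""@; asking again yields 141, and a third call returns
-- 'Nothing', since both the byte buffer and the remaining source are empty.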
-- | Takes a lexeme like "{=json" and sets the quoter start code
beginQuoter :: String -> Lexer Token
beginQuoter leftQuoteTag = do
let tag = drop 2 leftQuoteTag
setStartCode (QuoterStartCode tag)
createToken T.LQuote leftQuoteTag
-- | Takes a lexeme like "json=}" and either ends a quoter or add quoted text to
-- the quoter
lexQuoterEndTag :: String -> Lexer Token
lexQuoterEndTag rightQuoteTag = do
startCode <- getStartCode
case startCode of
DefaultStartCode -> error "impossible: lexQuoterEndTag with DefaultStartCode"
QuoterStartCode startTag | startTag == tag -> do
setStartCode DefaultStartCode
createToken T.RQuote rightQuoteTag
_ -> do
createToken T.Quoted rightQuoteTag
where
tag = take (length rightQuoteTag - 2) rightQuoteTag
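-- A small usage sketch (assuming the quoter rules in Lexer.x feed these lexemes):
--
-- >>> runLexer (beginQuoter "{=json" >> getStartCode) (initialLexState "")
-- QuoterStartCode "json"
--
-- With that start code set, @lexQuoterEndTag "json=}"@ produces a 'T.RQuote'
-- token of width 6 and resets the start code, while a mismatched closer such as
-- @"psl=}"@ produces a 'T.Quoted' token and leaves the quoter start code as is.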
-- | Makes an action that creates a token from a "TokenKind"
createToken :: TokenKind -> (String -> Lexer Token)
createToken kind lexeme =
return $
Token
{ tokenKind = kind,
tokenWidth = length lexeme
}

View File

@ -0,0 +1,108 @@
{
-- This file is processed by Alex (https://www.haskell.org/alex/) and generates
-- the module `Wasp.Analyzer.Parser.Lexer.Lexer`
module Wasp.Analyzer.Parser.Lexer.Lexer
( Wasp.Analyzer.Parser.Lexer.Lexer.lex
) where
import Wasp.Analyzer.Parser.Lexer.Internal
import Wasp.Analyzer.Parser.Token (Token)
import qualified Wasp.Analyzer.Parser.Token as T
}
-- Character set aliases
$space = [\ \t\f\v\r] -- Non-newline whitespace
$digit = 0-9
$alpha = [a-zA-Z]
$identstart = [_$alpha]
$ident = [_$alpha$digit]
$any = [.$white]
-- Regular expression aliases
-- matches string-literal on a single line, from https://stackoverflow.com/a/9260547/3902376
@string = \"([^\\\"]|\\.)*\"
@double = "-"? $digit+ "." $digit+
@integer = "-"? $digit+
@ident = $identstart $ident* "'"*
@linecomment = "//" [^\n\r]*
-- Based on https://stackoverflow.com/a/16165598/1509394 .
@blockcomment = "/*" (("*"[^\/]) | [^\*] | $white)* "*/"
tokens :-
<0> $space+ { createToken T.White }
<0> \n { createToken T.Newline }
<0> @linecomment { createToken T.Comment }
<0> @blockcomment { createToken T.Comment }
-- Quoter rules:
-- Uses Alex start codes to lex quoted characters with different rules:
-- - On "{=tag", enter <quoter> start code and make a TLQuote token
-- - While in <quoter>, if "tag=}" is seen
-- - If this closing tag matches the opening, enter <0> and make a TRQuote token
-- - Otherwise, stay in <quoter> and make a TQuoted token
-- - Otherwise, take one character at a time and make a TQuoted token
<0> "{=" @ident { beginQuoter }
<quoter> @ident "=}" { lexQuoterEndTag }
<quoter> $any { createToken T.Quoted }
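-- For example, the source @{=json { } json=}@ lexes (per the rules above) as an
-- LQuote for "{=json", one Quoted token per interior character (the five
-- characters of " { } ", spaces included), and an RQuote for "json=}".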
-- Simple tokens
<0> "(" { createToken T.LParen }
<0> ")" { createToken T.RParen }
<0> "[" { createToken T.LSquare }
<0> "]" { createToken T.RSquare }
<0> "{" { createToken T.LCurly }
<0> "}" { createToken T.RCurly }
<0> "," { createToken T.Comma }
<0> ":" { createToken T.Colon }
<0> "import" { createToken T.KwImport }
<0> "from" { createToken T.KwFrom }
<0> "true" { createToken T.KwTrue }
<0> "false" { createToken T.KwFalse }
-- Strings, numbers, identifiers
<0> @string { createToken T.String }
<0> @double { createToken T.Double }
<0> @integer { createToken T.Int }
<0> @ident { createToken T.Identifier }
{
-- | Lexes a single token from the input, returning "Nothing" if the lexer has
-- reached the end of the input. This function uses a continuation passing style
-- so that this function and its consumer can benefit from tail call
-- optimization.
--
-- This function internally calls `alexScan`, which is a function generated by
-- Alex responsible for doing actual lexing/scanning.
lexOne :: (Maybe Token -> Lexer a) -> Lexer a
lexOne continue = do
input@(LexInput _ _ remaining) <- getInput
startCodeInt <- startCodeToInt quoter <$> getStartCode
case alexScan input startCodeInt of
AlexError (LexInput _ _ (c:_)) -> do
token <- createToken T.Error [c]
updateInput 1
continue (Just token)
AlexError (LexInput c _ []) -> do
token <- createToken T.Error [c]
updateInput 1
continue (Just token)
AlexSkip _ _ ->
error "AlexSkip is impossible: lexer should not skip any input"
AlexToken _ numChars makeToken -> do
let lexeme = take numChars remaining
token <- makeToken lexeme
updateInput numChars
continue (Just token)
AlexEOF -> continue Nothing
-- | @lex source@ lexes all of @source@ into "Token"s.
lex :: String -> [Token]
lex source = runLexer (lexOne continue) $ initialLexState source
where
continue :: Maybe Token -> Lexer [Token]
continue Nothing = return []
-- This is written awkwardly like this so it is a tail call to @lexOne@
continue (Just tok) = (tok:) <$> lexOne continue
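-- A rough usage sketch, assuming the rules above (token widths are tracked in
-- the tokens but elided here):
--
-- >>> map T.tokenKind (Wasp.Analyzer.Parser.Lexer.Lexer.lex "test 42 // hi")
-- [Identifier,White,Int,White,Comment]
--
-- Invalid characters do not abort lexing; they become 'T.Error' tokens and
-- lexing continues:
--
-- >>> map T.tokenKind (Wasp.Analyzer.Parser.Lexer.Lexer.lex "te!st")
-- [Identifier,Error,Identifier]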
}

View File

@ -1,77 +0,0 @@
{-# LANGUAGE LambdaCase #-}
module Wasp.Analyzer.Parser.LexerUtils
( AlexInput,
startCodeToInt,
alexGetByte,
alexInputPrevChar,
beginQuoter,
lexQuoterEndTag,
createConstToken,
createValueToken,
)
where
import Codec.Binary.UTF8.String (encodeChar)
import Control.Monad.State.Lazy (gets)
import Data.Word (Word8)
import Wasp.Analyzer.Parser.Monad
import Wasp.Analyzer.Parser.Token (Token (..), TokenType (..))
type AlexInput = ParserInput
-- Convert the ParserState's start code to an int for Alex to use
startCodeToInt :: Int -> LexerStartCode -> Int
startCodeToInt _ DefaultStartCode = 0
startCodeToInt quoter (QuoterStartCode _) = quoter
-- | Required by Alex.
--
-- This function is taken from the Alex basic wrapper.
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (prevChar, b : bs, remainingSource) = Just (b, (prevChar, bs, remainingSource))
alexGetByte (_, [], []) = Nothing
alexGetByte (_, [], currChar : remainingSource) = case encodeChar currChar of
(b : bs) -> Just (b, (currChar, bs, remainingSource))
[] -> Nothing
-- | Required by Alex.
--
-- This function is taken from the Alex basic wrapper.
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (prevChar, _, _) = prevChar
-- | Takes a lexeme like "{=json" and sets the quoter start code
beginQuoter :: String -> Parser Token
beginQuoter leftQuoteTag = do
let tag = drop 2 leftQuoteTag
setStartCode $ QuoterStartCode tag
createConstToken (TLQuote tag) leftQuoteTag
-- | Takes a lexeme like "json=}" and either ends a quoter or add quoted text to the quoter
lexQuoterEndTag :: String -> Parser Token
lexQuoterEndTag rightQuoteTag =
gets parserLexerStartCode >>= \case
DefaultStartCode -> error "impossible: lexQuoterEndTag with DefaultStartCode"
QuoterStartCode startTag | startTag == tag -> do
setStartCode DefaultStartCode
createConstToken (TRQuote tag) rightQuoteTag
_ -> do
createValueToken TQuoted rightQuoteTag
where
tag = take (length rightQuoteTag - 2) rightQuoteTag
-- | Makes an action that creates a token from a constant TokenType.
createConstToken :: TokenType -> (String -> Parser Token)
createConstToken tokType lexeme = do
position <- gets parserSourcePosition
return $
Token
{ tokenType = tokType,
tokenStartPosition = position,
tokenLexeme = lexeme
}
-- | Makes an action that creates a token using the input lexeme.
createValueToken :: (String -> TokenType) -> (String -> Parser Token)
createValueToken getTokenType lexeme = createConstToken (getTokenType lexeme) lexeme

View File

@ -1,93 +0,0 @@
{-# LANGUAGE FlexibleContexts #-}
module Wasp.Analyzer.Parser.Monad
( ParserState (..),
makeInitialState,
Parser,
updateParserStateWithScannedToken,
updateParserStateWithSkippedChars,
setStartCode,
ParserInput,
LexerStartCode (..),
)
where
import Control.Monad.Except (Except)
import Control.Monad.State.Lazy (StateT, get, modify)
import Data.Word (Word8)
import Wasp.Analyzer.Parser.ParseError (ParseError)
import Wasp.Analyzer.Parser.SourcePosition (SourcePosition (..), calcNextPosition)
import Wasp.Analyzer.Parser.Token
type Parser a = StateT ParserState (Except ParseError) a
updateParserStateWithScannedToken :: Token -> Parser ()
updateParserStateWithScannedToken token = do
updatePositionAndInput (tokenLexeme token)
modify $ \s ->
s
{ lastToLastScannedToken = lastScannedToken s,
lastScannedToken = token
}
updateParserStateWithSkippedChars :: Int -> Parser ()
updateParserStateWithSkippedChars numChars = do
(_, _, remainingSource) <- parserRemainingInput <$> get
let charsSkipped = take numChars remainingSource
updatePositionAndInput charsSkipped
updatePositionAndInput :: String -> Parser ()
updatePositionAndInput parsedSourcePiece = do
position <- parserSourcePosition <$> get
(_, _, remainingSource) <- parserRemainingInput <$> get
let position' = calcNextPosition parsedSourcePiece position
let input' =
let (prevChar : remainingSource') = drop (length parsedSourcePiece - 1) remainingSource
in (prevChar, [], remainingSource')
modify $ \s ->
s
{ parserSourcePosition = position',
parserRemainingInput = input'
}
setStartCode :: LexerStartCode -> Parser ()
setStartCode startCode = modify $ \s -> s {parserLexerStartCode = startCode}
data ParserState = ParserState
{ parserSourcePosition :: SourcePosition,
-- | Last token that was scanned by Alex.
-- NOTE: Token first gets scanned by Alex, and then it gets parsed by Happy.
lastScannedToken :: Token,
-- | Second last token that was scanned by Alex.
lastToLastScannedToken :: Token,
parserRemainingInput :: ParserInput,
parserLexerStartCode :: LexerStartCode
}
deriving (Show)
-- | A representation of the lexer's start code: https://www.haskell.org/alex/doc/html/alex-files.html#startcodes
data LexerStartCode
= -- | For a start code @DefaultStartCode@, the lexer is in start code <0>
DefaultStartCode
| -- | For a start code @QuoterStartCode tag@, the lexer is in start code <quoter> and the opening tag was @tag@
QuoterStartCode String
deriving (Show)
makeInitialState :: String -> ParserState
makeInitialState source =
ParserState
{ parserSourcePosition = SourcePosition 1 1,
lastScannedToken = Token TEOF (SourcePosition 1 1) "\n", -- NOTE: Dummy initial value.
lastToLastScannedToken = Token TEOF (SourcePosition 1 1) "\n", -- NOTE: Dummy initial value.
parserRemainingInput = ('\n', [], source), -- NOTE: '\n' here is dummy initial value.
parserLexerStartCode = DefaultStartCode
}
-- | The type of the input given to the parser/lexer.
--
-- An input @(prevChar, bs, remainingSource)@ represents:
-- - @prevChar@ The previous character, successfully consumed by the lexer.
-- - @bs@ The yet unconsumed UTF8 bytes of the current character being lexed.
-- - @remainingSource@ The remaining source to be lexed and parsed
-- (including the character currently being lexed as the first char in it).
type ParserInput = (Char, [Word8], String)

View File

@ -2,48 +2,80 @@
module Wasp.Analyzer.Parser.ParseError
( ParseError (..),
parseErrorFromCSTParseError,
getErrorMessageAndCtx,
)
where
import Wasp.Analyzer.Parser.Ctx (Ctx, WithCtx (..), ctxFromPos, ctxFromRgn, getCtxRgn)
import Wasp.Analyzer.Parser.SourcePosition (SourcePosition (..))
import Wasp.Analyzer.Parser.SourceRegion (getRgnEnd, getRgnStart)
import Wasp.Analyzer.Parser.Token (Token (..))
import qualified Wasp.Analyzer.Parser.ConcreteParser.ParseError as CST
import Wasp.Analyzer.Parser.Ctx (Ctx (Ctx), WithCtx (..), ctxFromPos, ctxFromRgn, getCtxRgn)
import Wasp.Analyzer.Parser.SourcePosition (SourcePosition (..), offsetToPosition)
import Wasp.Analyzer.Parser.SourceRegion (SourceRegion, getRgnEnd, getRgnStart, offsetRegionToSourceRegion)
import Wasp.Analyzer.Parser.Token (TokenKind)
import Wasp.Analyzer.Parser.TokenSet (TokenSet)
import qualified Wasp.Analyzer.Parser.TokenSet as TokenSet
data ParseError
= -- | A lexical error representing an invalid character. It means that lexer
-- failed to construct/parse a token due to this unexpected character.
UnexpectedChar Char SourcePosition
| -- | In @ParseError token expectedTokens@, @token@ is the token where parse error
-- occured, while @expectedTokens@ is a list of tokens that would (any of them)
-- avoid that error if they were there instead of the @token@.
-- NOTE(martin): These @expectedTokens@ are represented via the names used for them
-- in the grammar defined in Parser.y, under section @%token@ (names are in the
-- first column), which have been slightly prettified (check Parser.y for details).
UnexpectedToken Token [String]
= -- | @UnexpectedToken region lexeme errorKind expectedKinds@ is an error that occurs
-- when one of @expectedKinds@ is expected, but the actual next token is
-- @errorKind@.
UnexpectedToken !SourceRegion String !TokenKind TokenSet
| -- | @UnexpectedEOF pos expectedKinds@ is an error that occurs when one of
-- @expectedKinds@ is expected, but the input is empty.
UnexpectedEOF !SourcePosition TokenSet
| -- | Thrown if parser encounters a quoter that has different tags, e.g.
-- {=json psl=}. Then the first String in QuoterDifferentTags will be "json"
-- while the second one will be "psl".
--
-- TODO: This error is never actually used: the lexer will never produce a
-- {= and =} next to each other with different tags.
QuoterDifferentTags (WithCtx String) (WithCtx String)
| -- | @TupleTooFewValues tupleRegion tupleSize@ occurs when a tuple contains
-- fewer than the required two values.
TupleTooFewValues !SourceRegion !Int
| -- | @MissingSyntax pos expectedSyntax@ occurs when a piece of syntax is not
-- found in the concrete parse tree. @expectedSyntax@ is a noun describing
-- what type of syntax was expected. For example, if the source code is
-- missing a comma after a dictionary entry, it would report @MissingSyntax
-- _ "comma"@.
MissingSyntax !SourcePosition String
deriving (Eq, Show)
-- | @parseErrorFromCSTParseError source cstParseError@ creates a "ParseError"
-- that represents @cstParseError@, using @source@ to find the lexeme
-- representing the token where the error was produced.
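--
-- For illustration, with @source = "a ! b"@ and a CST error
-- @CST.UnexpectedToken (CST.Region 2 3) errorKind expected@, the resulting
-- 'UnexpectedToken' carries the region at line 1, column 3 and the lexeme @"!"@,
-- with @errorKind@ and @expected@ passed through unchanged.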
parseErrorFromCSTParseError :: String -> CST.ParseError -> ParseError
parseErrorFromCSTParseError source (CST.UnexpectedToken (CST.Region start end) errorKind expected) =
let rgn = offsetRegionToSourceRegion source (CST.Region start end)
lexeme = take (end - start) $ drop start source
in UnexpectedToken rgn lexeme errorKind expected
parseErrorFromCSTParseError source (CST.UnexpectedEOF offset expected) =
let pos = offsetToPosition source offset
in UnexpectedEOF pos expected
getErrorMessageAndCtx :: ParseError -> (String, Ctx)
getErrorMessageAndCtx = \case
UnexpectedChar unexpectedChar pos ->
( "Unexpected character: " ++ [unexpectedChar],
ctxFromPos pos
)
UnexpectedToken unexpectedToken expectedTokens ->
( let unexpectedTokenMessage = "Unexpected token: " ++ tokenLexeme unexpectedToken
UnexpectedToken rgn lexeme _ expectedTokens ->
( let unexpectedTokenMessage = "Unexpected token: " ++ lexeme
expectedTokensMessage =
"Expected one of the following tokens instead: "
++ unwords expectedTokens
in unexpectedTokenMessage ++ if not (null expectedTokens) then "\n" ++ expectedTokensMessage else "",
let tokenStartPos@(SourcePosition sl sc) = tokenStartPosition unexpectedToken
tokenEndPos = SourcePosition sl (sc + length (tokenLexeme unexpectedToken) - 1)
in ctxFromRgn tokenStartPos tokenEndPos
++ TokenSet.showTokenSet expectedTokens
in unexpectedTokenMessage ++ if not (TokenSet.null expectedTokens) then "\n" ++ expectedTokensMessage else "",
ctxFromRgn (getRgnStart rgn) (getRgnEnd rgn)
)
UnexpectedEOF pos expectedTokens ->
( let unexpectedTokenMessage = "Unexpected end of file"
expectedTokensMessage =
"Expected one of the following tokens instead: "
++ TokenSet.showTokenSet expectedTokens
in unexpectedTokenMessage ++ if not (TokenSet.null expectedTokens) then "\n" ++ expectedTokensMessage else "",
ctxFromPos pos
)
QuoterDifferentTags (WithCtx lctx ltag) (WithCtx rctx rtag) ->
let ctx = ctxFromRgn (getRgnStart $ getCtxRgn lctx) (getRgnEnd $ getCtxRgn rctx)
in ("Quoter tags don't match: {=" ++ ltag ++ " ... " ++ rtag ++ "=}", ctx)
TupleTooFewValues region actualLength ->
( "Tuple only contains " ++ show actualLength ++ " values, but it must contain at least 2 values",
Ctx region
)
MissingSyntax pos expectedSyntax -> ("Missing expected " ++ expectedSyntax, ctxFromPos pos)

View File

@ -1,188 +0,0 @@
{
{-# LANGUAGE LambdaCase #-}
-- This file is processed by Happy (https://www.haskell.org/happy/) and generates
-- the module `Wasp.Analyzer.Parser.Parser`
module Wasp.Analyzer.Parser.Parser
( parseStatements,
parseExpression
) where
import Wasp.Analyzer.Parser.Lexer
import Wasp.Analyzer.Parser.AST
import Wasp.Analyzer.Parser.Ctx (WithCtx (..), Ctx (..), ctxFromPos, ctxFromRgn)
import Wasp.Analyzer.Parser.Token
import Wasp.Analyzer.Parser.SourcePosition (SourcePosition (..))
import Wasp.Analyzer.Parser.ParseError
import Wasp.Analyzer.Parser.Monad (Parser, ParserState (..))
import Control.Monad.State.Lazy (get)
import Control.Monad.Except (throwError)
}
-- Lines below tell Happy:
-- - to name the main parsing function `parse` when generating it
-- - that input to parser is `Token` type
-- - to call `parseError` when the parser encounters an error
-- - to provide `parseError` with list of expected tokens that would avoid the error
%name parseStatements Stmts
%name parseExpression Expr
%tokentype { Token }
%error { parseError }
%errorhandlertype explist
-- This sets up Happy to use a monadic parser and threaded lexer.
-- This means that parser generated by Happy will request tokens from lexer as it needs them instead of
-- requiring a list of all tokens up front.
-- Both lexer and parser operate in the 'Parser' monad, which can be used to track shared state and errors.
-- Check https://www.haskell.org/happy/doc/html/sec-monads.html#sec-lexers for more details.
%monad { Parser }
%lexer { lexer } { Token { tokenType = TEOF } }
-- This section defines the names that are used in the grammar section to
-- refer to each type of token.
-- NOTE: If you update it, also update the @prettyShowGrammarToken@ function below.
%token
'(' { Token { tokenType = TLParen } }
')' { Token { tokenType = TRParen } }
'[' { Token { tokenType = TLSquare } }
']' { Token { tokenType = TRSquare } }
'{' { Token { tokenType = TLCurly } }
'}' { Token { tokenType = TRCurly } }
',' { Token { tokenType = TComma } }
':' { Token { tokenType = TColon } }
import { Token { tokenType = TImport } }
from { Token { tokenType = TFrom } }
true { Token { tokenType = TTrue } }
false { Token { tokenType = TFalse } }
string { Token { tokenType = TString $$ } }
int { Token { tokenType = TInt $$ } }
double { Token { tokenType = TDouble $$ } }
'{=' { Token { tokenType = TLQuote $$ } }
quoted { Token { tokenType = TQuoted $$ } }
'=}' { Token { tokenType = TRQuote $$ } }
id { Token { tokenType = TIdentifier $$ } }
%%
-- Grammar rules
Stmts :: { AST }
: StmtWithCtx { AST [$1] }
| Stmts StmtWithCtx { AST $ astStmts $1 ++ [$2] }
StmtWithCtx :: { WithCtx Stmt }
: posStart Stmt posEnd { WithCtx (ctxFromRgn $1 $3) $2 }
Stmt :: { Stmt }
: Decl { $1 }
Decl :: { Stmt }
: id id ExprWithCtx { Decl $1 $2 $3 }
ExprWithCtx :: { WithCtx Expr }
: posStart Expr posEnd { WithCtx (ctxFromRgn $1 $3) $2 }
Expr :: { Expr }
: Dict { $1 }
| List { $1 }
| Tuple { $1 }
| Extimport { $1 }
| Quoter { $1 }
| string { StringLiteral $1 }
| int { IntegerLiteral $1 }
| double { DoubleLiteral $1 }
| true { BoolLiteral True }
| false { BoolLiteral False }
| id { Var $1 }
Dict :: { Expr }
: '{' DictEntries '}' { Dict $2 }
| '{' DictEntries ',' '}' { Dict $2 }
| '{' '}' { Dict [] }
DictEntries :: { [(Identifier, WithCtx Expr)] }
: DictEntry { [$1] }
| DictEntries ',' DictEntry { $1 ++ [$3] }
DictEntry :: { (Identifier, WithCtx Expr) }
: id ':' ExprWithCtx { ($1, $3) }
List :: { Expr }
: '[' ListVals ']' { List $2 }
| '[' ListVals ',' ']' { List $2 }
| '[' ']' { List [] }
ListVals :: { [WithCtx Expr] }
: ExprWithCtx { [$1] }
| ListVals ',' ExprWithCtx { $1 ++ [$3] }
-- We don't allow tuples shorter than 2 elements,
-- since they are not useful + this way we avoid
-- ambiguity between tuple with single element and expression
-- wrapped in parenthesis for purpose of grouping.
Tuple :: { Expr }
: '(' TupleVals ')' { Tuple $2 }
| '(' TupleVals ',' ')' { Tuple $2 }
TupleVals :: { (WithCtx Expr, WithCtx Expr, [WithCtx Expr]) }
: ExprWithCtx ',' ExprWithCtx { ($1, $3, []) }
| TupleVals ',' ExprWithCtx { (\(a, b, c) -> (a, b, c ++ [$3])) $1 }
Extimport :: { Expr }
: import Name from string { ExtImport $2 $4 }
Name :: { ExtImportName }
: id { ExtImportModule $1 }
| '{' id '}' { ExtImportField $2 }
Quoter :: { Expr }
: posStart '{=' posEnd Quoted posStart '=}' posEnd
{% if $2 /= $6
then throwError $ QuoterDifferentTags (WithCtx (ctxFromRgn $1 $3) $2) (WithCtx (ctxFromRgn $5 $7) $6)
else return $ Quoter $2 $4
}
Quoted :: { String }
: quoted { $1 }
| Quoted quoted { $1 ++ $2 }
-- | Special production that returns the start of the next/following token.
-- NOTE(martin): You might wonder why it uses the position of the last scanned (therefore *previous*)
-- token to get the position of the token that should be scanned *after* this production?
-- That sounds like it is getting position of one token too early, right? The trick is that Happy
-- always keeps one lookahead token in reserve, so it is actually always one token ahead of what we
-- would expect. Therefore getting the position of the last scanned token actually gives us the position
-- of the token that follows.
posStart :: { SourcePosition }
: {- empty -} {% (tokenStartPosition . lastScannedToken) `fmap` get }
-- | Special production that returns the end of the previous token.
posEnd :: { SourcePosition }
: {- empty -} {% (calcTokenEndPos . lastToLastScannedToken) `fmap` get }
{
parseError :: (Token, [String]) -> Parser a
parseError (token, expectedTokens) =
throwError $ UnexpectedToken token $ prettyShowGrammarToken <$> expectedTokens
-- Input is grammar token name, as defined in %tokens section above (first column),
-- while output is nicer representation of it, ready to be shown around,
-- e.g. in error messages.
prettyShowGrammarToken :: String -> String
prettyShowGrammarToken = \case
"'('" -> "("
"')'" -> ")"
"'['" -> "["
"']'" -> "]"
"'{'" -> "{"
"'}'" -> "}"
"','" -> ","
"':'" -> ":"
"string" -> "<string>"
"int" -> "<int>"
"double" -> "<double>"
"'{='" -> "{=<identifier>"
"quoted" -> "<quoted>"
"'=}'" -> "<identifier>=}"
"id" -> "<identifier>"
s -> s
}

View File

@ -1,6 +1,10 @@
module Wasp.Analyzer.Parser.SourcePosition
( SourcePosition (..),
calcNextPosition,
-- | TODO: Add types for source offsets and regions that use offsets. Name
-- ideas are @SourceLinearPosition@/@SourceLinearRegion@ and @SourceOffset@/
-- @SourceOffsetRegion@.
offsetToPosition,
)
where
@ -21,3 +25,7 @@ calcNextPosition ('\n' : cs) (SourcePosition line _) = calcNextPosition cs $ Sou
calcNextPosition (_ : cs) (SourcePosition line col) = calcNextPosition cs $ SourcePosition line (col + 1)
type SourceFragment = String
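-- | Convert a 0-based character offset into a (1-based) line/column position.
--
-- For example, @offsetToPosition "ab\ncd" 3@ is the position of the @'c'@,
-- i.e. line 2, column 1.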
offsetToPosition :: String -> Int -> SourcePosition
offsetToPosition source targetOffset =
calcNextPosition (take targetOffset source) (SourcePosition 1 1)

View File

@ -2,9 +2,11 @@ module Wasp.Analyzer.Parser.SourceRegion
( SourceRegion (..),
getRgnStart,
getRgnEnd,
offsetRegionToSourceRegion,
)
where
import qualified Wasp.Analyzer.Parser.ConcreteParser.ParseError as CST
import Wasp.Analyzer.Parser.SourcePosition
-- | @SourceRegion <regionStart> <regionEnd>@
@ -20,3 +22,13 @@ getRgnStart (SourceRegion start _) = start
getRgnEnd :: SourceRegion -> SourcePosition
getRgnEnd (SourceRegion _ end) = end
-- | Convert "CST.Region" to "SourceRegion". This conversion makes sure that the
-- "SourceRegion" represents the same region of text as the original region:
-- "CST.Region" and "SourceRegion" use different conventions for what the end
-- offset/position mean.
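--
-- For example, with @source = "ab\ncd"@, the region @CST.Region 0 2@ (0-based
-- offsets, exclusive end) becomes
-- @SourceRegion (SourcePosition 1 1) (SourcePosition 1 2)@ (1-based positions,
-- inclusive end); both cover exactly @"ab"@.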
offsetRegionToSourceRegion :: String -> CST.Region -> SourceRegion
offsetRegionToSourceRegion source (CST.Region start end) =
let startPos = offsetToPosition source start
endPos = offsetToPosition source (end - 1)
in SourceRegion startPos endPos

View File

@ -1,38 +1,124 @@
module Wasp.Analyzer.Parser.Token where
{-# LANGUAGE DeriveGeneric #-}
import Wasp.Analyzer.Parser.SourcePosition (SourcePosition, calcNextPosition)
module Wasp.Analyzer.Parser.Token
( Token (..),
TokenKind (..),
tokenKindIsTrivia,
showTokenKind,
)
where
data TokenType
= TLParen
| TRParen
| TLSquare
| TRSquare
| TLCurly
| TRCurly
| TComma
| TColon
| TImport
| TFrom
| TTrue
| TFalse
| TString String
| TInt Integer
| TDouble Double
| TLQuote String
| TQuoted String
| TRQuote String
| TIdentifier String
| TEOF
deriving (Eq, Show)
import Control.DeepSeq (NFData)
import Data.Aeson (ToJSON)
import GHC.Generics (Generic)
-- | The kind of token
--
-- This makes no distinction between value-containing tokens and atomic tokens
-- (i.e. "String" and "KwImport") to make kind comparison easier. To recover the
-- value associated with a token, you need the context provided by a "Token".
data TokenKind
= White
| -- | Newlines (only \n) are separated from whitespace to accommodate line-break-based
-- error recovery, if that is ever implemented in the future
Newline
| Comment
| LParen
| RParen
| LSquare
| RSquare
| LCurly
| RCurly
| Comma
| Colon
| KwImport
| KwFrom
| KwTrue
| KwFalse
| String
| Int
| Double
| -- | "{= <identifier>"
LQuote
| -- | "<identifier> =}"
RQuote
| Quoted
| -- | Non-keyword identifier
Identifier
| -- | Kind for unexpected characters
Error
deriving (Eq, Ord, Show, Generic)
instance NFData TokenKind
instance ToJSON TokenKind
-- | A token representing a span of text from the source.
--
-- __Note:__
--
-- The lexeme that a token represents is not stored with the token. This is
-- because the CST parser does not need to use lexemes during its parsing. The
-- effect of this is that we need to carry the source string around after parsing
-- and pass it to any functions that will need to get the lexeme from a token
-- or a CST node. Needing to access a lexeme for a token is the edge case, since
-- for most tokens the lexeme doesn't matter. Some of the cases that need it are:
--
-- - Error reporting
-- - Getting values for literal nodes, variable names, and quoters
--
-- We could store the lexeme inside tokens and/or inside CST nodes. Two ways
-- we could go about storing lexemes in the CST nodes:
--
-- (1) We store a lexeme for every syntax node, which could lead to
-- somewhat significant memory usage, since the lexeme for each token is
-- duplicated by all of its ancestors.
-- (2) We store a lexeme only for syntax nodes with no children, which makes it
-- kind of complicated to get the lexeme for a non-leaf node.
data Token = Token
{ tokenType :: TokenType,
tokenStartPosition :: SourcePosition,
tokenLexeme :: String
{ tokenKind :: !TokenKind,
-- | The width of the text representing this token. The source position is
-- not stored; only the width. Later, offsets into the source are computed
-- from an entire tree of tokens, and the source position is determined on
-- demand (for example, when an offset is going to be displayed in an error
-- message).
tokenWidth :: !Int
}
deriving (Eq, Show)
deriving (Eq, Show, Ord, Generic)
-- | Calculates source position of the last character in the token lexeme.
calcTokenEndPos :: Token -> SourcePosition
calcTokenEndPos (Token _ startPos "") = startPos
calcTokenEndPos t = calcNextPosition (init $ tokenLexeme t) (tokenStartPosition t)
instance NFData Token
instance ToJSON Token
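-- A rough sketch (not part of this module; @lexemeAt@ is hypothetical) of how a
-- lexeme can be recovered on demand from the source and the widths of the
-- tokens preceding it:
--
-- @
-- lexemeAt :: String -> [Token] -> Token -> String
-- lexemeAt source precedingTokens tok =
--   let offset = sum (map tokenWidth precedingTokens)
--    in take (tokenWidth tok) (drop offset source)
-- @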
-- | Check if a "TokenKind" is trivia (a token kind that does not affect the
-- parse structure, namely whitespace and comments)
tokenKindIsTrivia :: TokenKind -> Bool
tokenKindIsTrivia White = True
tokenKindIsTrivia Newline = True
tokenKindIsTrivia Comment = True
tokenKindIsTrivia _ = False
showTokenKind :: TokenKind -> String
showTokenKind White = "<whitespace>"
showTokenKind Newline = "\\n"
showTokenKind Comment = "<comment>"
showTokenKind LParen = "'('"
showTokenKind RParen = "')'"
showTokenKind LSquare = "'['"
showTokenKind RSquare = "']'"
showTokenKind LCurly = "'{'"
showTokenKind RCurly = "'}'"
showTokenKind Comma = "','"
showTokenKind Colon = "':'"
showTokenKind KwImport = "'import'"
showTokenKind KwFrom = "'from'"
showTokenKind KwTrue = "'true'"
showTokenKind KwFalse = "'false'"
showTokenKind String = "<string>"
showTokenKind Int = "<number>"
showTokenKind Double = "<number>"
showTokenKind LQuote = "'{='"
showTokenKind RQuote = "'=}'"
showTokenKind Quoted = "<any>"
showTokenKind Identifier = "<identifier>"
showTokenKind Error = "<error>"

View File

@ -0,0 +1,124 @@
{-# LANGUAGE DeriveGeneric #-}
module Wasp.Analyzer.Parser.TokenSet
( -- * TokenSet
TokenSet,
-- * Membership predicates
member,
eofMember,
kindMember,
Wasp.Analyzer.Parser.TokenSet.null,
-- * Operations
insert,
insertKind,
insertEof,
union,
intersection,
-- * Constructors
empty,
singleton,
fromEOF,
fromKind,
fromList,
-- * Destructors
toList,
showTokenSet,
)
where
import Control.DeepSeq (NFData)
import Data.List (intercalate)
import Data.Set (Set)
import qualified Data.Set as Set
import GHC.Generics (Generic)
import Wasp.Analyzer.Parser.Token (TokenKind, showTokenKind)
-- | A set of "TokenKind"s and possibly EOF (which is not a "TokenKind").
--
-- In functions on a "TokenSet", a @Maybe "TokenKind"@ is either @Nothing@,
-- representing EOF, or @Just k@, representing the kind @k@.
--
-- "TokenSet" is used instead of a plain "Set" to make handling EOF in the set
-- easier.
data TokenSet = TokenSet
{ -- | Check if EOF is part of the "TokenSet"
eofMember :: !Bool,
kindSet :: Set TokenKind
}
deriving (Eq, Ord, Show, Generic)
instance NFData TokenSet
-- | Check if a "TokenKind" or EOF is part of the "TokenSet".
member :: Maybe TokenKind -> TokenSet -> Bool
member Nothing set = eofMember set
member (Just k) set = kindMember k set
-- | Check if a "TokenKind" is part of the "TokenSet".
kindMember :: TokenKind -> TokenSet -> Bool
kindMember k set = k `Set.member` kindSet set
null :: TokenSet -> Bool
null set = Set.null (kindSet set) && not (eofMember set)
-- | Insert a "TokenKind" or EOF into a "TokenSet".
insert :: Maybe TokenKind -> TokenSet -> TokenSet
insert Nothing set = insertEof set
insert (Just k) set = insertKind k set
-- | Insert EOF into a "TokenSet".
insertEof :: TokenSet -> TokenSet
insertEof set = set {eofMember = True}
-- | Insert a "TokenKind" into a "TokenSet".
insertKind :: TokenKind -> TokenSet -> TokenSet
insertKind k set = set {kindSet = Set.insert k (kindSet set)}
-- | Get the union of two "TokenSet"s.
union :: TokenSet -> TokenSet -> TokenSet
union left right =
let unionEofMember = eofMember left || eofMember right
unionKindSet = kindSet left `Set.union` kindSet right
in TokenSet {eofMember = unionEofMember, kindSet = unionKindSet}
-- | Get the intersection of two "TokenSet"s.
intersection :: TokenSet -> TokenSet -> TokenSet
intersection left right =
let intersectionEofMember = eofMember left && eofMember right
intersectionKindSet = kindSet left `Set.intersection` kindSet right
in TokenSet {eofMember = intersectionEofMember, kindSet = intersectionKindSet}
-- | The empty "TokenSet".
empty :: TokenSet
empty = TokenSet {eofMember = False, kindSet = Set.empty}
-- | Create a "TokenSet" containing a single "TokenKind" or EOF.
singleton :: Maybe TokenKind -> TokenSet
singleton Nothing = fromEOF
singleton (Just k) = fromKind k
-- | Create a "TokenSet" containing just EOF.
fromEOF :: TokenSet
fromEOF = TokenSet {eofMember = True, kindSet = Set.empty}
-- | Create a "TokenSet" containing a single "TokenKind".
fromKind :: TokenKind -> TokenSet
fromKind k = TokenSet {eofMember = False, kindSet = Set.singleton k}
-- | Create a "TokenSet" from a list of "TokenKind"s.
fromList :: [TokenKind] -> TokenSet
fromList ks = TokenSet {eofMember = False, kindSet = Set.fromList ks}
-- | Get a list of all "TokenKind"s in a "TokenSet".
toList :: TokenSet -> [TokenKind]
toList set = Set.toList (kindSet set)
showTokenSet :: TokenSet -> String
showTokenSet set =
let kindStrs = map showTokenKind $ toList set
eofStrs = if eofMember set then ["<eof>"] else []
in intercalate "," (kindStrs ++ eofStrs)
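-- A small usage sketch (assuming the "TokenKind" constructors are in scope):
--
-- >>> let set = insertEof (fromList [LCurly, Identifier])
-- >>> member Nothing set
-- True
-- >>> kindMember Comma set
-- False
-- >>> showTokenSet set
-- "'{',<identifier>,<eof>"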

View File

@ -1,38 +1,67 @@
module Analyzer.Parser.ParseErrorTest where
import Analyzer.TestUtil (ctx, pos, wctx)
import Analyzer.TestUtil (ctx, pos, rgn, wctx)
import Test.Tasty.Hspec
import Wasp.Analyzer.Parser.ParseError
import Wasp.Analyzer.Parser.Token
import qualified Wasp.Analyzer.Parser.TokenSet as TokenSet
spec_ParseErrorTest :: Spec
spec_ParseErrorTest = do
describe "Analyzer.Parser.ParseError" $ do
describe "Wasp.Analyzer.Parser.ParseError" $ do
describe "getErrorMessageAndCtx returns a human readable error message and the correct position" $ do
let unexpectedCharError = UnexpectedChar '!' (pos 2 42)
unexpectedTokenErrorNoSuggestions =
UnexpectedToken (Token TLCurly (pos 2 3) "{") []
unexpectedTokenErrorWithSuggestions =
UnexpectedToken
(Token TRCurly (pos 100 18) "}")
["<identifier>", ","]
quoterDifferentTagsError =
QuoterDifferentTags
(wctx (1, 5) (1, 7) "foo")
(wctx (1, 20) (1, 22) "bar")
let unexpectedTokenErrorNoSuggestions =
UnexpectedToken (rgn (2, 3) (2, 3)) "}" RCurly (TokenSet.fromList [])
let unexpectedTokenErrorWithSuggestions =
UnexpectedToken (rgn (2, 3) (2, 3)) "}" RCurly (TokenSet.fromList [LCurly])
let unexpectedTokenErrorWithManySuggestions =
UnexpectedToken (rgn (2, 3) (2, 3)) "}" RCurly (TokenSet.fromList [LCurly, Identifier])
let unexpectedEofErrorNoSuggestions =
UnexpectedEOF (pos 2 3) (TokenSet.fromList [])
let unexpectedEofErrorWithSuggestions =
UnexpectedEOF (pos 2 3) (TokenSet.fromList [LCurly])
let quoterDifferentTagsError =
QuoterDifferentTags (wctx (2, 3) (2, 4) "a") (wctx (2, 5) (2, 6) "b")
let tupleTooFewValuesError =
TupleTooFewValues (rgn (2, 3) (2, 5)) 1
let missingSyntaxError =
MissingSyntax (pos 2 3) "comma"
it "for UnexpectedChar error" $ do
getErrorMessageAndCtx unexpectedCharError `shouldBe` ("Unexpected character: !", ctx (2, 42) (2, 42))
it "for UnexpectedToken error" $ do
it "UnexpectedToken" $ do
getErrorMessageAndCtx unexpectedTokenErrorNoSuggestions
`shouldBe` ("Unexpected token: {", ctx (2, 3) (2, 3))
`shouldBe` ("Unexpected token: }", ctx (2, 3) (2, 3))
getErrorMessageAndCtx unexpectedTokenErrorWithSuggestions
`shouldBe` ( "Unexpected token: }\n"
++ "Expected one of the following tokens instead: <identifier> ,",
ctx (100, 18) (100, 18)
++ "Expected one of the following tokens instead: '{'",
ctx (2, 3) (2, 3)
)
getErrorMessageAndCtx unexpectedTokenErrorWithManySuggestions
`shouldBe` ( "Unexpected token: }\n"
++ "Expected one of the following tokens instead: '{',<identifier>",
ctx (2, 3) (2, 3)
)
it "for QuoterDifferentTags error" $ do
it "UnexpecteEOF" $ do
getErrorMessageAndCtx unexpectedEofErrorNoSuggestions
`shouldBe` ( "Unexpected end of file",
ctx (2, 3) (2, 3)
)
getErrorMessageAndCtx unexpectedEofErrorWithSuggestions
`shouldBe` ( "Unexpected end of file\n"
++ "Expected one of the following tokens instead: '{'",
ctx (2, 3) (2, 3)
)
it "QuoterDifferentTags" $ do
getErrorMessageAndCtx quoterDifferentTagsError
`shouldBe` ("Quoter tags don't match: {=foo ... bar=}", ctx (1, 5) (1, 22))
`shouldBe` ("Quoter tags don't match: {=a ... b=}", ctx (2, 3) (2, 6))
it "TupleTooFewValues" $ do
getErrorMessageAndCtx tupleTooFewValuesError
`shouldBe` ( "Tuple only contains 1 values, but it must contain at least 2 values",
ctx (2, 3) (2, 5)
)
it "MissingSyntax" $ do
getErrorMessageAndCtx missingSyntaxError
`shouldBe` ("Missing expected comma", ctx (2, 3) (2, 3))

View File

@ -1,14 +0,0 @@
module Analyzer.Parser.TokenTest where
import Test.Tasty.Hspec
import Wasp.Analyzer.Parser.SourcePosition
import Wasp.Analyzer.Parser.Token
spec_TokenTest :: Spec
spec_TokenTest = do
describe "Analyzer.Parser.Token" $ do
it "calcTokenEndPos works correctly" $ do
calcTokenEndPos (Token (TIdentifier "foo") (SourcePosition 2 3) "foo") `shouldBe` SourcePosition 2 5
calcTokenEndPos (Token (TString "foo") (SourcePosition 5 10) "\"foo\"") `shouldBe` SourcePosition 5 14
calcTokenEndPos (Token TLSquare (SourcePosition 2 3) "[") `shouldBe` SourcePosition 2 3
calcTokenEndPos (Token TEOF (SourcePosition 2 3) "") `shouldBe` SourcePosition 2 3

View File

@ -1,9 +1,14 @@
module Analyzer.ParserTest where
import Analyzer.TestUtil
import Data.Either (isLeft)
import qualified Data.ByteString.Lazy as BS
import qualified Data.ByteString.Lazy.Char8 as BSC
import System.FilePath (replaceExtension, takeBaseName)
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.Golden (findByExtension, goldenVsStringDiff)
import Test.Tasty.Hspec
import Wasp.Analyzer.Parser
import Wasp.Analyzer.Parser hiding (withCtx)
import Wasp.Analyzer.Parser.ParseError (getErrorMessageAndCtx)
import Wasp.Util (indent)
spec_IsValidWaspIdentifier :: Spec
spec_IsValidWaspIdentifier = do
@ -24,316 +29,101 @@ spec_ParseExpression = do
it "Parses int literals" $ do
parseExpression "5" `shouldBe` Right (IntegerLiteral 5)
spec_ParseStatements :: Spec
spec_ParseStatements = do
it "Parses decls, dicts, and literals" $ do
let source =
unlines
[ "test Decl {",
" string: \"Hello Wasp =}\",",
" escapedString: \"Look, a \\\"\",",
" integer: 42,",
" real: 3.14,",
" yes: true,",
" no: false,",
" ident: Wasp,",
" // This is a comment",
" innerDict: { innerDictReal: 2.17 }",
"}"
]
let ast =
AST
[ wctx (1, 1) (11, 1) $
Decl "test" "Decl" $
wctx (1, 11) (11, 1) $
Dict
[ ("string", wctx (2, 11) (2, 25) $ StringLiteral "Hello Wasp =}"),
("escapedString", wctx (3, 18) (3, 29) $ StringLiteral "Look, a \""),
("integer", wctx (4, 12) (4, 13) $ IntegerLiteral 42),
("real", wctx (5, 9) (5, 12) $ DoubleLiteral 3.14),
("yes", wctx (6, 8) (6, 11) $ BoolLiteral True),
("no", wctx (7, 7) (7, 11) $ BoolLiteral False),
("ident", wctx (8, 10) (8, 13) $ Var "Wasp"),
( "innerDict",
wctx (10, 14) (10, 36) $
Dict
[ ("innerDictReal", wctx (10, 31) (10, 34) $ DoubleLiteral 2.17)
]
)
]
]
parseStatements source `shouldBe` Right ast
-- | To add more test cases to the parser, create a `.wasp` and `.golden` file
-- in the `parserTests` directory with wasp source code to parse and the expected
-- output, respectively.
--
-- See `declsDictsAndLiterals` for an example of a test case with a successful
-- parse.
--
-- See `dictNoCloseBracket` for an example of a test case with an unsuccessful
-- parse.
--
-- While the testing framework will create the `.golden` file for you if it does
-- not exist, it is recommended that you manually write the `.golden` file to
-- make sure the output is as expected.
--
-- When the golden file does not match the actual output, a diff will be shown
-- in the terminal.
test_Parser :: IO TestTree
test_Parser = do
waspFiles <- findByExtension [".wasp"] "./test/Analyzer/parserTests"
return $ testGroup "Wasp.Analyzer.Parser" $ map testCase waspFiles
it "Parses comments" $ do
let source =
unlines
[ " // This is some // comment",
"/* comment",
" span//ning",
"multi/*ple lines */",
"test /* *hi* */ Decl 42 // One more comment",
"// And here is final comment"
]
let ast =
AST
[ wctx (5, 1) (5, 23) $
Decl "test" "Decl" $ wctx (5, 22) (5, 23) $ IntegerLiteral 42
]
parseStatements source `shouldBe` Right ast
-- | Run a single golden test case for the given wasp file.
testCase :: FilePath -> TestTree
testCase waspFile =
let astFile = replaceExtension waspFile ".golden"
in goldenVsStringDiff
(takeBaseName waspFile) -- Test case name
(\ref new -> ["diff", "-u", ref, new]) -- Diff command
astFile -- Golden file path
( do
-- Read from wasp file and return parse result
source <- BSC.unpack <$> BS.readFile waspFile
return $ BSC.pack $ showResult $ parseStatements source
)
it "Parses external imports" $ do
let source =
unlines
[ "test Imports {",
" module: import Page from \"page.jsx\",",
" field: import { Page } from \"page.jsx\"",
"}"
]
let ast =
AST
[ wctx (1, 1) (4, 1) $
Decl "test" "Imports" $
wctx (1, 14) (4, 1) $
Dict
[ ("module", wctx (2, 11) (2, 37) $ ExtImport (ExtImportModule "Page") "page.jsx"),
("field", wctx (3, 10) (3, 40) $ ExtImport (ExtImportField "Page") "page.jsx")
]
]
parseStatements source `shouldBe` Right ast
-- | Pretty print the result of a parse. The purpose of a custom implementation
-- here is to make golden file diffs readable/useful.
--
-- To see examples of pretty prints, see the golden files in `./parserTests`.
showResult :: Either ParseError AST -> String
showResult (Left err) =
let (message, ctx) = getErrorMessageAndCtx err
locationStr = "at " ++ showCtx ctx
in "Parse error " ++ locationStr ++ ":\n" ++ indent 2 message ++ "\n"
showResult (Right ast) = showAST ast
it "Parses unary lists" $ do
let source = "test Decl [ 1 ]"
let ast =
AST
[ wctx (1, 1) (1, 15) $
Decl "test" "Decl" $ wctx (1, 11) (1, 15) $ List [wctx (1, 13) (1, 13) $ IntegerLiteral 1]
]
parseStatements source `shouldBe` Right ast
showAST :: AST -> String
showAST (AST stmts) = "(AST\n" ++ indent 2 (concatMap showStmt stmts) ++ ")\n"
it "Parses lists of multiple elements" $ do
let source = "test Decl [ 1, 2, 3 ]"
let ast =
AST
[ wctx (1, 1) (1, 21) $
Decl "test" "Decl" $
wctx (1, 11) (1, 21) $
List
[ wctx (1, 13) (1, 13) $ IntegerLiteral 1,
wctx (1, 16) (1, 16) $ IntegerLiteral 2,
wctx (1, 19) (1, 19) $ IntegerLiteral 3
]
]
parseStatements source `shouldBe` Right ast
showStmt :: WithCtx Stmt -> String
showStmt (WithCtx ctx (Decl typ name body)) =
withCtx "(Decl" ctx ++ " type=" ++ typ ++ " name=" ++ name ++ "\n"
++ indent 2 (showExpr body)
++ ")\n"
it "Parses empty dictionaries and lists" $ do
let source = "test Decl { dict: {}, list: [] }"
let ast =
AST
[ wctx (1, 1) (1, 32) $
Decl "test" "Decl" $
wctx (1, 11) (1, 32) $
Dict
[ ("dict", wctx (1, 19) (1, 20) $ Dict []),
("list", wctx (1, 29) (1, 30) $ List [])
]
]
parseStatements source `shouldBe` Right ast
showExpr :: WithCtx Expr -> String
showExpr (WithCtx ctx expr) = "(" ++ withCtx (exprName expr) ctx ++ showDetails expr ++ ")\n"
where
exprName (Dict _) = "Dict"
exprName (List _) = "List"
exprName (Tuple _) = "Tuple"
exprName (StringLiteral _) = "String"
exprName (IntegerLiteral _) = "Integer"
exprName (DoubleLiteral _) = "Double"
exprName (BoolLiteral _) = "Bool"
exprName (ExtImport _ _) = "ExtImport"
exprName (Var _) = "Var"
exprName (Quoter _ _) = "Quoter"
it "Allows trailing commas in lists and dictionaries" $ do
let source =
unlines
[ "test Decl {",
" list: [ 1, ],",
"}"
]
let ast =
AST
[ wctx (1, 1) (3, 1) $
Decl "test" "Decl" $
wctx (1, 11) (3, 1) $
Dict
[("list", wctx (2, 9) (2, 14) $ List [wctx (2, 11) (2, 11) $ IntegerLiteral 1])]
]
parseStatements source `shouldBe` Right ast
showDetails (Dict []) = ""
showDetails (Dict entries) = "\n" ++ indent 2 (concatMap showEntry entries)
showDetails (List []) = ""
showDetails (List values) = "\n" ++ indent 2 (concatMap showExpr values)
showDetails (Tuple (a, b, cs)) = "\n" ++ indent 2 (concatMap showExpr (a : b : cs))
showDetails (StringLiteral s) = showLiteral s
showDetails (IntegerLiteral n) = showLiteral n
showDetails (DoubleLiteral n) = showLiteral n
showDetails (BoolLiteral b) = showLiteral b
showDetails (ExtImport name path) = " " ++ showExtImportName name ++ " path=" ++ show path
showDetails (Var v) = " variable=" ++ v
showDetails (Quoter tag contents) = " tag=" ++ tag ++ "\n" ++ indent 2 ("{=" ++ contents ++ "=}") ++ "\n"
it "Parses tuples" $ do
let source =
unlines
[ "test Pair (1, \"foo\")",
"test Triple (1, \"foo\", 2)",
"test Quadruple (1, \"foo\", 2, true)",
"test TrailingComma (42, 314,)"
]
let ast =
AST
[ wctx (1, 1) (1, 20) $
Decl "test" "Pair" $
wctx (1, 11) (1, 20) $
Tuple
( wctx (1, 12) (1, 12) $ IntegerLiteral 1,
wctx (1, 15) (1, 19) $ StringLiteral "foo",
[]
),
wctx (2, 1) (2, 25) $
Decl "test" "Triple" $
wctx (2, 13) (2, 25) $
Tuple
( wctx (2, 14) (2, 14) $ IntegerLiteral 1,
wctx (2, 17) (2, 21) $ StringLiteral "foo",
[wctx (2, 24) (2, 24) $ IntegerLiteral 2]
),
wctx (3, 1) (3, 34) $
Decl "test" "Quadruple" $
wctx (3, 16) (3, 34) $
Tuple
( wctx (3, 17) (3, 17) $ IntegerLiteral 1,
wctx (3, 20) (3, 24) $ StringLiteral "foo",
[ wctx (3, 27) (3, 27) $ IntegerLiteral 2,
wctx (3, 30) (3, 33) $ BoolLiteral True
]
),
wctx (4, 1) (4, 29) $
Decl "test" "TrailingComma" $
wctx (4, 20) (4, 29) $
Tuple
( wctx (4, 21) (4, 22) $ IntegerLiteral 42,
wctx (4, 25) (4, 27) $ IntegerLiteral 314,
[]
)
]
parseStatements source `shouldBe` Right ast
showEntry (key, value) = "(DictEntry key=" ++ key ++ "\n" ++ indent 2 (showExpr value) ++ ")\n"
it "Parses quoted PSL" $ do
let source =
unlines
[ "test PSL {=psl",
" id Int @id",
"psl=}"
]
let ast =
AST
[ wctx (1, 1) (3, 5) $
Decl "test" "PSL" $
wctx (1, 10) (3, 5) $
Quoter "psl" "\n id Int @id\n"
]
parseStatements source `shouldBe` Right ast
showExtImportName (ExtImportField name) = "field=" ++ name
showExtImportName (ExtImportModule name) = "module=" ++ name
it "Parses quoted JSON" $ do
let source =
unlines
[ "test JSON {=json",
" \"key\": \"value\"",
"json=}"
]
let ast =
AST
[ wctx (1, 1) (3, 6) $
Decl "test" "JSON" $
wctx (1, 11) (3, 6) $ Quoter "json" "\n \"key\": \"value\"\n"
]
parseStatements source `shouldBe` Right ast
showLiteral :: Show a => a -> String
showLiteral value = " value=" ++ show value
it "Parses multiple quoters" $ do
let source =
unlines
[ "test JSON {=json",
" { \"key\": \"value\" }",
"json=}",
"test JSON2 {=json [1, 2, 3] json=}"
]
let ast =
AST
[ wctx (1, 1) (3, 6) $ Decl "test" "JSON" $ wctx (1, 11) (3, 6) $ Quoter "json" "\n { \"key\": \"value\" }\n",
wctx (4, 1) (4, 34) $ Decl "test" "JSON2" $ wctx (4, 12) (4, 34) $ Quoter "json" " [1, 2, 3] "
]
parseStatements source `shouldBe` Right ast
withCtx :: String -> Ctx -> String
withCtx name ctx = name ++ "@" ++ showCtx ctx
it "Fails to parseStatements a quoter with unmatched tags" $ do
let source = "test Failure {=a b=}"
parseStatements source `shouldSatisfy` isLeft
it "Parses nested quoters correctly" $ do
parseStatements "test Case1 {=foo {=foo foo=} foo=}" `shouldSatisfy` isLeft
parseStatements "test Case2 {=foo foo=} foo=}" `shouldSatisfy` isLeft
parseStatements "test Case3 {=foo {=foo foo=}"
`shouldBe` Right (AST [wctx (1, 1) (1, 28) $ Decl "test" "Case3" $ wctx (1, 12) (1, 28) $ Quoter "foo" " {=foo "])
parseStatements "test Case4 {=foo {=bar foo=}"
`shouldBe` Right (AST [wctx (1, 1) (1, 28) $ Decl "test" "Case4" $ wctx (1, 12) (1, 28) $ Quoter "foo" " {=bar "])
parseStatements "test Case5 {=foo bar=} foo=}"
`shouldBe` Right (AST [wctx (1, 1) (1, 28) $ Decl "test" "Case5" $ wctx (1, 12) (1, 28) $ Quoter "foo" " bar=} "])
parseStatements "test Case6 {=foo {=bar bar=} foo=}"
`shouldBe` Right (AST [wctx (1, 1) (1, 34) $ Decl "test" "Case6" $ wctx (1, 12) (1, 34) $ Quoter "foo" " {=bar bar=} "])
it "Requires dictionaries to have an ending bracket" $ do
let source = "test Decl {"
let expected =
Left $
UnexpectedToken
( Token
{ tokenType = TEOF,
tokenStartPosition = SourcePosition 1 12,
tokenLexeme = ""
}
)
["}", "<identifier>"]
parseStatements source `shouldBe` expected
it "Parses multiple statements" $ do
let source =
unlines
[ "constant Pi 3.14159",
"constant E 2.71828"
]
let ast =
AST
[ wctx (1, 1) (1, 19) $ Decl "constant" "Pi" $ wctx (1, 13) (1, 19) $ DoubleLiteral 3.14159,
wctx (2, 1) (2, 19) $ Decl "constant" "E" $ wctx (2, 13) (2, 19) $ DoubleLiteral 2.71828
]
parseStatements source `shouldBe` Right ast
describe "Fails with UnexpectedChar error if unrecognized character is encountered" $ do
it "e.g. when it encounters '^' after declaration name" $ do
let source = "test Decl ^ {}"
let expected = Left $ UnexpectedChar '^' $ SourcePosition 1 11
parseStatements source `shouldBe` expected
it "e.g. when the identifier contains '!'" $ do
let source = "test De!cl {}"
let expected = Left $ UnexpectedChar '!' $ SourcePosition 1 8
parseStatements source `shouldBe` expected
describe "Fails with ParseError error if unexpected token is encountered" $ do
it "When string follows identifier" $ do
let source = "test \"Declaration\" {}"
let expected =
Left $
UnexpectedToken
( Token
{ tokenType = TString "Declaration",
tokenStartPosition = SourcePosition 1 6,
tokenLexeme = "\"Declaration\""
}
)
["<identifier>"]
parseStatements source `shouldBe` expected
it "When dictionary is missing a comma between the two fields" $ do
let source =
unlines
[ "test Declaration {",
" a: 1",
" b: 2 ",
"}"
]
let expected =
Left $
UnexpectedToken
( Token
{ tokenType = TIdentifier "b",
tokenStartPosition = SourcePosition 3 3,
tokenLexeme = "b"
}
)
["}", ","]
parseStatements source `shouldBe` expected
showCtx :: Ctx -> String
showCtx (Ctx (SourceRegion (SourcePosition sl sc) (SourcePosition el ec)))
| sl == el && sc == ec = show sl ++ ":" ++ show sc
| sl == el = show sl ++ ":" ++ show sc ++ "-" ++ show ec
| otherwise = show sl ++ ":" ++ show sc ++ "-" ++ show el ++ ":" ++ show ec

View File

@ -6,6 +6,9 @@ import qualified Wasp.Analyzer.TypeChecker as T
pos :: Int -> Int -> P.SourcePosition
pos line column = P.SourcePosition line column
rgn :: (Int, Int) -> (Int, Int) -> P.SourceRegion
rgn (sl, sc) (el, ec) = P.SourceRegion (pos sl sc) (pos el ec)
ctx :: (Int, Int) -> (Int, Int) -> P.Ctx
ctx (a, b) (c, d) = P.ctxFromRgn (pos a b) (pos c d)

View File

@ -0,0 +1,5 @@
(AST
(Decl@5:1-23 type=test name=Decl
(Integer@5:22-23 value=42)
)
)

View File

@ -0,0 +1,6 @@
// This is some // comment
/* comment
span//ning
multi/*ple lines */
test /* *hi* */ Decl 42 // One more comment
// And here is final comment

View File

@ -0,0 +1,34 @@
(AST
(Decl@1:1-11:1 type=test name=Decl
(Dict@1:11-11:1
(DictEntry key=string
(String@2:11-25 value="Hello Wasp =}")
)
(DictEntry key=escapedString
(String@3:18-29 value="Look, a \\\"")
)
(DictEntry key=integer
(Integer@4:12-13 value=42)
)
(DictEntry key=real
(Double@5:9-12 value=3.14)
)
(DictEntry key=yes
(Bool@6:8-11 value=True)
)
(DictEntry key=no
(Bool@7:7-11 value=False)
)
(DictEntry key=ident
(Var@8:10-13 variable=Wasp)
)
(DictEntry key=innerDict
(Dict@10:14-36
(DictEntry key=innerDictReal
(Double@10:31-34 value=2.17)
)
)
)
)
)
)

View File

@ -0,0 +1,11 @@
test Decl {
string: "Hello Wasp =}",
escapedString: "Look, a \"",
integer: 42,
real: 3.14,
yes: true,
no: false,
ident: Wasp,
// This is a comment
innerDict: { innerDictReal: 2.17 }
}

View File

@ -0,0 +1,3 @@
Parse error at 3:3:
Unexpected token: b
Expected one of the following tokens instead: '}',','

View File

@ -0,0 +1,4 @@
test Declaration {
a: 1
b: 2
}

View File

@ -0,0 +1,3 @@
Parse error at 2:1:
Unexpected end of file
Expected one of the following tokens instead: '}',<identifier>

View File

@ -0,0 +1 @@
test Decl {

View File

@ -0,0 +1,12 @@
(AST
(Decl@1:1-32 type=test name=Decl
(Dict@1:11-32
(DictEntry key=dict
(Dict@1:19-20)
)
(DictEntry key=list
(List@1:29-30)
)
)
)
)

View File

@ -0,0 +1 @@
test Decl { dict: {}, list: [] }

View File

@ -0,0 +1,12 @@
(AST
(Decl@1:1-4:1 type=test name=Imports
(Dict@1:14-4:1
(DictEntry key=module
(ExtImport@2:11-37 module=Page path="page.jsx")
)
(DictEntry key=field
(ExtImport@3:10-40 field=Page path="page.jsx")
)
)
)
)

View File

@ -0,0 +1,4 @@
test Imports {
module: import Page from "page.jsx",
field: import { Page } from "page.jsx"
}

View File

@ -0,0 +1,9 @@
(AST
(Decl@1:1-21 type=test name=Decl
(List@1:11-21
(Integer@1:13 value=1)
(Integer@1:16 value=2)
(Integer@1:19 value=3)
)
)
)

View File

@ -0,0 +1 @@
test Decl [ 1, 2, 3 ]

View File

@ -0,0 +1,14 @@
(AST
(Decl@1:1-3:6 type=test name=JSON
(Quoter@1:11-3:6 tag=json
{=
"key": "value"
=}
)
)
(Decl@4:1-34 type=test name=JSON2
(Quoter@4:12-34 tag=json
{= [1, 2, 3] =}
)
)
)

View File

@ -0,0 +1,4 @@
test JSON {=json
"key": "value"
json=}
test JSON2 {=json [1, 2, 3] json=}

View File

@ -0,0 +1,8 @@
(AST
(Decl@1:1-19 type=constant name=Pi
(Double@1:13-19 value=3.14159)
)
(Decl@2:1-19 type=constant name=E
(Double@2:13-19 value=2.71828)
)
)

View File

@ -0,0 +1,2 @@
constant Pi 3.14159
constant E 2.71828

View File

@ -0,0 +1,2 @@
(AST
)

View File

@ -0,0 +1,9 @@
(AST
(Decl@1:1-3:6 type=test name=JSON
(Quoter@1:11-3:6 tag=json
{=
"key": "value"
=}
)
)
)

View File

@ -0,0 +1,3 @@
test JSON {=json
"key": "value"
json=}

View File

@ -0,0 +1,9 @@
(AST
(Decl@1:1-3:5 type=test name=PSL
(Quoter@1:10-3:5 tag=psl
{=
id Int @id
=}
)
)
)

View File

@ -0,0 +1,3 @@
test PSL {=psl
id Int @id
psl=}

View File

@ -0,0 +1,7 @@
(AST
(Decl@1:1-43 type=test name=DifferentNestedClose
(Quoter@1:27-43 tag=foo
{= bar=} =}
)
)
)

View File

@ -0,0 +1 @@
test DifferentNestedClose {=foo bar=} foo=}

View File

@ -0,0 +1,7 @@
(AST
(Decl@1:1-42 type=test name=DifferentNestedOpen
(Quoter@1:26-42 tag=foo
{= {=bar =}
)
)
)

View File

@ -0,0 +1 @@
test DifferentNestedOpen {=foo {=bar foo=}

View File

@ -0,0 +1,3 @@
Parse error at 1:38:
Unexpected token: =
Expected one of the following tokens instead: <identifier>

View File

@ -0,0 +1 @@
test SelfNested {=foo {=foo foo=} foo=}

View File

@ -0,0 +1,3 @@
Parse error at 1:36:
Unexpected token: =
Expected one of the following tokens instead: <identifier>

View File

@ -0,0 +1 @@
test TwoClosingTags {=foo foo=} foo=}

View File

@ -0,0 +1,7 @@
(AST
(Decl@1:1-34 type=test name=TwoOpenTags
(Quoter@1:18-34 tag=foo
{= {=foo =}
)
)
)

View File

@ -0,0 +1 @@
test TwoOpenTags {=foo {=foo foo=}

View File

@ -0,0 +1,3 @@
Parse error at 2:1:
Unexpected end of file
Expected one of the following tokens instead: '=}',<any>

View File

@ -0,0 +1 @@
test Failure {=a b=}

View File

@ -0,0 +1,11 @@
(AST
(Decl@1:1-3:1 type=test name=Decl
(Dict@1:11-3:1
(DictEntry key=list
(List@2:9-14
(Integer@2:11 value=1)
)
)
)
)
)

View File

@ -0,0 +1,3 @@
test Decl {
list: [ 1, ],
}

View File

@ -0,0 +1,29 @@
(AST
(Decl@1:1-20 type=test name=Pair
(Tuple@1:11-20
(Integer@1:12 value=1)
(String@1:15-19 value="foo")
)
)
(Decl@2:1-25 type=test name=Triple
(Tuple@2:13-25
(Integer@2:14 value=1)
(String@2:17-21 value="foo")
(Integer@2:24 value=2)
)
)
(Decl@3:1-34 type=test name=Quadruple
(Tuple@3:16-34
(Integer@3:17 value=1)
(String@3:20-24 value="foo")
(Integer@3:27 value=2)
(Bool@3:30-33 value=True)
)
)
(Decl@4:1-29 type=test name=TrailingComma
(Tuple@4:20-29
(Integer@4:21-22 value=42)
(Integer@4:25-27 value=314)
)
)
)

View File

@ -0,0 +1,4 @@
test Pair (1, "foo")
test Triple (1, "foo", 2)
test Quadruple (1, "foo", 2, true)
test TrailingComma (42, 314,)

View File

@ -0,0 +1,7 @@
(AST
(Decl@1:1-15 type=test name=Decl
(List@1:11-15
(Integer@1:13 value=1)
)
)
)

View File

@ -0,0 +1 @@
test Decl [ 1 ]

View File

@ -0,0 +1,3 @@
Parse error at 1:6-18:
Unexpected token: "Declaration"
Expected one of the following tokens instead: <identifier>

View File

@ -0,0 +1 @@
test "Declaration" {}

View File

@ -0,0 +1,3 @@
Parse error at 1:11:
Unexpected token: ^
Expected one of the following tokens instead: '(','[','{','import','true','false',<string>,<number>,<number>,'{=',<identifier>

View File

@ -0,0 +1 @@
test Decl ^ {}

View File

@ -73,7 +73,6 @@ library
hs-source-dirs: src
build-tool-depends:
alex:alex
, happy:happy
build-depends:
, base >= 4.7 && < 5
, Glob ^>= 0.10.2
@ -110,6 +109,7 @@ library
, uuid ^>= 1.3.15
-- 'array' is used by code generated by Alex for src/Analyzer/Parser/Lexer.x
, array ^>= 0.5.4
, deepseq
other-modules: Paths_waspc
exposed-modules:
FilePath.Extra
@ -129,14 +129,20 @@ library
Wasp.Analyzer.Parser
Wasp.Analyzer.Parser.AST
Wasp.Analyzer.Parser.Ctx
Wasp.Analyzer.Parser.LexerUtils
Wasp.Analyzer.Parser.ConcreteParser
Wasp.Analyzer.Parser.ConcreteParser.ParseError
Wasp.Analyzer.Parser.ConcreteParser.CST
Wasp.Analyzer.Parser.ConcreteParser.ParserLib
Wasp.Analyzer.Parser.Lexer
Wasp.Analyzer.Parser.Monad
Wasp.Analyzer.Parser.Lexer.Lexer
Wasp.Analyzer.Parser.Lexer.Internal
Wasp.Analyzer.Parser.ParseError
Wasp.Analyzer.Parser.Parser
Wasp.Analyzer.Parser.AbstractParser
Wasp.Analyzer.Parser.AbstractParser.Monad
Wasp.Analyzer.Parser.SourcePosition
Wasp.Analyzer.Parser.SourceRegion
Wasp.Analyzer.Parser.Token
Wasp.Analyzer.Parser.TokenSet
Wasp.Analyzer.StdTypeDefinitions
Wasp.Analyzer.StdTypeDefinitions.App.Dependency
Wasp.Analyzer.StdTypeDefinitions.Entity
@ -321,18 +327,19 @@ test-suite waspc-test
, strong-path
, text
, unordered-containers
, bytestring
, waspc
, QuickCheck ^>= 2.14
, tasty ^>= 1.4.2
-- tasty-hspec 1.1.7 introduces breaking changes, which is why we have < 1.1.7 .
, tasty-hspec >= 1.1 && < 1.1.7
, tasty-quickcheck ^>= 0.10
, tasty-golden ^>= 2.3.5
other-modules:
Analyzer.Evaluation.EvaluationErrorTest
Analyzer.EvaluatorTest
Analyzer.Parser.ParseErrorTest
Analyzer.Parser.SourcePositionTest
Analyzer.Parser.TokenTest
Analyzer.ParserTest
Analyzer.TestUtil
Analyzer.TypeChecker.InternalTest