mirror of https://github.com/wasp-lang/wasp.git
synced 2024-11-23 19:29:17 +03:00

Add error resilient parser for LSP (#655)

See waspls/src/Wasp/Backend/ConcreteParser/Internal.hs for a description of the parser.

This commit is contained in:
parent 0de9997198
commit 879f8e4225

101  waspls/src/Wasp/Backend/ConcreteParser.hs  Normal file
@@ -0,0 +1,101 @@
module Wasp.Backend.ConcreteParser
  ( -- * Concrete parser

    -- | This module contains functions for converting lists of "Token"s into
    -- concrete syntax trees. These trees represent any Wasp source file completely,
    -- including whitespace and comments, even if the source file is invalid.
    parseCST,
    parseCSTExpression,
  )
where

import Wasp.Backend.ConcreteParser.Internal
import Wasp.Backend.ConcreteSyntax (SyntaxKind (..), SyntaxNode)
import Wasp.Backend.ParseError
import Wasp.Backend.Token (Token)
import qualified Wasp.Backend.Token as T

-- | Parse a list of tokens representing a Wasp program into a concrete syntax
-- tree.
--
-- See "SyntaxNode" for a description of what a concrete syntax tree contains.
parseCST :: [Token] -> ([ParseError], [SyntaxNode])
parseCST tokens = parse tokens (root <> eof)

-- | Parse a list of tokens into a concrete syntax tree for a wasp expression.
-- This is mainly used for testing.
--
-- See "SyntaxNode" for a description of what a concrete syntax tree contains.
parseCSTExpression :: [Token] -> ([ParseError], [SyntaxNode])
parseCSTExpression tokens = parse tokens (expr <> eof)

root :: GrammarRule
root = Program <$$> stmts

stmts :: GrammarRule
stmts = eof <|> (stmt <> stmts)

stmt :: GrammarRule
stmt =
  Decl
    <$$> (T.Identifier `as` DeclType)
    <> (T.Identifier `as` DeclName)
    <> expr

{- ORMOLU_DISABLE -}
expr :: GrammarRule
expr =
  Dict <$$> listLike lcurly dictEntry comma rcurly
    <|> List <$$> listLike lsquare expr comma rsquare
    -- Note that we don't check the number of tuple elements here: this is left
    -- to the next phase of parsing.
    <|> Tuple <$$> listLike lparen expr comma rparen
    <|> quoter
    <|> int
    <|> double
    <|> string
    <|> (T.Identifier `as` Var)
    <|> kwTrue
    <|> kwFalse
    <|> extImport
{- ORMOLU_ENABLE -}

dictEntry :: GrammarRule
dictEntry = DictEntry <$$> (T.Identifier `as` DictKey) <> colon <> expr

quoter :: GrammarRule
quoter = Quoter <$$> lquote <> quoterTail
  where
    quoterTail = rquote <|> (quoted <> quoterTail)

extImport :: GrammarRule
extImport =
  ExtImport
    <$$> kwImport <> extImportName <> kwFrom <> (T.String `as` ExtImportPath)
  where
    extImportName :: GrammarRule
    extImportName =
      lcurly <> (T.Identifier `as` ExtImportField) <> rcurly
        <|> (T.Identifier `as` ExtImportModule)

-- | @listLike open value sep close@ parses list-like structures in the form:
--
-- @open (value sep)* (value sep?) close@
--
-- In other words, a list with an optional trailing separator.
listLike ::
  -- | start grammar rule
  GrammarRule ->
  -- | value grammar rule
  GrammarRule ->
  -- | separator grammar rule
  GrammarRule ->
  -- | end grammar rule
  GrammarRule ->
  GrammarRule
listLike open value sep close =
  open <> listLikeTail
  where
    listLikeTail :: GrammarRule
    listLikeTail =
      close <|> (value <> ((sep <> listLikeTail) <|> close))
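
The two entry points compose directly with the lexer added in this commit: lex the source into tokens, then hand the tokens to parseCST. A minimal sketch, not part of the commit (the demo wrapper and the sample source string are hypothetical):

import qualified Wasp.Backend.ConcreteParser as CP
import Wasp.Backend.ConcreteSyntax (cstPrettyPrint)
import qualified Wasp.Backend.Lexer as L

-- Lex and parse a tiny program, then dump the CST and any recovered errors.
demo :: IO ()
demo = do
  let source = "app Main { title: \"hello\" }"
  let (errors, nodes) = CP.parseCST (L.lex source)
  mapM_ (putStrLn . cstPrettyPrint) nodes
  print errors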

774  waspls/src/Wasp/Backend/ConcreteParser/Internal.hs  Normal file
@@ -0,0 +1,774 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}

module Wasp.Backend.ConcreteParser.Internal
  ( -- * Internal parsing library
    GrammarRule,
    parse,

    -- * Combinators

    -- | The fixity of the operators ('(<>)', '`as`', '(<$$>)', and '(<|>)') was
    -- chosen to make using the operators require as few parentheses as possible:
    --
    -- >>> Program <$$> lparen <> rparen
    --       <|> string
    -- Alternative (Group Program (Chain (Token LParen) (Token RParen))) (Token String)
    as,
    (<$$>),
    perhaps,
    (<|>),

    -- * Primitive grammar rules

    -- | These grammar rules (besides `eof`) consume a single token of the kind
    -- matching their name. The syntax node they produce has an "S.Token" kind,
    -- unless otherwise specified.
    lparen,
    rparen,
    lsquare,
    rsquare,
    lcurly,
    rcurly,
    comma,
    colon,
    kwImport,
    kwFrom,
    kwTrue,
    kwFalse,
    string,
    int,
    double,
    lquote,
    rquote,
    quoted,
    -- | A primitive for "T.Identifier" is not included: this should always be
    -- done with an `as`, i.e. @"T.Identifier" ``as`` "S.DictKey"@
    eof,

    -- * Error Recovery

    -- | This parser has built-in automatic error recovery. Currently, error
    -- recovery occurs when an error is produced inside of a group rule. Recovery
    -- is based on the possible tokens that can follow the current rule.
    --
    -- First, we define the follow set of a grammar rule @R@ in the grammar
    -- @R <> S@ to be the set of token kinds that @S@ can accept as its first
    -- token. For example, @"let"@ in @"let" <> ("=" <|> "+=")@ has the follow
    -- set @["=", "+="]@.
    --
    -- We can then describe the basic mechanism as follows:
    --
    -- For a "GrammarRule" @label '<$$>' first '<>' next@, if an error occurs
    -- while parsing @first@, check if the token it errored on is in the follow
    -- set for the group.
    --
    -- If yes, then there is a following rule that can accept this token. So
    -- just continue parsing. Otherwise, no following rule has a use for the
    -- token, so discard it.
    --
    -- In either case, we log an error to report at the end of parsing.
    --
    -- As an example, consider the grammar rules:
    --
    -- @
    -- stmts = eof <|> (stmt <> stmts)
    -- stmt = Stmt <$$> "let" <> identifier <> "=" <> expr <> ";"
    -- @
    --
    -- With the example source text:
    --
    -- @
    -- let x = 2
    -- let y = 4;
    -- @
    --
    -- A parse error will occur at the beginning of line 2, when @let@ is found
    -- instead of the expected @;@.
    --
    -- First, we find the follow set of the @Stmt@ group. From the definition of
    -- @stmts@, we can see @stmt@ is followed by @stmts@. @stmts@ can start with
    -- either @eof@ or @stmt@ (which in turn can start with @"let"@). Hence, the
    -- follow set is @[eof, "let"]@.
    --
    -- Since the "let" token the parser errored on is in this set, it does not
    -- get discarded by the group. This leaves it available for the call to
    -- @stmt@, which can parse successfully because of this, resulting in the
    -- following parse tree and error:
    --
    -- @
    -- [ Stmt ["let", "x", "=", "2", error]
    -- , Stmt ["let", "y", "=", "4", ";"]
    -- ]
    --
    -- [Unexpected "let" at line 2 column 1: expected ";"]
    -- @
    --
    -- But if we look at a more complex situation, with nested groups, we need
    -- to introduce another concept to produce correct parses. For example:
    --
    -- @
    -- list = List <$$> '[' <> expr <> ']'
    -- tuple = Tuple <$$> '(' <> expr <> ')'
    -- expr = list <|> tuple <|> number
    -- @
    --
    -- with source:
    --
    -- @
    -- [(2 ]
    -- @
    --
    -- If we were to apply the same logic as before, we would find the follow
    -- set of the @2@ to be @[")"]@. Since @]@ is not in this set, we discard
    -- the right square bracket. But what we want to do is leave it so that
    -- the @list@ rule can use it as its closing bracket.
    --
    -- To handle this scenario, we introduce an unwinding behavior to travel up
    -- through group rules until an appropriate point is found to recover. We
    -- do this first by defining the extended follow set of a rule @R@ as
    -- the union of the follow sets of @R@ and its ancestor rules.
    --
    -- For example, we find the follow set and extended follow set of the @2@ in
    -- the example code. To start, we write out the tree for the grammar rules
    -- that are used:
    --
    -- @
    -- List <$$> '[' <> (Tuple <$$> '(' <> number <> ')') <> ']'
    -- @
    --
    -- Now, we see the follow set of the @2@ is just @[')']@. Then, the follow
    -- sets of its ancestors (@Tuple<$$>@ and @List<$$>@) are @[']']@ and @[]@,
    -- respectively. So, the extended follow set is @[')', ']']@.
    --
    -- Unwinding starts when:
    --
    -- (1) The next token __is not__ in the follow set of the current rule
    -- (2) The next token __is__ in the extended follow set of the current rule
    --
    -- While unwinding, the parser will stop at each group ancestor of the
    -- original rule to determine whether to continue unwinding. Unwinding stops
    -- when a group rule @S@ is reached such that the follow set of @S@ includes
    -- the next token. At this point, the same thing happens as in the first
    -- description of error recovery.
    --
    -- The example rules and source would produce the following parse tree and
    -- errors:
    --
    -- @
    -- [List ["[", Tuple ["(", "2"], "]"]]
    --
    -- [Unexpected "]" at line 1 column 9: expected expression]
    -- @
    --
    -- ** Error Recovery Rules
    --
    -- A description of the full error recovery process:
    --
    -- 1. While parsing @first@ in @label '<$$>' first '<>' next@, an
    --    unexpected token is encountered.
    -- 2. The follow set and extended follow set for the group node are
    --    calculated.
    -- 3. If the unexpected token is in the follow set, continue parsing normally.
    -- 4. If the unexpected token is not in the follow set, but is in the extended
    --    follow set, begin unwinding:
    --    a. Stop at each group rule ancestor @S@ and compute the follow set of
    --       @S@.
    --    b. If the follow set of @S@ contains the unexpected token, stop
    --       unwinding and resume parsing.
    --    c. Otherwise, keep unwinding.
    -- 5. If the unexpected token is not in the extended follow set, discard the
    --    token and continue parsing.

    -- * Future improvements

    -- |
    -- __TODO #1:__
    --
    -- Move error recovery from the group rule to the '<>' rules.
    --
    -- __TODO #2:__
    --
    -- The current error recovery and the upgraded version described in TODO #1
    -- can do what feels like the wrong thing in some cases. For example,
    -- consider the following invalid wasp code:
    --
    -- @
    -- route TestRoute { path: "/", to:
    --
    -- page TestPage { fn: import TestPage from "@ext/TestPage.js" }
    -- @
    --
    -- To a human, this should parse as two declaration statements. But the
    -- current system identifies @page@ as a variable for the dictionary entry
    -- @to:@. Then it treats @TestPage@ as the key for another entry, and
    -- complains that it found @{@ instead of @:@.
    --
    -- This is due to the unwinding behavior on errors being very short-sighted:
    -- it only cares whether the next non-white token will succeed.
    --
    -- A possible fix to this is to explore all possible (or many possible)
    -- unwinding stopping points and figure out which one results in the
    -- fewest future parse errors.
    --
    -- In case this has performance issues with having to explore lots of possible
    -- parse trees, we could limit how many branches we check, similar to optimizing
    -- DFS for state tree searches (like a chess solver).
    --
    -- __TODO #3:__
    --
    -- Change the wasp language a bit to make the start of a declaration unambiguous.
    -- This would make it much easier for the parser to recover from an unfinished
    -- declaration.
    --
    -- Ideas for this:
    --
    -- (1) Make all type names reserved keywords in the grammar
    -- (2) Distinguish between type names and variable names based on the case
    --     of the first letter (lowercase = type, uppercase = variable)
    --
    -- __TODO #4:__
    --
    -- When the next token is not in the follow set of the rule, try discarding
    -- multiple tokens, stopping when we do find a token in the follow set.
  )
where

import Control.Monad (unless)
import Control.Monad.Except (ExceptT, MonadError (catchError, throwError), runExceptT)
import Control.Monad.State.Strict (State, gets, modify, runState)
import Data.Maybe (fromMaybe)
import Wasp.Backend.ConcreteSyntax (SyntaxKind, SyntaxNode (SyntaxNode, snodeChildren, snodeKind, snodeWidth))
import qualified Wasp.Backend.ConcreteSyntax as S
import Wasp.Backend.ParseError
import Wasp.Backend.Token (Token (tokenKind, tokenWidth), TokenKind, tokenKindIsTrivia)
import qualified Wasp.Backend.Token as T
import Wasp.Backend.TokenSet (TokenSet)
import qualified Wasp.Backend.TokenSet as TokenSet

-- | The type of a grammar rule. Use combinators and primitives to build more
-- complex grammars. Run with `parse`, which supports reasonable automatic error
-- recovery.
--
-- Every rule begins parsing by consuming trivia tokens at the beginning of the
-- input and creating a syntax node for each trivia token.
data GrammarRule
  = -- | Always succeeds, consuming only trivia tokens.
    Succeed
  | -- | Checks if the first non-trivia token is the expected token
    -- kind. If it is, it consumes the token and outputs a syntax node. Otherwise,
    -- it does not consume the token and throws a parse error exception.
    --
    -- If a "SyntaxKind" is specified, use it to label the syntax node created.
    Token TokenKind (Maybe SyntaxKind)
  | -- | Succeeds if the input is empty after consuming trivia tokens at the
    -- beginning of the input.
    --
    -- If it fails, it will report an error at the first non-trivia token while
    -- also consuming all remaining input.
    EOF
  | -- | Runs the inner grammar rule and wraps its resulting nodes in a node
    -- labeled by the given syntax kind. If it fails, apply the error recovery
    -- strategy (this is currently the only place where error recovery is
    -- performed).
    --
    -- This is the only rule that adds structure and depth into the CST.
    Group SyntaxKind GrammarRule
  | -- | Run the first grammar rule. If it succeeds, run the second rule.
    Chain GrammarRule GrammarRule
  | -- | Run the grammar rule that can accept the next token in the input as its
    -- first token.
    --
    -- Invariant: There can be no overlap in which token kinds each rule can
    -- accept. If the invariant is violated, the parser calls a Haskell 'error'.
    --
    -- TODO: Remove the invariant and use backtracking to try each rule
    Alternative GrammarRule GrammarRule
  deriving (Eq, Ord, Show)
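
-- As an illustration (not in the source) of the 'Alternative' invariant:
-- @(T.Int `as` S.Int) <|> (T.Int `as` S.Double)@ is invalid, since both
-- branches accept 'T.Int' as their first token, and parsing it would hit the
-- 'error' call in 'parseAlternative'.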

-- | @rule1 '<>' rule2@ is a parser that runs first @rule1@, then @rule2@.
-- Equivalent to @Chain rule1 rule2@.
instance Semigroup GrammarRule where
  -- "Succeed" is the identity of <>. The second pattern does affect the result
  -- tree, but only by changing where trivia tokens are placed in the tree,
  -- which is not an issue.
  Succeed <> rule = rule
  rule <> Succeed = rule
  first <> second = Chain first second

-- | @mempty@ is a parser that matches an empty string, or a string of only
-- trivia tokens.
instance Monoid GrammarRule where
  mempty = Succeed

-- | Run a "GrammarRule" on an input of tokens. Returns a tuple @(errors, nodes)@.
parse :: [Token] -> GrammarRule -> ([ParseError], [SyntaxNode])
parse tokens grammar =
  let initialState =
        ParseState
          { pstateInput = tokens,
            pstateCurrentLevelNodes = [],
            pstateErrors = [],
            pstateNextTokenOffset = 0,
            pstatePrevTokenOffset = 0,
            pstateAcceptableNextTokens = []
          }
   in case runState (runExceptT (parseRule grammar)) initialState of
        (Left err, state) -> case err of
          Unwind -> error "Unwind at top-level GrammarRule (impossible, this is a bug in Wasp.Backend.ConcreteParser.Internal)"
          ParseError perr ->
            let errs = pstateErrors state ++ [perr]
                newNode = makeErrorNode perr False
                nodes = pstateCurrentLevelNodes state ++ [newNode]
             in (errs, nodes)
        (_, state) -> (pstateErrors state, pstateCurrentLevelNodes state)

data ParseState = ParseState
  { -- | Remaining tokens to be parsed.
    pstateInput :: [Token],
    -- | "SyntaxNode"s processed so far at the current level of the CST. An
    -- initial top-level is created when the parser is started and each "Group"
    -- rule creates a new level.
    pstateCurrentLevelNodes :: [SyntaxNode],
    -- | Errors collected that will be reported.
    pstateErrors :: [ParseError],
    -- | 0-based offset from the beginning of the source file, in characters,
    -- of the start of the next token (@head . `pstateInput`@).
    pstateNextTokenOffset :: !Int,
    -- | 0-based offset from the beginning of the source file, in characters,
    -- of the start of the last consumed token.
    pstatePrevTokenOffset :: !Int,
    -- | Stack of acceptable next tokens, where entries to the stack are added
    -- when we enter a group rule for parsing.
    --
    -- The first token set in this list is the current follow set and the union
    -- of all of the token sets is the extended follow set.
    pstateAcceptableNextTokens :: [TokenSet]
  }
  deriving (Eq, Show, Ord)

data ParseException
  = -- | @ParseError parseError@ is used as a signal when an unexpected token is
    -- encountered. This kind of exception is caught by error recovery points
    -- and added to the output list of errors.
    --
    -- This may be converted into "Unwind" if error recovery determines unwinding
    -- should occur.
    ParseError ParseError
  | -- | @Unwind@ is used as a signal during error recovery. It is used
    -- to implement the unwinding behavior described at the top of the module.
    --
    -- Essentially, it means "I don't know how to parse this, but I know someone
    -- above me can. Can you? If not, pass it along."
    Unwind

type ParserM a = ExceptT ParseException (State ParseState) a

parseRule :: GrammarRule -> ParserM ()
parseRule Succeed = parseSucceed
parseRule (Token expectedKind maybeLabel) = parseToken expectedKind maybeLabel
parseRule EOF = parseEOF
parseRule (Group label inner) = parseGroup label inner
parseRule (Chain first second) = parseChain first second
parseRule (Alternative left right) = parseAlternative left right

parseSucceed :: ParserM ()
parseSucceed = consumeTrivia

parseToken :: TokenKind -> Maybe SyntaxKind -> ParserM ()
parseToken expectedKind maybeSyntaxKind =
  (consumeTrivia >> peek) >>= \case
    Just nextToken | tokenKind nextToken == expectedKind -> do
      advance
      let newNodeKind = fromMaybe (S.Token expectedKind) maybeSyntaxKind
      let newNode =
            SyntaxNode
              { snodeKind = newNodeKind,
                snodeWidth = tokenWidth nextToken,
                snodeChildren = []
              }
      pushNode newNode
    nextToken -> throwParseError nextToken (TokenSet.fromKind expectedKind)

parseEOF :: ParserM ()
parseEOF =
  (consumeTrivia >> peek) >>= \case
    Just nextToken -> do
      nextTokenOffset <- gets pstateNextTokenOffset
      advance
      let nextTokenKind = tokenKind nextToken
      -- TODO: consider reporting all token kinds here? not sure
      _tokenKinds <- collectUntilEOF
      throwError $
        ParseError $
          UnexpectedToken
            (Region nextTokenOffset (nextTokenOffset + tokenWidth nextToken))
            nextTokenKind
            TokenSet.fromEOF
      where
        collectUntilEOF :: ParserM [TokenKind]
        collectUntilEOF =
          peek >>= \case
            Just next -> advance >> ((tokenKind next :) <$> collectUntilEOF)
            Nothing -> pure []
    Nothing -> do
      return ()

parseGroup :: SyntaxKind -> GrammarRule -> ParserM ()
parseGroup label inner = do
  consumeTrivia
  currentLevelNodes <- gets pstateCurrentLevelNodes
  modify (\s -> s {pstateCurrentLevelNodes = []})
  maybeInnerParserError <- tryError $ parseRule inner
  childNodes <- gets pstateCurrentLevelNodes
  modify (\s -> s {pstateCurrentLevelNodes = currentLevelNodes})
  case maybeInnerParserError of
    Nothing -> pushGroupNode childNodes
    Just (ParseError parseError) -> handleInnerParseError childNodes parseError
    Just Unwind -> handleInnerUnwind childNodes
  where
    handleInnerParseError :: [SyntaxNode] -> ParseError -> ParserM ()
    handleInnerParseError childNodes parseError = do
      -- ASSUMPTION: The token errored on has not been consumed
      --
      -- (1) Check whether the token kind we errored on can be accepted by any
      -- following rule.
      --
      -- (2) Report the error and create the group node containing the error and
      -- what it parsed before the error.
      --
      -- (3) Handle the error:
      --   (3a) If it cannot be accepted by a following rule, discard the error
      --        token.
      --   (3b) If unwinding is required to accept the token, begin unwinding.
      --   (3c) Otherwise, the error can be handled without unwinding, so do
      --        nothing and continue parsing.

      -- (1)
      let errorTokenKind = case parseError of
            UnexpectedEOF _ _ -> Nothing
            UnexpectedToken _ k _ -> Just k
      errorCanBeHandled <- canAnyFollowingRuleAcceptTokenKind errorTokenKind
      errorCanBeHandledWithoutUnwind <- canImmediateFollowingRuleAcceptTokenKind errorTokenKind

      -- (2)
      logError parseError
      let nodeForErr = makeErrorNode parseError errorCanBeHandled
      pushGroupNode $ childNodes ++ [nodeForErr]

      -- (3)
      if not errorCanBeHandled
        then -- (3a)
          advance
        else -- (3b) and (3c)
          unless errorCanBeHandledWithoutUnwind $ throwError Unwind

    handleInnerUnwind :: [SyntaxNode] -> ParserM ()
    handleInnerUnwind childNodes = do
      errorTokenKind <- fmap tokenKind <$> peek
      -- When unwinding, check if this is the level where we can parse the error
      -- token. If it is, then stop unwinding. Otherwise, we continue unwinding.
      errorCanBeHandledWithoutUnwind <- canAnyFollowingRuleAcceptTokenKind errorTokenKind

      pushGroupNode childNodes

      unless errorCanBeHandledWithoutUnwind $ throwError Unwind

    pushGroupNode :: [SyntaxNode] -> ParserM ()
    pushGroupNode children =
      let width = sum $ map snodeWidth children
       in pushNode $
            SyntaxNode
              { snodeKind = label,
                snodeWidth = width,
                snodeChildren = children
              }

parseChain :: GrammarRule -> GrammarRule -> ParserM ()
parseChain first second = do
  consumeTrivia
  -- First, figure out the follow set for @second@
  pushFollowingTokens $ getValidFirstTokens second
  maybeError <- tryError (parseRule first)
  -- Make sure to remove the follow set now that we're leaving this "Chain"
  popFollowingTokens
  -- Run second (or throw the error if first failed)
  case maybeError of
    Nothing -> parseRule second
    Just err -> throwError err

parseAlternative :: GrammarRule -> GrammarRule -> ParserM ()
parseAlternative left right
  | leftFirstTokens `TokenSet.intersection` rightFirstTokens /= TokenSet.empty =
      error $
        unlines
          [ "[WARNING] Alternative grammar rule has two rules that can accept the same first token. "
              ++ "This will result in the second rule never running on these tokens.",
            "\tLeft Tokens:",
            "\t" ++ show leftFirstTokens,
            "\n\tRight Tokens:",
            "\t" ++ show rightFirstTokens
          ]
  | otherwise = do
      consumeTrivia
      nextToken <- peek
      let nextKind = tokenKind <$> nextToken
      if nextKind `willSucceedIn` left
        then parseRule left
        else
          if nextKind `willSucceedIn` right
            then parseRule right
            else throwParseError nextToken (leftFirstTokens `TokenSet.union` rightFirstTokens)
  where
    leftFirstTokens = getValidFirstTokens left
    rightFirstTokens = getValidFirstTokens right

-- | Advance past trivia tokens, making a syntax node for each token.
consumeTrivia :: ParserM ()
consumeTrivia =
  peek >>= \case
    Nothing -> pure ()
    Just nextToken
      -- Consume any whitespace
      | tokenKindIsTrivia (tokenKind nextToken) -> do
          advance
          let newNode =
                SyntaxNode
                  { snodeKind = S.Token (tokenKind nextToken),
                    snodeWidth = tokenWidth nextToken,
                    snodeChildren = []
                  }
          pushNode newNode
          consumeTrivia
      | otherwise -> pure ()

-- | Peek at the immediate next token in the input (including trivia).
peek :: ParserM (Maybe Token)
peek =
  gets pstateInput >>= \case
    [] -> return Nothing
    tok : _ -> return (Just tok)

-- | Advance past the next token in the input, updating offsets.
advance :: ParserM ()
advance =
  gets pstateInput >>= \case
    [] -> modify (\s -> s {pstatePrevTokenOffset = pstateNextTokenOffset s})
    (consumed : remaining) -> do
      newLastOffset <- gets pstateNextTokenOffset
      let newNextOffset = newLastOffset + tokenWidth consumed
      modify (\s -> s {pstateInput = remaining, pstatePrevTokenOffset = newLastOffset, pstateNextTokenOffset = newNextOffset})

-- | @throwParseError actual expected@ throws an error.
--
-- Assumes @actual@ has not been `advance`d past yet (and does not advance past
-- it).
throwParseError :: Maybe Token -> TokenSet -> ParserM ()
throwParseError Nothing eset = do
  offset <- gets pstateNextTokenOffset
  throwError $ ParseError $ UnexpectedEOF offset eset
throwParseError (Just actual) expectedTokens = do
  offset <- gets pstateNextTokenOffset
  throwError $ ParseError $ UnexpectedToken (Region offset (offset + tokenWidth actual)) (tokenKind actual) expectedTokens

-- | Make an error node. Error nodes should only be produced when catching and
-- pushing errors to the state.
makeErrorNode ::
  -- | Parse error that will be used to calculate the width of the error node.
  ParseError ->
  -- | If "True", give the error node a width of 0. Otherwise, use the
  -- width of the error item.
  Bool ->
  SyntaxNode
makeErrorNode (UnexpectedEOF _ _) _ =
  SyntaxNode
    { snodeKind = S.Error,
      snodeWidth = 0,
      snodeChildren = []
    }
makeErrorNode (UnexpectedToken (Region so eo) _ _) zeroWidth =
  SyntaxNode
    { snodeKind = S.Error,
      snodeWidth = if zeroWidth then 0 else eo - so,
      snodeChildren = []
    }

-- | Add a node to the output.
pushNode :: SyntaxNode -> ParserM ()
pushNode node = modify (\s -> s {pstateCurrentLevelNodes = pstateCurrentLevelNodes s ++ [node]})

-- | Add an error to the output (does not throw the error).
logError :: ParseError -> ParserM ()
logError err = modify (\s -> s {pstateErrors = pstateErrors s ++ [err]})

-- | Push a "TokenSet" onto the follow stack.
pushFollowingTokens :: TokenSet -> ParserM ()
pushFollowingTokens eset = modify (\s -> s {pstateAcceptableNextTokens = eset : pstateAcceptableNextTokens s})

-- | Remove the most recently added "TokenSet" from the follow stack.
popFollowingTokens :: ParserM ()
popFollowingTokens = modify (\s -> s {pstateAcceptableNextTokens = drop 1 $ pstateAcceptableNextTokens s})

-- | Check if a "TokenKind" (or end of file) is one of the first possible tokens
-- in a "GrammarRule".
willSucceedIn :: Maybe TokenKind -> GrammarRule -> Bool
_ `willSucceedIn` Succeed = True
k `willSucceedIn` grammarRule = k `TokenSet.member` getValidFirstTokens grammarRule

-- | Find the "TokenSet" of "TokenKind"s that the "GrammarRule" would succeed on
-- as its first input.
getValidFirstTokens :: GrammarRule -> TokenSet
getValidFirstTokens grammarRule = fst $ go grammarRule TokenSet.empty
  where
    -- Returns (firstTokens, can succeed with no input?)
    go :: GrammarRule -> TokenSet -> (TokenSet, Bool)
    go Succeed eset = (eset, True)
    go (Token tkind _) eset = (tkind `TokenSet.insertKind` eset, False)
    go EOF eset = (TokenSet.insertEof eset, False)
    go (Group _ p) eset = go p eset
    go (Chain first second) eset =
      let (eset', firstCanBeEmpty) = go first eset
       in if firstCanBeEmpty
            then go second eset'
            else (eset', False)
    go (Alternative left right) eset =
      let (eset', leftCanBeEmpty) = go left eset
          (eset'', rightCanBeEmpty) = go right eset'
       in (eset'', leftCanBeEmpty || rightCanBeEmpty)
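
-- A worked example (not in the source): for @perhaps comma <> int@, the
-- 'Chain' case sees that @perhaps comma@ can succeed on empty input, so it
-- also includes 'int''s first tokens; the result contains both 'T.Comma' and
-- 'T.Int'. Correspondingly, @Just T.Int `willSucceedIn` (perhaps comma <> int)@
-- is True.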

-- | Check if any following rule can accept the given "TokenKind". If this is
-- true, then unwinding at the current location will succeed in recovering from
-- an error.
canAnyFollowingRuleAcceptTokenKind :: Maybe TokenKind -> ParserM Bool
canAnyFollowingRuleAcceptTokenKind nextTokenKind = do
  followingTokens <- gets pstateAcceptableNextTokens
  return $ any (TokenSet.member nextTokenKind) followingTokens

-- | Check if the next rule can accept the given "TokenKind". If this is true,
-- then an error can be recovered from at the current context with no unwinding.
--
-- Pre-condition: @pstateAcceptableNextTokens@ is not empty.
canImmediateFollowingRuleAcceptTokenKind :: Maybe TokenKind -> ParserM Bool
canImmediateFollowingRuleAcceptTokenKind nextTokenKind = do
  followingTokens <- gets pstateAcceptableNextTokens
  return $ case followingTokens of
    [] -> error "canImmediateFollowingRuleAcceptTokenKind called with empty pstateAcceptableNextTokens. This is a parser library error."
    (nextTokens : _) -> nextTokenKind `TokenSet.member` nextTokens

infix 9 `as`

-- | Match a single token of the specified "TokenKind" and produce an output
-- node with the given "SyntaxKind".
--
-- __Example:__
--
-- >>> parse [T.Token T.Identifier 3] $ T.Identifier `as` S.DeclName
-- ([],[SyntaxNode {snodeKind = DeclName, snodeWidth = 3, snodeChildren = []}])
as :: TokenKind -> SyntaxKind -> GrammarRule
tkind `as` skind = Token tkind (Just skind)

infixr 5 <$$>

-- | @syntaxKind '<$$>' rule@ (pronounced group) runs a grammar rule and places
-- the nodes that the grammar rule adds to the output in a parent node.
--
-- __Example:__
--
-- >>> parse [T.Token T.Identifier 3, T.Token T.Int 2] $ S.Decl <$$> identifier <> int
-- ([],[SyntaxNode {snodeKind = Decl, snodeWidth = 5, snodeChildren = [SyntaxNode {snodeKind = Token Identifier, snodeWidth = 3, snodeChildren = []},SyntaxNode {snodeKind = Token Int, snodeWidth = 2, snodeChildren = []}]}])
(<$$>) :: SyntaxKind -> GrammarRule -> GrammarRule
label <$$> grammarRule = Group label grammarRule

-- | Run a grammar rule or consume no input.
--
-- __Example:__
--
-- >>> parse [T.Token T.Int 1, T.Token T.Int 1] $ int <> perhaps comma <> int
-- ([],[SyntaxNode {snodeKind = Token Int, snodeWidth = 1, snodeChildren = []},SyntaxNode {snodeKind = Token Int, snodeWidth = 1, snodeChildren = []}])
perhaps :: GrammarRule -> GrammarRule
perhaps grammarRule = grammarRule <|> Succeed

infixr 4 <|>

-- | Run a grammar, choosing either the left or the right rule.
--
-- __Example:__
--
-- >>> let number = int <|> double
-- >>> parse [T.Token T.Int 2] number
-- ([],[SyntaxNode (Token T.Int) 2 []])
-- >>> parse [T.Token T.Double 3] number
-- ([],[SyntaxNode (Token T.Double) 3 []])
(<|>) :: GrammarRule -> GrammarRule -> GrammarRule
left <|> right = Alternative left right

-- Primitive grammar rules

lparen :: GrammarRule
lparen = Token T.LParen Nothing

rparen :: GrammarRule
rparen = Token T.RParen Nothing

lsquare :: GrammarRule
lsquare = Token T.LSquare Nothing

rsquare :: GrammarRule
rsquare = Token T.RSquare Nothing

lcurly :: GrammarRule
lcurly = Token T.LCurly Nothing

rcurly :: GrammarRule
rcurly = Token T.RCurly Nothing

comma :: GrammarRule
comma = Token T.Comma Nothing

colon :: GrammarRule
colon = Token T.Colon Nothing

kwImport :: GrammarRule
kwImport = Token T.KwImport Nothing

kwFrom :: GrammarRule
kwFrom = Token T.KwFrom Nothing

-- | Produces a "S.BoolTrue" syntax node
kwTrue :: GrammarRule
kwTrue = T.KwTrue `as` S.BoolTrue

-- | Produces a "S.BoolFalse" syntax node
kwFalse :: GrammarRule
kwFalse = T.KwFalse `as` S.BoolFalse

-- | Produces a "S.String" syntax node
string :: GrammarRule
string = T.String `as` S.String

-- | Produces a "S.Int" syntax node
int :: GrammarRule
int = T.Int `as` S.Int

-- | Produces a "S.Double" syntax node
double :: GrammarRule
double = T.Double `as` S.Double

lquote :: GrammarRule
lquote = Token T.LQuote Nothing

rquote :: GrammarRule
rquote = Token T.RQuote Nothing

quoted :: GrammarRule
quoted = Token T.Quoted Nothing

-- | Succeeds when the parser is at the end of the file.
eof :: GrammarRule
eof = EOF

-- UTILITIES:

tryError :: MonadError e m => m () -> m (Maybe e)
tryError m = (m >> return Nothing) `catchError` (return . Just)

89  waspls/src/Wasp/Backend/ConcreteSyntax.hs  Normal file
@@ -0,0 +1,89 @@
{-# LANGUAGE DeriveGeneric #-}

module Wasp.Backend.ConcreteSyntax
  ( -- * Concrete Syntax

    -- | This data structure for storing syntax information is inspired by
    -- rust-analyzer's parsing library rowan (https://github.com/rust-analyzer/rowan).
    SyntaxNode (..),
    SyntaxKind (..),
    cstPrettyPrint,
    syntaxKindIsTrivia,
  )
where

import Control.DeepSeq (NFData)
import Data.Aeson (ToJSON)
import Data.List (foldl', intercalate)
import GHC.Generics (Generic)
import Wasp.Backend.Token (TokenKind, tokenKindIsTrivia)

-- | The kind of a "SyntaxNode".
data SyntaxKind
  = Token !TokenKind
  | Program
  | Decl
  | DeclType
  | DeclName
  | Dict
  | DictEntry
  | DictKey
  | List
  | Tuple
  | ExtImport
  | ExtImportModule
  | ExtImportField
  | ExtImportPath
  | String
  | Int
  | Double
  | BoolTrue
  | BoolFalse
  | Var
  | Quoter
  | Error
  deriving (Eq, Ord, Show, Generic)

instance NFData SyntaxKind

instance ToJSON SyntaxKind

-- | A node in a concrete syntax tree. We use such a loosely typed system so
-- that we can store all pieces of source syntax in the tree, including
-- comments, whitespace, and unexpected tokens/characters.
--
-- Later, this CST is processed into an AST, a more strongly typed format that
-- leaves out all of the trivia tokens.
data SyntaxNode = SyntaxNode
  { snodeKind :: !SyntaxKind,
    -- | Width of this "SyntaxNode". For nodes with children, this is the sum
    -- of their children's widths. For nodes without children, this is the
    -- width of the source text covered by the node.
    snodeWidth :: !Int,
    snodeChildren :: [SyntaxNode]
  }
  deriving (Eq, Ord, Show, Generic)

instance NFData SyntaxNode

instance ToJSON SyntaxNode

-- | Pretty print a concrete syntax tree. Shows a tree of syntax kinds and their
-- offsets in the source file.
cstPrettyPrint :: SyntaxNode -> String
cstPrettyPrint node = go 0 "" node
  where
    go :: Int -> String -> SyntaxNode -> String
    go offset indent pnode =
      let nodeTxt = indent ++ show (snodeKind pnode) ++ "@" ++ show offset ++ ".." ++ show (offset + snodeWidth pnode)
          childrenTxt =
            fst $
              foldl'
                (\(strs, o) child -> (strs ++ [go o (indent ++ "  ") child], o + snodeWidth child))
                ([], offset)
                (snodeChildren pnode)
       in intercalate "\n" (nodeTxt : childrenTxt)
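
-- A sketch (not in the source) of the output format: a 'Decl' node of width 5
-- whose children are an identifier of width 3 and an int of width 2 prints as
--
--   Decl@0..5
--     Token Identifier@0..3
--     Token Int@3..5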

syntaxKindIsTrivia :: SyntaxKind -> Bool
syntaxKindIsTrivia (Token k) = tokenKindIsTrivia k
syntaxKindIsTrivia _ = False

6  waspls/src/Wasp/Backend/Lexer.hs  Normal file
@@ -0,0 +1,6 @@
module Wasp.Backend.Lexer
  ( Wasp.Backend.Lexer.Lexer.lex,
  )
where

import Wasp.Backend.Lexer.Lexer (lex)

142  waspls/src/Wasp/Backend/Lexer/Internal.hs  Normal file
@@ -0,0 +1,142 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE GeneralisedNewtypeDeriving #-}

module Wasp.Backend.Lexer.Internal
  ( -- * Lexer monad code
    Lexer,
    runLexer,
    initialLexState,
    LexState,
    LexerStartCode (..),
    LexInput (..),
    getInput,
    updateInput,
    setStartCode,
    getStartCode,

    -- * Alex utilities
    AlexInput,
    startCodeToInt,
    alexGetByte,
    alexInputPrevChar,
    beginQuoter,
    lexQuoterEndTag,
    createToken,
  )
where

import Codec.Binary.UTF8.String (encodeChar)
import Control.Monad.State.Strict (MonadState, State, evalState, get, modify)
import Data.Word (Word8)
import Wasp.Backend.Token (Token (..), TokenKind)
import qualified Wasp.Backend.Token as T

-- LEXER MONAD CODE

newtype Lexer a = Lexer {unLexer :: State LexState a}
  deriving (Functor, Applicative, Monad, MonadState LexState)

runLexer :: Lexer a -> LexState -> a
runLexer lexer state = evalState (unLexer lexer) state

initialLexState :: String -> LexState
initialLexState source =
  LexState
    { lstateInput = LexInput '\n' [] source,
      lstateStartCode = DefaultStartCode
    }

data LexState = LexState
  { lstateInput :: LexInput,
    lstateStartCode :: LexerStartCode
  }

getInput :: Lexer LexInput
getInput = lstateInput <$> get

updateInput :: Int -> Lexer ()
updateInput !consumed = do
  (LexInput _ _ remaining) <- getInput
  let newInput =
        let (prevChar : remaining') = drop (consumed - 1) remaining
         in LexInput prevChar [] remaining'
  modify $ \s -> s {lstateInput = newInput}

getStartCode :: Lexer LexerStartCode
getStartCode = lstateStartCode <$> get

setStartCode :: LexerStartCode -> Lexer ()
setStartCode startCode = modify $ \s -> s {lstateStartCode = startCode}

-- | A representation of the lexer's start code: https://www.haskell.org/alex/doc/html/alex-files.html#startcodes
data LexerStartCode
  = -- | For a start code @DefaultStartCode@, the lexer is in start code <0>
    DefaultStartCode
  | -- | For a start code @QuoterStartCode tag@, the lexer is in start code <quoter> with opening tag @tag@
    QuoterStartCode String
  deriving (Show)

-- | The type of the input given to the parser/lexer.
--
-- An input @(prevChar, bs, remainingSource)@ represents:
--
-- - @prevChar@ The previous character, successfully consumed by the lexer.
-- - @bs@ The yet unconsumed UTF8 bytes of the current character being lexed.
-- - @remainingSource@ The remaining source to be lexed and parsed
--   (including the character currently being lexed as the first char in it).
data LexInput = LexInput Char [Word8] String

-- ALEX UTILITIES

type AlexInput = LexInput

-- | Convert the lexer state's start code to an int for Alex to use
startCodeToInt :: Int -> LexerStartCode -> Int
startCodeToInt _ DefaultStartCode = 0
startCodeToInt quoter (QuoterStartCode _) = quoter

-- | Required by Alex.
--
-- This function is taken from the Alex basic wrapper.
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (LexInput prevChar (b : bs) remainingSource) = Just (b, LexInput prevChar bs remainingSource)
alexGetByte (LexInput _ [] []) = Nothing
alexGetByte (LexInput _ [] (currChar : remainingSource)) = case encodeChar currChar of
  (b : bs) -> Just (b, LexInput currChar bs remainingSource)
  [] -> Nothing

-- | Required by Alex.
--
-- This function is taken from the Alex basic wrapper.
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (LexInput prevChar _ _) = prevChar

-- | Takes a lexeme like "{=json" and sets the quoter start code.
beginQuoter :: String -> Lexer Token
beginQuoter leftQuoteTag = do
  let tag = drop 2 leftQuoteTag
  setStartCode (QuoterStartCode tag)
  createToken T.LQuote leftQuoteTag

-- | Takes a lexeme like "json=}" and either ends a quoter or adds quoted text
-- to the quoter.
lexQuoterEndTag :: String -> Lexer Token
lexQuoterEndTag rightQuoteTag = do
  startCode <- getStartCode
  case startCode of
    DefaultStartCode -> error "impossible: lexQuoterEndTag with DefaultStartCode"
    QuoterStartCode startTag | startTag == tag -> do
      setStartCode DefaultStartCode
      createToken T.RQuote rightQuoteTag
    _ -> do
      createToken T.Quoted rightQuoteTag
  where
    tag = take (length rightQuoteTag - 2) rightQuoteTag

-- | Makes an action that creates a token from a "TokenKind".
createToken :: TokenKind -> (String -> Lexer Token)
createToken kind lexeme =
  return $
    Token
      { tokenKind = kind,
        tokenWidth = length lexeme
      }

108  waspls/src/Wasp/Backend/Lexer/Lexer.x  Normal file
@@ -0,0 +1,108 @@
{
-- This file is processed by Alex (https://www.haskell.org/alex/) and generates
-- the module `Wasp.Backend.Lexer.Lexer`

module Wasp.Backend.Lexer.Lexer
  ( Wasp.Backend.Lexer.Lexer.lex
  ) where

import Wasp.Backend.Lexer.Internal
import Wasp.Backend.Token (Token)
import qualified Wasp.Backend.Token as T
}

-- Character set aliases
$space = [\ \t\f\v\r] -- Non-newline whitespace
$digit = 0-9
$alpha = [a-zA-Z]
$identstart = [_$alpha]
$ident = [_$alpha$digit]
$any = [.$white]

-- Regular expression aliases
-- matches a string literal on a single line, from https://stackoverflow.com/a/9260547/3902376
@string = \"([^\\\"]|\\.)*\"
@double = "-"? $digit+ "." $digit+
@integer = "-"? $digit+
@ident = $identstart $ident* "'"*
@linecomment = "//" [^\n\r]*
-- Based on https://stackoverflow.com/a/16165598/1509394 .
@blockcomment = "/*" (("*"[^\/]) | [^\*] | $white)* "*/"

tokens :-

<0> $space+ { createToken T.White }
<0> \n { createToken T.Newline }
<0> @linecomment { createToken T.Comment }
<0> @blockcomment { createToken T.Comment }

-- Quoter rules:
-- Uses Alex start codes to lex quoted characters with different rules:
-- - On "{=tag", enter <quoter> start code and make a TLQuote token
-- - While in <quoter>, if "tag=}" is seen
--   - If this closing tag matches the opening, enter <0> and make a TRQuote token
--   - Otherwise, stay in <quoter> and make a TQuoted token
-- - Otherwise, take one character at a time and make a TQuoted token
<0> "{=" @ident { beginQuoter }
<quoter> @ident "=}" { lexQuoterEndTag }
<quoter> $any { createToken T.Quoted }
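
-- For example (a sketch, not part of the file): lexing "{=json {\"a\": 1} json=}"
-- yields an LQuote token for "{=json", one Quoted token per character of the
-- body (since <quoter> matches $any one character at a time), and an RQuote
-- token for "json=}". A non-matching end tag such as "yaml=}" would instead be
-- lexed as a single Quoted token.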

-- Simple tokens
<0> "(" { createToken T.LParen }
<0> ")" { createToken T.RParen }
<0> "[" { createToken T.LSquare }
<0> "]" { createToken T.RSquare }
<0> "{" { createToken T.LCurly }
<0> "}" { createToken T.RCurly }
<0> "," { createToken T.Comma }
<0> ":" { createToken T.Colon }
<0> "import" { createToken T.KwImport }
<0> "from" { createToken T.KwFrom }
<0> "true" { createToken T.KwTrue }
<0> "false" { createToken T.KwFalse }

-- Strings, numbers, identifiers
<0> @string { createToken T.String }
<0> @double { createToken T.Double }
<0> @integer { createToken T.Int }
<0> @ident { createToken T.Identifier }

{
-- | Lexes a single token from the input, returning "Nothing" if the lexer has
-- reached the end of the input. This function uses a continuation passing style
-- so that this function and its consumer can benefit from tail call
-- optimization.
--
-- This function internally calls `alexScan`, which is a function generated by
-- Alex responsible for doing the actual lexing/scanning.
lexOne :: (Maybe Token -> Lexer a) -> Lexer a
lexOne continue = do
  input@(LexInput _ _ remaining) <- getInput
  startCodeInt <- startCodeToInt quoter <$> getStartCode
  case alexScan input startCodeInt of
    AlexError (LexInput _ _ (c:_)) -> do
      token <- createToken T.Error [c]
      updateInput 1
      continue (Just token)
    AlexError (LexInput c _ []) -> do
      token <- createToken T.Error [c]
      updateInput 1
      continue (Just token)
    AlexSkip _ _ ->
      error "AlexSkip is impossible: lexer should not skip any input"
    AlexToken _ numChars makeToken -> do
      let lexeme = take numChars remaining
      token <- makeToken lexeme
      updateInput numChars
      continue (Just token)
    AlexEOF -> continue Nothing

-- | @lex source@ lexes all of @source@ into "Token"s.
lex :: String -> [Token]
lex source = runLexer (lexOne continue) $ initialLexState source
  where
    continue :: Maybe Token -> Lexer [Token]
    continue Nothing = return []
    -- This is written awkwardly like this so it is a tail call to @lexOne@
    continue (Just tok) = (tok:) <$> lexOne continue
}

111  waspls/src/Wasp/Backend/ParseError.hs  Normal file
@@ -0,0 +1,111 @@
{-# LANGUAGE DeriveGeneric #-}

module Wasp.Backend.ParseError
  ( -- * Parse error
    ParseError (..),

    -- * Display functions
    showError,
    showErrorMessage,
    errorRegion,

    -- * Source positions
    Region (..),
    SourcePos (..),
    offsetToSourcePos,
  )
where

import Control.DeepSeq (NFData)
import Data.List (intercalate)
import GHC.Generics (Generic)
import Wasp.Backend.Token (TokenKind)
import qualified Wasp.Backend.Token as T
import Wasp.Backend.TokenSet (TokenSet)
import qualified Wasp.Backend.TokenSet as TokenSet

data ParseError
  = UnexpectedToken !Region !TokenKind TokenSet
  | UnexpectedEOF !Int TokenSet
  deriving (Eq, Ord, Show, Generic)

instance NFData ParseError

data Region = Region !Int !Int deriving (Eq, Ord, Show, Generic)

instance NFData Region

data SourcePos = SourcePos !Int !Int deriving (Eq, Ord, Show, Generic)

instance NFData SourcePos

offsetToSourcePos :: String -> Int -> SourcePos
offsetToSourcePos source targetOffset = reach 0 (SourcePos 1 1) source
  where
    reach :: Int -> SourcePos -> String -> SourcePos
    reach o (SourcePos l c) remaining
      | o == targetOffset = SourcePos l c
      | [] <- remaining = SourcePos l c
      | ('\n' : remaining') <- remaining =
          let sp' = SourcePos (l + 1) 1
           in reach (o + 1) sp' remaining'
      | (_ : remaining') <- remaining =
          let sp' = SourcePos l (c + 1)
           in reach (o + 1) sp' remaining'
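
-- A worked example (not in the source): @offsetToSourcePos "ab\ncd" 3@ is
-- @SourcePos 2 1@, since offset 3 is the first character after the newline.
-- Both line and column are 1-based.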

showError :: String -> ParseError -> String
showError source msg =
  let (Region so eo) = errorRegion msg
      start = offsetToSourcePos source so
      end = offsetToSourcePos source eo
   in "Parse error at " ++ showRegion start end ++ " (" ++ show so ++ ".." ++ show eo ++ ")\n " ++ showErrorMessage msg

errorRegion :: ParseError -> Region
errorRegion (UnexpectedEOF o _) = Region o o
errorRegion (UnexpectedToken rgn _ _) = rgn

showErrorMessage :: ParseError -> String
showErrorMessage (UnexpectedEOF _ expecteds) =
  "Unexpected end of file, " ++ showExpected expecteds
showErrorMessage (UnexpectedToken _ actual expecteds) =
  "Unexpected token " ++ showTokenKind actual ++ ", " ++ showExpected expecteds

showExpected :: TokenSet -> String
showExpected expecteds = "expected one of " ++ showExpecteds expecteds

showExpecteds :: TokenSet -> String
showExpecteds expecteds =
  let kindStrs = map showTokenKind $ TokenSet.toList expecteds
      eofStrs = if TokenSet.eofMember expecteds then ["<eof>"] else []
   in intercalate "," (kindStrs ++ eofStrs)

showTokenKind :: TokenKind -> String
showTokenKind T.White = "<whitespace>"
showTokenKind T.Newline = "\\n"
showTokenKind T.Comment = "<comment>"
showTokenKind T.LParen = "'('"
showTokenKind T.RParen = "')'"
showTokenKind T.LSquare = "'['"
showTokenKind T.RSquare = "']'"
showTokenKind T.LCurly = "'{'"
showTokenKind T.RCurly = "'}'"
showTokenKind T.Comma = "','"
showTokenKind T.Colon = "':'"
showTokenKind T.KwImport = "'import'"
showTokenKind T.KwFrom = "'from'"
showTokenKind T.KwTrue = "'true'"
showTokenKind T.KwFalse = "'false'"
showTokenKind T.String = "<string>"
showTokenKind T.Int = "<number>"
showTokenKind T.Double = "<number>"
showTokenKind T.LQuote = "'{='"
showTokenKind T.RQuote = "'=}'"
showTokenKind T.Quoted = "<any>" -- Should be impossible, hard to prove though
showTokenKind T.Identifier = "<identifier>"
showTokenKind T.Error = "<error>"

showRegion :: SourcePos -> SourcePos -> String
showRegion start@(SourcePos sl sc) end@(SourcePos el ec)
  | start == end = show sl ++ ":" ++ show sc
  | sl == el = show sl ++ ":" ++ show sc ++ "-" ++ show ec
  | otherwise = show sl ++ ":" ++ show sc ++ "-" ++ show el ++ ":" ++ show ec

81  waspls/src/Wasp/Backend/Token.hs  Normal file
@@ -0,0 +1,81 @@
{-# LANGUAGE DeriveGeneric #-}

module Wasp.Backend.Token
  ( Token (..),
    TokenKind (..),
    tokenKindIsTrivia,
  )
where

import Control.DeepSeq (NFData)
import Data.Aeson (ToJSON)
import GHC.Generics (Generic)

-- | The kind of token.
--
-- This makes no distinction between value-containing tokens and atomic tokens
-- (i.e. "String" and "KwImport") to make kind comparison easier. To recover the
-- value associated with a token, you need the context provided by a "Token".
data TokenKind
  = White
  | -- | Newlines (only \n) are separated from whitespace to accommodate line-break
    -- based error recovery, if that is ever implemented in the future
    Newline
  | Comment
  | LParen
  | RParen
  | LSquare
  | RSquare
  | LCurly
  | RCurly
  | Comma
  | Colon
  | KwImport
  | KwFrom
  | KwTrue
  | KwFalse
  | String
  | Int
  | Double
  | -- | "{= <identifier>"
    LQuote
  | -- | "<identifier> =}"
    RQuote
  | Quoted
  | -- | Non-keyword identifier
    Identifier
  | -- | Kind for unexpected characters
    Error
  deriving (Eq, Ord, Show, Generic)

instance NFData TokenKind

instance ToJSON TokenKind

-- | A token representing a span of text from the source. For value-containing
-- tokens, the value can be recovered using @tokenText@.
data Token = Token
  { tokenKind :: !TokenKind,
    -- | The width of the text representing this token. The source position is
    -- not stored; only the width. Later, offsets into the source are computed
    -- from an entire tree of tokens, and the source position is determined on
    -- demand (for example, when an offset is going to be displayed in an error
    -- message).
    --
    -- This is guaranteed to be equivalent to @length . tokenText@, but is
    -- stored explicitly since it is accessed frequently.
    tokenWidth :: !Int
  }
  deriving (Eq, Show, Ord, Generic)

instance NFData Token

instance ToJSON Token

-- | Check if a "TokenKind" is trivia (a token kind that does not affect the
-- parse structure, namely whitespace and comments).
tokenKindIsTrivia :: TokenKind -> Bool
tokenKindIsTrivia White = True
tokenKindIsTrivia Newline = True
tokenKindIsTrivia Comment = True
tokenKindIsTrivia _ = False

110  waspls/src/Wasp/Backend/TokenSet.hs  Normal file
@@ -0,0 +1,110 @@
{-# LANGUAGE DeriveGeneric #-}

module Wasp.Backend.TokenSet
  ( -- * TokenSet

    -- | A set of "TokenKind"s and possibly EOF (which is not a "TokenKind").
    --
    -- In functions on a "TokenSet", a @Maybe "TokenKind"@ is either @Nothing@,
    -- representing EOF, or @Just k@, representing the kind @k@.
    TokenSet,

    -- * Membership predicates
    member,
    eofMember,
    kindMember,

    -- * Operations
    insert,
    insertKind,
    insertEof,
    union,
    intersection,

    -- * Constructors
    empty,
    singleton,
    fromEOF,
    fromKind,
    fromList,

    -- * Destructors
    toList,
  )
where

import Control.DeepSeq (NFData)
import Data.Set (Set)
import qualified Data.Set as Set
import GHC.Generics (Generic)
import Wasp.Backend.Token (TokenKind)

data TokenSet = TokenSet
  { -- | Check if EOF is part of the "TokenSet"
    eofMember :: !Bool,
    kindSet :: Set TokenKind
  }
  deriving (Eq, Ord, Show, Generic)

instance NFData TokenSet

-- | Check if a "TokenKind" or EOF is part of the "TokenSet".
member :: Maybe TokenKind -> TokenSet -> Bool
member Nothing set = eofMember set
member (Just k) set = kindMember k set

-- | Check if a "TokenKind" is part of the "TokenSet".
kindMember :: TokenKind -> TokenSet -> Bool
kindMember k set = k `Set.member` kindSet set

-- | Insert a "TokenKind" or EOF into a "TokenSet".
insert :: Maybe TokenKind -> TokenSet -> TokenSet
insert Nothing set = insertEof set
insert (Just k) set = insertKind k set

-- | Insert EOF into a "TokenSet".
insertEof :: TokenSet -> TokenSet
insertEof set = set {eofMember = True}

-- | Insert a "TokenKind" into a "TokenSet".
insertKind :: TokenKind -> TokenSet -> TokenSet
insertKind k set = set {kindSet = Set.insert k (kindSet set)}

-- | Get the union of two "TokenSet"s.
union :: TokenSet -> TokenSet -> TokenSet
union left right =
  let unionEofMember = eofMember left || eofMember right
      unionKindSet = kindSet left `Set.union` kindSet right
   in TokenSet {eofMember = unionEofMember, kindSet = unionKindSet}

-- | Get the intersection of two "TokenSet"s.
intersection :: TokenSet -> TokenSet -> TokenSet
intersection left right =
  let intersectionEofMember = eofMember left && eofMember right
      intersectionKindSet = kindSet left `Set.intersection` kindSet right
   in TokenSet {eofMember = intersectionEofMember, kindSet = intersectionKindSet}

-- | The empty "TokenSet".
empty :: TokenSet
empty = TokenSet {eofMember = False, kindSet = Set.empty}

-- | Create a "TokenSet" containing a single "TokenKind" or EOF.
singleton :: Maybe TokenKind -> TokenSet
singleton Nothing = fromEOF
singleton (Just k) = fromKind k

-- | Create a "TokenSet" containing just EOF.
fromEOF :: TokenSet
fromEOF = TokenSet {eofMember = True, kindSet = Set.empty}

-- | Create a "TokenSet" containing a single "TokenKind".
fromKind :: TokenKind -> TokenSet
fromKind k = TokenSet {eofMember = False, kindSet = Set.singleton k}

-- | Create a "TokenSet" from a list of "TokenKind"s.
fromList :: [TokenKind] -> TokenSet
fromList ks = TokenSet {eofMember = False, kindSet = Set.fromList ks}

-- | Get a list of all "TokenKind"s in a "TokenSet".
toList :: TokenSet -> [TokenKind]
toList set = Set.toList (kindSet set)
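A quick usage sketch of this API, relying only on the exports above (the names expected and checks are illustrative, not part of the commit):

import qualified Wasp.Backend.Token as T
import qualified Wasp.Backend.TokenSet as TokenSet

-- A set describing "what could come next": a few expression starters plus EOF.
expected :: TokenSet.TokenSet
expected = TokenSet.insertEof (TokenSet.fromList [T.LCurly, T.LSquare, T.Int, T.Double, T.String])

-- All of these evaluate to True: EOF is represented as Nothing, kinds as Just.
checks :: [Bool]
checks =
  [ TokenSet.member Nothing expected,
    TokenSet.member (Just T.Int) expected,
    not (TokenSet.kindMember T.Comma expected)
  ]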
5
waspls/test/TastyDiscoverDriver.hs
Normal file
@ -0,0 +1,5 @@
{-# OPTIONS_GHC -F -pgmF tasty-discover -optF --modules=*Test.hs #-}

-- -optF --modules=*Test.hs tells tasty-discover to pick up only files that match *Test.hs.
-- There was no strong reason for this restriction, so we can remove it in the future if it
-- turns out to be too restrictive.
39
waspls/test/TestUtil.hs
Normal file
@ -0,0 +1,39 @@
module TestUtil
  ( printDiff,
    shouldBeWithDiff,
    Diffable (toLines),
  )
where

import Data.Algorithm.Diff (Diff, PolyDiff (Both, First, Second), getDiff)
import Data.List (intercalate)
import Test.Tasty.Hspec (Expectation, expectationFailure)

shouldBeWithDiff :: (Eq a, Diffable a) => a -> a -> Expectation
shouldBeWithDiff actual expected
  | actual == expected = pure ()
  | otherwise =
      expectationFailure $
        "Actual is not expected\n"
          ++ printDiff (getDiff (toLines expected) (toLines actual))
          ++ "\ESC[31m"

printDiff :: [Diff String] -> String
printDiff diffs = intercalate "\n" $ map printDiffLine diffs
  where
    printDiffLine :: Diff String -> String
    printDiffLine (First s) = "  \ESC[31m- " ++ s
    printDiffLine (Second s) = "  \ESC[36m+ " ++ s
    printDiffLine (Both s _) = "  \ESC[0m  " ++ s

class Diffable a where
  toLines :: a -> [String]

instance Diffable Char where
  toLines c = [[c]]

instance Diffable a => Diffable [a] where
  toLines xs = concatMap toLines xs

instance (Diffable a, Diffable b) => Diffable (a, b) where
  toLines (a, b) = toLines a ++ toLines b
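A sketch of how a test might use this helper; everything here is illustrative and relies only on the module above. Note that via the Char and list instances, each character of a String becomes its own diff line:

import Test.Tasty.Hspec (Spec, it)
import TestUtil (shouldBeWithDiff)

spec_Example :: Spec
spec_Example =
  it "passes, but would print a colored per-character diff on mismatch" $
    "waspls" `shouldBeWithDiff` "waspls"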
271
waspls/test/Wasp/Backend/ConcreteParserTest.hs
Normal file
@ -0,0 +1,271 @@
{-# LANGUAGE FlexibleInstances #-}
{-# OPTIONS_GHC -Wno-orphans #-}

module Wasp.Backend.ConcreteParserTest where

import Control.DeepSeq (deepseq)
import Test.Tasty.Hspec (Spec, describe, it)
import Test.Tasty.QuickCheck
import TestUtil
import Wasp.Backend.ConcreteParser
import Wasp.Backend.ConcreteSyntax
import qualified Wasp.Backend.Lexer as L
import Wasp.Backend.ParseError
import qualified Wasp.Backend.Token as T
import qualified Wasp.Backend.TokenSet as TokenSet

token :: T.TokenKind -> String -> T.Token
token kind text = T.Token {T.tokenKind = kind, T.tokenWidth = length text}

node :: SyntaxKind -> Int -> [SyntaxNode] -> SyntaxNode
node kind width children = SyntaxNode {snodeKind = kind, snodeWidth = width, snodeChildren = children}

spec_ParseCSTExpression :: Spec
spec_ParseCSTExpression = do
  it "Parses identifiers" $ do
    parseCSTExpression [token T.Identifier "foo"]
      `shouldBeWithDiff` ([], [node Var 3 []])
  it "Parses double literals" $ do
    parseCSTExpression [token T.Double "0.5"]
      `shouldBeWithDiff` ([], [node Double 3 []])
  it "Parses int literals" $ do
    parseCSTExpression [token T.Int "123"]
      `shouldBeWithDiff` ([], [node Int 3 []])
  it "Parses true literals" $ do
    parseCSTExpression [token T.KwTrue "true"]
      `shouldBeWithDiff` ([], [node BoolTrue 4 []])
  it "Parses false literals" $ do
    parseCSTExpression [token T.KwFalse "false"]
      `shouldBeWithDiff` ([], [node BoolFalse 5 []])
  it "Parses external imports" $ do
    parseCSTExpression [token T.KwImport "import", token T.Identifier "main", token T.KwFrom "from", token T.String "\"@ext/main.js\""]
      `shouldBeWithDiff` ( [],
                           [ node
                               ExtImport
                               28
                               [ node (Token T.KwImport) 6 [],
                                 node ExtImportModule 4 [],
                                 node (Token T.KwFrom) 4 [],
                                 node ExtImportPath 14 []
                               ]
                           ]
                         )
  it "Parses quoted expressions" $ do
    parseCSTExpression [token T.LQuote "{=txt", token T.Quoted "h", token T.Quoted "e", token T.Quoted "llo", token T.RQuote "txt=}"]
      `shouldBeWithDiff` ( [],
                           [ node
                               Quoter
                               15
                               [ node (Token T.LQuote) 5 [],
                                 node (Token T.Quoted) 1 [],
                                 node (Token T.Quoted) 1 [],
                                 node (Token T.Quoted) 3 [],
                                 node (Token T.RQuote) 5 []
                               ]
                           ]
                         )

  describe "Lists" $ do
    it "Parses empty lists" $ do
      parseCSTExpression [token T.LSquare "[", token T.RSquare "]"]
        `shouldBeWithDiff` ([], [node List 2 [node (Token T.LSquare) 1 [], node (Token T.RSquare) 1 []]])
    it "Parses lists with items" $ do
      parseCSTExpression [token T.LSquare "[", token T.Identifier "foo", token T.Comma ",", token T.Int "3", token T.RSquare "]"]
        `shouldBeWithDiff` ( [],
                             [ node
                                 List
                                 7
                                 [ node (Token T.LSquare) 1 [],
                                   node Var 3 [],
                                   node (Token T.Comma) 1 [],
                                   node Int 1 [],
                                   node (Token T.RSquare) 1 []
                                 ]
                             ]
                           )
    it "Parses lists with a trailing comma" $ do
      parseCSTExpression [token T.LSquare "[", token T.Int "1", token T.Comma ",", token T.RSquare "]"]
        `shouldBeWithDiff` ( [],
                             [ node
                                 List
                                 4
                                 [ node (Token T.LSquare) 1 [],
                                   node Int 1 [],
                                   node (Token T.Comma) 1 [],
                                   node (Token T.RSquare) 1 []
                                 ]
                             ]
                           )

spec_ConcreteParser :: Spec
spec_ConcreteParser =
  describe "Wasp.Backend.ConcreteParser" $ do
    it "works on a simple example" $ do
      let tokens =
            [ token T.Identifier "app",
              token T.White " ",
              token T.Identifier "Test",
              token T.White " ",
              token T.LCurly "{",
              token T.Newline "\n",
              token T.White "  ",
              token T.Comment "// this is the title of the app",
              token T.Newline "\n",
              token T.White "  ",
              token T.Identifier "title",
              token T.Colon ":",
              token T.String "\"hello world\"",
              token T.Newline "\n",
              token T.RCurly "}"
            ]
      let errors = []
      let tree =
            node
              Program
              68
              [ node
                  Decl
                  68
                  [ node DeclType 3 [],
                    node (Token T.White) 1 [],
                    node DeclName 4 [],
                    node (Token T.White) 1 [],
                    node
                      Dict
                      59
                      [ node (Token T.LCurly) 1 [],
                        node (Token T.Newline) 1 [],
                        node (Token T.White) 2 [],
                        node (Token T.Comment) 31 [],
                        node (Token T.Newline) 1 [],
                        node (Token T.White) 2 [],
                        node
                          DictEntry
                          19
                          [ node DictKey 5 [],
                            node (Token T.Colon) 1 [],
                            node String 13 []
                          ],
                        node (Token T.Newline) 1 [],
                        node (Token T.RCurly) 1 []
                      ]
                  ]
              ]
      let (actualErrors, actualTrees) = parseCST tokens
      actualTrees `shouldBeWithDiff` [tree]
      actualErrors `shouldBeWithDiff` errors

    it "works for incomplete source" $ do
      let tokens =
            [ token T.Identifier "app",
              token T.Identifier "Test",
              token T.LCurly "{",
              token T.Identifier "title",
              token T.Colon ":"
            ]
      let errors = [UnexpectedEOF 14 $ TokenSet.fromList [T.LParen, T.LCurly, T.LSquare, T.KwImport, T.KwTrue, T.KwFalse, T.String, T.Double, T.Int, T.Identifier, T.LQuote]]
      let tree =
            node
              Program
              14
              [ node
                  Decl
                  14
                  [ node DeclType 3 [],
                    node DeclName 4 [],
                    node
                      Dict
                      7
                      [ node (Token T.LCurly) 1 [],
                        node
                          DictEntry
                          6
                          [ node DictKey 5 [],
                            node (Token T.Colon) 1 [],
                            node Error 0 []
                          ]
                      ]
                  ]
              ]
      parseCST tokens `shouldBeWithDiff` (errors, [tree])

    it "works when incomplete with closing bracket" $ do
      let tokens =
            [ token T.Identifier "route",
              token T.White " ",
              token T.Identifier "TestRoute",
              token T.White " ",
              token T.LCurly "{",
              token T.Newline "\n",
              token T.White "  ",
              token T.Identifier "path",
              token T.Colon ":",
              token T.White " ",
              token T.String "\"/\"",
              token T.Comma ",",
              token T.Newline "\n",
              token T.White "  ",
              token T.Identifier "to",
              token T.Colon ":",
              token T.White " ",
              token T.Newline "\n",
              token T.RCurly "}"
            ]
      let errors =
            [ UnexpectedToken (Region 38 39) T.RCurly $ TokenSet.fromList [T.LParen, T.LCurly, T.LSquare, T.KwImport, T.KwTrue, T.KwFalse, T.String, T.Double, T.Int, T.Identifier, T.LQuote]
            ]
      let tree =
            node
              Program
              39
              [ node
                  Decl
                  39
                  [ node DeclType 5 [],
                    node (Token T.White) 1 [],
                    node DeclName 9 [],
                    node (Token T.White) 1 [],
                    node
                      Dict
                      23
                      [ node (Token T.LCurly) 1 [],
                        node (Token T.Newline) 1 [],
                        node (Token T.White) 2 [],
                        node
                          DictEntry
                          9
                          [ node DictKey 4 [],
                            node (Token T.Colon) 1 [],
                            node (Token T.White) 1 [],
                            node String 3 []
                          ],
                        node (Token T.Comma) 1 [],
                        node (Token T.Newline) 1 [],
                        node (Token T.White) 2 [],
                        node
                          DictEntry
                          5
                          [ node DictKey 2 [],
                            node (Token T.Colon) 1 [],
                            node (Token T.White) 1 [],
                            node (Token T.Newline) 1 [],
                            node Error 0 []
                          ],
                        node (Token T.RCurly) 1 []
                      ]
                  ]
              ]
      parseCST tokens `shouldBeWithDiff` (errors, [tree])

    -- DEPENDS ON: LexerTest#never fails to lex
    -- TODO: remove this dependency by making an Arbitrary instance for Token
    --
    -- The point of this test is to ensure some sort of parse tree is always built
    it "never fails to parse" $
      property $ \source -> parseCST (L.lex source) `deepseq` True

instance Diffable SyntaxNode where
  toLines n = lines $ cstPrettyPrint n

instance Diffable ParseError where
  toLines err = ["At " ++ show (errorRegion err), "  " ++ showErrorMessage err]
66
waspls/test/Wasp/Backend/LexerTest.hs
Normal file
@ -0,0 +1,66 @@
{-# OPTIONS_GHC -Wno-orphans #-}

module Wasp.Backend.LexerTest where

import Control.DeepSeq (deepseq)
import Data.List (isInfixOf)
import Test.Tasty.Hspec
import Test.Tasty.QuickCheck
import TestUtil
import qualified Wasp.Backend.Lexer as L
import Wasp.Backend.Token

token :: TokenKind -> String -> Token
token kind text = Token {tokenKind = kind, tokenWidth = length text}

spec_Lexer :: Spec
spec_Lexer = do
  it "works for a simple example" $ do
    let source = "app Test { name: \"hello world\" }\n\n\n // this is a comment\nimport from"
    let expected =
          [ token Identifier "app",
            token White " ",
            token Identifier "Test",
            token White " ",
            token LCurly "{",
            token White " ",
            token Identifier "name",
            token Colon ":",
            token White " ",
            token String "\"hello world\"",
            token White " ",
            token RCurly "}",
            token Newline "\n",
            token Newline "\n",
            token Newline "\n",
            token White " ",
            token Comment "// this is a comment",
            token Newline "\n",
            token KwImport "import",
            token White " ",
            token KwFrom "from"
          ]
    L.lex source `shouldBeWithDiff` expected

  it "only makes Quoted tokens after an LQuote and before an RQuote" $ do
    property $ \inner ->
      if "txt=}" `isInfixOf` inner
        then discard
        else case L.lex $ "{=txt " ++ inner ++ " txt=}" of
          [] -> False
          open : rest
            | tokenKind open /= LQuote -> False
            | otherwise ->
                all ((== Quoted) . tokenKind) (init rest)
                  && (tokenKind (last rest) == RQuote)

  it "never fails to lex" $ do
    -- @deepseq@ is used to force evaluation of the tokens, making sure an
    -- error never occurs (there are calls to @error@ in the lexer that we
    -- need to make sure never actually happen)
    property $ \source -> L.lex source `deepseq` True

instance Diffable Token where
  toLines tok =
    [ show (tokenKind tok) ++ "[" ++ show (tokenWidth tok) ++ "]"
    ]
@ -42,14 +42,26 @@ library
  import: common-all
  exposed-modules:
    Wasp.LSP.Server
    Wasp.LSP.Core
    Wasp.LSP.Handlers
    Wasp.Backend.Token
    Wasp.Backend.Lexer
    Wasp.Backend.TokenSet
    Wasp.Backend.ParseError
    Wasp.Backend.ConcreteSyntax
    Wasp.Backend.ConcreteParser
    Wasp.Backend.ConcreteParser.Internal
  other-modules:
    Paths_waspls
    Wasp.LSP.Core
    Wasp.LSP.Handlers
    Wasp.Backend.Lexer.Internal
    Wasp.Backend.Lexer.Lexer
  hs-source-dirs:
    src
  build-tool-depends:
    , alex:alex
  build-depends:
    , base ^>=4.14.3.0
    , waspc
    , lsp ^>=1.4.0.0
    , lsp-types ^>=1.4.0.1
    , containers ^>=0.6.5.1
@ -60,7 +72,34 @@ library
    , hslogger ^>=1.3.1.0
    , aeson ^>=1.5.6
    , lens ^>=5.1
    , waspc
    , aeson-pretty ^>= 0.8
    , utf8-string ^>= 1.0.2
    -- 'array' is used by code generated by Alex for src/Analyzer/Parser/Lexer.x
    , array ^>= 0.5.4
    , deepseq

test-suite waspls-test
  import: common-all, common-exe
  type: exitcode-stdio-1.0
  hs-source-dirs: test
  main-is: TastyDiscoverDriver.hs
  build-tool-depends:
    , tasty-discover:tasty-discover
  build-depends:
    , base
    , waspls
    , deepseq
    , containers ^>= 0.6.5
    , Diff ^>= 0.4.1
    , QuickCheck ^>= 2.14
    , tasty ^>= 1.4.2
    -- tasty-hspec 1.1.7 introduces breaking changes, which is why we have < 1.1.7.
    , tasty-hspec >= 1.1 && < 1.1.7
    , tasty-quickcheck ^>= 0.10
  other-modules:
    TestUtil
    Wasp.Backend.LexerTest
    Wasp.Backend.ConcreteParserTest

executable waspls
  import: common-all, common-exe