Add error resilient parser for LSP (#655)

See waspls/src/Wasp/Backend/ConcreteParser/Internal.hs for description of the parser
Craig McIlwrath 2022-07-26 15:14:19 -04:00 committed by GitHub
parent 0de9997198
commit 879f8e4225
14 changed files with 1945 additions and 3 deletions


@ -0,0 +1,101 @@
module Wasp.Backend.ConcreteParser
( -- * Concrete parser
-- | This module contains functions for converting lists of "Token"s into
-- concrete syntax trees. These trees represent any Wasp source file completely,
-- including whitespace and comments, even if the source file is invalid.
parseCST,
parseCSTExpression,
)
where
import Wasp.Backend.ConcreteParser.Internal
import Wasp.Backend.ConcreteSyntax (SyntaxKind (..), SyntaxNode)
import Wasp.Backend.ParseError
import Wasp.Backend.Token (Token)
import qualified Wasp.Backend.Token as T
-- | Parse a list of tokens representing a Wasp program into a concrete syntax
-- tree.
--
-- See "SyntaxNode" for a description of what a concrete syntax tree contains.
parseCST :: [Token] -> ([ParseError], [SyntaxNode])
parseCST tokens = parse tokens (root <> eof)
-- | Parse a list of tokens into a concrete syntax tree for a wasp expression.
-- This is mainly used for testing.
--
-- See "SyntaxNode" for a description of what a concrete syntax tree contains.
parseCSTExpression :: [Token] -> ([ParseError], [SyntaxNode])
parseCSTExpression tokens = parse tokens (expr <> eof)
root :: GrammarRule
root = Program <$$> stmts
stmts :: GrammarRule
stmts = eof <|> (stmt <> stmts)
stmt :: GrammarRule
stmt =
Decl
<$$> (T.Identifier `as` DeclType)
<> (T.Identifier `as` DeclName)
<> expr
{- ORMOLU_DISABLE -}
expr :: GrammarRule
expr =
Dict <$$> listLike lcurly dictEntry comma rcurly
<|> List <$$> listLike lsquare expr comma rsquare
-- Note that we don't check the number of tuple elements here: this is left to
-- the next phase of parsing.
<|> Tuple <$$> listLike lparen expr comma rparen
<|> quoter
<|> int
<|> double
<|> string
<|> (T.Identifier `as` Var)
<|> kwTrue
<|> kwFalse
<|> extImport
{- ORMOLU_ENABLE -}
dictEntry :: GrammarRule
dictEntry = DictEntry <$$> (T.Identifier `as` DictKey) <> colon <> expr
quoter :: GrammarRule
quoter = Quoter <$$> lquote <> quoterTail
where
quoterTail = rquote <|> (quoted <> quoterTail)
extImport :: GrammarRule
extImport =
ExtImport
<$$> kwImport <> extImportName <> kwFrom <> (T.String `as` ExtImportPath)
where
extImportName :: GrammarRule
extImportName =
lcurly <> (T.Identifier `as` ExtImportField) <> rcurly
<|> (T.Identifier `as` ExtImportModule)
-- | @listLike open value sep close@ parses list like structures in the form:
--
-- @open (value (sep value)* sep?)? close@
--
-- In other words, a possibly empty list of values with an optional trailing
-- separator.
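--
-- For instance (informal), @listLike lcurly dictEntry comma rcurly@ accepts
-- each of the following token sequences:
--
-- @
-- { }
-- { key: 1 }
-- { key: 1, other: 2 }
-- { key: 1, }
-- @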
listLike ::
-- | start grammar rule
GrammarRule ->
-- | value grammar rule
GrammarRule ->
-- | separator grammar rule
GrammarRule ->
-- | end grammar rule
GrammarRule ->
GrammarRule
listLike open value sep close =
open <> listLikeTail
where
listLikeTail :: GrammarRule
listLikeTail =
close <|> (value <> ((sep <> listLikeTail) <|> close))


@ -0,0 +1,774 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
module Wasp.Backend.ConcreteParser.Internal
( -- * Internal parsing library
GrammarRule,
parse,
-- * Combinators
-- | The fixity of the operators ('(<>)', '`as`', '(<$$>)', and '(<|>)') was
-- chosen so that using the operators requires as few parentheses as possible:
--
-- >>> Program <$$> lparen <> rparen <|> string
-- Alternative (Group Program (Chain (Token LParen) (Token RParen))) (Token String)
as,
(<$$>),
perhaps,
(<|>),
-- * Primitive grammar rules
-- | These grammar rules (besides `eof`) consume a single token of the kind
-- matching their name. The syntax node they produce has an "S.Token" kind,
-- unless otherwise specified.
lparen,
rparen,
lsquare,
rsquare,
lcurly,
rcurly,
comma,
colon,
kwImport,
kwFrom,
kwTrue,
kwFalse,
string,
int,
double,
lquote,
rquote,
quoted,
-- | A primitive for "T.Identifier" is not included: this should always be
-- done with an `as`, e.g. @"T.Identifier" ``as`` "S.DictKey"@
eof,
-- * Error Recovery
-- | This parser has built-in automatic error recovery. Currently, error
-- recovery occurs when an error is produced inside of a group rule. Recovery
-- is based on the possible tokens that can follow the current rule.
--
-- First, we define the follow set of a grammar rule @R@ in the grammar
-- @R <> S@ to be the set of token kinds that @S@ can accept as its first
-- token. For example, @"let"@ in @"let" <> ("=" <|> "+=")@ has the follow
-- set @["=", "+="]@.
--
-- We can then describe the basic mechanism as follows:
--
-- For a "GrammarRule" @label '<$$>' first '<>' next@, if an error occurs
-- while parsing @first@, check if the token it errored on is in the follow
-- set for the group.
--
-- If yes, then there is a following rule that can accept this token. So
-- just continue parsing. Otherwise, no following rule has a use for the
-- token, so discard it.
--
-- In either case, we log an error to report at the end of parsing.
--
-- As an example, consider the grammar rules:
--
-- @
-- stmts = eof <|> (stmt <> stmts)
-- stmt = Stmt <$$> "let" <> identifier <> "=" <> expr <> ";"
-- @
--
-- With the example source text:
--
-- @
-- let x = 2
-- let y = 4;
-- @
--
-- A parse error will occur at the beginning of line 2, when @let@ is found
-- instead of the expected @;@.
--
-- First, we find the follow set of the @Stmt@ group. From the definition of
-- @stmts@, we can see @stmt@ is followed by @stmts@. @stmts@ can start with
-- either @eof@ or @stmt@ (which in turn can start with @"let"@). Hence, the
-- follow set is @[eof, "let"]@.
--
-- Since the "let" token the parser erroed on is in this set, it does not
-- get discarded by the group. This leaves it available for the call to
-- @stmt@, which can parse successfully because of this, resulting in the
-- following parse tree and error:
--
-- @
-- [ Stmt ["let", "x", "=", "2", error]
-- , Stmt ["let", "y", "=", "4", ";"]
-- ]
--
-- [Unexpected "let" at line 2 column 1: expected ";"]
-- @
--
-- But if we look at a more complex situation, with nested groups, we need
-- to introduce another concept to produce correct parses. For example:
--
-- @
-- list = List <$$> '[' <> expr <> ']'
-- tuple = Tuple <$$> '(' <> expr <> ')'
-- expr = list <|> tuple <|> number
-- @
--
-- with source:
--
-- @
-- [(2 ]
-- @
--
-- If we were to apply the same logic as before, we would find the follow
-- set of the @2@ to be @[")"]@. Since @]@ is not in this set, we discard
-- the right square bracket. But what we want to do is leave it so that
-- the @list@ rule can use it as its closing bracket.
--
-- To handle this scenario, we introduce an unwinding behavior to travel up
-- through group rules until an appropriate point is found to recover. We
-- do this first by defining the extended follow set of a rule @R@ as
-- the union of the follow sets of @R@ and its ancestor rules.
--
-- For example, we find the follow set and extended follow set of the @2@ in
-- the example code. To start, we write out the tree for grammar rules that
-- are used:
--
-- @
-- List <$$> '[' <> (Tuple <$$> '(' <> number <> ')') <> ']'
-- @
--
-- Now, we see the follow set of the @2@ is just @[')']@. Then, the follow
-- sets of its ancestors (@Tuple<$$>@ and @List<$$>@) are @[']']@ and @[]@,
-- respectively. So, the extended follow set is @[')', ']']@.
--
-- Unwinding starts when:
--
-- (1) The next token __is not__ in the follow set of the current rule
-- (2) The next token __is__ in the extended follow set of the current rule
--
-- While unwinding, the parser will stop at each group ancestor of the
-- original rule to determine whether to continue unwinding. Unwinding stops
-- when a group rule @S@ is reached such that the follow set of @S@ includes
-- the next token. At this point, the same thing happens as in the first
-- description of error recovery.
--
-- The example rules and source would produce the following parse tree and
-- errors:
--
-- @
-- [List ["[", Tuple ["(", "2"], "]"]]
--
-- [Unexpected "]" at line 1 column 9: expected expression]
-- @
--
-- ** Error Recovery Rules
--
-- A description of the full error recovery process:
--
-- 1. While parsing @first@ in @label '<$$>' first '<>' next@, an
-- unexpected token is encountered.
-- 2. The follow set and extended follow set for the group node are
-- calculated.
-- 3. If the unexpected token is in the follow set, continue parsing normally.
-- 4. If the unexpected token is not in the follow set, but is in the extended
-- follow set, begin unwinding:
-- a. Stop at each group rule ancestor @S@ and compute the follow set of
-- @S@.
-- b. If the follow set of @S@ contains the unexpected token, stop
-- unwinding and resume parsing.
-- c. Otherwise, keep unwinding.
-- 5. If the unexpected token is not in the extended follow set, discard the
-- token and continue parsing.
-- * Future improvements
-- |
-- __TODO #1:__
--
-- Move error recovery from the group rule to the '<>' rules.
--
-- __TODO #2:__
--
-- The current error recovery and the upgraded version described in TODO #1
-- can do what feels like the wrong thing in some cases. For example,
-- consider the following invalid wasp code:
--
-- @
-- route TestRoute { path: "/", to:
--
-- page TestPage { fn: import TestPage from "@ext/TestPage.js" }
-- @
--
-- To a human, this should parse as two declaration statements. But the
-- current system identifies @page@ as a variable for the dictionary entry
-- @to:@. Then it treats @TestPage@ as the key for another entry, and
-- complains that it found @{@ instead of @:@.
--
-- This is due to the unwinding behavior on errors being very short-sighted:
-- it only cares whether the next non-white token will succeed.
--
-- A possible fix to this is to explore all possible (or many possible)
-- unwinding stopping points and figuring out which one results in the
-- fewest future parse errors.
--
-- In case this has performance issues with having to explore lots of possible
-- parse trees, we could limit how many branches we check, similar to optimizing
-- DFS for state tree searches (like a chess solver).
--
-- __TODO #3:__
--
-- Change wasp language a bit to make the start of a declaration unambiguous.
-- This would make it much easier for the parser to recover from an unfinished
-- declaration.
--
-- Ideas for this:
--
-- (1) Make all type names reserved keywords in the grammar
-- (2) Distinguish between type names and variable names based on the case
-- of the first letter (lowercase = type, uppercase = variable)
--
-- __TODO #4:__
--
-- When the next token is not in the follow set of the rule, try discarding
-- multiple tokens, stopping when we do find a token in the follow set.
)
where
import Control.Monad (unless)
import Control.Monad.Except (ExceptT, MonadError (catchError, throwError), runExceptT)
import Control.Monad.State.Strict (State, gets, modify, runState)
import Data.Maybe (fromMaybe)
import Wasp.Backend.ConcreteSyntax (SyntaxKind, SyntaxNode (SyntaxNode, snodeChildren, snodeKind, snodeWidth))
import qualified Wasp.Backend.ConcreteSyntax as S
import Wasp.Backend.ParseError
import Wasp.Backend.Token (Token (tokenKind, tokenWidth), TokenKind, tokenKindIsTrivia)
import qualified Wasp.Backend.Token as T
import Wasp.Backend.TokenSet (TokenSet)
import qualified Wasp.Backend.TokenSet as TokenSet
-- | The type of a grammar rule. Use combinators and primitives to build more
-- complex grammars. Run with `parse`, which supports reasonable automatic error
-- recovery.
--
-- Every rule begins parsing by consuming trivia tokens at the beginning of the
-- input and creating a syntax node for each trivia token.
data GrammarRule
= -- | Always succeeds, consuming only trivia tokens.
Succeed
| -- | Checks if the first non-trivia token is the expected token
-- kind. If it is, it consumes the token and outputs a syntax node. Otherwise,
-- it does not consume the token and throws a parse error exception.
--
-- If a "SyntaxKind" is specified, use it to label the syntax node created.
Token TokenKind (Maybe SyntaxKind)
| -- | Succeeds if the input is empty after consuming trivia tokens at the
-- beginning of the input.
--
-- If it fails, it will report an error at the first non-trivia token while
-- also consuming all remaining input.
EOF
| -- | Run the inner grammar rule and wrap its resulting nodes in a node
-- labeled by the given syntax kind. If it fails, apply the error recovery
-- strategy (this is currently the only place where error recovery is
-- performed).
--
-- This is the only rule that adds structure and depth into the CST.
Group SyntaxKind GrammarRule
| -- | Run the first grammar rule. If it succeeds, run the second rule.
Chain GrammarRule GrammarRule
| -- | Run the grammar rule that can accept the next token in the input as its
-- first token.
--
-- Invariant: There can be no overlap in which token kinds each parser can
-- accept. The parser will use a Haskell 'error' in this case.
--
-- TODO: Remove the invariant and use backtracking to try each rule
Alternative GrammarRule GrammarRule
deriving (Eq, Ord, Show)
-- | @rule1 '<>' rule2@ is a parser that runs first @rule1@, then @rule2@.
-- Equivalent to @Chain rule1 rule2@.
instance Semigroup GrammarRule where
-- "Succeed" is identity of <>. The second pattern does affect the result tree,
-- but only by changing where trivia tokens are placed in the tree, which is
-- not an issue.
Succeed <> rule = rule
rule <> Succeed = rule
first <> second = Chain first second
-- | @mempty@ is a parser that matches an empty string, or a string of only
-- trivia tokens.
instance Monoid GrammarRule where
mempty = Succeed
-- | Run a "GrammarRule" on an input of tokens. Returns a tuple @(errors, nodes)@.
parse :: [Token] -> GrammarRule -> ([ParseError], [SyntaxNode])
parse tokens grammar =
let initialState =
ParseState
{ pstateInput = tokens,
pstateCurrentLevelNodes = [],
pstateErrors = [],
pstateNextTokenOffset = 0,
pstatePrevTokenOffset = 0,
pstateAcceptableNextTokens = []
}
in case runState (runExceptT (parseRule grammar)) initialState of
(Left err, state) -> case err of
Unwind -> error "Unwind at top-level GrammarRule (impossible, this is a bug in Wasp.Backend.ConcreteParser.Internal)"
ParseError perr ->
let errs = pstateErrors state ++ [perr]
newNode = makeErrorNode perr False
nodes = pstateCurrentLevelNodes state ++ [newNode]
in (errs, nodes)
(_, state) -> (pstateErrors state, pstateCurrentLevelNodes state)
data ParseState = ParseState
{ -- | Remaining tokens to be parsed.
pstateInput :: [Token],
-- | "SyntaxNode"s processed so far at the current level of the CST. An
-- initial top-level is created when the parser is started and each "Group"
-- rule creates a new level.
pstateCurrentLevelNodes :: [SyntaxNode],
-- | Errors collected that will be reported.
pstateErrors :: [ParseError],
-- | 0-based offset from the beginning of the source file, in characters,
-- of the start of the next token (@head . `pstateInput`@).
pstateNextTokenOffset :: !Int,
-- | 0-based offset from the beginning of the source file, in characters,
-- of the start of the last consumed token
pstatePrevTokenOffset :: !Int,
-- | Stack of acceptable next tokens, where entries to the stack are added
-- when we enter a group rule for parsing.
--
-- The first token set in this list is the current follow set and the union
-- of all of the token sets is the extended follow set.
pstateAcceptableNextTokens :: [TokenSet]
}
deriving (Eq, Show, Ord)
data ParseException
= -- | @ParseError parseError@ is used as a signal when an unexpected token is
-- encountered. This kind of exception is caught by error recovery points
-- and added to the output list of errors.
--
-- This may be converted into "Unwind" if error recovery determines unwinding
-- should occur.
ParseError ParseError
| -- | @Unwind@ is used as a signal during error recovery. It is used
-- to implement the unwinding behavior described at the top of the module.
--
-- Essentially, it means "I don't know how to parse this, but I know someone
-- above me can. Can you? If not, pass it along."
Unwind
type ParserM a = ExceptT ParseException (State ParseState) a
parseRule :: GrammarRule -> ParserM ()
parseRule Succeed = parseSucceed
parseRule (Token expectedKind maybeLabel) = parseToken expectedKind maybeLabel
parseRule EOF = parseEOF
parseRule (Group label inner) = parseGroup label inner
parseRule (Chain first second) = parseChain first second
parseRule (Alternative left right) = parseAlternative left right
parseSucceed :: ParserM ()
parseSucceed = consumeTrivia
parseToken :: TokenKind -> Maybe SyntaxKind -> ParserM ()
parseToken expectedKind maybeSyntaxKind =
(consumeTrivia >> peek) >>= \case
Just nextToken | tokenKind nextToken == expectedKind -> do
advance
let newNodeKind = fromMaybe (S.Token expectedKind) maybeSyntaxKind
let newNode =
SyntaxNode
{ snodeKind = newNodeKind,
snodeWidth = tokenWidth nextToken,
snodeChildren = []
}
pushNode newNode
nextToken -> throwParseError nextToken (TokenSet.fromKind expectedKind)
parseEOF :: ParserM ()
parseEOF =
(consumeTrivia >> peek) >>= \case
Just nextToken -> do
nextTokenOffset <- gets pstateNextTokenOffset
advance
let nextTokenKind = tokenKind nextToken
-- TODO: consider reporting all token kinds here? not sure
_tokenKinds <- collectUntilEOF
throwError $
ParseError $
UnexpectedToken
(Region nextTokenOffset (nextTokenOffset + tokenWidth nextToken))
nextTokenKind
TokenSet.fromEOF
where
collectUntilEOF :: ParserM [TokenKind]
collectUntilEOF =
peek >>= \case
Just next -> advance >> ((tokenKind next :) <$> collectUntilEOF)
Nothing -> pure []
Nothing -> do
return ()
parseGroup :: SyntaxKind -> GrammarRule -> ParserM ()
parseGroup label inner = do
consumeTrivia
currentLevelNodes <- gets pstateCurrentLevelNodes
modify (\s -> s {pstateCurrentLevelNodes = []})
maybeInnerParserError <- tryError $ parseRule inner
childNodes <- gets pstateCurrentLevelNodes
modify (\s -> s {pstateCurrentLevelNodes = currentLevelNodes})
case maybeInnerParserError of
Nothing -> pushGroupNode childNodes
Just (ParseError parseError) -> handleInnerParseError childNodes parseError
Just Unwind -> handleInnerUnwind childNodes
where
handleInnerParseError :: [SyntaxNode] -> ParseError -> ParserM ()
handleInnerParseError childNodes parseError = do
-- ASSUMPTION: The token errored on has not been consumed
--
-- (1) Check whether the token kind we errored on can be accepted by any
-- following rule.
--
-- (2) Report the error and create the group node containing the error and
-- what it parsed before the error.
--
-- (3) Handle the error:
-- (3a) If it can not be accepted by a following rule, discard the error
-- token.
-- (3b) If unwinding is required to accept the token, begin unwinding.
-- (3c) Otherwise, the error can be handled without unwinding, so do
-- nothing and continue parsing.
-- (1)
let errorTokenKind = case parseError of
UnexpectedEOF _ _ -> Nothing
UnexpectedToken _ k _ -> Just k
errorCanBeHandled <- canAnyFollowingRuleAcceptTokenKind errorTokenKind
errorCanBeHandledWithoutUnwind <- canImmediateFollowingRuleAcceptTokenKind errorTokenKind
-- (2)
logError parseError
let nodeForErr = makeErrorNode parseError errorCanBeHandled
pushGroupNode $ childNodes ++ [nodeForErr]
-- (3)
if not errorCanBeHandled
then -- (3a)
advance
else -- (3b)
unless errorCanBeHandledWithoutUnwind $ throwError Unwind
handleInnerUnwind :: [SyntaxNode] -> ParserM ()
handleInnerUnwind childNodes = do
errorTokenKind <- fmap tokenKind <$> peek
-- When unwinding, check if this is the level where we can parse the error
-- token. If it is, then stop unwinding. Otherwise, we continue unwinding.
errorCanBeHandledWithoutUnwind <- canAnyFollowingRuleAcceptTokenKind errorTokenKind
pushGroupNode childNodes
unless errorCanBeHandledWithoutUnwind $ throwError Unwind
pushGroupNode :: [SyntaxNode] -> ParserM ()
pushGroupNode children =
let width = sum $ map snodeWidth children
in pushNode $
SyntaxNode
{ snodeKind = label,
snodeWidth = width,
snodeChildren = children
}
parseChain :: GrammarRule -> GrammarRule -> ParserM ()
parseChain first second = do
consumeTrivia
-- First, figure out following set for @second@
pushFollowingTokens $ getValidFirstTokens second
maybeError <- tryError (parseRule first)
-- Make sure to remove following set now that we're leaving this "Chain"
popFollowingTokens
-- Run second (or throw error if first failed)
case maybeError of
Nothing -> parseRule second
Just err -> throwError err
parseAlternative :: GrammarRule -> GrammarRule -> ParserM ()
parseAlternative left right
| leftFirstTokens `TokenSet.intersection` rightFirstTokens /= TokenSet.empty =
error $
unlines
[ "[WARNING] Alternative grammar rule has two rules that can accept the same first token."
++ "This will result in the second rule never running on these tokens.",
"\tLeft Tokens:",
"\t" ++ show leftFirstTokens,
"\n\tRight Tokens:",
"\t" ++ show rightFirstTokens
]
| otherwise = do
consumeTrivia
nextToken <- peek
let nextKind = tokenKind <$> nextToken
if nextKind `willSucceedIn` left
then parseRule left
else
if nextKind `willSucceedIn` right
then parseRule right
else throwParseError nextToken (leftFirstTokens `TokenSet.union` rightFirstTokens)
where
leftFirstTokens = getValidFirstTokens left
rightFirstTokens = getValidFirstTokens right
-- | Advance past trivia tokens, making syntax nodes for each token
consumeTrivia :: ParserM ()
consumeTrivia =
peek >>= \case
Nothing -> pure ()
Just nextToken
-- Consume any whitespace
| tokenKindIsTrivia (tokenKind nextToken) -> do
advance
let newNode =
SyntaxNode
{ snodeKind = S.Token (tokenKind nextToken),
snodeWidth = tokenWidth nextToken,
snodeChildren = []
}
pushNode newNode
consumeTrivia
| otherwise -> pure ()
-- | Peek the immediate next token in input (including trivia)
peek :: ParserM (Maybe Token)
peek =
gets pstateInput >>= \case
[] -> return Nothing
tok : _ -> return (Just tok)
-- | Advance past the next token in the input, updating offsets.
advance :: ParserM ()
advance =
gets pstateInput >>= \case
[] -> modify (\s -> s {pstatePrevTokenOffset = pstateNextTokenOffset s})
(consumed : remaining) -> do
newLastOffset <- gets pstateNextTokenOffset
let newNextOffset = newLastOffset + tokenWidth consumed
modify (\s -> s {pstateInput = remaining, pstatePrevTokenOffset = newLastOffset, pstateNextTokenOffset = newNextOffset})
-- | @throwParseError actual expected@ throws an error.
--
-- Assumes @actual@ has not been `advance`d past yet (and does not advance past
-- it).
throwParseError :: Maybe Token -> TokenSet -> ParserM ()
throwParseError Nothing eset = do
offset <- gets pstateNextTokenOffset
throwError $ ParseError $ UnexpectedEOF offset eset
throwParseError (Just actual) expectedTokens = do
offset <- gets pstateNextTokenOffset
throwError $ ParseError $ UnexpectedToken (Region offset (offset + tokenWidth actual)) (tokenKind actual) expectedTokens
-- | Make an error node. Error nodes should only be produced when catching and
-- pushing errors to the state.
makeErrorNode ::
-- | Parse error that will be used to calculate the width of the error node.
ParseError ->
-- | If "True", give the error node a width of 0. Otherwise, use the
-- width of the error item.
Bool ->
SyntaxNode
makeErrorNode (UnexpectedEOF _ _) _ =
SyntaxNode
{ snodeKind = S.Error,
snodeWidth = 0,
snodeChildren = []
}
makeErrorNode (UnexpectedToken (Region so eo) _ _) zeroWidth =
SyntaxNode
{ snodeKind = S.Error,
snodeWidth = if zeroWidth then 0 else eo - so,
snodeChildren = []
}
-- | Add a node to the output.
pushNode :: SyntaxNode -> ParserM ()
pushNode node = modify (\s -> s {pstateCurrentLevelNodes = pstateCurrentLevelNodes s ++ [node]})
-- | Add an error to the output (does not throw the error).
logError :: ParseError -> ParserM ()
logError err = modify (\s -> s {pstateErrors = pstateErrors s ++ [err]})
-- | Add an "TokenSet" to the following stack.
pushFollowingTokens :: TokenSet -> ParserM ()
pushFollowingTokens eset = modify (\s -> s {pstateAcceptableNextTokens = eset : pstateAcceptableNextTokens s})
-- | Remove the last added "TokenSet" from the following stack.
popFollowingTokens :: ParserM ()
popFollowingTokens = modify (\s -> s {pstateAcceptableNextTokens = drop 1 $ pstateAcceptableNextTokens s})
-- | Check if a "TokenKind" (or end of file) is one of the first possible tokens
-- in a "GrammarRule".
willSucceedIn :: Maybe TokenKind -> GrammarRule -> Bool
_ `willSucceedIn` Succeed = True
k `willSucceedIn` grammarRule = k `TokenSet.member` getValidFirstTokens grammarRule
-- | Find the "TokenSet" of "TokenKind"s that the "GrammarRule" would succeed on
-- as its first input.
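--
-- A small illustration (informal): for the statement rule from
-- "Wasp.Backend.ConcreteParser",
--
-- @
-- Decl <$$> (T.Identifier `as` DeclType) <> (T.Identifier `as` DeclName) <> expr
-- @
--
-- the result is the singleton set containing "T.Identifier": the chain's first
-- sub-rule can never succeed on empty input, so the later rules contribute
-- nothing.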
getValidFirstTokens :: GrammarRule -> TokenSet
getValidFirstTokens grammarRule = fst $ go grammarRule TokenSet.empty
where
-- Returns (firstTokens, can succeed with no input?)
go :: GrammarRule -> TokenSet -> (TokenSet, Bool)
go Succeed eset = (eset, True)
go (Token tkind _) eset = (tkind `TokenSet.insertKind` eset, False)
go EOF eset = (TokenSet.insertEof eset, False)
go (Group _ p) eset = go p eset
go (Chain first second) eset =
let (eset', firstCanBeEmpty) = go first eset
in if firstCanBeEmpty
then go second eset'
else (eset', False)
go (Alternative left right) eset =
let (eset', leftCanBeEmpty) = go left eset
(eset'', rightCanBeEmpty) = go right eset'
in (eset'', leftCanBeEmpty || rightCanBeEmpty)
-- | Check if any following rule can accept the given "TokenKind". If this is
-- true, then unwinding at the current location will succeed in recovering from
-- an error.
canAnyFollowingRuleAcceptTokenKind :: Maybe TokenKind -> ParserM Bool
canAnyFollowingRuleAcceptTokenKind nextTokenKind = do
followingTokens <- gets pstateAcceptableNextTokens
return $ any (TokenSet.member nextTokenKind) followingTokens
-- | Check if the next rule can accept the given "TokenKind". If this is true,
-- then an error can be recovered from at the current context with no unwinding.
--
-- Pre-condition: @pstateAcceptableNextTokens@ is not empty.
canImmediateFollowingRuleAcceptTokenKind :: Maybe TokenKind -> ParserM Bool
canImmediateFollowingRuleAcceptTokenKind nextTokenKind = do
followingTokens <- gets pstateAcceptableNextTokens
return $ case followingTokens of
[] -> error "canImmediateFollowingRuleAcceptTokenKind called with empty pstateAcceptableNextTokens. This is a parser library error."
(nextTokens : _) -> nextTokenKind `TokenSet.member` nextTokens
infix 9 `as`
-- | Matches a single token of the specified "TokenKind" and produce an output
-- node with the given "SyntaxKind".
--
-- __Example:__
--
-- >>> parse [T.Token T.Identifier 3] $ T.Identifier `as` S.DeclName
-- ([],[SyntaxNode {snodeKind = DeclName, snodeWidth = 3, snodeChildren = []}])
as :: TokenKind -> SyntaxKind -> GrammarRule
tkind `as` skind = Token tkind (Just skind)
infixr 5 <$$>
-- | @syntaxKind '<$$>' rule@ (pronounced group) runs a grammar rule and places the
-- nodes that grammar rule adds to the
-- output in a parent node.
--
-- __Example:__
--
-- >>> parse [T.Token T.Identifier 3, T.Token T.Int 2] $ S.Decl <$$> identifier <> int
-- ([],[SyntaxNode {snodeKind = Decl, snodeWidth = 5, snodeChildren = [SyntaxNode {snodeKind = Token Identifier, snodeWidth = 3, snodeChildren = []},SyntaxNode {snodeKind = Token Int, snodeWidth = 2, snodeChildren = []}]}])
(<$$>) :: SyntaxKind -> GrammarRule -> GrammarRule
label <$$> grammarRule = Group label grammarRule
-- | Run a grammar rule or consume no input.
--
-- __Example:__
--
-- >>> parse [T.Token T.Int 1, T.Token T.Int 1] $ int <> perhaps comma <> int
-- ([],[SyntaxNode {snodeKind = Token Int, snodeWidth = 1, snodeChildren = []},SyntaxNode {snodeKind = Token Int, snodeWidth = 1, snodeChildren = []}])
perhaps :: GrammarRule -> GrammarRule
perhaps grammarRule = grammarRule <|> Succeed
infixr 4 <|>
-- | Run a grammar, choosing either the left or the right rule.
--
-- __Example:__
--
-- >>> let number = int <|> double
-- >>> parse [T.Token T.Int 2] number
-- ([],[SyntaxNode {snodeKind = Token Int, snodeWidth = 2, snodeChildren = []}])
-- >>> parse [T.Token T.Double 3] number
-- ([],[SyntaxNode {snodeKind = Token Double, snodeWidth = 3, snodeChildren = []}])
(<|>) :: GrammarRule -> GrammarRule -> GrammarRule
left <|> right = Alternative left right
-- Primitive grammar rules
lparen :: GrammarRule
lparen = Token T.LParen Nothing
rparen :: GrammarRule
rparen = Token T.RParen Nothing
lsquare :: GrammarRule
lsquare = Token T.LSquare Nothing
rsquare :: GrammarRule
rsquare = Token T.RSquare Nothing
lcurly :: GrammarRule
lcurly = Token T.LCurly Nothing
rcurly :: GrammarRule
rcurly = Token T.RCurly Nothing
comma :: GrammarRule
comma = Token T.Comma Nothing
colon :: GrammarRule
colon = Token T.Colon Nothing
kwImport :: GrammarRule
kwImport = Token T.KwImport Nothing
kwFrom :: GrammarRule
kwFrom = Token T.KwFrom Nothing
-- | Produces a "S.BoolTrue" syntax node
kwTrue :: GrammarRule
kwTrue = T.KwTrue `as` S.BoolTrue
-- | Produces a "S.BoolFalse" syntax node
kwFalse :: GrammarRule
kwFalse = T.KwFalse `as` S.BoolFalse
-- | Produces a "S.String" syntax node
string :: GrammarRule
string = T.String `as` S.String
-- | Produces a "S.Int" syntax node
int :: GrammarRule
int = T.Int `as` S.Int
-- | Produces a "S.Double" syntax node
double :: GrammarRule
double = T.Double `as` S.Double
lquote :: GrammarRule
lquote = Token T.LQuote Nothing
rquote :: GrammarRule
rquote = Token T.RQuote Nothing
quoted :: GrammarRule
quoted = Token T.Quoted Nothing
-- | Succeeds when the parser is at the end of the file.
eof :: GrammarRule
eof = EOF
-- UTILITIES:
tryError :: MonadError e m => m () -> m (Maybe e)
tryError m = (m >> return Nothing) `catchError` (return . Just)


@ -0,0 +1,89 @@
{-# LANGUAGE DeriveGeneric #-}
module Wasp.Backend.ConcreteSyntax
( -- * Concrete Syntax
-- | This data structure for storing syntax information is inspired by the
-- rust analyzer's parsing library rowan (https://github.com/rust-analyzer/rowan).
SyntaxNode (..),
SyntaxKind (..),
cstPrettyPrint,
syntaxKindIsTrivia,
)
where
import Control.DeepSeq (NFData)
import Data.Aeson (ToJSON)
import Data.List (foldl', intercalate)
import GHC.Generics (Generic)
import Wasp.Backend.Token (TokenKind, tokenKindIsTrivia)
-- | The kind of a "SyntaxNode".
data SyntaxKind
= Token !TokenKind
| Program
| Decl
| DeclType
| DeclName
| Dict
| DictEntry
| DictKey
| List
| Tuple
| ExtImport
| ExtImportModule
| ExtImportField
| ExtImportPath
| String
| Int
| Double
| BoolTrue
| BoolFalse
| Var
| Quoter
| Error
deriving (Eq, Ord, Show, Generic)
instance NFData SyntaxKind
instance ToJSON SyntaxKind
-- | A node in a concrete syntax tree. We use such a loosely typed system so
-- that we can store all pieces of source syntax in the tree, including
-- comments, whitespace, and unexpected tokens/characters.
--
-- Later, this CST is processed into an AST, a more strongly typed format that
-- leaves out all of the trivia tokens.
data SyntaxNode = SyntaxNode
{ snodeKind :: !SyntaxKind,
-- | Width of this "SyntaxNode". For nodes with children, this is the sum
-- of their children's widths. For nodes without children, this is the
-- width of the source text covered by the node.
snodeWidth :: !Int,
snodeChildren :: [SyntaxNode]
}
deriving (Eq, Ord, Show, Generic)
instance NFData SyntaxNode
instance ToJSON SyntaxNode
-- | Pretty print a concrete syntax tree. Shows tree of syntax kinds and their
-- offsets in the source file.
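--
-- A small worked example (illustrative):
--
-- >>> putStrLn $ cstPrettyPrint $ SyntaxNode Decl 5 [SyntaxNode DeclType 3 [], SyntaxNode DeclName 2 []]
-- Decl@0..5
--   DeclType@0..3
--   DeclName@3..5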
cstPrettyPrint :: SyntaxNode -> String
cstPrettyPrint node = go 0 "" node
where
go :: Int -> String -> SyntaxNode -> String
go offset indent pnode =
let nodeTxt = indent ++ show (snodeKind pnode) ++ "@" ++ show offset ++ ".." ++ show (offset + snodeWidth pnode)
childrenTxt =
fst $
foldl'
(\(strs, o) child -> (strs ++ [go o (indent ++ " ") child], o + snodeWidth child))
([], offset)
(snodeChildren pnode)
in intercalate "\n" (nodeTxt : childrenTxt)
syntaxKindIsTrivia :: SyntaxKind -> Bool
syntaxKindIsTrivia (Token k) = tokenKindIsTrivia k
syntaxKindIsTrivia _ = False


@ -0,0 +1,6 @@
module Wasp.Backend.Lexer
( Wasp.Backend.Lexer.Lexer.lex,
)
where
import Wasp.Backend.Lexer.Lexer (lex)


@ -0,0 +1,142 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
module Wasp.Backend.Lexer.Internal
( -- * Lexer monad code
Lexer,
runLexer,
initialLexState,
LexState,
LexerStartCode (..),
LexInput (..),
getInput,
updateInput,
setStartCode,
getStartCode,
-- * Alex utilities
AlexInput,
startCodeToInt,
alexGetByte,
alexInputPrevChar,
beginQuoter,
lexQuoterEndTag,
createToken,
)
where
import Codec.Binary.UTF8.String (encodeChar)
import Control.Monad.State.Strict (MonadState, State, evalState, get, modify)
import Data.Word (Word8)
import Wasp.Backend.Token (Token (..), TokenKind)
import qualified Wasp.Backend.Token as T
-- LEXER MONAD CODE
newtype Lexer a = Lexer {unLexer :: State LexState a}
deriving (Functor, Applicative, Monad, MonadState LexState)
runLexer :: Lexer a -> LexState -> a
runLexer lexer state = evalState (unLexer lexer) state
initialLexState :: String -> LexState
initialLexState source =
LexState
{ lstateInput = LexInput '\n' [] source,
lstateStartCode = DefaultStartCode
}
data LexState = LexState
{ lstateInput :: LexInput,
lstateStartCode :: LexerStartCode
}
getInput :: Lexer LexInput
getInput = lstateInput <$> get
updateInput :: Int -> Lexer ()
updateInput !consumed = do
(LexInput _ _ remaining) <- getInput
let newInput =
let (prevChar : remaining') = drop (consumed - 1) remaining
in LexInput prevChar [] remaining'
modify $ \s -> s {lstateInput = newInput}
getStartCode :: Lexer LexerStartCode
getStartCode = lstateStartCode <$> get
setStartCode :: LexerStartCode -> Lexer ()
setStartCode startCode = modify $ \s -> s {lstateStartCode = startCode}
-- | A representation of the lexer's start code: https://www.haskell.org/alex/doc/html/alex-files.html#startcodes
data LexerStartCode
= -- | For a start code @DefaultStartCode@, the lexer is in start code <0>
DefaultStartCode
| -- | For a start code @QuoterStartCode tag@, the lexer is in start code <quoter> with opening tag @tag@
QuoterStartCode String
deriving (Show)
-- | The type of the input given to the parser/lexer.
--
-- An input @(prevChar, bs, remainingSource)@ represents:
-- - @prevChar@ The previous character, successfully consumed by the lexer.
-- - @bs@ The yet unconsumed UTF8 bytes of the current character being lexed.
-- - @remainingSource@ The remaining source to be lexed and parsed
-- (including the character currently being lexed as the first char in it).
data LexInput = LexInput Char [Word8] String
-- ALEX UTILITIES
type AlexInput = LexInput
-- | Convert the ParserState's start code to an int for Alex to use
startCodeToInt :: Int -> LexerStartCode -> Int
startCodeToInt _ DefaultStartCode = 0
startCodeToInt quoter (QuoterStartCode _) = quoter
-- | Required by Alex.
--
-- This function is taken from the Alex basic wrapper.
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (LexInput prevChar (b : bs) remainingSource) = Just (b, LexInput prevChar bs remainingSource)
alexGetByte (LexInput _ [] []) = Nothing
alexGetByte (LexInput _ [] (currChar : remainingSource)) = case encodeChar currChar of
(b : bs) -> Just (b, LexInput currChar bs remainingSource)
[] -> Nothing
-- | Required by Alex.
--
-- This function is taken from the Alex basic wrapper.
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (LexInput prevChar _ _) = prevChar
-- | Takes a lexeme like "{=json" and sets the quoter start code
beginQuoter :: String -> Lexer Token
beginQuoter leftQuoteTag = do
let tag = drop 2 leftQuoteTag
setStartCode (QuoterStartCode tag)
createToken T.LQuote leftQuoteTag
-- | Takes a lexeme like "json=}" and either ends a quoter or add quoted text to
-- the quoter
lexQuoterEndTag :: String -> Lexer Token
lexQuoterEndTag rightQuoteTag = do
startCode <- getStartCode
case startCode of
DefaultStartCode -> error "impossible: lexQuoterEndTag with DefaultStartCode"
QuoterStartCode startTag | startTag == tag -> do
setStartCode DefaultStartCode
createToken T.RQuote rightQuoteTag
_ -> do
createToken T.Quoted rightQuoteTag
where
tag = take (length rightQuoteTag - 2) rightQuoteTag
-- | Makes an action that creates a token from a "TokenKind"
createToken :: TokenKind -> (String -> Lexer Token)
createToken kind lexeme =
return $
Token
{ tokenKind = kind,
tokenWidth = length lexeme
}


@ -0,0 +1,108 @@
{
-- This file is processed by Alex (https://www.haskell.org/alex/) and generates
-- the module `Wasp.Backend.Lexer.Lexer`
module Wasp.Backend.Lexer.Lexer
( Wasp.Backend.Lexer.Lexer.lex
) where
import Wasp.Backend.Lexer.Internal
import Wasp.Backend.Token (Token)
import qualified Wasp.Backend.Token as T
}
-- Character set aliases
$space = [\ \t\f\v\r] -- Non-newline whitespace
$digit = 0-9
$alpha = [a-zA-Z]
$identstart = [_$alpha]
$ident = [_$alpha$digit]
$any = [.$white]
-- Regular expression aliases
-- matches string-literal on a single line, from https://stackoverflow.com/a/9260547/3902376
@string = \"([^\\\"]|\\.)*\"
@double = "-"? $digit+ "." $digit+
@integer = "-"? $digit+
@ident = $identstart $ident* "'"*
@linecomment = "//" [^\n\r]*
-- Based on https://stackoverflow.com/a/16165598/1509394 .
@blockcomment = "/*" (("*"[^\/]) | [^\*] | $white)* "*/"
tokens :-
<0> $space+ { createToken T.White }
<0> \n { createToken T.Newline }
<0> @linecomment { createToken T.Comment }
<0> @blockcomment { createToken T.Comment }
-- Quoter rules:
-- Uses Alex start codes to lex quoted characters with different rules:
-- - On "{=tag", enter <quoter> start code and make a TLQuote token
-- - While in <quoter>, if "tag=}" is seen
-- - If this closing tag matches the opening, enter <0> and make a TRQuote token
-- - Otherwise, stay in <quoter> and make a TQuoted token
-- - Otherwise, take one character at a time and make a TQuoted token
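-- For example (informal), the input "{=json a b json=}" lexes to
--   LQuote "{=json", Quoted " ", Quoted "a", Quoted " ", Quoted "b",
--   Quoted " ", RQuote "json=}"
-- because inside <quoter> everything except the matching closing tag is
-- consumed one character at a time.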
<0> "{=" @ident { beginQuoter }
<quoter> @ident "=}" { lexQuoterEndTag }
<quoter> $any { createToken T.Quoted }
-- Simple tokens
<0> "(" { createToken T.LParen }
<0> ")" { createToken T.RParen }
<0> "[" { createToken T.LSquare }
<0> "]" { createToken T.RSquare }
<0> "{" { createToken T.LCurly }
<0> "}" { createToken T.RCurly }
<0> "," { createToken T.Comma }
<0> ":" { createToken T.Colon }
<0> "import" { createToken T.KwImport }
<0> "from" { createToken T.KwFrom }
<0> "true" { createToken T.KwTrue }
<0> "false" { createToken T.KwFalse }
-- Strings, numbers, identifiers
<0> @string { createToken T.String }
<0> @double { createToken T.Double }
<0> @integer { createToken T.Int }
<0> @ident { createToken T.Identifier }
{
-- | Lexes a single token from the input, returning "Nothing" if the lexer has
-- reached the end of the input. This function uses a continuation passing style
-- so that this function and its consumer can benefit from tail call
-- optimization.
--
-- This function internally calls `alexScan`, which is a function generated by
-- Alex responsible for doing actual lexing/scanning.
lexOne :: (Maybe Token -> Lexer a) -> Lexer a
lexOne continue = do
input@(LexInput _ _ remaining) <- getInput
startCodeInt <- startCodeToInt quoter <$> getStartCode
case alexScan input startCodeInt of
AlexError (LexInput _ _ (c:_)) -> do
token <- createToken T.Error [c]
updateInput 1
continue (Just token)
AlexError (LexInput c _ []) -> do
token <- createToken T.Error [c]
updateInput 1
continue (Just token)
AlexSkip _ _ ->
error "AlexSkip is impossible: lexer should not skip any input"
AlexToken _ numChars makeToken -> do
let lexeme = take numChars remaining
token <- makeToken lexeme
updateInput numChars
continue (Just token)
AlexEOF -> continue Nothing
-- | @lex source@ lexes all of @source@ into "Token"s.
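--
-- For example (informal): @lex "app Test"@ yields three tokens: an Identifier
-- of width 3, a White of width 1, and an Identifier of width 4.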
lex :: String -> [Token]
lex source = runLexer (lexOne continue) $ initialLexState source
where
continue :: Maybe Token -> Lexer [Token]
continue Nothing = return []
-- This is written awkwardly like this so it is a tail call to @lexOne@
continue (Just tok) = (tok:) <$> lexOne continue
}


@ -0,0 +1,111 @@
{-# LANGUAGE DeriveGeneric #-}
module Wasp.Backend.ParseError
( -- * Parse error
ParseError (..),
-- * Display functions
showError,
showErrorMessage,
errorRegion,
-- * Source positions
Region (..),
SourcePos (..),
offsetToSourcePos,
)
where
import Control.DeepSeq (NFData)
import Data.List (intercalate)
import GHC.Generics (Generic)
import Wasp.Backend.Token (TokenKind)
import qualified Wasp.Backend.Token as T
import Wasp.Backend.TokenSet (TokenSet)
import qualified Wasp.Backend.TokenSet as TokenSet
data ParseError
= UnexpectedToken !Region !TokenKind TokenSet
| UnexpectedEOF !Int TokenSet
deriving (Eq, Ord, Show, Generic)
instance NFData ParseError
data Region = Region !Int !Int deriving (Eq, Ord, Show, Generic)
instance NFData Region
data SourcePos = SourcePos !Int !Int deriving (Eq, Ord, Show, Generic)
instance NFData SourcePos
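-- | Convert a 0-based character offset in @source@ into a 1-based line/column
-- "SourcePos". A small worked example (illustrative):
--
-- >>> offsetToSourcePos "ab\ncd" 3
-- SourcePos 2 1
--
-- (offset 3 is the first character after the newline: line 2, column 1).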
offsetToSourcePos :: String -> Int -> SourcePos
offsetToSourcePos source targetOffset = reach 0 (SourcePos 1 1) source
where
reach :: Int -> SourcePos -> String -> SourcePos
reach o (SourcePos l c) remaining
| o == targetOffset = SourcePos l c
| [] <- remaining = SourcePos l c
| ('\n' : remaining') <- remaining =
let sp' = SourcePos (l + 1) 1
in reach (o + 1) sp' remaining'
| (_ : remaining') <- remaining =
let sp' = SourcePos l (c + 1)
in reach (o + 1) sp' remaining'
showError :: String -> ParseError -> String
showError source msg =
let (Region so eo) = errorRegion msg
start = offsetToSourcePos source so
end = offsetToSourcePos source eo
in "Parse error at " ++ showRegion start end ++ " (" ++ show so ++ ".." ++ show eo ++ ")\n " ++ showErrorMessage msg
errorRegion :: ParseError -> Region
errorRegion (UnexpectedEOF o _) = Region o o
errorRegion (UnexpectedToken rgn _ _) = rgn
showErrorMessage :: ParseError -> String
showErrorMessage (UnexpectedEOF _ expecteds) =
"Unexpected end of file, " ++ showExpected expecteds
showErrorMessage (UnexpectedToken _ actual expecteds) =
"Unexpected token " ++ showTokenKind actual ++ ", " ++ showExpected expecteds
showExpected :: TokenSet -> String
showExpected expecteds = "expected one of " ++ showExpecteds expecteds
showExpecteds :: TokenSet -> String
showExpecteds expecteds =
let kindStrs = map showTokenKind $ TokenSet.toList expecteds
eofStrs = if TokenSet.eofMember expecteds then ["<eof>"] else []
in intercalate "," (kindStrs ++ eofStrs)
showTokenKind :: TokenKind -> String
showTokenKind T.White = "<whitespace>"
showTokenKind T.Newline = "\\n"
showTokenKind T.Comment = "<comment>"
showTokenKind T.LParen = "'('"
showTokenKind T.RParen = "')'"
showTokenKind T.LSquare = "'['"
showTokenKind T.RSquare = "']'"
showTokenKind T.LCurly = "'{'"
showTokenKind T.RCurly = "'}'"
showTokenKind T.Comma = "','"
showTokenKind T.Colon = "':'"
showTokenKind T.KwImport = "'import'"
showTokenKind T.KwFrom = "'from'"
showTokenKind T.KwTrue = "'true'"
showTokenKind T.KwFalse = "'false'"
showTokenKind T.String = "<string>"
showTokenKind T.Int = "<number>"
showTokenKind T.Double = "<number>"
showTokenKind T.LQuote = "'{='"
showTokenKind T.RQuote = "'=}'"
showTokenKind T.Quoted = "<any>" -- Should be impossible, hard to prove though
showTokenKind T.Identifier = "<identifier>"
showTokenKind T.Error = "<error>"
showRegion :: SourcePos -> SourcePos -> String
showRegion start@(SourcePos sl sc) end@(SourcePos el ec)
| start == end = show sl ++ ":" ++ show sc
| sl == el = show sl ++ ":" ++ show sc ++ "-" ++ show ec
| otherwise = show sl ++ ":" ++ show sc ++ "-" ++ show el ++ ":" ++ show ec


@ -0,0 +1,81 @@
{-# LANGUAGE DeriveGeneric #-}
module Wasp.Backend.Token
( Token (..),
TokenKind (..),
tokenKindIsTrivia,
)
where
import Control.DeepSeq (NFData)
import Data.Aeson (ToJSON)
import GHC.Generics (Generic)
-- | The kind of token
--
-- This makes no distinction between value-containing tokens and atomic tokens
-- (i.e. "String" and "KwImport") to make kind comparison easier. To recover the
-- value associated with a token, you need the context provided by a "Token".
data TokenKind
= White
| -- | Newlines (only \n) are separated from whitespace to accommodate line-break
-- based error recovery, if that is ever implemented in the future
Newline
| Comment
| LParen
| RParen
| LSquare
| RSquare
| LCurly
| RCurly
| Comma
| Colon
| KwImport
| KwFrom
| KwTrue
| KwFalse
| String
| Int
| Double
| -- | "{= <identifier>"
LQuote
| -- | "<identifier> =}"
RQuote
| Quoted
| -- | Non-keyword identifier
Identifier
| -- | Kind for unexpected characters
Error
deriving (Eq, Ord, Show, Generic)
instance NFData TokenKind
instance ToJSON TokenKind
-- | A token representing a span of text from the source. For value-containing
-- tokens, the value can be recovered using @tokenText@.
data Token = Token
{ tokenKind :: !TokenKind,
-- | The width of the text representing this token. The source position is
-- not stored; only the width. Later, offsets into the source are computed
-- from an entire tree of tokens, and source positions are determined on
-- demand (for example, when an offset is going to be displayed in an error
-- message).
--
-- This is guaranteed to be equivalent to @length . tokenText@, but is
-- stored explicitly since it is accessed frequently.
tokenWidth :: !Int
}
deriving (Eq, Show, Ord, Generic)
instance NFData Token
instance ToJSON Token
-- | Check if a "TokenKind" is trivia (a token kind that does not affect the
-- parse structure, namely whitespace and comments)
tokenKindIsTrivia :: TokenKind -> Bool
tokenKindIsTrivia White = True
tokenKindIsTrivia Newline = True
tokenKindIsTrivia Comment = True
tokenKindIsTrivia _ = False


@ -0,0 +1,110 @@
{-# LANGUAGE DeriveGeneric #-}
module Wasp.Backend.TokenSet
( -- * TokenSet
-- | A set of "TokenKind"s and possibly EOF (which is not a "TokenKind").
--
-- In functions on a "TokenSet", a @Maybe "TokenKind"@ is either @Nothing@,
-- representing EOF, or @Just k@, representing the kind @k@.
TokenSet,
-- * Membership predicates
member,
eofMember,
kindMember,
-- * Operations
insert,
insertKind,
insertEof,
union,
intersection,
-- * Constructors
empty,
singleton,
fromEOF,
fromKind,
fromList,
-- * Destructors
toList,
)
where
import Control.DeepSeq (NFData)
import Data.Set (Set)
import qualified Data.Set as Set
import GHC.Generics (Generic)
import Wasp.Backend.Token (TokenKind)
data TokenSet = TokenSet
{ -- | Check if EOF is part of the "TokenSet"
eofMember :: !Bool,
kindSet :: Set TokenKind
}
deriving (Eq, Ord, Show, Generic)
instance NFData TokenSet
-- | Check if a "TokenKind" or EOF is part of the "TokenSet".
member :: Maybe TokenKind -> TokenSet -> Bool
member Nothing set = eofMember set
member (Just k) set = kindMember k set
-- | Check if a "TokenKind" is part of the "TokenSet".
kindMember :: TokenKind -> TokenSet -> Bool
kindMember k set = k `Set.member` kindSet set
-- | Insert a "TokenKind" or EOF into a "TokenSet".
insert :: Maybe TokenKind -> TokenSet -> TokenSet
insert Nothing set = insertEof set
insert (Just k) set = insertKind k set
-- | Insert EOF into a "TokenSet".
insertEof :: TokenSet -> TokenSet
insertEof set = set {eofMember = True}
-- | Insert a "TokenKind" into a "TokenSet".
insertKind :: TokenKind -> TokenSet -> TokenSet
insertKind k set = set {kindSet = Set.insert k (kindSet set)}
-- | Get the union of two "TokenSet"s.
union :: TokenSet -> TokenSet -> TokenSet
union left right =
let unionEofMember = eofMember left || eofMember right
unionKindSet = kindSet left `Set.union` kindSet right
in TokenSet {eofMember = unionEofMember, kindSet = unionKindSet}
-- | Get the intersection of two "TokenSet"s.
intersection :: TokenSet -> TokenSet -> TokenSet
intersection left right =
let intersectionEofMember = eofMember left && eofMember right
intersectionKindSet = kindSet left `Set.intersection` kindSet right
in TokenSet {eofMember = intersectionEofMember, kindSet = intersectionKindSet}
-- | The empty "TokenSet".
empty :: TokenSet
empty = TokenSet {eofMember = False, kindSet = Set.empty}
-- | Create a "TokenSet" containing a single "TokenKind" or EOF.
singleton :: Maybe TokenKind -> TokenSet
singleton Nothing = fromEOF
singleton (Just k) = fromKind k
-- | Create a "TokenSet" containing just EOF.
fromEOF :: TokenSet
fromEOF = TokenSet {eofMember = True, kindSet = Set.empty}
-- | Create a "TokenSet" containing a single "TokenKind".
fromKind :: TokenKind -> TokenSet
fromKind k = TokenSet {eofMember = False, kindSet = Set.singleton k}
-- | Create a "TokenSet" from a list of "TokenKind"s.
fromList :: [TokenKind] -> TokenSet
fromList ks = TokenSet {eofMember = False, kindSet = Set.fromList ks}
-- | Get a list of all "TokenKind"s in a "TokenSet".
toList :: TokenSet -> [TokenKind]
toList set = Set.toList (kindSet set)
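-- An informal example (using "TokenKind" constructors from "Wasp.Backend.Token"):
-- @fromList [Comma] `union` fromEOF@ contains the comma kind and EOF, so
-- @member Nothing@ on it returns True, while @kindMember RCurly@ returns False.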


@ -0,0 +1,5 @@
{-# OPTIONS_GHC -F -pgmF tasty-discover -optF --modules=*Test.hs #-}
-- -optF --modules=*Test.hs tells tasty-discover to pick up only files that match *Test.hs.
-- I did not do this for any strong reason so we can remove it in the future if we figure out
-- it is too restrictive.

waspls/test/TestUtil.hs

@ -0,0 +1,39 @@
module TestUtil
( printDiff,
shouldBeWithDiff,
Diffable (toLines),
)
where
import Data.Algorithm.Diff (Diff, PolyDiff (Both, First, Second), getDiff)
import Data.List (intercalate)
import Test.Tasty.Hspec (Expectation, expectationFailure)
shouldBeWithDiff :: (Eq a, Diffable a) => a -> a -> Expectation
shouldBeWithDiff actual expected
| actual == expected = pure ()
| otherwise =
expectationFailure $
"Actual is not expected\n"
++ printDiff (getDiff (toLines expected) (toLines actual))
++ "\ESC[31m"
printDiff :: [Diff String] -> String
printDiff diffs = intercalate "\n" $ map printDiffLine diffs
where
printDiffLine :: Diff String -> String
printDiffLine (First s) = " \ESC[31m- " ++ s
printDiffLine (Second s) = " \ESC[36m+ " ++ s
printDiffLine (Both s _) = " \ESC[0m " ++ s
class Diffable a where
toLines :: a -> [String]
instance Diffable Char where
toLines c = [[c]]
instance Diffable a => Diffable [a] where
toLines xs = concatMap toLines xs
instance (Diffable a, Diffable b) => Diffable (a, b) where
toLines (a, b) = toLines a ++ toLines b


@ -0,0 +1,271 @@
{-# LANGUAGE FlexibleInstances #-}
{-# OPTIONS_GHC -Wno-orphans #-}
module Wasp.Backend.ConcreteParserTest where
import Control.DeepSeq (deepseq)
import Test.Tasty.Hspec (Spec, describe, it)
import Test.Tasty.QuickCheck
import TestUtil
import Wasp.Backend.ConcreteParser
import Wasp.Backend.ConcreteSyntax
import qualified Wasp.Backend.Lexer as L
import Wasp.Backend.ParseError
import qualified Wasp.Backend.Token as T
import qualified Wasp.Backend.TokenSet as TokenSet
token :: T.TokenKind -> String -> T.Token
token kind text = T.Token {T.tokenKind = kind, T.tokenWidth = length text}
node :: SyntaxKind -> Int -> [SyntaxNode] -> SyntaxNode
node kind width children = SyntaxNode {snodeKind = kind, snodeWidth = width, snodeChildren = children}
spec_ParseCSTExpression :: Spec
spec_ParseCSTExpression = do
it "Parses identifiers" $ do
parseCSTExpression [token T.Identifier "foo"]
`shouldBeWithDiff` ([], [node Var 3 []])
it "Parses double literals" $ do
parseCSTExpression [token T.Double "0.5"]
`shouldBeWithDiff` ([], [node Double 3 []])
it "Parses int literals" $ do
parseCSTExpression [token T.Int "0.5"]
`shouldBeWithDiff` ([], [node Int 3 []])
it "Parses true literals" $ do
parseCSTExpression [token T.KwTrue "true"]
`shouldBeWithDiff` ([], [node BoolTrue 4 []])
it "Parses false literals" $ do
parseCSTExpression [token T.KwFalse "false"]
`shouldBeWithDiff` ([], [node BoolFalse 5 []])
it "Parses external imports" $ do
parseCSTExpression [token T.KwImport "import", token T.Identifier "main", token T.KwFrom "from", token T.String "\"@ext/main.js\""]
`shouldBeWithDiff` ( [],
[ node
ExtImport
28
[ node (Token T.KwImport) 6 [],
node ExtImportModule 4 [],
node (Token T.KwFrom) 4 [],
node ExtImportPath 14 []
]
]
)
it "Parses quoted expressions" $ do
parseCSTExpression [token T.LQuote "{=txt", token T.Quoted "h", token T.Quoted "e", token T.Quoted "llo", token T.RQuote "txt=}"]
`shouldBeWithDiff` ( [],
[ node
Quoter
15
[ node (Token T.LQuote) 5 [],
node (Token T.Quoted) 1 [],
node (Token T.Quoted) 1 [],
node (Token T.Quoted) 3 [],
node (Token T.RQuote) 5 []
]
]
)
describe "Lists" $ do
it "Parses empty lists" $ do
parseCSTExpression [token T.LSquare "[", token T.RSquare "]"]
`shouldBeWithDiff` ([], [node List 2 [node (Token T.LSquare) 1 [], node (Token T.RSquare) 1 []]])
it "Parses list with items" $ do
parseCSTExpression [token T.LSquare "[", token T.Identifier "foo", token T.Comma ",", token T.Int "3", token T.RSquare "]"]
`shouldBeWithDiff` ( [],
[ node
List
7
[ node (Token T.LSquare) 1 [],
node Var 3 [],
node (Token T.Comma) 1 [],
node Int 1 [],
node (Token T.RSquare) 1 []
]
]
)
it "Parses lists with a trailing comma" $ do
parseCSTExpression [token T.LSquare "[", token T.Int "1", token T.Comma ",", token T.RSquare "]"]
`shouldBeWithDiff` ( [],
[ node
List
4
[ node (Token T.LSquare) 1 [],
node Int 1 [],
node (Token T.Comma) 1 [],
node (Token T.RSquare) 1 []
]
]
)
spec_ConcreteParser :: Spec
spec_ConcreteParser =
describe "Wasp.Backend.ConcreteParser" $ do
it "works on a simple example" $ do
let tokens =
[ token T.Identifier "app",
token T.White " ",
token T.Identifier "Test",
token T.White " ",
token T.LCurly "{",
token T.Newline "\n",
token T.White " ",
token T.Comment "// this is the title of the app",
token T.Newline "\n",
token T.White " ",
token T.Identifier "title",
token T.Colon ":",
token T.String "\"hello world\"",
token T.Newline "\n",
token T.RCurly "}"
]
let errors = []
let tree =
node
Program
68
[ node
Decl
68
[ node DeclType 3 [],
node (Token T.White) 1 [],
node DeclName 4 [],
node (Token T.White) 1 [],
node
Dict
59
[ node (Token T.LCurly) 1 [],
node (Token T.Newline) 1 [],
node (Token T.White) 2 [],
node (Token T.Comment) 31 [],
node (Token T.Newline) 1 [],
node (Token T.White) 2 [],
node
DictEntry
19
[ node DictKey 5 [],
node (Token T.Colon) 1 [],
node String 13 []
],
node (Token T.Newline) 1 [],
node (Token T.RCurly) 1 []
]
]
]
let (actualErrors, actualTrees) = parseCST tokens
actualTrees `shouldBeWithDiff` [tree]
actualErrors `shouldBeWithDiff` errors
it "works for incomplete source" $ do
let tokens =
[ token T.Identifier "app",
token T.Identifier "Test",
token T.LCurly "{",
token T.Identifier "title",
token T.Colon ":"
]
let errors = [UnexpectedEOF 14 $ TokenSet.fromList [T.LParen, T.LCurly, T.LSquare, T.KwImport, T.KwTrue, T.KwFalse, T.String, T.Double, T.Int, T.Identifier, T.LQuote]]
let tree =
node
Program
14
[ node
Decl
14
[ node DeclType 3 [],
node DeclName 4 [],
node
Dict
7
[ node (Token T.LCurly) 1 [],
node
DictEntry
6
[ node DictKey 5 [],
node (Token T.Colon) 1 [],
node Error 0 []
]
]
]
]
parseCST tokens `shouldBeWithDiff` (errors, [tree])
it "works when incomplete with closing bracket" $ do
let tokens =
[ token T.Identifier "route",
token T.White " ",
token T.Identifier "TestRoute",
token T.White " ",
token T.LCurly "{",
token T.Newline "\n",
token T.White " ",
token T.Identifier "path",
token T.Colon ":",
token T.White " ",
token T.String "\"/\"",
token T.Comma ",",
token T.Newline "\n",
token T.White " ",
token T.Identifier "to",
token T.Colon ":",
token T.White " ",
token T.Newline "\n",
token T.RCurly "}"
]
let errors =
[ UnexpectedToken (Region 38 39) T.RCurly $ TokenSet.fromList [T.LParen, T.LCurly, T.LSquare, T.KwImport, T.KwTrue, T.KwFalse, T.String, T.Double, T.Int, T.Identifier, T.LQuote]
]
let tree =
node
Program
39
[ node
Decl
39
[ node DeclType 5 [],
node (Token T.White) 1 [],
node DeclName 9 [],
node (Token T.White) 1 [],
node
Dict
23
[ node (Token T.LCurly) 1 [],
node (Token T.Newline) 1 [],
node (Token T.White) 2 [],
node
DictEntry
9
[ node DictKey 4 [],
node (Token T.Colon) 1 [],
node (Token T.White) 1 [],
node String 3 []
],
node (Token T.Comma) 1 [],
node (Token T.Newline) 1 [],
node (Token T.White) 2 [],
node
DictEntry
5
[ node DictKey 2 [],
node (Token T.Colon) 1 [],
node (Token T.White) 1 [],
node (Token T.Newline) 1 [],
node Error 0 []
],
node (Token T.RCurly) 1 []
]
]
]
parseCST tokens `shouldBeWithDiff` (errors, [tree])
-- DEPENDS ON: LexerTest#never fails to lex
-- TODO: remove dependency by making arbitrary instance for Token
--
-- The point of this test is to ensure some sort of parse tree is always built
it "never fails to parse" $
property $ \source -> parseCST (L.lex source) `deepseq` True
instance Diffable SyntaxNode where
toLines n = lines $ cstPrettyPrint n
instance Diffable ParseError where
toLines err = ["At " ++ show (errorRegion err), " " ++ showErrorMessage err]


@ -0,0 +1,66 @@
{-# OPTIONS_GHC -Wno-orphans #-}
module Wasp.Backend.LexerTest where
import Control.DeepSeq (deepseq)
import Data.List (isInfixOf)
import Test.Tasty.Hspec
import Test.Tasty.QuickCheck
import TestUtil
import qualified Wasp.Backend.Lexer as L
import Wasp.Backend.Token
token :: TokenKind -> String -> Token
token kind text = Token {tokenKind = kind, tokenWidth = length text}
spec_Lexer :: Spec
spec_Lexer = do
it "works for a simple example" $ do
let source = "app Test { name: \"hello world\" }\n\n\n // this is a comment\nimport from"
let expected =
[ token Identifier "app",
token White " ",
token Identifier "Test",
token White " ",
token LCurly "{",
token White " ",
token Identifier "name",
token Colon ":",
token White " ",
token String "\"hello world\"",
token White " ",
token RCurly "}",
token Newline "\n",
token Newline "\n",
token Newline "\n",
token White " ",
token Comment "// this is a comment",
token Newline "\n",
token KwImport "import",
token White " ",
token KwFrom "from"
]
L.lex source `shouldBeWithDiff` expected
it "only makes Quoted tokens after an LQuote and before an RQuote" $ do
property $ \inner ->
if "txt=}" `isInfixOf` inner
then discard
else case L.lex $ "{=txt " ++ inner ++ " txt=}" of
[] -> False
open : rest
| tokenKind open /= LQuote -> False
| otherwise ->
all ((== Quoted) . tokenKind) (init rest)
&& (tokenKind (last rest) == RQuote)
it "never fails to lex" $ do
-- @deepseq@ is used to force evaluation of the tokens, making sure an
-- error never occurs (there are calls to @error@ in the lexer that we
-- need to make sure never actually happen)
property $ \source -> L.lex source `deepseq` True
instance Diffable Token where
toLines tok =
[ show (tokenKind tok) ++ "[" ++ show (tokenWidth tok) ++ "]"
]


@ -42,14 +42,26 @@ library
import: common-all
exposed-modules:
Wasp.LSP.Server
Wasp.LSP.Core
Wasp.LSP.Handlers
Wasp.Backend.Token
Wasp.Backend.Lexer
Wasp.Backend.TokenSet
Wasp.Backend.ParseError
Wasp.Backend.ConcreteSyntax
Wasp.Backend.ConcreteParser
Wasp.Backend.ConcreteParser.Internal
other-modules:
Paths_waspls
Wasp.LSP.Core
Wasp.LSP.Handlers
Wasp.Backend.Lexer.Internal
Wasp.Backend.Lexer.Lexer
hs-source-dirs:
src
build-tool-depends:
, alex:alex
build-depends:
, base ^>=4.14.3.0
, waspc
, lsp ^>=1.4.0.0
, lsp-types ^>=1.4.0.1
, containers ^>=0.6.5.1
@ -60,7 +72,34 @@ library
, hslogger ^>=1.3.1.0
, aeson ^>=1.5.6
, lens ^>=5.1
, waspc
, aeson-pretty ^>= 0.8
, utf8-string ^>= 1.0.2
-- 'array' is used by code generated by Alex for src/Wasp/Backend/Lexer/Lexer.x
, array ^>= 0.5.4
, deepseq
test-suite waspls-test
import: common-all, common-exe
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: TastyDiscoverDriver.hs
build-tool-depends:
, tasty-discover:tasty-discover
build-depends:
, base
, waspls
, deepseq
, containers ^>= 0.6.5
, Diff ^>= 0.4.1
, QuickCheck ^>= 2.14
, tasty ^>= 1.4.2
-- tasty-hspec 1.1.7 introduces breaking changes, which is why we have < 1.1.7 .
, tasty-hspec >= 1.1 && < 1.1.7
, tasty-quickcheck ^>= 0.10
other-modules:
TestUtil
Wasp.Backend.LexerTest
Wasp.Backend.ConcreteParserTest
executable waspls
import: common-all, common-exe