Improved parse errors (added expected tokens, nicer error messages).

- Added expected tokens.
- Nicer error messages + source position.
Martin Sosic 2021-12-09 14:23:27 +01:00 committed by Martin Šošić
parent 470d4a5889
commit c7ce883b5a
10 changed files with 280 additions and 69 deletions


@@ -110,29 +110,31 @@ module Wasp.Analyzer
-- * API
analyze,
E.takeDecls,
takeDecls,
AnalyzeError (..),
getErrorMessage,
getErrorSourcePosition,
SourcePosition (..),
)
where
import Control.Arrow (left)
import Control.Monad ((>=>))
import Wasp.Analyzer.Evaluator (Decl)
import qualified Wasp.Analyzer.Evaluator as E
import qualified Wasp.Analyzer.Parser as P
import Wasp.Analyzer.AnalyzeError
( AnalyzeError (..),
SourcePosition (..),
getErrorMessage,
getErrorSourcePosition,
)
import Wasp.Analyzer.Evaluator (Decl, evaluate, takeDecls)
import Wasp.Analyzer.Parser (parse)
import Wasp.Analyzer.StdTypeDefinitions (stdTypes)
import qualified Wasp.Analyzer.TypeChecker as T
data AnalyzeError
= ParseError P.ParseError
| TypeError T.TypeError
| EvaluationError E.EvaluationError
deriving (Show, Eq)
import Wasp.Analyzer.TypeChecker (typeCheck)
-- | Takes a Wasp source file and produces a list of declarations or a
-- description of an error in the source file.
analyze :: String -> Either AnalyzeError [Decl]
analyze =
(left ParseError . P.parse)
>=> (left TypeError . T.typeCheck stdTypes)
>=> (left EvaluationError . E.evaluate stdTypes)
(left ParseError . parse)
>=> (left TypeError . typeCheck stdTypes)
>=> (left EvaluationError . evaluate stdTypes)
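
As an aside (not part of this commit), here is a hedged sketch of how a caller might consume the reshaped API; reportAnalysis and waspSource are hypothetical names, and only the exports listed above are assumed.

import Wasp.Analyzer (SourcePosition (..), analyze, getErrorMessage, getErrorSourcePosition)

-- Illustrative only: run the analyzer and render either the number of declarations
-- or the error message together with its source position.
reportAnalysis :: String -> String
reportAnalysis waspSource =
  case analyze waspSource of
    Right decls -> "Analyzed " ++ show (length decls) ++ " declaration(s)."
    Left err ->
      let SourcePosition line column = getErrorSourcePosition err
       in getErrorMessage err ++ "\nAt line " ++ show line ++ ", column " ++ show column ++ "."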


@@ -0,0 +1,29 @@
module Wasp.Analyzer.AnalyzeError
( AnalyzeError (..),
getErrorMessage,
getErrorSourcePosition,
SourcePosition (..),
)
where
import qualified Wasp.Analyzer.Evaluator.EvaluationError as EE
import Wasp.Analyzer.Parser (SourcePosition (..))
import qualified Wasp.Analyzer.Parser.ParseError as PE
import qualified Wasp.Analyzer.TypeChecker.TypeError as TE
import Wasp.Util (indent)
data AnalyzeError
= ParseError PE.ParseError
| TypeError TE.TypeError
| EvaluationError EE.EvaluationError
deriving (Show, Eq)
getErrorMessage :: AnalyzeError -> String
getErrorMessage (ParseError e) = "Parse error:\n" ++ indent 2 (PE.getErrorMessage e)
getErrorMessage (TypeError e) = "Type error:\n" ++ error "TODO"
getErrorMessage (EvaluationError e) = "Evaluation error:\n" ++ error "TODO"
getErrorSourcePosition :: AnalyzeError -> SourcePosition
getErrorSourcePosition (ParseError e) = PE.getSourcePosition e
getErrorSourcePosition (TypeError e) = error "TODO"
getErrorSourcePosition (EvaluationError e) = error "TODO"
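
A hedged sketch (illustrative, not part of the diff) of what getErrorMessage produces for a wrapped parse error, assuming it sits in this module so the imports above are in scope:

-- Illustrative only: a one-line nested message gets the "Parse error:" header
-- and a 2-space indent from Wasp.Util.indent.
exampleAnalyzeErrorMessage :: String
exampleAnalyzeErrorMessage =
  getErrorMessage $ ParseError $ PE.UnexpectedChar '!' (SourcePosition 2 42)
-- ==> "Parse error:\n  Unexpected character: !"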


@@ -81,17 +81,19 @@ startCodeToInt (QuoterStartCode _) = quoter
--
-- This function is taken from the Alex basic wrapper.
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (c, (b:bs), s) = Just (b, (c, bs, s))
alexGetByte (_, [], []) = Nothing
alexGetByte (_, [], (c:s)) = case encodeChar c of
(b:bs) -> Just (b, (c, bs, s))
[] -> Nothing
alexGetByte (prevChar, (currChar, (b:bs)), remainingSource) =
Just (b, (prevChar, (currChar, bs), remainingSource))
alexGetByte (_, (_, []), []) = Nothing
alexGetByte (_, (currChar, []), (newChar:remainingSource)) =
case encodeChar newChar of
(b:bs) -> Just (b, (currChar, (newChar, bs), remainingSource))
[] -> Nothing
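
To make the byte threading concrete, here is a hedged helper sketch (hypothetical, not part of this commit); it assumes the module's AlexInput alias and Word8 import are in scope.

-- Illustrative only: collect every byte alexGetByte would hand to Alex.
drainBytes :: AlexInput -> [Word8]
drainBytes input = case alexGetByte input of
  Nothing -> []
  Just (b, input') -> b : drainBytes input'

-- Starting from the parser's initial input ('\n', ('\n', []), "až"), this yields
-- [0x61, 0xC5, 0xBE]: one byte for 'a' and the two UTF-8 bytes of 'ž', while
-- prevChar and currChar advance exactly as the clauses above describe.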
-- | Required by Alex.
--
-- This function is taken from the Alex basic wrapper.
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (c, _, _) = c
alexInputPrevChar (prevChar, _, _) = prevChar
-- | Lexes a single token from the input.
--
@@ -105,14 +107,19 @@ alexInputPrevChar (c, _, _) = c
-- This function internally calls `alexScan`, which is a function generated by Alex responsible for doing actual lexing/scanning.
lexer :: (Token -> Parser a) -> Parser a
lexer parseToken = do
input@(previousChar, _, remainingSource) <- gets parserRemainingInput
input@(_, _, remainingSource) <- gets parserRemainingInput
startCodeInt <- gets $ startCodeToInt . parserLexerStartCode
case alexScan input startCodeInt of
AlexEOF -> do
createConstToken TEOF "" >>= parseToken
AlexError _ -> do
AlexError _input'@(_, _, c:_) -> do
-- NOTE(martin): @_input'@ is actually the same as @input@ before the scan,
-- that is how AlexError works -> it returns the last AlexInput before Alex
-- failed. Therefore, the character it failed on is actually the first
-- character of the remaining source.
pos <- gets parserSourcePosition
throwError $ UnexpectedChar previousChar pos
throwError $ UnexpectedChar c pos
AlexError (_, _, []) -> error "impossible"
AlexSkip input' numCharsSkipped -> do
updatePosition $ take numCharsSkipped remainingSource
putInput input'


@@ -56,14 +56,17 @@ initialState :: String -> ParserState
initialState source =
ParserState
{ parserSourcePosition = SourcePosition 1 1,
parserRemainingInput = ('\n', [], source),
-- NOTE: We use '\n' here as a dummy value to start with.
parserRemainingInput = ('\n', ('\n', []), source),
parserLexerStartCode = DefaultStartCode
}
-- | The type of the input given to the parser/lexer
--
-- An input @(c, bs, str)@ represents
-- - @c@ The previous character consumed by the lexer
-- - @bs@ The UTF8 bytes of the current character being lexed
-- - @str@ The remaining input to be lexed and parsed
type ParserInput = (Char, [Word8], String)
-- An input @(prevChar, (currChar, bs), remainingSource)@ represents
-- - @prevChar@ The previous character, successfully consumed by the lexer
-- - @currChar@ The current character being lexed
-- - @bs@ The yet unconsumed UTF8 bytes of the current character being lexed
-- - @remainingSource@ The remaining source to be lexed and parsed
-- (excluding the character currently being lexed)
type ParserInput = (Char, (Char, [Word8]), String)


@@ -1,11 +1,46 @@
module Wasp.Analyzer.Parser.ParseError where
{-# LANGUAGE NamedFieldPuns #-}
module Wasp.Analyzer.Parser.ParseError
( ParseError (..),
getErrorMessage,
getSourcePosition,
)
where
import Wasp.Analyzer.Parser.Token
data ParseError
= -- | A lexical error representing an invalid character
= -- | A lexical error representing an invalid character. It means that the lexer
-- failed to construct/parse a token due to this unexpected character.
UnexpectedChar Char SourcePosition
| -- | A parse error caused by some token
ParseError Token
| QuoterDifferentTags (String, SourcePosition) (String, SourcePosition)
| -- | In @UnexpectedToken token expectedTokens@, @token@ is the token where the parse error
-- occurred, while @expectedTokens@ is a list of tokens, any one of which would
-- avoid that error if it appeared in place of @token@.
-- NOTE(martin): These @expectedTokens@ are represented via the names used for them
-- in the grammar defined in Parser.y, under the @%token@ section (names are in the
-- first column), which have been prettified a bit (check Parser.y for details).
UnexpectedToken Token [String]
| -- | Thrown if the parser encounters a quoter that has different tags, e.g.
-- {=json psl=}. Then the first String in QuoterDifferentTags will be "json"
-- while the second one will be "psl".
QuoterDifferentTags (String, SourcePosition) (String, SourcePosition)
deriving (Eq, Show)
getErrorMessage :: ParseError -> String
getErrorMessage (UnexpectedChar unexpectedChar _) =
"Unexpected character: " ++ [unexpectedChar]
getErrorMessage (UnexpectedToken unexpectedToken expectedTokens) =
unexpectedTokenMessage
++ if not (null expectedTokens) then "\n" ++ expectedTokensMessage else ""
where
unexpectedTokenMessage = "Unexpected token: " ++ tokenLexeme unexpectedToken
expectedTokensMessage =
"Expected one of the following tokens instead: "
++ unwords expectedTokens
getErrorMessage (QuoterDifferentTags (ltag, _) (rtag, _)) =
"Quoter tags don't match: {=" ++ ltag ++ " ... " ++ rtag ++ "=}"
getSourcePosition :: ParseError -> SourcePosition
getSourcePosition (UnexpectedChar _ pos) = pos
getSourcePosition (UnexpectedToken Token {tokenPosition} _) = tokenPosition
getSourcePosition (QuoterDifferentTags _ (_, rpos)) = rpos
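
For reference, a hedged usage sketch (mirrored by the new tests further below); the positional Token construction assumes the tokenType/tokenPosition/tokenLexeme field order from Wasp.Analyzer.Parser.Token.

-- Illustrative only:
exampleUnexpectedTokenMessage :: String
exampleUnexpectedTokenMessage =
  getErrorMessage $
    UnexpectedToken (Token TRCurly (SourcePosition 100 18) "}") ["<identifier>", ","]
-- ==> "Unexpected token: }\nExpected one of the following tokens instead: <identifier> ,"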


@@ -1,4 +1,6 @@
{
{-# LANGUAGE LambdaCase #-}
-- This file is processed by Happy (https://www.haskell.org/happy/) and generates
-- the module `Wasp.Analyzer.Parser.Parser`
@@ -18,10 +20,12 @@ import Control.Monad.Except (throwError)
-- Lines below tell Happy:
-- - to name the main parsing function `parse` when generating it
-- - that input to parser is `Token` type
-- - to call`parseError` when the parser encounters an error
-- - to call `parseError` when the parser encounters an error
-- - to provide `parseError` with a list of expected tokens that would avoid the error
%name parse
%tokentype { Token }
%error { parseError }
%errorhandlertype explist
-- This sets up Happy to use a monadic parser and threaded lexer.
-- This means that parser generated by Happy will request tokens from lexer as it needs them instead of
@@ -33,28 +37,27 @@ import Control.Monad.Except (throwError)
-- This section defines the names that are used in the grammar section to
-- refer to each type of token.
-- NOTE: If you update it, also update the @prettyShowGrammarToken@ function below.
%token
'(' { Token { tokenType = TLParen } }
')' { Token { tokenType = TRParen } }
'[' { Token { tokenType = TLSquare } }
']' { Token { tokenType = TRSquare } }
'{' { Token { tokenType = TLCurly } }
'}' { Token { tokenType = TRCurly } }
',' { Token { tokenType = TComma } }
':' { Token { tokenType = TColon } }
import { Token { tokenType = TImport } }
from { Token { tokenType = TFrom } }
true { Token { tokenType = TTrue } }
false { Token { tokenType = TFalse } }
string { Token { tokenType = TString $$ } }
int { Token { tokenType = TInt $$ } }
double { Token { tokenType = TDouble $$ } }
'{=' { Token { tokenType = TLQuote $$ } }
quoted { Token { tokenType = TQuoted $$ } }
'=}' { Token { tokenType = TRQuote $$ } }
ident { Token { tokenType = TIdentifier $$ } }
'(' { Token { tokenType = TLParen } }
')' { Token { tokenType = TRParen } }
'[' { Token { tokenType = TLSquare } }
']' { Token { tokenType = TRSquare } }
'{' { Token { tokenType = TLCurly } }
'}' { Token { tokenType = TRCurly } }
',' { Token { tokenType = TComma } }
':' { Token { tokenType = TColon } }
import { Token { tokenType = TImport } }
from { Token { tokenType = TFrom } }
true { Token { tokenType = TTrue } }
false { Token { tokenType = TFalse } }
string { Token { tokenType = TString $$ } }
int { Token { tokenType = TInt $$ } }
double { Token { tokenType = TDouble $$ } }
'{=' { Token { tokenType = TLQuote $$ } }
quoted { Token { tokenType = TQuoted $$ } }
'=}' { Token { tokenType = TRQuote $$ } }
identifier { Token { tokenType = TIdentifier $$ } }
%%
-- Grammar rules
@@ -66,7 +69,7 @@ Wasp :: { AST }
Stmt :: { Stmt }
: Decl { $1 }
Decl :: { Stmt }
: ident ident Expr { Decl $1 $2 $3 }
: identifier identifier Expr { Decl $1 $2 $3 }
Expr :: { Expr }
: Dict { $1 }
@@ -79,7 +82,7 @@ Expr :: { Expr }
| double { DoubleLiteral $1 }
| true { BoolLiteral True }
| false { BoolLiteral False }
| ident { Var $1 }
| identifier { Var $1 }
Dict :: { Expr }
: '{' DictEntries '}' { Dict $2 }
@@ -89,7 +92,7 @@ DictEntries :: { [(Identifier, Expr)] }
: DictEntry { [$1] }
| DictEntries ',' DictEntry { $1 ++ [$3] }
DictEntry :: { (Identifier, Expr) }
: ident ':' Expr { ($1, $3) }
: identifier ':' Expr { ($1, $3) }
List :: { Expr }
: '[' ListVals ']' { List $2 }
@@ -113,8 +116,8 @@ TupleVals :: { (Expr, Expr, [Expr]) }
Extimport :: { Expr }
: import Name from string { ExtImport $2 $4 }
Name :: { ExtImportName }
: ident { ExtImportModule $1 }
| '{' ident '}' { ExtImportField $2 }
: identifier { ExtImportModule $1 }
| '{' identifier '}' { ExtImportField $2 }
Quoter :: { Expr }
: SourcePosition '{=' Quoted SourcePosition '=}' {% if $2 /= $5
@@ -129,6 +132,29 @@ SourcePosition :: { SourcePosition }
: {- empty -} {% fmap parserSourcePosition get }
{
parseError :: Token -> Parser a
parseError token = throwError $ ParseError token
parseError :: (Token, [String]) -> Parser a
parseError (token, expectedTokens) =
throwError $ UnexpectedToken token $ prettyShowGrammarToken <$> expectedTokens
-- Input is a grammar token name, as defined in the %token section above (first column),
-- while the output is a nicer representation of it, ready to be shown,
-- e.g. in error messages.
prettyShowGrammarToken :: String -> String
prettyShowGrammarToken = \case
"'('" -> "("
"')'" -> ")"
"'['" -> "["
"']'" -> "]"
"'{'" -> "{"
"'}'" -> "}"
"','" -> ","
"':'" -> ":"
"string" -> "<string>"
"int" -> "<int>"
"double" -> "<double>"
"'{='" -> "{=<identifier>"
"quoted" -> "<quoted>"
"'=}'" -> "<identifier>=}"
"identifier" -> "<identifier>"
s -> s
}
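
As an aside (hedged, not part of the grammar file): with %errorhandlertype explist, Happy hands parseError the offending token plus the grammar names of the tokens it would have accepted, and prettyShowGrammarToken turns those names into the user-facing forms. The exact strings Happy passes are assumed to match the %token section above.

-- Illustrative only: for the truncated source "test Decl {" the call is roughly
--   parseError (Token TEOF (SourcePosition 1 12) "", ["'}'", "identifier"])
-- which throws
--   UnexpectedToken (Token TEOF (SourcePosition 1 12) "") ["}", "<identifier>"]
-- because of the mapping shown here:
examplePrettyNames :: [String]
examplePrettyNames = prettyShowGrammarToken <$> ["'}'", "identifier"]
-- ==> ["}", "<identifier>"]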


@@ -5,12 +5,15 @@ module Wasp.Util
toUpperFirst,
headSafe,
jsonSet,
indent,
)
where
import qualified Data.Aeson as Aeson
import Data.Char (isUpper, toLower, toUpper)
import qualified Data.HashMap.Strict as M
import Data.List (intercalate)
import Data.List.Split (splitOn)
import qualified Data.Text as Text
camelToKebabCase :: String -> String
@@ -43,3 +46,6 @@ headSafe xs = Just (head xs)
jsonSet :: Text.Text -> Aeson.Value -> Aeson.Value -> Aeson.Value
jsonSet key value (Aeson.Object o) = Aeson.Object $ M.insert key value o
jsonSet _ _ _ = error "Input JSON must be an object"
indent :: Int -> String -> String
indent numSpaces = intercalate "\n" . map (replicate numSpaces ' ' ++) . splitOn "\n"
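
A hedged usage sketch of the new helper (illustrative value only), matching the behaviour the new spec_indent tests pin down:

-- Illustrative only: every line, including already-indented ones, gains the prefix.
exampleIndent :: String
exampleIndent = indent 2 "Unexpected token: }\nExpected: <identifier>"
-- ==> "  Unexpected token: }\n  Expected: <identifier>"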


@@ -0,0 +1,45 @@
module Analyzer.Parser.ParseErrorTest where
import Test.Tasty.Hspec
import Wasp.Analyzer.Parser.ParseError
import Wasp.Analyzer.Parser.Token
spec_ParseErrorTest :: Spec
spec_ParseErrorTest = do
describe "Analyzer.Parser.ParseError" $ do
let unexpectedCharError = UnexpectedChar '!' (SourcePosition 2 42)
unexpectedTokenErrorNoSuggestions =
UnexpectedToken (Token TLCurly (SourcePosition 2 3) "{") []
unexpectedTokenErrorWithSuggestions =
UnexpectedToken
(Token TRCurly (SourcePosition 100 18) "}")
["<identifier>", ","]
quoterDifferentTagsError =
QuoterDifferentTags
("foo", SourcePosition 1 5)
("bar", SourcePosition 1 20)
describe "getErrorMessage returns human readable error message" $ do
it "for UnexpectedChar error" $ do
getErrorMessage unexpectedCharError `shouldBe` "Unexpected character: !"
it "for UnexpectedToken error" $ do
getErrorMessage unexpectedTokenErrorNoSuggestions
`shouldBe` "Unexpected token: {"
getErrorMessage unexpectedTokenErrorWithSuggestions
`shouldBe` ( "Unexpected token: }\n"
++ "Expected one of the following tokens instead: <identifier> ,"
)
it "for QuoterDifferentTags error" $ do
getErrorMessage quoterDifferentTagsError
`shouldBe` "Quoter tags don't match: {=foo ... bar=}"
describe "getSourcePosition returns correct position" $ do
it "for UnexpectedChar error" $ do
getSourcePosition unexpectedCharError `shouldBe` SourcePosition 2 42
it "for UnexpectedToken error" $ do
getSourcePosition unexpectedTokenErrorNoSuggestions
`shouldBe` SourcePosition 2 3
getSourcePosition unexpectedTokenErrorWithSuggestions
`shouldBe` SourcePosition 100 18
it "for QuoterDifferentTags error" $ do
getSourcePosition quoterDifferentTagsError
`shouldBe` SourcePosition 1 20


@@ -192,12 +192,14 @@ spec_Parser = do
let source = "test Decl {"
let expected =
Left $
ParseError $
Token
{ tokenType = TEOF,
tokenPosition = SourcePosition 1 12,
tokenLexeme = ""
}
UnexpectedToken
( Token
{ tokenType = TEOF,
tokenPosition = SourcePosition 1 12,
tokenLexeme = ""
}
)
["}", "<identifier>"]
parse source `shouldBe` expected
it "Parses multiple statements" $ do
@@ -212,3 +214,49 @@ spec_Parser = do
Decl "constant" "E" $ DoubleLiteral 2.71828
]
parse source `shouldBe` Right ast
describe "Fails with UnexpectedChar error if unrecognized character is encountered" $ do
it "e.g. when it encounters '^' after declaration name" $ do
let source = "test Decl ^ {}"
let expected = Left $ UnexpectedChar '^' $ SourcePosition 1 11
parse source `shouldBe` expected
it "e.g. when the identifier contains '!'" $ do
let source = "test De!cl {}"
let expected = Left $ UnexpectedChar '!' $ SourcePosition 1 8
parse source `shouldBe` expected
describe "Fails with ParseError error if unexpected token is encountered" $ do
it "When string follows identifier" $ do
let source = "test \"Declaration\" {}"
let expected =
Left $
UnexpectedToken
( Token
{ tokenType = TString "Declaration",
tokenPosition = SourcePosition 1 6,
tokenLexeme = "\"Declaration\""
}
)
["<identifier>"]
parse source `shouldBe` expected
it "When dictionary is missing a comma between the two fields" $ do
let source =
unlines
[ "test Declaration {",
" a: 1",
" b: 2 ",
"}"
]
let expected =
Left $
UnexpectedToken
( Token
{ tokenType = TIdentifier "b",
tokenPosition = SourcePosition 3 3,
tokenLexeme = "b"
}
)
["}", ","]
parse source `shouldBe` expected


@@ -61,3 +61,13 @@ spec_jsonSet = do
[ "prop1" .= newStrValue
]
jsonSet "prop1" (toJSON newStrValue) inputObj `shouldBe` expectedObj
spec_indent :: Spec
spec_indent = do
describe "indent should indent given text correctly" $ do
it "when just one line of text" $ do
indent 2 "foo" `shouldBe` " foo"
it "when multiple lines of text" $ do
indent 3 "foo\nbar" `shouldBe` " foo\n bar"
it "when text is already somewhat indented" $ do
indent 4 " foo\n bar" `shouldBe` " foo\n bar"