Improved parse errors (added expected tokens, nicer error messages).

- Added expected tokens.
- Nicer error messages + source position.
Martin Sosic 2021-12-09 14:23:27 +01:00 committed by Martin Šošić
parent 470d4a5889
commit c7ce883b5a
10 changed files with 280 additions and 69 deletions


@@ -110,29 +110,31 @@ module Wasp.Analyzer
-- * API
analyze,
E.takeDecls,
takeDecls,
AnalyzeError (..),
getErrorMessage,
getErrorSourcePosition,
SourcePosition (..),
)
where
import Control.Arrow (left)
import Control.Monad ((>=>))
import Wasp.Analyzer.Evaluator (Decl)
import qualified Wasp.Analyzer.Evaluator as E
import qualified Wasp.Analyzer.Parser as P
import Wasp.Analyzer.AnalyzeError
( AnalyzeError (..),
SourcePosition (..),
getErrorMessage,
getErrorSourcePosition,
)
import Wasp.Analyzer.Evaluator (Decl, evaluate, takeDecls)
import Wasp.Analyzer.Parser (parse)
import Wasp.Analyzer.StdTypeDefinitions (stdTypes)
import qualified Wasp.Analyzer.TypeChecker as T
data AnalyzeError
= ParseError P.ParseError
| TypeError T.TypeError
| EvaluationError E.EvaluationError
deriving (Show, Eq)
import Wasp.Analyzer.TypeChecker (typeCheck)
-- | Takes a Wasp source file and produces a list of declarations or a
-- description of an error in the source file.
analyze :: String -> Either AnalyzeError [Decl]
analyze =
(left ParseError . P.parse)
>=> (left TypeError . T.typeCheck stdTypes)
>=> (left EvaluationError . E.evaluate stdTypes)
(left ParseError . parse)
>=> (left TypeError . typeCheck stdTypes)
>=> (left EvaluationError . evaluate stdTypes)
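
As an aside (not part of this commit), here is a hedged sketch of how a caller might consume the reshaped API; reportAnalysis and waspSource are hypothetical names, and only the exports listed above are assumed.

import Wasp.Analyzer (SourcePosition (..), analyze, getErrorMessage, getErrorSourcePosition)

-- Illustrative only: run the analyzer and render either the number of declarations
-- or the error message together with its source position.
reportAnalysis :: String -> String
reportAnalysis waspSource =
  case analyze waspSource of
    Right decls -> "Analyzed " ++ show (length decls) ++ " declaration(s)."
    Left err ->
      let SourcePosition line column = getErrorSourcePosition err
       in getErrorMessage err ++ "\nAt line " ++ show line ++ ", column " ++ show column ++ "."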


@@ -0,0 +1,29 @@
module Wasp.Analyzer.AnalyzeError
( AnalyzeError (..),
getErrorMessage,
getErrorSourcePosition,
SourcePosition (..),
)
where
import qualified Wasp.Analyzer.Evaluator.EvaluationError as EE
import Wasp.Analyzer.Parser (SourcePosition (..))
import qualified Wasp.Analyzer.Parser.ParseError as PE
import qualified Wasp.Analyzer.TypeChecker.TypeError as TE
import Wasp.Util (indent)
data AnalyzeError
= ParseError PE.ParseError
| TypeError TE.TypeError
| EvaluationError EE.EvaluationError
deriving (Show, Eq)
getErrorMessage :: AnalyzeError -> String
getErrorMessage (ParseError e) = "Parse error:\n" ++ indent 2 (PE.getErrorMessage e)
getErrorMessage (TypeError e) = "Type error:\n" ++ error "TODO"
getErrorMessage (EvaluationError e) = "Evaluation error:\n" ++ error "TODO"
getErrorSourcePosition :: AnalyzeError -> SourcePosition
getErrorSourcePosition (ParseError e) = PE.getSourcePosition e
getErrorSourcePosition (TypeError e) = error "TODO"
getErrorSourcePosition (EvaluationError e) = error "TODO"
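
A hedged sketch (illustrative, not part of the diff) of what getErrorMessage produces for a wrapped parse error, assuming it sits in this module so the imports above are in scope:

-- Illustrative only: a one-line nested message gets the "Parse error:" header
-- and a 2-space indent from Wasp.Util.indent.
exampleAnalyzeErrorMessage :: String
exampleAnalyzeErrorMessage =
  getErrorMessage $ ParseError $ PE.UnexpectedChar '!' (SourcePosition 2 42)
-- ==> "Parse error:\n  Unexpected character: !"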


@@ -81,17 +81,19 @@ startCodeToInt (QuoterStartCode _) = quoter
--
-- This function is taken from the Alex basic wrapper.
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (c, (b:bs), s) = Just (b, (c, bs, s))
alexGetByte (_, [], []) = Nothing
alexGetByte (_, [], (c:s)) = case encodeChar c of
(b:bs) -> Just (b, (c, bs, s))
[] -> Nothing
alexGetByte (prevChar, (currChar, (b:bs)), remainingSource) =
Just (b, (prevChar, (currChar, bs), remainingSource))
alexGetByte (_, (_, []), []) = Nothing
alexGetByte (_, (currChar, []), (newChar:remainingSource)) =
case encodeChar newChar of
(b:bs) -> Just (b, (currChar, (newChar, bs), remainingSource))
[] -> Nothing
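
To make the byte threading concrete, here is a hedged helper sketch (hypothetical, not part of this commit); it assumes the module's AlexInput alias and Word8 import are in scope.

-- Illustrative only: collect every byte alexGetByte would hand to Alex.
drainBytes :: AlexInput -> [Word8]
drainBytes input = case alexGetByte input of
  Nothing -> []
  Just (b, input') -> b : drainBytes input'

-- Starting from the parser's initial input ('\n', ('\n', []), "až"), this yields
-- [0x61, 0xC5, 0xBE]: one byte for 'a' and the two UTF-8 bytes of 'ž', while
-- prevChar and currChar advance exactly as the clauses above describe.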
-- | Required by Alex.
--
-- This function is taken from the Alex basic wrapper.
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (c, _, _) = c
alexInputPrevChar (prevChar, _, _) = prevChar
-- | Lexes a single token from the input.
--
@@ -105,14 +107,19 @@ alexInputPrevChar (c, _, _) = c
-- This function internally calls `alexScan`, which is a function generated by Alex responsible for doing actual lexing/scanning.
lexer :: (Token -> Parser a) -> Parser a
lexer parseToken = do
input@(previousChar, _, remainingSource) <- gets parserRemainingInput
input@(_, _, remainingSource) <- gets parserRemainingInput
startCodeInt <- gets $ startCodeToInt . parserLexerStartCode
case alexScan input startCodeInt of
AlexEOF -> do
createConstToken TEOF "" >>= parseToken
AlexError _ -> do
AlexError _input'@(_, _, c:_) -> do
-- NOTE(martin): @_input'@ is actually the same as @input@ before the scan,
-- that is how AlexError works -> it returns the last AlexInput before Alex
-- failed. Therefore, the character it failed on is actually the first
-- character of the remaining source.
pos <- gets parserSourcePosition
throwError $ UnexpectedChar previousChar pos
throwError $ UnexpectedChar c pos
AlexError (_, _, []) -> error "impossible"
AlexSkip input' numCharsSkipped -> do
updatePosition $ take numCharsSkipped remainingSource
putInput input'


@@ -56,14 +56,17 @@ initialState :: String -> ParserState
initialState source =
ParserState
{ parserSourcePosition = SourcePosition 1 1,
parserRemainingInput = ('\n', [], source),
-- NOTE: We use '\n' here as a dummy value to start with.
parserRemainingInput = ('\n', ('\n', []), source),
parserLexerStartCode = DefaultStartCode
}
-- | The type of the input given to the parser/lexer
--
-- An input @(c, bs, str)@ represents
-- - @c@ The previous character consumed by the lexer
-- - @bs@ The UTF8 bytes of the current character being lexed
-- - @str@ The remaining input to be lexed and parsed
type ParserInput = (Char, [Word8], String)
-- An input @(prevChar, (currChar, bs), remainingSource)@ represents
-- - @prevChar@ The previous character, successfully consumed by the lexer
-- - @currChar@ The current character being lexed
-- - @bs@ The yet unconsumed UTF8 bytes of the current character being lexed
-- - @remainingSource@ The remaining source to be lexed and parsed
-- (excluding the character currently being lexed)
type ParserInput = (Char, (Char, [Word8]), String)


@@ -1,11 +1,46 @@
module Wasp.Analyzer.Parser.ParseError where
{-# LANGUAGE NamedFieldPuns #-}
module Wasp.Analyzer.Parser.ParseError
( ParseError (..),
getErrorMessage,
getSourcePosition,
)
where
import Wasp.Analyzer.Parser.Token
data ParseError
= -- | A lexical error representing an invalid character
= -- | A lexical error representing an invalid character. It means that the lexer
-- failed to construct/parse a token due to this unexpected character.
UnexpectedChar Char SourcePosition
| -- | A parse error caused by some token
ParseError Token
| QuoterDifferentTags (String, SourcePosition) (String, SourcePosition)
| -- | In @UnexpectedToken token expectedTokens@, @token@ is the token where the parse error
-- occurred, while @expectedTokens@ is a list of tokens, any one of which would
-- avoid that error if it appeared in place of @token@.
-- NOTE(martin): These @expectedTokens@ are represented via the names used for them
-- in the grammar defined in Parser.y, under the @%token@ section (names are in the
-- first column), which have been prettified a bit (check Parser.y for details).
UnexpectedToken Token [String]
| -- | Thrown if the parser encounters a quoter that has different tags, e.g.
-- {=json psl=}. Then the first String in QuoterDifferentTags will be "json"
-- while the second one will be "psl".
QuoterDifferentTags (String, SourcePosition) (String, SourcePosition)
deriving (Eq, Show)
getErrorMessage :: ParseError -> String
getErrorMessage (UnexpectedChar unexpectedChar _) =
"Unexpected character: " ++ [unexpectedChar]
getErrorMessage (UnexpectedToken unexpectedToken expectedTokens) =
unexpectedTokenMessage
++ if not (null expectedTokens) then "\n" ++ expectedTokensMessage else ""
where
unexpectedTokenMessage = "Unexpected token: " ++ tokenLexeme unexpectedToken
expectedTokensMessage =
"Expected one of the following tokens instead: "
++ unwords expectedTokens
getErrorMessage (QuoterDifferentTags (ltag, _) (rtag, _)) =
"Quoter tags don't match: {=" ++ ltag ++ " ... " ++ rtag ++ "=}"
getSourcePosition :: ParseError -> SourcePosition
getSourcePosition (UnexpectedChar _ pos) = pos
getSourcePosition (UnexpectedToken Token {tokenPosition} _) = tokenPosition
getSourcePosition (QuoterDifferentTags _ (_, rpos)) = rpos
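
For reference, a hedged usage sketch (mirrored by the new tests further below); the positional Token construction assumes the tokenType/tokenPosition/tokenLexeme field order from Wasp.Analyzer.Parser.Token.

-- Illustrative only:
exampleUnexpectedTokenMessage :: String
exampleUnexpectedTokenMessage =
  getErrorMessage $
    UnexpectedToken (Token TRCurly (SourcePosition 100 18) "}") ["<identifier>", ","]
-- ==> "Unexpected token: }\nExpected one of the following tokens instead: <identifier> ,"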


@@ -1,4 +1,6 @@
{
{-# LANGUAGE LambdaCase #-}
-- This file is processed by Happy (https://www.haskell.org/happy/) and generates
-- the module `Wasp.Analyzer.Parser.Parser`
@@ -18,10 +20,12 @@ import Control.Monad.Except (throwError)
-- Lines below tell Happy:
-- - to name the main parsing function `parse` when generating it
-- - that input to parser is `Token` type
-- - to call`parseError` when the parser encounters an error
-- - to call `parseError` when the parser encounters an error
-- - to provide `parseError` with a list of expected tokens that would avoid the error
%name parse
%tokentype { Token }
%error { parseError }
%errorhandlertype explist
-- This sets up Happy to use a monadic parser and threaded lexer.
-- This means that parser generated by Happy will request tokens from lexer as it needs them instead of
@@ -33,28 +37,27 @@ import Control.Monad.Except (throwError)
-- This section defines the names that are used in the grammar section to
-- refer to each type of token.
-- NOTE: If you update it, also update the @prettyShowGrammarToken@ function below.
%token
'(' { Token { tokenType = TLParen } }
')' { Token { tokenType = TRParen } }
'[' { Token { tokenType = TLSquare } }
']' { Token { tokenType = TRSquare } }
'{' { Token { tokenType = TLCurly } }
'}' { Token { tokenType = TRCurly } }
',' { Token { tokenType = TComma } }
':' { Token { tokenType = TColon } }
import { Token { tokenType = TImport } }
from { Token { tokenType = TFrom } }
true { Token { tokenType = TTrue } }
false { Token { tokenType = TFalse } }
string { Token { tokenType = TString $$ } }
int { Token { tokenType = TInt $$ } }
double { Token { tokenType = TDouble $$ } }
'{=' { Token { tokenType = TLQuote $$ } }
quoted { Token { tokenType = TQuoted $$ } }
'=}' { Token { tokenType = TRQuote $$ } }
ident { Token { tokenType = TIdentifier $$ } }
'(' { Token { tokenType = TLParen } }
')' { Token { tokenType = TRParen } }
'[' { Token { tokenType = TLSquare } }
']' { Token { tokenType = TRSquare } }
'{' { Token { tokenType = TLCurly } }
'}' { Token { tokenType = TRCurly } }
',' { Token { tokenType = TComma } }
':' { Token { tokenType = TColon } }
import { Token { tokenType = TImport } }
from { Token { tokenType = TFrom } }
true { Token { tokenType = TTrue } }
false { Token { tokenType = TFalse } }
string { Token { tokenType = TString $$ } }
int { Token { tokenType = TInt $$ } }
double { Token { tokenType = TDouble $$ } }
'{=' { Token { tokenType = TLQuote $$ } }
quoted { Token { tokenType = TQuoted $$ } }
'=}' { Token { tokenType = TRQuote $$ } }
identifier { Token { tokenType = TIdentifier $$ } }
%%
-- Grammar rules
@@ -66,7 +69,7 @@ Wasp :: { AST }
Stmt :: { Stmt }
: Decl { $1 }
Decl :: { Stmt }
: ident ident Expr { Decl $1 $2 $3 }
: identifier identifier Expr { Decl $1 $2 $3 }
Expr :: { Expr }
: Dict { $1 }
@@ -79,7 +82,7 @@ Expr :: { Expr }
| double { DoubleLiteral $1 }
| true { BoolLiteral True }
| false { BoolLiteral False }
| ident { Var $1 }
| identifier { Var $1 }
Dict :: { Expr }
: '{' DictEntries '}' { Dict $2 }
@@ -89,7 +92,7 @@ DictEntries :: { [(Identifier, Expr)] }
: DictEntry { [$1] }
| DictEntries ',' DictEntry { $1 ++ [$3] }
DictEntry :: { (Identifier, Expr) }
: ident ':' Expr { ($1, $3) }
: identifier ':' Expr { ($1, $3) }
List :: { Expr }
: '[' ListVals ']' { List $2 }
@@ -113,8 +116,8 @@ TupleVals :: { (Expr, Expr, [Expr]) }
Extimport :: { Expr }
: import Name from string { ExtImport $2 $4 }
Name :: { ExtImportName }
: ident { ExtImportModule $1 }
| '{' ident '}' { ExtImportField $2 }
: identifier { ExtImportModule $1 }
| '{' identifier '}' { ExtImportField $2 }
Quoter :: { Expr }
: SourcePosition '{=' Quoted SourcePosition '=}' {% if $2 /= $5
@@ -129,6 +132,29 @@ SourcePosition :: { SourcePosition }
: {- empty -} {% fmap parserSourcePosition get }
{
parseError :: Token -> Parser a
parseError token = throwError $ ParseError token
parseError :: (Token, [String]) -> Parser a
parseError (token, expectedTokens) =
throwError $ UnexpectedToken token $ prettyShowGrammarToken <$> expectedTokens
-- Input is a grammar token name, as defined in the %token section above (first column),
-- while the output is a nicer representation of it, ready to be shown,
-- e.g. in error messages.
prettyShowGrammarToken :: String -> String
prettyShowGrammarToken = \case
"'('" -> "("
"')'" -> ")"
"'['" -> "["
"']'" -> "]"
"'{'" -> "{"
"'}'" -> "}"
"','" -> ","
"':'" -> ":"
"string" -> "<string>"
"int" -> "<int>"
"double" -> "<double>"
"'{='" -> "{=<identifier>"
"quoted" -> "<quoted>"
"'=}'" -> "<identifier>=}"
"identifier" -> "<identifier>"
s -> s
}
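
As an aside (hedged, not part of the grammar file): with %errorhandlertype explist, Happy hands parseError the offending token plus the grammar names of the tokens it would have accepted, and prettyShowGrammarToken turns those names into the user-facing forms. The exact strings Happy passes are assumed to match the %token section above.

-- Illustrative only: for the truncated source "test Decl {" the call is roughly
--   parseError (Token TEOF (SourcePosition 1 12) "", ["'}'", "identifier"])
-- which throws
--   UnexpectedToken (Token TEOF (SourcePosition 1 12) "") ["}", "<identifier>"]
-- because of the mapping shown here:
examplePrettyNames :: [String]
examplePrettyNames = prettyShowGrammarToken <$> ["'}'", "identifier"]
-- ==> ["}", "<identifier>"]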


@@ -5,12 +5,15 @@ module Wasp.Util
toUpperFirst,
headSafe,
jsonSet,
indent,
)
where
import qualified Data.Aeson as Aeson
import Data.Char (isUpper, toLower, toUpper)
import qualified Data.HashMap.Strict as M
import Data.List (intercalate)
import Data.List.Split (splitOn)
import qualified Data.Text as Text
camelToKebabCase :: String -> String
@@ -43,3 +46,6 @@ headSafe xs = Just (head xs)
jsonSet :: Text.Text -> Aeson.Value -> Aeson.Value -> Aeson.Value
jsonSet key value (Aeson.Object o) = Aeson.Object $ M.insert key value o
jsonSet _ _ _ = error "Input JSON must be an object"
indent :: Int -> String -> String
indent numSpaces = intercalate "\n" . map (replicate numSpaces ' ' ++) . splitOn "\n"
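
A hedged usage sketch of the new helper (illustrative value only), matching the behaviour the new spec_indent tests pin down:

-- Illustrative only: every line, including already-indented ones, gains the prefix.
exampleIndent :: String
exampleIndent = indent 2 "Unexpected token: }\nExpected: <identifier>"
-- ==> "  Unexpected token: }\n  Expected: <identifier>"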


@@ -0,0 +1,45 @@
module Analyzer.Parser.ParseErrorTest where
import Test.Tasty.Hspec
import Wasp.Analyzer.Parser.ParseError
import Wasp.Analyzer.Parser.Token
spec_ParseErrorTest :: Spec
spec_ParseErrorTest = do
describe "Analyzer.Parser.ParseError" $ do
let unexpectedCharError = UnexpectedChar '!' (SourcePosition 2 42)
unexpectedTokenErrorNoSuggestions =
UnexpectedToken (Token TLCurly (SourcePosition 2 3) "{") []
unexpectedTokenErrorWithSuggestions =
UnexpectedToken
(Token TRCurly (SourcePosition 100 18) "}")
["<identifier>", ","]
quoterDifferentTagsError =
QuoterDifferentTags
("foo", SourcePosition 1 5)
("bar", SourcePosition 1 20)
describe "getErrorMessage returns human readable error message" $ do
it "for UnexpectedChar error" $ do
getErrorMessage unexpectedCharError `shouldBe` "Unexpected character: !"
it "for UnexpectedToken error" $ do
getErrorMessage unexpectedTokenErrorNoSuggestions
`shouldBe` "Unexpected token: {"
getErrorMessage unexpectedTokenErrorWithSuggestions
`shouldBe` ( "Unexpected token: }\n"
++ "Expected one of the following tokens instead: <identifier> ,"
)
it "for QuoterDifferentTags error" $ do
getErrorMessage quoterDifferentTagsError
`shouldBe` "Quoter tags don't match: {=foo ... bar=}"
describe "getSourcePosition returns correct position" $ do
it "for UnexpectedChar error" $ do
getSourcePosition unexpectedCharError `shouldBe` SourcePosition 2 42
it "for UnexpectedToken error" $ do
getSourcePosition unexpectedTokenErrorNoSuggestions
`shouldBe` SourcePosition 2 3
getSourcePosition unexpectedTokenErrorWithSuggestions
`shouldBe` SourcePosition 100 18
it "for QuoterDifferentTags error" $ do
getSourcePosition quoterDifferentTagsError
`shouldBe` SourcePosition 1 20


@@ -192,12 +192,14 @@ spec_Parser = do
let source = "test Decl {"
let expected =
Left $
ParseError $
Token
{ tokenType = TEOF,
tokenPosition = SourcePosition 1 12,
tokenLexeme = ""
}
UnexpectedToken
( Token
{ tokenType = TEOF,
tokenPosition = SourcePosition 1 12,
tokenLexeme = ""
}
)
["}", "<identifier>"]
parse source `shouldBe` expected
it "Parses multiple statements" $ do
@@ -212,3 +214,49 @@ spec_Parser = do
Decl "constant" "E" $ DoubleLiteral 2.71828
]
parse source `shouldBe` Right ast
describe "Fails with UnexpectedChar error if unrecognized character is encountered" $ do
it "e.g. when it encounters '^' after declaration name" $ do
let source = "test Decl ^ {}"
let expected = Left $ UnexpectedChar '^' $ SourcePosition 1 11
parse source `shouldBe` expected
it "e.g. when the identifier contains '!'" $ do
let source = "test De!cl {}"
let expected = Left $ UnexpectedChar '!' $ SourcePosition 1 8
parse source `shouldBe` expected
describe "Fails with ParseError error if unexpected token is encountered" $ do
it "When string follows identifier" $ do
let source = "test \"Declaration\" {}"
let expected =
Left $
UnexpectedToken
( Token
{ tokenType = TString "Declaration",
tokenPosition = SourcePosition 1 6,
tokenLexeme = "\"Declaration\""
}
)
["<identifier>"]
parse source `shouldBe` expected
it "When dictionary is missing a comma between the two fields" $ do
let source =
unlines
[ "test Declaration {",
" a: 1",
" b: 2 ",
"}"
]
let expected =
Left $
UnexpectedToken
( Token
{ tokenType = TIdentifier "b",
tokenPosition = SourcePosition 3 3,
tokenLexeme = "b"
}
)
["}", ","]
parse source `shouldBe` expected


@@ -61,3 +61,13 @@ spec_jsonSet = do
[ "prop1" .= newStrValue
]
jsonSet "prop1" (toJSON newStrValue) inputObj `shouldBe` expectedObj
spec_indent :: Spec
spec_indent = do
describe "indent should indent given text correctly" $ do
it "when just one line of text" $ do
indent 2 "foo" `shouldBe` " foo"
it "when multiple lines of text" $ do
indent 3 "foo\nbar" `shouldBe` " foo\n bar"
it "when text is already somewhat indented" $ do
indent 4 " foo\n bar" `shouldBe` " foo\n bar"