mirror of
https://github.com/wasp-lang/wasp.git
synced 2024-10-27 01:21:18 +03:00
Improved parse errors (added expected tokens, nicer error messages).
- Added expected tokens. - Nice error messages + source position.
This commit is contained in:
parent
470d4a5889
commit
c7ce883b5a
@ -110,29 +110,31 @@ module Wasp.Analyzer
|
||||
|
||||
-- * API
|
||||
analyze,
|
||||
E.takeDecls,
|
||||
takeDecls,
|
||||
AnalyzeError (..),
|
||||
getErrorMessage,
|
||||
getErrorSourcePosition,
|
||||
SourcePosition (..),
|
||||
)
|
||||
where
|
||||
|
||||
import Control.Arrow (left)
|
||||
import Control.Monad ((>=>))
|
||||
import Wasp.Analyzer.Evaluator (Decl)
|
||||
import qualified Wasp.Analyzer.Evaluator as E
|
||||
import qualified Wasp.Analyzer.Parser as P
|
||||
import Wasp.Analyzer.AnalyzeError
|
||||
( AnalyzeError (..),
|
||||
SourcePosition (..),
|
||||
getErrorMessage,
|
||||
getErrorSourcePosition,
|
||||
)
|
||||
import Wasp.Analyzer.Evaluator (Decl, evaluate, takeDecls)
|
||||
import Wasp.Analyzer.Parser (parse)
|
||||
import Wasp.Analyzer.StdTypeDefinitions (stdTypes)
|
||||
import qualified Wasp.Analyzer.TypeChecker as T
|
||||
|
||||
data AnalyzeError
|
||||
= ParseError P.ParseError
|
||||
| TypeError T.TypeError
|
||||
| EvaluationError E.EvaluationError
|
||||
deriving (Show, Eq)
|
||||
import Wasp.Analyzer.TypeChecker (typeCheck)
|
||||
|
||||
-- | Takes a Wasp source file and produces a list of declarations or a
|
||||
-- description of an error in the source file.
|
||||
analyze :: String -> Either AnalyzeError [Decl]
|
||||
analyze =
|
||||
(left ParseError . P.parse)
|
||||
>=> (left TypeError . T.typeCheck stdTypes)
|
||||
>=> (left EvaluationError . E.evaluate stdTypes)
|
||||
(left ParseError . parse)
|
||||
>=> (left TypeError . typeCheck stdTypes)
|
||||
>=> (left EvaluationError . evaluate stdTypes)
|
||||
|
29
waspc/src/Wasp/Analyzer/AnalyzeError.hs
Normal file
29
waspc/src/Wasp/Analyzer/AnalyzeError.hs
Normal file
@ -0,0 +1,29 @@
|
||||
module Wasp.Analyzer.AnalyzeError
|
||||
( AnalyzeError (..),
|
||||
getErrorMessage,
|
||||
getErrorSourcePosition,
|
||||
SourcePosition (..),
|
||||
)
|
||||
where
|
||||
|
||||
import qualified Wasp.Analyzer.Evaluator.EvaluationError as EE
|
||||
import Wasp.Analyzer.Parser (SourcePosition (..))
|
||||
import qualified Wasp.Analyzer.Parser.ParseError as PE
|
||||
import qualified Wasp.Analyzer.TypeChecker.TypeError as TE
|
||||
import Wasp.Util (indent)
|
||||
|
||||
data AnalyzeError
|
||||
= ParseError PE.ParseError
|
||||
| TypeError TE.TypeError
|
||||
| EvaluationError EE.EvaluationError
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | Renders an 'AnalyzeError' as a human-readable, multi-line message.
--
-- The first line names the kind of error (parse, type or evaluation error),
-- and the details follow on the next line(s), indented by two spaces.
getErrorMessage :: AnalyzeError -> String
getErrorMessage (ParseError e) = "Parse error:\n" ++ indent 2 (PE.getErrorMessage e)
-- TODO: Type and evaluation errors don't have a dedicated human-readable
--   message yet. Until they do, we fall back to their 'Show' output so that
--   reporting such an error does not crash.
getErrorMessage (TypeError e) = "Type error:\n" ++ indent 2 (show e)
getErrorMessage (EvaluationError e) = "Evaluation error:\n" ++ indent 2 (show e)
|
||||
|
||||
-- | Returns the position in the source at which the given error should be reported.
--
-- NOTE: Only parse errors are supported so far. For type and evaluation
--   errors this function is still partial and fails with a descriptive message.
getErrorSourcePosition :: AnalyzeError -> SourcePosition
getErrorSourcePosition (ParseError e) = PE.getSourcePosition e
getErrorSourcePosition (TypeError _) =
  error "TODO: getErrorSourcePosition is not implemented for TypeError yet"
getErrorSourcePosition (EvaluationError _) =
  error "TODO: getErrorSourcePosition is not implemented for EvaluationError yet"
|
@ -81,17 +81,19 @@ startCodeToInt (QuoterStartCode _) = quoter
|
||||
--
|
||||
-- This function is taken from the Alex basic wrapper.
|
||||
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
|
||||
alexGetByte (c, (b:bs), s) = Just (b, (c, bs, s))
|
||||
alexGetByte (_, [], []) = Nothing
|
||||
alexGetByte (_, [], (c:s)) = case encodeChar c of
|
||||
(b:bs) -> Just (b, (c, bs, s))
|
||||
alexGetByte (prevChar, (currChar, (b:bs)), remainingSource) =
|
||||
Just (b, (prevChar, (currChar, bs), remainingSource))
|
||||
alexGetByte (_, (_, []), []) = Nothing
|
||||
alexGetByte (_, (currChar, []), (newChar:remainingSource)) =
|
||||
case encodeChar newChar of
|
||||
(b:bs) -> Just (b, (currChar, (newChar, bs), remainingSource))
|
||||
[] -> Nothing
|
||||
|
||||
-- | Required by Alex.
|
||||
--
|
||||
-- This function is taken from the Alex basic wrapper.
|
||||
alexInputPrevChar :: AlexInput -> Char
|
||||
alexInputPrevChar (c, _, _) = c
|
||||
alexInputPrevChar (prevChar, _, _) = prevChar
|
||||
|
||||
-- | Lexes a single token from the input.
|
||||
--
|
||||
@ -105,14 +107,19 @@ alexInputPrevChar (c, _, _) = c
|
||||
-- This function internally calls `alexScan`, which is a function generated by Alex responsible for doing actual lexing/scanning.
|
||||
lexer :: (Token -> Parser a) -> Parser a
|
||||
lexer parseToken = do
|
||||
input@(previousChar, _, remainingSource) <- gets parserRemainingInput
|
||||
input@(_, _, remainingSource) <- gets parserRemainingInput
|
||||
startCodeInt <- gets $ startCodeToInt . parserLexerStartCode
|
||||
case alexScan input startCodeInt of
|
||||
AlexEOF -> do
|
||||
createConstToken TEOF "" >>= parseToken
|
||||
AlexError _ -> do
|
||||
AlexError _input'@(_, _, c:_) -> do
|
||||
-- NOTE(martin): @_input'@ is actually the same as @input@ before the scan,
|
||||
-- that is how AlexError works -> it returns last AlexInput before Alex
|
||||
-- failed. Therefore, the character it failed on is actually the first
|
||||
-- character of the remaining source.
|
||||
pos <- gets parserSourcePosition
|
||||
throwError $ UnexpectedChar previousChar pos
|
||||
throwError $ UnexpectedChar c pos
|
||||
AlexError (_, _, []) -> error "impossible"
|
||||
AlexSkip input' numCharsSkipped -> do
|
||||
updatePosition $ take numCharsSkipped remainingSource
|
||||
putInput input'
|
||||
|
@ -56,14 +56,17 @@ initialState :: String -> ParserState
|
||||
initialState source =
|
||||
ParserState
|
||||
{ parserSourcePosition = SourcePosition 1 1,
|
||||
parserRemainingInput = ('\n', [], source),
|
||||
-- NOTE: We use '\n' here as dummy value to start with.
|
||||
parserRemainingInput = ('\n', ('\n', []), source),
|
||||
parserLexerStartCode = DefaultStartCode
|
||||
}
|
||||
|
||||
-- | The type of the input given to the parser/lexer
|
||||
--
|
||||
-- An input @(c, bs, str)@ represents
|
||||
-- - @c@ The previous character consumed by the lexer
|
||||
-- - @bs@ The UTF8 bytes of the current character being lexed
|
||||
-- - @str@ The remaining input to be lexed and parsed
|
||||
type ParserInput = (Char, [Word8], String)
|
||||
-- An input @(prevChar, (currChar, bs), remainingSource)@ represents
|
||||
-- - @prevChar@ The previous character, successfully consumed by the lexer
|
||||
-- - @currChar@ The current character being lexed
|
||||
-- - @bs@ The yet unconsumed UTF8 bytes of the current character being lexed
|
||||
-- - @remainingSource@ The remaining source to be lexed and parsed
|
||||
-- (excluding the character currently being lexed)
|
||||
type ParserInput = (Char, (Char, [Word8]), String)
|
||||
|
@ -1,11 +1,46 @@
|
||||
module Wasp.Analyzer.Parser.ParseError where
|
||||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
|
||||
module Wasp.Analyzer.Parser.ParseError
|
||||
( ParseError (..),
|
||||
getErrorMessage,
|
||||
getSourcePosition,
|
||||
)
|
||||
where
|
||||
|
||||
import Wasp.Analyzer.Parser.Token
|
||||
|
||||
data ParseError
|
||||
= -- | A lexical error representing an invalid character
|
||||
= -- | A lexical error representing an invalid character. It means that lexer
|
||||
-- failed to construct/parse a token due to this unexpected character.
|
||||
UnexpectedChar Char SourcePosition
|
||||
| -- | A parse error caused by some token
|
||||
ParseError Token
|
||||
| QuoterDifferentTags (String, SourcePosition) (String, SourcePosition)
|
||||
| -- | In @ParseError token expectedTokens@, @token@ is the token where parse error
|
||||
-- occurred, while @expectedTokens@ is a list of tokens that would (any of them)
|
||||
-- avoid that error if they were there instead of the @token@.
|
||||
-- NOTE(martin): These @expectedTokens@ are represented via the names used for them
|
||||
-- in the grammar defined in Parser.y, under section @%token@ (names are in the
|
||||
-- first column), that have been a bit prettified (check Parser.y for details).
|
||||
UnexpectedToken Token [String]
|
||||
| -- | Thrown if parser encounters a quoter that has different tags, e.g.
|
||||
-- {=json psl=}. Then the first String in QuoterDifferentTags will be "json"
|
||||
-- while the second one will be "psl".
|
||||
QuoterDifferentTags (String, SourcePosition) (String, SourcePosition)
|
||||
deriving (Eq, Show)
|
||||
|
||||
-- | Renders a 'ParseError' as a human-readable message.
--
-- The message does not include the source position; use 'getSourcePosition'
-- to obtain it.
getErrorMessage :: ParseError -> String
getErrorMessage (UnexpectedChar unexpectedChar _) =
  "Unexpected character: " ++ [unexpectedChar]
getErrorMessage (UnexpectedToken unexpectedToken expectedTokens) =
  unexpectedTokenMessage
    ++ if not (null expectedTokens) then "\n" ++ expectedTokensMessage else ""
  where
    unexpectedTokenMessage = "Unexpected token: " ++ shownToken
    -- The end-of-file token is created with an empty lexeme, so showing its
    -- lexeme would leave the message without any token in it.
    shownToken = case tokenType unexpectedToken of
      TEOF -> "<end of file>"
      _ -> tokenLexeme unexpectedToken
    expectedTokensMessage =
      "Expected one of the following tokens instead: "
        ++ unwords expectedTokens
getErrorMessage (QuoterDifferentTags (ltag, _) (rtag, _)) =
  "Quoter tags don't match: {=" ++ ltag ++ " ... " ++ rtag ++ "=}"
|
||||
|
||||
-- | Returns the source position at which the given parse error is reported.
getSourcePosition :: ParseError -> SourcePosition
getSourcePosition parseError = case parseError of
  UnexpectedChar _ charPosition -> charPosition
  UnexpectedToken token _ -> tokenPosition token
  -- For mismatching quoter tags, the position of the second (right) tag is used.
  QuoterDifferentTags _ (_, rightTagPosition) -> rightTagPosition
|
||||
|
@ -1,4 +1,6 @@
|
||||
{
|
||||
{-# LANGUAGE LambdaCase #-}
|
||||
|
||||
-- This file is processed by Happy (https://www.haskell.org/happy/) and generates
|
||||
-- the module `Wasp.Analyzer.Parser.Parser`
|
||||
|
||||
@ -18,10 +20,12 @@ import Control.Monad.Except (throwError)
|
||||
-- Lines below tell Happy:
|
||||
-- - to name the main parsing function `parse` when generating it
|
||||
-- - that input to parser is `Token` type
|
||||
-- - to call`parseError` when the parser encounters an error
|
||||
-- - to call `parseError` when the parser encounters an error
|
||||
-- - to provide `parseError` with list of expected tokens that would avoid the error
|
||||
%name parse
|
||||
%tokentype { Token }
|
||||
%error { parseError }
|
||||
%errorhandlertype explist
|
||||
|
||||
-- This sets up Happy to use a monadic parser and threaded lexer.
|
||||
-- This means that parser generated by Happy will request tokens from lexer as it needs them instead of
|
||||
@ -33,8 +37,7 @@ import Control.Monad.Except (throwError)
|
||||
|
||||
-- This section defines the names that are used in the grammar section to
|
||||
-- refer to each type of token.
|
||||
|
||||
|
||||
-- NOTE: If you update it, also update the @prettyShowGrammarToken@ function below.
|
||||
%token
|
||||
'(' { Token { tokenType = TLParen } }
|
||||
')' { Token { tokenType = TRParen } }
|
||||
@ -54,7 +57,7 @@ import Control.Monad.Except (throwError)
|
||||
'{=' { Token { tokenType = TLQuote $$ } }
|
||||
quoted { Token { tokenType = TQuoted $$ } }
|
||||
'=}' { Token { tokenType = TRQuote $$ } }
|
||||
ident { Token { tokenType = TIdentifier $$ } }
|
||||
identifier { Token { tokenType = TIdentifier $$ } }
|
||||
|
||||
%%
|
||||
-- Grammar rules
|
||||
@ -66,7 +69,7 @@ Wasp :: { AST }
|
||||
Stmt :: { Stmt }
|
||||
: Decl { $1 }
|
||||
Decl :: { Stmt }
|
||||
: ident ident Expr { Decl $1 $2 $3 }
|
||||
: identifier identifier Expr { Decl $1 $2 $3 }
|
||||
|
||||
Expr :: { Expr }
|
||||
: Dict { $1 }
|
||||
@ -79,7 +82,7 @@ Expr :: { Expr }
|
||||
| double { DoubleLiteral $1 }
|
||||
| true { BoolLiteral True }
|
||||
| false { BoolLiteral False }
|
||||
| ident { Var $1 }
|
||||
| identifier { Var $1 }
|
||||
|
||||
Dict :: { Expr }
|
||||
: '{' DictEntries '}' { Dict $2 }
|
||||
@ -89,7 +92,7 @@ DictEntries :: { [(Identifier, Expr)] }
|
||||
: DictEntry { [$1] }
|
||||
| DictEntries ',' DictEntry { $1 ++ [$3] }
|
||||
DictEntry :: { (Identifier, Expr) }
|
||||
: ident ':' Expr { ($1, $3) }
|
||||
: identifier ':' Expr { ($1, $3) }
|
||||
|
||||
List :: { Expr }
|
||||
: '[' ListVals ']' { List $2 }
|
||||
@ -113,8 +116,8 @@ TupleVals :: { (Expr, Expr, [Expr]) }
|
||||
Extimport :: { Expr }
|
||||
: import Name from string { ExtImport $2 $4 }
|
||||
Name :: { ExtImportName }
|
||||
: ident { ExtImportModule $1 }
|
||||
| '{' ident '}' { ExtImportField $2 }
|
||||
: identifier { ExtImportModule $1 }
|
||||
| '{' identifier '}' { ExtImportField $2 }
|
||||
|
||||
Quoter :: { Expr }
|
||||
: SourcePosition '{=' Quoted SourcePosition '=}' {% if $2 /= $5
|
||||
@ -129,6 +132,29 @@ SourcePosition :: { SourcePosition }
|
||||
: {- empty -} {% fmap parserSourcePosition get }
|
||||
|
||||
{
|
||||
parseError :: Token -> Parser a
|
||||
parseError token = throwError $ ParseError token
|
||||
-- | Error handler that Happy calls when the parser encounters an error.
--
-- It receives the token at which parsing failed, together with the grammar
-- names of the tokens that would have avoided the error; the names are
-- prettified before being stored in the thrown 'UnexpectedToken' error.
parseError :: (Token, [String]) -> Parser a
parseError (unexpectedToken, expectedGrammarTokens) =
  throwError (UnexpectedToken unexpectedToken prettyExpectedTokens)
  where
    prettyExpectedTokens = map prettyShowGrammarToken expectedGrammarTokens
|
||||
|
||||
-- | Turns a grammar token name, as defined in the @%token@ section above
-- (first column), into a nicer representation of it that is ready to be shown
-- around, e.g. in error messages.
--
-- Names that have no special representation are returned unchanged.
prettyShowGrammarToken :: String -> String
prettyShowGrammarToken grammarToken =
  maybe grammarToken id (lookup grammarToken prettyNames)
  where
    -- Pairs of (grammar token name, its pretty representation).
    prettyNames =
      [ ("'('", "("),
        ("')'", ")"),
        ("'['", "["),
        ("']'", "]"),
        ("'{'", "{"),
        ("'}'", "}"),
        ("','", ","),
        ("':'", ":"),
        ("string", "<string>"),
        ("int", "<int>"),
        ("double", "<double>"),
        ("'{='", "{=<identifier>"),
        ("quoted", "<quoted>"),
        ("'=}'", "<identifier>=}"),
        ("identifier", "<identifier>")
      ]
|
||||
}
|
||||
|
@ -5,12 +5,15 @@ module Wasp.Util
|
||||
toUpperFirst,
|
||||
headSafe,
|
||||
jsonSet,
|
||||
indent,
|
||||
)
|
||||
where
|
||||
|
||||
import qualified Data.Aeson as Aeson
|
||||
import Data.Char (isUpper, toLower, toUpper)
|
||||
import qualified Data.HashMap.Strict as M
|
||||
import Data.List (intercalate)
|
||||
import Data.List.Split (splitOn)
|
||||
import qualified Data.Text as Text
|
||||
|
||||
camelToKebabCase :: String -> String
|
||||
@ -43,3 +46,6 @@ headSafe xs = Just (head xs)
|
||||
jsonSet :: Text.Text -> Aeson.Value -> Aeson.Value -> Aeson.Value
|
||||
jsonSet key value (Aeson.Object o) = Aeson.Object $ M.insert key value o
|
||||
jsonSet _ _ _ = error "Input JSON must be an object"
|
||||
|
||||
-- | Prepends @numSpaces@ spaces to every line of the given text.
--
-- The text is split on @"\n"@ and joined back with @"\n"@, so the number of
-- lines stays the same.
indent :: Int -> String -> String
indent numSpaces text = intercalate "\n" [padding ++ line | line <- splitOn "\n" text]
  where
    padding = replicate numSpaces ' '
|
||||
|
45
waspc/test/Analyzer/Parser/ParseErrorTest.hs
Normal file
45
waspc/test/Analyzer/Parser/ParseErrorTest.hs
Normal file
@ -0,0 +1,45 @@
|
||||
module Analyzer.Parser.ParseErrorTest where
|
||||
|
||||
import Test.Tasty.Hspec
|
||||
import Wasp.Analyzer.Parser.ParseError
|
||||
import Wasp.Analyzer.Parser.Token
|
||||
|
||||
spec_ParseErrorTest :: Spec
|
||||
spec_ParseErrorTest = do
|
||||
describe "Analyzer.Parser.ParseError" $ do
|
||||
let unexpectedCharError = UnexpectedChar '!' (SourcePosition 2 42)
|
||||
unexpectedTokenErrorNoSuggestions =
|
||||
UnexpectedToken (Token TLCurly (SourcePosition 2 3) "{") []
|
||||
unexpectedTokenErrorWithSuggestions =
|
||||
UnexpectedToken
|
||||
(Token TRCurly (SourcePosition 100 18) "}")
|
||||
["<identifier>", ","]
|
||||
quoterDifferentTagsError =
|
||||
QuoterDifferentTags
|
||||
("foo", SourcePosition 1 5)
|
||||
("bar", SourcePosition 1 20)
|
||||
describe "getErrorMessage returns human readable error message" $ do
|
||||
it "for UnexpectedChar error" $ do
|
||||
getErrorMessage unexpectedCharError `shouldBe` "Unexpected character: !"
|
||||
it "for UnexpectedToken error" $ do
|
||||
getErrorMessage unexpectedTokenErrorNoSuggestions
|
||||
`shouldBe` "Unexpected token: {"
|
||||
getErrorMessage unexpectedTokenErrorWithSuggestions
|
||||
`shouldBe` ( "Unexpected token: }\n"
|
||||
++ "Expected one of the following tokens instead: <identifier> ,"
|
||||
)
|
||||
it "for QuoterDifferentTags error" $ do
|
||||
getErrorMessage quoterDifferentTagsError
|
||||
`shouldBe` "Quoter tags don't match: {=foo ... bar=}"
|
||||
|
||||
describe "getSourcePosition returns correct position" $ do
|
||||
it "for UnexpectedChar error" $ do
|
||||
getSourcePosition unexpectedCharError `shouldBe` SourcePosition 2 42
|
||||
it "for UnexpectedToken error" $ do
|
||||
getSourcePosition unexpectedTokenErrorNoSuggestions
|
||||
`shouldBe` SourcePosition 2 3
|
||||
getSourcePosition unexpectedTokenErrorWithSuggestions
|
||||
`shouldBe` SourcePosition 100 18
|
||||
it "for QuoterDifferentTags error" $ do
|
||||
getSourcePosition quoterDifferentTagsError
|
||||
`shouldBe` SourcePosition 1 20
|
@ -192,12 +192,14 @@ spec_Parser = do
|
||||
let source = "test Decl {"
|
||||
let expected =
|
||||
Left $
|
||||
ParseError $
|
||||
Token
|
||||
UnexpectedToken
|
||||
( Token
|
||||
{ tokenType = TEOF,
|
||||
tokenPosition = SourcePosition 1 12,
|
||||
tokenLexeme = ""
|
||||
}
|
||||
)
|
||||
["}", "<identifier>"]
|
||||
parse source `shouldBe` expected
|
||||
|
||||
it "Parses multiple statements" $ do
|
||||
@ -212,3 +214,49 @@ spec_Parser = do
|
||||
Decl "constant" "E" $ DoubleLiteral 2.71828
|
||||
]
|
||||
parse source `shouldBe` Right ast
|
||||
|
||||
describe "Fails with UnexpectedChar error if unrecognized character is encountered" $ do
|
||||
it "e.g. when it encounters '^' after declaration name" $ do
|
||||
let source = "test Decl ^ {}"
|
||||
let expected = Left $ UnexpectedChar '^' $ SourcePosition 1 11
|
||||
parse source `shouldBe` expected
|
||||
|
||||
it "e.g. when the identifier contains '!'" $ do
|
||||
let source = "test De!cl {}"
|
||||
let expected = Left $ UnexpectedChar '!' $ SourcePosition 1 8
|
||||
parse source `shouldBe` expected
|
||||
|
||||
describe "Fails with ParseError error if unexpected token is encountered" $ do
|
||||
it "When string follows identifier" $ do
|
||||
let source = "test \"Declaration\" {}"
|
||||
let expected =
|
||||
Left $
|
||||
UnexpectedToken
|
||||
( Token
|
||||
{ tokenType = TString "Declaration",
|
||||
tokenPosition = SourcePosition 1 6,
|
||||
tokenLexeme = "\"Declaration\""
|
||||
}
|
||||
)
|
||||
["<identifier>"]
|
||||
parse source `shouldBe` expected
|
||||
|
||||
it "When dictionary is missing a comma between the two fields" $ do
|
||||
let source =
|
||||
unlines
|
||||
[ "test Declaration {",
|
||||
" a: 1",
|
||||
" b: 2 ",
|
||||
"}"
|
||||
]
|
||||
let expected =
|
||||
Left $
|
||||
UnexpectedToken
|
||||
( Token
|
||||
{ tokenType = TIdentifier "b",
|
||||
tokenPosition = SourcePosition 3 3,
|
||||
tokenLexeme = "b"
|
||||
}
|
||||
)
|
||||
["}", ","]
|
||||
parse source `shouldBe` expected
|
||||
|
@ -61,3 +61,13 @@ spec_jsonSet = do
|
||||
[ "prop1" .= newStrValue
|
||||
]
|
||||
jsonSet "prop1" (toJSON newStrValue) inputObj `shouldBe` expectedObj
|
||||
|
||||
-- | Tests for 'indent': every line of the text must get exactly the requested
-- number of spaces prepended, on top of any indentation it already has.
spec_indent :: Spec
spec_indent = do
  describe "indent should indent given text correctly" $ do
    it "when just one line of text" $ do
      indent 2 "foo" `shouldBe` "  foo"
    it "when multiple lines of text" $ do
      indent 3 "foo\nbar" `shouldBe` "   foo\n   bar"
    it "when text is already somewhat indented" $ do
      -- Existing indentation (2 and 4 spaces) is kept, and 4 more spaces are added.
      indent 4 "  foo\n    bar" `shouldBe` "      foo\n        bar"
|
||||
|
Loading…
Reference in New Issue
Block a user