diff --git a/idris2api.ipkg b/idris2api.ipkg index b19e21542..169afcc70 100644 --- a/idris2api.ipkg +++ b/idris2api.ipkg @@ -118,6 +118,7 @@ modules = Libraries.Text.Bounded, Libraries.Text.Lexer, Libraries.Text.Lexer.Core, + Libraries.Text.Lexer.Tokenizer, Libraries.Text.Literate, Libraries.Text.Parser, Libraries.Text.Parser.Core, diff --git a/src/Core/Core.idr b/src/Core/Core.idr index 853ad088a..b8c9001b9 100644 --- a/src/Core/Core.idr +++ b/src/Core/Core.idr @@ -133,8 +133,10 @@ data Error : Type where GenericMsg : FC -> String -> Error TTCError : TTCErrorMsg -> Error FileErr : String -> FileError -> Error + LitFail : FC -> Error + LexFail : FC -> String -> Error ParseFail : (Show token, Pretty token) => - FC -> ParseError token -> Error + FC -> String -> List token -> Error ModuleNotFound : FC -> ModuleIdent -> Error CyclicImports : List ModuleIdent -> Error ForceNeeded : Error @@ -295,7 +297,9 @@ Show Error where show (GenericMsg fc str) = show fc ++ ":" ++ str show (TTCError msg) = "Error in TTC file: " ++ show msg show (FileErr fname err) = "File error (" ++ fname ++ "): " ++ show err - show (ParseFail fc err) = "Parse error (" ++ show err ++ ")" + show (LitFail fc) = show fc ++ ":Can't parse literate" + show (LexFail fc err) = show fc ++ ":Lexer error (" ++ show err ++ ")" + show (ParseFail fc err toks) = "Parse error (" ++ show err ++ "): " ++ show toks show (ModuleNotFound fc ns) = show fc ++ ":" ++ show ns ++ " not found" show (CyclicImports ns) @@ -373,7 +377,9 @@ getErrorLoc (BadRunElab loc _ _) = Just loc getErrorLoc (GenericMsg loc _) = Just loc getErrorLoc (TTCError _) = Nothing getErrorLoc (FileErr _ _) = Nothing -getErrorLoc (ParseFail loc _) = Just loc +getErrorLoc (LitFail loc) = Just loc +getErrorLoc (LexFail loc _) = Just loc +getErrorLoc (ParseFail loc _ _) = Just loc getErrorLoc (ModuleNotFound loc _) = Just loc getErrorLoc (CyclicImports _) = Nothing getErrorLoc ForceNeeded = Nothing diff --git a/src/Idris/Error.idr b/src/Idris/Error.idr index 4a5aa703b..3d6f4e20f 100644 --- a/src/Idris/Error.idr +++ b/src/Idris/Error.idr @@ -406,8 +406,12 @@ perror (TTCError msg) <++> parens (pretty "the most likely case is that the ./build directory in your current project contains files from a previous build of idris2 or the idris2 executable is from a different build than the installed .ttc files") perror (FileErr fname err) = pure $ errorDesc (reflow "File error in" <++> pretty fname <++> colon) <++> pretty (show err) -perror (ParseFail _ err) - = pure $ pretty err +perror (LitFail fc) + = pure $ errorDesc (reflow "Can't parse literate.") <+> line <+> !(ploc fc) +perror (LexFail fc msg) + = pure $ errorDesc (pretty msg) <+> line <+> !(ploc fc) +perror (ParseFail fc msg toks) + = pure $ errorDesc (pretty msg) <+> line <+> !(ploc fc) perror (ModuleNotFound fc ns) = pure $ errorDesc ("Module" <++> annotate FileCtxt (pretty ns) <++> reflow "not found") <+> line <+> !(ploc fc) perror (CyclicImports ns) diff --git a/src/Idris/IDEMode/Parser.idr b/src/Idris/IDEMode/Parser.idr index fdbde684d..91a20d1ab 100644 --- a/src/Idris/IDEMode/Parser.idr +++ b/src/Idris/IDEMode/Parser.idr @@ -30,7 +30,7 @@ ideTokens = [(digits, \x => IntegerLit (cast x)), (stringLit, \x => StringLit 0 (fromMaybe "" (escape 0 (stripQuotes x)))), (identAllowDashes, \x => Ident x), - (space, Comment)] + (space, (const Comment))] idelex : String -> Either (Int, Int, String) (List (WithBounds Token)) idelex str @@ -43,7 +43,7 @@ idelex str where notComment : WithBounds Token -> Bool notComment t = case t.val of - Comment _ => False + Comment => False _ => True covering @@ -66,7 +66,7 @@ sexp ideParser : {e : _} -> String -> Grammar Token e ty -> Either (ParseError Token) ty ideParser str p - = do toks <- mapError LexFail $ idelex str + = do toks <- mapError (\err => LexFail (NoRuleApply, err)) $ idelex str parsed <- mapError toGenericParsingError $ parse p toks Right (fst parsed) diff --git a/src/Idris/Package.idr b/src/Idris/Package.idr index 46485399c..e8a3e66c9 100644 --- a/src/Idris/Package.idr +++ b/src/Idris/Package.idr @@ -474,12 +474,6 @@ clean pkg opts -- `opts` is not used but might be in the future delete $ ttFile <.> "ttc" delete $ ttFile <.> "ttm" -getParseErrorLoc : String -> ParseError Token -> FC -getParseErrorLoc fname (ParseFail _ (Just pos) _) = MkFC fname pos pos -getParseErrorLoc fname (LexFail (l, c, _)) = MkFC fname (l, c) (l, c) -getParseErrorLoc fname (LitFail _) = MkFC fname (0, 0) (0, 0) -- TODO: Remove this unused case -getParseErrorLoc fname _ = replFC - -- Just load the given module, if it exists, which will involve building -- it if necessary runRepl : {auto c : Ref Ctxt Defs} -> @@ -506,8 +500,7 @@ parsePkgFile file = do (do desc <- parsePkgDesc file eoi pure desc) - | Left (FileFail err) => throw (FileErr file err) - | Left err => throw (ParseFail (getParseErrorLoc file err) err) + | Left err => throw (fromParseError file err) addFields fs (initPkgDesc pname) processPackage : {auto c : Ref Ctxt Defs} -> @@ -525,8 +518,7 @@ processPackage cmd file opts (do desc <- parsePkgDesc file eoi pure desc) - | Left (FileFail err) => throw (FileErr file err) - | Left err => throw (ParseFail (getParseErrorLoc file err) err) + | Left err => throw (fromParseError file err) pkg <- addFields fs (initPkgDesc pname) maybe (pure ()) setBuildDir (builddir pkg) setOutputDir (outputdir pkg) @@ -628,8 +620,7 @@ findIpkg fname (do desc <- parsePkgDesc ipkgn eoi pure desc) - | Left (FileFail err) => throw (FileErr ipkgn err) - | Left err => throw (ParseFail (getParseErrorLoc ipkgn err) err) + | Left err => throw (fromParseError ipkgn err) pkg <- addFields fs (initPkgDesc pname) maybe (pure ()) setBuildDir (builddir pkg) setOutputDir (outputdir pkg) diff --git a/src/Idris/Parser.idr b/src/Idris/Parser.idr index 5b18edf7c..c81f0e1d5 100644 --- a/src/Idris/Parser.idr +++ b/src/Idris/Parser.idr @@ -390,7 +390,7 @@ mutual pure (PDotted (boundToFC fname b) b.val) <|> do b <- bounds (symbol "`(" *> expr pdef fname indents <* symbol ")") pure (PQuote (boundToFC fname b) b.val) - <|> do b <- bounds (symbol "`{{" *> name <* symbol "}" <* symbol "}") + <|> do b <- bounds (symbol "`{{" *> name <* symbol "}}") pure (PQuoteName (boundToFC fname b) b.val) <|> do b <- bounds (symbol "`[" *> nonEmptyBlock (topDecl fname) <* symbol "]") pure (PQuoteDecl (boundToFC fname b) (collectDefs (concat b.val))) diff --git a/src/Idris/ProcessIdr.idr b/src/Idris/ProcessIdr.idr index 513316d04..1bef7c40e 100644 --- a/src/Idris/ProcessIdr.idr +++ b/src/Idris/ProcessIdr.idr @@ -4,6 +4,7 @@ import Compiler.Inline import Core.Binary import Core.Context +import Core.Core import Core.Context.Log import Core.Directory import Core.Env @@ -26,6 +27,7 @@ import Idris.Syntax import Idris.Pretty import Data.List +import Libraries.Data.String.Extra import Libraries.Data.NameMap import System.File @@ -187,11 +189,13 @@ modTime fname pure (cast t) export -getParseErrorLoc : String -> ParseError Token -> FC -getParseErrorLoc fname (ParseFail _ (Just pos) _) = MkFC fname pos pos -getParseErrorLoc fname (LexFail (l, c, _)) = MkFC fname (l, c) (l, c) -getParseErrorLoc fname (LitFail (MkLitErr l c _)) = MkFC fname (l, 0) (l, 0) -getParseErrorLoc fname _ = replFC +fromParseError : (Show token, Pretty token) => String -> ParseError token -> Error +fromParseError fname (FileFail err) = FileErr fname err +fromParseError fname (LitFail (MkLitErr l c _)) = LitFail (MkFC fname (l, c) (l, c + 1)) +fromParseError fname (LexFail (ComposeNotClosing begin end, _, _, _)) = LexFail (MkFC fname begin end) "Bracket is not properly closed." +fromParseError fname (LexFail (_, l, c, _)) = LexFail (MkFC fname (l, c) (l, c + 1)) "Can't recognoise token." +fromParseError fname (ParseFail msg (Just (l, c)) toks) = ParseFail (MkFC fname (l, c) (l, c + 1)) (msg +> '.') toks +fromParseError fname (ParseFail msg Nothing toks) = ParseFail replFC (msg +> '.') toks export readHeader : {auto c : Ref Ctxt Defs} -> @@ -199,17 +203,11 @@ readHeader : {auto c : Ref Ctxt Defs} -> readHeader path = do Right res <- coreLift (readFile path) | Left err => throw (FileErr path err) - case runParserTo (isLitFile path) isColon res (progHdr path) of - Left err => throw (ParseFail (getParseErrorLoc path err) err) + -- Stop at the first :, that's definitely not part of the header, to + -- save lexing the whole file unnecessarily + case runParserTo (isLitFile path) (is ':') res (progHdr path) of + Left err => throw (fromParseError path err) Right mod => pure mod - where - -- Stop at the first :, that's definitely not part of the header, to - -- save lexing the whole file unnecessarily - isColon : WithBounds Token -> Bool - isColon t - = case t.val of - Symbol ":" => True - _ => False %foreign "scheme:collect" prim__gc : Int -> PrimIO () @@ -271,7 +269,7 @@ processMod srcf ttcf msg sourcecode do iputStrLn msg Right mod <- logTime ("++ Parsing " ++ srcf) $ pure (runParser (isLitFile srcf) sourcecode (do p <- prog srcf; eoi; pure p)) - | Left err => pure (Just [ParseFail (getParseErrorLoc srcf err) err]) + | Left err => pure (Just [fromParseError srcf err]) initHash traverse addPublicHash (sort hs) resetNextVar diff --git a/src/Libraries/Text/Lexer.idr b/src/Libraries/Text/Lexer.idr index 6e6ce1dfb..a5bf031f5 100644 --- a/src/Libraries/Text/Lexer.idr +++ b/src/Libraries/Text/Lexer.idr @@ -126,6 +126,13 @@ mutual many : Lexer -> Recognise False many l = opt (some l) +||| Repeat the sub-lexer `l` one or more times until the lexer +||| `stopBefore` is encountered. `stopBefore` will not be consumed. +||| /((?!`stopBefore`)`l`)\+/ +export +someUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Lexer +someUntil stopBefore l = some (reject stopBefore <+> l) + ||| Repeat the sub-lexer `l` zero or more times until the lexer ||| `stopBefore` is encountered. `stopBefore` will not be consumed. ||| Not guaranteed to consume input. @@ -313,18 +320,19 @@ export quote : (q : Lexer) -> (l : Lexer) -> Lexer quote q l = surround q q l -||| Recognise an escape character (often '\\') followed by a sub-lexer +||| Recognise an escape sub-lexer (often '\\') followed by +||| another sub-lexer ||| /[`esc`]`l`/ export -escape : (esc : Char) -> Lexer -> Lexer -escape esc l = is esc <+> l +escape : (esc : Lexer) -> Lexer -> Lexer +escape esc l = esc <+> l ||| Recognise a string literal, including escaped characters. ||| (Note: doesn't yet handle escape sequences such as \123) ||| /"(\\\\.|.)\*?"/ export stringLit : Lexer -stringLit = quote (is '"') (escape '\\' any <|> any) +stringLit = quote (is '"') (escape (is '\\') any <|> any) ||| Recognise a character literal, including escaped characters. ||| (Note: doesn't yet handle escape sequences such as \123) @@ -332,7 +340,7 @@ stringLit = quote (is '"') (escape '\\' any <|> any) export charLit : Lexer charLit = let q = '\'' in - is q <+> (escape '\\' (control <|> any) <|> isNot q) <+> is q + is q <+> (escape (is '\\') (control <|> any) <|> isNot q) <+> is q where lexStr : List String -> Lexer lexStr [] = fail diff --git a/src/Libraries/Text/Lexer/Core.idr b/src/Libraries/Text/Lexer/Core.idr index b1188a466..d78f7a0ad 100644 --- a/src/Libraries/Text/Lexer/Core.idr +++ b/src/Libraries/Text/Lexer/Core.idr @@ -97,6 +97,7 @@ strTail start (MkStrLen str len) -- If the string is recognised, returns the index at which the token -- ends +export scan : Recognise c -> List Char -> List Char -> Maybe (List Char, List Char) scan Empty tok str = pure (tok, str) scan Fail tok str = Nothing diff --git a/src/Libraries/Text/Lexer/Tokenizer.idr b/src/Libraries/Text/Lexer/Tokenizer.idr new file mode 100644 index 000000000..792fb1137 --- /dev/null +++ b/src/Libraries/Text/Lexer/Tokenizer.idr @@ -0,0 +1,152 @@ +module Libraries.Text.Lexer.Tokenizer + +import public Libraries.Control.Delayed +import public Libraries.Text.Bounded +import Libraries.Data.Bool.Extra +import Libraries.Data.String.Extra +import Libraries.Text.Lexer.Core +import Libraries.Text.Lexer +import Libraries.Text.PrettyPrint.Prettyprinter +import Libraries.Text.PrettyPrint.Prettyprinter.Util +import Data.List +import Data.Either +import Data.Nat +import Data.Strings + +%default total + +||| Description of a language's tokenization rule. +export +data Tokenizer : (tokenType : Type) -> Type where + Match : Lexer -> (String -> tokenType) -> Tokenizer tokenType + Compose : (begin : Lexer) -> + (mapBegin : String -> tokenType) -> + (tagger : String -> tag) -> + (middle : Inf (tag -> Tokenizer tokenType)) -> + (end : tag -> Lexer) -> + (mapEnd : String -> tokenType) -> + Tokenizer tokenType + Alt : Tokenizer tokenType -> Lazy (Tokenizer tokenType) -> Tokenizer tokenType + +||| Alternative tokenizer rules. +export +(<|>) : Tokenizer t -> Lazy (Tokenizer t) -> Tokenizer t +(<|>) = Alt + +||| Match on a recogniser and cast the string to a token. +export +match : Lexer -> (String -> a) -> Tokenizer a +match = Match + +||| Compose other tokenizer. Language composition should be quoted between +||| a begin lexer and a end lexer. The begin token can be used to generate +||| the composition tokenizer and the end lexer. +export +compose : (begin : Lexer) -> + (mapBegin : String -> a) -> + (tagger : String -> tag) -> + (middle : Inf (tag -> Tokenizer a)) -> + (end : tag -> Lexer) -> + (mapEnd : String -> a) -> + Tokenizer a +compose = Compose + +||| Stop reason why tokenizer can't make more progress. +||| @ ComposeNotClosing carries the span of composition begin token in the +||| form of `(startLine, startCol), (endLine, endCol)`. +public export +data StopReason = EndInput | NoRuleApply | ComposeNotClosing (Int, Int) (Int, Int) + +export +Show StopReason where + show EndInput = "EndInput" + show NoRuleApply = "NoRuleApply" + show (ComposeNotClosing start end) = "ComposeNotClosing " ++ show start ++ " " ++ show end + +export +Pretty StopReason where + pretty EndInput = pretty "EndInput" + pretty NoRuleApply = pretty "NoRuleApply" + pretty (ComposeNotClosing start end) = "ComposeNotClosing" <++> pretty start <++> pretty end + +tokenise : Lexer -> + Tokenizer a -> + (line, col : Int) -> List (WithBounds a) -> + List Char -> + (List (WithBounds a), (StopReason, Int, Int, List Char)) +tokenise reject tokenizer line col acc [] = (reverse acc, EndInput, (line, col, [])) +tokenise reject tokenizer line col acc str + = case scan reject [] str of + Just _ => (reverse acc, (EndInput, line, col, str)) + Nothing => case getFirstMatch tokenizer str of + Right (toks, line', col', rest) => + -- assert total because getFirstMatch must consume something + assert_total (tokenise reject tokenizer line' col' (toks ++ acc) rest) + Left reason => (reverse acc, reason, (line, col, str)) + where + countNLs : List Char -> Nat + countNLs str = List.length (filter (== '\n') str) + + getCols : List Char -> Int -> Int + getCols x c + = case span (/= '\n') x of + (incol, []) => c + cast (length incol) + (incol, _) => cast (length incol) + + -- get the next lexeme using the `Lexer` in argument, its position and the input + -- Returns the new position, the lexeme parsed and the rest of the input + -- If parsing the lexer fails, this returns `Nothing` + getNext : (lexer : Lexer) -> (line, col : Int) -> (input : List Char) -> Maybe (String, Int, Int, List Char) + getNext lexer line col str = + let Just (token, rest) = scan lexer [] str + | _ => Nothing + line' = line + cast (countNLs token) + col' = getCols token col + tokenStr = fastPack $ reverse token + in pure (tokenStr, line', col', rest) + + getFirstMatch : Tokenizer a -> List Char -> + Either StopReason (List (WithBounds a), Int, Int, List Char) + getFirstMatch (Match lex fn) str + = let Just (tok, line', col', rest) = getNext lex line col str + | _ => Left NoRuleApply + tok' = MkBounded (fn tok) False line col line' col' + in Right ([tok'], line', col', rest) + getFirstMatch (Compose begin mapBegin tagger middleFn endFn mapEnd) str + = let Just (beginTok', line', col' , rest) = getNext begin line col str + | Nothing => Left NoRuleApply + tag = tagger beginTok' + middle = middleFn tag + end = endFn tag + beginTok'' = MkBounded (mapBegin beginTok') False line col line' col' + (midToks, (reason, line'', col'', rest'')) = + tokenise end middle line' col' [] rest + in case reason of + (ComposeNotClosing _ _) => Left reason + _ => let Just (endTok', lineEnd, colEnd, restEnd) = + getNext end line'' col'' rest'' + | _ => Left $ ComposeNotClosing (line, col) (line', col') + endTok'' = MkBounded (mapEnd endTok') False line'' col'' lineEnd colEnd + in Right ([endTok''] ++ reverse midToks ++ [beginTok''], lineEnd, colEnd, restEnd) + getFirstMatch (Alt t1 t2) str + = case getFirstMatch t1 str of + Right result => Right result + Left reason@(ComposeNotClosing _ _) => Left reason + Left _ => getFirstMatch t2 str + +export +lexTo : Lexer -> + Tokenizer a -> + String -> + (List (WithBounds a), (StopReason, Int, Int, String)) +lexTo reject tokenizer str + = let (ts, reason, (l, c, str')) = + tokenise reject tokenizer 0 0 [] (fastUnpack str) in + (ts, reason, (l, c, fastPack str')) + +||| Given a tokenizer and an input string, return a list of recognised tokens, +||| and the line, column, and remainder of the input at the first point in the string +||| where there are no recognised tokens. +export +lex : Tokenizer a -> String -> (List (WithBounds a), (StopReason, Int, Int, String)) +lex tokenizer str = lexTo (pred $ const False) tokenizer str diff --git a/src/Parser/Lexer/Source.idr b/src/Parser/Lexer/Source.idr index 1bf101ffb..6110310b9 100644 --- a/src/Parser/Lexer/Source.idr +++ b/src/Parser/Lexer/Source.idr @@ -8,6 +8,7 @@ import Data.Maybe import Data.Strings import Libraries.Data.String.Extra import public Libraries.Text.Bounded +import Libraries.Text.Lexer.Tokenizer import Libraries.Text.PrettyPrint.Prettyprinter import Libraries.Text.PrettyPrint.Prettyprinter.Util @@ -33,7 +34,7 @@ data Token | DotIdent String -- .ident | Symbol String -- Comments - | Comment String + | Comment | DocComment String -- Special | CGDirective String @@ -56,7 +57,7 @@ Show Token where show (DotIdent x) = "dot+identifier " ++ x show (Symbol x) = "symbol " ++ x -- Comments - show (Comment _) = "comment" + show Comment = "comment" show (DocComment c) = "doc comment: \"" ++ c ++ "\"" -- Special show (CGDirective x) = "CGDirective " ++ x @@ -79,7 +80,7 @@ Pretty Token where pretty (DotIdent x) = pretty "dot+identifier" <++> pretty x pretty (Symbol x) = pretty "symbol" <++> pretty x -- Comments - pretty (Comment _) = pretty "comment" + pretty Comment = pretty "comment" pretty (DocComment c) = reflow "doc comment:" <++> dquotes (pretty c) -- Special pretty (CGDirective x) = pretty "CGDirective" <++> pretty x @@ -190,16 +191,27 @@ keywords = ["data", "module", "where", "let", "in", "do", "record", special : List String special = ["%lam", "%pi", "%imppi", "%let"] --- Special symbols - things which can't be a prefix of another symbol, and --- don't match 'validSymbol' export symbols : List String -symbols - = [".(", -- for things such as Foo.Bar.(+) - "@{", - "[|", "|]", - "(", ")", "{", "}", "[", "]", ",", ";", "_", - "`(", "`{{", "`[", "`"] +symbols = [",", ";", "_", "`"] + +export +groupSymbols : List String +groupSymbols = [".(", -- for things such as Foo.Bar.(+) + "@{", "[|", "(", "{", "[", "`(", "`{{", "`["] + +export +groupClose : String -> String +groupClose ".(" = ")" +groupClose "@{" = "}" +groupClose "[|" = "|]" +groupClose "(" = ")" +groupClose "[" = "]" +groupClose "{" = "}" +groupClose "`(" = ")" +groupClose "`{{" = "}}" +groupClose "`[" = "]" +groupClose _ = "" export isOpChar : Char -> Bool @@ -212,7 +224,7 @@ validSymbol = some (pred isOpChar) export reservedSymbols : List String reservedSymbols - = symbols ++ + = symbols ++ groupSymbols ++ (groupClose <$> groupSymbols) ++ ["%", "\\", ":", "=", ":=", "|", "|||", "<-", "->", "=>", "?", "!", "&", "**", "..", "~"] @@ -246,31 +258,32 @@ fromOctLit str fromMaybe 0 (fromOct (reverse num)) -- ^-- can't happen if the literal lexed correctly -rawTokens : TokenMap Token +rawTokens : Tokenizer Token rawTokens = - [(comment, Comment), - (blockComment, Comment), - (docComment, DocComment . drop 3), - (cgDirective, mkDirective), - (holeIdent, \x => HoleIdent (assert_total (strTail x)))] ++ - map (\x => (exact x, Symbol)) symbols ++ - [(doubleLit, \x => DoubleLit (cast x)), - (binLit, \x => IntegerLit (fromBinLit x)), - (hexLit, \x => IntegerLit (fromHexLit x)), - (octLit, \x => IntegerLit (fromOctLit x)), - (digits, \x => IntegerLit (cast x)), - (stringLit, \x => StringLit 0 (stripQuotes x)), - (stringLit1, \x => StringLit 1 (stripSurrounds 2 2 x)), - (stringLit2, \x => StringLit 2 (stripSurrounds 3 3 x)), - (stringLit3, \x => StringLit 3 (stripSurrounds 4 4 x)), - (charLit, \x => CharLit (stripQuotes x)), - (dotIdent, \x => DotIdent (assert_total $ strTail x)), - (namespacedIdent, parseNamespace), - (identNormal, parseIdent), - (pragma, \x => Pragma (assert_total $ strTail x)), - (space, Comment), - (validSymbol, Symbol), - (symbol, Unrecognised)] + match comment (const Comment) + <|> match blockComment (const Comment) + <|> match docComment (DocComment . drop 3) + <|> match cgDirective mkDirective + <|> match holeIdent (\x => HoleIdent (assert_total (strTail x))) + <|> compose (choice $ exact <$> groupSymbols) Symbol id (\_ => rawTokens) (exact . groupClose) Symbol + <|> match (choice $ exact <$> symbols) Symbol + <|> match doubleLit (\x => DoubleLit (cast x)) + <|> match binLit (\x => IntegerLit (fromBinLit x)) + <|> match hexLit (\x => IntegerLit (fromHexLit x)) + <|> match octLit (\x => IntegerLit (fromOctLit x)) + <|> match digits (\x => IntegerLit (cast x)) + <|> match stringLit (\x => StringLit 0 (stripQuotes x)) + <|> match stringLit1 (\x => StringLit 1 (stripSurrounds 2 2 x)) + <|> match stringLit2 (\x => StringLit 2 (stripSurrounds 3 3 x)) + <|> match stringLit3 (\x => StringLit 3 (stripSurrounds 4 4 x)) + <|> match charLit (\x => CharLit (stripQuotes x)) + <|> match dotIdent (\x => DotIdent (assert_total $ strTail x)) + <|> match namespacedIdent parseNamespace + <|> match identNormal parseIdent + <|> match pragma (\x => Pragma (assert_total $ strTail x)) + <|> match space (const Comment) + <|> match validSymbol Symbol + <|> match symbol Unrecognised where parseIdent : String -> Token parseIdent x = if x `elem` keywords then Keyword x @@ -281,21 +294,21 @@ rawTokens = (Just ns, n) => DotSepIdent ns n export -lexTo : (WithBounds Token -> Bool) -> - String -> Either (Int, Int, String) (List (WithBounds Token)) -lexTo pred str - = case lexTo pred rawTokens str of +lexTo : Lexer -> + String -> Either (StopReason, Int, Int, String) (List (WithBounds Token)) +lexTo reject str + = case lexTo reject rawTokens str of -- Add the EndInput token so that we'll have a line and column -- number to read when storing spans in the file - (tok, (l, c, "")) => Right (filter notComment tok ++ + (tok, (EndInput, l, c, _)) => Right (filter notComment tok ++ [MkBounded EndInput False l c l c]) (_, fail) => Left fail where notComment : WithBounds Token -> Bool notComment t = case t.val of - Comment _ => False + Comment => False _ => True export -lex : String -> Either (Int, Int, String) (List (WithBounds Token)) -lex = lexTo (const False) +lex : String -> Either (StopReason, Int, Int, String) (List (WithBounds Token)) +lex = lexTo (pred $ const False) diff --git a/src/Parser/Package.idr b/src/Parser/Package.idr index 7d345ccc1..e1915ef10 100644 --- a/src/Parser/Package.idr +++ b/src/Parser/Package.idr @@ -14,7 +14,7 @@ import Libraries.Utils.Either export runParser : String -> Rule ty -> Either (ParseError Token) ty runParser str p - = do toks <- mapError LexFail $ lex str + = do toks <- mapError (\err => LexFail (NoRuleApply, err)) $ lex str parsed <- mapError toGenericParsingError $ parse p toks Right (fst parsed) diff --git a/src/Parser/Source.idr b/src/Parser/Source.idr index 3a4d338fd..762b44b17 100644 --- a/src/Parser/Source.idr +++ b/src/Parser/Source.idr @@ -11,18 +11,18 @@ import Libraries.Utils.Either export runParserTo : {e : _} -> - Maybe LiterateStyle -> (WithBounds Token -> Bool) -> + Maybe LiterateStyle -> Lexer -> String -> Grammar Token e ty -> Either (ParseError Token) ty -runParserTo lit pred str p +runParserTo lit reject str p = do str <- mapError LitFail $ unlit lit str - toks <- mapError LexFail $ lexTo pred str + toks <- mapError LexFail $ lexTo reject str parsed <- mapError toGenericParsingError $ parse p toks Right (fst parsed) export runParser : {e : _} -> Maybe LiterateStyle -> String -> Grammar Token e ty -> Either (ParseError Token) ty -runParser lit = runParserTo lit (const False) +runParser lit = runParserTo lit (pred $ const False) export covering parseFile : (fn : String) -> Rule ty -> IO (Either (ParseError Token) ty) diff --git a/src/Parser/Support.idr b/src/Parser/Support.idr index 482e8eee5..2e8e49d8d 100644 --- a/src/Parser/Support.idr +++ b/src/Parser/Support.idr @@ -1,5 +1,6 @@ module Parser.Support +import public Libraries.Text.Lexer.Tokenizer import public Libraries.Text.Lexer import public Libraries.Text.Parser import Libraries.Text.PrettyPrint.Prettyprinter @@ -17,7 +18,7 @@ import System.File public export data ParseError tok = ParseFail String (Maybe (Int, Int)) (List tok) - | LexFail (Int, Int, String) + | LexFail (StopReason, Int, Int, String) | FileFail FileError | LitFail LiterateError @@ -26,8 +27,8 @@ Show tok => Show (ParseError tok) where show (ParseFail err loc toks) = "Parse error: " ++ err ++ " (next tokens: " ++ show (take 10 toks) ++ ")" - show (LexFail (c, l, str)) - = "Lex error at " ++ show (c, l) ++ " input: " ++ str + show (LexFail (reason, c, l, str)) + = "Lex error " ++ show reason ++ " at " ++ show (c, l) ++ " input: " ++ str show (FileFail err) = "File error: " ++ show err show (LitFail (MkLitErr l c str)) @@ -42,8 +43,8 @@ Pretty tok => Pretty (ParseError tok) where prettyLine : Maybe (Int, Int) -> Doc ann prettyLine Nothing = emptyDoc prettyLine (Just (r, c)) = space <+> "at" <++> "line" <++> pretty (r + 1) <+> ":" <+> pretty (c + 1) - pretty (LexFail (c, l, str)) - = reflow "Lex error at" <++> pretty (c, l) <++> pretty "input:" <++> pretty str + pretty (LexFail (reason, c, l, str)) + = reflow "Lex error" <++> pretty reason <++> "at" <++> pretty (c, l) <++> pretty "input:" <++> pretty str pretty (FileFail err) = reflow "File error:" <++> pretty (show err) pretty (LitFail (MkLitErr l c str)) diff --git a/tests/idris2/perf005/expected b/tests/idris2/perf005/expected index cc8d65ceb..bc396d653 100644 --- a/tests/idris2/perf005/expected +++ b/tests/idris2/perf005/expected @@ -12,11 +12,30 @@ Lambda.idr:62:3--88:9 67 | (Func 1/1: Building Bad1 (Bad1.idr) -Error: Parse error at line 3:1: -Couldn't parse declaration (next tokens: [data, identifier Bad, symbol =, identifier BadDCon, symbol (, identifier x, symbol :, identifier Nat, symbol ), end of input]) +Error: Couldn't parse declaration. + +Bad1.idr:3:1--3:2 + 1 | module Bad1 + 2 | + 3 | data Bad = BadDCon (x : Nat) + ^ + 1/1: Building Bad2 (Bad2.idr) -Error: Parse error at line 3:13: -Expected 'case', 'if', 'do', application or operator expression (next tokens: [symbol (, identifier whatever, symbol :, identifier Int, symbol ), end of input]) +Error: Expected 'case', 'if', 'do', application or operator expression. + +Bad2.idr:3:13--3:14 + 1 | module Bad2 + 2 | + 3 | badReturn : (whatever : Int) + ^ + 1/1: Building Bad3 (Bad3.idr) -Error: Parse error at line 4:1: -Couldn't parse declaration (next tokens: [identifier badExpr, symbol (, identifier whatever, symbol :, symbol (, symbol ), symbol ), end of input]) +Error: Couldn't parse declaration. + +Bad3.idr:4:1--4:2 + 1 | module Bad3 + 2 | + 3 | badExpr : () + 4 | badExpr (whatever : ()) + ^ + diff --git a/tests/idris2/perror001/expected b/tests/idris2/perror001/expected index fc256dd8e..ea25d1cef 100644 --- a/tests/idris2/perror001/expected +++ b/tests/idris2/perror001/expected @@ -1,3 +1,10 @@ 1/1: Building PError (PError.idr) -Error: Parse error at line 5:1: -Expected ')' (next tokens: [identifier bar, identifier x, symbol =, let, identifier y, symbol =, literal 42, in, identifier y, symbol +]) +Error: Bracket is not properly closed. + +PError.idr:4:7--4:8 + 1 | foo : Int -> Int + 2 | foo x = x + 3 | + 4 | bar : (Int -> Int + ^ + diff --git a/tests/idris2/perror002/expected b/tests/idris2/perror002/expected index f01dadcc6..2fdf0fcbf 100644 --- a/tests/idris2/perror002/expected +++ b/tests/idris2/perror002/expected @@ -1,3 +1,11 @@ 1/1: Building PError (PError.idr) -Error: Parse error at line 7:1: -Expected ')' (next tokens: [identifier baz, symbol :, identifier Int, symbol ->, identifier Int, identifier baz, identifier x, symbol =, identifier x, end of input]) +Error: Bracket is not properly closed. + +PError.idr:5:23--5:24 + 1 | foo : Int -> Int + 2 | foo x = x + 3 | + 4 | bar : Int -> Int + 5 | bar x = let y = 42 in (y + x + ^ + diff --git a/tests/idris2/perror003/expected b/tests/idris2/perror003/expected index df87875cf..15f3bae24 100644 --- a/tests/idris2/perror003/expected +++ b/tests/idris2/perror003/expected @@ -1,6 +1,22 @@ 1/1: Building PError (PError.idr) -Error: Parse error at line 5:9: -Expected 'case', 'if', 'do', application or operator expression (next tokens: [let, identifier y, symbol =, symbol (, literal 42, in, identifier y, symbol +, identifier x, identifier baz]) +Error: Bracket is not properly closed. + +PError.idr:5:17--5:18 + 1 | foo : Int -> Int + 2 | foo x = x + 3 | + 4 | bar : Int -> Int + 5 | bar x = let y = (42 in y + x + ^ + 1/1: Building PError2 (PError2.idr) -Error: Parse error at line 5:9: -Expected 'case', 'if', 'do', application or operator expression (next tokens: [let, identifier y, symbol :=, symbol (, literal 42, in, identifier y, symbol +, identifier x, identifier baz]) +Error: Bracket is not properly closed. + +PError2.idr:5:18--5:19 + 1 | foo : Int -> Int + 2 | foo x = x + 3 | + 4 | bar : Int -> Int + 5 | bar x = let y := (42 in y + x + ^ + diff --git a/tests/idris2/perror004/expected b/tests/idris2/perror004/expected index 26b909a21..278c7eb34 100644 --- a/tests/idris2/perror004/expected +++ b/tests/idris2/perror004/expected @@ -1,3 +1,10 @@ 1/1: Building PError (PError.idr) -Error: Parse error at line 4:33: -Wrong number of 'with' arguments (next tokens: [symbol =, identifier x, identifier foo, identifier x, identifier y, symbol |, identifier True, symbol |, identifier True, symbol =]) +Error: Wrong number of 'with' arguments. + +PError.idr:4:33--4:34 + 1 | foo : Nat -> Nat -> Bool + 2 | foo x y with (x == y) + 3 | foo x y | True with (y == x) + 4 | foo x y | True | False | 10 = x + ^ + diff --git a/tests/idris2/perror005/expected b/tests/idris2/perror005/expected index 68f034a5e..a5225854e 100644 --- a/tests/idris2/perror005/expected +++ b/tests/idris2/perror005/expected @@ -1,3 +1,11 @@ 1/1: Building PError (PError.idr) -Error: Parse error at line 7:1: -Expected 'in' (next tokens: [identifier baz, symbol :, identifier Int, symbol ->, identifier Int, identifier baz, identifier x, symbol =, identifier x, end of input]) +Error: Expected 'in'. + +PError.idr:7:1--7:2 + 3 | + 4 | bar : Int -> Int + 5 | bar x = let y = 42 + 6 | + 7 | baz : Int -> Int + ^ + diff --git a/tests/idris2/perror006/expected b/tests/idris2/perror006/expected index 5ecee7ed3..6dcdfb6ee 100644 --- a/tests/idris2/perror006/expected +++ b/tests/idris2/perror006/expected @@ -1,3 +1,11 @@ 1/1: Building PError (PError.idr) -Error: Parse error at line 7:1: -Expected 'else' (next tokens: [identifier baz, symbol :, identifier Int, symbol ->, identifier Int, identifier baz, identifier x, symbol =, identifier x, end of input]) +Error: Expected 'else'. + +PError.idr:7:1--7:2 + 3 | + 4 | bar : Int -> Int + 5 | bar x = if x == 95 then 0 + 6 | + 7 | baz : Int -> Int + ^ +