Merge pull request #1034 from andylokandy/lexer

lexer: add a language-composable tokenizer
André Videla 2021-02-15 11:45:37 +00:00 committed by GitHub
commit 8c7a08f282
21 changed files with 364 additions and 116 deletions
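For orientation, a minimal sketch of the new API from Libraries.Text.Lexer.Tokenizer (the Tok type and the rules below are hypothetical, not part of this diff; the headline compose rule is shown where it is defined, further down):

import Libraries.Text.Lexer
import Libraries.Text.Lexer.Tokenizer

data Tok = Word String | WS

-- One rule per lexer; rules are tried in order via <|>
toks : Tokenizer Tok
toks = match alphas Word          -- runs of letters become Word tokens
   <|> match spaces (const WS)    -- runs of whitespace become WS tokens

-- `lex` returns the recognised tokens plus why and where the tokenizer stopped
result : (List (WithBounds Tok), (StopReason, Int, Int, String))
result = lex toks "hello world"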

View File

@ -118,6 +118,7 @@ modules =
Libraries.Text.Bounded,
Libraries.Text.Lexer,
Libraries.Text.Lexer.Core,
Libraries.Text.Lexer.Tokenizer,
Libraries.Text.Literate,
Libraries.Text.Parser,
Libraries.Text.Parser.Core,

View File

@ -133,8 +133,10 @@ data Error : Type where
GenericMsg : FC -> String -> Error
TTCError : TTCErrorMsg -> Error
FileErr : String -> FileError -> Error
LitFail : FC -> Error
LexFail : FC -> String -> Error
ParseFail : (Show token, Pretty token) =>
FC -> ParseError token -> Error
FC -> String -> List token -> Error
ModuleNotFound : FC -> ModuleIdent -> Error
CyclicImports : List ModuleIdent -> Error
ForceNeeded : Error
@ -295,7 +297,9 @@ Show Error where
show (GenericMsg fc str) = show fc ++ ":" ++ str
show (TTCError msg) = "Error in TTC file: " ++ show msg
show (FileErr fname err) = "File error (" ++ fname ++ "): " ++ show err
show (ParseFail fc err) = "Parse error (" ++ show err ++ ")"
show (LitFail fc) = show fc ++ ":Can't parse literate"
show (LexFail fc err) = show fc ++ ":Lexer error (" ++ show err ++ ")"
show (ParseFail fc err toks) = "Parse error (" ++ show err ++ "): " ++ show toks
show (ModuleNotFound fc ns)
= show fc ++ ":" ++ show ns ++ " not found"
show (CyclicImports ns)
@ -373,7 +377,9 @@ getErrorLoc (BadRunElab loc _ _) = Just loc
getErrorLoc (GenericMsg loc _) = Just loc
getErrorLoc (TTCError _) = Nothing
getErrorLoc (FileErr _ _) = Nothing
getErrorLoc (ParseFail loc _) = Just loc
getErrorLoc (LitFail loc) = Just loc
getErrorLoc (LexFail loc _) = Just loc
getErrorLoc (ParseFail loc _ _) = Just loc
getErrorLoc (ModuleNotFound loc _) = Just loc
getErrorLoc (CyclicImports _) = Nothing
getErrorLoc ForceNeeded = Nothing

View File

@ -406,8 +406,12 @@ perror (TTCError msg)
<++> parens (pretty "the most likely case is that the ./build directory in your current project contains files from a previous build of idris2 or the idris2 executable is from a different build than the installed .ttc files")
perror (FileErr fname err)
= pure $ errorDesc (reflow "File error in" <++> pretty fname <++> colon) <++> pretty (show err)
perror (ParseFail _ err)
= pure $ pretty err
perror (LitFail fc)
= pure $ errorDesc (reflow "Can't parse literate.") <+> line <+> !(ploc fc)
perror (LexFail fc msg)
= pure $ errorDesc (pretty msg) <+> line <+> !(ploc fc)
perror (ParseFail fc msg toks)
= pure $ errorDesc (pretty msg) <+> line <+> !(ploc fc)
perror (ModuleNotFound fc ns)
= pure $ errorDesc ("Module" <++> annotate FileCtxt (pretty ns) <++> reflow "not found") <+> line <+> !(ploc fc)
perror (CyclicImports ns)

View File

@ -30,7 +30,7 @@ ideTokens =
[(digits, \x => IntegerLit (cast x)),
(stringLit, \x => StringLit 0 (fromMaybe "" (escape 0 (stripQuotes x)))),
(identAllowDashes, \x => Ident x),
(space, Comment)]
(space, (const Comment))]
idelex : String -> Either (Int, Int, String) (List (WithBounds Token))
idelex str
@ -43,7 +43,7 @@ idelex str
where
notComment : WithBounds Token -> Bool
notComment t = case t.val of
Comment _ => False
Comment => False
_ => True
covering
@ -66,7 +66,7 @@ sexp
ideParser : {e : _} -> String -> Grammar Token e ty -> Either (ParseError Token) ty
ideParser str p
= do toks <- mapError LexFail $ idelex str
= do toks <- mapError (\err => LexFail (NoRuleApply, err)) $ idelex str
parsed <- mapError toGenericParsingError $ parse p toks
Right (fst parsed)

View File

@ -474,12 +474,6 @@ clean pkg opts -- `opts` is not used but might be in the future
delete $ ttFile <.> "ttc"
delete $ ttFile <.> "ttm"
getParseErrorLoc : String -> ParseError Token -> FC
getParseErrorLoc fname (ParseFail _ (Just pos) _) = MkFC fname pos pos
getParseErrorLoc fname (LexFail (l, c, _)) = MkFC fname (l, c) (l, c)
getParseErrorLoc fname (LitFail _) = MkFC fname (0, 0) (0, 0) -- TODO: Remove this unused case
getParseErrorLoc fname _ = replFC
-- Just load the given module, if it exists, which will involve building
-- it if necessary
runRepl : {auto c : Ref Ctxt Defs} ->
@ -506,8 +500,7 @@ parsePkgFile file = do
(do desc <- parsePkgDesc file
eoi
pure desc)
| Left (FileFail err) => throw (FileErr file err)
| Left err => throw (ParseFail (getParseErrorLoc file err) err)
| Left err => throw (fromParseError file err)
addFields fs (initPkgDesc pname)
processPackage : {auto c : Ref Ctxt Defs} ->
@ -525,8 +518,7 @@ processPackage cmd file opts
(do desc <- parsePkgDesc file
eoi
pure desc)
| Left (FileFail err) => throw (FileErr file err)
| Left err => throw (ParseFail (getParseErrorLoc file err) err)
| Left err => throw (fromParseError file err)
pkg <- addFields fs (initPkgDesc pname)
maybe (pure ()) setBuildDir (builddir pkg)
setOutputDir (outputdir pkg)
@ -628,8 +620,7 @@ findIpkg fname
(do desc <- parsePkgDesc ipkgn
eoi
pure desc)
| Left (FileFail err) => throw (FileErr ipkgn err)
| Left err => throw (ParseFail (getParseErrorLoc ipkgn err) err)
| Left err => throw (fromParseError ipkgn err)
pkg <- addFields fs (initPkgDesc pname)
maybe (pure ()) setBuildDir (builddir pkg)
setOutputDir (outputdir pkg)

View File

@ -390,7 +390,7 @@ mutual
pure (PDotted (boundToFC fname b) b.val)
<|> do b <- bounds (symbol "`(" *> expr pdef fname indents <* symbol ")")
pure (PQuote (boundToFC fname b) b.val)
<|> do b <- bounds (symbol "`{{" *> name <* symbol "}" <* symbol "}")
<|> do b <- bounds (symbol "`{{" *> name <* symbol "}}")
pure (PQuoteName (boundToFC fname b) b.val)
<|> do b <- bounds (symbol "`[" *> nonEmptyBlock (topDecl fname) <* symbol "]")
pure (PQuoteDecl (boundToFC fname b) (collectDefs (concat b.val)))

View File

@ -4,6 +4,7 @@ import Compiler.Inline
import Core.Binary
import Core.Context
import Core.Core
import Core.Context.Log
import Core.Directory
import Core.Env
@ -26,6 +27,7 @@ import Idris.Syntax
import Idris.Pretty
import Data.List
import Libraries.Data.String.Extra
import Libraries.Data.NameMap
import System.File
@ -187,11 +189,13 @@ modTime fname
pure (cast t)
export
getParseErrorLoc : String -> ParseError Token -> FC
getParseErrorLoc fname (ParseFail _ (Just pos) _) = MkFC fname pos pos
getParseErrorLoc fname (LexFail (l, c, _)) = MkFC fname (l, c) (l, c)
getParseErrorLoc fname (LitFail (MkLitErr l c _)) = MkFC fname (l, 0) (l, 0)
getParseErrorLoc fname _ = replFC
fromParseError : (Show token, Pretty token) => String -> ParseError token -> Error
fromParseError fname (FileFail err) = FileErr fname err
fromParseError fname (LitFail (MkLitErr l c _)) = LitFail (MkFC fname (l, c) (l, c + 1))
fromParseError fname (LexFail (ComposeNotClosing begin end, _, _, _)) = LexFail (MkFC fname begin end) "Bracket is not properly closed."
fromParseError fname (LexFail (_, l, c, _)) = LexFail (MkFC fname (l, c) (l, c + 1)) "Can't recognise token."
fromParseError fname (ParseFail msg (Just (l, c)) toks) = ParseFail (MkFC fname (l, c) (l, c + 1)) (msg +> '.') toks
fromParseError fname (ParseFail msg Nothing toks) = ParseFail replFC (msg +> '.') toks
export
readHeader : {auto c : Ref Ctxt Defs} ->
@ -199,17 +203,11 @@ readHeader : {auto c : Ref Ctxt Defs} ->
readHeader path
= do Right res <- coreLift (readFile path)
| Left err => throw (FileErr path err)
case runParserTo (isLitFile path) isColon res (progHdr path) of
Left err => throw (ParseFail (getParseErrorLoc path err) err)
-- Stop at the first :, that's definitely not part of the header, to
-- save lexing the whole file unnecessarily
case runParserTo (isLitFile path) (is ':') res (progHdr path) of
Left err => throw (fromParseError path err)
Right mod => pure mod
where
-- Stop at the first :, that's definitely not part of the header, to
-- save lexing the whole file unnecessarily
isColon : WithBounds Token -> Bool
isColon t
= case t.val of
Symbol ":" => True
_ => False
%foreign "scheme:collect"
prim__gc : Int -> PrimIO ()
@ -271,7 +269,7 @@ processMod srcf ttcf msg sourcecode
do iputStrLn msg
Right mod <- logTime ("++ Parsing " ++ srcf) $
pure (runParser (isLitFile srcf) sourcecode (do p <- prog srcf; eoi; pure p))
| Left err => pure (Just [ParseFail (getParseErrorLoc srcf err) err])
| Left err => pure (Just [fromParseError srcf err])
initHash
traverse addPublicHash (sort hs)
resetNextVar

View File

@ -126,6 +126,13 @@ mutual
many : Lexer -> Recognise False
many l = opt (some l)
||| Repeat the sub-lexer `l` one or more times until the lexer
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
||| /((?!`stopBefore`)`l`)\+/
export
someUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Lexer
someUntil stopBefore l = some (reject stopBefore <+> l)
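For instance, a sketch using only combinators already in this module (the restOfLine name is illustrative, not part of this diff):

-- One or more characters up to, but not including, a newline,
-- leaving the newline for a later rule to consume.
restOfLine : Lexer
restOfLine = someUntil (is '\n') any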
||| Repeat the sub-lexer `l` zero or more times until the lexer
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
||| Not guaranteed to consume input.
@ -313,18 +320,19 @@ export
quote : (q : Lexer) -> (l : Lexer) -> Lexer
quote q l = surround q q l
||| Recognise an escape character (often '\\') followed by a sub-lexer
||| Recognise an escape sub-lexer (often '\\') followed by
||| another sub-lexer
||| /[`esc`]`l`/
export
escape : (esc : Char) -> Lexer -> Lexer
escape esc l = is esc <+> l
escape : (esc : Lexer) -> Lexer -> Lexer
escape esc l = esc <+> l
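Since esc is now a full Lexer rather than a single Char, multi-character escape introducers become expressible. A sketch (the hexEscape rule is illustrative, not part of this diff):

-- The old capability, unchanged: a single escape character
backslashEscape : Lexer
backslashEscape = escape (is '\\') any

-- Newly expressible: an escape introduced by a two-character sequence
hexEscape : Lexer
hexEscape = escape (exact "\\x") (some hexDigit)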
||| Recognise a string literal, including escaped characters.
||| (Note: doesn't yet handle escape sequences such as \123)
||| /"(\\\\.|.)\*?"/
export
stringLit : Lexer
stringLit = quote (is '"') (escape '\\' any <|> any)
stringLit = quote (is '"') (escape (is '\\') any <|> any)
||| Recognise a character literal, including escaped characters.
||| (Note: doesn't yet handle escape sequences such as \123)
@ -332,7 +340,7 @@ stringLit = quote (is '"') (escape '\\' any <|> any)
export
charLit : Lexer
charLit = let q = '\'' in
is q <+> (escape '\\' (control <|> any) <|> isNot q) <+> is q
is q <+> (escape (is '\\') (control <|> any) <|> isNot q) <+> is q
where
lexStr : List String -> Lexer
lexStr [] = fail

View File

@ -97,6 +97,7 @@ strTail start (MkStrLen str len)
-- If the string is recognised, returns the index at which the token
-- ends
export
scan : Recognise c -> List Char -> List Char -> Maybe (List Char, List Char)
scan Empty tok str = pure (tok, str)
scan Fail tok str = Nothing

View File

@ -0,0 +1,152 @@
module Libraries.Text.Lexer.Tokenizer
import public Libraries.Control.Delayed
import public Libraries.Text.Bounded
import Libraries.Data.Bool.Extra
import Libraries.Data.String.Extra
import Libraries.Text.Lexer.Core
import Libraries.Text.Lexer
import Libraries.Text.PrettyPrint.Prettyprinter
import Libraries.Text.PrettyPrint.Prettyprinter.Util
import Data.List
import Data.Either
import Data.Nat
import Data.Strings
%default total
||| Description of a language's tokenization rule.
export
data Tokenizer : (tokenType : Type) -> Type where
Match : Lexer -> (String -> tokenType) -> Tokenizer tokenType
Compose : (begin : Lexer) ->
(mapBegin : String -> tokenType) ->
(tagger : String -> tag) ->
(middle : Inf (tag -> Tokenizer tokenType)) ->
(end : tag -> Lexer) ->
(mapEnd : String -> tokenType) ->
Tokenizer tokenType
Alt : Tokenizer tokenType -> Lazy (Tokenizer tokenType) -> Tokenizer tokenType
||| Alternative tokenizer rules.
export
(<|>) : Tokenizer t -> Lazy (Tokenizer t) -> Tokenizer t
(<|>) = Alt
||| Match a lexer and convert the matched string to a token.
export
match : Lexer -> (String -> a) -> Tokenizer a
match = Match
||| Compose another tokenizer. The composed language is quoted between
||| a begin lexer and an end lexer. The lexeme matched by `begin` is fed to
||| `tagger`, and the resulting tag selects the middle tokenizer and the end lexer.
export
compose : (begin : Lexer) ->
(mapBegin : String -> a) ->
(tagger : String -> tag) ->
(middle : Inf (tag -> Tokenizer a)) ->
(end : tag -> Lexer) ->
(mapEnd : String -> a) ->
Tokenizer a
compose = Compose
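As a sketch of how the pieces fit together (the Br type is hypothetical; the Idris lexer further down uses the same pattern to pair groupSymbols with groupClose):

data Br = Begin String | Name String | End String

-- Map each opening bracket to its closer
close : String -> String
close "(" = ")"
close _   = "]"

brackets : Tokenizer Br
brackets
    = compose (choice $ exact <$> ["(", "["])   -- begin lexer
              Begin                             -- token for the begin lexeme
              id                                -- the tag is the begin lexeme itself
              (\_ => brackets <|> match alphas Name)  -- quoted language (recursion is fine: middle is Inf)
              (exact . close)                   -- end lexer selected by the tag
              End                               -- token for the end lexeme
  <|> match alphas Name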
||| The reason the tokenizer stopped making progress.
||| @ ComposeNotClosing carries the span of the unclosed begin token in the
||| form `(startLine, startCol), (endLine, endCol)`.
public export
data StopReason = EndInput | NoRuleApply | ComposeNotClosing (Int, Int) (Int, Int)
export
Show StopReason where
show EndInput = "EndInput"
show NoRuleApply = "NoRuleApply"
show (ComposeNotClosing start end) = "ComposeNotClosing " ++ show start ++ " " ++ show end
export
Pretty StopReason where
pretty EndInput = pretty "EndInput"
pretty NoRuleApply = pretty "NoRuleApply"
pretty (ComposeNotClosing start end) = "ComposeNotClosing" <++> pretty start <++> pretty end
tokenise : Lexer ->
Tokenizer a ->
(line, col : Int) -> List (WithBounds a) ->
List Char ->
(List (WithBounds a), (StopReason, Int, Int, List Char))
tokenise reject tokenizer line col acc [] = (reverse acc, EndInput, (line, col, []))
tokenise reject tokenizer line col acc str
= case scan reject [] str of
Just _ => (reverse acc, (EndInput, line, col, str))
Nothing => case getFirstMatch tokenizer str of
Right (toks, line', col', rest) =>
-- assert total because getFirstMatch must consume something
assert_total (tokenise reject tokenizer line' col' (toks ++ acc) rest)
Left reason => (reverse acc, reason, (line, col, str))
where
countNLs : List Char -> Nat
countNLs str = List.length (filter (== '\n') str)
getCols : List Char -> Int -> Int
getCols x c
= case span (/= '\n') x of
(incol, []) => c + cast (length incol)
(incol, _) => cast (length incol)
-- Get the next lexeme using the given `Lexer`, the current position, and the input.
-- Returns the lexeme, the new position, and the rest of the input.
-- If the lexer fails to match, this returns `Nothing`.
getNext : (lexer : Lexer) -> (line, col : Int) -> (input : List Char) -> Maybe (String, Int, Int, List Char)
getNext lexer line col str =
let Just (token, rest) = scan lexer [] str
| _ => Nothing
line' = line + cast (countNLs token)
col' = getCols token col
tokenStr = fastPack $ reverse token
in pure (tokenStr, line', col', rest)
getFirstMatch : Tokenizer a -> List Char ->
Either StopReason (List (WithBounds a), Int, Int, List Char)
getFirstMatch (Match lex fn) str
= let Just (tok, line', col', rest) = getNext lex line col str
| _ => Left NoRuleApply
tok' = MkBounded (fn tok) False line col line' col'
in Right ([tok'], line', col', rest)
getFirstMatch (Compose begin mapBegin tagger middleFn endFn mapEnd) str
= let Just (beginTok', line', col' , rest) = getNext begin line col str
| Nothing => Left NoRuleApply
tag = tagger beginTok'
middle = middleFn tag
end = endFn tag
beginTok'' = MkBounded (mapBegin beginTok') False line col line' col'
(midToks, (reason, line'', col'', rest'')) =
tokenise end middle line' col' [] rest
in case reason of
(ComposeNotClosing _ _) => Left reason
_ => let Just (endTok', lineEnd, colEnd, restEnd) =
getNext end line'' col'' rest''
| _ => Left $ ComposeNotClosing (line, col) (line', col')
endTok'' = MkBounded (mapEnd endTok') False line'' col'' lineEnd colEnd
in Right ([endTok''] ++ reverse midToks ++ [beginTok''], lineEnd, colEnd, restEnd)
getFirstMatch (Alt t1 t2) str
= case getFirstMatch t1 str of
Right result => Right result
Left reason@(ComposeNotClosing _ _) => Left reason
Left _ => getFirstMatch t2 str
export
lexTo : Lexer ->
Tokenizer a ->
String ->
(List (WithBounds a), (StopReason, Int, Int, String))
lexTo reject tokenizer str
= let (ts, reason, (l, c, str')) =
tokenise reject tokenizer 0 0 [] (fastUnpack str) in
(ts, reason, (l, c, fastPack str'))
||| Given a tokenizer and an input string, return a list of recognised tokens,
||| the reason the tokenizer stopped, and the line, column, and remainder of
||| the input at the first point where no more tokens were recognised.
export
lex : Tokenizer a -> String -> (List (WithBounds a), (StopReason, Int, Int, String))
lex tokenizer str = lexTo (pred $ const False) tokenizer str
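The reject lexer passed to lexTo stops tokenisation early at a caller-chosen boundary, which is how readHeader above avoids lexing a whole file just for its module header. A sketch (hypothetical rules, same Tok shape as the earlier sketches):

data Tok = Word String | WS

toks : Tokenizer Tok
toks = match alphas Word <|> match spaces (const WS)

-- Stops cleanly (reason EndInput) as soon as ';' would start a token
headerOnly : (List (WithBounds Tok), (StopReason, Int, Int, String))
headerOnly = lexTo (is ';') toks "module Foo ; the rest is not lexed"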

View File

@ -8,6 +8,7 @@ import Data.Maybe
import Data.Strings
import Libraries.Data.String.Extra
import public Libraries.Text.Bounded
import Libraries.Text.Lexer.Tokenizer
import Libraries.Text.PrettyPrint.Prettyprinter
import Libraries.Text.PrettyPrint.Prettyprinter.Util
@ -33,7 +34,7 @@ data Token
| DotIdent String -- .ident
| Symbol String
-- Comments
| Comment String
| Comment
| DocComment String
-- Special
| CGDirective String
@ -56,7 +57,7 @@ Show Token where
show (DotIdent x) = "dot+identifier " ++ x
show (Symbol x) = "symbol " ++ x
-- Comments
show (Comment _) = "comment"
show Comment = "comment"
show (DocComment c) = "doc comment: \"" ++ c ++ "\""
-- Special
show (CGDirective x) = "CGDirective " ++ x
@ -79,7 +80,7 @@ Pretty Token where
pretty (DotIdent x) = pretty "dot+identifier" <++> pretty x
pretty (Symbol x) = pretty "symbol" <++> pretty x
-- Comments
pretty (Comment _) = pretty "comment"
pretty Comment = pretty "comment"
pretty (DocComment c) = reflow "doc comment:" <++> dquotes (pretty c)
-- Special
pretty (CGDirective x) = pretty "CGDirective" <++> pretty x
@ -190,16 +191,27 @@ keywords = ["data", "module", "where", "let", "in", "do", "record",
special : List String
special = ["%lam", "%pi", "%imppi", "%let"]
-- Special symbols - things which can't be a prefix of another symbol, and
-- don't match 'validSymbol'
export
symbols : List String
symbols
= [".(", -- for things such as Foo.Bar.(+)
"@{",
"[|", "|]",
"(", ")", "{", "}", "[", "]", ",", ";", "_",
"`(", "`{{", "`[", "`"]
symbols = [",", ";", "_", "`"]
export
groupSymbols : List String
groupSymbols = [".(", -- for things such as Foo.Bar.(+)
"@{", "[|", "(", "{", "[", "`(", "`{{", "`["]
export
groupClose : String -> String
groupClose ".(" = ")"
groupClose "@{" = "}"
groupClose "[|" = "|]"
groupClose "(" = ")"
groupClose "[" = "]"
groupClose "{" = "}"
groupClose "`(" = ")"
groupClose "`{{" = "}}"
groupClose "`[" = "]"
groupClose _ = ""
export
isOpChar : Char -> Bool
@ -212,7 +224,7 @@ validSymbol = some (pred isOpChar)
export
reservedSymbols : List String
reservedSymbols
= symbols ++
= symbols ++ groupSymbols ++ (groupClose <$> groupSymbols) ++
["%", "\\", ":", "=", ":=", "|", "|||", "<-", "->", "=>", "?", "!",
"&", "**", "..", "~"]
@ -246,31 +258,32 @@ fromOctLit str
fromMaybe 0 (fromOct (reverse num))
-- ^-- can't happen if the literal lexed correctly
rawTokens : TokenMap Token
rawTokens : Tokenizer Token
rawTokens =
[(comment, Comment),
(blockComment, Comment),
(docComment, DocComment . drop 3),
(cgDirective, mkDirective),
(holeIdent, \x => HoleIdent (assert_total (strTail x)))] ++
map (\x => (exact x, Symbol)) symbols ++
[(doubleLit, \x => DoubleLit (cast x)),
(binLit, \x => IntegerLit (fromBinLit x)),
(hexLit, \x => IntegerLit (fromHexLit x)),
(octLit, \x => IntegerLit (fromOctLit x)),
(digits, \x => IntegerLit (cast x)),
(stringLit, \x => StringLit 0 (stripQuotes x)),
(stringLit1, \x => StringLit 1 (stripSurrounds 2 2 x)),
(stringLit2, \x => StringLit 2 (stripSurrounds 3 3 x)),
(stringLit3, \x => StringLit 3 (stripSurrounds 4 4 x)),
(charLit, \x => CharLit (stripQuotes x)),
(dotIdent, \x => DotIdent (assert_total $ strTail x)),
(namespacedIdent, parseNamespace),
(identNormal, parseIdent),
(pragma, \x => Pragma (assert_total $ strTail x)),
(space, Comment),
(validSymbol, Symbol),
(symbol, Unrecognised)]
match comment (const Comment)
<|> match blockComment (const Comment)
<|> match docComment (DocComment . drop 3)
<|> match cgDirective mkDirective
<|> match holeIdent (\x => HoleIdent (assert_total (strTail x)))
<|> compose (choice $ exact <$> groupSymbols) Symbol id (\_ => rawTokens) (exact . groupClose) Symbol
<|> match (choice $ exact <$> symbols) Symbol
<|> match doubleLit (\x => DoubleLit (cast x))
<|> match binLit (\x => IntegerLit (fromBinLit x))
<|> match hexLit (\x => IntegerLit (fromHexLit x))
<|> match octLit (\x => IntegerLit (fromOctLit x))
<|> match digits (\x => IntegerLit (cast x))
<|> match stringLit (\x => StringLit 0 (stripQuotes x))
<|> match stringLit1 (\x => StringLit 1 (stripSurrounds 2 2 x))
<|> match stringLit2 (\x => StringLit 2 (stripSurrounds 3 3 x))
<|> match stringLit3 (\x => StringLit 3 (stripSurrounds 4 4 x))
<|> match charLit (\x => CharLit (stripQuotes x))
<|> match dotIdent (\x => DotIdent (assert_total $ strTail x))
<|> match namespacedIdent parseNamespace
<|> match identNormal parseIdent
<|> match pragma (\x => Pragma (assert_total $ strTail x))
<|> match space (const Comment)
<|> match validSymbol Symbol
<|> match symbol Unrecognised
where
parseIdent : String -> Token
parseIdent x = if x `elem` keywords then Keyword x
@ -281,21 +294,21 @@ rawTokens =
(Just ns, n) => DotSepIdent ns n
export
lexTo : (WithBounds Token -> Bool) ->
String -> Either (Int, Int, String) (List (WithBounds Token))
lexTo pred str
= case lexTo pred rawTokens str of
lexTo : Lexer ->
String -> Either (StopReason, Int, Int, String) (List (WithBounds Token))
lexTo reject str
= case lexTo reject rawTokens str of
-- Add the EndInput token so that we'll have a line and column
-- number to read when storing spans in the file
(tok, (l, c, "")) => Right (filter notComment tok ++
(tok, (EndInput, l, c, _)) => Right (filter notComment tok ++
[MkBounded EndInput False l c l c])
(_, fail) => Left fail
where
notComment : WithBounds Token -> Bool
notComment t = case t.val of
Comment _ => False
Comment => False
_ => True
export
lex : String -> Either (Int, Int, String) (List (WithBounds Token))
lex = lexTo (const False)
lex : String -> Either (StopReason, Int, Int, String) (List (WithBounds Token))
lex = lexTo (pred $ const False)

View File

@ -14,7 +14,7 @@ import Libraries.Utils.Either
export
runParser : String -> Rule ty -> Either (ParseError Token) ty
runParser str p
= do toks <- mapError LexFail $ lex str
= do toks <- mapError (\err => LexFail (NoRuleApply, err)) $ lex str
parsed <- mapError toGenericParsingError $ parse p toks
Right (fst parsed)

View File

@ -11,18 +11,18 @@ import Libraries.Utils.Either
export
runParserTo : {e : _} ->
Maybe LiterateStyle -> (WithBounds Token -> Bool) ->
Maybe LiterateStyle -> Lexer ->
String -> Grammar Token e ty -> Either (ParseError Token) ty
runParserTo lit pred str p
runParserTo lit reject str p
= do str <- mapError LitFail $ unlit lit str
toks <- mapError LexFail $ lexTo pred str
toks <- mapError LexFail $ lexTo reject str
parsed <- mapError toGenericParsingError $ parse p toks
Right (fst parsed)
export
runParser : {e : _} ->
Maybe LiterateStyle -> String -> Grammar Token e ty -> Either (ParseError Token) ty
runParser lit = runParserTo lit (const False)
runParser lit = runParserTo lit (pred $ const False)
export covering
parseFile : (fn : String) -> Rule ty -> IO (Either (ParseError Token) ty)

View File

@ -1,5 +1,6 @@
module Parser.Support
import public Libraries.Text.Lexer.Tokenizer
import public Libraries.Text.Lexer
import public Libraries.Text.Parser
import Libraries.Text.PrettyPrint.Prettyprinter
@ -17,7 +18,7 @@ import System.File
public export
data ParseError tok
= ParseFail String (Maybe (Int, Int)) (List tok)
| LexFail (Int, Int, String)
| LexFail (StopReason, Int, Int, String)
| FileFail FileError
| LitFail LiterateError
@ -26,8 +27,8 @@ Show tok => Show (ParseError tok) where
show (ParseFail err loc toks)
= "Parse error: " ++ err ++ " (next tokens: "
++ show (take 10 toks) ++ ")"
show (LexFail (c, l, str))
= "Lex error at " ++ show (c, l) ++ " input: " ++ str
show (LexFail (reason, c, l, str))
= "Lex error " ++ show reason ++ " at " ++ show (c, l) ++ " input: " ++ str
show (FileFail err)
= "File error: " ++ show err
show (LitFail (MkLitErr l c str))
@ -42,8 +43,8 @@ Pretty tok => Pretty (ParseError tok) where
prettyLine : Maybe (Int, Int) -> Doc ann
prettyLine Nothing = emptyDoc
prettyLine (Just (r, c)) = space <+> "at" <++> "line" <++> pretty (r + 1) <+> ":" <+> pretty (c + 1)
pretty (LexFail (c, l, str))
= reflow "Lex error at" <++> pretty (c, l) <++> pretty "input:" <++> pretty str
pretty (LexFail (reason, c, l, str))
= reflow "Lex error" <++> pretty reason <++> "at" <++> pretty (c, l) <++> pretty "input:" <++> pretty str
pretty (FileFail err)
= reflow "File error:" <++> pretty (show err)
pretty (LitFail (MkLitErr l c str))

View File

@ -12,11 +12,30 @@ Lambda.idr:62:3--88:9
67 | (Func
1/1: Building Bad1 (Bad1.idr)
Error: Parse error at line 3:1:
Couldn't parse declaration (next tokens: [data, identifier Bad, symbol =, identifier BadDCon, symbol (, identifier x, symbol :, identifier Nat, symbol ), end of input])
Error: Couldn't parse declaration.
Bad1.idr:3:1--3:2
1 | module Bad1
2 |
3 | data Bad = BadDCon (x : Nat)
^
1/1: Building Bad2 (Bad2.idr)
Error: Parse error at line 3:13:
Expected 'case', 'if', 'do', application or operator expression (next tokens: [symbol (, identifier whatever, symbol :, identifier Int, symbol ), end of input])
Error: Expected 'case', 'if', 'do', application or operator expression.
Bad2.idr:3:13--3:14
1 | module Bad2
2 |
3 | badReturn : (whatever : Int)
^
1/1: Building Bad3 (Bad3.idr)
Error: Parse error at line 4:1:
Couldn't parse declaration (next tokens: [identifier badExpr, symbol (, identifier whatever, symbol :, symbol (, symbol ), symbol ), end of input])
Error: Couldn't parse declaration.
Bad3.idr:4:1--4:2
1 | module Bad3
2 |
3 | badExpr : ()
4 | badExpr (whatever : ())
^

View File

@ -1,3 +1,10 @@
1/1: Building PError (PError.idr)
Error: Parse error at line 5:1:
Expected ')' (next tokens: [identifier bar, identifier x, symbol =, let, identifier y, symbol =, literal 42, in, identifier y, symbol +])
Error: Bracket is not properly closed.
PError.idr:4:7--4:8
1 | foo : Int -> Int
2 | foo x = x
3 |
4 | bar : (Int -> Int
^

View File

@ -1,3 +1,11 @@
1/1: Building PError (PError.idr)
Error: Parse error at line 7:1:
Expected ')' (next tokens: [identifier baz, symbol :, identifier Int, symbol ->, identifier Int, identifier baz, identifier x, symbol =, identifier x, end of input])
Error: Bracket is not properly closed.
PError.idr:5:23--5:24
1 | foo : Int -> Int
2 | foo x = x
3 |
4 | bar : Int -> Int
5 | bar x = let y = 42 in (y + x
^

View File

@ -1,6 +1,22 @@
1/1: Building PError (PError.idr)
Error: Parse error at line 5:9:
Expected 'case', 'if', 'do', application or operator expression (next tokens: [let, identifier y, symbol =, symbol (, literal 42, in, identifier y, symbol +, identifier x, identifier baz])
Error: Bracket is not properly closed.
PError.idr:5:17--5:18
1 | foo : Int -> Int
2 | foo x = x
3 |
4 | bar : Int -> Int
5 | bar x = let y = (42 in y + x
^
1/1: Building PError2 (PError2.idr)
Error: Parse error at line 5:9:
Expected 'case', 'if', 'do', application or operator expression (next tokens: [let, identifier y, symbol :=, symbol (, literal 42, in, identifier y, symbol +, identifier x, identifier baz])
Error: Bracket is not properly closed.
PError2.idr:5:18--5:19
1 | foo : Int -> Int
2 | foo x = x
3 |
4 | bar : Int -> Int
5 | bar x = let y := (42 in y + x
^

View File

@ -1,3 +1,10 @@
1/1: Building PError (PError.idr)
Error: Parse error at line 4:33:
Wrong number of 'with' arguments (next tokens: [symbol =, identifier x, identifier foo, identifier x, identifier y, symbol |, identifier True, symbol |, identifier True, symbol =])
Error: Wrong number of 'with' arguments.
PError.idr:4:33--4:34
1 | foo : Nat -> Nat -> Bool
2 | foo x y with (x == y)
3 | foo x y | True with (y == x)
4 | foo x y | True | False | 10 = x
^

View File

@ -1,3 +1,11 @@
1/1: Building PError (PError.idr)
Error: Parse error at line 7:1:
Expected 'in' (next tokens: [identifier baz, symbol :, identifier Int, symbol ->, identifier Int, identifier baz, identifier x, symbol =, identifier x, end of input])
Error: Expected 'in'.
PError.idr:7:1--7:2
3 |
4 | bar : Int -> Int
5 | bar x = let y = 42
6 |
7 | baz : Int -> Int
^

View File

@ -1,3 +1,11 @@
1/1: Building PError (PError.idr)
Error: Parse error at line 7:1:
Expected 'else' (next tokens: [identifier baz, symbol :, identifier Int, symbol ->, identifier Int, identifier baz, identifier x, symbol =, identifier x, end of input])
Error: Expected 'else'.
PError.idr:7:1--7:2
3 |
4 | bar : Int -> Int
5 | bar x = if x == 95 then 0
6 |
7 | baz : Int -> Int
^