mirror of
https://github.com/idris-lang/Idris2.git
synced 2025-01-03 09:12:03 +03:00
Merge pull request #1034 from andylokandy/lexer
lexer: add a language-composable tokenizer
This commit is contained in:
commit
8c7a08f282
@ -118,6 +118,7 @@ modules =
|
||||
Libraries.Text.Bounded,
|
||||
Libraries.Text.Lexer,
|
||||
Libraries.Text.Lexer.Core,
|
||||
Libraries.Text.Lexer.Tokenizer,
|
||||
Libraries.Text.Literate,
|
||||
Libraries.Text.Parser,
|
||||
Libraries.Text.Parser.Core,
|
||||
|
@ -133,8 +133,10 @@ data Error : Type where
|
||||
GenericMsg : FC -> String -> Error
|
||||
TTCError : TTCErrorMsg -> Error
|
||||
FileErr : String -> FileError -> Error
|
||||
LitFail : FC -> Error
|
||||
LexFail : FC -> String -> Error
|
||||
ParseFail : (Show token, Pretty token) =>
|
||||
FC -> ParseError token -> Error
|
||||
FC -> String -> List token -> Error
|
||||
ModuleNotFound : FC -> ModuleIdent -> Error
|
||||
CyclicImports : List ModuleIdent -> Error
|
||||
ForceNeeded : Error
|
||||
@ -295,7 +297,9 @@ Show Error where
|
||||
show (GenericMsg fc str) = show fc ++ ":" ++ str
|
||||
show (TTCError msg) = "Error in TTC file: " ++ show msg
|
||||
show (FileErr fname err) = "File error (" ++ fname ++ "): " ++ show err
|
||||
show (ParseFail fc err) = "Parse error (" ++ show err ++ ")"
|
||||
show (LitFail fc) = show fc ++ ":Can't parse literate"
|
||||
show (LexFail fc err) = show fc ++ ":Lexer error (" ++ show err ++ ")"
|
||||
show (ParseFail fc err toks) = "Parse error (" ++ show err ++ "): " ++ show toks
|
||||
show (ModuleNotFound fc ns)
|
||||
= show fc ++ ":" ++ show ns ++ " not found"
|
||||
show (CyclicImports ns)
|
||||
@ -373,7 +377,9 @@ getErrorLoc (BadRunElab loc _ _) = Just loc
|
||||
getErrorLoc (GenericMsg loc _) = Just loc
|
||||
getErrorLoc (TTCError _) = Nothing
|
||||
getErrorLoc (FileErr _ _) = Nothing
|
||||
getErrorLoc (ParseFail loc _) = Just loc
|
||||
getErrorLoc (LitFail loc) = Just loc
|
||||
getErrorLoc (LexFail loc _) = Just loc
|
||||
getErrorLoc (ParseFail loc _ _) = Just loc
|
||||
getErrorLoc (ModuleNotFound loc _) = Just loc
|
||||
getErrorLoc (CyclicImports _) = Nothing
|
||||
getErrorLoc ForceNeeded = Nothing
|
||||
|
@ -406,8 +406,12 @@ perror (TTCError msg)
|
||||
<++> parens (pretty "the most likely case is that the ./build directory in your current project contains files from a previous build of idris2 or the idris2 executable is from a different build than the installed .ttc files")
|
||||
perror (FileErr fname err)
|
||||
= pure $ errorDesc (reflow "File error in" <++> pretty fname <++> colon) <++> pretty (show err)
|
||||
perror (ParseFail _ err)
|
||||
= pure $ pretty err
|
||||
perror (LitFail fc)
|
||||
= pure $ errorDesc (reflow "Can't parse literate.") <+> line <+> !(ploc fc)
|
||||
perror (LexFail fc msg)
|
||||
= pure $ errorDesc (pretty msg) <+> line <+> !(ploc fc)
|
||||
perror (ParseFail fc msg toks)
|
||||
= pure $ errorDesc (pretty msg) <+> line <+> !(ploc fc)
|
||||
perror (ModuleNotFound fc ns)
|
||||
= pure $ errorDesc ("Module" <++> annotate FileCtxt (pretty ns) <++> reflow "not found") <+> line <+> !(ploc fc)
|
||||
perror (CyclicImports ns)
|
||||
|
@ -30,7 +30,7 @@ ideTokens =
|
||||
[(digits, \x => IntegerLit (cast x)),
|
||||
(stringLit, \x => StringLit 0 (fromMaybe "" (escape 0 (stripQuotes x)))),
|
||||
(identAllowDashes, \x => Ident x),
|
||||
(space, Comment)]
|
||||
(space, (const Comment))]
|
||||
|
||||
idelex : String -> Either (Int, Int, String) (List (WithBounds Token))
|
||||
idelex str
|
||||
@ -43,7 +43,7 @@ idelex str
|
||||
where
|
||||
notComment : WithBounds Token -> Bool
|
||||
notComment t = case t.val of
|
||||
Comment _ => False
|
||||
Comment => False
|
||||
_ => True
|
||||
|
||||
covering
|
||||
@ -66,7 +66,7 @@ sexp
|
||||
|
||||
ideParser : {e : _} -> String -> Grammar Token e ty -> Either (ParseError Token) ty
|
||||
ideParser str p
|
||||
= do toks <- mapError LexFail $ idelex str
|
||||
= do toks <- mapError (\err => LexFail (NoRuleApply, err)) $ idelex str
|
||||
parsed <- mapError toGenericParsingError $ parse p toks
|
||||
Right (fst parsed)
|
||||
|
||||
|
@ -474,12 +474,6 @@ clean pkg opts -- `opts` is not used but might be in the future
|
||||
delete $ ttFile <.> "ttc"
|
||||
delete $ ttFile <.> "ttm"
|
||||
|
||||
getParseErrorLoc : String -> ParseError Token -> FC
|
||||
getParseErrorLoc fname (ParseFail _ (Just pos) _) = MkFC fname pos pos
|
||||
getParseErrorLoc fname (LexFail (l, c, _)) = MkFC fname (l, c) (l, c)
|
||||
getParseErrorLoc fname (LitFail _) = MkFC fname (0, 0) (0, 0) -- TODO: Remove this unused case
|
||||
getParseErrorLoc fname _ = replFC
|
||||
|
||||
-- Just load the given module, if it exists, which will involve building
|
||||
-- it if necessary
|
||||
runRepl : {auto c : Ref Ctxt Defs} ->
|
||||
@ -506,8 +500,7 @@ parsePkgFile file = do
|
||||
(do desc <- parsePkgDesc file
|
||||
eoi
|
||||
pure desc)
|
||||
| Left (FileFail err) => throw (FileErr file err)
|
||||
| Left err => throw (ParseFail (getParseErrorLoc file err) err)
|
||||
| Left err => throw (fromParseError file err)
|
||||
addFields fs (initPkgDesc pname)
|
||||
|
||||
processPackage : {auto c : Ref Ctxt Defs} ->
|
||||
@ -525,8 +518,7 @@ processPackage cmd file opts
|
||||
(do desc <- parsePkgDesc file
|
||||
eoi
|
||||
pure desc)
|
||||
| Left (FileFail err) => throw (FileErr file err)
|
||||
| Left err => throw (ParseFail (getParseErrorLoc file err) err)
|
||||
| Left err => throw (fromParseError file err)
|
||||
pkg <- addFields fs (initPkgDesc pname)
|
||||
maybe (pure ()) setBuildDir (builddir pkg)
|
||||
setOutputDir (outputdir pkg)
|
||||
@ -628,8 +620,7 @@ findIpkg fname
|
||||
(do desc <- parsePkgDesc ipkgn
|
||||
eoi
|
||||
pure desc)
|
||||
| Left (FileFail err) => throw (FileErr ipkgn err)
|
||||
| Left err => throw (ParseFail (getParseErrorLoc ipkgn err) err)
|
||||
| Left err => throw (fromParseError ipkgn err)
|
||||
pkg <- addFields fs (initPkgDesc pname)
|
||||
maybe (pure ()) setBuildDir (builddir pkg)
|
||||
setOutputDir (outputdir pkg)
|
||||
|
@ -390,7 +390,7 @@ mutual
|
||||
pure (PDotted (boundToFC fname b) b.val)
|
||||
<|> do b <- bounds (symbol "`(" *> expr pdef fname indents <* symbol ")")
|
||||
pure (PQuote (boundToFC fname b) b.val)
|
||||
<|> do b <- bounds (symbol "`{{" *> name <* symbol "}" <* symbol "}")
|
||||
<|> do b <- bounds (symbol "`{{" *> name <* symbol "}}")
|
||||
pure (PQuoteName (boundToFC fname b) b.val)
|
||||
<|> do b <- bounds (symbol "`[" *> nonEmptyBlock (topDecl fname) <* symbol "]")
|
||||
pure (PQuoteDecl (boundToFC fname b) (collectDefs (concat b.val)))
|
||||
|
@ -4,6 +4,7 @@ import Compiler.Inline
|
||||
|
||||
import Core.Binary
|
||||
import Core.Context
|
||||
import Core.Core
|
||||
import Core.Context.Log
|
||||
import Core.Directory
|
||||
import Core.Env
|
||||
@ -26,6 +27,7 @@ import Idris.Syntax
|
||||
import Idris.Pretty
|
||||
|
||||
import Data.List
|
||||
import Libraries.Data.String.Extra
|
||||
import Libraries.Data.NameMap
|
||||
|
||||
import System.File
|
||||
@ -187,11 +189,13 @@ modTime fname
|
||||
pure (cast t)
|
||||
|
||||
export
|
||||
getParseErrorLoc : String -> ParseError Token -> FC
|
||||
getParseErrorLoc fname (ParseFail _ (Just pos) _) = MkFC fname pos pos
|
||||
getParseErrorLoc fname (LexFail (l, c, _)) = MkFC fname (l, c) (l, c)
|
||||
getParseErrorLoc fname (LitFail (MkLitErr l c _)) = MkFC fname (l, 0) (l, 0)
|
||||
getParseErrorLoc fname _ = replFC
|
||||
fromParseError : (Show token, Pretty token) => String -> ParseError token -> Error
|
||||
fromParseError fname (FileFail err) = FileErr fname err
|
||||
fromParseError fname (LitFail (MkLitErr l c _)) = LitFail (MkFC fname (l, c) (l, c + 1))
|
||||
fromParseError fname (LexFail (ComposeNotClosing begin end, _, _, _)) = LexFail (MkFC fname begin end) "Bracket is not properly closed."
|
||||
fromParseError fname (LexFail (_, l, c, _)) = LexFail (MkFC fname (l, c) (l, c + 1)) "Can't recognoise token."
|
||||
fromParseError fname (ParseFail msg (Just (l, c)) toks) = ParseFail (MkFC fname (l, c) (l, c + 1)) (msg +> '.') toks
|
||||
fromParseError fname (ParseFail msg Nothing toks) = ParseFail replFC (msg +> '.') toks
|
||||
|
||||
export
|
||||
readHeader : {auto c : Ref Ctxt Defs} ->
|
||||
@ -199,17 +203,11 @@ readHeader : {auto c : Ref Ctxt Defs} ->
|
||||
readHeader path
|
||||
= do Right res <- coreLift (readFile path)
|
||||
| Left err => throw (FileErr path err)
|
||||
case runParserTo (isLitFile path) isColon res (progHdr path) of
|
||||
Left err => throw (ParseFail (getParseErrorLoc path err) err)
|
||||
-- Stop at the first :, that's definitely not part of the header, to
|
||||
-- save lexing the whole file unnecessarily
|
||||
case runParserTo (isLitFile path) (is ':') res (progHdr path) of
|
||||
Left err => throw (fromParseError path err)
|
||||
Right mod => pure mod
|
||||
where
|
||||
-- Stop at the first :, that's definitely not part of the header, to
|
||||
-- save lexing the whole file unnecessarily
|
||||
isColon : WithBounds Token -> Bool
|
||||
isColon t
|
||||
= case t.val of
|
||||
Symbol ":" => True
|
||||
_ => False
|
||||
|
||||
%foreign "scheme:collect"
|
||||
prim__gc : Int -> PrimIO ()
|
||||
@ -271,7 +269,7 @@ processMod srcf ttcf msg sourcecode
|
||||
do iputStrLn msg
|
||||
Right mod <- logTime ("++ Parsing " ++ srcf) $
|
||||
pure (runParser (isLitFile srcf) sourcecode (do p <- prog srcf; eoi; pure p))
|
||||
| Left err => pure (Just [ParseFail (getParseErrorLoc srcf err) err])
|
||||
| Left err => pure (Just [fromParseError srcf err])
|
||||
initHash
|
||||
traverse addPublicHash (sort hs)
|
||||
resetNextVar
|
||||
|
@ -126,6 +126,13 @@ mutual
|
||||
many : Lexer -> Recognise False
|
||||
many l = opt (some l)
|
||||
|
||||
||| Repeat the sub-lexer `l` one or more times until the lexer
|
||||
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
|
||||
||| /((?!`stopBefore`)`l`)\+/
|
||||
export
|
||||
someUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Lexer
|
||||
someUntil stopBefore l = some (reject stopBefore <+> l)
|
||||
|
||||
||| Repeat the sub-lexer `l` zero or more times until the lexer
|
||||
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
|
||||
||| Not guaranteed to consume input.
|
||||
@ -313,18 +320,19 @@ export
|
||||
quote : (q : Lexer) -> (l : Lexer) -> Lexer
|
||||
quote q l = surround q q l
|
||||
|
||||
||| Recognise an escape character (often '\\') followed by a sub-lexer
|
||||
||| Recognise an escape sub-lexer (often '\\') followed by
|
||||
||| another sub-lexer
|
||||
||| /[`esc`]`l`/
|
||||
export
|
||||
escape : (esc : Char) -> Lexer -> Lexer
|
||||
escape esc l = is esc <+> l
|
||||
escape : (esc : Lexer) -> Lexer -> Lexer
|
||||
escape esc l = esc <+> l
|
||||
|
||||
||| Recognise a string literal, including escaped characters.
|
||||
||| (Note: doesn't yet handle escape sequences such as \123)
|
||||
||| /"(\\\\.|.)\*?"/
|
||||
export
|
||||
stringLit : Lexer
|
||||
stringLit = quote (is '"') (escape '\\' any <|> any)
|
||||
stringLit = quote (is '"') (escape (is '\\') any <|> any)
|
||||
|
||||
||| Recognise a character literal, including escaped characters.
|
||||
||| (Note: doesn't yet handle escape sequences such as \123)
|
||||
@ -332,7 +340,7 @@ stringLit = quote (is '"') (escape '\\' any <|> any)
|
||||
export
|
||||
charLit : Lexer
|
||||
charLit = let q = '\'' in
|
||||
is q <+> (escape '\\' (control <|> any) <|> isNot q) <+> is q
|
||||
is q <+> (escape (is '\\') (control <|> any) <|> isNot q) <+> is q
|
||||
where
|
||||
lexStr : List String -> Lexer
|
||||
lexStr [] = fail
|
||||
|
@ -97,6 +97,7 @@ strTail start (MkStrLen str len)
|
||||
|
||||
-- If the string is recognised, returns the index at which the token
|
||||
-- ends
|
||||
export
|
||||
scan : Recognise c -> List Char -> List Char -> Maybe (List Char, List Char)
|
||||
scan Empty tok str = pure (tok, str)
|
||||
scan Fail tok str = Nothing
|
||||
|
152
src/Libraries/Text/Lexer/Tokenizer.idr
Normal file
152
src/Libraries/Text/Lexer/Tokenizer.idr
Normal file
@ -0,0 +1,152 @@
|
||||
module Libraries.Text.Lexer.Tokenizer
|
||||
|
||||
import public Libraries.Control.Delayed
|
||||
import public Libraries.Text.Bounded
|
||||
import Libraries.Data.Bool.Extra
|
||||
import Libraries.Data.String.Extra
|
||||
import Libraries.Text.Lexer.Core
|
||||
import Libraries.Text.Lexer
|
||||
import Libraries.Text.PrettyPrint.Prettyprinter
|
||||
import Libraries.Text.PrettyPrint.Prettyprinter.Util
|
||||
import Data.List
|
||||
import Data.Either
|
||||
import Data.Nat
|
||||
import Data.Strings
|
||||
|
||||
%default total
|
||||
|
||||
||| Description of a language's tokenization rule.
|
||||
export
|
||||
data Tokenizer : (tokenType : Type) -> Type where
|
||||
Match : Lexer -> (String -> tokenType) -> Tokenizer tokenType
|
||||
Compose : (begin : Lexer) ->
|
||||
(mapBegin : String -> tokenType) ->
|
||||
(tagger : String -> tag) ->
|
||||
(middle : Inf (tag -> Tokenizer tokenType)) ->
|
||||
(end : tag -> Lexer) ->
|
||||
(mapEnd : String -> tokenType) ->
|
||||
Tokenizer tokenType
|
||||
Alt : Tokenizer tokenType -> Lazy (Tokenizer tokenType) -> Tokenizer tokenType
|
||||
|
||||
||| Alternative tokenizer rules.
|
||||
export
|
||||
(<|>) : Tokenizer t -> Lazy (Tokenizer t) -> Tokenizer t
|
||||
(<|>) = Alt
|
||||
|
||||
||| Match on a recogniser and cast the string to a token.
|
||||
export
|
||||
match : Lexer -> (String -> a) -> Tokenizer a
|
||||
match = Match
|
||||
|
||||
||| Compose other tokenizer. Language composition should be quoted between
|
||||
||| a begin lexer and a end lexer. The begin token can be used to generate
|
||||
||| the composition tokenizer and the end lexer.
|
||||
export
|
||||
compose : (begin : Lexer) ->
|
||||
(mapBegin : String -> a) ->
|
||||
(tagger : String -> tag) ->
|
||||
(middle : Inf (tag -> Tokenizer a)) ->
|
||||
(end : tag -> Lexer) ->
|
||||
(mapEnd : String -> a) ->
|
||||
Tokenizer a
|
||||
compose = Compose
|
||||
|
||||
||| Stop reason why tokenizer can't make more progress.
|
||||
||| @ ComposeNotClosing carries the span of composition begin token in the
|
||||
||| form of `(startLine, startCol), (endLine, endCol)`.
|
||||
public export
|
||||
data StopReason = EndInput | NoRuleApply | ComposeNotClosing (Int, Int) (Int, Int)
|
||||
|
||||
export
|
||||
Show StopReason where
|
||||
show EndInput = "EndInput"
|
||||
show NoRuleApply = "NoRuleApply"
|
||||
show (ComposeNotClosing start end) = "ComposeNotClosing " ++ show start ++ " " ++ show end
|
||||
|
||||
export
|
||||
Pretty StopReason where
|
||||
pretty EndInput = pretty "EndInput"
|
||||
pretty NoRuleApply = pretty "NoRuleApply"
|
||||
pretty (ComposeNotClosing start end) = "ComposeNotClosing" <++> pretty start <++> pretty end
|
||||
|
||||
tokenise : Lexer ->
|
||||
Tokenizer a ->
|
||||
(line, col : Int) -> List (WithBounds a) ->
|
||||
List Char ->
|
||||
(List (WithBounds a), (StopReason, Int, Int, List Char))
|
||||
tokenise reject tokenizer line col acc [] = (reverse acc, EndInput, (line, col, []))
|
||||
tokenise reject tokenizer line col acc str
|
||||
= case scan reject [] str of
|
||||
Just _ => (reverse acc, (EndInput, line, col, str))
|
||||
Nothing => case getFirstMatch tokenizer str of
|
||||
Right (toks, line', col', rest) =>
|
||||
-- assert total because getFirstMatch must consume something
|
||||
assert_total (tokenise reject tokenizer line' col' (toks ++ acc) rest)
|
||||
Left reason => (reverse acc, reason, (line, col, str))
|
||||
where
|
||||
countNLs : List Char -> Nat
|
||||
countNLs str = List.length (filter (== '\n') str)
|
||||
|
||||
getCols : List Char -> Int -> Int
|
||||
getCols x c
|
||||
= case span (/= '\n') x of
|
||||
(incol, []) => c + cast (length incol)
|
||||
(incol, _) => cast (length incol)
|
||||
|
||||
-- get the next lexeme using the `Lexer` in argument, its position and the input
|
||||
-- Returns the new position, the lexeme parsed and the rest of the input
|
||||
-- If parsing the lexer fails, this returns `Nothing`
|
||||
getNext : (lexer : Lexer) -> (line, col : Int) -> (input : List Char) -> Maybe (String, Int, Int, List Char)
|
||||
getNext lexer line col str =
|
||||
let Just (token, rest) = scan lexer [] str
|
||||
| _ => Nothing
|
||||
line' = line + cast (countNLs token)
|
||||
col' = getCols token col
|
||||
tokenStr = fastPack $ reverse token
|
||||
in pure (tokenStr, line', col', rest)
|
||||
|
||||
getFirstMatch : Tokenizer a -> List Char ->
|
||||
Either StopReason (List (WithBounds a), Int, Int, List Char)
|
||||
getFirstMatch (Match lex fn) str
|
||||
= let Just (tok, line', col', rest) = getNext lex line col str
|
||||
| _ => Left NoRuleApply
|
||||
tok' = MkBounded (fn tok) False line col line' col'
|
||||
in Right ([tok'], line', col', rest)
|
||||
getFirstMatch (Compose begin mapBegin tagger middleFn endFn mapEnd) str
|
||||
= let Just (beginTok', line', col' , rest) = getNext begin line col str
|
||||
| Nothing => Left NoRuleApply
|
||||
tag = tagger beginTok'
|
||||
middle = middleFn tag
|
||||
end = endFn tag
|
||||
beginTok'' = MkBounded (mapBegin beginTok') False line col line' col'
|
||||
(midToks, (reason, line'', col'', rest'')) =
|
||||
tokenise end middle line' col' [] rest
|
||||
in case reason of
|
||||
(ComposeNotClosing _ _) => Left reason
|
||||
_ => let Just (endTok', lineEnd, colEnd, restEnd) =
|
||||
getNext end line'' col'' rest''
|
||||
| _ => Left $ ComposeNotClosing (line, col) (line', col')
|
||||
endTok'' = MkBounded (mapEnd endTok') False line'' col'' lineEnd colEnd
|
||||
in Right ([endTok''] ++ reverse midToks ++ [beginTok''], lineEnd, colEnd, restEnd)
|
||||
getFirstMatch (Alt t1 t2) str
|
||||
= case getFirstMatch t1 str of
|
||||
Right result => Right result
|
||||
Left reason@(ComposeNotClosing _ _) => Left reason
|
||||
Left _ => getFirstMatch t2 str
|
||||
|
||||
export
|
||||
lexTo : Lexer ->
|
||||
Tokenizer a ->
|
||||
String ->
|
||||
(List (WithBounds a), (StopReason, Int, Int, String))
|
||||
lexTo reject tokenizer str
|
||||
= let (ts, reason, (l, c, str')) =
|
||||
tokenise reject tokenizer 0 0 [] (fastUnpack str) in
|
||||
(ts, reason, (l, c, fastPack str'))
|
||||
|
||||
||| Given a tokenizer and an input string, return a list of recognised tokens,
|
||||
||| and the line, column, and remainder of the input at the first point in the string
|
||||
||| where there are no recognised tokens.
|
||||
export
|
||||
lex : Tokenizer a -> String -> (List (WithBounds a), (StopReason, Int, Int, String))
|
||||
lex tokenizer str = lexTo (pred $ const False) tokenizer str
|
@ -8,6 +8,7 @@ import Data.Maybe
|
||||
import Data.Strings
|
||||
import Libraries.Data.String.Extra
|
||||
import public Libraries.Text.Bounded
|
||||
import Libraries.Text.Lexer.Tokenizer
|
||||
import Libraries.Text.PrettyPrint.Prettyprinter
|
||||
import Libraries.Text.PrettyPrint.Prettyprinter.Util
|
||||
|
||||
@ -33,7 +34,7 @@ data Token
|
||||
| DotIdent String -- .ident
|
||||
| Symbol String
|
||||
-- Comments
|
||||
| Comment String
|
||||
| Comment
|
||||
| DocComment String
|
||||
-- Special
|
||||
| CGDirective String
|
||||
@ -56,7 +57,7 @@ Show Token where
|
||||
show (DotIdent x) = "dot+identifier " ++ x
|
||||
show (Symbol x) = "symbol " ++ x
|
||||
-- Comments
|
||||
show (Comment _) = "comment"
|
||||
show Comment = "comment"
|
||||
show (DocComment c) = "doc comment: \"" ++ c ++ "\""
|
||||
-- Special
|
||||
show (CGDirective x) = "CGDirective " ++ x
|
||||
@ -79,7 +80,7 @@ Pretty Token where
|
||||
pretty (DotIdent x) = pretty "dot+identifier" <++> pretty x
|
||||
pretty (Symbol x) = pretty "symbol" <++> pretty x
|
||||
-- Comments
|
||||
pretty (Comment _) = pretty "comment"
|
||||
pretty Comment = pretty "comment"
|
||||
pretty (DocComment c) = reflow "doc comment:" <++> dquotes (pretty c)
|
||||
-- Special
|
||||
pretty (CGDirective x) = pretty "CGDirective" <++> pretty x
|
||||
@ -190,16 +191,27 @@ keywords = ["data", "module", "where", "let", "in", "do", "record",
|
||||
special : List String
|
||||
special = ["%lam", "%pi", "%imppi", "%let"]
|
||||
|
||||
-- Special symbols - things which can't be a prefix of another symbol, and
|
||||
-- don't match 'validSymbol'
|
||||
export
|
||||
symbols : List String
|
||||
symbols
|
||||
= [".(", -- for things such as Foo.Bar.(+)
|
||||
"@{",
|
||||
"[|", "|]",
|
||||
"(", ")", "{", "}", "[", "]", ",", ";", "_",
|
||||
"`(", "`{{", "`[", "`"]
|
||||
symbols = [",", ";", "_", "`"]
|
||||
|
||||
export
|
||||
groupSymbols : List String
|
||||
groupSymbols = [".(", -- for things such as Foo.Bar.(+)
|
||||
"@{", "[|", "(", "{", "[", "`(", "`{{", "`["]
|
||||
|
||||
export
|
||||
groupClose : String -> String
|
||||
groupClose ".(" = ")"
|
||||
groupClose "@{" = "}"
|
||||
groupClose "[|" = "|]"
|
||||
groupClose "(" = ")"
|
||||
groupClose "[" = "]"
|
||||
groupClose "{" = "}"
|
||||
groupClose "`(" = ")"
|
||||
groupClose "`{{" = "}}"
|
||||
groupClose "`[" = "]"
|
||||
groupClose _ = ""
|
||||
|
||||
export
|
||||
isOpChar : Char -> Bool
|
||||
@ -212,7 +224,7 @@ validSymbol = some (pred isOpChar)
|
||||
export
|
||||
reservedSymbols : List String
|
||||
reservedSymbols
|
||||
= symbols ++
|
||||
= symbols ++ groupSymbols ++ (groupClose <$> groupSymbols) ++
|
||||
["%", "\\", ":", "=", ":=", "|", "|||", "<-", "->", "=>", "?", "!",
|
||||
"&", "**", "..", "~"]
|
||||
|
||||
@ -246,31 +258,32 @@ fromOctLit str
|
||||
fromMaybe 0 (fromOct (reverse num))
|
||||
-- ^-- can't happen if the literal lexed correctly
|
||||
|
||||
rawTokens : TokenMap Token
|
||||
rawTokens : Tokenizer Token
|
||||
rawTokens =
|
||||
[(comment, Comment),
|
||||
(blockComment, Comment),
|
||||
(docComment, DocComment . drop 3),
|
||||
(cgDirective, mkDirective),
|
||||
(holeIdent, \x => HoleIdent (assert_total (strTail x)))] ++
|
||||
map (\x => (exact x, Symbol)) symbols ++
|
||||
[(doubleLit, \x => DoubleLit (cast x)),
|
||||
(binLit, \x => IntegerLit (fromBinLit x)),
|
||||
(hexLit, \x => IntegerLit (fromHexLit x)),
|
||||
(octLit, \x => IntegerLit (fromOctLit x)),
|
||||
(digits, \x => IntegerLit (cast x)),
|
||||
(stringLit, \x => StringLit 0 (stripQuotes x)),
|
||||
(stringLit1, \x => StringLit 1 (stripSurrounds 2 2 x)),
|
||||
(stringLit2, \x => StringLit 2 (stripSurrounds 3 3 x)),
|
||||
(stringLit3, \x => StringLit 3 (stripSurrounds 4 4 x)),
|
||||
(charLit, \x => CharLit (stripQuotes x)),
|
||||
(dotIdent, \x => DotIdent (assert_total $ strTail x)),
|
||||
(namespacedIdent, parseNamespace),
|
||||
(identNormal, parseIdent),
|
||||
(pragma, \x => Pragma (assert_total $ strTail x)),
|
||||
(space, Comment),
|
||||
(validSymbol, Symbol),
|
||||
(symbol, Unrecognised)]
|
||||
match comment (const Comment)
|
||||
<|> match blockComment (const Comment)
|
||||
<|> match docComment (DocComment . drop 3)
|
||||
<|> match cgDirective mkDirective
|
||||
<|> match holeIdent (\x => HoleIdent (assert_total (strTail x)))
|
||||
<|> compose (choice $ exact <$> groupSymbols) Symbol id (\_ => rawTokens) (exact . groupClose) Symbol
|
||||
<|> match (choice $ exact <$> symbols) Symbol
|
||||
<|> match doubleLit (\x => DoubleLit (cast x))
|
||||
<|> match binLit (\x => IntegerLit (fromBinLit x))
|
||||
<|> match hexLit (\x => IntegerLit (fromHexLit x))
|
||||
<|> match octLit (\x => IntegerLit (fromOctLit x))
|
||||
<|> match digits (\x => IntegerLit (cast x))
|
||||
<|> match stringLit (\x => StringLit 0 (stripQuotes x))
|
||||
<|> match stringLit1 (\x => StringLit 1 (stripSurrounds 2 2 x))
|
||||
<|> match stringLit2 (\x => StringLit 2 (stripSurrounds 3 3 x))
|
||||
<|> match stringLit3 (\x => StringLit 3 (stripSurrounds 4 4 x))
|
||||
<|> match charLit (\x => CharLit (stripQuotes x))
|
||||
<|> match dotIdent (\x => DotIdent (assert_total $ strTail x))
|
||||
<|> match namespacedIdent parseNamespace
|
||||
<|> match identNormal parseIdent
|
||||
<|> match pragma (\x => Pragma (assert_total $ strTail x))
|
||||
<|> match space (const Comment)
|
||||
<|> match validSymbol Symbol
|
||||
<|> match symbol Unrecognised
|
||||
where
|
||||
parseIdent : String -> Token
|
||||
parseIdent x = if x `elem` keywords then Keyword x
|
||||
@ -281,21 +294,21 @@ rawTokens =
|
||||
(Just ns, n) => DotSepIdent ns n
|
||||
|
||||
export
|
||||
lexTo : (WithBounds Token -> Bool) ->
|
||||
String -> Either (Int, Int, String) (List (WithBounds Token))
|
||||
lexTo pred str
|
||||
= case lexTo pred rawTokens str of
|
||||
lexTo : Lexer ->
|
||||
String -> Either (StopReason, Int, Int, String) (List (WithBounds Token))
|
||||
lexTo reject str
|
||||
= case lexTo reject rawTokens str of
|
||||
-- Add the EndInput token so that we'll have a line and column
|
||||
-- number to read when storing spans in the file
|
||||
(tok, (l, c, "")) => Right (filter notComment tok ++
|
||||
(tok, (EndInput, l, c, _)) => Right (filter notComment tok ++
|
||||
[MkBounded EndInput False l c l c])
|
||||
(_, fail) => Left fail
|
||||
where
|
||||
notComment : WithBounds Token -> Bool
|
||||
notComment t = case t.val of
|
||||
Comment _ => False
|
||||
Comment => False
|
||||
_ => True
|
||||
|
||||
export
|
||||
lex : String -> Either (Int, Int, String) (List (WithBounds Token))
|
||||
lex = lexTo (const False)
|
||||
lex : String -> Either (StopReason, Int, Int, String) (List (WithBounds Token))
|
||||
lex = lexTo (pred $ const False)
|
||||
|
@ -14,7 +14,7 @@ import Libraries.Utils.Either
|
||||
export
|
||||
runParser : String -> Rule ty -> Either (ParseError Token) ty
|
||||
runParser str p
|
||||
= do toks <- mapError LexFail $ lex str
|
||||
= do toks <- mapError (\err => LexFail (NoRuleApply, err)) $ lex str
|
||||
parsed <- mapError toGenericParsingError $ parse p toks
|
||||
Right (fst parsed)
|
||||
|
||||
|
@ -11,18 +11,18 @@ import Libraries.Utils.Either
|
||||
|
||||
export
|
||||
runParserTo : {e : _} ->
|
||||
Maybe LiterateStyle -> (WithBounds Token -> Bool) ->
|
||||
Maybe LiterateStyle -> Lexer ->
|
||||
String -> Grammar Token e ty -> Either (ParseError Token) ty
|
||||
runParserTo lit pred str p
|
||||
runParserTo lit reject str p
|
||||
= do str <- mapError LitFail $ unlit lit str
|
||||
toks <- mapError LexFail $ lexTo pred str
|
||||
toks <- mapError LexFail $ lexTo reject str
|
||||
parsed <- mapError toGenericParsingError $ parse p toks
|
||||
Right (fst parsed)
|
||||
|
||||
export
|
||||
runParser : {e : _} ->
|
||||
Maybe LiterateStyle -> String -> Grammar Token e ty -> Either (ParseError Token) ty
|
||||
runParser lit = runParserTo lit (const False)
|
||||
runParser lit = runParserTo lit (pred $ const False)
|
||||
|
||||
export covering
|
||||
parseFile : (fn : String) -> Rule ty -> IO (Either (ParseError Token) ty)
|
||||
|
@ -1,5 +1,6 @@
|
||||
module Parser.Support
|
||||
|
||||
import public Libraries.Text.Lexer.Tokenizer
|
||||
import public Libraries.Text.Lexer
|
||||
import public Libraries.Text.Parser
|
||||
import Libraries.Text.PrettyPrint.Prettyprinter
|
||||
@ -17,7 +18,7 @@ import System.File
|
||||
public export
|
||||
data ParseError tok
|
||||
= ParseFail String (Maybe (Int, Int)) (List tok)
|
||||
| LexFail (Int, Int, String)
|
||||
| LexFail (StopReason, Int, Int, String)
|
||||
| FileFail FileError
|
||||
| LitFail LiterateError
|
||||
|
||||
@ -26,8 +27,8 @@ Show tok => Show (ParseError tok) where
|
||||
show (ParseFail err loc toks)
|
||||
= "Parse error: " ++ err ++ " (next tokens: "
|
||||
++ show (take 10 toks) ++ ")"
|
||||
show (LexFail (c, l, str))
|
||||
= "Lex error at " ++ show (c, l) ++ " input: " ++ str
|
||||
show (LexFail (reason, c, l, str))
|
||||
= "Lex error " ++ show reason ++ " at " ++ show (c, l) ++ " input: " ++ str
|
||||
show (FileFail err)
|
||||
= "File error: " ++ show err
|
||||
show (LitFail (MkLitErr l c str))
|
||||
@ -42,8 +43,8 @@ Pretty tok => Pretty (ParseError tok) where
|
||||
prettyLine : Maybe (Int, Int) -> Doc ann
|
||||
prettyLine Nothing = emptyDoc
|
||||
prettyLine (Just (r, c)) = space <+> "at" <++> "line" <++> pretty (r + 1) <+> ":" <+> pretty (c + 1)
|
||||
pretty (LexFail (c, l, str))
|
||||
= reflow "Lex error at" <++> pretty (c, l) <++> pretty "input:" <++> pretty str
|
||||
pretty (LexFail (reason, c, l, str))
|
||||
= reflow "Lex error" <++> pretty reason <++> "at" <++> pretty (c, l) <++> pretty "input:" <++> pretty str
|
||||
pretty (FileFail err)
|
||||
= reflow "File error:" <++> pretty (show err)
|
||||
pretty (LitFail (MkLitErr l c str))
|
||||
|
@ -12,11 +12,30 @@ Lambda.idr:62:3--88:9
|
||||
67 | (Func
|
||||
|
||||
1/1: Building Bad1 (Bad1.idr)
|
||||
Error: Parse error at line 3:1:
|
||||
Couldn't parse declaration (next tokens: [data, identifier Bad, symbol =, identifier BadDCon, symbol (, identifier x, symbol :, identifier Nat, symbol ), end of input])
|
||||
Error: Couldn't parse declaration.
|
||||
|
||||
Bad1.idr:3:1--3:2
|
||||
1 | module Bad1
|
||||
2 |
|
||||
3 | data Bad = BadDCon (x : Nat)
|
||||
^
|
||||
|
||||
1/1: Building Bad2 (Bad2.idr)
|
||||
Error: Parse error at line 3:13:
|
||||
Expected 'case', 'if', 'do', application or operator expression (next tokens: [symbol (, identifier whatever, symbol :, identifier Int, symbol ), end of input])
|
||||
Error: Expected 'case', 'if', 'do', application or operator expression.
|
||||
|
||||
Bad2.idr:3:13--3:14
|
||||
1 | module Bad2
|
||||
2 |
|
||||
3 | badReturn : (whatever : Int)
|
||||
^
|
||||
|
||||
1/1: Building Bad3 (Bad3.idr)
|
||||
Error: Parse error at line 4:1:
|
||||
Couldn't parse declaration (next tokens: [identifier badExpr, symbol (, identifier whatever, symbol :, symbol (, symbol ), symbol ), end of input])
|
||||
Error: Couldn't parse declaration.
|
||||
|
||||
Bad3.idr:4:1--4:2
|
||||
1 | module Bad3
|
||||
2 |
|
||||
3 | badExpr : ()
|
||||
4 | badExpr (whatever : ())
|
||||
^
|
||||
|
||||
|
@ -1,3 +1,10 @@
|
||||
1/1: Building PError (PError.idr)
|
||||
Error: Parse error at line 5:1:
|
||||
Expected ')' (next tokens: [identifier bar, identifier x, symbol =, let, identifier y, symbol =, literal 42, in, identifier y, symbol +])
|
||||
Error: Bracket is not properly closed.
|
||||
|
||||
PError.idr:4:7--4:8
|
||||
1 | foo : Int -> Int
|
||||
2 | foo x = x
|
||||
3 |
|
||||
4 | bar : (Int -> Int
|
||||
^
|
||||
|
||||
|
@ -1,3 +1,11 @@
|
||||
1/1: Building PError (PError.idr)
|
||||
Error: Parse error at line 7:1:
|
||||
Expected ')' (next tokens: [identifier baz, symbol :, identifier Int, symbol ->, identifier Int, identifier baz, identifier x, symbol =, identifier x, end of input])
|
||||
Error: Bracket is not properly closed.
|
||||
|
||||
PError.idr:5:23--5:24
|
||||
1 | foo : Int -> Int
|
||||
2 | foo x = x
|
||||
3 |
|
||||
4 | bar : Int -> Int
|
||||
5 | bar x = let y = 42 in (y + x
|
||||
^
|
||||
|
||||
|
@ -1,6 +1,22 @@
|
||||
1/1: Building PError (PError.idr)
|
||||
Error: Parse error at line 5:9:
|
||||
Expected 'case', 'if', 'do', application or operator expression (next tokens: [let, identifier y, symbol =, symbol (, literal 42, in, identifier y, symbol +, identifier x, identifier baz])
|
||||
Error: Bracket is not properly closed.
|
||||
|
||||
PError.idr:5:17--5:18
|
||||
1 | foo : Int -> Int
|
||||
2 | foo x = x
|
||||
3 |
|
||||
4 | bar : Int -> Int
|
||||
5 | bar x = let y = (42 in y + x
|
||||
^
|
||||
|
||||
1/1: Building PError2 (PError2.idr)
|
||||
Error: Parse error at line 5:9:
|
||||
Expected 'case', 'if', 'do', application or operator expression (next tokens: [let, identifier y, symbol :=, symbol (, literal 42, in, identifier y, symbol +, identifier x, identifier baz])
|
||||
Error: Bracket is not properly closed.
|
||||
|
||||
PError2.idr:5:18--5:19
|
||||
1 | foo : Int -> Int
|
||||
2 | foo x = x
|
||||
3 |
|
||||
4 | bar : Int -> Int
|
||||
5 | bar x = let y := (42 in y + x
|
||||
^
|
||||
|
||||
|
@ -1,3 +1,10 @@
|
||||
1/1: Building PError (PError.idr)
|
||||
Error: Parse error at line 4:33:
|
||||
Wrong number of 'with' arguments (next tokens: [symbol =, identifier x, identifier foo, identifier x, identifier y, symbol |, identifier True, symbol |, identifier True, symbol =])
|
||||
Error: Wrong number of 'with' arguments.
|
||||
|
||||
PError.idr:4:33--4:34
|
||||
1 | foo : Nat -> Nat -> Bool
|
||||
2 | foo x y with (x == y)
|
||||
3 | foo x y | True with (y == x)
|
||||
4 | foo x y | True | False | 10 = x
|
||||
^
|
||||
|
||||
|
@ -1,3 +1,11 @@
|
||||
1/1: Building PError (PError.idr)
|
||||
Error: Parse error at line 7:1:
|
||||
Expected 'in' (next tokens: [identifier baz, symbol :, identifier Int, symbol ->, identifier Int, identifier baz, identifier x, symbol =, identifier x, end of input])
|
||||
Error: Expected 'in'.
|
||||
|
||||
PError.idr:7:1--7:2
|
||||
3 |
|
||||
4 | bar : Int -> Int
|
||||
5 | bar x = let y = 42
|
||||
6 |
|
||||
7 | baz : Int -> Int
|
||||
^
|
||||
|
||||
|
@ -1,3 +1,11 @@
|
||||
1/1: Building PError (PError.idr)
|
||||
Error: Parse error at line 7:1:
|
||||
Expected 'else' (next tokens: [identifier baz, symbol :, identifier Int, symbol ->, identifier Int, identifier baz, identifier x, symbol =, identifier x, end of input])
|
||||
Error: Expected 'else'.
|
||||
|
||||
PError.idr:7:1--7:2
|
||||
3 |
|
||||
4 | bar : Int -> Int
|
||||
5 | bar x = if x == 95 then 0
|
||||
6 |
|
||||
7 | baz : Int -> Int
|
||||
^
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user