mirror of
https://github.com/edwinb/Idris2-boot.git
synced 2024-12-22 12:21:30 +03:00
e6121e0935
This is the result of running the command: $ find . -name '*.idr' -type f -exec sed -i -E 's/\s+$//' {} + I confirmed before running it that this would not affect any markdown formatting in documentation comments.
562 lines
17 KiB
Idris
562 lines
17 KiB
Idris
module Parser.Support
|
|
|
|
import public Text.Lexer
|
|
import public Parser.Lexer
|
|
import public Text.Parser
|
|
|
|
import Core.TT
|
|
import Data.List.Views
|
|
|
|
%default total
|
|
|
|
public export
|
|
Rule : Type -> Type
|
|
Rule ty = Grammar (TokenData Token) True ty
|
|
|
|
public export
|
|
EmptyRule : Type -> Type
|
|
EmptyRule ty = Grammar (TokenData Token) False ty
|
|
|
|
public export
|
|
data ParseError = ParseFail String (Maybe (Int, Int)) (List Token)
|
|
| LexFail (Int, Int, String)
|
|
| FileFail FileError
|
|
|
|
export
|
|
Show ParseError where
|
|
show (ParseFail err loc toks)
|
|
= "Parse error: " ++ err ++ " (next tokens: "
|
|
++ show (take 10 toks) ++ ")"
|
|
show (LexFail (c, l, str))
|
|
= "Lex error at " ++ show (c, l) ++ " input: " ++ str
|
|
show (FileFail err)
|
|
= "File error: " ++ show err
|
|
|
|
export
|
|
eoi : EmptyRule ()
|
|
eoi
|
|
= do nextIs "Expected end of input" (isEOI . tok)
|
|
pure ()
|
|
where
|
|
isEOI : Token -> Bool
|
|
isEOI EndInput = True
|
|
isEOI _ = False
|
|
|
|
export
|
|
runParserTo : (TokenData Token -> Bool) ->
|
|
String -> Grammar (TokenData Token) e ty -> Either ParseError ty
|
|
runParserTo pred str p
|
|
= case lexTo pred str of
|
|
Left err => Left $ LexFail err
|
|
Right toks =>
|
|
case parse p toks of
|
|
Left (Error err []) =>
|
|
Left $ ParseFail err Nothing []
|
|
Left (Error err (t :: ts)) =>
|
|
Left $ ParseFail err (Just (line t, col t))
|
|
(map tok (t :: ts))
|
|
Right (val, _) => Right val
|
|
|
|
export
|
|
runParser : String -> Grammar (TokenData Token) e ty -> Either ParseError ty
|
|
runParser = runParserTo (const False)
|
|
|
|
export
|
|
parseFile : (fn : String) -> Rule ty -> IO (Either ParseError ty)
|
|
parseFile fn p
|
|
= do Right str <- readFile fn
|
|
| Left err => pure (Left (FileFail err))
|
|
pure (runParser str p)
|
|
|
|
|
|
-- Some basic parsers used by all the intermediate forms
|
|
|
|
export
|
|
location : EmptyRule (Int, Int)
|
|
location
|
|
= do tok <- peek
|
|
pure (line tok, col tok)
|
|
|
|
export
|
|
column : EmptyRule Int
|
|
column
|
|
= do (line, col) <- location
|
|
pure col
|
|
|
|
hex : Char -> Maybe Int
|
|
hex '0' = Just 0
|
|
hex '1' = Just 1
|
|
hex '2' = Just 2
|
|
hex '3' = Just 3
|
|
hex '4' = Just 4
|
|
hex '5' = Just 5
|
|
hex '6' = Just 6
|
|
hex '7' = Just 7
|
|
hex '8' = Just 8
|
|
hex '9' = Just 9
|
|
hex 'a' = Just 10
|
|
hex 'b' = Just 11
|
|
hex 'c' = Just 12
|
|
hex 'd' = Just 13
|
|
hex 'e' = Just 14
|
|
hex 'f' = Just 15
|
|
hex _ = Nothing
|
|
|
|
dec : Char -> Maybe Int
|
|
dec '0' = Just 0
|
|
dec '1' = Just 1
|
|
dec '2' = Just 2
|
|
dec '3' = Just 3
|
|
dec '4' = Just 4
|
|
dec '5' = Just 5
|
|
dec '6' = Just 6
|
|
dec '7' = Just 7
|
|
dec '8' = Just 8
|
|
dec '9' = Just 9
|
|
dec _ = Nothing
|
|
|
|
oct : Char -> Maybe Int
|
|
oct '0' = Just 0
|
|
oct '1' = Just 1
|
|
oct '2' = Just 2
|
|
oct '3' = Just 3
|
|
oct '4' = Just 4
|
|
oct '5' = Just 5
|
|
oct '6' = Just 6
|
|
oct '7' = Just 7
|
|
oct _ = Nothing
|
|
|
|
getEsc : String -> Maybe Char
|
|
getEsc "NUL" = Just '\NUL'
|
|
getEsc "SOH" = Just '\SOH'
|
|
getEsc "STX" = Just '\STX'
|
|
getEsc "ETX" = Just '\ETX'
|
|
getEsc "EOT" = Just '\EOT'
|
|
getEsc "ENQ" = Just '\ENQ'
|
|
getEsc "ACK" = Just '\ACK'
|
|
getEsc "BEL" = Just '\BEL'
|
|
getEsc "BS" = Just '\BS'
|
|
getEsc "HT" = Just '\HT'
|
|
getEsc "LF" = Just '\LF'
|
|
getEsc "VT" = Just '\VT'
|
|
getEsc "FF" = Just '\FF'
|
|
getEsc "CR" = Just '\CR'
|
|
getEsc "SO" = Just '\SO'
|
|
getEsc "SI" = Just '\SI'
|
|
getEsc "DLE" = Just '\DLE'
|
|
getEsc "DC1" = Just '\DC1'
|
|
getEsc "DC2" = Just '\DC2'
|
|
getEsc "DC3" = Just '\DC3'
|
|
getEsc "DC4" = Just '\DC4'
|
|
getEsc "NAK" = Just '\NAK'
|
|
getEsc "SYN" = Just '\SYN'
|
|
getEsc "ETB" = Just '\ETB'
|
|
getEsc "CAN" = Just '\CAN'
|
|
getEsc "EM" = Just '\EM'
|
|
getEsc "SUB" = Just '\SUB'
|
|
getEsc "ESC" = Just '\ESC'
|
|
getEsc "FS" = Just '\FS'
|
|
getEsc "GS" = Just '\GS'
|
|
getEsc "RS" = Just '\RS'
|
|
getEsc "US" = Just '\US'
|
|
getEsc "SP" = Just '\SP'
|
|
getEsc "DEL" = Just '\DEL'
|
|
getEsc str = Nothing
|
|
|
|
escape' : List Char -> Maybe (List Char)
|
|
escape' [] = pure []
|
|
escape' ('\\' :: '\\' :: xs) = pure $ '\\' :: !(escape' xs)
|
|
escape' ('\\' :: '&' :: xs) = pure !(escape' xs)
|
|
escape' ('\\' :: 'a' :: xs) = pure $ '\a' :: !(escape' xs)
|
|
escape' ('\\' :: 'b' :: xs) = pure $ '\b' :: !(escape' xs)
|
|
escape' ('\\' :: 'f' :: xs) = pure $ '\f' :: !(escape' xs)
|
|
escape' ('\\' :: 'n' :: xs) = pure $ '\n' :: !(escape' xs)
|
|
escape' ('\\' :: 'r' :: xs) = pure $ '\r' :: !(escape' xs)
|
|
escape' ('\\' :: 't' :: xs) = pure $ '\t' :: !(escape' xs)
|
|
escape' ('\\' :: 'v' :: xs) = pure $ '\v' :: !(escape' xs)
|
|
escape' ('\\' :: '\'' :: xs) = pure $ '\'' :: !(escape' xs)
|
|
escape' ('\\' :: '\"' :: xs) = pure $ '\"' :: !(escape' xs)
|
|
escape' ('\\' :: 'x' :: xs)
|
|
= case span isHexDigit xs of
|
|
([], rest) => assert_total (escape' rest)
|
|
(ds, rest) => pure $ cast !(toHex 1 (reverse ds)) ::
|
|
!(assert_total (escape' rest))
|
|
where
|
|
toHex : Int -> List Char -> Maybe Int
|
|
toHex _ [] = Just 0
|
|
toHex m (d :: ds)
|
|
= pure $ !(hex (toLower d)) * m + !(toHex (m*16) ds)
|
|
escape' ('\\' :: 'o' :: xs)
|
|
= case span isOctDigit xs of
|
|
([], rest) => assert_total (escape' rest)
|
|
(ds, rest) => pure $ cast !(toOct 1 (reverse ds)) ::
|
|
!(assert_total (escape' rest))
|
|
where
|
|
toOct : Int -> List Char -> Maybe Int
|
|
toOct _ [] = Just 0
|
|
toOct m (d :: ds)
|
|
= pure $ !(oct (toLower d)) * m + !(toOct (m*8) ds)
|
|
escape' ('\\' :: xs)
|
|
= case span isDigit xs of
|
|
([], (a :: b :: c :: rest)) =>
|
|
case getEsc (pack (the (List _) [a, b, c])) of
|
|
Just v => Just (v :: !(assert_total (escape' rest)))
|
|
Nothing => case getEsc (pack (the (List _) [a, b])) of
|
|
Just v => Just (v :: !(assert_total (escape' (c :: rest))))
|
|
Nothing => escape' xs
|
|
([], (a :: b :: [])) =>
|
|
case getEsc (pack (the (List _) [a, b])) of
|
|
Just v => Just (v :: [])
|
|
Nothing => escape' xs
|
|
([], rest) => assert_total (escape' rest)
|
|
(ds, rest) => Just $ cast (cast {to=Int} (pack ds)) ::
|
|
!(assert_total (escape' rest))
|
|
escape' (x :: xs) = Just $ x :: !(escape' xs)
|
|
|
|
escape : String -> Maybe String
|
|
escape x = pure $ pack !(escape' (unpack x))
|
|
|
|
getCharLit : String -> Maybe Char
|
|
getCharLit str
|
|
= do e <- escape str
|
|
if length e == 1
|
|
then Just (assert_total (strHead e))
|
|
else if length e == 0 -- parsed the NULL character that terminated the string!
|
|
then Just '\0000'
|
|
else Nothing
|
|
|
|
export
|
|
constant : Rule Constant
|
|
constant
|
|
= terminal "Expected constant"
|
|
(\x => case tok x of
|
|
Literal i => Just (BI i)
|
|
StrLit s => case escape s of
|
|
Nothing => Nothing
|
|
Just s' => Just (Str s')
|
|
CharLit c => case getCharLit c of
|
|
Nothing => Nothing
|
|
Just c' => Just (Ch c')
|
|
DoubleLit d => Just (Db d)
|
|
Keyword "Int" => Just IntType
|
|
Keyword "Integer" => Just IntegerType
|
|
Keyword "String" => Just StringType
|
|
Keyword "Char" => Just CharType
|
|
Keyword "Double" => Just DoubleType
|
|
_ => Nothing)
|
|
|
|
export
|
|
intLit : Rule Integer
|
|
intLit
|
|
= terminal "Expected integer literal"
|
|
(\x => case tok x of
|
|
Literal i => Just i
|
|
_ => Nothing)
|
|
|
|
export
|
|
strLit : Rule String
|
|
strLit
|
|
= terminal "Expected string literal"
|
|
(\x => case tok x of
|
|
StrLit s => Just s
|
|
_ => Nothing)
|
|
|
|
export
|
|
symbol : String -> Rule ()
|
|
symbol req
|
|
= terminal ("Expected '" ++ req ++ "'")
|
|
(\x => case tok x of
|
|
Symbol s => if s == req then Just ()
|
|
else Nothing
|
|
_ => Nothing)
|
|
|
|
export
|
|
keyword : String -> Rule ()
|
|
keyword req
|
|
= terminal ("Expected '" ++ req ++ "'")
|
|
(\x => case tok x of
|
|
Keyword s => if s == req then Just ()
|
|
else Nothing
|
|
_ => Nothing)
|
|
|
|
export
|
|
exactIdent : String -> Rule ()
|
|
exactIdent req
|
|
= terminal ("Expected " ++ req)
|
|
(\x => case tok x of
|
|
Ident s => if s == req then Just ()
|
|
else Nothing
|
|
_ => Nothing)
|
|
|
|
export
|
|
operator : Rule String
|
|
operator
|
|
= terminal "Expected operator"
|
|
(\x => case tok x of
|
|
Symbol s =>
|
|
if s `elem` reservedSymbols
|
|
then Nothing
|
|
else Just s
|
|
_ => Nothing)
|
|
|
|
identPart : Rule String
|
|
identPart
|
|
= terminal "Expected name"
|
|
(\x => case tok x of
|
|
Ident str => Just str
|
|
_ => Nothing)
|
|
|
|
export
|
|
namespace_ : Rule (List String)
|
|
namespace_
|
|
= do ns <- sepBy1 (do col <- column
|
|
symbol "."
|
|
col' <- column
|
|
if (col' - col == 1)
|
|
then pure ()
|
|
else fail "No whitepace allowed after namespace separator")
|
|
identPart
|
|
pure (reverse ns) -- innermost first, so reverse
|
|
|
|
export
|
|
unqualifiedName : Rule String
|
|
unqualifiedName = identPart
|
|
|
|
export
|
|
holeName : Rule String
|
|
holeName
|
|
= terminal "Expected hole name"
|
|
(\x => case tok x of
|
|
HoleIdent str => Just str
|
|
_ => Nothing)
|
|
|
|
export
|
|
name : Rule Name
|
|
name
|
|
= do ns <- namespace_
|
|
(do symbol ".("
|
|
op <- operator
|
|
symbol ")"
|
|
pure (NS ns (UN op))) <|>
|
|
(pure (mkFullName ns))
|
|
<|> do symbol "("
|
|
op <- operator
|
|
symbol ")"
|
|
pure (UN op)
|
|
where
|
|
mkFullName : List String -> Name
|
|
mkFullName [] = UN "NONE" -- Can't happen :)
|
|
mkFullName [n] = UN n
|
|
mkFullName (n :: ns) = NS ns (UN n)
|
|
|
|
export
|
|
IndentInfo : Type
|
|
IndentInfo = Int
|
|
|
|
export
|
|
init : IndentInfo
|
|
init = 0
|
|
|
|
continueF : EmptyRule () -> (indent : IndentInfo) -> EmptyRule ()
|
|
continueF err indent
|
|
= do eoi; err
|
|
<|> do keyword "where"; err
|
|
<|> do col <- column
|
|
if (col <= indent)
|
|
then err
|
|
else pure ()
|
|
|
|
-- Fail if this is the end of a block entry or end of file
|
|
export
|
|
continue : (indent : IndentInfo) -> EmptyRule ()
|
|
continue = continueF (fail "Unexpected end of expression")
|
|
|
|
-- As 'continue' but failing is fatal (i.e. entire parse fails)
|
|
export
|
|
mustContinue : (indent : IndentInfo) -> Maybe String -> EmptyRule ()
|
|
mustContinue indent Nothing
|
|
= continueF (fatalError "Unexpected end of expression") indent
|
|
mustContinue indent (Just req)
|
|
= continueF (fatalError ("Expected '" ++ req ++ "'")) indent
|
|
|
|
data ValidIndent
|
|
= AnyIndent -- In {}, entries can begin in any column
|
|
| AtPos Int -- Entry must begin in a specific column
|
|
| AfterPos Int -- Entry can begin in this column or later
|
|
| EndOfBlock -- Block is finished
|
|
|
|
Show ValidIndent where
|
|
show AnyIndent = "[any]"
|
|
show (AtPos i) = "[col " ++ show i ++ "]"
|
|
show (AfterPos i) = "[after " ++ show i ++ "]"
|
|
show EndOfBlock = "[EOB]"
|
|
|
|
checkValid : ValidIndent -> Int -> EmptyRule ()
|
|
checkValid AnyIndent c = pure ()
|
|
checkValid (AtPos x) c = if c == x
|
|
then pure ()
|
|
else fail "Invalid indentation"
|
|
checkValid (AfterPos x) c = if c >= x
|
|
then pure ()
|
|
else fail "Invalid indentation"
|
|
checkValid EndOfBlock c = fail "End of block"
|
|
|
|
-- Any token which indicates the end of a statement/block
|
|
isTerminator : Token -> Bool
|
|
isTerminator (Symbol ",") = True
|
|
isTerminator (Symbol "]") = True
|
|
isTerminator (Symbol ";") = True
|
|
isTerminator (Symbol "}") = True
|
|
isTerminator (Symbol ")") = True
|
|
isTerminator (Symbol "|") = True
|
|
isTerminator (Keyword "in") = True
|
|
isTerminator (Keyword "where") = True
|
|
isTerminator EndInput = True
|
|
isTerminator _ = False
|
|
|
|
-- Check we're at the end of a block entry, given the start column
|
|
-- of the block.
|
|
-- It's the end if we have a terminating token, or the next token starts
|
|
-- in or before indent. Works by looking ahead but not consuming.
|
|
export
|
|
atEnd : (indent : IndentInfo) -> EmptyRule ()
|
|
atEnd indent
|
|
= eoi
|
|
<|> do nextIs "Expected end of block" (isTerminator . tok)
|
|
pure ()
|
|
<|> do col <- column
|
|
if (col <= indent)
|
|
then pure ()
|
|
else fail "Not the end of a block entry"
|
|
|
|
-- Check we're at the end, but only by looking at indentation
|
|
export
|
|
atEndIndent : (indent : IndentInfo) -> EmptyRule ()
|
|
atEndIndent indent
|
|
= eoi
|
|
<|> do col <- column
|
|
if (col <= indent)
|
|
then pure ()
|
|
else fail "Not the end of a block entry"
|
|
|
|
|
|
-- Parse a terminator, return where the next block entry
|
|
-- must start, given where the current block entry started
|
|
terminator : ValidIndent -> Int -> EmptyRule ValidIndent
|
|
terminator valid laststart
|
|
= do eoi
|
|
pure EndOfBlock
|
|
<|> do symbol ";"
|
|
pure (afterSemi valid)
|
|
<|> do col <- column
|
|
afterDedent valid col
|
|
<|> pure EndOfBlock
|
|
where
|
|
-- Expected indentation for the next token can either be anything (if
|
|
-- we're inside a brace delimited block) or anywhere after the initial
|
|
-- column (if we're inside an indentation delimited block)
|
|
afterSemi : ValidIndent -> ValidIndent
|
|
afterSemi AnyIndent = AnyIndent -- in braces, anything goes
|
|
afterSemi (AtPos c) = AfterPos c -- not in braces, after the last start position
|
|
afterSemi (AfterPos c) = AfterPos c
|
|
afterSemi EndOfBlock = EndOfBlock
|
|
|
|
-- Expected indentation for the next token can either be anything (if
|
|
-- we're inside a brace delimited block) or in exactly the initial column
|
|
-- (if we're inside an indentation delimited block)
|
|
afterDedent : ValidIndent -> Int -> EmptyRule ValidIndent
|
|
afterDedent AnyIndent col
|
|
= if col <= laststart
|
|
then pure AnyIndent
|
|
else fail "Not the end of a block entry"
|
|
afterDedent (AfterPos c) col
|
|
= if col <= laststart
|
|
then pure (AtPos c)
|
|
else fail "Not the end of a block entry"
|
|
afterDedent (AtPos c) col
|
|
= if col <= laststart
|
|
then pure (AtPos c)
|
|
else fail "Not the end of a block entry"
|
|
afterDedent EndOfBlock col = pure EndOfBlock
|
|
|
|
-- Parse an entry in a block
|
|
blockEntry : ValidIndent -> (IndentInfo -> Rule ty) ->
|
|
Rule (ty, ValidIndent)
|
|
blockEntry valid rule
|
|
= do col <- column
|
|
checkValid valid col
|
|
p <- rule col
|
|
valid' <- terminator valid col
|
|
pure (p, valid')
|
|
|
|
blockEntries : ValidIndent -> (IndentInfo -> Rule ty) ->
|
|
EmptyRule (List ty)
|
|
blockEntries valid rule
|
|
= do eoi; pure []
|
|
<|> do res <- blockEntry valid rule
|
|
ts <- blockEntries (snd res) rule
|
|
pure (fst res :: ts)
|
|
<|> pure []
|
|
|
|
export
|
|
block : (IndentInfo -> Rule ty) -> EmptyRule (List ty)
|
|
block item
|
|
= do symbol "{"
|
|
commit
|
|
ps <- blockEntries AnyIndent item
|
|
symbol "}"
|
|
pure ps
|
|
<|> do col <- column
|
|
blockEntries (AtPos col) item
|
|
|
|
export
|
|
blockAfter : Int -> (IndentInfo -> Rule ty) -> EmptyRule (List ty)
|
|
blockAfter mincol item
|
|
= do symbol "{"
|
|
commit
|
|
ps <- blockEntries AnyIndent item
|
|
symbol "}"
|
|
pure ps
|
|
<|> do col <- column
|
|
if col <= mincol
|
|
then pure []
|
|
else blockEntries (AtPos col) item
|
|
|
|
export
|
|
blockWithOptHeaderAfter : Int -> (IndentInfo -> Rule hd) -> (IndentInfo -> Rule ty) -> EmptyRule (Maybe hd, List ty)
|
|
blockWithOptHeaderAfter {ty} mincol header item
|
|
= do symbol "{"
|
|
commit
|
|
hidt <- optional $ blockEntry AnyIndent header
|
|
restOfBlock hidt
|
|
<|> do col <- column
|
|
if col <= mincol
|
|
then pure (Nothing, [])
|
|
else do hidt <- optional $ blockEntry (AtPos col) header
|
|
ps <- blockEntries (AtPos col) item
|
|
pure (map fst hidt, ps)
|
|
where
|
|
restOfBlock : Maybe (hd, ValidIndent) -> Rule (Maybe hd, List ty)
|
|
restOfBlock (Just (h, idt)) = do ps <- blockEntries idt item
|
|
symbol "}"
|
|
pure (Just h, ps)
|
|
restOfBlock Nothing = do ps <- blockEntries AnyIndent item
|
|
symbol "}"
|
|
pure (Nothing, ps)
|
|
|
|
export
|
|
nonEmptyBlock : (IndentInfo -> Rule ty) -> Rule (List ty)
|
|
nonEmptyBlock item
|
|
= do symbol "{"
|
|
commit
|
|
res <- blockEntry AnyIndent item
|
|
ps <- blockEntries (snd res) item
|
|
symbol "}"
|
|
pure (fst res :: ps)
|
|
<|> do col <- column
|
|
res <- blockEntry (AtPos col) item
|
|
ps <- blockEntries (snd res) item
|
|
pure (fst res :: ps)
|
|
|
|
|