Idris2/libs/contrib/Data/String/Parser.idr

274 lines
8.4 KiB
Idris
Raw Normal View History

2020-07-04 15:45:29 +03:00
||| A simple parser combinator library for strings. Inspired by attoparsec zepto.
module Data.String.Parser
import Control.Monad.Identity
2020-07-05 11:22:23 +03:00
import Control.Monad.Trans
2020-07-04 15:45:29 +03:00
import Data.Strings
import Data.Fin
import Data.List
2020-07-04 15:45:29 +03:00
2020-08-22 10:13:34 +03:00
%default total
2020-07-04 15:45:29 +03:00
2020-07-05 17:39:34 +03:00
||| The input state, pos is position in the string and maxPos is the length of the input string.
public export
2020-07-04 15:45:29 +03:00
record State where
constructor S
input : String
pos : Int
2020-07-05 01:59:57 +03:00
maxPos : Int
2020-07-04 15:45:29 +03:00
2020-07-05 15:22:49 +03:00
Show State where
2020-08-22 10:13:34 +03:00
show s = "(" ++ show s.input ++ ", " ++ show s.pos ++ ", " ++ show s.maxPos ++ ")"
2020-07-05 15:22:49 +03:00
2020-07-05 17:39:34 +03:00
||| Result of applying a parser
public export
2020-07-04 15:45:29 +03:00
data Result a = Fail Int String | OK a State
public export
record ParseT (m : Type -> Type) (a : Type) where
constructor P
runParser : State -> m (Result a)
public export
Parser : Type -> Type
2020-08-22 10:13:34 +03:00
Parser = ParseT Identity
2020-07-04 15:45:29 +03:00
public export
implementation Monad m => Functor (ParseT m) where
map f p = P $ \state =>
do res <- p.runParser state
case res of
OK r state' => pure (OK (f r) state')
Fail i err => pure (Fail i err)
public export
Monad m => Applicative (ParseT m) where
pure x = P $ \s => pure (OK x s)
2020-08-22 10:13:34 +03:00
f <*> x = P $ \s => case !(f.runParser s) of
OK f' s' => case !(x.runParser s') of
OK r rs => pure (OK (f' r) rs)
2020-07-04 15:45:29 +03:00
Fail i err => pure (Fail i err)
Fail i err => pure (Fail i err)
public export
Monad m => Monad (ParseT m) where
m >>= k = P $ \state =>
do res <- m.runParser state
case res of
OK a state' => (k a).runParser state'
Fail i err => pure (Fail i err)
public export
Monad m => Alternative (ParseT m) where
empty = P $ \s => pure $ Fail (s.pos) "no alternative left"
a <|> b = P $ \s => case !(a.runParser s) of
OK r s' => pure $ OK r s'
Fail _ _ => b.runParser s
2020-07-05 11:22:23 +03:00
public export
MonadTrans ParseT where
2020-08-22 10:13:34 +03:00
lift x = P $ \s => do res <- x
2020-07-05 11:22:23 +03:00
pure $ OK res s
2020-07-04 15:45:29 +03:00
2020-07-05 17:39:34 +03:00
||| Run a parser in a monad
||| Returns a tuple of the result and final position on success.
||| Returns an error message on failure.
export
2020-07-05 17:39:34 +03:00
parseT : Monad m => ParseT m a -> String -> m (Either String (a, Int))
parseT p str = do res <- p.runParser (S str 0 (strLength str))
2020-07-04 15:45:29 +03:00
case res of
2020-07-05 17:39:34 +03:00
OK r s => pure $ Right (r, s.pos)
2020-07-05 11:22:23 +03:00
Fail i err => pure $ Left $ fastAppend ["Parse failed at position ", show i, ": ", err]
2020-07-04 15:45:29 +03:00
2020-07-05 17:39:34 +03:00
||| Run a parser in a pure function
||| Returns a tuple of the result and final position on success.
||| Returns an error message on failure.
export
2020-07-05 17:39:34 +03:00
parse : Parser a -> String -> Either String (a, Int)
2020-07-04 15:45:29 +03:00
parse p str = runIdentity $ parseT p str
2020-07-06 15:13:56 +03:00
||| Combinator that replaces the error message on failure.
||| This allows combinators to output relevant errors
export
(<?>) : Monad m => ParseT m a -> String -> ParseT m a
(<?>) p msg = P $ \s => case !(p.runParser s) of
OK r s' => pure $ OK r s'
Fail i _ => pure $ Fail i msg
infixl 0 <?>
2020-08-22 10:13:34 +03:00
||| Fail with some error message
export
2020-08-22 10:13:34 +03:00
fail : Monad m => String -> ParseT m a
fail x = P $ \s => pure $ Fail s.pos x
2020-07-05 17:39:34 +03:00
2020-08-22 10:13:34 +03:00
||| Returns the result of the parser `p` or `def` if it fails.
export
2020-08-22 10:13:34 +03:00
option : Monad m => a -> ParseT m a -> ParseT m a
option def p = p <|> pure def
2020-07-05 01:59:57 +03:00
2020-08-22 10:13:34 +03:00
||| Returns a Maybe that contains the result of `p` if it succeeds or `Nothing` if it fails.
export
2020-08-22 10:13:34 +03:00
optional : Monad m => ParseT m a -> ParseT m (Maybe a)
optional p = (Just <$> p) <|> pure Nothing
||| Discards the result of a parser
export
skip : Monad m => ParseT m a -> ParseT m ()
skip = ignore
2020-07-05 01:59:57 +03:00
mutual
2020-07-05 17:39:34 +03:00
||| Succeeds if `p` succeeds, will continue to match `p` until it fails
||| and accumulate the results in a list
export
2020-08-22 10:13:34 +03:00
covering
2020-07-05 01:59:57 +03:00
some : Monad m => ParseT m a -> ParseT m (List a)
some p = pure (!p :: !(many p))
2020-07-05 17:39:34 +03:00
||| Always succeeds, will accumulate the results of `p` in a list until it fails.
export
2020-08-22 10:13:34 +03:00
covering
2020-07-05 01:59:57 +03:00
many : Monad m => ParseT m a -> ParseT m (List a)
many p = some p <|> pure []
2020-07-04 15:45:29 +03:00
2020-08-22 10:13:34 +03:00
||| Parse left-nested lists of the form `((init op arg) op arg) op arg`
export
covering
hchainl : Monad m => ParseT m init -> ParseT m (init -> arg -> init) -> ParseT m arg -> ParseT m init
hchainl pini pop parg = pini >>= go
where
covering
go : init -> ParseT m init
go x = (do op <- pop
arg <- parg
go $ op x arg) <|> pure x
||| Parse right-nested lists of the form `arg op (arg op (arg op end))`
export
covering
hchainr : Monad m => ParseT m arg -> ParseT m (arg -> end -> end) -> ParseT m end -> ParseT m end
hchainr parg pop pend = go id <*> pend
where
covering
go : (end -> end) -> ParseT m (end -> end)
go f = (do arg <- parg
op <- pop
go $ f . op arg) <|> pure f
||| Succeeds if the next char satisfies the predicate `f`
export
satisfy : Monad m => (Char -> Bool) -> ParseT m Char
satisfy f = P $ \s => pure $ if s.pos < s.maxPos
then let ch = assert_total $ strIndex s.input s.pos in
if f ch
then OK ch (S s.input (s.pos + 1) s.maxPos)
else Fail (s.pos) "satisfy"
else Fail (s.pos) "satisfy"
2020-07-05 17:39:34 +03:00
||| Always succeeds, applies the predicate `f` on chars until it fails and creates a string
||| from the results.
export
2020-08-22 10:13:34 +03:00
covering
takeWhile : Monad m => (Char -> Bool) -> ParseT m String
2020-08-22 10:13:34 +03:00
takeWhile f = pack <$> many (satisfy f)
2020-07-05 15:22:49 +03:00
2020-08-22 10:13:34 +03:00
||| Succeeds if the string `str` follows.
2020-07-05 15:22:49 +03:00
export
2020-08-22 10:13:34 +03:00
string : Monad m => String -> ParseT m ()
string str = P $ \s => pure $ let len = strLength str in
if s.pos+len <= s.maxPos
then let head = strSubstr s.pos len s.input in
if head == str
then OK () (S s.input (s.pos + len) s.maxPos)
else Fail (s.pos) ("string " ++ show str)
else Fail (s.pos) ("string " ++ show str)
||| Succeeds if the end of the string is reached.
2020-07-05 17:00:29 +03:00
export
2020-08-22 10:13:34 +03:00
eos : Monad m => ParseT m ()
eos = P $ \s => pure $ if s.pos == s.maxPos
then OK () s
else Fail s.pos "expected the end of the string"
2020-08-06 13:59:03 +03:00
2020-08-22 10:13:34 +03:00
||| Succeeds if the next char is `c`
2020-08-06 13:59:03 +03:00
export
2020-08-22 10:13:34 +03:00
char : Monad m => Char -> ParseT m ()
char c = ignore $ satisfy (== c)
2020-08-06 13:59:03 +03:00
||| Parses a space character
export
2020-08-22 10:13:34 +03:00
space : Monad m => ParseT m Char
2020-08-06 13:59:03 +03:00
space = satisfy isSpace
||| Parses one or more space characters
export
2020-08-22 10:13:34 +03:00
covering
spaces : Monad m => ParseT m ()
2020-08-06 13:59:03 +03:00
spaces = skip (many space) <?> "white space"
2020-08-22 10:13:34 +03:00
||| Discards brackets around a matching parser
export
parens : Monad m => ParseT m a -> ParseT m a
parens p = char '(' *> p <* char ')'
2020-08-06 13:59:03 +03:00
||| Discards whitespace after a matching parser
export
2020-08-22 10:13:34 +03:00
covering
lexeme : Monad m => ParseT m a -> ParseT m a
2020-08-06 13:59:03 +03:00
lexeme p = p <* spaces
||| Matches a specific string, then skips following whitespace
export
2020-08-22 10:13:34 +03:00
covering
token : Monad m => String -> ParseT m ()
2020-08-06 13:59:03 +03:00
token s = lexeme (skip (string s)) <?> "token " ++ show s
||| Matches a single digit
export
2020-08-22 10:13:34 +03:00
digit : Monad m => ParseT m (Fin 10)
digit = do x <- satisfy isDigit
case lookup x digits of
2020-08-22 10:13:34 +03:00
Nothing => fail "not a digit"
Just y => pure y
where
digits : List (Char, Fin 10)
digits = [ ('0', 0)
, ('1', 1)
, ('2', 2)
, ('3', 3)
, ('4', 4)
, ('5', 5)
, ('6', 6)
, ('7', 7)
, ('8', 8)
, ('9', 9)
]
fromDigits : Num a => ((Fin 10) -> a) -> List (Fin 10) -> a
fromDigits f xs = foldl (addDigit) 0 xs
where
addDigit : a -> (Fin 10) -> a
addDigit num d = 10*num + (f d)
intFromDigits : List (Fin 10) -> Integer
intFromDigits = fromDigits finToInteger
natFromDigits : List (Fin 10) -> Nat
natFromDigits = fromDigits finToNat
||| Matches a natural number
export
2020-08-22 10:13:34 +03:00
covering
natural : Monad m => ParseT m Nat
natural = natFromDigits <$> some digit
||| Matches an integer, eg. "12", "-4"
export
2020-08-22 10:13:34 +03:00
covering
integer : Monad m => ParseT m Integer
integer = do minus <- optional (char '-')
x <- some digit
2020-08-22 10:13:34 +03:00
pure $ case minus of
Nothing => intFromDigits x
Just _ => (intFromDigits x)*(-1)