mirror of
https://github.com/idris-lang/Idris2.git
synced 2024-12-01 09:49:24 +03:00
0958f1fd8b
This fixes a productivity issue with map in the parser library
281 lines
11 KiB
Idris
281 lines
11 KiB
Idris
module Text.Parser.Core
|
|
|
|
import public Control.Delayed
|
|
import Data.List
|
|
|
|
||| Description of a language's grammar. The `tok` parameter is the type
|
|
||| of tokens, and the `consumes` flag is True if the language is guaranteed
|
|
||| to be non-empty - that is, successfully parsing the language is guaranteed
|
|
||| to consume some input.
|
|
public export
|
|
data Grammar : (tok : Type) -> (consumes : Bool) -> Type -> Type where
|
|
Empty : (val : ty) -> Grammar tok False ty
|
|
Terminal : String -> (tok -> Maybe a) -> Grammar tok True a
|
|
NextIs : String -> (tok -> Bool) -> Grammar tok False tok
|
|
EOF : Grammar tok False ()
|
|
|
|
Fail : Bool -> String -> Grammar tok c ty
|
|
Commit : Grammar tok False ()
|
|
MustWork : Grammar tok c a -> Grammar tok c a
|
|
|
|
SeqEat : {c2 : _} -> Grammar tok True a -> Inf (a -> Grammar tok c2 b) ->
|
|
Grammar tok True b
|
|
SeqEmpty : {c1, c2 : _} -> Grammar tok c1 a -> (a -> Grammar tok c2 b) ->
|
|
Grammar tok (c1 || c2) b
|
|
Alt : {c1, c2 : _} -> Grammar tok c1 ty -> Grammar tok c2 ty ->
|
|
Grammar tok (c1 && c2) ty
|
|
|
|
||| Sequence two grammars. If either consumes some input, the sequence is
|
|
||| guaranteed to consume some input. If the first one consumes input, the
|
|
||| second is allowed to be recursive (because it means some input has been
|
|
||| consumed and therefore the input is smaller)
|
|
public export %inline %tcinline
|
|
(>>=) : {c1, c2 : Bool} ->
|
|
Grammar tok c1 a ->
|
|
inf c1 (a -> Grammar tok c2 b) ->
|
|
Grammar tok (c1 || c2) b
|
|
(>>=) {c1 = False} = SeqEmpty
|
|
(>>=) {c1 = True} = SeqEat
|
|
|
|
||| Sequence two grammars. If either consumes some input, the sequence is
|
|
||| guaranteed to consume input. This is an explicitly non-infinite version
|
|
||| of `>>=`.
|
|
export
|
|
seq : {c1, c2 : _} -> Grammar tok c1 a ->
|
|
(a -> Grammar tok c2 b) ->
|
|
Grammar tok (c1 || c2) b
|
|
seq = SeqEmpty
|
|
|
|
||| Sequence a grammar followed by the grammar it returns.
|
|
export
|
|
join : {c1, c2 : Bool} ->
|
|
Grammar tok c1 (Grammar tok c2 a) ->
|
|
Grammar tok (c1 || c2) a
|
|
join {c1 = False} p = SeqEmpty p id
|
|
join {c1 = True} p = SeqEat p id
|
|
|
|
||| Give two alternative grammars. If both consume, the combination is
|
|
||| guaranteed to consume.
|
|
export
|
|
(<|>) : {c1, c2 : _} ->
|
|
Grammar tok c1 ty ->
|
|
Grammar tok c2 ty ->
|
|
Grammar tok (c1 && c2) ty
|
|
(<|>) = Alt
|
|
|
|
||| Allows the result of a grammar to be mapped to a different value.
|
|
export
|
|
{c : _} -> Functor (Grammar tok c) where
|
|
map f (Empty val) = Empty (f val)
|
|
map f (Fail fatal msg) = Fail fatal msg
|
|
map f (MustWork g) = MustWork (map f g)
|
|
map f (Terminal msg g) = Terminal msg (\t => map f (g t))
|
|
map f (Alt x y) = Alt (map f x) (map f y)
|
|
map f (SeqEat act next)
|
|
= SeqEat act (\val => map f (next val))
|
|
map f (SeqEmpty act next)
|
|
= SeqEmpty act (\val => map f (next val))
|
|
-- The remaining constructors (NextIs, EOF, Commit) have a fixed type,
|
|
-- so a sequence must be used.
|
|
map {c = False} f p = SeqEmpty p (Empty . f)
|
|
|
|
||| Sequence a grammar with value type `a -> b` and a grammar
|
|
||| with value type `a`. If both succeed, apply the function
|
|
||| from the first grammar to the value from the second grammar.
|
|
||| Guaranteed to consume if either grammar consumes.
|
|
export
|
|
(<*>) : {c1, c2 : _} ->
|
|
Grammar tok c1 (a -> b) ->
|
|
Inf (Grammar tok c2 a) ->
|
|
Grammar tok (c1 || c2) b
|
|
(<*>) {c1 = False} x y = SeqEmpty x (\f => map f y)
|
|
(<*>) {c1 = True } x y = SeqEmpty x (\f => map f (Force y))
|
|
|
|
||| Sequence two grammars. If both succeed, use the value of the first one.
|
|
||| Guaranteed to consume if either grammar consumes.
|
|
export
|
|
(<*) : {c1, c2 : _} ->
|
|
Grammar tok c1 a ->
|
|
Inf (Grammar tok c2 b) ->
|
|
Grammar tok (c1 || c2) a
|
|
(<*) x y = map const x <*> y
|
|
|
|
||| Sequence two grammars. If both succeed, use the value of the second one.
|
|
||| Guaranteed to consume if either grammar consumes.
|
|
export
|
|
(*>) : {c1, c2 : _} ->
|
|
Grammar tok c1 a ->
|
|
Inf (Grammar tok c2 b) ->
|
|
Grammar tok (c1 || c2) b
|
|
(*>) x y = map (const id) x <*> y
|
|
|
|
||| Produce a grammar that can parse a different type of token by providing a
|
|
||| function converting the new token type into the original one.
|
|
export
|
|
mapToken : (a -> b) -> Grammar b c ty -> Grammar a c ty
|
|
mapToken f (Empty val) = Empty val
|
|
mapToken f (Terminal msg g) = Terminal msg (g . f)
|
|
mapToken f (NextIs msg g) = SeqEmpty (NextIs msg (g . f)) (Empty . f)
|
|
mapToken f EOF = EOF
|
|
mapToken f (Fail fatal msg) = Fail fatal msg
|
|
mapToken f (MustWork g) = MustWork (mapToken f g)
|
|
mapToken f Commit = Commit
|
|
mapToken f (SeqEat act next) = SeqEat (mapToken f act) (\x => mapToken f (next x))
|
|
mapToken f (SeqEmpty act next) = SeqEmpty (mapToken f act) (\x => mapToken f (next x))
|
|
mapToken f (Alt x y) = Alt (mapToken f x) (mapToken f y)
|
|
|
|
||| Always succeed with the given value.
|
|
export
|
|
pure : (val : ty) -> Grammar tok False ty
|
|
pure = Empty
|
|
|
|
||| Check whether the next token satisfies a predicate
|
|
export
|
|
nextIs : String -> (tok -> Bool) -> Grammar tok False tok
|
|
nextIs = NextIs
|
|
|
|
||| Look at the next token in the input
|
|
export
|
|
peek : Grammar tok False tok
|
|
peek = nextIs "Unrecognised token" (const True)
|
|
|
|
||| Succeeds if running the predicate on the next token returns Just x,
|
|
||| returning x. Otherwise fails.
|
|
export
|
|
terminal : String -> (tok -> Maybe a) -> Grammar tok True a
|
|
terminal = Terminal
|
|
|
|
||| Always fail with a message
|
|
export
|
|
fail : String -> Grammar tok c ty
|
|
fail = Fail False
|
|
|
|
export
|
|
fatalError : String -> Grammar tok c ty
|
|
fatalError = Fail True
|
|
|
|
||| Succeed if the input is empty
|
|
export
|
|
eof : Grammar tok False ()
|
|
eof = EOF
|
|
|
|
||| Commit to an alternative; if the current branch of an alternative
|
|
||| fails to parse, no more branches will be tried
|
|
export
|
|
commit : Grammar tok False ()
|
|
commit = Commit
|
|
|
|
||| If the parser fails, treat it as a fatal error
|
|
export
|
|
mustWork : Grammar tok c ty -> Grammar tok c ty
|
|
mustWork = MustWork
|
|
|
|
data ParseResult : List tok -> (consumes : Bool) -> Type -> Type where
|
|
Failure : {xs : List tok} ->
|
|
(committed : Bool) -> (fatal : Bool) ->
|
|
(err : String) -> (rest : List tok) -> ParseResult xs c ty
|
|
EmptyRes : (committed : Bool) ->
|
|
(val : ty) -> (more : List tok) -> ParseResult more False ty
|
|
NonEmptyRes : {xs : List tok} ->
|
|
(committed : Bool) ->
|
|
(val : ty) -> (more : List tok) ->
|
|
ParseResult (x :: xs ++ more) c ty
|
|
|
|
-- Take the result of an alternative branch, reset the commit flag to
|
|
-- the commit flag from the outer alternative, and weaken the 'consumes'
|
|
-- flag to take both alternatives into account
|
|
weakenRes : {whatever, c : Bool} -> {xs : List tok} ->
|
|
(com' : Bool) ->
|
|
ParseResult xs c ty -> ParseResult xs (whatever && c) ty
|
|
weakenRes com' (Failure com fatal msg ts) = Failure com' fatal msg ts
|
|
weakenRes {whatever=True} com' (EmptyRes com val xs) = EmptyRes com' val xs
|
|
weakenRes {whatever=False} com' (EmptyRes com val xs) = EmptyRes com' val xs
|
|
weakenRes com' (NonEmptyRes {xs} com val more) = NonEmptyRes {xs} com' val more
|
|
|
|
doParse : (commit : Bool) ->
|
|
(act : Grammar tok c ty) ->
|
|
(xs : List tok) ->
|
|
ParseResult xs c ty
|
|
-- doParse com xs act with (sizeAccessible xs)
|
|
doParse com (Empty val) xs = EmptyRes com val xs
|
|
doParse com (Fail fatal str) [] = Failure com fatal str []
|
|
doParse com (Fail fatal str) (x :: xs) = Failure com fatal str (x :: xs)
|
|
doParse com Commit xs = EmptyRes True () xs
|
|
doParse com (MustWork g) xs =
|
|
let p' = doParse com g xs in
|
|
case p' of
|
|
Failure com' _ msg ts => Failure com' True msg ts
|
|
res => res
|
|
doParse com (Terminal err f) [] = Failure com False "End of input" []
|
|
doParse com (Terminal err f) (x :: xs)
|
|
= maybe
|
|
(Failure com False err (x :: xs))
|
|
(\a => NonEmptyRes com {xs=[]} a xs)
|
|
(f x)
|
|
doParse com EOF [] = EmptyRes com () []
|
|
doParse com EOF (x :: xs)
|
|
= Failure com False "Expected end of input" (x :: xs)
|
|
doParse com (NextIs err f) [] = Failure com False "End of input" []
|
|
doParse com (NextIs err f) (x :: xs)
|
|
= if f x
|
|
then EmptyRes com x (x :: xs)
|
|
else Failure com False err (x :: xs)
|
|
doParse com (Alt {c1} {c2} x y) xs
|
|
= let p' = doParse False x xs in
|
|
case p' of
|
|
Failure com' fatal msg ts
|
|
=> if com' || fatal
|
|
-- If the alternative had committed, don't try the
|
|
-- other branch (and reset commit flag)
|
|
then Failure com fatal msg ts
|
|
else weakenRes {whatever = c1} com (doParse False y xs)
|
|
-- Successfully parsed the first option, so use the outer commit flag
|
|
EmptyRes _ val xs => EmptyRes com val xs
|
|
NonEmptyRes {xs=xs'} _ val more => NonEmptyRes {xs=xs'} com val more
|
|
doParse com (SeqEmpty {c1} {c2} act next) xs
|
|
= let p' = assert_total (doParse {c = c1} com act xs) in
|
|
case p' of
|
|
Failure com fatal msg ts => Failure com fatal msg ts
|
|
EmptyRes com val xs =>
|
|
case assert_total (doParse com (next val) xs) of
|
|
Failure com' fatal msg ts => Failure com' fatal msg ts
|
|
EmptyRes com' val xs => EmptyRes com' val xs
|
|
NonEmptyRes {xs=xs'} com' val more =>
|
|
NonEmptyRes {xs=xs'} com' val more
|
|
NonEmptyRes {x} {xs=ys} com val more =>
|
|
case (assert_total (doParse com (next val) more)) of
|
|
Failure com' fatal msg ts => Failure com' fatal msg ts
|
|
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
|
|
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
|
|
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
|
|
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
|
|
doParse com (SeqEat act next) xs with (doParse com act xs)
|
|
doParse com (SeqEat act next) xs | Failure com' fatal msg ts
|
|
= Failure com' fatal msg ts
|
|
doParse com (SeqEat act next) (x :: (ys ++ more)) | (NonEmptyRes {xs=ys} com' val more)
|
|
= let p' = assert_total (doParse com' (next val) more) in
|
|
case p' of
|
|
Failure com' fatal msg ts => Failure com' fatal msg ts
|
|
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
|
|
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
|
|
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
|
|
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
|
|
-- This next line is not strictly necessary, but it stops the coverage
|
|
-- checker taking a really long time and eating lots of memory...
|
|
-- doParse _ _ _ = Failure True True "Help the coverage checker!" []
|
|
|
|
public export
|
|
data ParseError tok = Error String (List tok)
|
|
|
|
||| Parse a list of tokens according to the given grammar. If successful,
|
|
||| returns a pair of the parse result and the unparsed tokens (the remaining
|
|
||| input).
|
|
export
|
|
parse : {c : _} -> (act : Grammar tok c ty) -> (xs : List tok) ->
|
|
Either (ParseError tok) (ty, List tok)
|
|
parse act xs
|
|
= case doParse False act xs of
|
|
Failure _ _ msg ts => Left (Error msg ts)
|
|
EmptyRes _ val rest => pure (val, rest)
|
|
NonEmptyRes _ val rest => pure (val, rest)
|