Update contrib Text.Parser to match Library.Text.Parser (#1808)

Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org>
This commit is contained in:
Alex Humphreys 2021-08-06 09:03:13 +00:00 committed by GitHub
parent ca95781a74
commit f3855d7100
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 1054 additions and 437 deletions

View File

@ -7,7 +7,6 @@ import Data.String
%default total %default total
infixl 5 +> infixl 5 +>
infixr 5 <+ infixr 5 <+
@ -94,20 +93,14 @@ public export
indentLines : (n : Nat) -> String -> String indentLines : (n : Nat) -> String -> String
indentLines n str = unlines $ map (indent n) $ lines str indentLines n str = unlines $ map (indent n) $ lines str
||| Return a string of the given character repeated
||| `n` times.
export
fastReplicate : (n : Nat) -> Char -> String
fastReplicate n c = fastPack $ replicate n c
||| Left-justify a string to the given length, using the ||| Left-justify a string to the given length, using the
||| specified fill character on the right. ||| specified fill character on the right.
export export
justifyLeft : Nat -> Char -> String -> String justifyLeft : Nat -> Char -> String -> String
justifyLeft n c s = s ++ fastReplicate (n `minus` length s) c justifyLeft n c s = s ++ replicate (n `minus` length s) c
||| Right-justify a string to the given length, using the ||| Right-justify a string to the given length, using the
||| specified fill character on the left. ||| specified fill character on the left.
export export
justifyRight : Nat -> Char -> String -> String justifyRight : Nat -> Char -> String -> String
justifyRight n c s = fastReplicate (n `minus` length s) c ++ s justifyRight n c s = replicate (n `minus` length s) c ++ s

View File

@ -6,6 +6,8 @@ import Language.JSON.Parser
import public Language.JSON.Data import public Language.JSON.Data
import public Text.Bounded
%default total %default total
||| Parse a JSON string. ||| Parse a JSON string.

View File

@ -34,8 +34,8 @@ jsonTokenMap = toTokenMap $
] ]
export export
lexJSON : String -> Maybe (List JSONToken) lexJSON : String -> Maybe (List (WithBounds JSONToken))
lexJSON str lexJSON str
= case lex jsonTokenMap str of = case lex jsonTokenMap str of
(tokens, _, _, "") => Just $ map TokenData.tok tokens (tokens, _, _, "") => Just $ tokens
_ => Nothing _ => Nothing

View File

@ -10,20 +10,20 @@ import public Language.JSON.Tokens
%default total %default total
private private
punct : Punctuation -> Grammar JSONToken True () punct : Punctuation -> Grammar state JSONToken True ()
punct p = match $ JTPunct p punct p = match $ JTPunct p
private private
rawString : Grammar JSONToken True String rawString : Grammar state JSONToken True String
rawString = do mstr <- match JTString rawString = do mstr <- match JTString
the (Grammar _ False _) $ the (Grammar _ _ False _) $
case mstr of case mstr of
Just str => pure str Just str => pure str
Nothing => fail "invalid string" Nothing => fail "invalid string"
mutual mutual
private private
json : Grammar JSONToken True JSON json : Grammar state JSONToken True JSON
json = object json = object
<|> array <|> array
<|> string <|> string
@ -32,14 +32,14 @@ mutual
<|> null <|> null
private private
object : Grammar JSONToken True JSON object : Grammar state JSONToken True JSON
object = do punct $ Curly Open object = do punct $ Curly Open
commit commit
props <- properties props <- properties
punct $ Curly Close punct $ Curly Close
pure $ JObject props pure $ JObject props
where where
properties : Grammar JSONToken False (List (String, JSON)) properties : Grammar state JSONToken False (List (String, JSON))
properties = sepBy (punct Comma) $ properties = sepBy (punct Comma) $
do key <- rawString do key <- rawString
punct Colon punct Colon
@ -47,34 +47,34 @@ mutual
pure (key, value) pure (key, value)
private private
array : Grammar JSONToken True JSON array : Grammar state JSONToken True JSON
array = do punct (Square Open) array = do punct (Square Open)
commit commit
vals <- values vals <- values
punct (Square Close) punct (Square Close)
pure (JArray vals) pure (JArray vals)
where where
values : Grammar JSONToken False (List JSON) values : Grammar state JSONToken False (List JSON)
values = sepBy (punct Comma) json values = sepBy (punct Comma) json
private private
string : Grammar JSONToken True JSON string : Grammar state JSONToken True JSON
string = map JString rawString string = map JString rawString
private private
boolean : Grammar JSONToken True JSON boolean : Grammar state JSONToken True JSON
boolean = map JBoolean $ match JTBoolean boolean = map JBoolean $ match JTBoolean
private private
number : Grammar JSONToken True JSON number : Grammar state JSONToken True JSON
number = map JNumber $ match JTNumber number = map JNumber $ match JTNumber
private private
null : Grammar JSONToken True JSON null : Grammar state JSONToken True JSON
null = map (const JNull) $ match JTNull null = map (const JNull) $ match JTNull
export export
parseJSON : List JSONToken -> Maybe JSON parseJSON : List (WithBounds JSONToken) -> Maybe JSON
parseJSON toks = case parse json $ filter (not . ignored) toks of parseJSON toks = case parse json $ filter (not . ignored) toks of
Right (j, []) => Just j Right (j, []) => Just j
_ => Nothing _ => Nothing

View File

@ -12,7 +12,7 @@ quo = is '"'
export export
esc : Lexer -> Lexer esc : Lexer -> Lexer
esc = escape '\\' esc = escape (is '\\')
private private
unicodeEscape : Lexer unicodeEscape : Lexer
@ -36,7 +36,7 @@ jsonStringTokenMap = toTokenMap $
] ]
export export
lexString : String -> Maybe (List JSONStringToken) lexString : String -> Maybe (List (WithBounds JSONStringToken))
lexString x = case lex jsonStringTokenMap x of lexString x = case lex jsonStringTokenMap x of
(toks, _, _, "") => Just $ map TokenData.tok toks (toks, _, _, "") => Just $ toks
_ => Nothing _ => Nothing

View File

@ -7,20 +7,20 @@ import Text.Parser
%default total %default total
private private
stringChar : Grammar JSONStringToken True Char stringChar : Grammar state JSONStringToken True Char
stringChar = match JSTChar stringChar = match JSTChar
<|> match JSTSimpleEscape <|> match JSTSimpleEscape
<|> match JSTUnicodeEscape <|> match JSTUnicodeEscape
private private
quotedString : Grammar JSONStringToken True String quotedString : Grammar state JSONStringToken True String
quotedString = let q = match JSTQuote in quotedString = let q = match JSTQuote in
do chars <- between q q (many stringChar) do chars <- between q q (many stringChar)
eof eof
pure $ pack chars pure $ pack chars
export export
parseString : List JSONStringToken -> Maybe String parseString : List (WithBounds JSONStringToken) -> Maybe String
parseString toks = case parse quotedString toks of parseString toks = case parse quotedString toks of
Right (str, []) => Just str Right (str, []) => Just str
_ => Nothing _ => Nothing

View File

@ -2,6 +2,7 @@ module Language.JSON.Tokens
import Language.JSON.String import Language.JSON.String
import Text.Token import Text.Token
import Text.Bounded
%default total %default total
@ -76,6 +77,6 @@ TokenKind JSONTokenKind where
tokValue JTIgnore _ = () tokValue JTIgnore _ = ()
export export
ignored : JSONToken -> Bool ignored : WithBounds JSONToken -> Bool
ignored (Tok JTIgnore _) = True ignored (MkBounded (Tok JTIgnore _) _ _) = True
ignored _ = False ignored _ = False

View File

@ -153,15 +153,15 @@ pathTokenMap = toTokenMap $
, (some $ non $ oneOf "/\\:?", PTText) , (some $ non $ oneOf "/\\:?", PTText)
] ]
lexPath : String -> List PathToken lexPath : String -> List (WithBounds PathToken)
lexPath str = lexPath str =
let let
(tokens, _, _, _) = lex pathTokenMap str (tokens, _, _, _) = lex pathTokenMap str
in in
map TokenData.tok tokens tokens -- TokenData.tok tokens
-- match both '/' and '\\' regardless of the platform. -- match both '/' and '\\' regardless of the platform.
bodySeparator : Grammar PathToken True () bodySeparator : Grammar state PathToken True ()
bodySeparator = (match $ PTPunct '\\') <|> (match $ PTPunct '/') bodySeparator = (match $ PTPunct '\\') <|> (match $ PTPunct '/')
-- Windows will automatically translate '/' to '\\'. And the verbatim prefix, -- Windows will automatically translate '/' to '\\'. And the verbatim prefix,
@ -169,7 +169,7 @@ bodySeparator = (match $ PTPunct '\\') <|> (match $ PTPunct '/')
-- However, we just parse it and ignore it. -- However, we just parse it and ignore it.
-- --
-- Example: \\?\ -- Example: \\?\
verbatim : Grammar PathToken True () verbatim : Grammar state PathToken True ()
verbatim = verbatim =
do do
ignore $ count (exactly 2) $ match $ PTPunct '\\' ignore $ count (exactly 2) $ match $ PTPunct '\\'
@ -178,7 +178,7 @@ verbatim =
pure () pure ()
-- Example: \\server\share -- Example: \\server\share
unc : Grammar PathToken True Volume unc : Grammar state PathToken True Volume
unc = unc =
do do
ignore $ count (exactly 2) $ match $ PTPunct '\\' ignore $ count (exactly 2) $ match $ PTPunct '\\'
@ -188,7 +188,7 @@ unc =
pure $ UNC server share pure $ UNC server share
-- Example: \\?\server\share -- Example: \\?\server\share
verbatimUnc : Grammar PathToken True Volume verbatimUnc : Grammar state PathToken True Volume
verbatimUnc = verbatimUnc =
do do
verbatim verbatim
@ -198,7 +198,7 @@ verbatimUnc =
pure $ UNC server share pure $ UNC server share
-- Example: C: -- Example: C:
disk : Grammar PathToken True Volume disk : Grammar state PathToken True Volume
disk = disk =
do do
text <- match PTText text <- match PTText
@ -209,21 +209,21 @@ disk =
pure $ Disk (toUpper disk) pure $ Disk (toUpper disk)
-- Example: \\?\C: -- Example: \\?\C:
verbatimDisk : Grammar PathToken True Volume verbatimDisk : Grammar state PathToken True Volume
verbatimDisk = verbatimDisk =
do do
verbatim verbatim
disk <- disk disk <- disk
pure disk pure disk
parseVolume : Grammar PathToken True Volume parseVolume : Grammar state PathToken True Volume
parseVolume = parseVolume =
verbatimUnc verbatimUnc
<|> verbatimDisk <|> verbatimDisk
<|> unc <|> unc
<|> disk <|> disk
parseBody : Grammar PathToken True Body parseBody : Grammar state PathToken True Body
parseBody = parseBody =
do do
text <- match PTText text <- match PTText
@ -232,7 +232,7 @@ parseBody =
"." => CurDir "." => CurDir
normal => Normal normal normal => Normal normal
parsePath : Grammar PathToken False Path parsePath : Grammar state PathToken False Path
parsePath = parsePath =
do do
vol <- optional parseVolume vol <- optional parseVolume

View File

@ -0,0 +1,91 @@
module Text.Bounded
%default total
public export
record Bounds where
constructor MkBounds
startLine : Int
startCol : Int
endLine : Int
endCol : Int
Show Bounds where
showPrec d (MkBounds sl sc el ec) =
showCon d "MkBounds" (concat [showArg sl, showArg sc, showArg el, showArg ec])
export
startBounds : Bounds -> (Int, Int)
startBounds b = (b.startLine, b.startCol)
export
endBounds : Bounds -> (Int, Int)
endBounds b = (b.endLine, b.endCol)
export
Eq Bounds where
(MkBounds sl sc el ec) == (MkBounds sl' sc' el' ec') =
sl == sl'
&& sc == sc'
&& el == el'
&& ec == ec'
public export
record WithBounds ty where
constructor MkBounded
val : ty
isIrrelevant : Bool
bounds : Bounds
export
start : WithBounds ty -> (Int, Int)
start = startBounds . bounds
export
end : WithBounds ty -> (Int, Int)
end = endBounds . bounds
export
Eq ty => Eq (WithBounds ty) where
(MkBounded val ir bd) == (MkBounded val' ir' bd') =
val == val' && ir == ir' && bd == bd'
export
Show ty => Show (WithBounds ty) where
showPrec d (MkBounded val ir bd) =
showCon d "MkBounded" (concat [showArg ir, showArg val, showArg bd])
export
Functor WithBounds where
map f (MkBounded val ir bd) = MkBounded (f val) ir bd
export
Foldable WithBounds where
foldr c n b = c b.val n
export
Traversable WithBounds where
traverse f (MkBounded v a bd) = (\ v => MkBounded v a bd) <$> f v
export
irrelevantBounds : ty -> WithBounds ty
irrelevantBounds x = MkBounded x True (MkBounds (-1) (-1) (-1) (-1))
export
removeIrrelevance : WithBounds ty -> WithBounds ty
removeIrrelevance (MkBounded val ir bd) = MkBounded val True bd
export
mergeBounds : WithBounds ty -> WithBounds ty' -> WithBounds ty'
mergeBounds (MkBounded _ True _) (MkBounded val True _) = irrelevantBounds val
mergeBounds (MkBounded _ True _) b2 = b2
mergeBounds b1 (MkBounded val True _) = const val <$> b1
mergeBounds b1 b2 =
let (ur, uc) = min (start b1) (start b2)
(lr, lc) = max (end b1) (end b2) in
MkBounded b2.val False (MkBounds ur uc lr lc)
export
joinBounds : WithBounds (WithBounds ty) -> WithBounds ty
joinBounds b = mergeBounds b b.val

View File

@ -0,0 +1,69 @@
module Text.Distance.Levenshtein
import Data.List
import Data.Maybe
import Data.String
import Data.IOMatrix
import Data.List.Extra
%default total
||| Self-evidently correct but O(3 ^ (min mn)) complexity
spec : String -> String -> Nat
spec a b = loop (fastUnpack a) (fastUnpack b) where
loop : List Char -> List Char -> Nat
loop [] ys = length ys -- deletions
loop xs [] = length xs -- insertions
loop (x :: xs) (y :: ys)
= if x == y then loop xs ys -- match
else 1 + minimum [ loop (x :: xs) ys -- insert y
, loop xs (y :: ys) -- delete x
, loop xs ys -- substitute y for x
]
||| Dynamic programming
export
compute : HasIO io => String -> String -> io Nat
compute a b = do
let w = strLength a
let h = strLength b
-- In mat[i][j], we store the distance between
-- * the suffix of a of size i
-- * the suffix of b of size j
-- So we need a matrix of size (|a|+1) * (|b|+1)
mat <- new (w+1) (h+1)
-- Whenever one of the two suffixes of interest is empty, the only
-- winning move is to:
-- * delete all of the first
-- * insert all of the second
-- i.e. the cost is the length of the non-zero suffix
for_ [0..w] $ \ i => write mat i 0 i -- deletions
for_ [0..h] $ \ j => write mat 0 j j -- insertions
-- We introduce a specialised `read` for ease of use
let get = \i, j => case !(read {io} mat i j) of
Nothing => assert_total $
idris_crash "INTERNAL ERROR: Badly initialised matrix"
Just n => pure n
-- We fill the matrix from the bottom up, using the same formula we used
-- in the specification's `loop`.
for_ [1..h] $ \ j => do
for_ [1..w] $ \ i => do
-- here we change Levenshtein slightly so that we may only substitute
-- alpha / numerical characters for similar ones. This avoids suggesting
-- "#" as a replacement for an out of scope "n".
let cost = let c = assert_total $ strIndex a (i-1)
d = assert_total $ strIndex b (j-1)
in if c == d then 0 else
if isAlpha c && isAlpha d then 1 else
if isDigit c && isDigit d then 1 else 2
write mat i j $
minimum [ 1 + !(get i (j-1)) -- insert y
, 1 + !(get (i-1) j) -- delete x
, cost + !(get (i-1) (j-1)) -- equal or substitute y for x
]
-- Once the matrix is fully filled, we can simply read the top right corner
integerToNat . cast <$> get w h

View File

@ -8,6 +8,8 @@ import public Text.Lexer.Core
import public Text.Quantity import public Text.Quantity
import public Text.Token import public Text.Token
%default total
export export
toTokenMap : List (Lexer, k) -> TokenMap (Token k) toTokenMap : List (Lexer, k) -> TokenMap (Token k)
toTokenMap = map $ \(l, kind) => (l, Tok kind) toTokenMap = map $ \(l, kind) => (l, Tok kind)
@ -37,7 +39,7 @@ non l = reject l <+> any
export export
choiceMap : {c : Bool} -> choiceMap : {c : Bool} ->
Foldable t => (a -> Recognise c) -> t a -> Recognise c Foldable t => (a -> Recognise c) -> t a -> Recognise c
choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in choiceMap {c} f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
f x <|> acc) f x <|> acc)
fail xs fail xs
@ -46,13 +48,13 @@ choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
export export
choice : {c : _} -> choice : {c : _} ->
Foldable t => t (Recognise c) -> Recognise c Foldable t => t (Recognise c) -> Recognise c
choice = Lexer.choiceMap id choice = choiceMap id
||| Sequence a list of recognisers. Guaranteed to consume input if the list is ||| Sequence a list of recognisers. Guaranteed to consume input if the list is
||| non-empty and the recognisers consume. ||| non-empty and the recognisers consume.
export export
concat : {c : _} -> concat : {c : _} ->
(xs : List (Recognise c)) -> Recognise (c && isCons xs) (xs : List (Recognise c)) -> Recognise (isCons xs && c)
concat = Lexer.Core.concatMap id concat = Lexer.Core.concatMap id
||| Recognise a specific character. ||| Recognise a specific character.
@ -124,6 +126,13 @@ mutual
many : Lexer -> Recognise False many : Lexer -> Recognise False
many l = opt (some l) many l = opt (some l)
||| Repeat the sub-lexer `l` one or more times until the lexer
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
||| /((?!`stopBefore`)`l`)\+/
export
someUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Lexer
someUntil stopBefore l = some (reject stopBefore <+> l)
||| Repeat the sub-lexer `l` zero or more times until the lexer ||| Repeat the sub-lexer `l` zero or more times until the lexer
||| `stopBefore` is encountered. `stopBefore` will not be consumed. ||| `stopBefore` is encountered. `stopBefore` will not be consumed.
||| Not guaranteed to consume input. ||| Not guaranteed to consume input.
@ -140,14 +149,6 @@ export
manyThen : (stopAfter : Recognise c) -> (l : Lexer) -> Recognise c manyThen : (stopAfter : Recognise c) -> (l : Lexer) -> Recognise c
manyThen stopAfter l = manyUntil stopAfter l <+> stopAfter manyThen stopAfter l = manyUntil stopAfter l <+> stopAfter
||| Recognise many instances of `l` until an instance of `end` is
||| encountered.
|||
||| Useful for defining comments.
export
manyTill : (l : Lexer) -> (end : Lexer) -> Recognise False
manyTill l end = end <|> opt (l <+> manyTill l end)
||| Recognise a sub-lexer repeated as specified by `q`. Fails if `q` has ||| Recognise a sub-lexer repeated as specified by `q`. Fails if `q` has
||| `min` and `max` in the wrong order. Consumes input unless `min q` is zero. ||| `min` and `max` in the wrong order. Consumes input unless `min q` is zero.
||| /`l`{`q`}/ ||| /`l`{`q`}/
@ -172,6 +173,18 @@ export
digits : Lexer digits : Lexer
digits = some digit digits = some digit
||| Recognise a single binary digit
||| /[0-1]/
export
binDigit : Lexer
binDigit = pred (\c => c == '0' || c == '1')
||| Recognise one or more binary digits
||| /[0-1]+/
export
binDigits : Lexer
binDigits = some binDigit
||| Recognise a single hexidecimal digit ||| Recognise a single hexidecimal digit
||| /[0-9A-Fa-f]/ ||| /[0-9A-Fa-f]/
export export
@ -188,13 +201,13 @@ hexDigits = some hexDigit
||| /[0-8]/ ||| /[0-8]/
export export
octDigit : Lexer octDigit : Lexer
octDigit = pred isHexDigit octDigit = pred isOctDigit
||| Recognise one or more octal digits ||| Recognise one or more octal digits
||| /[0-8]+/ ||| /[0-8]+/
export export
octDigits : Lexer octDigits : Lexer
octDigits = some hexDigit octDigits = some octDigit
||| Recognise a single alpha character ||| Recognise a single alpha character
||| /[A-Za-z]/ ||| /[A-Za-z]/
@ -307,18 +320,19 @@ export
quote : (q : Lexer) -> (l : Lexer) -> Lexer quote : (q : Lexer) -> (l : Lexer) -> Lexer
quote q l = surround q q l quote q l = surround q q l
||| Recognise an escape character (often '\\') followed by a sub-lexer ||| Recognise an escape sub-lexer (often '\\') followed by
||| another sub-lexer
||| /[`esc`]`l`/ ||| /[`esc`]`l`/
export export
escape : (esc : Char) -> Lexer -> Lexer escape : (esc : Lexer) -> Lexer -> Lexer
escape esc l = is esc <+> l escape esc l = esc <+> l
||| Recognise a string literal, including escaped characters. ||| Recognise a string literal, including escaped characters.
||| (Note: doesn't yet handle escape sequences such as \123) ||| (Note: doesn't yet handle escape sequences such as \123)
||| /"(\\\\.|.)\*?"/ ||| /"(\\\\.|.)\*?"/
export export
stringLit : Lexer stringLit : Lexer
stringLit = quote (is '"') (escape '\\' any <|> any) stringLit = quote (is '"') (escape (is '\\') any <|> any)
||| Recognise a character literal, including escaped characters. ||| Recognise a character literal, including escaped characters.
||| (Note: doesn't yet handle escape sequences such as \123) ||| (Note: doesn't yet handle escape sequences such as \123)
@ -326,7 +340,7 @@ stringLit = quote (is '"') (escape '\\' any <|> any)
export export
charLit : Lexer charLit : Lexer
charLit = let q = '\'' in charLit = let q = '\'' in
is q <+> (escape '\\' (control <|> any) <|> isNot q) <+> is q is q <+> (escape (is '\\') (control <|> any) <|> isNot q) <+> is q
where where
lexStr : List String -> Lexer lexStr : List String -> Lexer
lexStr [] = fail lexStr [] = fail
@ -348,12 +362,40 @@ export
intLit : Lexer intLit : Lexer
intLit = opt (is '-') <+> digits intLit = opt (is '-') <+> digits
||| Recognise a binary literal, prefixed by "0b"
||| /0b[0-1]+/
export
binLit : Lexer
binLit = exact "0b" <+> binDigits
||| Recognise a hexidecimal literal, prefixed by "0x" or "0X" ||| Recognise a hexidecimal literal, prefixed by "0x" or "0X"
||| /0[Xx][0-9A-Fa-f]+/ ||| /0[Xx][0-9A-Fa-f]+/
export export
hexLit : Lexer hexLit : Lexer
hexLit = approx "0x" <+> hexDigits hexLit = approx "0x" <+> hexDigits
||| Recognise an octal literal, prefixed by "0o"
||| /0o[0-9A-Fa-f]+/
export
octLit : Lexer
octLit = exact "0o" <+> octDigits
export
digitsUnderscoredLit : Lexer
digitsUnderscoredLit = digits <+> many (is '_' <+> digits)
export
binUnderscoredLit : Lexer
binUnderscoredLit = binLit <+> many (is '_' <+> binDigits)
export
hexUnderscoredLit : Lexer
hexUnderscoredLit = hexLit <+> many (is '_' <+> hexDigits)
export
octUnderscoredLit : Lexer
octUnderscoredLit = octLit <+> many (is '_' <+> octDigits)
||| Recognise `start`, then recognise all input until a newline is encountered, ||| Recognise `start`, then recognise all input until a newline is encountered,
||| and consume the newline. Will succeed if end-of-input is encountered before ||| and consume the newline. Will succeed if end-of-input is encountered before
||| a newline. ||| a newline.

View File

@ -1,15 +1,19 @@
module Text.Lexer.Core module Text.Lexer.Core
import public Control.Delayed
import Data.Bool
import Data.List import Data.List
import Data.Maybe
import Data.Nat import Data.Nat
import Data.String import Data.String
import public Control.Delayed
import public Text.Bounded
%default total
||| A language of token recognisers. ||| A language of token recognisers.
||| @ consumes If `True`, this recogniser is guaranteed to consume at ||| @ consumes If `True`, this recogniser is guaranteed to consume at
||| least one character of input when it succeeds. ||| least one character of input when it succeeds.
public export export
data Recognise : (consumes : Bool) -> Type where data Recognise : (consumes : Bool) -> Type where
Empty : Recognise False Empty : Recognise False
Fail : Recognise c Fail : Recognise c
@ -17,6 +21,7 @@ data Recognise : (consumes : Bool) -> Type where
Pred : (Char -> Bool) -> Recognise True Pred : (Char -> Bool) -> Recognise True
SeqEat : Recognise True -> Inf (Recognise e) -> Recognise True SeqEat : Recognise True -> Inf (Recognise e) -> Recognise True
SeqEmpty : Recognise e1 -> Recognise e2 -> Recognise (e1 || e2) SeqEmpty : Recognise e1 -> Recognise e2 -> Recognise (e1 || e2)
SeqSame : Recognise e -> Recognise e -> Recognise e
Alt : Recognise e1 -> Recognise e2 -> Recognise (e1 && e2) Alt : Recognise e1 -> Recognise e2 -> Recognise (e1 && e2)
||| A token recogniser. Guaranteed to consume at least one character. ||| A token recogniser. Guaranteed to consume at least one character.
@ -24,8 +29,6 @@ public export
Lexer : Type Lexer : Type
Lexer = Recognise True Lexer = Recognise True
-- %allow_overloads (<+>)
||| Sequence two recognisers. If either consumes a character, the sequence ||| Sequence two recognisers. If either consumes a character, the sequence
||| is guaranteed to consume a character. ||| is guaranteed to consume a character.
export %inline export %inline
@ -34,11 +37,8 @@ export %inline
(<+>) {c1 = False} = SeqEmpty (<+>) {c1 = False} = SeqEmpty
(<+>) {c1 = True} = SeqEat (<+>) {c1 = True} = SeqEat
%allow_overloads (<|>)
||| Alternative recognisers. If both consume, the combination is guaranteed ||| Alternative recognisers. If both consume, the combination is guaranteed
||| to consumer a character. ||| to consume a character.
%inline
export export
(<|>) : Recognise c1 -> Recognise c2 -> Recognise (c1 && c2) (<|>) : Recognise c1 -> Recognise c2 -> Recognise (c1 && c2)
(<|>) = Alt (<|>) = Alt
@ -49,13 +49,11 @@ fail : Recognise c
fail = Fail fail = Fail
||| Recognise no input (doesn't consume any input) ||| Recognise no input (doesn't consume any input)
%inline
export export
empty : Recognise False empty : Recognise False
empty = Empty empty = Empty
||| Recognise a character that matches a predicate ||| Recognise a character that matches a predicate
%inline
export export
pred : (Char -> Bool) -> Lexer pred : (Char -> Bool) -> Lexer
pred = Pred pred = Pred
@ -70,26 +68,18 @@ export
reject : Recognise c -> Recognise False reject : Recognise c -> Recognise False
reject = Lookahead False reject = Lookahead False
%allow_overloads concatMap
||| Sequence the recognisers resulting from applying a function to each element ||| Sequence the recognisers resulting from applying a function to each element
||| of a list. The resulting recogniser will consume input if the produced ||| of a list. The resulting recogniser will consume input if the produced
||| recognisers consume and the list is non-empty. ||| recognisers consume and the list is non-empty.
export export
concatMap : (a -> Recognise c) -> (xs : List a) -> concatMap : (a -> Recognise c) -> (xs : List a) -> Recognise (isCons xs && c)
Recognise (c && (isCons xs)) concatMap _ [] = Empty
concatMap _ [] = rewrite andFalseFalse c in Empty concatMap f (x :: []) = f x
concatMap f (x :: xs) concatMap f (x :: xs@(_ :: _)) = SeqSame (f x) (concatMap f xs)
= rewrite andTrueNeutral c in
rewrite sym (orSameAndRightNeutral c (isCons xs)) in
SeqEmpty (f x) (Core.concatMap f xs)
data StrLen : Type where data StrLen : Type where
MkStrLen : String -> Nat -> StrLen MkStrLen : String -> Nat -> StrLen
Show StrLen where
show (MkStrLen str n) = str ++ "(" ++ show n ++ ")"
getString : StrLen -> String getString : StrLen -> String
getString (MkStrLen str n) = str getString (MkStrLen str n) = str
@ -105,19 +95,9 @@ strTail : Nat -> StrLen -> StrLen
strTail start (MkStrLen str len) strTail start (MkStrLen str len)
= MkStrLen (substr start len str) (minus len start) = MkStrLen (substr start len str) (minus len start)
isJust : Maybe a -> Bool
isJust Nothing = False
isJust (Just x) = True
export
unpack' : String -> List Char
unpack' str
= case strUncons str of
Nothing => []
Just (x, xs) => x :: unpack' xs
-- If the string is recognised, returns the index at which the token -- If the string is recognised, returns the index at which the token
-- ends -- ends
export
scan : Recognise c -> List Char -> List Char -> Maybe (List Char, List Char) scan : Recognise c -> List Char -> List Char -> Maybe (List Char, List Char)
scan Empty tok str = pure (tok, str) scan Empty tok str = pure (tok, str)
scan Fail tok str = Nothing scan Fail tok str = Nothing
@ -137,6 +117,9 @@ scan (SeqEat r1 r2) tok str
scan (SeqEmpty r1 r2) tok str scan (SeqEmpty r1 r2) tok str
= do (tok', rest) <- scan r1 tok str = do (tok', rest) <- scan r1 tok str
scan r2 tok' rest scan r2 tok' rest
scan (SeqSame r1 r2) tok str
= do (tok', rest) <- scan r1 tok str
scan r2 tok' rest
scan (Alt r1 r2) tok str scan (Alt r1 r2) tok str
= maybe (scan r2 tok str) Just (scan r1 tok str) = maybe (scan r2 tok str) Just (scan r1 tok str)
@ -148,27 +131,15 @@ public export
TokenMap : (tokenType : Type) -> Type TokenMap : (tokenType : Type) -> Type
TokenMap tokenType = List (Lexer, String -> tokenType) TokenMap tokenType = List (Lexer, String -> tokenType)
||| A token, and the line and column where it was in the input tokenise : (a -> Bool) ->
public export
record TokenData a where
constructor MkToken
line : Int
col : Int
tok : a
export
Show a => Show (TokenData a) where
show t = show (line t) ++ ":" ++ show (col t) ++ ":" ++ show (tok t)
tokenise : (TokenData a -> Bool) ->
(line : Int) -> (col : Int) -> (line : Int) -> (col : Int) ->
List (TokenData a) -> TokenMap a -> List (WithBounds a) -> TokenMap a ->
List Char -> (List (TokenData a), (Int, Int, List Char)) List Char -> (List (WithBounds a), (Int, Int, List Char))
tokenise pred line col acc tmap str tokenise pred line col acc tmap str
= case getFirstToken tmap str of = case getFirstToken tmap str of
Just (tok, line', col', rest) => Just (tok, line', col', rest) =>
-- assert total because getFirstToken must consume something -- assert total because getFirstToken must consume something
if pred tok if pred tok.val
then (reverse acc, (line, col, [])) then (reverse acc, (line, col, []))
else assert_total (tokenise pred line' col' (tok :: acc) tmap rest) else assert_total (tokenise pred line' col' (tok :: acc) tmap rest)
Nothing => (reverse acc, (line, col, str)) Nothing => (reverse acc, (line, col, str))
@ -183,13 +154,15 @@ tokenise pred line col acc tmap str
(incol, _) => cast (length incol) (incol, _) => cast (length incol)
getFirstToken : TokenMap a -> List Char -> getFirstToken : TokenMap a -> List Char ->
Maybe (TokenData a, Int, Int, List Char) Maybe (WithBounds a, Int, Int, List Char)
getFirstToken [] str = Nothing getFirstToken [] str = Nothing
getFirstToken ((lex, fn) :: ts) str getFirstToken ((lex, fn) :: ts) str
= case scan lex [] str of = case scan lex [] str of
Just (tok, rest) => Just (MkToken line col (fn (pack (reverse tok))), Just (tok, rest) =>
line + cast (countNLs tok), let line' = line + cast (countNLs tok)
getCols tok col, rest) col' = getCols tok col in
Just (MkBounded (fn (fastPack (reverse tok))) False (MkBounds line col line' col'),
line', col', rest)
Nothing => getFirstToken ts str Nothing => getFirstToken ts str
||| Given a mapping from lexers to token generating functions (the ||| Given a mapping from lexers to token generating functions (the
@ -197,14 +170,14 @@ tokenise pred line col acc tmap str
||| and the line, column, and remainder of the input at the first point in the ||| and the line, column, and remainder of the input at the first point in the
||| string where there are no recognised tokens. ||| string where there are no recognised tokens.
export export
lex : TokenMap a -> String -> (List (TokenData a), (Int, Int, String)) lex : TokenMap a -> String -> (List (WithBounds a), (Int, Int, String))
lex tmap str lex tmap str
= let (ts, (l, c, str')) = tokenise (const False) 0 0 [] tmap (fastUnpack str) in = let (ts, (l, c, str')) = tokenise (const False) 0 0 [] tmap (unpack str) in
(ts, (l, c, pack str')) (ts, (l, c, fastPack str'))
export export
lexTo : (TokenData a -> Bool) -> lexTo : (a -> Bool) ->
TokenMap a -> String -> (List (TokenData a), (Int, Int, String)) TokenMap a -> String -> (List (WithBounds a), (Int, Int, String))
lexTo pred tmap str lexTo pred tmap str
= let (ts, (l, c, str')) = tokenise pred 0 0 [] tmap (fastUnpack str) in = let (ts, (l, c, str')) = tokenise pred 0 0 [] tmap (unpack str) in
(ts, (l, c, pack str')) (ts, (l, c, fastPack str'))

View File

@ -0,0 +1,153 @@
module Text.Lexer.Tokenizer
import Data.List
import Data.Either
import Data.Nat
import Data.String
import Data.String.Extra
import Text.Lexer.Core
import Text.Lexer
import Text.PrettyPrint.Prettyprinter
import Text.PrettyPrint.Prettyprinter.Util
import public Control.Delayed
import public Text.Bounded
%default total
||| Description of a language's tokenization rule.
export
data Tokenizer : (tokenType : Type) -> Type where
Match : Lexer -> (String -> tokenType) -> Tokenizer tokenType
Compose : (begin : Lexer) ->
(mapBegin : String -> tokenType) ->
(tagger : String -> tag) ->
(middle : Inf (tag -> Tokenizer tokenType)) ->
(end : tag -> Lexer) ->
(mapEnd : String -> tokenType) ->
Tokenizer tokenType
Alt : Tokenizer tokenType -> Lazy (Tokenizer tokenType) -> Tokenizer tokenType
||| Alternative tokenizer rules.
export
(<|>) : Tokenizer t -> Lazy (Tokenizer t) -> Tokenizer t
(<|>) = Alt
||| Match on a recogniser and cast the string to a token.
export
match : Lexer -> (String -> a) -> Tokenizer a
match = Match
||| Compose other tokenizer. Language composition should be quoted between
||| a begin lexer and a end lexer. The begin token can be used to generate
||| the composition tokenizer and the end lexer.
export
compose : (begin : Lexer) ->
(mapBegin : String -> a) ->
(tagger : String -> tag) ->
(middle : Inf (tag -> Tokenizer a)) ->
(end : tag -> Lexer) ->
(mapEnd : String -> a) ->
Tokenizer a
compose = Compose
||| Stop reason why tokenizer can't make more progress.
||| @ ComposeNotClosing carries the span of composition begin token in the
||| form of `(startLine, startCol), (endLine, endCol)`.
public export
data StopReason = EndInput | NoRuleApply | ComposeNotClosing (Int, Int) (Int, Int)
export
Show StopReason where
show EndInput = "EndInput"
show NoRuleApply = "NoRuleApply"
show (ComposeNotClosing start end) = "ComposeNotClosing " ++ show start ++ " " ++ show end
export
Pretty StopReason where
pretty EndInput = pretty "EndInput"
pretty NoRuleApply = pretty "NoRuleApply"
pretty (ComposeNotClosing start end) = "ComposeNotClosing" <++> pretty start <++> pretty end
tokenise : Lexer ->
Tokenizer a ->
(line, col : Int) -> List (WithBounds a) ->
List Char ->
(List (WithBounds a), (StopReason, Int, Int, List Char))
tokenise reject tokenizer line col acc [] = (reverse acc, EndInput, (line, col, []))
tokenise reject tokenizer line col acc str
= case scan reject [] str of
Just _ => (reverse acc, (EndInput, line, col, str))
Nothing => case getFirstMatch tokenizer str of
Right (toks, line', col', rest) =>
-- assert total because getFirstMatch must consume something
assert_total (tokenise reject tokenizer line' col' (toks ++ acc) rest)
Left reason => (reverse acc, reason, (line, col, str))
where
countNLs : List Char -> Nat
countNLs str = List.length (filter (== '\n') str)
getCols : List Char -> Int -> Int
getCols x c
= case span (/= '\n') x of
(incol, []) => c + cast (length incol)
(incol, _) => cast (length incol)
-- get the next lexeme using the `Lexer` in argument, its position and the input
-- Returns the new position, the lexeme parsed and the rest of the input
-- If parsing the lexer fails, this returns `Nothing`
getNext : (lexer : Lexer) -> (line, col : Int) -> (input : List Char) -> Maybe (String, Int, Int, List Char)
getNext lexer line col str =
let Just (token, rest) = scan lexer [] str
| _ => Nothing
line' = line + cast (countNLs token)
col' = getCols token col
tokenStr = fastPack $ reverse token
in pure (tokenStr, line', col', rest)
getFirstMatch : Tokenizer a -> List Char ->
Either StopReason (List (WithBounds a), Int, Int, List Char)
getFirstMatch (Match lex fn) str
= let Just (tok, line', col', rest) = getNext lex line col str
| _ => Left NoRuleApply
tok' = MkBounded (fn tok) False (MkBounds line col line' col')
in Right ([tok'], line', col', rest)
getFirstMatch (Compose begin mapBegin tagger middleFn endFn mapEnd) str
= let Just (beginTok', line', col' , rest) = getNext begin line col str
| Nothing => Left NoRuleApply
tag = tagger beginTok'
middle = middleFn tag
end = endFn tag
beginTok'' = MkBounded (mapBegin beginTok') False (MkBounds line col line' col')
(midToks, (reason, line'', col'', rest'')) =
tokenise end middle line' col' [] rest
in case reason of
(ComposeNotClosing _ _) => Left reason
_ => let Just (endTok', lineEnd, colEnd, restEnd) =
getNext end line'' col'' rest''
| _ => Left $ ComposeNotClosing (line, col) (line', col')
endTok'' = MkBounded (mapEnd endTok') False (MkBounds line'' col'' lineEnd colEnd)
in Right ([endTok''] ++ reverse midToks ++ [beginTok''], lineEnd, colEnd, restEnd)
getFirstMatch (Alt t1 t2) str
= case getFirstMatch t1 str of
Right result => Right result
Left reason@(ComposeNotClosing _ _) => Left reason
Left _ => getFirstMatch t2 str
export
lexTo : Lexer ->
Tokenizer a ->
String ->
(List (WithBounds a), (StopReason, Int, Int, String))
lexTo reject tokenizer str
= let (ts, reason, (l, c, str')) =
tokenise reject tokenizer 0 0 [] (fastUnpack str) in
(ts, reason, (l, c, fastPack str'))
||| Given a tokenizer and an input string, return a list of recognised tokens,
||| and the line, column, and remainder of the input at the first point in the string
||| where there are no recognised tokens.
export
lex : Tokenizer a -> String -> (List (WithBounds a), (StopReason, Int, Int, String))
lex tokenizer str = lexTo (pred $ const False) tokenizer str

View File

@ -0,0 +1,234 @@
||| A simple module to process 'literate' documents.
|||
||| The module uses a lexer to split the document into code blocks,
||| delineated by user-defined markers, and code lines that are
||| indicated be a line marker. The lexer returns a document stripped
||| of non-code elements but preserving the original document's line
||| count. Column numbering of code lines are not preserved.
|||
||| The underlying tokeniser is greedy.
|||
||| Once it identifies a line marker it reads a prettifying space then
||| consumes until the end of line. Once identifies a starting code
||| block marker, the lexer will consume input until the next
||| identifiable end block is encountered. Any other content is
||| treated as part of the original document.
|||
||| Thus, the input literate files *must* be well-formed w.r.t
||| to code line markers and code blocks.
|||
||| A further restriction is that literate documents cannot contain
||| the markers within the document's main text: This will confuse the
||| lexer.
|||
module Text.Literate
import Text.Lexer
import Data.List
import Data.List1
import Data.List.Views
import Data.String
import Data.String.Extra
%default total
untilEOL : Recognise False
untilEOL = manyUntil newline any
line : String -> Lexer
line s = exact s <+> (newline <|> space <+> untilEOL)
block : String -> String -> Lexer
block s e = surround (exact s <+> untilEOL) (exact e <+> untilEOL) any
notCodeLine : Lexer
notCodeLine = newline
<|> any <+> untilEOL
data Token = CodeBlock String String String
| Any String
| CodeLine String String
Show Token where
showPrec d (CodeBlock l r x) = showCon d "CodeBlock" $ showArg l ++ showArg r ++ showArg x
showPrec d (Any x) = showCon d "Any" $ showArg x
showPrec d (CodeLine m x) = showCon d "CodeLine" $ showArg m ++ showArg x
rawTokens : (delims : List (String, String))
-> (markers : List String)
-> TokenMap (Token)
rawTokens delims ls =
map (\(l,r) => (block l r, CodeBlock (trim l) (trim r))) delims
++ map (\m => (line m, CodeLine (trim m))) ls
++ [(notCodeLine, Any)]
namespace Compat
-- `reduce` below was depending on the old behaviour of `lines` before #1585
-- was merged. That old `lines` function is added here to preserve behaviour
-- of `reduce`.
lines' : List Char -> List1 (List Char)
lines' [] = singleton []
lines' s = case break isNL s of
(l, s') => l ::: case s' of
[] => []
_ :: s'' => forget $ lines' (assert_smaller s s'')
export
lines : String -> List1 String
lines s = map pack (lines' (unpack s))
||| Merge the tokens into a single source file.
reduce : List (WithBounds Token) -> List String -> String
reduce [] acc = fastAppend (reverse acc)
reduce (MkBounded (Any x) _ _ :: rest) acc =
-- newline will always be tokenized as a single token
if x == "\n"
then reduce rest ("\n"::acc)
else reduce rest acc
reduce (MkBounded (CodeLine m src) _ _ :: rest) acc =
if m == trim src
then reduce rest ("\n"::acc)
else reduce rest ((substr (length m + 1) -- remove space to right of marker.
(length src)
src
)::acc)
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc with (Compat.lines src) -- Strip the deliminators surrounding the block.
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: ys) with (snocList ys)
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: []) | Empty = reduce rest acc -- 2
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: (srcs ++ [f])) | (Snoc f srcs rec) =
-- the "\n" counts for the open deliminator; the closing deliminator should always be followed by a (Any "\n"), so we don't add a newline
reduce rest (((unlines srcs) ++ "\n") :: "\n" :: acc)
-- [ NOTE ] 1 & 2 shouldn't happen as code blocks are well formed i.e. have two deliminators.
public export
record LiterateError where
constructor MkLitErr
line : Int
column : Int
input : String
||| Description of literate styles.
|||
||| A 'literate' style comprises of
|||
||| + a list of code block deliminators (`deliminators`);
||| + a list of code line markers (`line_markers`); and
||| + a list of known file extensions `file_extensions`.
|||
||| Some example specifications:
|||
||| + Bird Style
|||
|||```
|||MkLitStyle Nil [">"] [".lidr"]
|||```
|||
||| + Literate Haskell (for LaTeX)
|||
|||```
|||MkLitStyle [("\\begin{code}", "\\end{code}"),("\\begin{spec}","\\end{spec}")]
||| Nil
||| [".lhs", ".tex"]
|||```
|||
||| + OrgMode
|||
|||```
|||MkLitStyle [("#+BEGIN_SRC idris","#+END_SRC"), ("#+COMMENT idris","#+END_COMMENT")]
||| ["#+IDRIS:"]
||| [".org"]
|||```
|||
||| + Common Mark
|||
|||```
|||MkLitStyle [("```idris","```"), ("<!-- idris","--!>")]
||| Nil
||| [".md", ".markdown"]
|||```
|||
public export
record LiterateStyle where
constructor MkLitStyle
||| The pairs of start and end tags for code blocks.
deliminators : List (String, String)
||| Line markers that indicate a line contains code.
line_markers : List String
||| Recognised file extensions. Not used by the module, but will be
||| of use when connecting to code that reads in the original source
||| files.
file_extensions : List String
||| Given a 'literate specification' extract the code from the
||| literate source file (`litStr`) that follows the presented style.
|||
||| @specification The literate specification to use.
||| @litStr The literate source file.
|||
||| Returns a `LiterateError` if the literate file contains malformed
||| code blocks or code lines.
|||
export
extractCode : (specification : LiterateStyle)
-> (litStr : String)
-> Either LiterateError String
extractCode (MkLitStyle delims markers exts) str =
case lex (rawTokens delims markers) str of
(toks, (_,_,"")) => Right (reduce toks Nil)
(_, (l,c,i)) => Left (MkLitErr l c i)
||| Synonym for `extractCode`.
export
unlit : (specification : LiterateStyle)
-> (litStr : String)
-> Either LiterateError String
unlit = extractCode
||| Is the provided line marked up using a line marker?
|||
||| If the line is suffixed by any one of the style's set of line
||| markers then return length of literate line marker, and the code stripped from the line
||| marker. Otherwise, return Nothing and the unmarked line.
export
isLiterateLine : (specification : LiterateStyle)
-> (str : String)
-> Pair (Maybe String) String
isLiterateLine (MkLitStyle delims markers _) str with (lex (rawTokens delims markers) str)
isLiterateLine (MkLitStyle delims markers _) str | ([MkBounded (CodeLine m str') _ _], (_,_, "")) = (Just m, str')
isLiterateLine (MkLitStyle delims markers _) str | (_, _) = (Nothing, str)
||| Given a 'literate specification' embed the given code using the
||| literate style provided.
|||
||| If the style uses deliminators to denote code blocks use the first
||| pair of deliminators in the style. Otherwise use first linemarker
||| in the style. If there is **no style** return the presented code
||| string unembedded.
|||
|||
||| @specification The literate specification to use.
||| @code The code to embed,
|||
|||
export
embedCode : (specification : LiterateStyle)
-> (code : String)
-> String
embedCode (MkLitStyle ((s,e)::delims) _ _) str = unlines [s,str,e]
embedCode (MkLitStyle Nil (m::markers) _) str = unwords [m, str]
embedCode (MkLitStyle _ _ _) str = str
||| Synonm for `embedCode`
export
relit : (specification : LiterateStyle)
-> (code : String)
-> String
relit = embedCode
-- --------------------------------------------------------------------- [ EOF ]

View File

@ -2,49 +2,50 @@ module Text.Parser
import Data.Bool import Data.Bool
import Data.List import Data.List
import Data.List1
import Data.Nat import Data.Nat
import Data.Vect import public Data.List1
import public Text.Parser.Core import public Text.Parser.Core
import public Text.Quantity import public Text.Quantity
import public Text.Token import public Text.Token
%default total
||| Parse a terminal based on a kind of token. ||| Parse a terminal based on a kind of token.
export export
match : (Eq k, TokenKind k) => match : (Eq k, TokenKind k) =>
(kind : k) -> (kind : k) ->
Grammar (Token k) True (TokType kind) Grammar state (Token k) True (TokType kind)
match kind = terminal "Unrecognised input" $ match k = terminal "Unrecognised input" $
\(Tok kind' text) => if kind' == kind \t => if t.kind == k
then Just $ tokValue kind text then Just $ tokValue k t.text
else Nothing else Nothing
||| Optionally parse a thing, with a default value if the grammar doesn't ||| Optionally parse a thing, with a default value if the grammar doesn't
||| match. May match the empty input. ||| match. May match the empty input.
export export
option : {c : Bool} -> option : {c : Bool} ->
(def : a) -> (p : Grammar tok c a) -> (def : a) -> (p : Grammar state tok c a) ->
Grammar tok False a Grammar state tok False a
option {c = False} def p = p <|> pure def option {c = False} def p = p <|> pure def
option {c = True} def p = p <|> pure def option {c = True} def p = p <|> pure def
||| Optionally parse a thing. ||| Optionally parse a thing.
||| To provide a default value, use `option` instead. ||| To provide a default value, use `option` instead.
export export
optional : {c : _} -> optional : {c : Bool} ->
(p : Grammar tok c a) -> (p : Grammar state tok c a) ->
Grammar tok False (Maybe a) Grammar state tok False (Maybe a)
optional p = option Nothing (map Just p) optional p = option Nothing (map Just p)
||| Try to parse one thing or the other, producing a value that indicates ||| Try to parse one thing or the other, producing a value that indicates
||| which option succeeded. If both would succeed, the left option ||| which option succeeded. If both would succeed, the left option
||| takes priority. ||| takes priority.
export export
choose : {c1, c2 : _} -> choose : {c1, c2 : Bool} ->
(l : Grammar tok c1 a) -> (l : Grammar state tok c1 a) ->
(r : Grammar tok c2 b) -> (r : Grammar state tok c2 b) ->
Grammar tok (c1 && c2) (Either a b) Grammar state tok (c1 && c2) (Either a b)
choose l r = map Left l <|> map Right r choose l r = map Left l <|> map Right r
||| Produce a grammar by applying a function to each element of a container, ||| Produce a grammar by applying a function to each element of a container,
@ -52,10 +53,10 @@ choose l r = map Left l <|> map Right r
||| container is empty. ||| container is empty.
export export
choiceMap : {c : Bool} -> choiceMap : {c : Bool} ->
(a -> Grammar tok c b) -> (a -> Grammar state tok c b) ->
Foldable t => t a -> Foldable t => t a ->
Grammar tok c b Grammar state tok c b
choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in choiceMap {c} f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
f x <|> acc) f x <|> acc)
(fail "No more options") xs (fail "No more options") xs
@ -64,36 +65,30 @@ choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
||| Try each grammar in a container until the first one succeeds. ||| Try each grammar in a container until the first one succeeds.
||| Fails if the container is empty. ||| Fails if the container is empty.
export export
choice : {c : _} -> choice : Foldable t =>
Foldable t => t (Grammar tok c a) -> {c : Bool} ->
Grammar tok c a t (Grammar state tok c a) ->
choice = Parser.choiceMap id Grammar state tok c a
choice = choiceMap id
mutual mutual
||| Parse one or more things ||| Parse one or more things
export export
some : Grammar tok True a -> some : Grammar state tok True a ->
Grammar tok True (List1 a) Grammar state tok True (List1 a)
some p = pure (!p ::: !(many p)) some p = pure (!p ::: !(many p))
||| Parse zero or more things (may match the empty input) ||| Parse zero or more things (may match the empty input)
export export
many : Grammar tok True a -> many : Grammar state tok True a ->
Grammar tok False (List a) Grammar state tok False (List a)
many p = option [] (forget <$> some p) many p = option [] (forget <$> some p)
||| Parse one or more instances of `p`, returning the parsed items and proof
||| that the resulting list is non-empty.
export
some' : (p : Grammar tok True a) ->
Grammar tok True (xs : List a ** NonEmpty xs)
some' p = pure (!p :: !(many p) ** IsNonEmpty)
mutual mutual
private private
count1 : (q : Quantity) -> count1 : (q : Quantity) ->
(p : Grammar tok True a) -> (p : Grammar state tok True a) ->
Grammar tok True (List a) Grammar state tok True (List a)
count1 q p = do x <- p count1 q p = do x <- p
seq (count q p) seq (count q p)
(\xs => pure (x :: xs)) (\xs => pure (x :: xs))
@ -101,31 +96,23 @@ mutual
||| Parse `p`, repeated as specified by `q`, returning the list of values. ||| Parse `p`, repeated as specified by `q`, returning the list of values.
export export
count : (q : Quantity) -> count : (q : Quantity) ->
(p : Grammar tok True a) -> (p : Grammar state tok True a) ->
Grammar tok (isSucc (min q)) (List a) Grammar state tok (isSucc (min q)) (List a)
count (Qty Z Nothing) p = many p count (Qty Z Nothing) p = many p
count (Qty Z (Just Z)) _ = pure [] count (Qty Z (Just Z)) _ = pure []
count (Qty Z (Just (S max))) p = option [] $ count1 (atMost max) p count (Qty Z (Just (S max))) p = option [] $ count1 (atMost max) p
count (Qty (S min) Nothing) p = count1 (atLeast min) p count (Qty (S min) Nothing) p = count1 (atLeast min) p
count (Qty (S min) (Just Z)) _ = fail "Quantity out of order" count (Qty (S min) (Just Z)) _ = fail "Quantity out of order"
count (Qty (S min) (Just (S max))) p = count1 (between min max) p count (Qty (S min) (Just (S max))) p = count1 (between (S min) max) p
||| Parse `p` `n` times, returning the vector of values.
export
countExactly : (n : Nat) ->
(p : Grammar tok True a) ->
Grammar tok (isSucc n) (Vect n a)
countExactly Z p = Empty []
countExactly (S k) p = [| p :: countExactly k p |]
mutual mutual
||| Parse one or more instances of `p` until `end` succeeds, returning the ||| Parse one or more instances of `p` until `end` succeeds, returning the
||| list of values from `p`. Guaranteed to consume input. ||| list of values from `p`. Guaranteed to consume input.
export export
someTill : {c : Bool} -> someTill : {c : Bool} ->
(end : Grammar tok c e) -> (end : Grammar state tok c e) ->
(p : Grammar tok True a) -> (p : Grammar state tok True a) ->
Grammar tok True (List1 a) Grammar state tok True (List1 a)
someTill {c} end p = do x <- p someTill {c} end p = do x <- p
seq (manyTill end p) seq (manyTill end p)
(\xs => pure (x ::: xs)) (\xs => pure (x ::: xs))
@ -134,9 +121,9 @@ mutual
||| list of values from `p`. Guaranteed to consume input if `end` consumes. ||| list of values from `p`. Guaranteed to consume input if `end` consumes.
export export
manyTill : {c : Bool} -> manyTill : {c : Bool} ->
(end : Grammar tok c e) -> (end : Grammar state tok c e) ->
(p : Grammar tok True a) -> (p : Grammar state tok True a) ->
Grammar tok c (List a) Grammar state tok c (List a)
manyTill {c} end p = rewrite sym (andTrueNeutral c) in manyTill {c} end p = rewrite sym (andTrueNeutral c) in
map (const []) end <|> (forget <$> someTill end p) map (const []) end <|> (forget <$> someTill end p)
@ -145,28 +132,28 @@ mutual
||| returning its value. ||| returning its value.
export export
afterSome : {c : Bool} -> afterSome : {c : Bool} ->
(skip : Grammar tok True s) -> (skip : Grammar state tok True s) ->
(p : Grammar tok c a) -> (p : Grammar state tok c a) ->
Grammar tok True a Grammar state tok True a
afterSome skip p = do ignore skip afterSome skip p = do ignore $ skip
afterMany skip p afterMany skip p
||| Parse zero or more instance of `skip` until `p` is encountered, ||| Parse zero or more instance of `skip` until `p` is encountered,
||| returning its value. ||| returning its value.
export export
afterMany : {c : Bool} -> afterMany : {c : Bool} ->
(skip : Grammar tok True s) -> (skip : Grammar state tok True s) ->
(p : Grammar tok c a) -> (p : Grammar state tok c a) ->
Grammar tok c a Grammar state tok c a
afterMany {c} skip p = rewrite sym (andTrueNeutral c) in afterMany {c} skip p = rewrite sym (andTrueNeutral c) in
p <|> afterSome skip p p <|> afterSome skip p
||| Parse one or more things, each separated by another thing. ||| Parse one or more things, each separated by another thing.
export export
sepBy1 : {c : Bool} -> sepBy1 : {c : Bool} ->
(sep : Grammar tok True s) -> (sep : Grammar state tok True s) ->
(p : Grammar tok c a) -> (p : Grammar state tok c a) ->
Grammar tok c (List1 a) Grammar state tok c (List1 a)
sepBy1 {c} sep p = rewrite sym (orFalseNeutral c) in sepBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
[| p ::: many (sep *> p) |] [| p ::: many (sep *> p) |]
@ -174,18 +161,18 @@ sepBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
||| match the empty input. ||| match the empty input.
export export
sepBy : {c : Bool} -> sepBy : {c : Bool} ->
(sep : Grammar tok True s) -> (sep : Grammar state tok True s) ->
(p : Grammar tok c a) -> (p : Grammar state tok c a) ->
Grammar tok False (List a) Grammar state tok False (List a)
sepBy sep p = option [] $ forget <$> sepBy1 sep p sepBy sep p = option [] $ forget <$> sepBy1 sep p
||| Parse one or more instances of `p` separated by and optionally terminated by ||| Parse one or more instances of `p` separated by and optionally terminated by
||| `sep`. ||| `sep`.
export export
sepEndBy1 : {c : Bool} -> sepEndBy1 : {c : Bool} ->
(sep : Grammar tok True s) -> (sep : Grammar state tok True s) ->
(p : Grammar tok c a) -> (p : Grammar state tok c a) ->
Grammar tok c (List1 a) Grammar state tok c (List1 a)
sepEndBy1 {c} sep p = rewrite sym (orFalseNeutral c) in sepEndBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
sepBy1 sep p <* optional sep sepBy1 sep p <* optional sep
@ -193,32 +180,49 @@ sepEndBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
||| by `sep`. Will not match a separator by itself. ||| by `sep`. Will not match a separator by itself.
export export
sepEndBy : {c : Bool} -> sepEndBy : {c : Bool} ->
(sep : Grammar tok True s) -> (sep : Grammar state tok True s) ->
(p : Grammar tok c a) -> (p : Grammar state tok c a) ->
Grammar tok False (List a) Grammar state tok False (List a)
sepEndBy sep p = option [] $ forget <$> sepEndBy1 sep p sepEndBy sep p = option [] $ forget <$> sepEndBy1 sep p
||| Parse one or more instances of `p`, separated and terminated by `sep`. ||| Parse one or more instances of `p`, separated and terminated by `sep`.
export export
endBy1 : {c : Bool} -> endBy1 : {c : Bool} ->
(sep : Grammar tok True s) -> (sep : Grammar state tok True s) ->
(p : Grammar tok c a) -> (p : Grammar state tok c a) ->
Grammar tok True (List1 a) Grammar state tok True (List1 a)
endBy1 sep p = some $ rewrite sym (orTrueTrue c) in endBy1 {c} sep p = some $ rewrite sym (orTrueTrue c) in
p <* sep p <* sep
export export
endBy : {c : Bool} -> endBy : {c : Bool} ->
(sep : Grammar tok True s) -> (sep : Grammar state tok True s) ->
(p : Grammar tok c a) -> (p : Grammar state tok c a) ->
Grammar tok False (List a) Grammar state tok False (List a)
endBy sep p = option [] $ forget <$> endBy1 sep p endBy sep p = option [] $ forget <$> endBy1 sep p
||| Parse an instance of `p` that is between `left` and `right`. ||| Parse an instance of `p` that is between `left` and `right`.
export export
between : {c : _} -> between : {c : Bool} ->
(left : Grammar tok True l) -> (left : Grammar state tok True l) ->
(right : Inf (Grammar tok True r)) -> (right : Grammar state tok True r) ->
(p : Inf (Grammar tok c a)) -> (p : Grammar state tok c a) ->
Grammar tok True a Grammar state tok True a
between left right contents = left *> contents <* right between left right contents = left *> contents <* right
export
location : Grammar state token False (Int, Int)
location = startBounds <$> position
export
endLocation : Grammar state token False (Int, Int)
endLocation = endBounds <$> position
export
column : Grammar state token False Int
column = snd <$> location
public export
when : Bool -> Lazy (Grammar state token False ()) -> Grammar state token False ()
when True y = y
when False y = pure ()

View File

@ -1,45 +1,66 @@
module Text.Parser.Core module Text.Parser.Core
import public Control.Delayed import Data.Bool
import Data.List import Data.List
import Data.List1
import public Control.Delayed
import public Text.Bounded
%default total
-- TODO: Add some primitives for helping with error messages.
-- e.g. perhaps set a string to state what we're currently trying to
-- parse, or to say what the next expected token is in words
||| Description of a language's grammar. The `tok` parameter is the type ||| Description of a language's grammar. The `tok` parameter is the type
||| of tokens, and the `consumes` flag is True if the language is guaranteed ||| of tokens, and the `consumes` flag is True if the language is guaranteed
||| to be non-empty - that is, successfully parsing the language is guaranteed ||| to be non-empty - that is, successfully parsing the language is guaranteed
||| to consume some input. ||| to consume some input.
public export export
data Grammar : (tok : Type) -> (consumes : Bool) -> Type -> Type where data Grammar : (state : Type) -> (tok : Type) -> (consumes : Bool) -> Type -> Type where
Empty : (val : ty) -> Grammar tok False ty Empty : (val : ty) -> Grammar state tok False ty
Terminal : String -> (tok -> Maybe a) -> Grammar tok True a Terminal : String -> (tok -> Maybe a) -> Grammar state tok True a
NextIs : String -> (tok -> Bool) -> Grammar tok False tok NextIs : String -> (tok -> Bool) -> Grammar state tok False tok
EOF : Grammar tok False () EOF : Grammar state tok False ()
Fail : Bool -> String -> Grammar tok c ty Fail : (location : Maybe Bounds) -> (fatal : Bool) -> String -> Grammar state tok c ty
Commit : Grammar tok False () Try : Grammar state tok c ty -> Grammar state tok c ty
MustWork : Grammar tok c a -> Grammar tok c a
SeqEat : {c2 : _} -> Grammar tok True a -> Inf (a -> Grammar tok c2 b) -> Commit : Grammar state tok False ()
Grammar tok True b MustWork : Grammar state tok c a -> Grammar state tok c a
SeqEmpty : {c1, c2 : _} -> Grammar tok c1 a -> (a -> Grammar tok c2 b) ->
Grammar tok (c1 || c2) b
ThenEat : {c2 : _} -> Grammar tok True () -> Inf (Grammar tok c2 b) -> SeqEat : {c2 : Bool} ->
Grammar tok True b Grammar state tok True a -> Inf (a -> Grammar state tok c2 b) ->
ThenEmpty : {c1, c2 : _} -> Grammar tok c1 () -> Grammar tok c2 b -> Grammar state tok True b
Grammar tok (c1 || c2) b SeqEmpty : {c1, c2 : Bool} ->
Grammar state tok c1 a -> (a -> Grammar state tok c2 b) ->
Grammar state tok (c1 || c2) b
Alt : {c1, c2 : _} -> Grammar tok c1 ty -> Grammar tok c2 ty -> ThenEat : {c2 : Bool} ->
Grammar tok (c1 && c2) ty Grammar state tok True () -> Inf (Grammar state tok c2 a) ->
Grammar state tok True a
ThenEmpty : {c1, c2 : Bool} ->
Grammar state tok c1 () -> Grammar state tok c2 a ->
Grammar state tok (c1 || c2) a
Alt : {c1, c2 : Bool} ->
Grammar state tok c1 ty -> Lazy (Grammar state tok c2 ty) ->
Grammar state tok (c1 && c2) ty
Bounds : Grammar state tok c ty -> Grammar state tok c (WithBounds ty)
Position : Grammar state tok False Bounds
Act : state -> Grammar state tok False ()
||| Sequence two grammars. If either consumes some input, the sequence is ||| Sequence two grammars. If either consumes some input, the sequence is
||| guaranteed to consume some input. If the first one consumes input, the ||| guaranteed to consume some input. If the first one consumes input, the
||| second is allowed to be recursive (because it means some input has been ||| second is allowed to be recursive (because it means some input has been
||| consumed and therefore the input is smaller) ||| consumed and therefore the input is smaller)
public export %inline %tcinline export %inline
(>>=) : {c1, c2 : Bool} -> (>>=) : {c1, c2 : Bool} ->
Grammar tok c1 a -> Grammar state tok c1 a ->
inf c1 (a -> Grammar tok c2 b) -> inf c1 (a -> Grammar state tok c2 b) ->
Grammar tok (c1 || c2) b Grammar state tok (c1 || c2) b
(>>=) {c1 = False} = SeqEmpty (>>=) {c1 = False} = SeqEmpty
(>>=) {c1 = True} = SeqEat (>>=) {c1 = True} = SeqEat
@ -49,97 +70,117 @@ public export %inline %tcinline
||| consumed and therefore the input is smaller) ||| consumed and therefore the input is smaller)
public export %inline %tcinline public export %inline %tcinline
(>>) : {c1, c2 : Bool} -> (>>) : {c1, c2 : Bool} ->
Grammar tok c1 () -> Grammar state tok c1 () ->
inf c1 (Grammar tok c2 a) -> inf c1 (Grammar state tok c2 a) ->
Grammar tok (c1 || c2) a Grammar state tok (c1 || c2) a
(>>) {c1 = False} = ThenEmpty (>>) {c1 = False} = ThenEmpty
(>>) {c1 = True} = ThenEat (>>) {c1 = True} = ThenEat
||| Sequence two grammars. If either consumes some input, the sequence is ||| Sequence two grammars. If either consumes some input, the sequence is
||| guaranteed to consume input. This is an explicitly non-infinite version ||| guaranteed to consume input. This is an explicitly non-infinite version
||| of `>>=`. ||| of `>>=`.
export export %inline
seq : {c1, c2 : _} -> Grammar tok c1 a -> seq : {c1,c2 : Bool} ->
(a -> Grammar tok c2 b) -> Grammar state tok c1 a ->
Grammar tok (c1 || c2) b (a -> Grammar state tok c2 b) ->
Grammar state tok (c1 || c2) b
seq = SeqEmpty seq = SeqEmpty
||| Sequence a grammar followed by the grammar it returns. ||| Sequence a grammar followed by the grammar it returns.
export export %inline
join : {c1, c2 : Bool} -> join : {c1,c2 : Bool} ->
Grammar tok c1 (Grammar tok c2 a) -> Grammar state tok c1 (Grammar state tok c2 a) ->
Grammar tok (c1 || c2) a Grammar state tok (c1 || c2) a
join {c1 = False} p = SeqEmpty p id join {c1 = False} p = SeqEmpty p id
join {c1 = True} p = SeqEat p id join {c1 = True} p = SeqEat p id
||| Give two alternative grammars. If both consume, the combination is
||| guaranteed to consume.
export
(<|>) : {c1, c2 : _} ->
Grammar tok c1 ty ->
Grammar tok c2 ty ->
Grammar tok (c1 && c2) ty
(<|>) = Alt
||| Allows the result of a grammar to be mapped to a different value. ||| Allows the result of a grammar to be mapped to a different value.
export export
{c : _} -> Functor (Grammar tok c) where {c : _} ->
Functor (Grammar state tok c) where
map f (Empty val) = Empty (f val) map f (Empty val) = Empty (f val)
map f (Fail fatal msg) = Fail fatal msg map f (Fail bd fatal msg) = Fail bd fatal msg
map f (Try g) = Try (map f g)
map f (MustWork g) = MustWork (map f g) map f (MustWork g) = MustWork (map f g)
map f (Terminal msg g) = Terminal msg (\t => map f (g t)) map f (Terminal msg g) = Terminal msg (map f . g)
map f (Alt x y) = Alt (map f x) (map f y) map f (Alt x y) = Alt (map f x) (map f y)
map f (SeqEat act next) map f (SeqEat act next)
= SeqEat act (\val => map f (next val)) = SeqEat act (\val => map f (next val))
map f (SeqEmpty act next) map f (SeqEmpty act next)
= SeqEmpty act (\val => map f (next val)) = SeqEmpty act (\ val => map f (next val))
map f (ThenEat act next) map f (ThenEat act next)
= ThenEat act (map f next) = ThenEat act (map f next)
map f (ThenEmpty act next) map f (ThenEmpty act next)
= ThenEmpty act (map f next) = ThenEmpty act (map f next)
map {c} f (Bounds act)
= rewrite sym $ orFalseNeutral c in
SeqEmpty (Bounds act) (Empty . f) -- Bounds (map f act)
-- The remaining constructors (NextIs, EOF, Commit) have a fixed type, -- The remaining constructors (NextIs, EOF, Commit) have a fixed type,
-- so a sequence must be used. -- so a sequence must be used.
map {c = False} f p = SeqEmpty p (Empty . f) map {c = False} f p = SeqEmpty p (Empty . f)
||| Give two alternative grammars. If both consume, the combination is
||| guaranteed to consume.
export %inline
(<|>) : {c1,c2 : Bool} ->
Grammar state tok c1 ty ->
Lazy (Grammar state tok c2 ty) ->
Grammar state tok (c1 && c2) ty
(<|>) = Alt
infixr 2 <||>
||| Take the tagged disjunction of two grammars. If both consume, the
||| combination is guaranteed to consume.
export
(<||>) : {c1,c2 : Bool} ->
Grammar state tok c1 a ->
Lazy (Grammar state tok c2 b) ->
Grammar state tok (c1 && c2) (Either a b)
(<||>) p q = (Left <$> p) <|> (Right <$> q)
||| Sequence a grammar with value type `a -> b` and a grammar ||| Sequence a grammar with value type `a -> b` and a grammar
||| with value type `a`. If both succeed, apply the function ||| with value type `a`. If both succeed, apply the function
||| from the first grammar to the value from the second grammar. ||| from the first grammar to the value from the second grammar.
||| Guaranteed to consume if either grammar consumes. ||| Guaranteed to consume if either grammar consumes.
export export %inline
(<*>) : {c1, c2 : _} -> (<*>) : {c1, c2 : Bool} ->
Grammar tok c1 (a -> b) -> Grammar state tok c1 (a -> b) ->
Inf (Grammar tok c2 a) -> Grammar state tok c2 a ->
Grammar tok (c1 || c2) b Grammar state tok (c1 || c2) b
(<*>) {c1 = False} x y = SeqEmpty x (\f => map f y) (<*>) x y = SeqEmpty x (\f => map f y)
(<*>) {c1 = True } x y = SeqEmpty x (\f => map f (Force y))
||| Sequence two grammars. If both succeed, use the value of the first one. ||| Sequence two grammars. If both succeed, use the value of the first one.
||| Guaranteed to consume if either grammar consumes. ||| Guaranteed to consume if either grammar consumes.
export export %inline
(<*) : {c1, c2 : _} -> (<*) : {c1,c2 : Bool} ->
Grammar tok c1 a -> Grammar state tok c1 a ->
Inf (Grammar tok c2 b) -> Grammar state tok c2 b ->
Grammar tok (c1 || c2) a Grammar state tok (c1 || c2) a
(<*) x y = map const x <*> y (<*) x y = map const x <*> y
||| Sequence two grammars. If both succeed, use the value of the second one. ||| Sequence two grammars. If both succeed, use the value of the second one.
||| Guaranteed to consume if either grammar consumes. ||| Guaranteed to consume if either grammar consumes.
export export %inline
(*>) : {c1, c2 : _} -> (*>) : {c1,c2 : Bool} ->
Grammar tok c1 a -> Grammar state tok c1 a ->
Inf (Grammar tok c2 b) -> Grammar state tok c2 b ->
Grammar tok (c1 || c2) b Grammar state tok (c1 || c2) b
(*>) x y = map (const id) x <*> y (*>) x y = map (const id) x <*> y
export %inline
act : state -> Grammar state tok False ()
act = Act
||| Produce a grammar that can parse a different type of token by providing a ||| Produce a grammar that can parse a different type of token by providing a
||| function converting the new token type into the original one. ||| function converting the new token type into the original one.
export export
mapToken : (a -> b) -> Grammar b c ty -> Grammar a c ty mapToken : (a -> b) -> Grammar state b c ty -> Grammar state a c ty
mapToken f (Empty val) = Empty val mapToken f (Empty val) = Empty val
mapToken f (Terminal msg g) = Terminal msg (g . f) mapToken f (Terminal msg g) = Terminal msg (g . f)
mapToken f (NextIs msg g) = SeqEmpty (NextIs msg (g . f)) (Empty . f) mapToken f (NextIs msg g) = SeqEmpty (NextIs msg (g . f)) (Empty . f)
mapToken f EOF = EOF mapToken f EOF = EOF
mapToken f (Fail fatal msg) = Fail fatal msg mapToken f (Fail bd fatal msg) = Fail bd fatal msg
mapToken f (Try g) = Try (mapToken f g)
mapToken f (MustWork g) = MustWork (mapToken f g) mapToken f (MustWork g) = MustWork (mapToken f g)
mapToken f Commit = Commit mapToken f Commit = Commit
mapToken f (SeqEat act next) mapToken f (SeqEat act next)
@ -151,177 +192,183 @@ mapToken f (ThenEat act next)
mapToken f (ThenEmpty act next) mapToken f (ThenEmpty act next)
= ThenEmpty (mapToken f act) (mapToken f next) = ThenEmpty (mapToken f act) (mapToken f next)
mapToken f (Alt x y) = Alt (mapToken f x) (mapToken f y) mapToken f (Alt x y) = Alt (mapToken f x) (mapToken f y)
mapToken f (Bounds act) = Bounds (mapToken f act)
mapToken f Position = Position
mapToken f (Act action) = Act action
||| Always succeed with the given value. ||| Always succeed with the given value.
export export %inline
pure : (val : ty) -> Grammar tok False ty pure : (val : ty) -> Grammar state tok False ty
pure = Empty pure = Empty
||| Check whether the next token satisfies a predicate ||| Check whether the next token satisfies a predicate
export export %inline
nextIs : String -> (tok -> Bool) -> Grammar tok False tok nextIs : String -> (tok -> Bool) -> Grammar state tok False tok
nextIs = NextIs nextIs = NextIs
||| Look at the next token in the input ||| Look at the next token in the input
export export %inline
peek : Grammar tok False tok peek : Grammar state tok False tok
peek = nextIs "Unrecognised token" (const True) peek = nextIs "Unrecognised token" (const True)
||| Succeeds if running the predicate on the next token returns Just x, ||| Succeeds if running the predicate on the next token returns Just x,
||| returning x. Otherwise fails. ||| returning x. Otherwise fails.
export export %inline
terminal : String -> (tok -> Maybe a) -> Grammar tok True a terminal : String -> (tok -> Maybe a) -> Grammar state tok True a
terminal = Terminal terminal = Terminal
||| Always fail with a message ||| Always fail with a message
export export %inline
fail : String -> Grammar tok c ty fail : String -> Grammar state tok c ty
fail = Fail False fail = Fail Nothing False
export ||| Always fail with a message and a location
fatalError : String -> Grammar tok c ty export %inline
fatalError = Fail True failLoc : Bounds -> String -> Grammar state tok c ty
failLoc b = Fail (Just b) False
||| Fail with no possibility for recovery (i.e.
||| no alternative parsing can succeed).
export %inline
fatalError : String -> Grammar state tok c ty
fatalError = Fail Nothing True
||| Fail with no possibility for recovery (i.e.
||| no alternative parsing can succeed).
export %inline
fatalLoc : Bounds -> String -> Grammar state tok c ty
fatalLoc b = Fail (Just b) True
||| Catch a fatal error
export %inline
try : Grammar state tok c ty -> Grammar state tok c ty
try = Try
||| Succeed if the input is empty ||| Succeed if the input is empty
export export %inline
eof : Grammar tok False () eof : Grammar state tok False ()
eof = EOF eof = EOF
||| Commit to an alternative; if the current branch of an alternative ||| Commit to an alternative; if the current branch of an alternative
||| fails to parse, no more branches will be tried ||| fails to parse, no more branches will be tried
export export %inline
commit : Grammar tok False () commit : Grammar state tok False ()
commit = Commit commit = Commit
||| If the parser fails, treat it as a fatal error ||| If the parser fails, treat it as a fatal error
export export %inline
mustWork : Grammar tok c ty -> Grammar tok c ty mustWork : {c : Bool} -> Grammar state tok c ty -> Grammar state tok c ty
mustWork = MustWork mustWork = MustWork
data ParseResult : List tok -> (consumes : Bool) -> Type -> Type where export %inline
Failure : {xs : List tok} -> bounds : Grammar state tok c ty -> Grammar state tok c (WithBounds ty)
(committed : Bool) -> (fatal : Bool) -> bounds = Bounds
(err : String) -> (rest : List tok) -> ParseResult xs c ty
EmptyRes : (committed : Bool) ->
(val : ty) -> (more : List tok) -> ParseResult more False ty
NonEmptyRes : {xs : List tok} ->
(committed : Bool) ->
(val : ty) -> (more : List tok) ->
ParseResult (x :: xs ++ more) c ty
-- Take the result of an alternative branch, reset the commit flag to export %inline
-- the commit flag from the outer alternative, and weaken the 'consumes' position : Grammar state tok False Bounds
-- flag to take both alternatives into account position = Position
weakenRes : {whatever, c : Bool} -> {xs : List tok} ->
(com' : Bool) ->
ParseResult xs c ty -> ParseResult xs (whatever && c) ty
weakenRes com' (Failure com fatal msg ts) = Failure com' fatal msg ts
weakenRes {whatever=True} com' (EmptyRes com val xs) = EmptyRes com' val xs
weakenRes {whatever=False} com' (EmptyRes com val xs) = EmptyRes com' val xs
weakenRes com' (NonEmptyRes {xs} com val more) = NonEmptyRes {xs} com' val more
doParse : (commit : Bool) -> public export
(act : Grammar tok c ty) -> data ParsingError tok = Error String (Maybe Bounds)
(xs : List tok) ->
ParseResult xs c ty data ParseResult : Type -> Type -> Type -> Type where
-- doParse com xs act with (sizeAccessible xs) Failure : (committed : Bool) -> (fatal : Bool) ->
doParse com (Empty val) xs = EmptyRes com val xs List1 (ParsingError tok) -> ParseResult state tok ty
doParse com (Fail fatal str) [] = Failure com fatal str [] Res : state -> (committed : Bool) ->
doParse com (Fail fatal str) (x :: xs) = Failure com fatal str (x :: xs) (val : WithBounds ty) -> (more : List (WithBounds tok)) -> ParseResult state tok ty
doParse com Commit xs = EmptyRes True () xs
doParse com (MustWork g) xs = mergeWith : WithBounds ty -> ParseResult state tok sy -> ParseResult state tok sy
let p' = doParse com g xs in mergeWith x (Res s committed val more) = Res s committed (mergeBounds x val) more
case p' of mergeWith x v = v
Failure com' _ msg ts => Failure com' True msg ts
doParse : Semigroup state => state -> (commit : Bool) ->
(act : Grammar state tok c ty) ->
(xs : List (WithBounds tok)) ->
ParseResult state tok ty
doParse s com (Empty val) xs = Res s com (irrelevantBounds val) xs
doParse s com (Fail location fatal str) xs
= Failure com fatal (Error str (location <|> (bounds <$> head' xs)) ::: Nil)
doParse s com (Try g) xs = case doParse s com g xs of
-- recover from fatal match but still propagate the 'commit'
Failure com _ errs => Failure com False errs
res => res res => res
doParse com (Terminal err f) [] = Failure com False "End of input" [] doParse s com Commit xs = Res s True (irrelevantBounds ()) xs
doParse com (Terminal err f) (x :: xs) doParse s com (MustWork g) xs =
= maybe case assert_total (doParse s com g xs) of
(Failure com False err (x :: xs)) Failure com' _ errs => Failure com' True errs
(\a => NonEmptyRes com {xs=[]} a xs) res => res
(f x) doParse s com (Terminal err f) [] = Failure com False (Error "End of input" Nothing ::: Nil)
doParse com EOF [] = EmptyRes com () [] doParse s com (Terminal err f) (x :: xs) =
doParse com EOF (x :: xs) case f x.val of
= Failure com False "Expected end of input" (x :: xs) Nothing => Failure com False (Error err (Just x.bounds) ::: Nil)
doParse com (NextIs err f) [] = Failure com False "End of input" [] Just a => Res s com (const a <$> x) xs
doParse com (NextIs err f) (x :: xs) doParse s com EOF [] = Res s com (irrelevantBounds ()) []
= if f x doParse s com EOF (x :: xs) = Failure com False (Error "Expected end of input" (Just x.bounds) ::: Nil)
then EmptyRes com x (x :: xs) doParse s com (NextIs err f) [] = Failure com False (Error "End of input" Nothing ::: Nil)
else Failure com False err (x :: xs) doParse s com (NextIs err f) (x :: xs)
doParse com (Alt {c1} {c2} x y) xs = if f x.val
= let p' = doParse False x xs in then Res s com (removeIrrelevance x) (x :: xs)
case p' of else Failure com False (Error err (Just x.bounds) ::: Nil)
Failure com' fatal msg ts doParse s com (Alt {c1} {c2} x y) xs
= case doParse s False x xs of
Failure com' fatal errs
=> if com' || fatal => if com' || fatal
-- If the alternative had committed, don't try the -- If the alternative had committed, don't try the
-- other branch (and reset commit flag) -- other branch (and reset commit flag)
then Failure com fatal msg ts then Failure com fatal errs
else weakenRes {whatever = c1} com (doParse False y xs) else case (assert_total doParse s False y xs) of
(Failure com'' fatal' errs') => if com'' || fatal'
-- Only add the errors together if the second branch
-- is also non-committed and non-fatal.
then Failure com'' fatal' errs'
else Failure False False (errs ++ errs')
(Res s _ val xs) => Res s com val xs
-- Successfully parsed the first option, so use the outer commit flag -- Successfully parsed the first option, so use the outer commit flag
EmptyRes _ val xs => EmptyRes com val xs Res s _ val xs => Res s com val xs
NonEmptyRes {xs=xs'} _ val more => NonEmptyRes {xs=xs'} com val more doParse s com (SeqEmpty act next) xs
doParse com (SeqEmpty {c1} {c2} act next) xs = case assert_total (doParse s com act xs) of
= let p' = assert_total (doParse {c = c1} com act xs) in Failure com fatal errs => Failure com fatal errs
case p' of Res s com v xs =>
Failure com fatal msg ts => Failure com fatal msg ts mergeWith v (assert_total $ doParse s com (next v.val) xs)
EmptyRes com val xs => assert_total (doParse com (next val) xs) doParse s com (SeqEat act next) xs
NonEmptyRes {x} {xs=ys} com val more => = case assert_total (doParse s com act xs) of
case (assert_total (doParse com (next val) more)) of Failure com fatal errs => Failure com fatal errs
Failure com' fatal msg ts => Failure com' fatal msg ts Res s com v xs =>
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more mergeWith v (assert_total $ doParse s com (next v.val) xs)
NonEmptyRes {x=x1} {xs=xs1} com' val more' => doParse s com (ThenEmpty act next) xs
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in = case assert_total (doParse s com act xs) of
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more' Failure com fatal errs => Failure com fatal errs
doParse com (SeqEat act next) xs with (doParse com act xs) Res s com v xs =>
doParse com (SeqEat act next) xs | Failure com' fatal msg ts mergeWith v (assert_total $ doParse s com next xs)
= Failure com' fatal msg ts doParse s com (ThenEat act next) xs
doParse com (SeqEat act next) (x :: (ys ++ more)) | (NonEmptyRes {xs=ys} com' val more) = case assert_total (doParse s com act xs) of
= let p' = assert_total (doParse com' (next val) more) in Failure com fatal errs => Failure com fatal errs
case p' of Res s com v xs =>
Failure com' fatal msg ts => Failure com' fatal msg ts mergeWith v (assert_total $ doParse s com next xs)
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more doParse s com (Bounds act) xs
NonEmptyRes {x=x1} {xs=xs1} com' val more' => = case assert_total (doParse s com act xs) of
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in Failure com fatal errs => Failure com fatal errs
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more' Res s com v xs => Res s com (const v <$> v) xs
doParse com (ThenEmpty {c1} {c2} act next) xs doParse s com Position [] = Failure com False (Error "End of input" Nothing ::: Nil)
= let p' = assert_total (doParse {c = c1} com act xs) in doParse s com Position (x :: xs)
case p' of = Res s com (irrelevantBounds x.bounds) (x :: xs)
Failure com fatal msg ts => Failure com fatal msg ts doParse s com (Act action) xs = Res (s <+> action) com (irrelevantBounds ()) xs
EmptyRes com val xs => assert_total (doParse com next xs)
NonEmptyRes {x} {xs=ys} com val more =>
case (assert_total (doParse com next more)) of
Failure com' fatal msg ts => Failure com' fatal msg ts
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
doParse com (ThenEat act next) xs with (doParse com act xs)
doParse com (ThenEat act next) xs | Failure com' fatal msg ts
= Failure com' fatal msg ts
doParse com (ThenEat act next) (x :: (ys ++ more)) | (NonEmptyRes {xs=ys} com' val more)
= let p' = assert_total (doParse com' next more) in
case p' of
Failure com' fatal msg ts => Failure com' fatal msg ts
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
-- This next line is not strictly necessary, but it stops the coverage
-- checker taking a really long time and eating lots of memory...
-- doParse _ _ _ = Failure True True "Help the coverage checker!" []
public export
data ParseError tok = Error String (List tok)
||| Parse a list of tokens according to the given grammar. If successful, ||| Parse a list of tokens according to the given grammar. If successful,
||| returns a pair of the parse result and the unparsed tokens (the remaining ||| returns a pair of the parse result and the unparsed tokens (the remaining
||| input). ||| input).
export export
parse : {c : _} -> (act : Grammar tok c ty) -> (xs : List tok) -> parse : {c : Bool} -> (act : Grammar () tok c ty) -> (xs : List (WithBounds tok)) ->
Either (ParseError tok) (ty, List tok) Either (List1 (ParsingError tok)) (ty, List (WithBounds tok))
parse act xs parse act xs
= case doParse False act xs of = case doParse neutral False act xs of
Failure _ _ msg ts => Left (Error msg ts) Failure _ _ errs => Left errs
EmptyRes _ val rest => pure (val, rest) Res _ _ v rest => Right (v.val, rest)
NonEmptyRes _ val rest => pure (val, rest)
export
parseWith : Monoid state => {c : Bool} -> (act : Grammar state tok c ty) -> (xs : List (WithBounds tok)) ->
Either (List1 (ParsingError tok)) (state, ty, List (WithBounds tok))
parseWith act xs
= case doParse neutral False act xs of
Failure _ _ errs => Left errs
Res s _ v rest => Right (s, v.val, rest)

View File

@ -413,10 +413,6 @@ Pretty Char where
export Pretty Nat where pretty = unsafeTextWithoutNewLines . show export Pretty Nat where pretty = unsafeTextWithoutNewLines . show
export Pretty Int where pretty = unsafeTextWithoutNewLines . show export Pretty Int where pretty = unsafeTextWithoutNewLines . show
export Pretty Integer where pretty = unsafeTextWithoutNewLines . show export Pretty Integer where pretty = unsafeTextWithoutNewLines . show
export Pretty Int8 where pretty = unsafeTextWithoutNewLines . show
export Pretty Int16 where pretty = unsafeTextWithoutNewLines . show
export Pretty Int32 where pretty = unsafeTextWithoutNewLines . show
export Pretty Int64 where pretty = unsafeTextWithoutNewLines . show
export Pretty Double where pretty = unsafeTextWithoutNewLines . show export Pretty Double where pretty = unsafeTextWithoutNewLines . show
export Pretty Bits8 where pretty = unsafeTextWithoutNewLines . show export Pretty Bits8 where pretty = unsafeTextWithoutNewLines . show
export Pretty Bits16 where pretty = unsafeTextWithoutNewLines . show export Pretty Bits16 where pretty = unsafeTextWithoutNewLines . show

View File

@ -3,6 +3,8 @@ module Text.PrettyPrint.Prettyprinter.Render.HTML
import Data.List import Data.List
import Data.String import Data.String
%default covering
export export
htmlEscape : String -> String htmlEscape : String -> String
htmlEscape s = fastAppend $ reverse $ go [] s htmlEscape s = fastAppend $ reverse $ go [] s

View File

@ -6,7 +6,7 @@ import public Control.ANSI
import Control.Monad.ST import Control.Monad.ST
import Text.PrettyPrint.Prettyprinter.Doc import Text.PrettyPrint.Prettyprinter.Doc
%default total %default covering
public export public export
AnsiStyle : Type AnsiStyle : Type

View File

@ -68,6 +68,7 @@ sdocToTreeParser (SLine i rest) = case sdocToTreeParser rest of
(Just tree, rest') => (Just $ STConcat [STLine i, tree], rest') (Just tree, rest') => (Just $ STConcat [STLine i, tree], rest')
(Nothing, rest') => (Just $ STLine i, rest') (Nothing, rest') => (Just $ STLine i, rest')
sdocToTreeParser (SAnnPush ann rest) = case sdocToTreeParser rest of sdocToTreeParser (SAnnPush ann rest) = case sdocToTreeParser rest of
(tree, Nothing) => (Nothing, Nothing)
(Just tree, Nothing) => (Just $ STAnn ann tree, Nothing) (Just tree, Nothing) => (Just $ STAnn ann tree, Nothing)
(Just tree, Just rest') => case sdocToTreeParser rest' of (Just tree, Just rest') => case sdocToTreeParser rest' of
(Just tree', rest'') => (Just $ STConcat [STAnn ann tree, tree'], rest'') (Just tree', rest'') => (Just $ STConcat [STAnn ann tree, tree'], rest'')

View File

@ -101,8 +101,10 @@ angles : Doc ann -> Doc ann
angles = enclose langle rangle angles = enclose langle rangle
export export
brackets : Doc ann -> Doc ann brackets : {default lbracket ldelim : Doc ann} ->
brackets = enclose lbracket rbracket {default rbracket rdelim : Doc ann} ->
Doc ann -> Doc ann
brackets {ldelim, rdelim} = enclose ldelim rdelim
export export
braces : Doc ann -> Doc ann braces : Doc ann -> Doc ann

View File

@ -1,5 +1,7 @@
module Text.Quantity module Text.Quantity
%default total
public export public export
record Quantity where record Quantity where
constructor Qty constructor Qty

View File

@ -1,5 +1,7 @@
module Text.Token module Text.Token
%default total
||| For a type `kind`, specify a way of converting the recognised ||| For a type `kind`, specify a way of converting the recognised
||| string into a value. ||| string into a value.
||| |||

View File

@ -159,17 +159,20 @@ modules = Control.ANSI,
System.Random, System.Random,
System.Path, System.Path,
Text.Token, Text.Bounded,
Text.Quantity,
Text.Parser,
Text.Lexer, Text.Lexer,
Text.Parser.Core,
Text.Lexer.Core, Text.Lexer.Core,
Text.Lexer.Tokenizer,
Text.Literate,
Text.Parser,
Text.Parser.Core,
Text.PrettyPrint.Prettyprinter, Text.PrettyPrint.Prettyprinter,
Text.PrettyPrint.Prettyprinter.Doc, Text.PrettyPrint.Prettyprinter.Doc,
Text.PrettyPrint.Prettyprinter.Symbols,
Text.PrettyPrint.Prettyprinter.Util,
Text.PrettyPrint.Prettyprinter.SimpleDocTree,
Text.PrettyPrint.Prettyprinter.Render.HTML, Text.PrettyPrint.Prettyprinter.Render.HTML,
Text.PrettyPrint.Prettyprinter.Render.String, Text.PrettyPrint.Prettyprinter.Render.String,
Text.PrettyPrint.Prettyprinter.Render.Terminal Text.PrettyPrint.Prettyprinter.Render.Terminal,
Text.PrettyPrint.Prettyprinter.SimpleDocTree,
Text.PrettyPrint.Prettyprinter.Symbols,
Text.PrettyPrint.Prettyprinter.Util,
Text.Quantity,
Text.Token