Update contrib Text.Parser to match Library.Text.Parser (#1808)

Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org>
Alex Humphreys 2021-08-06 09:03:13 +00:00 committed by GitHub
parent ca95781a74
commit f3855d7100
24 changed files with 1054 additions and 437 deletions

View File

@@ -7,7 +7,6 @@ import Data.String
%default total
infixl 5 +>
infixr 5 <+
@@ -94,20 +93,14 @@ public export
indentLines : (n : Nat) -> String -> String
indentLines n str = unlines $ map (indent n) $ lines str
||| Return a string of the given character repeated
||| `n` times.
export
fastReplicate : (n : Nat) -> Char -> String
fastReplicate n c = fastPack $ replicate n c
||| Left-justify a string to the given length, using the
||| specified fill character on the right.
export
justifyLeft : Nat -> Char -> String -> String
justifyLeft n c s = s ++ fastReplicate (n `minus` length s) c
justifyLeft n c s = s ++ replicate (n `minus` length s) c
||| Right-justify a string to the given length, using the
||| specified fill character on the left.
export
justifyRight : Nat -> Char -> String -> String
justifyRight n c s = fastReplicate (n `minus` length s) c ++ s
justifyRight n c s = replicate (n `minus` length s) c ++ s
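Behaviour is unchanged by dropping `fastReplicate`, since it was a thin wrapper over `replicate`. A minimal sketch of the two combinators (assuming contrib's `Data.String.Extra` is where they live):

```
import Data.String.Extra

-- Pad "abc" to width 6 with '.' on either side.
main : IO ()
main = do
  printLn (justifyLeft 6 '.' "abc")   -- "abc..."
  printLn (justifyRight 6 '.' "abc")  -- "...abc"
```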

View File

@@ -6,6 +6,8 @@ import Language.JSON.Parser
import public Language.JSON.Data
import public Text.Bounded
%default total
||| Parse a JSON string.

View File

@@ -34,8 +34,8 @@ jsonTokenMap = toTokenMap $
]
export
lexJSON : String -> Maybe (List JSONToken)
lexJSON : String -> Maybe (List (WithBounds JSONToken))
lexJSON str
= case lex jsonTokenMap str of
(tokens, _, _, "") => Just $ map TokenData.tok tokens
(tokens, _, _, "") => Just $ tokens
_ => Nothing
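A sketch of the new output shape (assuming contrib's `Language.JSON.Lexer` and the `Text.Bounded` record shown later in this diff): every token now carries its source span.

```
import Language.JSON.Lexer
import Text.Bounded

-- Bounds of the first token of a lexed document, if any.
firstSpan : Maybe Bounds
firstSpan = case lexJSON "[1, 2]" of
              Just (t :: _) => Just t.bounds
              _             => Nothing
```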

View File

@@ -10,20 +10,20 @@ import public Language.JSON.Tokens
%default total
private
punct : Punctuation -> Grammar JSONToken True ()
punct : Punctuation -> Grammar state JSONToken True ()
punct p = match $ JTPunct p
private
rawString : Grammar JSONToken True String
rawString : Grammar state JSONToken True String
rawString = do mstr <- match JTString
the (Grammar _ False _) $
the (Grammar _ _ False _) $
case mstr of
Just str => pure str
Nothing => fail "invalid string"
mutual
private
json : Grammar JSONToken True JSON
json : Grammar state JSONToken True JSON
json = object
<|> array
<|> string
@@ -32,14 +32,14 @@ mutual
<|> null
private
object : Grammar JSONToken True JSON
object : Grammar state JSONToken True JSON
object = do punct $ Curly Open
commit
props <- properties
punct $ Curly Close
pure $ JObject props
where
properties : Grammar JSONToken False (List (String, JSON))
properties : Grammar state JSONToken False (List (String, JSON))
properties = sepBy (punct Comma) $
do key <- rawString
punct Colon
@@ -47,34 +47,34 @@ mutual
pure (key, value)
private
array : Grammar JSONToken True JSON
array : Grammar state JSONToken True JSON
array = do punct (Square Open)
commit
vals <- values
punct (Square Close)
pure (JArray vals)
where
values : Grammar JSONToken False (List JSON)
values : Grammar state JSONToken False (List JSON)
values = sepBy (punct Comma) json
private
string : Grammar JSONToken True JSON
string : Grammar state JSONToken True JSON
string = map JString rawString
private
boolean : Grammar JSONToken True JSON
boolean : Grammar state JSONToken True JSON
boolean = map JBoolean $ match JTBoolean
private
number : Grammar JSONToken True JSON
number : Grammar state JSONToken True JSON
number = map JNumber $ match JTNumber
private
null : Grammar JSONToken True JSON
null : Grammar state JSONToken True JSON
null = map (const JNull) $ match JTNull
export
parseJSON : List JSONToken -> Maybe JSON
parseJSON : List (WithBounds JSONToken) -> Maybe JSON
parseJSON toks = case parse json $ filter (not . ignored) toks of
Right (j, []) => Just j
_ => Nothing
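`lexJSON` now produces exactly the `WithBounds` list that `parseJSON` consumes, so the two still compose directly; a hedged end-to-end sketch:

```
import Language.JSON.Data
import Language.JSON.Lexer
import Language.JSON.Parser

-- Lex, then parse; parseJSON itself filters out the ignored tokens.
parseDoc : String -> Maybe JSON
parseDoc str = lexJSON str >>= parseJSON
```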

View File

@@ -12,7 +12,7 @@ quo = is '"'
export
esc : Lexer -> Lexer
esc = escape '\\'
esc = escape (is '\\')
private
unicodeEscape : Lexer
@@ -36,7 +36,7 @@ jsonStringTokenMap = toTokenMap $
]
export
lexString : String -> Maybe (List JSONStringToken)
lexString : String -> Maybe (List (WithBounds JSONStringToken))
lexString x = case lex jsonStringTokenMap x of
(toks, _, _, "") => Just $ map TokenData.tok toks
(toks, _, _, "") => Just $ toks
_ => Nothing

View File

@@ -7,20 +7,20 @@ import Text.Parser
%default total
private
stringChar : Grammar JSONStringToken True Char
stringChar : Grammar state JSONStringToken True Char
stringChar = match JSTChar
<|> match JSTSimpleEscape
<|> match JSTUnicodeEscape
private
quotedString : Grammar JSONStringToken True String
quotedString : Grammar state JSONStringToken True String
quotedString = let q = match JSTQuote in
do chars <- between q q (many stringChar)
eof
pure $ pack chars
export
parseString : List JSONStringToken -> Maybe String
parseString : List (WithBounds JSONStringToken) -> Maybe String
parseString toks = case parse quotedString toks of
Right (str, []) => Just str
_ => Nothing

View File

@@ -2,6 +2,7 @@ module Language.JSON.Tokens
import Language.JSON.String
import Text.Token
import Text.Bounded
%default total
@@ -76,6 +77,6 @@ TokenKind JSONTokenKind where
tokValue JTIgnore _ = ()
export
ignored : JSONToken -> Bool
ignored (Tok JTIgnore _) = True
ignored : WithBounds JSONToken -> Bool
ignored (MkBounded (Tok JTIgnore _) _ _) = True
ignored _ = False
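For illustration, a small check of the updated `ignored`, using `irrelevantBounds` from `Text.Bounded` (this assumes `JTIgnore` and the `Tok` constructor are publicly visible, as the code above suggests):

```
import Language.JSON.Tokens
import Text.Bounded

-- A whitespace token is still skipped; the check now unwraps the bounds.
skipDemo : Bool
skipDemo = ignored (irrelevantBounds (Tok JTIgnore " "))
-- skipDemo == True
```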

View File

@@ -153,15 +153,15 @@ pathTokenMap = toTokenMap $
, (some $ non $ oneOf "/\\:?", PTText)
]
lexPath : String -> List PathToken
lexPath : String -> List (WithBounds PathToken)
lexPath str =
let
(tokens, _, _, _) = lex pathTokenMap str
in
map TokenData.tok tokens
tokens -- TokenData.tok tokens
-- match both '/' and '\\' regardless of the platform.
bodySeparator : Grammar PathToken True ()
bodySeparator : Grammar state PathToken True ()
bodySeparator = (match $ PTPunct '\\') <|> (match $ PTPunct '/')
-- Windows will automatically translate '/' to '\\'. And the verbatim prefix,
@@ -169,7 +169,7 @@ bodySeparator = (match $ PTPunct '\\') <|> (match $ PTPunct '/')
-- However, we just parse it and ignore it.
--
-- Example: \\?\
verbatim : Grammar PathToken True ()
verbatim : Grammar state PathToken True ()
verbatim =
do
ignore $ count (exactly 2) $ match $ PTPunct '\\'
@@ -178,7 +178,7 @@ verbatim =
pure ()
-- Example: \\server\share
unc : Grammar PathToken True Volume
unc : Grammar state PathToken True Volume
unc =
do
ignore $ count (exactly 2) $ match $ PTPunct '\\'
@@ -188,7 +188,7 @@ unc =
pure $ UNC server share
-- Example: \\?\server\share
verbatimUnc : Grammar PathToken True Volume
verbatimUnc : Grammar state PathToken True Volume
verbatimUnc =
do
verbatim
@@ -198,7 +198,7 @@ verbatimUnc =
pure $ UNC server share
-- Example: C:
disk : Grammar PathToken True Volume
disk : Grammar state PathToken True Volume
disk =
do
text <- match PTText
@@ -209,21 +209,21 @@ disk =
pure $ Disk (toUpper disk)
-- Example: \\?\C:
verbatimDisk : Grammar PathToken True Volume
verbatimDisk : Grammar state PathToken True Volume
verbatimDisk =
do
verbatim
disk <- disk
pure disk
parseVolume : Grammar PathToken True Volume
parseVolume : Grammar state PathToken True Volume
parseVolume =
verbatimUnc
<|> verbatimDisk
<|> unc
<|> disk
parseBody : Grammar PathToken True Body
parseBody : Grammar state PathToken True Body
parseBody =
do
text <- match PTText
@@ -232,7 +232,7 @@ parseBody =
"." => CurDir
normal => Normal normal
parsePath : Grammar PathToken False Path
parsePath : Grammar state PathToken False Path
parsePath =
do
vol <- optional parseVolume

View File

@@ -0,0 +1,91 @@
module Text.Bounded
%default total
public export
record Bounds where
constructor MkBounds
startLine : Int
startCol : Int
endLine : Int
endCol : Int
Show Bounds where
showPrec d (MkBounds sl sc el ec) =
showCon d "MkBounds" (concat [showArg sl, showArg sc, showArg el, showArg ec])
export
startBounds : Bounds -> (Int, Int)
startBounds b = (b.startLine, b.startCol)
export
endBounds : Bounds -> (Int, Int)
endBounds b = (b.endLine, b.endCol)
export
Eq Bounds where
(MkBounds sl sc el ec) == (MkBounds sl' sc' el' ec') =
sl == sl'
&& sc == sc'
&& el == el'
&& ec == ec'
public export
record WithBounds ty where
constructor MkBounded
val : ty
isIrrelevant : Bool
bounds : Bounds
export
start : WithBounds ty -> (Int, Int)
start = startBounds . bounds
export
end : WithBounds ty -> (Int, Int)
end = endBounds . bounds
export
Eq ty => Eq (WithBounds ty) where
(MkBounded val ir bd) == (MkBounded val' ir' bd') =
val == val' && ir == ir' && bd == bd'
export
Show ty => Show (WithBounds ty) where
showPrec d (MkBounded val ir bd) =
showCon d "MkBounded" (concat [showArg ir, showArg val, showArg bd])
export
Functor WithBounds where
map f (MkBounded val ir bd) = MkBounded (f val) ir bd
export
Foldable WithBounds where
foldr c n b = c b.val n
export
Traversable WithBounds where
traverse f (MkBounded v a bd) = (\ v => MkBounded v a bd) <$> f v
export
irrelevantBounds : ty -> WithBounds ty
irrelevantBounds x = MkBounded x True (MkBounds (-1) (-1) (-1) (-1))
export
removeIrrelevance : WithBounds ty -> WithBounds ty
removeIrrelevance (MkBounded val ir bd) = MkBounded val True bd
export
mergeBounds : WithBounds ty -> WithBounds ty' -> WithBounds ty'
mergeBounds (MkBounded _ True _) (MkBounded val True _) = irrelevantBounds val
mergeBounds (MkBounded _ True _) b2 = b2
mergeBounds b1 (MkBounded val True _) = const val <$> b1
mergeBounds b1 b2 =
let (ur, uc) = min (start b1) (start b2)
(lr, lc) = max (end b1) (end b2) in
MkBounded b2.val False (MkBounds ur uc lr lc)
export
joinBounds : WithBounds (WithBounds ty) -> WithBounds ty
joinBounds b = mergeBounds b b.val
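A small sketch of the merge semantics defined above: merging two relevant spans keeps the second value, the smallest start, and the largest end.

```
import Text.Bounded

combined : WithBounds ()
combined = mergeBounds (MkBounded () False (MkBounds 0 0 0 5))
                       (MkBounded () False (MkBounds 1 2 1 7))
-- combined.bounds == MkBounds 0 0 1 7
```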

View File

@@ -0,0 +1,69 @@
module Text.Distance.Levenshtein
import Data.List
import Data.Maybe
import Data.String
import Data.IOMatrix
import Data.List.Extra
%default total
||| Self-evidently correct but O(3 ^ (min m n)) complexity
spec : String -> String -> Nat
spec a b = loop (fastUnpack a) (fastUnpack b) where
loop : List Char -> List Char -> Nat
loop [] ys = length ys -- deletions
loop xs [] = length xs -- insertions
loop (x :: xs) (y :: ys)
= if x == y then loop xs ys -- match
else 1 + minimum [ loop (x :: xs) ys -- insert y
, loop xs (y :: ys) -- delete x
, loop xs ys -- substitute y for x
]
||| Dynamic programming
export
compute : HasIO io => String -> String -> io Nat
compute a b = do
let w = strLength a
let h = strLength b
-- In mat[i][j], we store the distance between
-- * the suffix of a of size i
-- * the suffix of b of size j
-- So we need a matrix of size (|a|+1) * (|b|+1)
mat <- new (w+1) (h+1)
-- Whenever one of the two suffixes of interest is empty, the only
-- winning move is to:
-- * delete all of the first
-- * insert all of the second
-- i.e. the cost is the length of the non-zero suffix
for_ [0..w] $ \ i => write mat i 0 i -- deletions
for_ [0..h] $ \ j => write mat 0 j j -- insertions
-- We introduce a specialised `read` for ease of use
let get = \i, j => case !(read {io} mat i j) of
Nothing => assert_total $
idris_crash "INTERNAL ERROR: Badly initialised matrix"
Just n => pure n
-- We fill the matrix from the bottom up, using the same formula we used
-- in the specification's `loop`.
for_ [1..h] $ \ j => do
for_ [1..w] $ \ i => do
-- here we change Levenshtein slightly so that we may only substitute
-- alpha / numerical characters for similar ones. This avoids suggesting
-- "#" as a replacement for an out of scope "n".
let cost = let c = assert_total $ strIndex a (i-1)
d = assert_total $ strIndex b (j-1)
in if c == d then 0 else
if isAlpha c && isAlpha d then 1 else
if isDigit c && isDigit d then 1 else 2
write mat i j $
minimum [ 1 + !(get i (j-1)) -- insert y
, 1 + !(get (i-1) j) -- delete x
, cost + !(get (i-1) (j-1)) -- equal or substitute y for x
]
-- Once the matrix is fully filled, we can simply read the top right corner
integerToNat . cast <$> get w h
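Usage is a one-liner in any `HasIO` context; with the modified cost function above, all-letter edits behave like plain Levenshtein:

```
import Text.Distance.Levenshtein

main : IO ()
main = do
  d <- Levenshtein.compute "kitten" "sitting"
  printLn d  -- 3: two letter-for-letter substitutions plus one insertion
```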

View File

@@ -8,6 +8,8 @@ import public Text.Lexer.Core
import public Text.Quantity
import public Text.Token
%default total
export
toTokenMap : List (Lexer, k) -> TokenMap (Token k)
toTokenMap = map $ \(l, kind) => (l, Tok kind)
@@ -37,22 +39,22 @@ non l = reject l <+> any
export
choiceMap : {c : Bool} ->
Foldable t => (a -> Recognise c) -> t a -> Recognise c
choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
f x <|> acc)
fail xs
choiceMap {c} f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
f x <|> acc)
fail xs
||| Recognise the first matching recogniser in a container. Consumes input if
||| recognisers in the list consume. Fails if the container is empty.
export
choice : {c : _} ->
Foldable t => t (Recognise c) -> Recognise c
choice = Lexer.choiceMap id
choice = choiceMap id
||| Sequence a list of recognisers. Guaranteed to consume input if the list is
||| non-empty and the recognisers consume.
export
concat : {c : _} ->
(xs : List (Recognise c)) -> Recognise (c && isCons xs)
(xs : List (Recognise c)) -> Recognise (isCons xs && c)
concat = Lexer.Core.concatMap id
||| Recognise a specific character.
@@ -124,6 +126,13 @@ mutual
many : Lexer -> Recognise False
many l = opt (some l)
||| Repeat the sub-lexer `l` one or more times until the lexer
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
||| /((?!`stopBefore`)`l`)\+/
export
someUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Lexer
someUntil stopBefore l = some (reject stopBefore <+> l)
||| Repeat the sub-lexer `l` zero or more times until the lexer
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
||| Not guaranteed to consume input.
@@ -140,14 +149,6 @@ export
manyThen : (stopAfter : Recognise c) -> (l : Lexer) -> Recognise c
manyThen stopAfter l = manyUntil stopAfter l <+> stopAfter
||| Recognise many instances of `l` until an instance of `end` is
||| encountered.
|||
||| Useful for defining comments.
export
manyTill : (l : Lexer) -> (end : Lexer) -> Recognise False
manyTill l end = end <|> opt (l <+> manyTill l end)
||| Recognise a sub-lexer repeated as specified by `q`. Fails if `q` has
||| `min` and `max` in the wrong order. Consumes input unless `min q` is zero.
||| /`l`{`q`}/
@@ -172,6 +173,18 @@ export
digits : Lexer
digits = some digit
||| Recognise a single binary digit
||| /[0-1]/
export
binDigit : Lexer
binDigit = pred (\c => c == '0' || c == '1')
||| Recognise one or more binary digits
||| /[0-1]+/
export
binDigits : Lexer
binDigits = some binDigit
||| Recognise a single hexadecimal digit
||| /[0-9A-Fa-f]/
export
@@ -188,13 +201,13 @@ hexDigits = some hexDigit
||| /[0-7]/
export
octDigit : Lexer
octDigit = pred isHexDigit
octDigit = pred isOctDigit
||| Recognise one or more octal digits
||| /[0-7]+/
export
octDigits : Lexer
octDigits = some hexDigit
octDigits = some octDigit
||| Recognise a single alpha character
||| /[A-Za-z]/
@@ -307,18 +320,19 @@ export
quote : (q : Lexer) -> (l : Lexer) -> Lexer
quote q l = surround q q l
||| Recognise an escape character (often '\\') followed by a sub-lexer
||| Recognise an escape sub-lexer (often '\\') followed by
||| another sub-lexer
||| /[`esc`]`l`/
export
escape : (esc : Char) -> Lexer -> Lexer
escape esc l = is esc <+> l
escape : (esc : Lexer) -> Lexer -> Lexer
escape esc l = esc <+> l
||| Recognise a string literal, including escaped characters.
||| (Note: doesn't yet handle escape sequences such as \123)
||| /"(\\\\.|.)\*?"/
export
stringLit : Lexer
stringLit = quote (is '"') (escape '\\' any <|> any)
stringLit = quote (is '"') (escape (is '\\') any <|> any)
||| Recognise a character literal, including escaped characters.
||| (Note: doesn't yet handle escape sequences such as \123)
@@ -326,7 +340,7 @@ stringLit = quote (is '"') (escape '\\' any <|> any)
export
charLit : Lexer
charLit = let q = '\'' in
is q <+> (escape '\\' (control <|> any) <|> isNot q) <+> is q
is q <+> (escape (is '\\') (control <|> any) <|> isNot q) <+> is q
where
lexStr : List String -> Lexer
lexStr [] = fail
@@ -348,12 +362,40 @@ export
intLit : Lexer
intLit = opt (is '-') <+> digits
||| Recognise a binary literal, prefixed by "0b"
||| /0b[0-1]+/
export
binLit : Lexer
binLit = exact "0b" <+> binDigits
||| Recognise a hexadecimal literal, prefixed by "0x" or "0X"
||| /0[Xx][0-9A-Fa-f]+/
export
hexLit : Lexer
hexLit = approx "0x" <+> hexDigits
||| Recognise an octal literal, prefixed by "0o"
||| /0o[0-7]+/
export
octLit : Lexer
octLit = exact "0o" <+> octDigits
export
digitsUnderscoredLit : Lexer
digitsUnderscoredLit = digits <+> many (is '_' <+> digits)
export
binUnderscoredLit : Lexer
binUnderscoredLit = binLit <+> many (is '_' <+> binDigits)
export
hexUnderscoredLit : Lexer
hexUnderscoredLit = hexLit <+> many (is '_' <+> hexDigits)
export
octUnderscoredLit : Lexer
octUnderscoredLit = octLit <+> many (is '_' <+> octDigits)
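A sketch of the new literal lexers in action, using the raw `TokenMap` form (a list of lexer/function pairs) so that no token kind is needed; `id` keeps the matched text as the token:

```
import Text.Lexer

lexBin : String -> (List (WithBounds String), (Int, Int, String))
lexBin = lex [(binUnderscoredLit, id)]
-- lexBin "0b1010_1111" yields one token whose val is "0b1010_1111"
```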
||| Recognise `start`, then recognise all input until a newline is encountered,
||| and consume the newline. Will succeed if end-of-input is encountered before
||| a newline.

View File

@@ -1,15 +1,19 @@
module Text.Lexer.Core
import public Control.Delayed
import Data.Bool
import Data.List
import Data.Maybe
import Data.Nat
import Data.String
import public Control.Delayed
import public Text.Bounded
%default total
||| A language of token recognisers.
||| @ consumes If `True`, this recogniser is guaranteed to consume at
||| least one character of input when it succeeds.
public export
export
data Recognise : (consumes : Bool) -> Type where
Empty : Recognise False
Fail : Recognise c
@@ -17,6 +21,7 @@ data Recognise : (consumes : Bool) -> Type where
Pred : (Char -> Bool) -> Recognise True
SeqEat : Recognise True -> Inf (Recognise e) -> Recognise True
SeqEmpty : Recognise e1 -> Recognise e2 -> Recognise (e1 || e2)
SeqSame : Recognise e -> Recognise e -> Recognise e
Alt : Recognise e1 -> Recognise e2 -> Recognise (e1 && e2)
||| A token recogniser. Guaranteed to consume at least one character.
@@ -24,8 +29,6 @@ public export
Lexer : Type
Lexer = Recognise True
-- %allow_overloads (<+>)
||| Sequence two recognisers. If either consumes a character, the sequence
||| is guaranteed to consume a character.
export %inline
@@ -34,11 +37,8 @@ export %inline
(<+>) {c1 = False} = SeqEmpty
(<+>) {c1 = True} = SeqEat
%allow_overloads (<|>)
||| Alternative recognisers. If both consume, the combination is guaranteed
||| to consumer a character.
%inline
||| to consume a character.
export
(<|>) : Recognise c1 -> Recognise c2 -> Recognise (c1 && c2)
(<|>) = Alt
@@ -49,13 +49,11 @@ fail : Recognise c
fail = Fail
||| Recognise no input (doesn't consume any input)
%inline
export
empty : Recognise False
empty = Empty
||| Recognise a character that matches a predicate
%inline
export
pred : (Char -> Bool) -> Lexer
pred = Pred
@@ -70,33 +68,25 @@ export
reject : Recognise c -> Recognise False
reject = Lookahead False
%allow_overloads concatMap
||| Sequence the recognisers resulting from applying a function to each element
||| of a list. The resulting recogniser will consume input if the produced
||| recognisers consume and the list is non-empty.
export
concatMap : (a -> Recognise c) -> (xs : List a) ->
Recognise (c && (isCons xs))
concatMap _ [] = rewrite andFalseFalse c in Empty
concatMap f (x :: xs)
= rewrite andTrueNeutral c in
rewrite sym (orSameAndRightNeutral c (isCons xs)) in
SeqEmpty (f x) (Core.concatMap f xs)
concatMap : (a -> Recognise c) -> (xs : List a) -> Recognise (isCons xs && c)
concatMap _ [] = Empty
concatMap f (x :: []) = f x
concatMap f (x :: xs@(_ :: _)) = SeqSame (f x) (concatMap f xs)
data StrLen : Type where
MkStrLen : String -> Nat -> StrLen
Show StrLen where
show (MkStrLen str n) = str ++ "(" ++ show n ++ ")"
getString : StrLen -> String
getString (MkStrLen str n) = str
strIndex : StrLen -> Nat -> Maybe Char
strIndex (MkStrLen str len) i
= if cast {to = Integer} i >= cast len then Nothing
else Just (assert_total (prim__strIndex str (cast i)))
else Just (assert_total (prim__strIndex str (cast i)))
mkStr : String -> StrLen
mkStr str = MkStrLen str (length str)
@@ -105,19 +95,9 @@ strTail : Nat -> StrLen -> StrLen
strTail start (MkStrLen str len)
= MkStrLen (substr start len str) (minus len start)
isJust : Maybe a -> Bool
isJust Nothing = False
isJust (Just x) = True
export
unpack' : String -> List Char
unpack' str
= case strUncons str of
Nothing => []
Just (x, xs) => x :: unpack' xs
-- If the string is recognised, returns the consumed characters
-- (in reverse order) and the remaining input
export
scan : Recognise c -> List Char -> List Char -> Maybe (List Char, List Char)
scan Empty tok str = pure (tok, str)
scan Fail tok str = Nothing
@@ -137,6 +117,9 @@ scan (SeqEat r1 r2) tok str
scan (SeqEmpty r1 r2) tok str
= do (tok', rest) <- scan r1 tok str
scan r2 tok' rest
scan (SeqSame r1 r2) tok str
= do (tok', rest) <- scan r1 tok str
scan r2 tok' rest
scan (Alt r1 r2) tok str
= maybe (scan r2 tok str) Just (scan r1 tok str)
@@ -148,27 +131,15 @@ public export
TokenMap : (tokenType : Type) -> Type
TokenMap tokenType = List (Lexer, String -> tokenType)
||| A token, and the line and column where it was in the input
public export
record TokenData a where
constructor MkToken
line : Int
col : Int
tok : a
export
Show a => Show (TokenData a) where
show t = show (line t) ++ ":" ++ show (col t) ++ ":" ++ show (tok t)
tokenise : (TokenData a -> Bool) ->
tokenise : (a -> Bool) ->
(line : Int) -> (col : Int) ->
List (TokenData a) -> TokenMap a ->
List Char -> (List (TokenData a), (Int, Int, List Char))
List (WithBounds a) -> TokenMap a ->
List Char -> (List (WithBounds a), (Int, Int, List Char))
tokenise pred line col acc tmap str
= case getFirstToken tmap str of
Just (tok, line', col', rest) =>
-- assert total because getFirstToken must consume something
if pred tok
if pred tok.val
then (reverse acc, (line, col, []))
else assert_total (tokenise pred line' col' (tok :: acc) tmap rest)
Nothing => (reverse acc, (line, col, str))
@@ -183,13 +154,15 @@ tokenise pred line col acc tmap str
(incol, _) => cast (length incol)
getFirstToken : TokenMap a -> List Char ->
Maybe (TokenData a, Int, Int, List Char)
Maybe (WithBounds a, Int, Int, List Char)
getFirstToken [] str = Nothing
getFirstToken ((lex, fn) :: ts) str
= case scan lex [] str of
Just (tok, rest) => Just (MkToken line col (fn (pack (reverse tok))),
line + cast (countNLs tok),
getCols tok col, rest)
Just (tok, rest) =>
let line' = line + cast (countNLs tok)
col' = getCols tok col in
Just (MkBounded (fn (fastPack (reverse tok))) False (MkBounds line col line' col'),
line', col', rest)
Nothing => getFirstToken ts str
||| Given a mapping from lexers to token generating functions (the
@@ -197,14 +170,14 @@ tokenise pred line col acc tmap str
||| and the line, column, and remainder of the input at the first point in the
||| string where there are no recognised tokens.
export
lex : TokenMap a -> String -> (List (TokenData a), (Int, Int, String))
lex : TokenMap a -> String -> (List (WithBounds a), (Int, Int, String))
lex tmap str
= let (ts, (l, c, str')) = tokenise (const False) 0 0 [] tmap (fastUnpack str) in
(ts, (l, c, pack str'))
= let (ts, (l, c, str')) = tokenise (const False) 0 0 [] tmap (unpack str) in
(ts, (l, c, fastPack str'))
export
lexTo : (TokenData a -> Bool) ->
TokenMap a -> String -> (List (TokenData a), (Int, Int, String))
lexTo : (a -> Bool) ->
TokenMap a -> String -> (List (WithBounds a), (Int, Int, String))
lexTo pred tmap str
= let (ts, (l, c, str')) = tokenise pred 0 0 [] tmap (fastUnpack str) in
(ts, (l, c, pack str'))
= let (ts, (l, c, str')) = tokenise pred 0 0 [] tmap (unpack str) in
(ts, (l, c, fastPack str'))
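The exported `scan` can also be probed directly; as `tokenise` above shows (it reverses before packing), the consumed characters come back in reverse order:

```
import Text.Lexer

scanDemo : Maybe (List Char, List Char)
scanDemo = scan (exact "ab") [] (unpack "abc")
-- Just (['b', 'a'], ['c'])
```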

View File

@@ -0,0 +1,153 @@
module Text.Lexer.Tokenizer
import Data.List
import Data.Either
import Data.Nat
import Data.String
import Data.String.Extra
import Text.Lexer.Core
import Text.Lexer
import Text.PrettyPrint.Prettyprinter
import Text.PrettyPrint.Prettyprinter.Util
import public Control.Delayed
import public Text.Bounded
%default total
||| Description of a language's tokenization rule.
export
data Tokenizer : (tokenType : Type) -> Type where
Match : Lexer -> (String -> tokenType) -> Tokenizer tokenType
Compose : (begin : Lexer) ->
(mapBegin : String -> tokenType) ->
(tagger : String -> tag) ->
(middle : Inf (tag -> Tokenizer tokenType)) ->
(end : tag -> Lexer) ->
(mapEnd : String -> tokenType) ->
Tokenizer tokenType
Alt : Tokenizer tokenType -> Lazy (Tokenizer tokenType) -> Tokenizer tokenType
||| Alternative tokenizer rules.
export
(<|>) : Tokenizer t -> Lazy (Tokenizer t) -> Tokenizer t
(<|>) = Alt
||| Match on a recogniser and cast the string to a token.
export
match : Lexer -> (String -> a) -> Tokenizer a
match = Match
||| Compose another tokenizer. The composed language is quoted between
||| a begin lexer and an end lexer. The begin token can be used to generate
||| the inner tokenizer and the end lexer.
export
compose : (begin : Lexer) ->
(mapBegin : String -> a) ->
(tagger : String -> tag) ->
(middle : Inf (tag -> Tokenizer a)) ->
(end : tag -> Lexer) ->
(mapEnd : String -> a) ->
Tokenizer a
compose = Compose
||| The reason why the tokenizer stopped making progress.
||| @ ComposeNotClosing carries the span of the unclosed begin token in the
||| form of `(startLine, startCol), (endLine, endCol)`.
public export
data StopReason = EndInput | NoRuleApply | ComposeNotClosing (Int, Int) (Int, Int)
export
Show StopReason where
show EndInput = "EndInput"
show NoRuleApply = "NoRuleApply"
show (ComposeNotClosing start end) = "ComposeNotClosing " ++ show start ++ " " ++ show end
export
Pretty StopReason where
pretty EndInput = pretty "EndInput"
pretty NoRuleApply = pretty "NoRuleApply"
pretty (ComposeNotClosing start end) = "ComposeNotClosing" <++> pretty start <++> pretty end
tokenise : Lexer ->
Tokenizer a ->
(line, col : Int) -> List (WithBounds a) ->
List Char ->
(List (WithBounds a), (StopReason, Int, Int, List Char))
tokenise reject tokenizer line col acc [] = (reverse acc, EndInput, (line, col, []))
tokenise reject tokenizer line col acc str
= case scan reject [] str of
Just _ => (reverse acc, (EndInput, line, col, str))
Nothing => case getFirstMatch tokenizer str of
Right (toks, line', col', rest) =>
-- assert total because getFirstMatch must consume something
assert_total (tokenise reject tokenizer line' col' (toks ++ acc) rest)
Left reason => (reverse acc, reason, (line, col, str))
where
countNLs : List Char -> Nat
countNLs str = List.length (filter (== '\n') str)
getCols : List Char -> Int -> Int
getCols x c
= case span (/= '\n') x of
(incol, []) => c + cast (length incol)
(incol, _) => cast (length incol)
-- get the next lexeme using the given `Lexer`, its position and the input
-- Returns the lexeme, the new position and the rest of the input
-- If the lexer fails, this returns `Nothing`
getNext : (lexer : Lexer) -> (line, col : Int) -> (input : List Char) -> Maybe (String, Int, Int, List Char)
getNext lexer line col str =
let Just (token, rest) = scan lexer [] str
| _ => Nothing
line' = line + cast (countNLs token)
col' = getCols token col
tokenStr = fastPack $ reverse token
in pure (tokenStr, line', col', rest)
getFirstMatch : Tokenizer a -> List Char ->
Either StopReason (List (WithBounds a), Int, Int, List Char)
getFirstMatch (Match lex fn) str
= let Just (tok, line', col', rest) = getNext lex line col str
| _ => Left NoRuleApply
tok' = MkBounded (fn tok) False (MkBounds line col line' col')
in Right ([tok'], line', col', rest)
getFirstMatch (Compose begin mapBegin tagger middleFn endFn mapEnd) str
= let Just (beginTok', line', col' , rest) = getNext begin line col str
| Nothing => Left NoRuleApply
tag = tagger beginTok'
middle = middleFn tag
end = endFn tag
beginTok'' = MkBounded (mapBegin beginTok') False (MkBounds line col line' col')
(midToks, (reason, line'', col'', rest'')) =
tokenise end middle line' col' [] rest
in case reason of
(ComposeNotClosing _ _) => Left reason
_ => let Just (endTok', lineEnd, colEnd, restEnd) =
getNext end line'' col'' rest''
| _ => Left $ ComposeNotClosing (line, col) (line', col')
endTok'' = MkBounded (mapEnd endTok') False (MkBounds line'' col'' lineEnd colEnd)
in Right ([endTok''] ++ reverse midToks ++ [beginTok''], lineEnd, colEnd, restEnd)
getFirstMatch (Alt t1 t2) str
= case getFirstMatch t1 str of
Right result => Right result
Left reason@(ComposeNotClosing _ _) => Left reason
Left _ => getFirstMatch t2 str
export
lexTo : Lexer ->
Tokenizer a ->
String ->
(List (WithBounds a), (StopReason, Int, Int, String))
lexTo reject tokenizer str
= let (ts, reason, (l, c, str')) =
tokenise reject tokenizer 0 0 [] (fastUnpack str) in
(ts, reason, (l, c, fastPack str'))
||| Given a tokenizer and an input string, return a list of recognised tokens,
||| and the line, column, and remainder of the input at the first point in the string
||| where there are no recognised tokens.
export
lex : Tokenizer a -> String -> (List (WithBounds a), (StopReason, Int, Int, String))
lex tokenizer str = lexTo (pred $ const False) tokenizer str
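A hypothetical two-rule `Tokenizer` built from the combinators above, with `digits` and `spaces` being the usual `Text.Lexer` recognisers; `lex` is qualified to pick the `Tokenizer` variant:

```
import Text.Lexer
import Text.Lexer.Tokenizer

data Tok = Num String | Sp

myTokenizer : Tokenizer Tok
myTokenizer = match digits Num
          <|> match spaces (const Sp)

demo : (List (WithBounds Tok), (StopReason, Int, Int, String))
demo = Tokenizer.lex myTokenizer "12 34"
```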

View File

@@ -0,0 +1,234 @@
||| A simple module to process 'literate' documents.
|||
||| The module uses a lexer to split the document into code blocks,
||| delineated by user-defined markers, and code lines that are
||| indicated by a line marker. The lexer returns a document stripped
||| of non-code elements but preserving the original document's line
||| count. Column numbering of code lines is not preserved.
|||
||| The underlying tokeniser is greedy.
|||
||| Once it identifies a line marker, it reads a prettifying space and then
||| consumes until the end of the line. Once it identifies a starting code
||| block marker, the lexer will consume input until the next
||| identifiable end block is encountered. Any other content is
||| treated as part of the original document.
|||
||| Thus, the input literate files *must* be well-formed with respect
||| to code line markers and code blocks.
|||
||| A further restriction is that literate documents cannot contain
||| the markers within the document's main text: This will confuse the
||| lexer.
|||
module Text.Literate
import Text.Lexer
import Data.List
import Data.List1
import Data.List.Views
import Data.String
import Data.String.Extra
%default total
untilEOL : Recognise False
untilEOL = manyUntil newline any
line : String -> Lexer
line s = exact s <+> (newline <|> space <+> untilEOL)
block : String -> String -> Lexer
block s e = surround (exact s <+> untilEOL) (exact e <+> untilEOL) any
notCodeLine : Lexer
notCodeLine = newline
<|> any <+> untilEOL
data Token = CodeBlock String String String
| Any String
| CodeLine String String
Show Token where
showPrec d (CodeBlock l r x) = showCon d "CodeBlock" $ showArg l ++ showArg r ++ showArg x
showPrec d (Any x) = showCon d "Any" $ showArg x
showPrec d (CodeLine m x) = showCon d "CodeLine" $ showArg m ++ showArg x
rawTokens : (delims : List (String, String))
-> (markers : List String)
-> TokenMap (Token)
rawTokens delims ls =
map (\(l,r) => (block l r, CodeBlock (trim l) (trim r))) delims
++ map (\m => (line m, CodeLine (trim m))) ls
++ [(notCodeLine, Any)]
namespace Compat
-- `reduce` below was depending on the old behaviour of `lines` before #1585
-- was merged. That old `lines` function is added here to preserve behaviour
-- of `reduce`.
lines' : List Char -> List1 (List Char)
lines' [] = singleton []
lines' s = case break isNL s of
(l, s') => l ::: case s' of
[] => []
_ :: s'' => forget $ lines' (assert_smaller s s'')
export
lines : String -> List1 String
lines s = map pack (lines' (unpack s))
||| Merge the tokens into a single source file.
reduce : List (WithBounds Token) -> List String -> String
reduce [] acc = fastAppend (reverse acc)
reduce (MkBounded (Any x) _ _ :: rest) acc =
-- newline will always be tokenized as a single token
if x == "\n"
then reduce rest ("\n"::acc)
else reduce rest acc
reduce (MkBounded (CodeLine m src) _ _ :: rest) acc =
if m == trim src
then reduce rest ("\n"::acc)
else reduce rest ((substr (length m + 1) -- remove space to right of marker.
(length src)
src
)::acc)
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc with (Compat.lines src) -- Strip the delimiters surrounding the block.
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: ys) with (snocList ys)
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: []) | Empty = reduce rest acc -- 2
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: (srcs ++ [f])) | (Snoc f srcs rec) =
-- the "\n" counts for the open deliminator; the closing deliminator should always be followed by a (Any "\n"), so we don't add a newline
reduce rest (((unlines srcs) ++ "\n") :: "\n" :: acc)
-- [ NOTE ] 1 & 2 shouldn't happen as code blocks are well formed i.e. have two deliminators.
public export
record LiterateError where
constructor MkLitErr
line : Int
column : Int
input : String
||| Description of literate styles.
|||
||| A 'literate' style comprises
|||
||| + a list of code block delimiters (`deliminators`);
||| + a list of code line markers (`line_markers`); and
||| + a list of known file extensions `file_extensions`.
|||
||| Some example specifications:
|||
||| + Bird Style
|||
|||```
|||MkLitStyle Nil [">"] [".lidr"]
|||```
|||
||| + Literate Haskell (for LaTeX)
|||
|||```
|||MkLitStyle [("\\begin{code}", "\\end{code}"),("\\begin{spec}","\\end{spec}")]
||| Nil
||| [".lhs", ".tex"]
|||```
|||
||| + OrgMode
|||
|||```
|||MkLitStyle [("#+BEGIN_SRC idris","#+END_SRC"), ("#+COMMENT idris","#+END_COMMENT")]
||| ["#+IDRIS:"]
||| [".org"]
|||```
|||
||| + Common Mark
|||
|||```
|||MkLitStyle [("```idris","```"), ("<!-- idris","--!>")]
||| Nil
||| [".md", ".markdown"]
|||```
|||
public export
record LiterateStyle where
constructor MkLitStyle
||| The pairs of start and end tags for code blocks.
deliminators : List (String, String)
||| Line markers that indicate a line contains code.
line_markers : List String
||| Recognised file extensions. Not used by the module, but will be
||| of use when connecting to code that reads in the original source
||| files.
file_extensions : List String
||| Given a 'literate specification' extract the code from the
||| literate source file (`litStr`) that follows the presented style.
|||
||| @specification The literate specification to use.
||| @litStr The literate source file.
|||
||| Returns a `LiterateError` if the literate file contains malformed
||| code blocks or code lines.
|||
export
extractCode : (specification : LiterateStyle)
-> (litStr : String)
-> Either LiterateError String
extractCode (MkLitStyle delims markers exts) str =
case lex (rawTokens delims markers) str of
(toks, (_,_,"")) => Right (reduce toks Nil)
(_, (l,c,i)) => Left (MkLitErr l c i)
||| Synonym for `extractCode`.
export
unlit : (specification : LiterateStyle)
-> (litStr : String)
-> Either LiterateError String
unlit = extractCode
||| Is the provided line marked up using a line marker?
|||
||| If the line begins with any one of the style's line markers, return
||| the marker and the line stripped of that marker. Otherwise, return
||| Nothing and the unmarked line.
export
isLiterateLine : (specification : LiterateStyle)
-> (str : String)
-> Pair (Maybe String) String
isLiterateLine (MkLitStyle delims markers _) str with (lex (rawTokens delims markers) str)
isLiterateLine (MkLitStyle delims markers _) str | ([MkBounded (CodeLine m str') _ _], (_,_, "")) = (Just m, str')
isLiterateLine (MkLitStyle delims markers _) str | (_, _) = (Nothing, str)
||| Given a 'literate specification' embed the given code using the
||| literate style provided.
|||
||| If the style uses delimiters to denote code blocks, use the first
||| pair of delimiters in the style. Otherwise use the first line marker
||| in the style. If there is **no style** return the presented code
||| string unembedded.
|||
|||
||| @specification The literate specification to use.
||| @code The code to embed,
|||
|||
export
embedCode : (specification : LiterateStyle)
-> (code : String)
-> String
embedCode (MkLitStyle ((s,e)::delims) _ _) str = unlines [s,str,e]
embedCode (MkLitStyle Nil (m::markers) _) str = unwords [m, str]
embedCode (MkLitStyle _ _ _) str = str
||| Synonym for `embedCode`
export
relit : (specification : LiterateStyle)
-> (code : String)
-> String
relit = embedCode
-- --------------------------------------------------------------------- [ EOF ]
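Putting the module to work with the Bird style quoted in the doc comment above; a minimal sketch:

```
import Text.Literate

birdStyle : LiterateStyle
birdStyle = MkLitStyle Nil [">"] [".lidr"]

unlitDemo : Either LiterateError String
unlitDemo = extractCode birdStyle "Some prose.\n> x : Nat\n> x = 2\n"
```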

View File

@@ -2,49 +2,50 @@ module Text.Parser
import Data.Bool
import Data.List
import Data.List1
import Data.Nat
import Data.Vect
import public Data.List1
import public Text.Parser.Core
import public Text.Quantity
import public Text.Token
%default total
||| Parse a terminal based on a kind of token.
export
match : (Eq k, TokenKind k) =>
(kind : k) ->
Grammar (Token k) True (TokType kind)
match kind = terminal "Unrecognised input" $
\(Tok kind' text) => if kind' == kind
then Just $ tokValue kind text
else Nothing
Grammar state (Token k) True (TokType kind)
match k = terminal "Unrecognised input" $
\t => if t.kind == k
then Just $ tokValue k t.text
else Nothing
||| Optionally parse a thing, with a default value if the grammar doesn't
||| match. May match the empty input.
export
option : {c : Bool} ->
(def : a) -> (p : Grammar tok c a) ->
Grammar tok False a
(def : a) -> (p : Grammar state tok c a) ->
Grammar state tok False a
option {c = False} def p = p <|> pure def
option {c = True} def p = p <|> pure def
||| Optionally parse a thing.
||| To provide a default value, use `option` instead.
export
optional : {c : _} ->
(p : Grammar tok c a) ->
Grammar tok False (Maybe a)
optional : {c : Bool} ->
(p : Grammar state tok c a) ->
Grammar state tok False (Maybe a)
optional p = option Nothing (map Just p)
||| Try to parse one thing or the other, producing a value that indicates
||| which option succeeded. If both would succeed, the left option
||| takes priority.
export
choose : {c1, c2 : _} ->
(l : Grammar tok c1 a) ->
(r : Grammar tok c2 b) ->
Grammar tok (c1 && c2) (Either a b)
choose : {c1, c2 : Bool} ->
(l : Grammar state tok c1 a) ->
(r : Grammar state tok c2 b) ->
Grammar state tok (c1 && c2) (Either a b)
choose l r = map Left l <|> map Right r
||| Produce a grammar by applying a function to each element of a container,
@@ -52,48 +53,42 @@ choose l r = map Left l <|> map Right r
||| container is empty.
export
choiceMap : {c : Bool} ->
(a -> Grammar tok c b) ->
(a -> Grammar state tok c b) ->
Foldable t => t a ->
Grammar tok c b
choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
f x <|> acc)
(fail "No more options") xs
Grammar state tok c b
choiceMap {c} f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
f x <|> acc)
(fail "No more options") xs
%hide Prelude.(>>=)
||| Try each grammar in a container until the first one succeeds.
||| Fails if the container is empty.
export
choice : {c : _} ->
Foldable t => t (Grammar tok c a) ->
Grammar tok c a
choice = Parser.choiceMap id
choice : Foldable t =>
{c : Bool} ->
t (Grammar state tok c a) ->
Grammar state tok c a
choice = choiceMap id
mutual
||| Parse one or more things
export
some : Grammar tok True a ->
Grammar tok True (List1 a)
some : Grammar state tok True a ->
Grammar state tok True (List1 a)
some p = pure (!p ::: !(many p))
||| Parse zero or more things (may match the empty input)
export
many : Grammar tok True a ->
Grammar tok False (List a)
many : Grammar state tok True a ->
Grammar state tok False (List a)
many p = option [] (forget <$> some p)
||| Parse one or more instances of `p`, returning the parsed items and proof
||| that the resulting list is non-empty.
export
some' : (p : Grammar tok True a) ->
Grammar tok True (xs : List a ** NonEmpty xs)
some' p = pure (!p :: !(many p) ** IsNonEmpty)
mutual
private
count1 : (q : Quantity) ->
(p : Grammar tok True a) ->
Grammar tok True (List a)
(p : Grammar state tok True a) ->
Grammar state tok True (List a)
count1 q p = do x <- p
seq (count q p)
(\xs => pure (x :: xs))
@@ -101,31 +96,23 @@ mutual
||| Parse `p`, repeated as specified by `q`, returning the list of values.
export
count : (q : Quantity) ->
(p : Grammar tok True a) ->
Grammar tok (isSucc (min q)) (List a)
(p : Grammar state tok True a) ->
Grammar state tok (isSucc (min q)) (List a)
count (Qty Z Nothing) p = many p
count (Qty Z (Just Z)) _ = pure []
count (Qty Z (Just (S max))) p = option [] $ count1 (atMost max) p
count (Qty (S min) Nothing) p = count1 (atLeast min) p
count (Qty (S min) (Just Z)) _ = fail "Quantity out of order"
count (Qty (S min) (Just (S max))) p = count1 (between min max) p
||| Parse `p` `n` times, returning the vector of values.
export
countExactly : (n : Nat) ->
(p : Grammar tok True a) ->
Grammar tok (isSucc n) (Vect n a)
countExactly Z p = Empty []
countExactly (S k) p = [| p :: countExactly k p |]
count (Qty (S min) (Just (S max))) p = count1 (between (S min) max) p
mutual
||| Parse one or more instances of `p` until `end` succeeds, returning the
||| list of values from `p`. Guaranteed to consume input.
export
someTill : {c : Bool} ->
(end : Grammar tok c e) ->
(p : Grammar tok True a) ->
Grammar tok True (List1 a)
(end : Grammar state tok c e) ->
(p : Grammar state tok True a) ->
Grammar state tok True (List1 a)
someTill {c} end p = do x <- p
seq (manyTill end p)
(\xs => pure (x ::: xs))
@@ -134,9 +121,9 @@ mutual
||| list of values from `p`. Guaranteed to consume input if `end` consumes.
export
manyTill : {c : Bool} ->
(end : Grammar tok c e) ->
(p : Grammar tok True a) ->
Grammar tok c (List a)
(end : Grammar state tok c e) ->
(p : Grammar state tok True a) ->
Grammar state tok c (List a)
manyTill {c} end p = rewrite sym (andTrueNeutral c) in
map (const []) end <|> (forget <$> someTill end p)
@@ -145,28 +132,28 @@ mutual
||| returning its value.
export
afterSome : {c : Bool} ->
(skip : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok True a
afterSome skip p = do ignore skip
(skip : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok True a
afterSome skip p = do ignore $ skip
afterMany skip p
||| Parse zero or more instance of `skip` until `p` is encountered,
||| returning its value.
export
afterMany : {c : Bool} ->
(skip : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok c a
(skip : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok c a
afterMany {c} skip p = rewrite sym (andTrueNeutral c) in
p <|> afterSome skip p
||| Parse one or more things, each separated by another thing.
export
sepBy1 : {c : Bool} ->
(sep : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok c (List1 a)
(sep : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok c (List1 a)
sepBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
[| p ::: many (sep *> p) |]
@@ -174,18 +161,18 @@ sepBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
||| match the empty input.
export
sepBy : {c : Bool} ->
(sep : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok False (List a)
(sep : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok False (List a)
sepBy sep p = option [] $ forget <$> sepBy1 sep p
||| Parse one or more instances of `p` separated by and optionally terminated by
||| `sep`.
export
sepEndBy1 : {c : Bool} ->
(sep : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok c (List1 a)
(sep : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok c (List1 a)
sepEndBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
sepBy1 sep p <* optional sep
@@ -193,32 +180,49 @@ sepEndBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
||| by `sep`. Will not match a separator by itself.
export
sepEndBy : {c : Bool} ->
(sep : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok False (List a)
(sep : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok False (List a)
sepEndBy sep p = option [] $ forget <$> sepEndBy1 sep p
||| Parse one or more instances of `p`, separated and terminated by `sep`.
export
endBy1 : {c : Bool} ->
(sep : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok True (List1 a)
endBy1 sep p = some $ rewrite sym (orTrueTrue c) in
p <* sep
(sep : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok True (List1 a)
endBy1 {c} sep p = some $ rewrite sym (orTrueTrue c) in
p <* sep
export
endBy : {c : Bool} ->
(sep : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok False (List a)
(sep : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok False (List a)
endBy sep p = option [] $ forget <$> endBy1 sep p
||| Parse an instance of `p` that is between `left` and `right`.
export
between : {c : _} ->
(left : Grammar tok True l) ->
(right : Inf (Grammar tok True r)) ->
(p : Inf (Grammar tok c a)) ->
Grammar tok True a
between : {c : Bool} ->
(left : Grammar state tok True l) ->
(right : Grammar state tok True r) ->
(p : Grammar state tok c a) ->
Grammar state tok True a
between left right contents = left *> contents <* right
export
location : Grammar state token False (Int, Int)
location = startBounds <$> position
export
endLocation : Grammar state token False (Int, Int)
endLocation = endBounds <$> position
export
column : Grammar state token False Int
column = snd <$> location
public export
when : Bool -> Lazy (Grammar state token False ()) -> Grammar state token False ()
when True y = y
when False y = pure ()
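Finally, a sketch of running a `Grammar` under the new signature: the `state` parameter is instantiated to `()`, raw `Char` tokens are wrapped with `irrelevantBounds`, and `parse` is assumed to return `Right (value, leftoverTokens)` on success, as `parseJSON` above suggests.

```
import Text.Parser
import Text.Bounded

digitG : Grammar () Char True Char
digitG = terminal "expected digit"
                  (\c => if isDigit c then Just c else Nothing)

runDigits : String -> Maybe (List1 Char)
runDigits s = case parse (some digitG) (map irrelevantBounds (unpack s)) of
                Right (ds, []) => Just ds
                _              => Nothing
```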

View File

@@ -1,47 +1,68 @@
module Text.Parser.Core
import public Control.Delayed
import Data.Bool
import Data.List
import Data.List1
import public Control.Delayed
import public Text.Bounded
%default total
-- TODO: Add some primitives for helping with error messages.
-- e.g. perhaps set a string to state what we're currently trying to
-- parse, or to say what the next expected token is in words
||| Description of a language's grammar. The `tok` parameter is the type
||| of tokens, and the `consumes` flag is True if the language is guaranteed
||| to be non-empty - that is, successfully parsing the language is guaranteed
||| to consume some input.
public export
data Grammar : (tok : Type) -> (consumes : Bool) -> Type -> Type where
Empty : (val : ty) -> Grammar tok False ty
Terminal : String -> (tok -> Maybe a) -> Grammar tok True a
NextIs : String -> (tok -> Bool) -> Grammar tok False tok
EOF : Grammar tok False ()
export
data Grammar : (state : Type) -> (tok : Type) -> (consumes : Bool) -> Type -> Type where
Empty : (val : ty) -> Grammar state tok False ty
Terminal : String -> (tok -> Maybe a) -> Grammar state tok True a
NextIs : String -> (tok -> Bool) -> Grammar state tok False tok
EOF : Grammar state tok False ()
Fail : Bool -> String -> Grammar tok c ty
Commit : Grammar tok False ()
MustWork : Grammar tok c a -> Grammar tok c a
Fail : (location : Maybe Bounds) -> (fatal : Bool) -> String -> Grammar state tok c ty
Try : Grammar state tok c ty -> Grammar state tok c ty
SeqEat : {c2 : _} -> Grammar tok True a -> Inf (a -> Grammar tok c2 b) ->
Grammar tok True b
SeqEmpty : {c1, c2 : _} -> Grammar tok c1 a -> (a -> Grammar tok c2 b) ->
Grammar tok (c1 || c2) b
Commit : Grammar state tok False ()
MustWork : Grammar state tok c a -> Grammar state tok c a
ThenEat : {c2 : _} -> Grammar tok True () -> Inf (Grammar tok c2 b) ->
Grammar tok True b
ThenEmpty : {c1, c2 : _} -> Grammar tok c1 () -> Grammar tok c2 b ->
Grammar tok (c1 || c2) b
SeqEat : {c2 : Bool} ->
Grammar state tok True a -> Inf (a -> Grammar state tok c2 b) ->
Grammar state tok True b
SeqEmpty : {c1, c2 : Bool} ->
Grammar state tok c1 a -> (a -> Grammar state tok c2 b) ->
Grammar state tok (c1 || c2) b
Alt : {c1, c2 : _} -> Grammar tok c1 ty -> Grammar tok c2 ty ->
Grammar tok (c1 && c2) ty
ThenEat : {c2 : Bool} ->
Grammar state tok True () -> Inf (Grammar state tok c2 a) ->
Grammar state tok True a
ThenEmpty : {c1, c2 : Bool} ->
Grammar state tok c1 () -> Grammar state tok c2 a ->
Grammar state tok (c1 || c2) a
Alt : {c1, c2 : Bool} ->
Grammar state tok c1 ty -> Lazy (Grammar state tok c2 ty) ->
Grammar state tok (c1 && c2) ty
Bounds : Grammar state tok c ty -> Grammar state tok c (WithBounds ty)
Position : Grammar state tok False Bounds
Act : state -> Grammar state tok False ()
||| Sequence two grammars. If either consumes some input, the sequence is
||| guaranteed to consume some input. If the first one consumes input, the
||| second is allowed to be recursive (because it means some input has been
||| consumed and therefore the input is smaller)
public export %inline %tcinline
export %inline
(>>=) : {c1, c2 : Bool} ->
Grammar tok c1 a ->
inf c1 (a -> Grammar tok c2 b) ->
Grammar tok (c1 || c2) b
Grammar state tok c1 a ->
inf c1 (a -> Grammar state tok c2 b) ->
Grammar state tok (c1 || c2) b
(>>=) {c1 = False} = SeqEmpty
(>>=) {c1 = True} = SeqEat
(>>=) {c1 = True} = SeqEat
||| Sequence two grammars. If either consumes some input, the sequence is
||| guaranteed to consume some input. If the first one consumes input, the
@@ -49,279 +70,305 @@ public export %inline %tcinline
||| consumed and therefore the input is smaller)
public export %inline %tcinline
(>>) : {c1, c2 : Bool} ->
Grammar tok c1 () ->
inf c1 (Grammar tok c2 a) ->
Grammar tok (c1 || c2) a
Grammar state tok c1 () ->
inf c1 (Grammar state tok c2 a) ->
Grammar state tok (c1 || c2) a
(>>) {c1 = False} = ThenEmpty
(>>) {c1 = True} = ThenEat
||| Sequence two grammars. If either consumes some input, the sequence is
||| guaranteed to consume input. This is an explicitly non-infinite version
||| of `>>=`.
export
seq : {c1, c2 : _} -> Grammar tok c1 a ->
(a -> Grammar tok c2 b) ->
Grammar tok (c1 || c2) b
export %inline
seq : {c1,c2 : Bool} ->
Grammar state tok c1 a ->
(a -> Grammar state tok c2 b) ->
Grammar state tok (c1 || c2) b
seq = SeqEmpty
||| Sequence a grammar followed by the grammar it returns.
export
join : {c1, c2 : Bool} ->
Grammar tok c1 (Grammar tok c2 a) ->
Grammar tok (c1 || c2) a
export %inline
join : {c1,c2 : Bool} ->
Grammar state tok c1 (Grammar state tok c2 a) ->
Grammar state tok (c1 || c2) a
join {c1 = False} p = SeqEmpty p id
join {c1 = True} p = SeqEat p id
||| Give two alternative grammars. If both consume, the combination is
||| guaranteed to consume.
export
(<|>) : {c1, c2 : _} ->
Grammar tok c1 ty ->
Grammar tok c2 ty ->
Grammar tok (c1 && c2) ty
(<|>) = Alt
||| Allows the result of a grammar to be mapped to a different value.
export
{c : _} -> Functor (Grammar tok c) where
{c : _} ->
Functor (Grammar state tok c) where
map f (Empty val) = Empty (f val)
map f (Fail fatal msg) = Fail fatal msg
map f (Fail bd fatal msg) = Fail bd fatal msg
map f (Try g) = Try (map f g)
map f (MustWork g) = MustWork (map f g)
map f (Terminal msg g) = Terminal msg (\t => map f (g t))
map f (Terminal msg g) = Terminal msg (map f . g)
map f (Alt x y) = Alt (map f x) (map f y)
map f (SeqEat act next)
= SeqEat act (\val => map f (next val))
map f (SeqEmpty act next)
= SeqEmpty act (\val => map f (next val))
= SeqEmpty act (\ val => map f (next val))
map f (ThenEat act next)
= ThenEat act (map f next)
map f (ThenEmpty act next)
= ThenEmpty act (map f next)
map {c} f (Bounds act)
= rewrite sym $ orFalseNeutral c in
SeqEmpty (Bounds act) (Empty . f) -- Bounds (map f act)
-- The remaining constructors (NextIs, EOF, Commit) have a fixed type,
-- so a sequence must be used.
map {c = False} f p = SeqEmpty p (Empty . f)
||| Give two alternative grammars. If both consume, the combination is
||| guaranteed to consume.
export %inline
(<|>) : {c1,c2 : Bool} ->
Grammar state tok c1 ty ->
Lazy (Grammar state tok c2 ty) ->
Grammar state tok (c1 && c2) ty
(<|>) = Alt
infixr 2 <||>
||| Take the tagged disjunction of two grammars. If both consume, the
||| combination is guaranteed to consume.
export
(<||>) : {c1,c2 : Bool} ->
Grammar state tok c1 a ->
Lazy (Grammar state tok c2 b) ->
Grammar state tok (c1 && c2) (Either a b)
(<||>) p q = (Left <$> p) <|> (Right <$> q)
||| Sequence a grammar with value type `a -> b` and a grammar
||| with value type `a`. If both succeed, apply the function
||| from the first grammar to the value from the second grammar.
||| Guaranteed to consume if either grammar consumes.
export
(<*>) : {c1, c2 : _} ->
Grammar tok c1 (a -> b) ->
Inf (Grammar tok c2 a) ->
Grammar tok (c1 || c2) b
(<*>) {c1 = False} x y = SeqEmpty x (\f => map f y)
(<*>) {c1 = True } x y = SeqEmpty x (\f => map f (Force y))
export %inline
(<*>) : {c1, c2 : Bool} ->
Grammar state tok c1 (a -> b) ->
Grammar state tok c2 a ->
Grammar state tok (c1 || c2) b
(<*>) x y = SeqEmpty x (\f => map f y)
||| Sequence two grammars. If both succeed, use the value of the first one.
||| Guaranteed to consume if either grammar consumes.
export
(<*) : {c1, c2 : _} ->
Grammar tok c1 a ->
Inf (Grammar tok c2 b) ->
Grammar tok (c1 || c2) a
export %inline
(<*) : {c1,c2 : Bool} ->
Grammar state tok c1 a ->
Grammar state tok c2 b ->
Grammar state tok (c1 || c2) a
(<*) x y = map const x <*> y
||| Sequence two grammars. If both succeed, use the value of the second one.
||| Guaranteed to consume if either grammar consumes.
export
(*>) : {c1, c2 : _} ->
Grammar tok c1 a ->
Inf (Grammar tok c2 b) ->
Grammar tok (c1 || c2) b
export %inline
(*>) : {c1,c2 : Bool} ->
Grammar state tok c1 a ->
Grammar state tok c2 b ->
Grammar state tok (c1 || c2) b
(*>) x y = map (const id) x <*> y
export %inline
act : state -> Grammar state tok False ()
act = Act
||| Produce a grammar that can parse a different type of token by providing a
||| function converting the new token type into the original one.
export
mapToken : (a -> b) -> Grammar b c ty -> Grammar a c ty
mapToken : (a -> b) -> Grammar state b c ty -> Grammar state a c ty
mapToken f (Empty val) = Empty val
mapToken f (Terminal msg g) = Terminal msg (g . f)
mapToken f (NextIs msg g) = SeqEmpty (NextIs msg (g . f)) (Empty . f)
mapToken f EOF = EOF
mapToken f (Fail fatal msg) = Fail fatal msg
mapToken f (Fail bd fatal msg) = Fail bd fatal msg
mapToken f (Try g) = Try (mapToken f g)
mapToken f (MustWork g) = MustWork (mapToken f g)
mapToken f Commit = Commit
mapToken f (SeqEat act next)
= SeqEat (mapToken f act) (\x => mapToken f (next x))
= SeqEat (mapToken f act) (\x => mapToken f (next x))
mapToken f (SeqEmpty act next)
= SeqEmpty (mapToken f act) (\x => mapToken f (next x))
= SeqEmpty (mapToken f act) (\x => mapToken f (next x))
mapToken f (ThenEat act next)
= ThenEat (mapToken f act) (mapToken f next)
= ThenEat (mapToken f act) (mapToken f next)
mapToken f (ThenEmpty act next)
= ThenEmpty (mapToken f act) (mapToken f next)
= ThenEmpty (mapToken f act) (mapToken f next)
mapToken f (Alt x y) = Alt (mapToken f x) (mapToken f y)
mapToken f (Bounds act) = Bounds (mapToken f act)
mapToken f Position = Position
mapToken f (Act action) = Act action
||| Always succeed with the given value.
export
pure : (val : ty) -> Grammar tok False ty
export %inline
pure : (val : ty) -> Grammar state tok False ty
pure = Empty
||| Check whether the next token satisfies a predicate
export
nextIs : String -> (tok -> Bool) -> Grammar tok False tok
export %inline
nextIs : String -> (tok -> Bool) -> Grammar state tok False tok
nextIs = NextIs
||| Look at the next token in the input
export
peek : Grammar tok False tok
export %inline
peek : Grammar state tok False tok
peek = nextIs "Unrecognised token" (const True)
||| Succeeds if running the predicate on the next token returns Just x,
||| returning x. Otherwise fails.
export
terminal : String -> (tok -> Maybe a) -> Grammar tok True a
export %inline
terminal : String -> (tok -> Maybe a) -> Grammar state tok True a
terminal = Terminal
||| Always fail with a message
export
fail : String -> Grammar tok c ty
fail = Fail False
export %inline
fail : String -> Grammar state tok c ty
fail = Fail Nothing False
export
fatalError : String -> Grammar tok c ty
fatalError = Fail True
||| Always fail with a message and a location
export %inline
failLoc : Bounds -> String -> Grammar state tok c ty
failLoc b = Fail (Just b) False
||| Fail with no possibility for recovery (i.e.
||| no alternative parsing can succeed).
export %inline
fatalError : String -> Grammar state tok c ty
fatalError = Fail Nothing True
||| Fail at the given location with no possibility for recovery (i.e.
||| no alternative parsing can succeed).
export %inline
fatalLoc : Bounds -> String -> Grammar state tok c ty
fatalLoc b = Fail (Just b) True
||| Catch a fatal error
export %inline
try : Grammar state tok c ty -> Grammar state tok c ty
try = Try
||| Succeed if the input is empty
export
eof : Grammar tok False ()
export %inline
eof : Grammar state tok False ()
eof = EOF
||| Commit to an alternative; if the current branch of an alternative
||| fails to parse, no more branches will be tried
export
commit : Grammar tok False ()
export %inline
commit : Grammar state tok False ()
commit = Commit
||| If the parser fails, treat it as a fatal error
export
mustWork : Grammar tok c ty -> Grammar tok c ty
export %inline
mustWork : {c : Bool} -> Grammar state tok c ty -> Grammar state tok c ty
mustWork = MustWork
data ParseResult : List tok -> (consumes : Bool) -> Type -> Type where
Failure : {xs : List tok} ->
(committed : Bool) -> (fatal : Bool) ->
(err : String) -> (rest : List tok) -> ParseResult xs c ty
EmptyRes : (committed : Bool) ->
(val : ty) -> (more : List tok) -> ParseResult more False ty
NonEmptyRes : {xs : List tok} ->
(committed : Bool) ->
(val : ty) -> (more : List tok) ->
ParseResult (x :: xs ++ more) c ty
export %inline
bounds : Grammar state tok c ty -> Grammar state tok c (WithBounds ty)
bounds = Bounds
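-- Not part of the diff: `bounds` attaches source positions to a result,
-- so callers can report located errors or highlight spans.
locatedChar : Grammar state Char True (WithBounds Char)
locatedChar = bounds (terminal "Expected any token" Just)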
-- Take the result of an alternative branch, reset the commit flag to
-- the commit flag from the outer alternative, and weaken the 'consumes'
-- flag to take both alternatives into account
weakenRes : {whatever, c : Bool} -> {xs : List tok} ->
(com' : Bool) ->
ParseResult xs c ty -> ParseResult xs (whatever && c) ty
weakenRes com' (Failure com fatal msg ts) = Failure com' fatal msg ts
weakenRes {whatever=True} com' (EmptyRes com val xs) = EmptyRes com' val xs
weakenRes {whatever=False} com' (EmptyRes com val xs) = EmptyRes com' val xs
weakenRes com' (NonEmptyRes {xs} com val more) = NonEmptyRes {xs} com' val more
doParse : (commit : Bool) ->
(act : Grammar tok c ty) ->
(xs : List tok) ->
ParseResult xs c ty
-- doParse com act xs with (sizeAccessible xs)
doParse com (Empty val) xs = EmptyRes com val xs
doParse com (Fail fatal str) [] = Failure com fatal str []
doParse com (Fail fatal str) (x :: xs) = Failure com fatal str (x :: xs)
doParse com Commit xs = EmptyRes True () xs
doParse com (MustWork g) xs =
let p' = doParse com g xs in
case p' of
Failure com' _ msg ts => Failure com' True msg ts
res => res
doParse com (Terminal err f) [] = Failure com False "End of input" []
doParse com (Terminal err f) (x :: xs)
= maybe
(Failure com False err (x :: xs))
(\a => NonEmptyRes com {xs=[]} a xs)
(f x)
doParse com EOF [] = EmptyRes com () []
doParse com EOF (x :: xs)
= Failure com False "Expected end of input" (x :: xs)
doParse com (NextIs err f) [] = Failure com False "End of input" []
doParse com (NextIs err f) (x :: xs)
= if f x
then EmptyRes com x (x :: xs)
else Failure com False err (x :: xs)
doParse com (Alt {c1} {c2} x y) xs
= let p' = doParse False x xs in
case p' of
Failure com' fatal msg ts
=> if com' || fatal
-- If the alternative had committed, don't try the
-- other branch (and reset commit flag)
then Failure com fatal msg ts
else weakenRes {whatever = c1} com (doParse False y xs)
-- Successfully parsed the first option, so use the outer commit flag
EmptyRes _ val xs => EmptyRes com val xs
NonEmptyRes {xs=xs'} _ val more => NonEmptyRes {xs=xs'} com val more
doParse com (SeqEmpty {c1} {c2} act next) xs
= let p' = assert_total (doParse {c = c1} com act xs) in
case p' of
Failure com fatal msg ts => Failure com fatal msg ts
EmptyRes com val xs => assert_total (doParse com (next val) xs)
NonEmptyRes {x} {xs=ys} com val more =>
case (assert_total (doParse com (next val) more)) of
Failure com' fatal msg ts => Failure com' fatal msg ts
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
doParse com (SeqEat act next) xs with (doParse com act xs)
doParse com (SeqEat act next) xs | Failure com' fatal msg ts
= Failure com' fatal msg ts
doParse com (SeqEat act next) (x :: (ys ++ more)) | (NonEmptyRes {xs=ys} com' val more)
= let p' = assert_total (doParse com' (next val) more) in
case p' of
Failure com' fatal msg ts => Failure com' fatal msg ts
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
doParse com (ThenEmpty {c1} {c2} act next) xs
= let p' = assert_total (doParse {c = c1} com act xs) in
case p' of
Failure com fatal msg ts => Failure com fatal msg ts
EmptyRes com val xs => assert_total (doParse com next xs)
NonEmptyRes {x} {xs=ys} com val more =>
case (assert_total (doParse com next more)) of
Failure com' fatal msg ts => Failure com' fatal msg ts
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
doParse com (ThenEat act next) xs with (doParse com act xs)
doParse com (ThenEat act next) xs | Failure com' fatal msg ts
= Failure com' fatal msg ts
doParse com (ThenEat act next) (x :: (ys ++ more)) | (NonEmptyRes {xs=ys} com' val more)
= let p' = assert_total (doParse com' next more) in
case p' of
Failure com' fatal msg ts => Failure com' fatal msg ts
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
-- This next line is not strictly necessary, but it stops the coverage
-- checker taking a really long time and eating lots of memory...
-- doParse _ _ _ = Failure True True "Help the coverage checker!" []
export %inline
position : Grammar state tok False Bounds
position = Position
public export
data ParseError tok = Error String (List tok)
data ParsingError tok = Error String (Maybe Bounds)
data ParseResult : Type -> Type -> Type -> Type where
Failure : (committed : Bool) -> (fatal : Bool) ->
List1 (ParsingError tok) -> ParseResult state tok ty
Res : state -> (committed : Bool) ->
(val : WithBounds ty) -> (more : List (WithBounds tok)) -> ParseResult state tok ty
mergeWith : WithBounds ty -> ParseResult state tok sy -> ParseResult state tok sy
mergeWith x (Res s committed val more) = Res s committed (mergeBounds x val) more
mergeWith x v = v
doParse : Semigroup state => state -> (commit : Bool) ->
(act : Grammar state tok c ty) ->
(xs : List (WithBounds tok)) ->
ParseResult state tok ty
doParse s com (Empty val) xs = Res s com (irrelevantBounds val) xs
doParse s com (Fail location fatal str) xs
= Failure com fatal (Error str (location <|> (bounds <$> head' xs)) ::: Nil)
doParse s com (Try g) xs = case doParse s com g xs of
-- recover from fatal match but still propagate the 'commit'
Failure com _ errs => Failure com False errs
res => res
doParse s com Commit xs = Res s True (irrelevantBounds ()) xs
doParse s com (MustWork g) xs =
case assert_total (doParse s com g xs) of
Failure com' _ errs => Failure com' True errs
res => res
doParse s com (Terminal err f) [] = Failure com False (Error "End of input" Nothing ::: Nil)
doParse s com (Terminal err f) (x :: xs) =
case f x.val of
Nothing => Failure com False (Error err (Just x.bounds) ::: Nil)
Just a => Res s com (const a <$> x) xs
doParse s com EOF [] = Res s com (irrelevantBounds ()) []
doParse s com EOF (x :: xs) = Failure com False (Error "Expected end of input" (Just x.bounds) ::: Nil)
doParse s com (NextIs err f) [] = Failure com False (Error "End of input" Nothing ::: Nil)
doParse s com (NextIs err f) (x :: xs)
= if f x.val
then Res s com (removeIrrelevance x) (x :: xs)
else Failure com False (Error err (Just x.bounds) ::: Nil)
doParse s com (Alt {c1} {c2} x y) xs
= case doParse s False x xs of
Failure com' fatal errs
=> if com' || fatal
-- If the alternative had committed, don't try the
-- other branch (and reset commit flag)
then Failure com fatal errs
else case (assert_total doParse s False y xs) of
(Failure com'' fatal' errs') => if com'' || fatal'
-- Only add the errors together if the second branch
-- is also non-committed and non-fatal.
then Failure com'' fatal' errs'
else Failure False False (errs ++ errs')
(Res s _ val xs) => Res s com val xs
-- Successfully parsed the first option, so use the outer commit flag
Res s _ val xs => Res s com val xs
doParse s com (SeqEmpty act next) xs
= case assert_total (doParse s com act xs) of
Failure com fatal errs => Failure com fatal errs
Res s com v xs =>
mergeWith v (assert_total $ doParse s com (next v.val) xs)
doParse s com (SeqEat act next) xs
= case assert_total (doParse s com act xs) of
Failure com fatal errs => Failure com fatal errs
Res s com v xs =>
mergeWith v (assert_total $ doParse s com (next v.val) xs)
doParse s com (ThenEmpty act next) xs
= case assert_total (doParse s com act xs) of
Failure com fatal errs => Failure com fatal errs
Res s com v xs =>
mergeWith v (assert_total $ doParse s com next xs)
doParse s com (ThenEat act next) xs
= case assert_total (doParse s com act xs) of
Failure com fatal errs => Failure com fatal errs
Res s com v xs =>
mergeWith v (assert_total $ doParse s com next xs)
doParse s com (Bounds act) xs
= case assert_total (doParse s com act xs) of
Failure com fatal errs => Failure com fatal errs
Res s com v xs => Res s com (const v <$> v) xs
doParse s com Position [] = Failure com False (Error "End of input" Nothing ::: Nil)
doParse s com Position (x :: xs)
= Res s com (irrelevantBounds x.bounds) (x :: xs)
doParse s com (Act action) xs = Res (s <+> action) com (irrelevantBounds ()) xs
||| Parse a list of tokens according to the given grammar. If successful,
||| returns a pair of the parse result and the unparsed tokens (the remaining
||| input).
export
parse : {c : _} -> (act : Grammar tok c ty) -> (xs : List tok) ->
Either (ParseError tok) (ty, List tok)
parse : {c : Bool} -> (act : Grammar () tok c ty) -> (xs : List (WithBounds tok)) ->
Either (List1 (ParsingError tok)) (ty, List (WithBounds tok))
parse act xs
= case doParse False act xs of
Failure _ _ msg ts => Left (Error msg ts)
EmptyRes _ val rest => pure (val, rest)
NonEmptyRes _ val rest => pure (val, rest)
= case doParse neutral False act xs of
Failure _ _ errs => Left errs
Res _ _ v rest => Right (v.val, rest)
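-- Not part of the diff: driving the new `parse` entry point. Plain
-- tokens are wrapped with `irrelevantBounds` (from Text.Bounded, as in
-- `doParse` above); when both uncommitted alternatives fail, the List1
-- carries both errors.
runAB : String -> Either (List1 (ParsingError Char)) (Char, List (WithBounds Char))
runAB str = parse (exactly 'a' <|> exactly 'b')
                  (map irrelevantBounds (unpack str))
  where
    exactly : Char -> Grammar () Char True Char
    exactly want = terminal ("Expected " ++ show want)
                            (\c => if c == want then Just c else Nothing)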
export
parseWith : Monoid state => {c : Bool} -> (act : Grammar state tok c ty) -> (xs : List (WithBounds tok)) ->
Either (List1 (ParsingError tok)) (state, ty, List (WithBounds tok))
parseWith act xs
= case doParse neutral False act xs of
Failure _ _ errs => Left errs
Res s _ v rest => Right (s, v.val, rest)
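-- Not part of the diff: `parseWith` additionally returns the Monoid
-- state threaded through the run. Assuming the `Act` constructor is
-- exposed as an `act` combinator (as in the compiler's library),
-- grammars can accumulate into that state:
logged : Grammar (List String) Char True Char
logged = do c <- terminal "Expected any token" Just
            act ["saw a token"]
            pure c
-- parseWith logged (map irrelevantBounds (unpack "x"))
--   ===> Right (["saw a token"], 'x', [])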

View File

@ -413,10 +413,6 @@ Pretty Char where
export Pretty Nat where pretty = unsafeTextWithoutNewLines . show
export Pretty Int where pretty = unsafeTextWithoutNewLines . show
export Pretty Integer where pretty = unsafeTextWithoutNewLines . show
export Pretty Int8 where pretty = unsafeTextWithoutNewLines . show
export Pretty Int16 where pretty = unsafeTextWithoutNewLines . show
export Pretty Int32 where pretty = unsafeTextWithoutNewLines . show
export Pretty Int64 where pretty = unsafeTextWithoutNewLines . show
export Pretty Double where pretty = unsafeTextWithoutNewLines . show
export Pretty Bits8 where pretty = unsafeTextWithoutNewLines . show
export Pretty Bits16 where pretty = unsafeTextWithoutNewLines . show

View File

@ -3,6 +3,8 @@ module Text.PrettyPrint.Prettyprinter.Render.HTML
import Data.List
import Data.String
%default covering
export
htmlEscape : String -> String
htmlEscape s = fastAppend $ reverse $ go [] s

View File

@ -6,7 +6,7 @@ import public Control.ANSI
import Control.Monad.ST
import Text.PrettyPrint.Prettyprinter.Doc
%default total
%default covering
public export
AnsiStyle : Type

View File

@ -68,6 +68,7 @@ sdocToTreeParser (SLine i rest) = case sdocToTreeParser rest of
(Just tree, rest') => (Just $ STConcat [STLine i, tree], rest')
(Nothing, rest') => (Just $ STLine i, rest')
sdocToTreeParser (SAnnPush ann rest) = case sdocToTreeParser rest of
(tree, Nothing) => (Nothing, Nothing)
(Just tree, Nothing) => (Just $ STAnn ann tree, Nothing)
(Just tree, Just rest') => case sdocToTreeParser rest' of
(Just tree', rest'') => (Just $ STConcat [STAnn ann tree, tree'], rest'')

View File

@ -101,8 +101,10 @@ angles : Doc ann -> Doc ann
angles = enclose langle rangle
export
brackets : Doc ann -> Doc ann
brackets = enclose lbracket rbracket
brackets : {default lbracket ldelim : Doc ann} ->
{default rbracket rdelim : Doc ann} ->
Doc ann -> Doc ann
brackets {ldelim, rdelim} = enclose ldelim rdelim
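-- Not part of the diff: the default-valued implicits keep existing call
-- sites unchanged while allowing custom delimiters (sketch):
plain : Doc ann
plain = brackets (pretty "xs")          -- renders as [xs]
custom : Doc ann
custom = brackets {ldelim = pretty "(|", rdelim = pretty "|)"} (pretty "xs")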
export
braces : Doc ann -> Doc ann

View File

@ -1,5 +1,7 @@
module Text.Quantity
%default total
public export
record Quantity where
constructor Qty

View File

@ -1,5 +1,7 @@
module Text.Token
%default total
||| For a type `kind`, specify a way of converting the recognised
||| string into a value.
|||

View File

@ -159,17 +159,20 @@ modules = Control.ANSI,
System.Random,
System.Path,
Text.Token,
Text.Quantity,
Text.Parser,
Text.Bounded,
Text.Lexer,
Text.Parser.Core,
Text.Lexer.Core,
Text.Lexer.Tokenizer,
Text.Literate,
Text.Parser,
Text.Parser.Core,
Text.PrettyPrint.Prettyprinter,
Text.PrettyPrint.Prettyprinter.Doc,
Text.PrettyPrint.Prettyprinter.Symbols,
Text.PrettyPrint.Prettyprinter.Util,
Text.PrettyPrint.Prettyprinter.SimpleDocTree,
Text.PrettyPrint.Prettyprinter.Render.HTML,
Text.PrettyPrint.Prettyprinter.Render.String,
Text.PrettyPrint.Prettyprinter.Render.Terminal
Text.PrettyPrint.Prettyprinter.Render.Terminal,
Text.PrettyPrint.Prettyprinter.SimpleDocTree,
Text.PrettyPrint.Prettyprinter.Symbols,
Text.PrettyPrint.Prettyprinter.Util,
Text.Quantity,
Text.Token