mirror of
https://github.com/idris-lang/Idris2.git
synced 2025-01-05 06:37:23 +03:00
Update contrib Text.Parser to match Library.Text.Parser (#1808)
Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org>
This commit is contained in:
parent
ca95781a74
commit
f3855d7100
@ -7,7 +7,6 @@ import Data.String
|
||||
|
||||
%default total
|
||||
|
||||
|
||||
infixl 5 +>
|
||||
infixr 5 <+
|
||||
|
||||
@ -94,20 +93,14 @@ public export
|
||||
indentLines : (n : Nat) -> String -> String
|
||||
indentLines n str = unlines $ map (indent n) $ lines str
|
||||
|
||||
||| Return a string of the given character repeated
|
||||
||| `n` times.
|
||||
export
|
||||
fastReplicate : (n : Nat) -> Char -> String
|
||||
fastReplicate n c = fastPack $ replicate n c
|
||||
|
||||
||| Left-justify a string to the given length, using the
|
||||
||| specified fill character on the right.
|
||||
export
|
||||
justifyLeft : Nat -> Char -> String -> String
|
||||
justifyLeft n c s = s ++ fastReplicate (n `minus` length s) c
|
||||
justifyLeft n c s = s ++ replicate (n `minus` length s) c
|
||||
|
||||
||| Right-justify a string to the given length, using the
|
||||
||| specified fill character on the left.
|
||||
export
|
||||
justifyRight : Nat -> Char -> String -> String
|
||||
justifyRight n c s = fastReplicate (n `minus` length s) c ++ s
|
||||
justifyRight n c s = replicate (n `minus` length s) c ++ s
|
||||
|
@ -6,6 +6,8 @@ import Language.JSON.Parser
|
||||
|
||||
import public Language.JSON.Data
|
||||
|
||||
import public Text.Bounded
|
||||
|
||||
%default total
|
||||
|
||||
||| Parse a JSON string.
|
||||
|
@ -34,8 +34,8 @@ jsonTokenMap = toTokenMap $
|
||||
]
|
||||
|
||||
export
|
||||
lexJSON : String -> Maybe (List JSONToken)
|
||||
lexJSON : String -> Maybe (List (WithBounds JSONToken))
|
||||
lexJSON str
|
||||
= case lex jsonTokenMap str of
|
||||
(tokens, _, _, "") => Just $ map TokenData.tok tokens
|
||||
(tokens, _, _, "") => Just $ tokens
|
||||
_ => Nothing
|
||||
|
@ -10,20 +10,20 @@ import public Language.JSON.Tokens
|
||||
%default total
|
||||
|
||||
private
|
||||
punct : Punctuation -> Grammar JSONToken True ()
|
||||
punct : Punctuation -> Grammar state JSONToken True ()
|
||||
punct p = match $ JTPunct p
|
||||
|
||||
private
|
||||
rawString : Grammar JSONToken True String
|
||||
rawString : Grammar state JSONToken True String
|
||||
rawString = do mstr <- match JTString
|
||||
the (Grammar _ False _) $
|
||||
the (Grammar _ _ False _) $
|
||||
case mstr of
|
||||
Just str => pure str
|
||||
Nothing => fail "invalid string"
|
||||
|
||||
mutual
|
||||
private
|
||||
json : Grammar JSONToken True JSON
|
||||
json : Grammar state JSONToken True JSON
|
||||
json = object
|
||||
<|> array
|
||||
<|> string
|
||||
@ -32,14 +32,14 @@ mutual
|
||||
<|> null
|
||||
|
||||
private
|
||||
object : Grammar JSONToken True JSON
|
||||
object : Grammar state JSONToken True JSON
|
||||
object = do punct $ Curly Open
|
||||
commit
|
||||
props <- properties
|
||||
punct $ Curly Close
|
||||
pure $ JObject props
|
||||
where
|
||||
properties : Grammar JSONToken False (List (String, JSON))
|
||||
properties : Grammar state JSONToken False (List (String, JSON))
|
||||
properties = sepBy (punct Comma) $
|
||||
do key <- rawString
|
||||
punct Colon
|
||||
@ -47,34 +47,34 @@ mutual
|
||||
pure (key, value)
|
||||
|
||||
private
|
||||
array : Grammar JSONToken True JSON
|
||||
array : Grammar state JSONToken True JSON
|
||||
array = do punct (Square Open)
|
||||
commit
|
||||
vals <- values
|
||||
punct (Square Close)
|
||||
pure (JArray vals)
|
||||
where
|
||||
values : Grammar JSONToken False (List JSON)
|
||||
values : Grammar state JSONToken False (List JSON)
|
||||
values = sepBy (punct Comma) json
|
||||
|
||||
private
|
||||
string : Grammar JSONToken True JSON
|
||||
string : Grammar state JSONToken True JSON
|
||||
string = map JString rawString
|
||||
|
||||
private
|
||||
boolean : Grammar JSONToken True JSON
|
||||
boolean : Grammar state JSONToken True JSON
|
||||
boolean = map JBoolean $ match JTBoolean
|
||||
|
||||
private
|
||||
number : Grammar JSONToken True JSON
|
||||
number : Grammar state JSONToken True JSON
|
||||
number = map JNumber $ match JTNumber
|
||||
|
||||
private
|
||||
null : Grammar JSONToken True JSON
|
||||
null : Grammar state JSONToken True JSON
|
||||
null = map (const JNull) $ match JTNull
|
||||
|
||||
export
|
||||
parseJSON : List JSONToken -> Maybe JSON
|
||||
parseJSON : List (WithBounds JSONToken) -> Maybe JSON
|
||||
parseJSON toks = case parse json $ filter (not . ignored) toks of
|
||||
Right (j, []) => Just j
|
||||
_ => Nothing
|
||||
|
@ -12,7 +12,7 @@ quo = is '"'
|
||||
|
||||
export
|
||||
esc : Lexer -> Lexer
|
||||
esc = escape '\\'
|
||||
esc = escape (is '\\')
|
||||
|
||||
private
|
||||
unicodeEscape : Lexer
|
||||
@ -36,7 +36,7 @@ jsonStringTokenMap = toTokenMap $
|
||||
]
|
||||
|
||||
export
|
||||
lexString : String -> Maybe (List JSONStringToken)
|
||||
lexString : String -> Maybe (List (WithBounds JSONStringToken))
|
||||
lexString x = case lex jsonStringTokenMap x of
|
||||
(toks, _, _, "") => Just $ map TokenData.tok toks
|
||||
(toks, _, _, "") => Just $ toks
|
||||
_ => Nothing
|
||||
|
@ -7,20 +7,20 @@ import Text.Parser
|
||||
%default total
|
||||
|
||||
private
|
||||
stringChar : Grammar JSONStringToken True Char
|
||||
stringChar : Grammar state JSONStringToken True Char
|
||||
stringChar = match JSTChar
|
||||
<|> match JSTSimpleEscape
|
||||
<|> match JSTUnicodeEscape
|
||||
|
||||
private
|
||||
quotedString : Grammar JSONStringToken True String
|
||||
quotedString : Grammar state JSONStringToken True String
|
||||
quotedString = let q = match JSTQuote in
|
||||
do chars <- between q q (many stringChar)
|
||||
eof
|
||||
pure $ pack chars
|
||||
|
||||
export
|
||||
parseString : List JSONStringToken -> Maybe String
|
||||
parseString : List (WithBounds JSONStringToken) -> Maybe String
|
||||
parseString toks = case parse quotedString toks of
|
||||
Right (str, []) => Just str
|
||||
_ => Nothing
|
||||
|
@ -2,6 +2,7 @@ module Language.JSON.Tokens
|
||||
|
||||
import Language.JSON.String
|
||||
import Text.Token
|
||||
import Text.Bounded
|
||||
|
||||
%default total
|
||||
|
||||
@ -76,6 +77,6 @@ TokenKind JSONTokenKind where
|
||||
tokValue JTIgnore _ = ()
|
||||
|
||||
export
|
||||
ignored : JSONToken -> Bool
|
||||
ignored (Tok JTIgnore _) = True
|
||||
ignored : WithBounds JSONToken -> Bool
|
||||
ignored (MkBounded (Tok JTIgnore _) _ _) = True
|
||||
ignored _ = False
|
||||
|
@ -153,15 +153,15 @@ pathTokenMap = toTokenMap $
|
||||
, (some $ non $ oneOf "/\\:?", PTText)
|
||||
]
|
||||
|
||||
lexPath : String -> List PathToken
|
||||
lexPath : String -> List (WithBounds PathToken)
|
||||
lexPath str =
|
||||
let
|
||||
(tokens, _, _, _) = lex pathTokenMap str
|
||||
in
|
||||
map TokenData.tok tokens
|
||||
tokens -- TokenData.tok tokens
|
||||
|
||||
-- match both '/' and '\\' regardless of the platform.
|
||||
bodySeparator : Grammar PathToken True ()
|
||||
bodySeparator : Grammar state PathToken True ()
|
||||
bodySeparator = (match $ PTPunct '\\') <|> (match $ PTPunct '/')
|
||||
|
||||
-- Windows will automatically translate '/' to '\\'. And the verbatim prefix,
|
||||
@ -169,7 +169,7 @@ bodySeparator = (match $ PTPunct '\\') <|> (match $ PTPunct '/')
|
||||
-- However, we just parse it and ignore it.
|
||||
--
|
||||
-- Example: \\?\
|
||||
verbatim : Grammar PathToken True ()
|
||||
verbatim : Grammar state PathToken True ()
|
||||
verbatim =
|
||||
do
|
||||
ignore $ count (exactly 2) $ match $ PTPunct '\\'
|
||||
@ -178,7 +178,7 @@ verbatim =
|
||||
pure ()
|
||||
|
||||
-- Example: \\server\share
|
||||
unc : Grammar PathToken True Volume
|
||||
unc : Grammar state PathToken True Volume
|
||||
unc =
|
||||
do
|
||||
ignore $ count (exactly 2) $ match $ PTPunct '\\'
|
||||
@ -188,7 +188,7 @@ unc =
|
||||
pure $ UNC server share
|
||||
|
||||
-- Example: \\?\server\share
|
||||
verbatimUnc : Grammar PathToken True Volume
|
||||
verbatimUnc : Grammar state PathToken True Volume
|
||||
verbatimUnc =
|
||||
do
|
||||
verbatim
|
||||
@ -198,7 +198,7 @@ verbatimUnc =
|
||||
pure $ UNC server share
|
||||
|
||||
-- Example: C:
|
||||
disk : Grammar PathToken True Volume
|
||||
disk : Grammar state PathToken True Volume
|
||||
disk =
|
||||
do
|
||||
text <- match PTText
|
||||
@ -209,21 +209,21 @@ disk =
|
||||
pure $ Disk (toUpper disk)
|
||||
|
||||
-- Example: \\?\C:
|
||||
verbatimDisk : Grammar PathToken True Volume
|
||||
verbatimDisk : Grammar state PathToken True Volume
|
||||
verbatimDisk =
|
||||
do
|
||||
verbatim
|
||||
disk <- disk
|
||||
pure disk
|
||||
|
||||
parseVolume : Grammar PathToken True Volume
|
||||
parseVolume : Grammar state PathToken True Volume
|
||||
parseVolume =
|
||||
verbatimUnc
|
||||
<|> verbatimDisk
|
||||
<|> unc
|
||||
<|> disk
|
||||
|
||||
parseBody : Grammar PathToken True Body
|
||||
parseBody : Grammar state PathToken True Body
|
||||
parseBody =
|
||||
do
|
||||
text <- match PTText
|
||||
@ -232,7 +232,7 @@ parseBody =
|
||||
"." => CurDir
|
||||
normal => Normal normal
|
||||
|
||||
parsePath : Grammar PathToken False Path
|
||||
parsePath : Grammar state PathToken False Path
|
||||
parsePath =
|
||||
do
|
||||
vol <- optional parseVolume
|
||||
|
91
libs/contrib/Text/Bounded.idr
Normal file
91
libs/contrib/Text/Bounded.idr
Normal file
@ -0,0 +1,91 @@
|
||||
module Text.Bounded
|
||||
|
||||
%default total
|
||||
|
||||
|
||||
public export
|
||||
record Bounds where
|
||||
constructor MkBounds
|
||||
startLine : Int
|
||||
startCol : Int
|
||||
endLine : Int
|
||||
endCol : Int
|
||||
|
||||
Show Bounds where
|
||||
showPrec d (MkBounds sl sc el ec) =
|
||||
showCon d "MkBounds" (concat [showArg sl, showArg sc, showArg el, showArg ec])
|
||||
|
||||
export
|
||||
startBounds : Bounds -> (Int, Int)
|
||||
startBounds b = (b.startLine, b.startCol)
|
||||
|
||||
export
|
||||
endBounds : Bounds -> (Int, Int)
|
||||
endBounds b = (b.endLine, b.endCol)
|
||||
|
||||
export
|
||||
Eq Bounds where
|
||||
(MkBounds sl sc el ec) == (MkBounds sl' sc' el' ec') =
|
||||
sl == sl'
|
||||
&& sc == sc'
|
||||
&& el == el'
|
||||
&& ec == ec'
|
||||
|
||||
public export
|
||||
record WithBounds ty where
|
||||
constructor MkBounded
|
||||
val : ty
|
||||
isIrrelevant : Bool
|
||||
bounds : Bounds
|
||||
|
||||
export
|
||||
start : WithBounds ty -> (Int, Int)
|
||||
start = startBounds . bounds
|
||||
|
||||
export
|
||||
end : WithBounds ty -> (Int, Int)
|
||||
end = endBounds . bounds
|
||||
|
||||
export
|
||||
Eq ty => Eq (WithBounds ty) where
|
||||
(MkBounded val ir bd) == (MkBounded val' ir' bd') =
|
||||
val == val' && ir == ir' && bd == bd'
|
||||
|
||||
export
|
||||
Show ty => Show (WithBounds ty) where
|
||||
showPrec d (MkBounded val ir bd) =
|
||||
showCon d "MkBounded" (concat [showArg ir, showArg val, showArg bd])
|
||||
|
||||
export
|
||||
Functor WithBounds where
|
||||
map f (MkBounded val ir bd) = MkBounded (f val) ir bd
|
||||
|
||||
export
|
||||
Foldable WithBounds where
|
||||
foldr c n b = c b.val n
|
||||
|
||||
export
|
||||
Traversable WithBounds where
|
||||
traverse f (MkBounded v a bd) = (\ v => MkBounded v a bd) <$> f v
|
||||
|
||||
export
|
||||
irrelevantBounds : ty -> WithBounds ty
|
||||
irrelevantBounds x = MkBounded x True (MkBounds (-1) (-1) (-1) (-1))
|
||||
|
||||
export
|
||||
removeIrrelevance : WithBounds ty -> WithBounds ty
|
||||
removeIrrelevance (MkBounded val ir bd) = MkBounded val True bd
|
||||
|
||||
export
|
||||
mergeBounds : WithBounds ty -> WithBounds ty' -> WithBounds ty'
|
||||
mergeBounds (MkBounded _ True _) (MkBounded val True _) = irrelevantBounds val
|
||||
mergeBounds (MkBounded _ True _) b2 = b2
|
||||
mergeBounds b1 (MkBounded val True _) = const val <$> b1
|
||||
mergeBounds b1 b2 =
|
||||
let (ur, uc) = min (start b1) (start b2)
|
||||
(lr, lc) = max (end b1) (end b2) in
|
||||
MkBounded b2.val False (MkBounds ur uc lr lc)
|
||||
|
||||
export
|
||||
joinBounds : WithBounds (WithBounds ty) -> WithBounds ty
|
||||
joinBounds b = mergeBounds b b.val
|
69
libs/contrib/Text/Distance/Levenshtein.idr
Normal file
69
libs/contrib/Text/Distance/Levenshtein.idr
Normal file
@ -0,0 +1,69 @@
|
||||
module Text.Distance.Levenshtein
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Data.String
|
||||
import Data.IOMatrix
|
||||
import Data.List.Extra
|
||||
|
||||
%default total
|
||||
|
||||
||| Self-evidently correct but O(3 ^ (min mn)) complexity
|
||||
spec : String -> String -> Nat
|
||||
spec a b = loop (fastUnpack a) (fastUnpack b) where
|
||||
|
||||
loop : List Char -> List Char -> Nat
|
||||
loop [] ys = length ys -- deletions
|
||||
loop xs [] = length xs -- insertions
|
||||
loop (x :: xs) (y :: ys)
|
||||
= if x == y then loop xs ys -- match
|
||||
else 1 + minimum [ loop (x :: xs) ys -- insert y
|
||||
, loop xs (y :: ys) -- delete x
|
||||
, loop xs ys -- substitute y for x
|
||||
]
|
||||
|
||||
||| Dynamic programming
|
||||
export
|
||||
compute : HasIO io => String -> String -> io Nat
|
||||
compute a b = do
|
||||
let w = strLength a
|
||||
let h = strLength b
|
||||
-- In mat[i][j], we store the distance between
|
||||
-- * the suffix of a of size i
|
||||
-- * the suffix of b of size j
|
||||
-- So we need a matrix of size (|a|+1) * (|b|+1)
|
||||
mat <- new (w+1) (h+1)
|
||||
-- Whenever one of the two suffixes of interest is empty, the only
|
||||
-- winning move is to:
|
||||
-- * delete all of the first
|
||||
-- * insert all of the second
|
||||
-- i.e. the cost is the length of the non-zero suffix
|
||||
for_ [0..w] $ \ i => write mat i 0 i -- deletions
|
||||
for_ [0..h] $ \ j => write mat 0 j j -- insertions
|
||||
|
||||
-- We introduce a specialised `read` for ease of use
|
||||
let get = \i, j => case !(read {io} mat i j) of
|
||||
Nothing => assert_total $
|
||||
idris_crash "INTERNAL ERROR: Badly initialised matrix"
|
||||
Just n => pure n
|
||||
|
||||
-- We fill the matrix from the bottom up, using the same formula we used
|
||||
-- in the specification's `loop`.
|
||||
for_ [1..h] $ \ j => do
|
||||
for_ [1..w] $ \ i => do
|
||||
-- here we change Levenshtein slightly so that we may only substitute
|
||||
-- alpha / numerical characters for similar ones. This avoids suggesting
|
||||
-- "#" as a replacement for an out of scope "n".
|
||||
let cost = let c = assert_total $ strIndex a (i-1)
|
||||
d = assert_total $ strIndex b (j-1)
|
||||
in if c == d then 0 else
|
||||
if isAlpha c && isAlpha d then 1 else
|
||||
if isDigit c && isDigit d then 1 else 2
|
||||
write mat i j $
|
||||
minimum [ 1 + !(get i (j-1)) -- insert y
|
||||
, 1 + !(get (i-1) j) -- delete x
|
||||
, cost + !(get (i-1) (j-1)) -- equal or substitute y for x
|
||||
]
|
||||
|
||||
-- Once the matrix is fully filled, we can simply read the top right corner
|
||||
integerToNat . cast <$> get w h
|
@ -8,6 +8,8 @@ import public Text.Lexer.Core
|
||||
import public Text.Quantity
|
||||
import public Text.Token
|
||||
|
||||
%default total
|
||||
|
||||
export
|
||||
toTokenMap : List (Lexer, k) -> TokenMap (Token k)
|
||||
toTokenMap = map $ \(l, kind) => (l, Tok kind)
|
||||
@ -37,22 +39,22 @@ non l = reject l <+> any
|
||||
export
|
||||
choiceMap : {c : Bool} ->
|
||||
Foldable t => (a -> Recognise c) -> t a -> Recognise c
|
||||
choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
|
||||
f x <|> acc)
|
||||
fail xs
|
||||
choiceMap {c} f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
|
||||
f x <|> acc)
|
||||
fail xs
|
||||
|
||||
||| Recognise the first matching recogniser in a container. Consumes input if
|
||||
||| recognisers in the list consume. Fails if the container is empty.
|
||||
export
|
||||
choice : {c : _} ->
|
||||
Foldable t => t (Recognise c) -> Recognise c
|
||||
choice = Lexer.choiceMap id
|
||||
choice = choiceMap id
|
||||
|
||||
||| Sequence a list of recognisers. Guaranteed to consume input if the list is
|
||||
||| non-empty and the recognisers consume.
|
||||
export
|
||||
concat : {c : _} ->
|
||||
(xs : List (Recognise c)) -> Recognise (c && isCons xs)
|
||||
(xs : List (Recognise c)) -> Recognise (isCons xs && c)
|
||||
concat = Lexer.Core.concatMap id
|
||||
|
||||
||| Recognise a specific character.
|
||||
@ -124,6 +126,13 @@ mutual
|
||||
many : Lexer -> Recognise False
|
||||
many l = opt (some l)
|
||||
|
||||
||| Repeat the sub-lexer `l` one or more times until the lexer
|
||||
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
|
||||
||| /((?!`stopBefore`)`l`)\+/
|
||||
export
|
||||
someUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Lexer
|
||||
someUntil stopBefore l = some (reject stopBefore <+> l)
|
||||
|
||||
||| Repeat the sub-lexer `l` zero or more times until the lexer
|
||||
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
|
||||
||| Not guaranteed to consume input.
|
||||
@ -140,14 +149,6 @@ export
|
||||
manyThen : (stopAfter : Recognise c) -> (l : Lexer) -> Recognise c
|
||||
manyThen stopAfter l = manyUntil stopAfter l <+> stopAfter
|
||||
|
||||
||| Recognise many instances of `l` until an instance of `end` is
|
||||
||| encountered.
|
||||
|||
|
||||
||| Useful for defining comments.
|
||||
export
|
||||
manyTill : (l : Lexer) -> (end : Lexer) -> Recognise False
|
||||
manyTill l end = end <|> opt (l <+> manyTill l end)
|
||||
|
||||
||| Recognise a sub-lexer repeated as specified by `q`. Fails if `q` has
|
||||
||| `min` and `max` in the wrong order. Consumes input unless `min q` is zero.
|
||||
||| /`l`{`q`}/
|
||||
@ -172,6 +173,18 @@ export
|
||||
digits : Lexer
|
||||
digits = some digit
|
||||
|
||||
||| Recognise a single binary digit
|
||||
||| /[0-1]/
|
||||
export
|
||||
binDigit : Lexer
|
||||
binDigit = pred (\c => c == '0' || c == '1')
|
||||
|
||||
||| Recognise one or more binary digits
|
||||
||| /[0-1]+/
|
||||
export
|
||||
binDigits : Lexer
|
||||
binDigits = some binDigit
|
||||
|
||||
||| Recognise a single hexidecimal digit
|
||||
||| /[0-9A-Fa-f]/
|
||||
export
|
||||
@ -188,13 +201,13 @@ hexDigits = some hexDigit
|
||||
||| /[0-8]/
|
||||
export
|
||||
octDigit : Lexer
|
||||
octDigit = pred isHexDigit
|
||||
octDigit = pred isOctDigit
|
||||
|
||||
||| Recognise one or more octal digits
|
||||
||| /[0-8]+/
|
||||
export
|
||||
octDigits : Lexer
|
||||
octDigits = some hexDigit
|
||||
octDigits = some octDigit
|
||||
|
||||
||| Recognise a single alpha character
|
||||
||| /[A-Za-z]/
|
||||
@ -307,18 +320,19 @@ export
|
||||
quote : (q : Lexer) -> (l : Lexer) -> Lexer
|
||||
quote q l = surround q q l
|
||||
|
||||
||| Recognise an escape character (often '\\') followed by a sub-lexer
|
||||
||| Recognise an escape sub-lexer (often '\\') followed by
|
||||
||| another sub-lexer
|
||||
||| /[`esc`]`l`/
|
||||
export
|
||||
escape : (esc : Char) -> Lexer -> Lexer
|
||||
escape esc l = is esc <+> l
|
||||
escape : (esc : Lexer) -> Lexer -> Lexer
|
||||
escape esc l = esc <+> l
|
||||
|
||||
||| Recognise a string literal, including escaped characters.
|
||||
||| (Note: doesn't yet handle escape sequences such as \123)
|
||||
||| /"(\\\\.|.)\*?"/
|
||||
export
|
||||
stringLit : Lexer
|
||||
stringLit = quote (is '"') (escape '\\' any <|> any)
|
||||
stringLit = quote (is '"') (escape (is '\\') any <|> any)
|
||||
|
||||
||| Recognise a character literal, including escaped characters.
|
||||
||| (Note: doesn't yet handle escape sequences such as \123)
|
||||
@ -326,7 +340,7 @@ stringLit = quote (is '"') (escape '\\' any <|> any)
|
||||
export
|
||||
charLit : Lexer
|
||||
charLit = let q = '\'' in
|
||||
is q <+> (escape '\\' (control <|> any) <|> isNot q) <+> is q
|
||||
is q <+> (escape (is '\\') (control <|> any) <|> isNot q) <+> is q
|
||||
where
|
||||
lexStr : List String -> Lexer
|
||||
lexStr [] = fail
|
||||
@ -348,12 +362,40 @@ export
|
||||
intLit : Lexer
|
||||
intLit = opt (is '-') <+> digits
|
||||
|
||||
||| Recognise a binary literal, prefixed by "0b"
|
||||
||| /0b[0-1]+/
|
||||
export
|
||||
binLit : Lexer
|
||||
binLit = exact "0b" <+> binDigits
|
||||
|
||||
||| Recognise a hexidecimal literal, prefixed by "0x" or "0X"
|
||||
||| /0[Xx][0-9A-Fa-f]+/
|
||||
export
|
||||
hexLit : Lexer
|
||||
hexLit = approx "0x" <+> hexDigits
|
||||
|
||||
||| Recognise an octal literal, prefixed by "0o"
|
||||
||| /0o[0-9A-Fa-f]+/
|
||||
export
|
||||
octLit : Lexer
|
||||
octLit = exact "0o" <+> octDigits
|
||||
|
||||
export
|
||||
digitsUnderscoredLit : Lexer
|
||||
digitsUnderscoredLit = digits <+> many (is '_' <+> digits)
|
||||
|
||||
export
|
||||
binUnderscoredLit : Lexer
|
||||
binUnderscoredLit = binLit <+> many (is '_' <+> binDigits)
|
||||
|
||||
export
|
||||
hexUnderscoredLit : Lexer
|
||||
hexUnderscoredLit = hexLit <+> many (is '_' <+> hexDigits)
|
||||
|
||||
export
|
||||
octUnderscoredLit : Lexer
|
||||
octUnderscoredLit = octLit <+> many (is '_' <+> octDigits)
|
||||
|
||||
||| Recognise `start`, then recognise all input until a newline is encountered,
|
||||
||| and consume the newline. Will succeed if end-of-input is encountered before
|
||||
||| a newline.
|
||||
|
@ -1,15 +1,19 @@
|
||||
module Text.Lexer.Core
|
||||
|
||||
import public Control.Delayed
|
||||
import Data.Bool
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Data.Nat
|
||||
import Data.String
|
||||
|
||||
import public Control.Delayed
|
||||
import public Text.Bounded
|
||||
|
||||
%default total
|
||||
|
||||
||| A language of token recognisers.
|
||||
||| @ consumes If `True`, this recogniser is guaranteed to consume at
|
||||
||| least one character of input when it succeeds.
|
||||
public export
|
||||
export
|
||||
data Recognise : (consumes : Bool) -> Type where
|
||||
Empty : Recognise False
|
||||
Fail : Recognise c
|
||||
@ -17,6 +21,7 @@ data Recognise : (consumes : Bool) -> Type where
|
||||
Pred : (Char -> Bool) -> Recognise True
|
||||
SeqEat : Recognise True -> Inf (Recognise e) -> Recognise True
|
||||
SeqEmpty : Recognise e1 -> Recognise e2 -> Recognise (e1 || e2)
|
||||
SeqSame : Recognise e -> Recognise e -> Recognise e
|
||||
Alt : Recognise e1 -> Recognise e2 -> Recognise (e1 && e2)
|
||||
|
||||
||| A token recogniser. Guaranteed to consume at least one character.
|
||||
@ -24,8 +29,6 @@ public export
|
||||
Lexer : Type
|
||||
Lexer = Recognise True
|
||||
|
||||
-- %allow_overloads (<+>)
|
||||
|
||||
||| Sequence two recognisers. If either consumes a character, the sequence
|
||||
||| is guaranteed to consume a character.
|
||||
export %inline
|
||||
@ -34,11 +37,8 @@ export %inline
|
||||
(<+>) {c1 = False} = SeqEmpty
|
||||
(<+>) {c1 = True} = SeqEat
|
||||
|
||||
%allow_overloads (<|>)
|
||||
|
||||
||| Alternative recognisers. If both consume, the combination is guaranteed
|
||||
||| to consumer a character.
|
||||
%inline
|
||||
||| to consume a character.
|
||||
export
|
||||
(<|>) : Recognise c1 -> Recognise c2 -> Recognise (c1 && c2)
|
||||
(<|>) = Alt
|
||||
@ -49,13 +49,11 @@ fail : Recognise c
|
||||
fail = Fail
|
||||
|
||||
||| Recognise no input (doesn't consume any input)
|
||||
%inline
|
||||
export
|
||||
empty : Recognise False
|
||||
empty = Empty
|
||||
|
||||
||| Recognise a character that matches a predicate
|
||||
%inline
|
||||
export
|
||||
pred : (Char -> Bool) -> Lexer
|
||||
pred = Pred
|
||||
@ -70,33 +68,25 @@ export
|
||||
reject : Recognise c -> Recognise False
|
||||
reject = Lookahead False
|
||||
|
||||
%allow_overloads concatMap
|
||||
|
||||
||| Sequence the recognisers resulting from applying a function to each element
|
||||
||| of a list. The resulting recogniser will consume input if the produced
|
||||
||| recognisers consume and the list is non-empty.
|
||||
export
|
||||
concatMap : (a -> Recognise c) -> (xs : List a) ->
|
||||
Recognise (c && (isCons xs))
|
||||
concatMap _ [] = rewrite andFalseFalse c in Empty
|
||||
concatMap f (x :: xs)
|
||||
= rewrite andTrueNeutral c in
|
||||
rewrite sym (orSameAndRightNeutral c (isCons xs)) in
|
||||
SeqEmpty (f x) (Core.concatMap f xs)
|
||||
concatMap : (a -> Recognise c) -> (xs : List a) -> Recognise (isCons xs && c)
|
||||
concatMap _ [] = Empty
|
||||
concatMap f (x :: []) = f x
|
||||
concatMap f (x :: xs@(_ :: _)) = SeqSame (f x) (concatMap f xs)
|
||||
|
||||
data StrLen : Type where
|
||||
MkStrLen : String -> Nat -> StrLen
|
||||
|
||||
Show StrLen where
|
||||
show (MkStrLen str n) = str ++ "(" ++ show n ++ ")"
|
||||
|
||||
getString : StrLen -> String
|
||||
getString (MkStrLen str n) = str
|
||||
|
||||
strIndex : StrLen -> Nat -> Maybe Char
|
||||
strIndex (MkStrLen str len) i
|
||||
= if cast {to = Integer} i >= cast len then Nothing
|
||||
else Just (assert_total (prim__strIndex str (cast i)))
|
||||
else Just (assert_total (prim__strIndex str (cast i)))
|
||||
|
||||
mkStr : String -> StrLen
|
||||
mkStr str = MkStrLen str (length str)
|
||||
@ -105,19 +95,9 @@ strTail : Nat -> StrLen -> StrLen
|
||||
strTail start (MkStrLen str len)
|
||||
= MkStrLen (substr start len str) (minus len start)
|
||||
|
||||
isJust : Maybe a -> Bool
|
||||
isJust Nothing = False
|
||||
isJust (Just x) = True
|
||||
|
||||
export
|
||||
unpack' : String -> List Char
|
||||
unpack' str
|
||||
= case strUncons str of
|
||||
Nothing => []
|
||||
Just (x, xs) => x :: unpack' xs
|
||||
|
||||
-- If the string is recognised, returns the index at which the token
|
||||
-- ends
|
||||
export
|
||||
scan : Recognise c -> List Char -> List Char -> Maybe (List Char, List Char)
|
||||
scan Empty tok str = pure (tok, str)
|
||||
scan Fail tok str = Nothing
|
||||
@ -137,6 +117,9 @@ scan (SeqEat r1 r2) tok str
|
||||
scan (SeqEmpty r1 r2) tok str
|
||||
= do (tok', rest) <- scan r1 tok str
|
||||
scan r2 tok' rest
|
||||
scan (SeqSame r1 r2) tok str
|
||||
= do (tok', rest) <- scan r1 tok str
|
||||
scan r2 tok' rest
|
||||
scan (Alt r1 r2) tok str
|
||||
= maybe (scan r2 tok str) Just (scan r1 tok str)
|
||||
|
||||
@ -148,27 +131,15 @@ public export
|
||||
TokenMap : (tokenType : Type) -> Type
|
||||
TokenMap tokenType = List (Lexer, String -> tokenType)
|
||||
|
||||
||| A token, and the line and column where it was in the input
|
||||
public export
|
||||
record TokenData a where
|
||||
constructor MkToken
|
||||
line : Int
|
||||
col : Int
|
||||
tok : a
|
||||
|
||||
export
|
||||
Show a => Show (TokenData a) where
|
||||
show t = show (line t) ++ ":" ++ show (col t) ++ ":" ++ show (tok t)
|
||||
|
||||
tokenise : (TokenData a -> Bool) ->
|
||||
tokenise : (a -> Bool) ->
|
||||
(line : Int) -> (col : Int) ->
|
||||
List (TokenData a) -> TokenMap a ->
|
||||
List Char -> (List (TokenData a), (Int, Int, List Char))
|
||||
List (WithBounds a) -> TokenMap a ->
|
||||
List Char -> (List (WithBounds a), (Int, Int, List Char))
|
||||
tokenise pred line col acc tmap str
|
||||
= case getFirstToken tmap str of
|
||||
Just (tok, line', col', rest) =>
|
||||
-- assert total because getFirstToken must consume something
|
||||
if pred tok
|
||||
if pred tok.val
|
||||
then (reverse acc, (line, col, []))
|
||||
else assert_total (tokenise pred line' col' (tok :: acc) tmap rest)
|
||||
Nothing => (reverse acc, (line, col, str))
|
||||
@ -183,13 +154,15 @@ tokenise pred line col acc tmap str
|
||||
(incol, _) => cast (length incol)
|
||||
|
||||
getFirstToken : TokenMap a -> List Char ->
|
||||
Maybe (TokenData a, Int, Int, List Char)
|
||||
Maybe (WithBounds a, Int, Int, List Char)
|
||||
getFirstToken [] str = Nothing
|
||||
getFirstToken ((lex, fn) :: ts) str
|
||||
= case scan lex [] str of
|
||||
Just (tok, rest) => Just (MkToken line col (fn (pack (reverse tok))),
|
||||
line + cast (countNLs tok),
|
||||
getCols tok col, rest)
|
||||
Just (tok, rest) =>
|
||||
let line' = line + cast (countNLs tok)
|
||||
col' = getCols tok col in
|
||||
Just (MkBounded (fn (fastPack (reverse tok))) False (MkBounds line col line' col'),
|
||||
line', col', rest)
|
||||
Nothing => getFirstToken ts str
|
||||
|
||||
||| Given a mapping from lexers to token generating functions (the
|
||||
@ -197,14 +170,14 @@ tokenise pred line col acc tmap str
|
||||
||| and the line, column, and remainder of the input at the first point in the
|
||||
||| string where there are no recognised tokens.
|
||||
export
|
||||
lex : TokenMap a -> String -> (List (TokenData a), (Int, Int, String))
|
||||
lex : TokenMap a -> String -> (List (WithBounds a), (Int, Int, String))
|
||||
lex tmap str
|
||||
= let (ts, (l, c, str')) = tokenise (const False) 0 0 [] tmap (fastUnpack str) in
|
||||
(ts, (l, c, pack str'))
|
||||
= let (ts, (l, c, str')) = tokenise (const False) 0 0 [] tmap (unpack str) in
|
||||
(ts, (l, c, fastPack str'))
|
||||
|
||||
export
|
||||
lexTo : (TokenData a -> Bool) ->
|
||||
TokenMap a -> String -> (List (TokenData a), (Int, Int, String))
|
||||
lexTo : (a -> Bool) ->
|
||||
TokenMap a -> String -> (List (WithBounds a), (Int, Int, String))
|
||||
lexTo pred tmap str
|
||||
= let (ts, (l, c, str')) = tokenise pred 0 0 [] tmap (fastUnpack str) in
|
||||
(ts, (l, c, pack str'))
|
||||
= let (ts, (l, c, str')) = tokenise pred 0 0 [] tmap (unpack str) in
|
||||
(ts, (l, c, fastPack str'))
|
||||
|
153
libs/contrib/Text/Lexer/Tokenizer.idr
Normal file
153
libs/contrib/Text/Lexer/Tokenizer.idr
Normal file
@ -0,0 +1,153 @@
|
||||
module Text.Lexer.Tokenizer
|
||||
|
||||
import Data.List
|
||||
import Data.Either
|
||||
import Data.Nat
|
||||
import Data.String
|
||||
|
||||
import Data.String.Extra
|
||||
import Text.Lexer.Core
|
||||
import Text.Lexer
|
||||
import Text.PrettyPrint.Prettyprinter
|
||||
import Text.PrettyPrint.Prettyprinter.Util
|
||||
|
||||
import public Control.Delayed
|
||||
import public Text.Bounded
|
||||
|
||||
%default total
|
||||
|
||||
||| Description of a language's tokenization rule.
|
||||
export
|
||||
data Tokenizer : (tokenType : Type) -> Type where
|
||||
Match : Lexer -> (String -> tokenType) -> Tokenizer tokenType
|
||||
Compose : (begin : Lexer) ->
|
||||
(mapBegin : String -> tokenType) ->
|
||||
(tagger : String -> tag) ->
|
||||
(middle : Inf (tag -> Tokenizer tokenType)) ->
|
||||
(end : tag -> Lexer) ->
|
||||
(mapEnd : String -> tokenType) ->
|
||||
Tokenizer tokenType
|
||||
Alt : Tokenizer tokenType -> Lazy (Tokenizer tokenType) -> Tokenizer tokenType
|
||||
|
||||
||| Alternative tokenizer rules.
|
||||
export
|
||||
(<|>) : Tokenizer t -> Lazy (Tokenizer t) -> Tokenizer t
|
||||
(<|>) = Alt
|
||||
|
||||
||| Match on a recogniser and cast the string to a token.
|
||||
export
|
||||
match : Lexer -> (String -> a) -> Tokenizer a
|
||||
match = Match
|
||||
|
||||
||| Compose other tokenizer. Language composition should be quoted between
|
||||
||| a begin lexer and a end lexer. The begin token can be used to generate
|
||||
||| the composition tokenizer and the end lexer.
|
||||
export
|
||||
compose : (begin : Lexer) ->
|
||||
(mapBegin : String -> a) ->
|
||||
(tagger : String -> tag) ->
|
||||
(middle : Inf (tag -> Tokenizer a)) ->
|
||||
(end : tag -> Lexer) ->
|
||||
(mapEnd : String -> a) ->
|
||||
Tokenizer a
|
||||
compose = Compose
|
||||
|
||||
||| Stop reason why tokenizer can't make more progress.
|
||||
||| @ ComposeNotClosing carries the span of composition begin token in the
|
||||
||| form of `(startLine, startCol), (endLine, endCol)`.
|
||||
public export
|
||||
data StopReason = EndInput | NoRuleApply | ComposeNotClosing (Int, Int) (Int, Int)
|
||||
|
||||
export
|
||||
Show StopReason where
|
||||
show EndInput = "EndInput"
|
||||
show NoRuleApply = "NoRuleApply"
|
||||
show (ComposeNotClosing start end) = "ComposeNotClosing " ++ show start ++ " " ++ show end
|
||||
|
||||
export
|
||||
Pretty StopReason where
|
||||
pretty EndInput = pretty "EndInput"
|
||||
pretty NoRuleApply = pretty "NoRuleApply"
|
||||
pretty (ComposeNotClosing start end) = "ComposeNotClosing" <++> pretty start <++> pretty end
|
||||
|
||||
tokenise : Lexer ->
|
||||
Tokenizer a ->
|
||||
(line, col : Int) -> List (WithBounds a) ->
|
||||
List Char ->
|
||||
(List (WithBounds a), (StopReason, Int, Int, List Char))
|
||||
tokenise reject tokenizer line col acc [] = (reverse acc, EndInput, (line, col, []))
|
||||
tokenise reject tokenizer line col acc str
|
||||
= case scan reject [] str of
|
||||
Just _ => (reverse acc, (EndInput, line, col, str))
|
||||
Nothing => case getFirstMatch tokenizer str of
|
||||
Right (toks, line', col', rest) =>
|
||||
-- assert total because getFirstMatch must consume something
|
||||
assert_total (tokenise reject tokenizer line' col' (toks ++ acc) rest)
|
||||
Left reason => (reverse acc, reason, (line, col, str))
|
||||
where
|
||||
countNLs : List Char -> Nat
|
||||
countNLs str = List.length (filter (== '\n') str)
|
||||
|
||||
getCols : List Char -> Int -> Int
|
||||
getCols x c
|
||||
= case span (/= '\n') x of
|
||||
(incol, []) => c + cast (length incol)
|
||||
(incol, _) => cast (length incol)
|
||||
|
||||
-- get the next lexeme using the `Lexer` in argument, its position and the input
|
||||
-- Returns the new position, the lexeme parsed and the rest of the input
|
||||
-- If parsing the lexer fails, this returns `Nothing`
|
||||
getNext : (lexer : Lexer) -> (line, col : Int) -> (input : List Char) -> Maybe (String, Int, Int, List Char)
|
||||
getNext lexer line col str =
|
||||
let Just (token, rest) = scan lexer [] str
|
||||
| _ => Nothing
|
||||
line' = line + cast (countNLs token)
|
||||
col' = getCols token col
|
||||
tokenStr = fastPack $ reverse token
|
||||
in pure (tokenStr, line', col', rest)
|
||||
|
||||
getFirstMatch : Tokenizer a -> List Char ->
|
||||
Either StopReason (List (WithBounds a), Int, Int, List Char)
|
||||
getFirstMatch (Match lex fn) str
|
||||
= let Just (tok, line', col', rest) = getNext lex line col str
|
||||
| _ => Left NoRuleApply
|
||||
tok' = MkBounded (fn tok) False (MkBounds line col line' col')
|
||||
in Right ([tok'], line', col', rest)
|
||||
getFirstMatch (Compose begin mapBegin tagger middleFn endFn mapEnd) str
|
||||
= let Just (beginTok', line', col' , rest) = getNext begin line col str
|
||||
| Nothing => Left NoRuleApply
|
||||
tag = tagger beginTok'
|
||||
middle = middleFn tag
|
||||
end = endFn tag
|
||||
beginTok'' = MkBounded (mapBegin beginTok') False (MkBounds line col line' col')
|
||||
(midToks, (reason, line'', col'', rest'')) =
|
||||
tokenise end middle line' col' [] rest
|
||||
in case reason of
|
||||
(ComposeNotClosing _ _) => Left reason
|
||||
_ => let Just (endTok', lineEnd, colEnd, restEnd) =
|
||||
getNext end line'' col'' rest''
|
||||
| _ => Left $ ComposeNotClosing (line, col) (line', col')
|
||||
endTok'' = MkBounded (mapEnd endTok') False (MkBounds line'' col'' lineEnd colEnd)
|
||||
in Right ([endTok''] ++ reverse midToks ++ [beginTok''], lineEnd, colEnd, restEnd)
|
||||
getFirstMatch (Alt t1 t2) str
|
||||
= case getFirstMatch t1 str of
|
||||
Right result => Right result
|
||||
Left reason@(ComposeNotClosing _ _) => Left reason
|
||||
Left _ => getFirstMatch t2 str
|
||||
|
||||
export
|
||||
lexTo : Lexer ->
|
||||
Tokenizer a ->
|
||||
String ->
|
||||
(List (WithBounds a), (StopReason, Int, Int, String))
|
||||
lexTo reject tokenizer str
|
||||
= let (ts, reason, (l, c, str')) =
|
||||
tokenise reject tokenizer 0 0 [] (fastUnpack str) in
|
||||
(ts, reason, (l, c, fastPack str'))
|
||||
|
||||
||| Given a tokenizer and an input string, return a list of recognised tokens,
|
||||
||| and the line, column, and remainder of the input at the first point in the string
|
||||
||| where there are no recognised tokens.
|
||||
export
|
||||
lex : Tokenizer a -> String -> (List (WithBounds a), (StopReason, Int, Int, String))
|
||||
lex tokenizer str = lexTo (pred $ const False) tokenizer str
|
234
libs/contrib/Text/Literate.idr
Normal file
234
libs/contrib/Text/Literate.idr
Normal file
@ -0,0 +1,234 @@
|
||||
||| A simple module to process 'literate' documents.
|
||||
|||
|
||||
||| The module uses a lexer to split the document into code blocks,
|
||||
||| delineated by user-defined markers, and code lines that are
|
||||
||| indicated be a line marker. The lexer returns a document stripped
|
||||
||| of non-code elements but preserving the original document's line
|
||||
||| count. Column numbering of code lines are not preserved.
|
||||
|||
|
||||
||| The underlying tokeniser is greedy.
|
||||
|||
|
||||
||| Once it identifies a line marker it reads a prettifying space then
|
||||
||| consumes until the end of line. Once identifies a starting code
|
||||
||| block marker, the lexer will consume input until the next
|
||||
||| identifiable end block is encountered. Any other content is
|
||||
||| treated as part of the original document.
|
||||
|||
|
||||
||| Thus, the input literate files *must* be well-formed w.r.t
|
||||
||| to code line markers and code blocks.
|
||||
|||
|
||||
||| A further restriction is that literate documents cannot contain
|
||||
||| the markers within the document's main text: This will confuse the
|
||||
||| lexer.
|
||||
|||
|
||||
module Text.Literate
|
||||
|
||||
import Text.Lexer
|
||||
|
||||
import Data.List
|
||||
import Data.List1
|
||||
import Data.List.Views
|
||||
import Data.String
|
||||
import Data.String.Extra
|
||||
|
||||
%default total
|
||||
|
||||
untilEOL : Recognise False
|
||||
untilEOL = manyUntil newline any
|
||||
|
||||
line : String -> Lexer
|
||||
line s = exact s <+> (newline <|> space <+> untilEOL)
|
||||
|
||||
block : String -> String -> Lexer
|
||||
block s e = surround (exact s <+> untilEOL) (exact e <+> untilEOL) any
|
||||
|
||||
notCodeLine : Lexer
|
||||
notCodeLine = newline
|
||||
<|> any <+> untilEOL
|
||||
|
||||
data Token = CodeBlock String String String
|
||||
| Any String
|
||||
| CodeLine String String
|
||||
|
||||
Show Token where
|
||||
showPrec d (CodeBlock l r x) = showCon d "CodeBlock" $ showArg l ++ showArg r ++ showArg x
|
||||
showPrec d (Any x) = showCon d "Any" $ showArg x
|
||||
showPrec d (CodeLine m x) = showCon d "CodeLine" $ showArg m ++ showArg x
|
||||
|
||||
rawTokens : (delims : List (String, String))
|
||||
-> (markers : List String)
|
||||
-> TokenMap (Token)
|
||||
rawTokens delims ls =
|
||||
map (\(l,r) => (block l r, CodeBlock (trim l) (trim r))) delims
|
||||
++ map (\m => (line m, CodeLine (trim m))) ls
|
||||
++ [(notCodeLine, Any)]
|
||||
|
||||
namespace Compat
|
||||
-- `reduce` below was depending on the old behaviour of `lines` before #1585
|
||||
-- was merged. That old `lines` function is added here to preserve behaviour
|
||||
-- of `reduce`.
|
||||
lines' : List Char -> List1 (List Char)
|
||||
lines' [] = singleton []
|
||||
lines' s = case break isNL s of
|
||||
(l, s') => l ::: case s' of
|
||||
[] => []
|
||||
_ :: s'' => forget $ lines' (assert_smaller s s'')
|
||||
export
|
||||
lines : String -> List1 String
|
||||
lines s = map pack (lines' (unpack s))
|
||||
|
||||
||| Merge the tokens into a single source file.
|
||||
reduce : List (WithBounds Token) -> List String -> String
|
||||
reduce [] acc = fastAppend (reverse acc)
|
||||
reduce (MkBounded (Any x) _ _ :: rest) acc =
|
||||
-- newline will always be tokenized as a single token
|
||||
if x == "\n"
|
||||
then reduce rest ("\n"::acc)
|
||||
else reduce rest acc
|
||||
|
||||
reduce (MkBounded (CodeLine m src) _ _ :: rest) acc =
|
||||
if m == trim src
|
||||
then reduce rest ("\n"::acc)
|
||||
else reduce rest ((substr (length m + 1) -- remove space to right of marker.
|
||||
(length src)
|
||||
src
|
||||
)::acc)
|
||||
|
||||
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc with (Compat.lines src) -- Strip the deliminators surrounding the block.
|
||||
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: ys) with (snocList ys)
|
||||
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: []) | Empty = reduce rest acc -- 2
|
||||
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: (srcs ++ [f])) | (Snoc f srcs rec) =
|
||||
-- the "\n" counts for the open deliminator; the closing deliminator should always be followed by a (Any "\n"), so we don't add a newline
|
||||
reduce rest (((unlines srcs) ++ "\n") :: "\n" :: acc)
|
||||
|
||||
-- [ NOTE ] 1 & 2 shouldn't happen as code blocks are well formed i.e. have two deliminators.
|
||||
|
||||
|
||||
public export
|
||||
record LiterateError where
|
||||
constructor MkLitErr
|
||||
line : Int
|
||||
column : Int
|
||||
input : String
|
||||
|
||||
||| Description of literate styles.
|
||||
|||
|
||||
||| A 'literate' style comprises of
|
||||
|||
|
||||
||| + a list of code block deliminators (`deliminators`);
|
||||
||| + a list of code line markers (`line_markers`); and
|
||||
||| + a list of known file extensions `file_extensions`.
|
||||
|||
|
||||
||| Some example specifications:
|
||||
|||
|
||||
||| + Bird Style
|
||||
|||
|
||||
|||```
|
||||
|||MkLitStyle Nil [">"] [".lidr"]
|
||||
|||```
|
||||
|||
|
||||
||| + Literate Haskell (for LaTeX)
|
||||
|||
|
||||
|||```
|
||||
|||MkLitStyle [("\\begin{code}", "\\end{code}"),("\\begin{spec}","\\end{spec}")]
|
||||
||| Nil
|
||||
||| [".lhs", ".tex"]
|
||||
|||```
|
||||
|||
|
||||
||| + OrgMode
|
||||
|||
|
||||
|||```
|
||||
|||MkLitStyle [("#+BEGIN_SRC idris","#+END_SRC"), ("#+COMMENT idris","#+END_COMMENT")]
|
||||
||| ["#+IDRIS:"]
|
||||
||| [".org"]
|
||||
|||```
|
||||
|||
|
||||
||| + Common Mark
|
||||
|||
|
||||
|||```
|
||||
|||MkLitStyle [("```idris","```"), ("<!-- idris","--!>")]
|
||||
||| Nil
|
||||
||| [".md", ".markdown"]
|
||||
|||```
|
||||
|||
|
||||
public export
|
||||
record LiterateStyle where
|
||||
constructor MkLitStyle
|
||||
||| The pairs of start and end tags for code blocks.
|
||||
deliminators : List (String, String)
|
||||
|
||||
||| Line markers that indicate a line contains code.
|
||||
line_markers : List String
|
||||
|
||||
||| Recognised file extensions. Not used by the module, but will be
|
||||
||| of use when connecting to code that reads in the original source
|
||||
||| files.
|
||||
file_extensions : List String
|
||||
|
||||
||| Given a 'literate specification' extract the code from the
|
||||
||| literate source file (`litStr`) that follows the presented style.
|
||||
|||
|
||||
||| @specification The literate specification to use.
|
||||
||| @litStr The literate source file.
|
||||
|||
|
||||
||| Returns a `LiterateError` if the literate file contains malformed
|
||||
||| code blocks or code lines.
|
||||
|||
|
||||
export
|
||||
extractCode : (specification : LiterateStyle)
|
||||
-> (litStr : String)
|
||||
-> Either LiterateError String
|
||||
extractCode (MkLitStyle delims markers exts) str =
|
||||
case lex (rawTokens delims markers) str of
|
||||
(toks, (_,_,"")) => Right (reduce toks Nil)
|
||||
(_, (l,c,i)) => Left (MkLitErr l c i)
|
||||
|
||||
||| Synonym for `extractCode`.
|
||||
export
|
||||
unlit : (specification : LiterateStyle)
|
||||
-> (litStr : String)
|
||||
-> Either LiterateError String
|
||||
unlit = extractCode
|
||||
|
||||
||| Is the provided line marked up using a line marker?
|
||||
|||
|
||||
||| If the line is suffixed by any one of the style's set of line
|
||||
||| markers then return length of literate line marker, and the code stripped from the line
|
||||
||| marker. Otherwise, return Nothing and the unmarked line.
|
||||
export
|
||||
isLiterateLine : (specification : LiterateStyle)
|
||||
-> (str : String)
|
||||
-> Pair (Maybe String) String
|
||||
isLiterateLine (MkLitStyle delims markers _) str with (lex (rawTokens delims markers) str)
|
||||
isLiterateLine (MkLitStyle delims markers _) str | ([MkBounded (CodeLine m str') _ _], (_,_, "")) = (Just m, str')
|
||||
isLiterateLine (MkLitStyle delims markers _) str | (_, _) = (Nothing, str)
|
||||
|
||||
||| Given a 'literate specification' embed the given code using the
|
||||
||| literate style provided.
|
||||
|||
|
||||
||| If the style uses deliminators to denote code blocks use the first
|
||||
||| pair of deliminators in the style. Otherwise use first linemarker
|
||||
||| in the style. If there is **no style** return the presented code
|
||||
||| string unembedded.
|
||||
|||
|
||||
|||
|
||||
||| @specification The literate specification to use.
|
||||
||| @code The code to embed,
|
||||
|||
|
||||
|||
|
||||
export
|
||||
embedCode : (specification : LiterateStyle)
|
||||
-> (code : String)
|
||||
-> String
|
||||
embedCode (MkLitStyle ((s,e)::delims) _ _) str = unlines [s,str,e]
|
||||
embedCode (MkLitStyle Nil (m::markers) _) str = unwords [m, str]
|
||||
embedCode (MkLitStyle _ _ _) str = str
|
||||
|
||||
||| Synonm for `embedCode`
|
||||
export
|
||||
relit : (specification : LiterateStyle)
|
||||
-> (code : String)
|
||||
-> String
|
||||
relit = embedCode
|
||||
|
||||
-- --------------------------------------------------------------------- [ EOF ]
|
@ -2,49 +2,50 @@ module Text.Parser
|
||||
|
||||
import Data.Bool
|
||||
import Data.List
|
||||
import Data.List1
|
||||
import Data.Nat
|
||||
import Data.Vect
|
||||
import public Data.List1
|
||||
|
||||
import public Text.Parser.Core
|
||||
import public Text.Quantity
|
||||
import public Text.Token
|
||||
|
||||
%default total
|
||||
|
||||
||| Parse a terminal based on a kind of token.
|
||||
export
|
||||
match : (Eq k, TokenKind k) =>
|
||||
(kind : k) ->
|
||||
Grammar (Token k) True (TokType kind)
|
||||
match kind = terminal "Unrecognised input" $
|
||||
\(Tok kind' text) => if kind' == kind
|
||||
then Just $ tokValue kind text
|
||||
else Nothing
|
||||
Grammar state (Token k) True (TokType kind)
|
||||
match k = terminal "Unrecognised input" $
|
||||
\t => if t.kind == k
|
||||
then Just $ tokValue k t.text
|
||||
else Nothing
|
||||
|
||||
||| Optionally parse a thing, with a default value if the grammar doesn't
|
||||
||| match. May match the empty input.
|
||||
export
|
||||
option : {c : Bool} ->
|
||||
(def : a) -> (p : Grammar tok c a) ->
|
||||
Grammar tok False a
|
||||
(def : a) -> (p : Grammar state tok c a) ->
|
||||
Grammar state tok False a
|
||||
option {c = False} def p = p <|> pure def
|
||||
option {c = True} def p = p <|> pure def
|
||||
|
||||
||| Optionally parse a thing.
|
||||
||| To provide a default value, use `option` instead.
|
||||
export
|
||||
optional : {c : _} ->
|
||||
(p : Grammar tok c a) ->
|
||||
Grammar tok False (Maybe a)
|
||||
optional : {c : Bool} ->
|
||||
(p : Grammar state tok c a) ->
|
||||
Grammar state tok False (Maybe a)
|
||||
optional p = option Nothing (map Just p)
|
||||
|
||||
||| Try to parse one thing or the other, producing a value that indicates
|
||||
||| which option succeeded. If both would succeed, the left option
|
||||
||| takes priority.
|
||||
export
|
||||
choose : {c1, c2 : _} ->
|
||||
(l : Grammar tok c1 a) ->
|
||||
(r : Grammar tok c2 b) ->
|
||||
Grammar tok (c1 && c2) (Either a b)
|
||||
choose : {c1, c2 : Bool} ->
|
||||
(l : Grammar state tok c1 a) ->
|
||||
(r : Grammar state tok c2 b) ->
|
||||
Grammar state tok (c1 && c2) (Either a b)
|
||||
choose l r = map Left l <|> map Right r
|
||||
|
||||
||| Produce a grammar by applying a function to each element of a container,
|
||||
@ -52,48 +53,42 @@ choose l r = map Left l <|> map Right r
|
||||
||| container is empty.
|
||||
export
|
||||
choiceMap : {c : Bool} ->
|
||||
(a -> Grammar tok c b) ->
|
||||
(a -> Grammar state tok c b) ->
|
||||
Foldable t => t a ->
|
||||
Grammar tok c b
|
||||
choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
|
||||
f x <|> acc)
|
||||
(fail "No more options") xs
|
||||
Grammar state tok c b
|
||||
choiceMap {c} f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
|
||||
f x <|> acc)
|
||||
(fail "No more options") xs
|
||||
|
||||
%hide Prelude.(>>=)
|
||||
|
||||
||| Try each grammar in a container until the first one succeeds.
|
||||
||| Fails if the container is empty.
|
||||
export
|
||||
choice : {c : _} ->
|
||||
Foldable t => t (Grammar tok c a) ->
|
||||
Grammar tok c a
|
||||
choice = Parser.choiceMap id
|
||||
choice : Foldable t =>
|
||||
{c : Bool} ->
|
||||
t (Grammar state tok c a) ->
|
||||
Grammar state tok c a
|
||||
choice = choiceMap id
|
||||
|
||||
mutual
|
||||
||| Parse one or more things
|
||||
export
|
||||
some : Grammar tok True a ->
|
||||
Grammar tok True (List1 a)
|
||||
some : Grammar state tok True a ->
|
||||
Grammar state tok True (List1 a)
|
||||
some p = pure (!p ::: !(many p))
|
||||
|
||||
||| Parse zero or more things (may match the empty input)
|
||||
export
|
||||
many : Grammar tok True a ->
|
||||
Grammar tok False (List a)
|
||||
many : Grammar state tok True a ->
|
||||
Grammar state tok False (List a)
|
||||
many p = option [] (forget <$> some p)
|
||||
|
||||
||| Parse one or more instances of `p`, returning the parsed items and proof
|
||||
||| that the resulting list is non-empty.
|
||||
export
|
||||
some' : (p : Grammar tok True a) ->
|
||||
Grammar tok True (xs : List a ** NonEmpty xs)
|
||||
some' p = pure (!p :: !(many p) ** IsNonEmpty)
|
||||
|
||||
mutual
|
||||
private
|
||||
count1 : (q : Quantity) ->
|
||||
(p : Grammar tok True a) ->
|
||||
Grammar tok True (List a)
|
||||
(p : Grammar state tok True a) ->
|
||||
Grammar state tok True (List a)
|
||||
count1 q p = do x <- p
|
||||
seq (count q p)
|
||||
(\xs => pure (x :: xs))
|
||||
@ -101,31 +96,23 @@ mutual
|
||||
||| Parse `p`, repeated as specified by `q`, returning the list of values.
|
||||
export
|
||||
count : (q : Quantity) ->
|
||||
(p : Grammar tok True a) ->
|
||||
Grammar tok (isSucc (min q)) (List a)
|
||||
(p : Grammar state tok True a) ->
|
||||
Grammar state tok (isSucc (min q)) (List a)
|
||||
count (Qty Z Nothing) p = many p
|
||||
count (Qty Z (Just Z)) _ = pure []
|
||||
count (Qty Z (Just (S max))) p = option [] $ count1 (atMost max) p
|
||||
count (Qty (S min) Nothing) p = count1 (atLeast min) p
|
||||
count (Qty (S min) (Just Z)) _ = fail "Quantity out of order"
|
||||
count (Qty (S min) (Just (S max))) p = count1 (between min max) p
|
||||
|
||||
||| Parse `p` `n` times, returning the vector of values.
|
||||
export
|
||||
countExactly : (n : Nat) ->
|
||||
(p : Grammar tok True a) ->
|
||||
Grammar tok (isSucc n) (Vect n a)
|
||||
countExactly Z p = Empty []
|
||||
countExactly (S k) p = [| p :: countExactly k p |]
|
||||
count (Qty (S min) (Just (S max))) p = count1 (between (S min) max) p
|
||||
|
||||
mutual
|
||||
||| Parse one or more instances of `p` until `end` succeeds, returning the
|
||||
||| list of values from `p`. Guaranteed to consume input.
|
||||
export
|
||||
someTill : {c : Bool} ->
|
||||
(end : Grammar tok c e) ->
|
||||
(p : Grammar tok True a) ->
|
||||
Grammar tok True (List1 a)
|
||||
(end : Grammar state tok c e) ->
|
||||
(p : Grammar state tok True a) ->
|
||||
Grammar state tok True (List1 a)
|
||||
someTill {c} end p = do x <- p
|
||||
seq (manyTill end p)
|
||||
(\xs => pure (x ::: xs))
|
||||
@ -134,9 +121,9 @@ mutual
|
||||
||| list of values from `p`. Guaranteed to consume input if `end` consumes.
|
||||
export
|
||||
manyTill : {c : Bool} ->
|
||||
(end : Grammar tok c e) ->
|
||||
(p : Grammar tok True a) ->
|
||||
Grammar tok c (List a)
|
||||
(end : Grammar state tok c e) ->
|
||||
(p : Grammar state tok True a) ->
|
||||
Grammar state tok c (List a)
|
||||
manyTill {c} end p = rewrite sym (andTrueNeutral c) in
|
||||
map (const []) end <|> (forget <$> someTill end p)
|
||||
|
||||
@ -145,28 +132,28 @@ mutual
|
||||
||| returning its value.
|
||||
export
|
||||
afterSome : {c : Bool} ->
|
||||
(skip : Grammar tok True s) ->
|
||||
(p : Grammar tok c a) ->
|
||||
Grammar tok True a
|
||||
afterSome skip p = do ignore skip
|
||||
(skip : Grammar state tok True s) ->
|
||||
(p : Grammar state tok c a) ->
|
||||
Grammar state tok True a
|
||||
afterSome skip p = do ignore $ skip
|
||||
afterMany skip p
|
||||
|
||||
||| Parse zero or more instance of `skip` until `p` is encountered,
|
||||
||| returning its value.
|
||||
export
|
||||
afterMany : {c : Bool} ->
|
||||
(skip : Grammar tok True s) ->
|
||||
(p : Grammar tok c a) ->
|
||||
Grammar tok c a
|
||||
(skip : Grammar state tok True s) ->
|
||||
(p : Grammar state tok c a) ->
|
||||
Grammar state tok c a
|
||||
afterMany {c} skip p = rewrite sym (andTrueNeutral c) in
|
||||
p <|> afterSome skip p
|
||||
|
||||
||| Parse one or more things, each separated by another thing.
|
||||
export
|
||||
sepBy1 : {c : Bool} ->
|
||||
(sep : Grammar tok True s) ->
|
||||
(p : Grammar tok c a) ->
|
||||
Grammar tok c (List1 a)
|
||||
(sep : Grammar state tok True s) ->
|
||||
(p : Grammar state tok c a) ->
|
||||
Grammar state tok c (List1 a)
|
||||
sepBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
|
||||
[| p ::: many (sep *> p) |]
|
||||
|
||||
@ -174,18 +161,18 @@ sepBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
|
||||
||| match the empty input.
|
||||
export
|
||||
sepBy : {c : Bool} ->
|
||||
(sep : Grammar tok True s) ->
|
||||
(p : Grammar tok c a) ->
|
||||
Grammar tok False (List a)
|
||||
(sep : Grammar state tok True s) ->
|
||||
(p : Grammar state tok c a) ->
|
||||
Grammar state tok False (List a)
|
||||
sepBy sep p = option [] $ forget <$> sepBy1 sep p
|
||||
|
||||
||| Parse one or more instances of `p` separated by and optionally terminated by
|
||||
||| `sep`.
|
||||
export
|
||||
sepEndBy1 : {c : Bool} ->
|
||||
(sep : Grammar tok True s) ->
|
||||
(p : Grammar tok c a) ->
|
||||
Grammar tok c (List1 a)
|
||||
(sep : Grammar state tok True s) ->
|
||||
(p : Grammar state tok c a) ->
|
||||
Grammar state tok c (List1 a)
|
||||
sepEndBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
|
||||
sepBy1 sep p <* optional sep
|
||||
|
||||
@ -193,32 +180,49 @@ sepEndBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
|
||||
||| by `sep`. Will not match a separator by itself.
|
||||
export
|
||||
sepEndBy : {c : Bool} ->
|
||||
(sep : Grammar tok True s) ->
|
||||
(p : Grammar tok c a) ->
|
||||
Grammar tok False (List a)
|
||||
(sep : Grammar state tok True s) ->
|
||||
(p : Grammar state tok c a) ->
|
||||
Grammar state tok False (List a)
|
||||
sepEndBy sep p = option [] $ forget <$> sepEndBy1 sep p
|
||||
|
||||
||| Parse one or more instances of `p`, separated and terminated by `sep`.
|
||||
export
|
||||
endBy1 : {c : Bool} ->
|
||||
(sep : Grammar tok True s) ->
|
||||
(p : Grammar tok c a) ->
|
||||
Grammar tok True (List1 a)
|
||||
endBy1 sep p = some $ rewrite sym (orTrueTrue c) in
|
||||
p <* sep
|
||||
(sep : Grammar state tok True s) ->
|
||||
(p : Grammar state tok c a) ->
|
||||
Grammar state tok True (List1 a)
|
||||
endBy1 {c} sep p = some $ rewrite sym (orTrueTrue c) in
|
||||
p <* sep
|
||||
|
||||
export
|
||||
endBy : {c : Bool} ->
|
||||
(sep : Grammar tok True s) ->
|
||||
(p : Grammar tok c a) ->
|
||||
Grammar tok False (List a)
|
||||
(sep : Grammar state tok True s) ->
|
||||
(p : Grammar state tok c a) ->
|
||||
Grammar state tok False (List a)
|
||||
endBy sep p = option [] $ forget <$> endBy1 sep p
|
||||
|
||||
||| Parse an instance of `p` that is between `left` and `right`.
|
||||
export
|
||||
between : {c : _} ->
|
||||
(left : Grammar tok True l) ->
|
||||
(right : Inf (Grammar tok True r)) ->
|
||||
(p : Inf (Grammar tok c a)) ->
|
||||
Grammar tok True a
|
||||
between : {c : Bool} ->
|
||||
(left : Grammar state tok True l) ->
|
||||
(right : Grammar state tok True r) ->
|
||||
(p : Grammar state tok c a) ->
|
||||
Grammar state tok True a
|
||||
between left right contents = left *> contents <* right
|
||||
|
||||
export
|
||||
location : Grammar state token False (Int, Int)
|
||||
location = startBounds <$> position
|
||||
|
||||
export
|
||||
endLocation : Grammar state token False (Int, Int)
|
||||
endLocation = endBounds <$> position
|
||||
|
||||
export
|
||||
column : Grammar state token False Int
|
||||
column = snd <$> location
|
||||
|
||||
public export
|
||||
when : Bool -> Lazy (Grammar state token False ()) -> Grammar state token False ()
|
||||
when True y = y
|
||||
when False y = pure ()
|
||||
|
@ -1,47 +1,68 @@
|
||||
module Text.Parser.Core
|
||||
|
||||
import public Control.Delayed
|
||||
import Data.Bool
|
||||
import Data.List
|
||||
import Data.List1
|
||||
|
||||
import public Control.Delayed
|
||||
import public Text.Bounded
|
||||
|
||||
%default total
|
||||
|
||||
-- TODO: Add some primitives for helping with error messages.
|
||||
-- e.g. perhaps set a string to state what we're currently trying to
|
||||
-- parse, or to say what the next expected token is in words
|
||||
|
||||
||| Description of a language's grammar. The `tok` parameter is the type
|
||||
||| of tokens, and the `consumes` flag is True if the language is guaranteed
|
||||
||| to be non-empty - that is, successfully parsing the language is guaranteed
|
||||
||| to consume some input.
|
||||
public export
|
||||
data Grammar : (tok : Type) -> (consumes : Bool) -> Type -> Type where
|
||||
Empty : (val : ty) -> Grammar tok False ty
|
||||
Terminal : String -> (tok -> Maybe a) -> Grammar tok True a
|
||||
NextIs : String -> (tok -> Bool) -> Grammar tok False tok
|
||||
EOF : Grammar tok False ()
|
||||
export
|
||||
data Grammar : (state : Type) -> (tok : Type) -> (consumes : Bool) -> Type -> Type where
|
||||
Empty : (val : ty) -> Grammar state tok False ty
|
||||
Terminal : String -> (tok -> Maybe a) -> Grammar state tok True a
|
||||
NextIs : String -> (tok -> Bool) -> Grammar state tok False tok
|
||||
EOF : Grammar state tok False ()
|
||||
|
||||
Fail : Bool -> String -> Grammar tok c ty
|
||||
Commit : Grammar tok False ()
|
||||
MustWork : Grammar tok c a -> Grammar tok c a
|
||||
Fail : (location : Maybe Bounds) -> (fatal : Bool) -> String -> Grammar state tok c ty
|
||||
Try : Grammar state tok c ty -> Grammar state tok c ty
|
||||
|
||||
SeqEat : {c2 : _} -> Grammar tok True a -> Inf (a -> Grammar tok c2 b) ->
|
||||
Grammar tok True b
|
||||
SeqEmpty : {c1, c2 : _} -> Grammar tok c1 a -> (a -> Grammar tok c2 b) ->
|
||||
Grammar tok (c1 || c2) b
|
||||
Commit : Grammar state tok False ()
|
||||
MustWork : Grammar state tok c a -> Grammar state tok c a
|
||||
|
||||
ThenEat : {c2 : _} -> Grammar tok True () -> Inf (Grammar tok c2 b) ->
|
||||
Grammar tok True b
|
||||
ThenEmpty : {c1, c2 : _} -> Grammar tok c1 () -> Grammar tok c2 b ->
|
||||
Grammar tok (c1 || c2) b
|
||||
SeqEat : {c2 : Bool} ->
|
||||
Grammar state tok True a -> Inf (a -> Grammar state tok c2 b) ->
|
||||
Grammar state tok True b
|
||||
SeqEmpty : {c1, c2 : Bool} ->
|
||||
Grammar state tok c1 a -> (a -> Grammar state tok c2 b) ->
|
||||
Grammar state tok (c1 || c2) b
|
||||
|
||||
Alt : {c1, c2 : _} -> Grammar tok c1 ty -> Grammar tok c2 ty ->
|
||||
Grammar tok (c1 && c2) ty
|
||||
ThenEat : {c2 : Bool} ->
|
||||
Grammar state tok True () -> Inf (Grammar state tok c2 a) ->
|
||||
Grammar state tok True a
|
||||
ThenEmpty : {c1, c2 : Bool} ->
|
||||
Grammar state tok c1 () -> Grammar state tok c2 a ->
|
||||
Grammar state tok (c1 || c2) a
|
||||
|
||||
Alt : {c1, c2 : Bool} ->
|
||||
Grammar state tok c1 ty -> Lazy (Grammar state tok c2 ty) ->
|
||||
Grammar state tok (c1 && c2) ty
|
||||
Bounds : Grammar state tok c ty -> Grammar state tok c (WithBounds ty)
|
||||
Position : Grammar state tok False Bounds
|
||||
|
||||
Act : state -> Grammar state tok False ()
|
||||
|
||||
||| Sequence two grammars. If either consumes some input, the sequence is
|
||||
||| guaranteed to consume some input. If the first one consumes input, the
|
||||
||| second is allowed to be recursive (because it means some input has been
|
||||
||| consumed and therefore the input is smaller)
|
||||
public export %inline %tcinline
|
||||
export %inline
|
||||
(>>=) : {c1, c2 : Bool} ->
|
||||
Grammar tok c1 a ->
|
||||
inf c1 (a -> Grammar tok c2 b) ->
|
||||
Grammar tok (c1 || c2) b
|
||||
Grammar state tok c1 a ->
|
||||
inf c1 (a -> Grammar state tok c2 b) ->
|
||||
Grammar state tok (c1 || c2) b
|
||||
(>>=) {c1 = False} = SeqEmpty
|
||||
(>>=) {c1 = True} = SeqEat
|
||||
(>>=) {c1 = True} = SeqEat
|
||||
|
||||
||| Sequence two grammars. If either consumes some input, the sequence is
|
||||
||| guaranteed to consume some input. If the first one consumes input, the
|
||||
@ -49,279 +70,305 @@ public export %inline %tcinline
|
||||
||| consumed and therefore the input is smaller)
|
||||
public export %inline %tcinline
|
||||
(>>) : {c1, c2 : Bool} ->
|
||||
Grammar tok c1 () ->
|
||||
inf c1 (Grammar tok c2 a) ->
|
||||
Grammar tok (c1 || c2) a
|
||||
Grammar state tok c1 () ->
|
||||
inf c1 (Grammar state tok c2 a) ->
|
||||
Grammar state tok (c1 || c2) a
|
||||
(>>) {c1 = False} = ThenEmpty
|
||||
(>>) {c1 = True} = ThenEat
|
||||
|
||||
||| Sequence two grammars. If either consumes some input, the sequence is
|
||||
||| guaranteed to consume input. This is an explicitly non-infinite version
|
||||
||| of `>>=`.
|
||||
export
|
||||
seq : {c1, c2 : _} -> Grammar tok c1 a ->
|
||||
(a -> Grammar tok c2 b) ->
|
||||
Grammar tok (c1 || c2) b
|
||||
export %inline
|
||||
seq : {c1,c2 : Bool} ->
|
||||
Grammar state tok c1 a ->
|
||||
(a -> Grammar state tok c2 b) ->
|
||||
Grammar state tok (c1 || c2) b
|
||||
seq = SeqEmpty
|
||||
|
||||
||| Sequence a grammar followed by the grammar it returns.
|
||||
export
|
||||
join : {c1, c2 : Bool} ->
|
||||
Grammar tok c1 (Grammar tok c2 a) ->
|
||||
Grammar tok (c1 || c2) a
|
||||
export %inline
|
||||
join : {c1,c2 : Bool} ->
|
||||
Grammar state tok c1 (Grammar state tok c2 a) ->
|
||||
Grammar state tok (c1 || c2) a
|
||||
join {c1 = False} p = SeqEmpty p id
|
||||
join {c1 = True} p = SeqEat p id
|
||||
|
||||
||| Give two alternative grammars. If both consume, the combination is
|
||||
||| guaranteed to consume.
|
||||
export
|
||||
(<|>) : {c1, c2 : _} ->
|
||||
Grammar tok c1 ty ->
|
||||
Grammar tok c2 ty ->
|
||||
Grammar tok (c1 && c2) ty
|
||||
(<|>) = Alt
|
||||
|
||||
||| Allows the result of a grammar to be mapped to a different value.
|
||||
export
|
||||
{c : _} -> Functor (Grammar tok c) where
|
||||
{c : _} ->
|
||||
Functor (Grammar state tok c) where
|
||||
map f (Empty val) = Empty (f val)
|
||||
map f (Fail fatal msg) = Fail fatal msg
|
||||
map f (Fail bd fatal msg) = Fail bd fatal msg
|
||||
map f (Try g) = Try (map f g)
|
||||
map f (MustWork g) = MustWork (map f g)
|
||||
map f (Terminal msg g) = Terminal msg (\t => map f (g t))
|
||||
map f (Terminal msg g) = Terminal msg (map f . g)
|
||||
map f (Alt x y) = Alt (map f x) (map f y)
|
||||
map f (SeqEat act next)
|
||||
= SeqEat act (\val => map f (next val))
|
||||
map f (SeqEmpty act next)
|
||||
= SeqEmpty act (\val => map f (next val))
|
||||
= SeqEmpty act (\ val => map f (next val))
|
||||
map f (ThenEat act next)
|
||||
= ThenEat act (map f next)
|
||||
map f (ThenEmpty act next)
|
||||
= ThenEmpty act (map f next)
|
||||
map {c} f (Bounds act)
|
||||
= rewrite sym $ orFalseNeutral c in
|
||||
SeqEmpty (Bounds act) (Empty . f) -- Bounds (map f act)
|
||||
-- The remaining constructors (NextIs, EOF, Commit) have a fixed type,
|
||||
-- so a sequence must be used.
|
||||
map {c = False} f p = SeqEmpty p (Empty . f)
|
||||
|
||||
||| Give two alternative grammars. If both consume, the combination is
|
||||
||| guaranteed to consume.
|
||||
export %inline
|
||||
(<|>) : {c1,c2 : Bool} ->
|
||||
Grammar state tok c1 ty ->
|
||||
Lazy (Grammar state tok c2 ty) ->
|
||||
Grammar state tok (c1 && c2) ty
|
||||
(<|>) = Alt
|
||||
|
||||
infixr 2 <||>
|
||||
||| Take the tagged disjunction of two grammars. If both consume, the
|
||||
||| combination is guaranteed to consume.
|
||||
export
|
||||
(<||>) : {c1,c2 : Bool} ->
|
||||
Grammar state tok c1 a ->
|
||||
Lazy (Grammar state tok c2 b) ->
|
||||
Grammar state tok (c1 && c2) (Either a b)
|
||||
(<||>) p q = (Left <$> p) <|> (Right <$> q)
|
||||
|
||||
||| Sequence a grammar with value type `a -> b` and a grammar
|
||||
||| with value type `a`. If both succeed, apply the function
|
||||
||| from the first grammar to the value from the second grammar.
|
||||
||| Guaranteed to consume if either grammar consumes.
|
||||
export
|
||||
(<*>) : {c1, c2 : _} ->
|
||||
Grammar tok c1 (a -> b) ->
|
||||
Inf (Grammar tok c2 a) ->
|
||||
Grammar tok (c1 || c2) b
|
||||
(<*>) {c1 = False} x y = SeqEmpty x (\f => map f y)
|
||||
(<*>) {c1 = True } x y = SeqEmpty x (\f => map f (Force y))
|
||||
export %inline
|
||||
(<*>) : {c1, c2 : Bool} ->
|
||||
Grammar state tok c1 (a -> b) ->
|
||||
Grammar state tok c2 a ->
|
||||
Grammar state tok (c1 || c2) b
|
||||
(<*>) x y = SeqEmpty x (\f => map f y)
|
||||
|
||||
||| Sequence two grammars. If both succeed, use the value of the first one.
|
||||
||| Guaranteed to consume if either grammar consumes.
|
||||
export
|
||||
(<*) : {c1, c2 : _} ->
|
||||
Grammar tok c1 a ->
|
||||
Inf (Grammar tok c2 b) ->
|
||||
Grammar tok (c1 || c2) a
|
||||
export %inline
|
||||
(<*) : {c1,c2 : Bool} ->
|
||||
Grammar state tok c1 a ->
|
||||
Grammar state tok c2 b ->
|
||||
Grammar state tok (c1 || c2) a
|
||||
(<*) x y = map const x <*> y
|
||||
|
||||
||| Sequence two grammars. If both succeed, use the value of the second one.
|
||||
||| Guaranteed to consume if either grammar consumes.
|
||||
export
|
||||
(*>) : {c1, c2 : _} ->
|
||||
Grammar tok c1 a ->
|
||||
Inf (Grammar tok c2 b) ->
|
||||
Grammar tok (c1 || c2) b
|
||||
export %inline
|
||||
(*>) : {c1,c2 : Bool} ->
|
||||
Grammar state tok c1 a ->
|
||||
Grammar state tok c2 b ->
|
||||
Grammar state tok (c1 || c2) b
|
||||
(*>) x y = map (const id) x <*> y
|
||||
|
||||
export %inline
|
||||
act : state -> Grammar state tok False ()
|
||||
act = Act
|
||||
|
||||
||| Produce a grammar that can parse a different type of token by providing a
|
||||
||| function converting the new token type into the original one.
|
||||
export
|
||||
mapToken : (a -> b) -> Grammar b c ty -> Grammar a c ty
|
||||
mapToken : (a -> b) -> Grammar state b c ty -> Grammar state a c ty
|
||||
mapToken f (Empty val) = Empty val
|
||||
mapToken f (Terminal msg g) = Terminal msg (g . f)
|
||||
mapToken f (NextIs msg g) = SeqEmpty (NextIs msg (g . f)) (Empty . f)
|
||||
mapToken f EOF = EOF
|
||||
mapToken f (Fail fatal msg) = Fail fatal msg
|
||||
mapToken f (Fail bd fatal msg) = Fail bd fatal msg
|
||||
mapToken f (Try g) = Try (mapToken f g)
|
||||
mapToken f (MustWork g) = MustWork (mapToken f g)
|
||||
mapToken f Commit = Commit
|
||||
mapToken f (SeqEat act next)
|
||||
= SeqEat (mapToken f act) (\x => mapToken f (next x))
|
||||
= SeqEat (mapToken f act) (\x => mapToken f (next x))
|
||||
mapToken f (SeqEmpty act next)
|
||||
= SeqEmpty (mapToken f act) (\x => mapToken f (next x))
|
||||
= SeqEmpty (mapToken f act) (\x => mapToken f (next x))
|
||||
mapToken f (ThenEat act next)
|
||||
= ThenEat (mapToken f act) (mapToken f next)
|
||||
= ThenEat (mapToken f act) (mapToken f next)
|
||||
mapToken f (ThenEmpty act next)
|
||||
= ThenEmpty (mapToken f act) (mapToken f next)
|
||||
= ThenEmpty (mapToken f act) (mapToken f next)
|
||||
mapToken f (Alt x y) = Alt (mapToken f x) (mapToken f y)
|
||||
mapToken f (Bounds act) = Bounds (mapToken f act)
|
||||
mapToken f Position = Position
|
||||
mapToken f (Act action) = Act action
|
||||
|
||||
||| Always succeed with the given value.
|
||||
export
|
||||
pure : (val : ty) -> Grammar tok False ty
|
||||
export %inline
|
||||
pure : (val : ty) -> Grammar state tok False ty
|
||||
pure = Empty
|
||||
|
||||
||| Check whether the next token satisfies a predicate
|
||||
export
|
||||
nextIs : String -> (tok -> Bool) -> Grammar tok False tok
|
||||
export %inline
|
||||
nextIs : String -> (tok -> Bool) -> Grammar state tok False tok
|
||||
nextIs = NextIs
|
||||
|
||||
||| Look at the next token in the input
|
||||
export
|
||||
peek : Grammar tok False tok
|
||||
export %inline
|
||||
peek : Grammar state tok False tok
|
||||
peek = nextIs "Unrecognised token" (const True)
|
||||
|
||||
||| Succeeds if running the predicate on the next token returns Just x,
|
||||
||| returning x. Otherwise fails.
|
||||
export
|
||||
terminal : String -> (tok -> Maybe a) -> Grammar tok True a
|
||||
export %inline
|
||||
terminal : String -> (tok -> Maybe a) -> Grammar state tok True a
|
||||
terminal = Terminal
|
||||
|
||||
||| Always fail with a message
|
||||
export
|
||||
fail : String -> Grammar tok c ty
|
||||
fail = Fail False
|
||||
export %inline
|
||||
fail : String -> Grammar state tok c ty
|
||||
fail = Fail Nothing False
|
||||
|
||||
export
|
||||
fatalError : String -> Grammar tok c ty
|
||||
fatalError = Fail True
|
||||
||| Always fail with a message and a location
|
||||
export %inline
|
||||
failLoc : Bounds -> String -> Grammar state tok c ty
|
||||
failLoc b = Fail (Just b) False
|
||||
|
||||
||| Fail with no possibility for recovery (i.e.
|
||||
||| no alternative parsing can succeed).
|
||||
export %inline
|
||||
fatalError : String -> Grammar state tok c ty
|
||||
fatalError = Fail Nothing True
|
||||
|
||||
||| Fail with no possibility for recovery (i.e.
|
||||
||| no alternative parsing can succeed).
|
||||
export %inline
|
||||
fatalLoc : Bounds -> String -> Grammar state tok c ty
|
||||
fatalLoc b = Fail (Just b) True
|
||||
|
||||
||| Catch a fatal error
|
||||
export %inline
|
||||
try : Grammar state tok c ty -> Grammar state tok c ty
|
||||
try = Try
|
||||
|
||||
||| Succeed if the input is empty
|
||||
export
|
||||
eof : Grammar tok False ()
|
||||
export %inline
|
||||
eof : Grammar state tok False ()
|
||||
eof = EOF
|
||||
|
||||
||| Commit to an alternative; if the current branch of an alternative
|
||||
||| fails to parse, no more branches will be tried
|
||||
export
|
||||
commit : Grammar tok False ()
|
||||
export %inline
|
||||
commit : Grammar state tok False ()
|
||||
commit = Commit
|
||||
|
||||
||| If the parser fails, treat it as a fatal error
|
||||
export
|
||||
mustWork : Grammar tok c ty -> Grammar tok c ty
|
||||
export %inline
|
||||
mustWork : {c : Bool} -> Grammar state tok c ty -> Grammar state tok c ty
|
||||
mustWork = MustWork
|
||||
|
||||
data ParseResult : List tok -> (consumes : Bool) -> Type -> Type where
|
||||
Failure : {xs : List tok} ->
|
||||
(committed : Bool) -> (fatal : Bool) ->
|
||||
(err : String) -> (rest : List tok) -> ParseResult xs c ty
|
||||
EmptyRes : (committed : Bool) ->
|
||||
(val : ty) -> (more : List tok) -> ParseResult more False ty
|
||||
NonEmptyRes : {xs : List tok} ->
|
||||
(committed : Bool) ->
|
||||
(val : ty) -> (more : List tok) ->
|
||||
ParseResult (x :: xs ++ more) c ty
|
||||
export %inline
|
||||
bounds : Grammar state tok c ty -> Grammar state tok c (WithBounds ty)
|
||||
bounds = Bounds
|
||||
|
||||
-- Take the result of an alternative branch, reset the commit flag to
|
||||
-- the commit flag from the outer alternative, and weaken the 'consumes'
|
||||
-- flag to take both alternatives into account
|
||||
weakenRes : {whatever, c : Bool} -> {xs : List tok} ->
|
||||
(com' : Bool) ->
|
||||
ParseResult xs c ty -> ParseResult xs (whatever && c) ty
|
||||
weakenRes com' (Failure com fatal msg ts) = Failure com' fatal msg ts
|
||||
weakenRes {whatever=True} com' (EmptyRes com val xs) = EmptyRes com' val xs
|
||||
weakenRes {whatever=False} com' (EmptyRes com val xs) = EmptyRes com' val xs
|
||||
weakenRes com' (NonEmptyRes {xs} com val more) = NonEmptyRes {xs} com' val more
|
||||
|
||||
doParse : (commit : Bool) ->
|
||||
(act : Grammar tok c ty) ->
|
||||
(xs : List tok) ->
|
||||
ParseResult xs c ty
|
||||
-- doParse com xs act with (sizeAccessible xs)
|
||||
doParse com (Empty val) xs = EmptyRes com val xs
|
||||
doParse com (Fail fatal str) [] = Failure com fatal str []
|
||||
doParse com (Fail fatal str) (x :: xs) = Failure com fatal str (x :: xs)
|
||||
doParse com Commit xs = EmptyRes True () xs
|
||||
doParse com (MustWork g) xs =
|
||||
let p' = doParse com g xs in
|
||||
case p' of
|
||||
Failure com' _ msg ts => Failure com' True msg ts
|
||||
res => res
|
||||
doParse com (Terminal err f) [] = Failure com False "End of input" []
|
||||
doParse com (Terminal err f) (x :: xs)
|
||||
= maybe
|
||||
(Failure com False err (x :: xs))
|
||||
(\a => NonEmptyRes com {xs=[]} a xs)
|
||||
(f x)
|
||||
doParse com EOF [] = EmptyRes com () []
|
||||
doParse com EOF (x :: xs)
|
||||
= Failure com False "Expected end of input" (x :: xs)
|
||||
doParse com (NextIs err f) [] = Failure com False "End of input" []
|
||||
doParse com (NextIs err f) (x :: xs)
|
||||
= if f x
|
||||
then EmptyRes com x (x :: xs)
|
||||
else Failure com False err (x :: xs)
|
||||
doParse com (Alt {c1} {c2} x y) xs
|
||||
= let p' = doParse False x xs in
|
||||
case p' of
|
||||
Failure com' fatal msg ts
|
||||
=> if com' || fatal
|
||||
-- If the alternative had committed, don't try the
|
||||
-- other branch (and reset commit flag)
|
||||
then Failure com fatal msg ts
|
||||
else weakenRes {whatever = c1} com (doParse False y xs)
|
||||
-- Successfully parsed the first option, so use the outer commit flag
|
||||
EmptyRes _ val xs => EmptyRes com val xs
|
||||
NonEmptyRes {xs=xs'} _ val more => NonEmptyRes {xs=xs'} com val more
|
||||
doParse com (SeqEmpty {c1} {c2} act next) xs
|
||||
= let p' = assert_total (doParse {c = c1} com act xs) in
|
||||
case p' of
|
||||
Failure com fatal msg ts => Failure com fatal msg ts
|
||||
EmptyRes com val xs => assert_total (doParse com (next val) xs)
|
||||
NonEmptyRes {x} {xs=ys} com val more =>
|
||||
case (assert_total (doParse com (next val) more)) of
|
||||
Failure com' fatal msg ts => Failure com' fatal msg ts
|
||||
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
|
||||
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
|
||||
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
|
||||
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
|
||||
doParse com (SeqEat act next) xs with (doParse com act xs)
|
||||
doParse com (SeqEat act next) xs | Failure com' fatal msg ts
|
||||
= Failure com' fatal msg ts
|
||||
doParse com (SeqEat act next) (x :: (ys ++ more)) | (NonEmptyRes {xs=ys} com' val more)
|
||||
= let p' = assert_total (doParse com' (next val) more) in
|
||||
case p' of
|
||||
Failure com' fatal msg ts => Failure com' fatal msg ts
|
||||
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
|
||||
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
|
||||
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
|
||||
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
|
||||
doParse com (ThenEmpty {c1} {c2} act next) xs
|
||||
= let p' = assert_total (doParse {c = c1} com act xs) in
|
||||
case p' of
|
||||
Failure com fatal msg ts => Failure com fatal msg ts
|
||||
EmptyRes com val xs => assert_total (doParse com next xs)
|
||||
NonEmptyRes {x} {xs=ys} com val more =>
|
||||
case (assert_total (doParse com next more)) of
|
||||
Failure com' fatal msg ts => Failure com' fatal msg ts
|
||||
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
|
||||
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
|
||||
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
|
||||
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
|
||||
doParse com (ThenEat act next) xs with (doParse com act xs)
|
||||
doParse com (ThenEat act next) xs | Failure com' fatal msg ts
|
||||
= Failure com' fatal msg ts
|
||||
doParse com (ThenEat act next) (x :: (ys ++ more)) | (NonEmptyRes {xs=ys} com' val more)
|
||||
= let p' = assert_total (doParse com' next more) in
|
||||
case p' of
|
||||
Failure com' fatal msg ts => Failure com' fatal msg ts
|
||||
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
|
||||
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
|
||||
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
|
||||
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
|
||||
|
||||
-- This next line is not strictly necessary, but it stops the coverage
|
||||
-- checker taking a really long time and eating lots of memory...
|
||||
-- doParse _ _ _ = Failure True True "Help the coverage checker!" []
|
||||
export %inline
|
||||
position : Grammar state tok False Bounds
|
||||
position = Position
|
||||
|
||||
public export
|
||||
data ParseError tok = Error String (List tok)
|
||||
data ParsingError tok = Error String (Maybe Bounds)
|
||||
|
||||
data ParseResult : Type -> Type -> Type -> Type where
|
||||
Failure : (committed : Bool) -> (fatal : Bool) ->
|
||||
List1 (ParsingError tok) -> ParseResult state tok ty
|
||||
Res : state -> (committed : Bool) ->
|
||||
(val : WithBounds ty) -> (more : List (WithBounds tok)) -> ParseResult state tok ty
|
||||
|
||||
mergeWith : WithBounds ty -> ParseResult state tok sy -> ParseResult state tok sy
|
||||
mergeWith x (Res s committed val more) = Res s committed (mergeBounds x val) more
|
||||
mergeWith x v = v
|
||||
|
||||
doParse : Semigroup state => state -> (commit : Bool) ->
|
||||
(act : Grammar state tok c ty) ->
|
||||
(xs : List (WithBounds tok)) ->
|
||||
ParseResult state tok ty
|
||||
doParse s com (Empty val) xs = Res s com (irrelevantBounds val) xs
|
||||
doParse s com (Fail location fatal str) xs
|
||||
= Failure com fatal (Error str (location <|> (bounds <$> head' xs)) ::: Nil)
|
||||
doParse s com (Try g) xs = case doParse s com g xs of
|
||||
-- recover from fatal match but still propagate the 'commit'
|
||||
Failure com _ errs => Failure com False errs
|
||||
res => res
|
||||
doParse s com Commit xs = Res s True (irrelevantBounds ()) xs
|
||||
doParse s com (MustWork g) xs =
|
||||
case assert_total (doParse s com g xs) of
|
||||
Failure com' _ errs => Failure com' True errs
|
||||
res => res
|
||||
doParse s com (Terminal err f) [] = Failure com False (Error "End of input" Nothing ::: Nil)
|
||||
doParse s com (Terminal err f) (x :: xs) =
|
||||
case f x.val of
|
||||
Nothing => Failure com False (Error err (Just x.bounds) ::: Nil)
|
||||
Just a => Res s com (const a <$> x) xs
|
||||
doParse s com EOF [] = Res s com (irrelevantBounds ()) []
|
||||
doParse s com EOF (x :: xs) = Failure com False (Error "Expected end of input" (Just x.bounds) ::: Nil)
|
||||
doParse s com (NextIs err f) [] = Failure com False (Error "End of input" Nothing ::: Nil)
|
||||
doParse s com (NextIs err f) (x :: xs)
|
||||
= if f x.val
|
||||
then Res s com (removeIrrelevance x) (x :: xs)
|
||||
else Failure com False (Error err (Just x.bounds) ::: Nil)
|
||||
doParse s com (Alt {c1} {c2} x y) xs
|
||||
= case doParse s False x xs of
|
||||
Failure com' fatal errs
|
||||
=> if com' || fatal
|
||||
-- If the alternative had committed, don't try the
|
||||
-- other branch (and reset commit flag)
|
||||
then Failure com fatal errs
|
||||
else case (assert_total doParse s False y xs) of
|
||||
(Failure com'' fatal' errs') => if com'' || fatal'
|
||||
-- Only add the errors together if the second branch
|
||||
-- is also non-committed and non-fatal.
|
||||
then Failure com'' fatal' errs'
|
||||
else Failure False False (errs ++ errs')
|
||||
(Res s _ val xs) => Res s com val xs
|
||||
-- Successfully parsed the first option, so use the outer commit flag
|
||||
Res s _ val xs => Res s com val xs
|
||||
doParse s com (SeqEmpty act next) xs
|
||||
= case assert_total (doParse s com act xs) of
|
||||
Failure com fatal errs => Failure com fatal errs
|
||||
Res s com v xs =>
|
||||
mergeWith v (assert_total $ doParse s com (next v.val) xs)
|
||||
doParse s com (SeqEat act next) xs
|
||||
= case assert_total (doParse s com act xs) of
|
||||
Failure com fatal errs => Failure com fatal errs
|
||||
Res s com v xs =>
|
||||
mergeWith v (assert_total $ doParse s com (next v.val) xs)
|
||||
doParse s com (ThenEmpty act next) xs
|
||||
= case assert_total (doParse s com act xs) of
|
||||
Failure com fatal errs => Failure com fatal errs
|
||||
Res s com v xs =>
|
||||
mergeWith v (assert_total $ doParse s com next xs)
|
||||
doParse s com (ThenEat act next) xs
|
||||
= case assert_total (doParse s com act xs) of
|
||||
Failure com fatal errs => Failure com fatal errs
|
||||
Res s com v xs =>
|
||||
mergeWith v (assert_total $ doParse s com next xs)
|
||||
doParse s com (Bounds act) xs
|
||||
= case assert_total (doParse s com act xs) of
|
||||
Failure com fatal errs => Failure com fatal errs
|
||||
Res s com v xs => Res s com (const v <$> v) xs
|
||||
doParse s com Position [] = Failure com False (Error "End of input" Nothing ::: Nil)
|
||||
doParse s com Position (x :: xs)
|
||||
= Res s com (irrelevantBounds x.bounds) (x :: xs)
|
||||
doParse s com (Act action) xs = Res (s <+> action) com (irrelevantBounds ()) xs
|
||||
|
||||
||| Parse a list of tokens according to the given grammar. If successful,
|
||||
||| returns a pair of the parse result and the unparsed tokens (the remaining
|
||||
||| input).
|
||||
export
|
||||
parse : {c : _} -> (act : Grammar tok c ty) -> (xs : List tok) ->
|
||||
Either (ParseError tok) (ty, List tok)
|
||||
parse : {c : Bool} -> (act : Grammar () tok c ty) -> (xs : List (WithBounds tok)) ->
|
||||
Either (List1 (ParsingError tok)) (ty, List (WithBounds tok))
|
||||
parse act xs
|
||||
= case doParse False act xs of
|
||||
Failure _ _ msg ts => Left (Error msg ts)
|
||||
EmptyRes _ val rest => pure (val, rest)
|
||||
NonEmptyRes _ val rest => pure (val, rest)
|
||||
= case doParse neutral False act xs of
|
||||
Failure _ _ errs => Left errs
|
||||
Res _ _ v rest => Right (v.val, rest)
|
||||
|
||||
export
|
||||
parseWith : Monoid state => {c : Bool} -> (act : Grammar state tok c ty) -> (xs : List (WithBounds tok)) ->
|
||||
Either (List1 (ParsingError tok)) (state, ty, List (WithBounds tok))
|
||||
parseWith act xs
|
||||
= case doParse neutral False act xs of
|
||||
Failure _ _ errs => Left errs
|
||||
Res s _ v rest => Right (s, v.val, rest)
|
||||
|
@ -413,10 +413,6 @@ Pretty Char where
|
||||
export Pretty Nat where pretty = unsafeTextWithoutNewLines . show
|
||||
export Pretty Int where pretty = unsafeTextWithoutNewLines . show
|
||||
export Pretty Integer where pretty = unsafeTextWithoutNewLines . show
|
||||
export Pretty Int8 where pretty = unsafeTextWithoutNewLines . show
|
||||
export Pretty Int16 where pretty = unsafeTextWithoutNewLines . show
|
||||
export Pretty Int32 where pretty = unsafeTextWithoutNewLines . show
|
||||
export Pretty Int64 where pretty = unsafeTextWithoutNewLines . show
|
||||
export Pretty Double where pretty = unsafeTextWithoutNewLines . show
|
||||
export Pretty Bits8 where pretty = unsafeTextWithoutNewLines . show
|
||||
export Pretty Bits16 where pretty = unsafeTextWithoutNewLines . show
|
||||
|
@ -3,6 +3,8 @@ module Text.PrettyPrint.Prettyprinter.Render.HTML
|
||||
import Data.List
|
||||
import Data.String
|
||||
|
||||
%default covering
|
||||
|
||||
export
|
||||
htmlEscape : String -> String
|
||||
htmlEscape s = fastAppend $ reverse $ go [] s
|
||||
|
@ -6,7 +6,7 @@ import public Control.ANSI
|
||||
import Control.Monad.ST
|
||||
import Text.PrettyPrint.Prettyprinter.Doc
|
||||
|
||||
%default total
|
||||
%default covering
|
||||
|
||||
public export
|
||||
AnsiStyle : Type
|
||||
|
@ -68,6 +68,7 @@ sdocToTreeParser (SLine i rest) = case sdocToTreeParser rest of
|
||||
(Just tree, rest') => (Just $ STConcat [STLine i, tree], rest')
|
||||
(Nothing, rest') => (Just $ STLine i, rest')
|
||||
sdocToTreeParser (SAnnPush ann rest) = case sdocToTreeParser rest of
|
||||
(tree, Nothing) => (Nothing, Nothing)
|
||||
(Just tree, Nothing) => (Just $ STAnn ann tree, Nothing)
|
||||
(Just tree, Just rest') => case sdocToTreeParser rest' of
|
||||
(Just tree', rest'') => (Just $ STConcat [STAnn ann tree, tree'], rest'')
|
||||
|
@ -101,8 +101,10 @@ angles : Doc ann -> Doc ann
|
||||
angles = enclose langle rangle
|
||||
|
||||
export
|
||||
brackets : Doc ann -> Doc ann
|
||||
brackets = enclose lbracket rbracket
|
||||
brackets : {default lbracket ldelim : Doc ann} ->
|
||||
{default rbracket rdelim : Doc ann} ->
|
||||
Doc ann -> Doc ann
|
||||
brackets {ldelim, rdelim} = enclose ldelim rdelim
|
||||
|
||||
export
|
||||
braces : Doc ann -> Doc ann
|
||||
|
@ -1,5 +1,7 @@
|
||||
module Text.Quantity
|
||||
|
||||
%default total
|
||||
|
||||
public export
|
||||
record Quantity where
|
||||
constructor Qty
|
||||
|
@ -1,5 +1,7 @@
|
||||
module Text.Token
|
||||
|
||||
%default total
|
||||
|
||||
||| For a type `kind`, specify a way of converting the recognised
|
||||
||| string into a value.
|
||||
|||
|
||||
|
@ -159,17 +159,20 @@ modules = Control.ANSI,
|
||||
System.Random,
|
||||
System.Path,
|
||||
|
||||
Text.Token,
|
||||
Text.Quantity,
|
||||
Text.Parser,
|
||||
Text.Bounded,
|
||||
Text.Lexer,
|
||||
Text.Parser.Core,
|
||||
Text.Lexer.Core,
|
||||
Text.Lexer.Tokenizer,
|
||||
Text.Literate,
|
||||
Text.Parser,
|
||||
Text.Parser.Core,
|
||||
Text.PrettyPrint.Prettyprinter,
|
||||
Text.PrettyPrint.Prettyprinter.Doc,
|
||||
Text.PrettyPrint.Prettyprinter.Symbols,
|
||||
Text.PrettyPrint.Prettyprinter.Util,
|
||||
Text.PrettyPrint.Prettyprinter.SimpleDocTree,
|
||||
Text.PrettyPrint.Prettyprinter.Render.HTML,
|
||||
Text.PrettyPrint.Prettyprinter.Render.String,
|
||||
Text.PrettyPrint.Prettyprinter.Render.Terminal
|
||||
Text.PrettyPrint.Prettyprinter.Render.Terminal,
|
||||
Text.PrettyPrint.Prettyprinter.SimpleDocTree,
|
||||
Text.PrettyPrint.Prettyprinter.Symbols,
|
||||
Text.PrettyPrint.Prettyprinter.Util,
|
||||
Text.Quantity,
|
||||
Text.Token
|
||||
|
Loading…
Reference in New Issue
Block a user