mirror of
https://github.com/idris-lang/Idris2.git
synced 2025-01-07 00:07:19 +03:00
Update contrib Text.Parser to match Library.Text.Parser (#1808)
Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org>
This commit is contained in:
parent
ca95781a74
commit
f3855d7100
@ -7,7 +7,6 @@ import Data.String
|
|||||||
|
|
||||||
%default total
|
%default total
|
||||||
|
|
||||||
|
|
||||||
infixl 5 +>
|
infixl 5 +>
|
||||||
infixr 5 <+
|
infixr 5 <+
|
||||||
|
|
||||||
@ -94,20 +93,14 @@ public export
|
|||||||
indentLines : (n : Nat) -> String -> String
|
indentLines : (n : Nat) -> String -> String
|
||||||
indentLines n str = unlines $ map (indent n) $ lines str
|
indentLines n str = unlines $ map (indent n) $ lines str
|
||||||
|
|
||||||
||| Return a string of the given character repeated
|
|
||||||
||| `n` times.
|
|
||||||
export
|
|
||||||
fastReplicate : (n : Nat) -> Char -> String
|
|
||||||
fastReplicate n c = fastPack $ replicate n c
|
|
||||||
|
|
||||||
||| Left-justify a string to the given length, using the
|
||| Left-justify a string to the given length, using the
|
||||||
||| specified fill character on the right.
|
||| specified fill character on the right.
|
||||||
export
|
export
|
||||||
justifyLeft : Nat -> Char -> String -> String
|
justifyLeft : Nat -> Char -> String -> String
|
||||||
justifyLeft n c s = s ++ fastReplicate (n `minus` length s) c
|
justifyLeft n c s = s ++ replicate (n `minus` length s) c
|
||||||
|
|
||||||
||| Right-justify a string to the given length, using the
|
||| Right-justify a string to the given length, using the
|
||||||
||| specified fill character on the left.
|
||| specified fill character on the left.
|
||||||
export
|
export
|
||||||
justifyRight : Nat -> Char -> String -> String
|
justifyRight : Nat -> Char -> String -> String
|
||||||
justifyRight n c s = fastReplicate (n `minus` length s) c ++ s
|
justifyRight n c s = replicate (n `minus` length s) c ++ s
|
||||||
|
@ -6,6 +6,8 @@ import Language.JSON.Parser
|
|||||||
|
|
||||||
import public Language.JSON.Data
|
import public Language.JSON.Data
|
||||||
|
|
||||||
|
import public Text.Bounded
|
||||||
|
|
||||||
%default total
|
%default total
|
||||||
|
|
||||||
||| Parse a JSON string.
|
||| Parse a JSON string.
|
||||||
|
@ -34,8 +34,8 @@ jsonTokenMap = toTokenMap $
|
|||||||
]
|
]
|
||||||
|
|
||||||
export
|
export
|
||||||
lexJSON : String -> Maybe (List JSONToken)
|
lexJSON : String -> Maybe (List (WithBounds JSONToken))
|
||||||
lexJSON str
|
lexJSON str
|
||||||
= case lex jsonTokenMap str of
|
= case lex jsonTokenMap str of
|
||||||
(tokens, _, _, "") => Just $ map TokenData.tok tokens
|
(tokens, _, _, "") => Just $ tokens
|
||||||
_ => Nothing
|
_ => Nothing
|
||||||
|
@ -10,20 +10,20 @@ import public Language.JSON.Tokens
|
|||||||
%default total
|
%default total
|
||||||
|
|
||||||
private
|
private
|
||||||
punct : Punctuation -> Grammar JSONToken True ()
|
punct : Punctuation -> Grammar state JSONToken True ()
|
||||||
punct p = match $ JTPunct p
|
punct p = match $ JTPunct p
|
||||||
|
|
||||||
private
|
private
|
||||||
rawString : Grammar JSONToken True String
|
rawString : Grammar state JSONToken True String
|
||||||
rawString = do mstr <- match JTString
|
rawString = do mstr <- match JTString
|
||||||
the (Grammar _ False _) $
|
the (Grammar _ _ False _) $
|
||||||
case mstr of
|
case mstr of
|
||||||
Just str => pure str
|
Just str => pure str
|
||||||
Nothing => fail "invalid string"
|
Nothing => fail "invalid string"
|
||||||
|
|
||||||
mutual
|
mutual
|
||||||
private
|
private
|
||||||
json : Grammar JSONToken True JSON
|
json : Grammar state JSONToken True JSON
|
||||||
json = object
|
json = object
|
||||||
<|> array
|
<|> array
|
||||||
<|> string
|
<|> string
|
||||||
@ -32,14 +32,14 @@ mutual
|
|||||||
<|> null
|
<|> null
|
||||||
|
|
||||||
private
|
private
|
||||||
object : Grammar JSONToken True JSON
|
object : Grammar state JSONToken True JSON
|
||||||
object = do punct $ Curly Open
|
object = do punct $ Curly Open
|
||||||
commit
|
commit
|
||||||
props <- properties
|
props <- properties
|
||||||
punct $ Curly Close
|
punct $ Curly Close
|
||||||
pure $ JObject props
|
pure $ JObject props
|
||||||
where
|
where
|
||||||
properties : Grammar JSONToken False (List (String, JSON))
|
properties : Grammar state JSONToken False (List (String, JSON))
|
||||||
properties = sepBy (punct Comma) $
|
properties = sepBy (punct Comma) $
|
||||||
do key <- rawString
|
do key <- rawString
|
||||||
punct Colon
|
punct Colon
|
||||||
@ -47,34 +47,34 @@ mutual
|
|||||||
pure (key, value)
|
pure (key, value)
|
||||||
|
|
||||||
private
|
private
|
||||||
array : Grammar JSONToken True JSON
|
array : Grammar state JSONToken True JSON
|
||||||
array = do punct (Square Open)
|
array = do punct (Square Open)
|
||||||
commit
|
commit
|
||||||
vals <- values
|
vals <- values
|
||||||
punct (Square Close)
|
punct (Square Close)
|
||||||
pure (JArray vals)
|
pure (JArray vals)
|
||||||
where
|
where
|
||||||
values : Grammar JSONToken False (List JSON)
|
values : Grammar state JSONToken False (List JSON)
|
||||||
values = sepBy (punct Comma) json
|
values = sepBy (punct Comma) json
|
||||||
|
|
||||||
private
|
private
|
||||||
string : Grammar JSONToken True JSON
|
string : Grammar state JSONToken True JSON
|
||||||
string = map JString rawString
|
string = map JString rawString
|
||||||
|
|
||||||
private
|
private
|
||||||
boolean : Grammar JSONToken True JSON
|
boolean : Grammar state JSONToken True JSON
|
||||||
boolean = map JBoolean $ match JTBoolean
|
boolean = map JBoolean $ match JTBoolean
|
||||||
|
|
||||||
private
|
private
|
||||||
number : Grammar JSONToken True JSON
|
number : Grammar state JSONToken True JSON
|
||||||
number = map JNumber $ match JTNumber
|
number = map JNumber $ match JTNumber
|
||||||
|
|
||||||
private
|
private
|
||||||
null : Grammar JSONToken True JSON
|
null : Grammar state JSONToken True JSON
|
||||||
null = map (const JNull) $ match JTNull
|
null = map (const JNull) $ match JTNull
|
||||||
|
|
||||||
export
|
export
|
||||||
parseJSON : List JSONToken -> Maybe JSON
|
parseJSON : List (WithBounds JSONToken) -> Maybe JSON
|
||||||
parseJSON toks = case parse json $ filter (not . ignored) toks of
|
parseJSON toks = case parse json $ filter (not . ignored) toks of
|
||||||
Right (j, []) => Just j
|
Right (j, []) => Just j
|
||||||
_ => Nothing
|
_ => Nothing
|
||||||
|
@ -12,7 +12,7 @@ quo = is '"'
|
|||||||
|
|
||||||
export
|
export
|
||||||
esc : Lexer -> Lexer
|
esc : Lexer -> Lexer
|
||||||
esc = escape '\\'
|
esc = escape (is '\\')
|
||||||
|
|
||||||
private
|
private
|
||||||
unicodeEscape : Lexer
|
unicodeEscape : Lexer
|
||||||
@ -36,7 +36,7 @@ jsonStringTokenMap = toTokenMap $
|
|||||||
]
|
]
|
||||||
|
|
||||||
export
|
export
|
||||||
lexString : String -> Maybe (List JSONStringToken)
|
lexString : String -> Maybe (List (WithBounds JSONStringToken))
|
||||||
lexString x = case lex jsonStringTokenMap x of
|
lexString x = case lex jsonStringTokenMap x of
|
||||||
(toks, _, _, "") => Just $ map TokenData.tok toks
|
(toks, _, _, "") => Just $ toks
|
||||||
_ => Nothing
|
_ => Nothing
|
||||||
|
@ -7,20 +7,20 @@ import Text.Parser
|
|||||||
%default total
|
%default total
|
||||||
|
|
||||||
private
|
private
|
||||||
stringChar : Grammar JSONStringToken True Char
|
stringChar : Grammar state JSONStringToken True Char
|
||||||
stringChar = match JSTChar
|
stringChar = match JSTChar
|
||||||
<|> match JSTSimpleEscape
|
<|> match JSTSimpleEscape
|
||||||
<|> match JSTUnicodeEscape
|
<|> match JSTUnicodeEscape
|
||||||
|
|
||||||
private
|
private
|
||||||
quotedString : Grammar JSONStringToken True String
|
quotedString : Grammar state JSONStringToken True String
|
||||||
quotedString = let q = match JSTQuote in
|
quotedString = let q = match JSTQuote in
|
||||||
do chars <- between q q (many stringChar)
|
do chars <- between q q (many stringChar)
|
||||||
eof
|
eof
|
||||||
pure $ pack chars
|
pure $ pack chars
|
||||||
|
|
||||||
export
|
export
|
||||||
parseString : List JSONStringToken -> Maybe String
|
parseString : List (WithBounds JSONStringToken) -> Maybe String
|
||||||
parseString toks = case parse quotedString toks of
|
parseString toks = case parse quotedString toks of
|
||||||
Right (str, []) => Just str
|
Right (str, []) => Just str
|
||||||
_ => Nothing
|
_ => Nothing
|
||||||
|
@ -2,6 +2,7 @@ module Language.JSON.Tokens
|
|||||||
|
|
||||||
import Language.JSON.String
|
import Language.JSON.String
|
||||||
import Text.Token
|
import Text.Token
|
||||||
|
import Text.Bounded
|
||||||
|
|
||||||
%default total
|
%default total
|
||||||
|
|
||||||
@ -76,6 +77,6 @@ TokenKind JSONTokenKind where
|
|||||||
tokValue JTIgnore _ = ()
|
tokValue JTIgnore _ = ()
|
||||||
|
|
||||||
export
|
export
|
||||||
ignored : JSONToken -> Bool
|
ignored : WithBounds JSONToken -> Bool
|
||||||
ignored (Tok JTIgnore _) = True
|
ignored (MkBounded (Tok JTIgnore _) _ _) = True
|
||||||
ignored _ = False
|
ignored _ = False
|
||||||
|
@ -153,15 +153,15 @@ pathTokenMap = toTokenMap $
|
|||||||
, (some $ non $ oneOf "/\\:?", PTText)
|
, (some $ non $ oneOf "/\\:?", PTText)
|
||||||
]
|
]
|
||||||
|
|
||||||
lexPath : String -> List PathToken
|
lexPath : String -> List (WithBounds PathToken)
|
||||||
lexPath str =
|
lexPath str =
|
||||||
let
|
let
|
||||||
(tokens, _, _, _) = lex pathTokenMap str
|
(tokens, _, _, _) = lex pathTokenMap str
|
||||||
in
|
in
|
||||||
map TokenData.tok tokens
|
tokens -- TokenData.tok tokens
|
||||||
|
|
||||||
-- match both '/' and '\\' regardless of the platform.
|
-- match both '/' and '\\' regardless of the platform.
|
||||||
bodySeparator : Grammar PathToken True ()
|
bodySeparator : Grammar state PathToken True ()
|
||||||
bodySeparator = (match $ PTPunct '\\') <|> (match $ PTPunct '/')
|
bodySeparator = (match $ PTPunct '\\') <|> (match $ PTPunct '/')
|
||||||
|
|
||||||
-- Windows will automatically translate '/' to '\\'. And the verbatim prefix,
|
-- Windows will automatically translate '/' to '\\'. And the verbatim prefix,
|
||||||
@ -169,7 +169,7 @@ bodySeparator = (match $ PTPunct '\\') <|> (match $ PTPunct '/')
|
|||||||
-- However, we just parse it and ignore it.
|
-- However, we just parse it and ignore it.
|
||||||
--
|
--
|
||||||
-- Example: \\?\
|
-- Example: \\?\
|
||||||
verbatim : Grammar PathToken True ()
|
verbatim : Grammar state PathToken True ()
|
||||||
verbatim =
|
verbatim =
|
||||||
do
|
do
|
||||||
ignore $ count (exactly 2) $ match $ PTPunct '\\'
|
ignore $ count (exactly 2) $ match $ PTPunct '\\'
|
||||||
@ -178,7 +178,7 @@ verbatim =
|
|||||||
pure ()
|
pure ()
|
||||||
|
|
||||||
-- Example: \\server\share
|
-- Example: \\server\share
|
||||||
unc : Grammar PathToken True Volume
|
unc : Grammar state PathToken True Volume
|
||||||
unc =
|
unc =
|
||||||
do
|
do
|
||||||
ignore $ count (exactly 2) $ match $ PTPunct '\\'
|
ignore $ count (exactly 2) $ match $ PTPunct '\\'
|
||||||
@ -188,7 +188,7 @@ unc =
|
|||||||
pure $ UNC server share
|
pure $ UNC server share
|
||||||
|
|
||||||
-- Example: \\?\server\share
|
-- Example: \\?\server\share
|
||||||
verbatimUnc : Grammar PathToken True Volume
|
verbatimUnc : Grammar state PathToken True Volume
|
||||||
verbatimUnc =
|
verbatimUnc =
|
||||||
do
|
do
|
||||||
verbatim
|
verbatim
|
||||||
@ -198,7 +198,7 @@ verbatimUnc =
|
|||||||
pure $ UNC server share
|
pure $ UNC server share
|
||||||
|
|
||||||
-- Example: C:
|
-- Example: C:
|
||||||
disk : Grammar PathToken True Volume
|
disk : Grammar state PathToken True Volume
|
||||||
disk =
|
disk =
|
||||||
do
|
do
|
||||||
text <- match PTText
|
text <- match PTText
|
||||||
@ -209,21 +209,21 @@ disk =
|
|||||||
pure $ Disk (toUpper disk)
|
pure $ Disk (toUpper disk)
|
||||||
|
|
||||||
-- Example: \\?\C:
|
-- Example: \\?\C:
|
||||||
verbatimDisk : Grammar PathToken True Volume
|
verbatimDisk : Grammar state PathToken True Volume
|
||||||
verbatimDisk =
|
verbatimDisk =
|
||||||
do
|
do
|
||||||
verbatim
|
verbatim
|
||||||
disk <- disk
|
disk <- disk
|
||||||
pure disk
|
pure disk
|
||||||
|
|
||||||
parseVolume : Grammar PathToken True Volume
|
parseVolume : Grammar state PathToken True Volume
|
||||||
parseVolume =
|
parseVolume =
|
||||||
verbatimUnc
|
verbatimUnc
|
||||||
<|> verbatimDisk
|
<|> verbatimDisk
|
||||||
<|> unc
|
<|> unc
|
||||||
<|> disk
|
<|> disk
|
||||||
|
|
||||||
parseBody : Grammar PathToken True Body
|
parseBody : Grammar state PathToken True Body
|
||||||
parseBody =
|
parseBody =
|
||||||
do
|
do
|
||||||
text <- match PTText
|
text <- match PTText
|
||||||
@ -232,7 +232,7 @@ parseBody =
|
|||||||
"." => CurDir
|
"." => CurDir
|
||||||
normal => Normal normal
|
normal => Normal normal
|
||||||
|
|
||||||
parsePath : Grammar PathToken False Path
|
parsePath : Grammar state PathToken False Path
|
||||||
parsePath =
|
parsePath =
|
||||||
do
|
do
|
||||||
vol <- optional parseVolume
|
vol <- optional parseVolume
|
||||||
|
91
libs/contrib/Text/Bounded.idr
Normal file
91
libs/contrib/Text/Bounded.idr
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
module Text.Bounded
|
||||||
|
|
||||||
|
%default total
|
||||||
|
|
||||||
|
|
||||||
|
public export
|
||||||
|
record Bounds where
|
||||||
|
constructor MkBounds
|
||||||
|
startLine : Int
|
||||||
|
startCol : Int
|
||||||
|
endLine : Int
|
||||||
|
endCol : Int
|
||||||
|
|
||||||
|
Show Bounds where
|
||||||
|
showPrec d (MkBounds sl sc el ec) =
|
||||||
|
showCon d "MkBounds" (concat [showArg sl, showArg sc, showArg el, showArg ec])
|
||||||
|
|
||||||
|
export
|
||||||
|
startBounds : Bounds -> (Int, Int)
|
||||||
|
startBounds b = (b.startLine, b.startCol)
|
||||||
|
|
||||||
|
export
|
||||||
|
endBounds : Bounds -> (Int, Int)
|
||||||
|
endBounds b = (b.endLine, b.endCol)
|
||||||
|
|
||||||
|
export
|
||||||
|
Eq Bounds where
|
||||||
|
(MkBounds sl sc el ec) == (MkBounds sl' sc' el' ec') =
|
||||||
|
sl == sl'
|
||||||
|
&& sc == sc'
|
||||||
|
&& el == el'
|
||||||
|
&& ec == ec'
|
||||||
|
|
||||||
|
public export
|
||||||
|
record WithBounds ty where
|
||||||
|
constructor MkBounded
|
||||||
|
val : ty
|
||||||
|
isIrrelevant : Bool
|
||||||
|
bounds : Bounds
|
||||||
|
|
||||||
|
export
|
||||||
|
start : WithBounds ty -> (Int, Int)
|
||||||
|
start = startBounds . bounds
|
||||||
|
|
||||||
|
export
|
||||||
|
end : WithBounds ty -> (Int, Int)
|
||||||
|
end = endBounds . bounds
|
||||||
|
|
||||||
|
export
|
||||||
|
Eq ty => Eq (WithBounds ty) where
|
||||||
|
(MkBounded val ir bd) == (MkBounded val' ir' bd') =
|
||||||
|
val == val' && ir == ir' && bd == bd'
|
||||||
|
|
||||||
|
export
|
||||||
|
Show ty => Show (WithBounds ty) where
|
||||||
|
showPrec d (MkBounded val ir bd) =
|
||||||
|
showCon d "MkBounded" (concat [showArg ir, showArg val, showArg bd])
|
||||||
|
|
||||||
|
export
|
||||||
|
Functor WithBounds where
|
||||||
|
map f (MkBounded val ir bd) = MkBounded (f val) ir bd
|
||||||
|
|
||||||
|
export
|
||||||
|
Foldable WithBounds where
|
||||||
|
foldr c n b = c b.val n
|
||||||
|
|
||||||
|
export
|
||||||
|
Traversable WithBounds where
|
||||||
|
traverse f (MkBounded v a bd) = (\ v => MkBounded v a bd) <$> f v
|
||||||
|
|
||||||
|
export
|
||||||
|
irrelevantBounds : ty -> WithBounds ty
|
||||||
|
irrelevantBounds x = MkBounded x True (MkBounds (-1) (-1) (-1) (-1))
|
||||||
|
|
||||||
|
export
|
||||||
|
removeIrrelevance : WithBounds ty -> WithBounds ty
|
||||||
|
removeIrrelevance (MkBounded val ir bd) = MkBounded val True bd
|
||||||
|
|
||||||
|
export
|
||||||
|
mergeBounds : WithBounds ty -> WithBounds ty' -> WithBounds ty'
|
||||||
|
mergeBounds (MkBounded _ True _) (MkBounded val True _) = irrelevantBounds val
|
||||||
|
mergeBounds (MkBounded _ True _) b2 = b2
|
||||||
|
mergeBounds b1 (MkBounded val True _) = const val <$> b1
|
||||||
|
mergeBounds b1 b2 =
|
||||||
|
let (ur, uc) = min (start b1) (start b2)
|
||||||
|
(lr, lc) = max (end b1) (end b2) in
|
||||||
|
MkBounded b2.val False (MkBounds ur uc lr lc)
|
||||||
|
|
||||||
|
export
|
||||||
|
joinBounds : WithBounds (WithBounds ty) -> WithBounds ty
|
||||||
|
joinBounds b = mergeBounds b b.val
|
69
libs/contrib/Text/Distance/Levenshtein.idr
Normal file
69
libs/contrib/Text/Distance/Levenshtein.idr
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
module Text.Distance.Levenshtein
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
import Data.Maybe
|
||||||
|
import Data.String
|
||||||
|
import Data.IOMatrix
|
||||||
|
import Data.List.Extra
|
||||||
|
|
||||||
|
%default total
|
||||||
|
|
||||||
|
||| Self-evidently correct but O(3 ^ (min mn)) complexity
|
||||||
|
spec : String -> String -> Nat
|
||||||
|
spec a b = loop (fastUnpack a) (fastUnpack b) where
|
||||||
|
|
||||||
|
loop : List Char -> List Char -> Nat
|
||||||
|
loop [] ys = length ys -- deletions
|
||||||
|
loop xs [] = length xs -- insertions
|
||||||
|
loop (x :: xs) (y :: ys)
|
||||||
|
= if x == y then loop xs ys -- match
|
||||||
|
else 1 + minimum [ loop (x :: xs) ys -- insert y
|
||||||
|
, loop xs (y :: ys) -- delete x
|
||||||
|
, loop xs ys -- substitute y for x
|
||||||
|
]
|
||||||
|
|
||||||
|
||| Dynamic programming
|
||||||
|
export
|
||||||
|
compute : HasIO io => String -> String -> io Nat
|
||||||
|
compute a b = do
|
||||||
|
let w = strLength a
|
||||||
|
let h = strLength b
|
||||||
|
-- In mat[i][j], we store the distance between
|
||||||
|
-- * the suffix of a of size i
|
||||||
|
-- * the suffix of b of size j
|
||||||
|
-- So we need a matrix of size (|a|+1) * (|b|+1)
|
||||||
|
mat <- new (w+1) (h+1)
|
||||||
|
-- Whenever one of the two suffixes of interest is empty, the only
|
||||||
|
-- winning move is to:
|
||||||
|
-- * delete all of the first
|
||||||
|
-- * insert all of the second
|
||||||
|
-- i.e. the cost is the length of the non-zero suffix
|
||||||
|
for_ [0..w] $ \ i => write mat i 0 i -- deletions
|
||||||
|
for_ [0..h] $ \ j => write mat 0 j j -- insertions
|
||||||
|
|
||||||
|
-- We introduce a specialised `read` for ease of use
|
||||||
|
let get = \i, j => case !(read {io} mat i j) of
|
||||||
|
Nothing => assert_total $
|
||||||
|
idris_crash "INTERNAL ERROR: Badly initialised matrix"
|
||||||
|
Just n => pure n
|
||||||
|
|
||||||
|
-- We fill the matrix from the bottom up, using the same formula we used
|
||||||
|
-- in the specification's `loop`.
|
||||||
|
for_ [1..h] $ \ j => do
|
||||||
|
for_ [1..w] $ \ i => do
|
||||||
|
-- here we change Levenshtein slightly so that we may only substitute
|
||||||
|
-- alpha / numerical characters for similar ones. This avoids suggesting
|
||||||
|
-- "#" as a replacement for an out of scope "n".
|
||||||
|
let cost = let c = assert_total $ strIndex a (i-1)
|
||||||
|
d = assert_total $ strIndex b (j-1)
|
||||||
|
in if c == d then 0 else
|
||||||
|
if isAlpha c && isAlpha d then 1 else
|
||||||
|
if isDigit c && isDigit d then 1 else 2
|
||||||
|
write mat i j $
|
||||||
|
minimum [ 1 + !(get i (j-1)) -- insert y
|
||||||
|
, 1 + !(get (i-1) j) -- delete x
|
||||||
|
, cost + !(get (i-1) (j-1)) -- equal or substitute y for x
|
||||||
|
]
|
||||||
|
|
||||||
|
-- Once the matrix is fully filled, we can simply read the top right corner
|
||||||
|
integerToNat . cast <$> get w h
|
@ -8,6 +8,8 @@ import public Text.Lexer.Core
|
|||||||
import public Text.Quantity
|
import public Text.Quantity
|
||||||
import public Text.Token
|
import public Text.Token
|
||||||
|
|
||||||
|
%default total
|
||||||
|
|
||||||
export
|
export
|
||||||
toTokenMap : List (Lexer, k) -> TokenMap (Token k)
|
toTokenMap : List (Lexer, k) -> TokenMap (Token k)
|
||||||
toTokenMap = map $ \(l, kind) => (l, Tok kind)
|
toTokenMap = map $ \(l, kind) => (l, Tok kind)
|
||||||
@ -37,7 +39,7 @@ non l = reject l <+> any
|
|||||||
export
|
export
|
||||||
choiceMap : {c : Bool} ->
|
choiceMap : {c : Bool} ->
|
||||||
Foldable t => (a -> Recognise c) -> t a -> Recognise c
|
Foldable t => (a -> Recognise c) -> t a -> Recognise c
|
||||||
choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
|
choiceMap {c} f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
|
||||||
f x <|> acc)
|
f x <|> acc)
|
||||||
fail xs
|
fail xs
|
||||||
|
|
||||||
@ -46,13 +48,13 @@ choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
|
|||||||
export
|
export
|
||||||
choice : {c : _} ->
|
choice : {c : _} ->
|
||||||
Foldable t => t (Recognise c) -> Recognise c
|
Foldable t => t (Recognise c) -> Recognise c
|
||||||
choice = Lexer.choiceMap id
|
choice = choiceMap id
|
||||||
|
|
||||||
||| Sequence a list of recognisers. Guaranteed to consume input if the list is
|
||| Sequence a list of recognisers. Guaranteed to consume input if the list is
|
||||||
||| non-empty and the recognisers consume.
|
||| non-empty and the recognisers consume.
|
||||||
export
|
export
|
||||||
concat : {c : _} ->
|
concat : {c : _} ->
|
||||||
(xs : List (Recognise c)) -> Recognise (c && isCons xs)
|
(xs : List (Recognise c)) -> Recognise (isCons xs && c)
|
||||||
concat = Lexer.Core.concatMap id
|
concat = Lexer.Core.concatMap id
|
||||||
|
|
||||||
||| Recognise a specific character.
|
||| Recognise a specific character.
|
||||||
@ -124,6 +126,13 @@ mutual
|
|||||||
many : Lexer -> Recognise False
|
many : Lexer -> Recognise False
|
||||||
many l = opt (some l)
|
many l = opt (some l)
|
||||||
|
|
||||||
|
||| Repeat the sub-lexer `l` one or more times until the lexer
|
||||||
|
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
|
||||||
|
||| /((?!`stopBefore`)`l`)\+/
|
||||||
|
export
|
||||||
|
someUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Lexer
|
||||||
|
someUntil stopBefore l = some (reject stopBefore <+> l)
|
||||||
|
|
||||||
||| Repeat the sub-lexer `l` zero or more times until the lexer
|
||| Repeat the sub-lexer `l` zero or more times until the lexer
|
||||||
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
|
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
|
||||||
||| Not guaranteed to consume input.
|
||| Not guaranteed to consume input.
|
||||||
@ -140,14 +149,6 @@ export
|
|||||||
manyThen : (stopAfter : Recognise c) -> (l : Lexer) -> Recognise c
|
manyThen : (stopAfter : Recognise c) -> (l : Lexer) -> Recognise c
|
||||||
manyThen stopAfter l = manyUntil stopAfter l <+> stopAfter
|
manyThen stopAfter l = manyUntil stopAfter l <+> stopAfter
|
||||||
|
|
||||||
||| Recognise many instances of `l` until an instance of `end` is
|
|
||||||
||| encountered.
|
|
||||||
|||
|
|
||||||
||| Useful for defining comments.
|
|
||||||
export
|
|
||||||
manyTill : (l : Lexer) -> (end : Lexer) -> Recognise False
|
|
||||||
manyTill l end = end <|> opt (l <+> manyTill l end)
|
|
||||||
|
|
||||||
||| Recognise a sub-lexer repeated as specified by `q`. Fails if `q` has
|
||| Recognise a sub-lexer repeated as specified by `q`. Fails if `q` has
|
||||||
||| `min` and `max` in the wrong order. Consumes input unless `min q` is zero.
|
||| `min` and `max` in the wrong order. Consumes input unless `min q` is zero.
|
||||||
||| /`l`{`q`}/
|
||| /`l`{`q`}/
|
||||||
@ -172,6 +173,18 @@ export
|
|||||||
digits : Lexer
|
digits : Lexer
|
||||||
digits = some digit
|
digits = some digit
|
||||||
|
|
||||||
|
||| Recognise a single binary digit
|
||||||
|
||| /[0-1]/
|
||||||
|
export
|
||||||
|
binDigit : Lexer
|
||||||
|
binDigit = pred (\c => c == '0' || c == '1')
|
||||||
|
|
||||||
|
||| Recognise one or more binary digits
|
||||||
|
||| /[0-1]+/
|
||||||
|
export
|
||||||
|
binDigits : Lexer
|
||||||
|
binDigits = some binDigit
|
||||||
|
|
||||||
||| Recognise a single hexidecimal digit
|
||| Recognise a single hexidecimal digit
|
||||||
||| /[0-9A-Fa-f]/
|
||| /[0-9A-Fa-f]/
|
||||||
export
|
export
|
||||||
@ -188,13 +201,13 @@ hexDigits = some hexDigit
|
|||||||
||| /[0-8]/
|
||| /[0-8]/
|
||||||
export
|
export
|
||||||
octDigit : Lexer
|
octDigit : Lexer
|
||||||
octDigit = pred isHexDigit
|
octDigit = pred isOctDigit
|
||||||
|
|
||||||
||| Recognise one or more octal digits
|
||| Recognise one or more octal digits
|
||||||
||| /[0-8]+/
|
||| /[0-8]+/
|
||||||
export
|
export
|
||||||
octDigits : Lexer
|
octDigits : Lexer
|
||||||
octDigits = some hexDigit
|
octDigits = some octDigit
|
||||||
|
|
||||||
||| Recognise a single alpha character
|
||| Recognise a single alpha character
|
||||||
||| /[A-Za-z]/
|
||| /[A-Za-z]/
|
||||||
@ -307,18 +320,19 @@ export
|
|||||||
quote : (q : Lexer) -> (l : Lexer) -> Lexer
|
quote : (q : Lexer) -> (l : Lexer) -> Lexer
|
||||||
quote q l = surround q q l
|
quote q l = surround q q l
|
||||||
|
|
||||||
||| Recognise an escape character (often '\\') followed by a sub-lexer
|
||| Recognise an escape sub-lexer (often '\\') followed by
|
||||||
|
||| another sub-lexer
|
||||||
||| /[`esc`]`l`/
|
||| /[`esc`]`l`/
|
||||||
export
|
export
|
||||||
escape : (esc : Char) -> Lexer -> Lexer
|
escape : (esc : Lexer) -> Lexer -> Lexer
|
||||||
escape esc l = is esc <+> l
|
escape esc l = esc <+> l
|
||||||
|
|
||||||
||| Recognise a string literal, including escaped characters.
|
||| Recognise a string literal, including escaped characters.
|
||||||
||| (Note: doesn't yet handle escape sequences such as \123)
|
||| (Note: doesn't yet handle escape sequences such as \123)
|
||||||
||| /"(\\\\.|.)\*?"/
|
||| /"(\\\\.|.)\*?"/
|
||||||
export
|
export
|
||||||
stringLit : Lexer
|
stringLit : Lexer
|
||||||
stringLit = quote (is '"') (escape '\\' any <|> any)
|
stringLit = quote (is '"') (escape (is '\\') any <|> any)
|
||||||
|
|
||||||
||| Recognise a character literal, including escaped characters.
|
||| Recognise a character literal, including escaped characters.
|
||||||
||| (Note: doesn't yet handle escape sequences such as \123)
|
||| (Note: doesn't yet handle escape sequences such as \123)
|
||||||
@ -326,7 +340,7 @@ stringLit = quote (is '"') (escape '\\' any <|> any)
|
|||||||
export
|
export
|
||||||
charLit : Lexer
|
charLit : Lexer
|
||||||
charLit = let q = '\'' in
|
charLit = let q = '\'' in
|
||||||
is q <+> (escape '\\' (control <|> any) <|> isNot q) <+> is q
|
is q <+> (escape (is '\\') (control <|> any) <|> isNot q) <+> is q
|
||||||
where
|
where
|
||||||
lexStr : List String -> Lexer
|
lexStr : List String -> Lexer
|
||||||
lexStr [] = fail
|
lexStr [] = fail
|
||||||
@ -348,12 +362,40 @@ export
|
|||||||
intLit : Lexer
|
intLit : Lexer
|
||||||
intLit = opt (is '-') <+> digits
|
intLit = opt (is '-') <+> digits
|
||||||
|
|
||||||
|
||| Recognise a binary literal, prefixed by "0b"
|
||||||
|
||| /0b[0-1]+/
|
||||||
|
export
|
||||||
|
binLit : Lexer
|
||||||
|
binLit = exact "0b" <+> binDigits
|
||||||
|
|
||||||
||| Recognise a hexidecimal literal, prefixed by "0x" or "0X"
|
||| Recognise a hexidecimal literal, prefixed by "0x" or "0X"
|
||||||
||| /0[Xx][0-9A-Fa-f]+/
|
||| /0[Xx][0-9A-Fa-f]+/
|
||||||
export
|
export
|
||||||
hexLit : Lexer
|
hexLit : Lexer
|
||||||
hexLit = approx "0x" <+> hexDigits
|
hexLit = approx "0x" <+> hexDigits
|
||||||
|
|
||||||
|
||| Recognise an octal literal, prefixed by "0o"
|
||||||
|
||| /0o[0-9A-Fa-f]+/
|
||||||
|
export
|
||||||
|
octLit : Lexer
|
||||||
|
octLit = exact "0o" <+> octDigits
|
||||||
|
|
||||||
|
export
|
||||||
|
digitsUnderscoredLit : Lexer
|
||||||
|
digitsUnderscoredLit = digits <+> many (is '_' <+> digits)
|
||||||
|
|
||||||
|
export
|
||||||
|
binUnderscoredLit : Lexer
|
||||||
|
binUnderscoredLit = binLit <+> many (is '_' <+> binDigits)
|
||||||
|
|
||||||
|
export
|
||||||
|
hexUnderscoredLit : Lexer
|
||||||
|
hexUnderscoredLit = hexLit <+> many (is '_' <+> hexDigits)
|
||||||
|
|
||||||
|
export
|
||||||
|
octUnderscoredLit : Lexer
|
||||||
|
octUnderscoredLit = octLit <+> many (is '_' <+> octDigits)
|
||||||
|
|
||||||
||| Recognise `start`, then recognise all input until a newline is encountered,
|
||| Recognise `start`, then recognise all input until a newline is encountered,
|
||||||
||| and consume the newline. Will succeed if end-of-input is encountered before
|
||| and consume the newline. Will succeed if end-of-input is encountered before
|
||||||
||| a newline.
|
||| a newline.
|
||||||
|
@ -1,15 +1,19 @@
|
|||||||
module Text.Lexer.Core
|
module Text.Lexer.Core
|
||||||
|
|
||||||
import public Control.Delayed
|
|
||||||
import Data.Bool
|
|
||||||
import Data.List
|
import Data.List
|
||||||
|
import Data.Maybe
|
||||||
import Data.Nat
|
import Data.Nat
|
||||||
import Data.String
|
import Data.String
|
||||||
|
|
||||||
|
import public Control.Delayed
|
||||||
|
import public Text.Bounded
|
||||||
|
|
||||||
|
%default total
|
||||||
|
|
||||||
||| A language of token recognisers.
|
||| A language of token recognisers.
|
||||||
||| @ consumes If `True`, this recogniser is guaranteed to consume at
|
||| @ consumes If `True`, this recogniser is guaranteed to consume at
|
||||||
||| least one character of input when it succeeds.
|
||| least one character of input when it succeeds.
|
||||||
public export
|
export
|
||||||
data Recognise : (consumes : Bool) -> Type where
|
data Recognise : (consumes : Bool) -> Type where
|
||||||
Empty : Recognise False
|
Empty : Recognise False
|
||||||
Fail : Recognise c
|
Fail : Recognise c
|
||||||
@ -17,6 +21,7 @@ data Recognise : (consumes : Bool) -> Type where
|
|||||||
Pred : (Char -> Bool) -> Recognise True
|
Pred : (Char -> Bool) -> Recognise True
|
||||||
SeqEat : Recognise True -> Inf (Recognise e) -> Recognise True
|
SeqEat : Recognise True -> Inf (Recognise e) -> Recognise True
|
||||||
SeqEmpty : Recognise e1 -> Recognise e2 -> Recognise (e1 || e2)
|
SeqEmpty : Recognise e1 -> Recognise e2 -> Recognise (e1 || e2)
|
||||||
|
SeqSame : Recognise e -> Recognise e -> Recognise e
|
||||||
Alt : Recognise e1 -> Recognise e2 -> Recognise (e1 && e2)
|
Alt : Recognise e1 -> Recognise e2 -> Recognise (e1 && e2)
|
||||||
|
|
||||||
||| A token recogniser. Guaranteed to consume at least one character.
|
||| A token recogniser. Guaranteed to consume at least one character.
|
||||||
@ -24,8 +29,6 @@ public export
|
|||||||
Lexer : Type
|
Lexer : Type
|
||||||
Lexer = Recognise True
|
Lexer = Recognise True
|
||||||
|
|
||||||
-- %allow_overloads (<+>)
|
|
||||||
|
|
||||||
||| Sequence two recognisers. If either consumes a character, the sequence
|
||| Sequence two recognisers. If either consumes a character, the sequence
|
||||||
||| is guaranteed to consume a character.
|
||| is guaranteed to consume a character.
|
||||||
export %inline
|
export %inline
|
||||||
@ -34,11 +37,8 @@ export %inline
|
|||||||
(<+>) {c1 = False} = SeqEmpty
|
(<+>) {c1 = False} = SeqEmpty
|
||||||
(<+>) {c1 = True} = SeqEat
|
(<+>) {c1 = True} = SeqEat
|
||||||
|
|
||||||
%allow_overloads (<|>)
|
|
||||||
|
|
||||||
||| Alternative recognisers. If both consume, the combination is guaranteed
|
||| Alternative recognisers. If both consume, the combination is guaranteed
|
||||||
||| to consumer a character.
|
||| to consume a character.
|
||||||
%inline
|
|
||||||
export
|
export
|
||||||
(<|>) : Recognise c1 -> Recognise c2 -> Recognise (c1 && c2)
|
(<|>) : Recognise c1 -> Recognise c2 -> Recognise (c1 && c2)
|
||||||
(<|>) = Alt
|
(<|>) = Alt
|
||||||
@ -49,13 +49,11 @@ fail : Recognise c
|
|||||||
fail = Fail
|
fail = Fail
|
||||||
|
|
||||||
||| Recognise no input (doesn't consume any input)
|
||| Recognise no input (doesn't consume any input)
|
||||||
%inline
|
|
||||||
export
|
export
|
||||||
empty : Recognise False
|
empty : Recognise False
|
||||||
empty = Empty
|
empty = Empty
|
||||||
|
|
||||||
||| Recognise a character that matches a predicate
|
||| Recognise a character that matches a predicate
|
||||||
%inline
|
|
||||||
export
|
export
|
||||||
pred : (Char -> Bool) -> Lexer
|
pred : (Char -> Bool) -> Lexer
|
||||||
pred = Pred
|
pred = Pred
|
||||||
@ -70,26 +68,18 @@ export
|
|||||||
reject : Recognise c -> Recognise False
|
reject : Recognise c -> Recognise False
|
||||||
reject = Lookahead False
|
reject = Lookahead False
|
||||||
|
|
||||||
%allow_overloads concatMap
|
|
||||||
|
|
||||||
||| Sequence the recognisers resulting from applying a function to each element
|
||| Sequence the recognisers resulting from applying a function to each element
|
||||||
||| of a list. The resulting recogniser will consume input if the produced
|
||| of a list. The resulting recogniser will consume input if the produced
|
||||||
||| recognisers consume and the list is non-empty.
|
||| recognisers consume and the list is non-empty.
|
||||||
export
|
export
|
||||||
concatMap : (a -> Recognise c) -> (xs : List a) ->
|
concatMap : (a -> Recognise c) -> (xs : List a) -> Recognise (isCons xs && c)
|
||||||
Recognise (c && (isCons xs))
|
concatMap _ [] = Empty
|
||||||
concatMap _ [] = rewrite andFalseFalse c in Empty
|
concatMap f (x :: []) = f x
|
||||||
concatMap f (x :: xs)
|
concatMap f (x :: xs@(_ :: _)) = SeqSame (f x) (concatMap f xs)
|
||||||
= rewrite andTrueNeutral c in
|
|
||||||
rewrite sym (orSameAndRightNeutral c (isCons xs)) in
|
|
||||||
SeqEmpty (f x) (Core.concatMap f xs)
|
|
||||||
|
|
||||||
data StrLen : Type where
|
data StrLen : Type where
|
||||||
MkStrLen : String -> Nat -> StrLen
|
MkStrLen : String -> Nat -> StrLen
|
||||||
|
|
||||||
Show StrLen where
|
|
||||||
show (MkStrLen str n) = str ++ "(" ++ show n ++ ")"
|
|
||||||
|
|
||||||
getString : StrLen -> String
|
getString : StrLen -> String
|
||||||
getString (MkStrLen str n) = str
|
getString (MkStrLen str n) = str
|
||||||
|
|
||||||
@ -105,19 +95,9 @@ strTail : Nat -> StrLen -> StrLen
|
|||||||
strTail start (MkStrLen str len)
|
strTail start (MkStrLen str len)
|
||||||
= MkStrLen (substr start len str) (minus len start)
|
= MkStrLen (substr start len str) (minus len start)
|
||||||
|
|
||||||
isJust : Maybe a -> Bool
|
|
||||||
isJust Nothing = False
|
|
||||||
isJust (Just x) = True
|
|
||||||
|
|
||||||
export
|
|
||||||
unpack' : String -> List Char
|
|
||||||
unpack' str
|
|
||||||
= case strUncons str of
|
|
||||||
Nothing => []
|
|
||||||
Just (x, xs) => x :: unpack' xs
|
|
||||||
|
|
||||||
-- If the string is recognised, returns the index at which the token
|
-- If the string is recognised, returns the index at which the token
|
||||||
-- ends
|
-- ends
|
||||||
|
export
|
||||||
scan : Recognise c -> List Char -> List Char -> Maybe (List Char, List Char)
|
scan : Recognise c -> List Char -> List Char -> Maybe (List Char, List Char)
|
||||||
scan Empty tok str = pure (tok, str)
|
scan Empty tok str = pure (tok, str)
|
||||||
scan Fail tok str = Nothing
|
scan Fail tok str = Nothing
|
||||||
@ -137,6 +117,9 @@ scan (SeqEat r1 r2) tok str
|
|||||||
scan (SeqEmpty r1 r2) tok str
|
scan (SeqEmpty r1 r2) tok str
|
||||||
= do (tok', rest) <- scan r1 tok str
|
= do (tok', rest) <- scan r1 tok str
|
||||||
scan r2 tok' rest
|
scan r2 tok' rest
|
||||||
|
scan (SeqSame r1 r2) tok str
|
||||||
|
= do (tok', rest) <- scan r1 tok str
|
||||||
|
scan r2 tok' rest
|
||||||
scan (Alt r1 r2) tok str
|
scan (Alt r1 r2) tok str
|
||||||
= maybe (scan r2 tok str) Just (scan r1 tok str)
|
= maybe (scan r2 tok str) Just (scan r1 tok str)
|
||||||
|
|
||||||
@ -148,27 +131,15 @@ public export
|
|||||||
TokenMap : (tokenType : Type) -> Type
|
TokenMap : (tokenType : Type) -> Type
|
||||||
TokenMap tokenType = List (Lexer, String -> tokenType)
|
TokenMap tokenType = List (Lexer, String -> tokenType)
|
||||||
|
|
||||||
||| A token, and the line and column where it was in the input
|
tokenise : (a -> Bool) ->
|
||||||
public export
|
|
||||||
record TokenData a where
|
|
||||||
constructor MkToken
|
|
||||||
line : Int
|
|
||||||
col : Int
|
|
||||||
tok : a
|
|
||||||
|
|
||||||
export
|
|
||||||
Show a => Show (TokenData a) where
|
|
||||||
show t = show (line t) ++ ":" ++ show (col t) ++ ":" ++ show (tok t)
|
|
||||||
|
|
||||||
tokenise : (TokenData a -> Bool) ->
|
|
||||||
(line : Int) -> (col : Int) ->
|
(line : Int) -> (col : Int) ->
|
||||||
List (TokenData a) -> TokenMap a ->
|
List (WithBounds a) -> TokenMap a ->
|
||||||
List Char -> (List (TokenData a), (Int, Int, List Char))
|
List Char -> (List (WithBounds a), (Int, Int, List Char))
|
||||||
tokenise pred line col acc tmap str
|
tokenise pred line col acc tmap str
|
||||||
= case getFirstToken tmap str of
|
= case getFirstToken tmap str of
|
||||||
Just (tok, line', col', rest) =>
|
Just (tok, line', col', rest) =>
|
||||||
-- assert total because getFirstToken must consume something
|
-- assert total because getFirstToken must consume something
|
||||||
if pred tok
|
if pred tok.val
|
||||||
then (reverse acc, (line, col, []))
|
then (reverse acc, (line, col, []))
|
||||||
else assert_total (tokenise pred line' col' (tok :: acc) tmap rest)
|
else assert_total (tokenise pred line' col' (tok :: acc) tmap rest)
|
||||||
Nothing => (reverse acc, (line, col, str))
|
Nothing => (reverse acc, (line, col, str))
|
||||||
@ -183,13 +154,15 @@ tokenise pred line col acc tmap str
|
|||||||
(incol, _) => cast (length incol)
|
(incol, _) => cast (length incol)
|
||||||
|
|
||||||
getFirstToken : TokenMap a -> List Char ->
|
getFirstToken : TokenMap a -> List Char ->
|
||||||
Maybe (TokenData a, Int, Int, List Char)
|
Maybe (WithBounds a, Int, Int, List Char)
|
||||||
getFirstToken [] str = Nothing
|
getFirstToken [] str = Nothing
|
||||||
getFirstToken ((lex, fn) :: ts) str
|
getFirstToken ((lex, fn) :: ts) str
|
||||||
= case scan lex [] str of
|
= case scan lex [] str of
|
||||||
Just (tok, rest) => Just (MkToken line col (fn (pack (reverse tok))),
|
Just (tok, rest) =>
|
||||||
line + cast (countNLs tok),
|
let line' = line + cast (countNLs tok)
|
||||||
getCols tok col, rest)
|
col' = getCols tok col in
|
||||||
|
Just (MkBounded (fn (fastPack (reverse tok))) False (MkBounds line col line' col'),
|
||||||
|
line', col', rest)
|
||||||
Nothing => getFirstToken ts str
|
Nothing => getFirstToken ts str
|
||||||
|
|
||||||
||| Given a mapping from lexers to token generating functions (the
|
||| Given a mapping from lexers to token generating functions (the
|
||||||
@ -197,14 +170,14 @@ tokenise pred line col acc tmap str
|
|||||||
||| and the line, column, and remainder of the input at the first point in the
|
||| and the line, column, and remainder of the input at the first point in the
|
||||||
||| string where there are no recognised tokens.
|
||| string where there are no recognised tokens.
|
||||||
export
|
export
|
||||||
lex : TokenMap a -> String -> (List (TokenData a), (Int, Int, String))
|
lex : TokenMap a -> String -> (List (WithBounds a), (Int, Int, String))
|
||||||
lex tmap str
|
lex tmap str
|
||||||
= let (ts, (l, c, str')) = tokenise (const False) 0 0 [] tmap (fastUnpack str) in
|
= let (ts, (l, c, str')) = tokenise (const False) 0 0 [] tmap (unpack str) in
|
||||||
(ts, (l, c, pack str'))
|
(ts, (l, c, fastPack str'))
|
||||||
|
|
||||||
export
|
export
|
||||||
lexTo : (TokenData a -> Bool) ->
|
lexTo : (a -> Bool) ->
|
||||||
TokenMap a -> String -> (List (TokenData a), (Int, Int, String))
|
TokenMap a -> String -> (List (WithBounds a), (Int, Int, String))
|
||||||
lexTo pred tmap str
|
lexTo pred tmap str
|
||||||
= let (ts, (l, c, str')) = tokenise pred 0 0 [] tmap (fastUnpack str) in
|
= let (ts, (l, c, str')) = tokenise pred 0 0 [] tmap (unpack str) in
|
||||||
(ts, (l, c, pack str'))
|
(ts, (l, c, fastPack str'))
|
||||||
|
153
libs/contrib/Text/Lexer/Tokenizer.idr
Normal file
153
libs/contrib/Text/Lexer/Tokenizer.idr
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
module Text.Lexer.Tokenizer
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
import Data.Either
|
||||||
|
import Data.Nat
|
||||||
|
import Data.String
|
||||||
|
|
||||||
|
import Data.String.Extra
|
||||||
|
import Text.Lexer.Core
|
||||||
|
import Text.Lexer
|
||||||
|
import Text.PrettyPrint.Prettyprinter
|
||||||
|
import Text.PrettyPrint.Prettyprinter.Util
|
||||||
|
|
||||||
|
import public Control.Delayed
|
||||||
|
import public Text.Bounded
|
||||||
|
|
||||||
|
%default total
|
||||||
|
|
||||||
|
||| Description of a language's tokenization rule.
|
||||||
|
export
|
||||||
|
data Tokenizer : (tokenType : Type) -> Type where
|
||||||
|
Match : Lexer -> (String -> tokenType) -> Tokenizer tokenType
|
||||||
|
Compose : (begin : Lexer) ->
|
||||||
|
(mapBegin : String -> tokenType) ->
|
||||||
|
(tagger : String -> tag) ->
|
||||||
|
(middle : Inf (tag -> Tokenizer tokenType)) ->
|
||||||
|
(end : tag -> Lexer) ->
|
||||||
|
(mapEnd : String -> tokenType) ->
|
||||||
|
Tokenizer tokenType
|
||||||
|
Alt : Tokenizer tokenType -> Lazy (Tokenizer tokenType) -> Tokenizer tokenType
|
||||||
|
|
||||||
|
||| Alternative tokenizer rules.
|
||||||
|
export
|
||||||
|
(<|>) : Tokenizer t -> Lazy (Tokenizer t) -> Tokenizer t
|
||||||
|
(<|>) = Alt
|
||||||
|
|
||||||
|
||| Match on a recogniser and cast the string to a token.
|
||||||
|
export
|
||||||
|
match : Lexer -> (String -> a) -> Tokenizer a
|
||||||
|
match = Match
|
||||||
|
|
||||||
|
||| Compose other tokenizer. Language composition should be quoted between
|
||||||
|
||| a begin lexer and a end lexer. The begin token can be used to generate
|
||||||
|
||| the composition tokenizer and the end lexer.
|
||||||
|
export
|
||||||
|
compose : (begin : Lexer) ->
|
||||||
|
(mapBegin : String -> a) ->
|
||||||
|
(tagger : String -> tag) ->
|
||||||
|
(middle : Inf (tag -> Tokenizer a)) ->
|
||||||
|
(end : tag -> Lexer) ->
|
||||||
|
(mapEnd : String -> a) ->
|
||||||
|
Tokenizer a
|
||||||
|
compose = Compose
|
||||||
|
|
||||||
|
||| Stop reason why tokenizer can't make more progress.
|
||||||
|
||| @ ComposeNotClosing carries the span of composition begin token in the
|
||||||
|
||| form of `(startLine, startCol), (endLine, endCol)`.
|
||||||
|
public export
|
||||||
|
data StopReason = EndInput | NoRuleApply | ComposeNotClosing (Int, Int) (Int, Int)
|
||||||
|
|
||||||
|
export
|
||||||
|
Show StopReason where
|
||||||
|
show EndInput = "EndInput"
|
||||||
|
show NoRuleApply = "NoRuleApply"
|
||||||
|
show (ComposeNotClosing start end) = "ComposeNotClosing " ++ show start ++ " " ++ show end
|
||||||
|
|
||||||
|
export
|
||||||
|
Pretty StopReason where
|
||||||
|
pretty EndInput = pretty "EndInput"
|
||||||
|
pretty NoRuleApply = pretty "NoRuleApply"
|
||||||
|
pretty (ComposeNotClosing start end) = "ComposeNotClosing" <++> pretty start <++> pretty end
|
||||||
|
|
||||||
|
tokenise : Lexer ->
|
||||||
|
Tokenizer a ->
|
||||||
|
(line, col : Int) -> List (WithBounds a) ->
|
||||||
|
List Char ->
|
||||||
|
(List (WithBounds a), (StopReason, Int, Int, List Char))
|
||||||
|
tokenise reject tokenizer line col acc [] = (reverse acc, EndInput, (line, col, []))
|
||||||
|
tokenise reject tokenizer line col acc str
|
||||||
|
= case scan reject [] str of
|
||||||
|
Just _ => (reverse acc, (EndInput, line, col, str))
|
||||||
|
Nothing => case getFirstMatch tokenizer str of
|
||||||
|
Right (toks, line', col', rest) =>
|
||||||
|
-- assert total because getFirstMatch must consume something
|
||||||
|
assert_total (tokenise reject tokenizer line' col' (toks ++ acc) rest)
|
||||||
|
Left reason => (reverse acc, reason, (line, col, str))
|
||||||
|
where
|
||||||
|
countNLs : List Char -> Nat
|
||||||
|
countNLs str = List.length (filter (== '\n') str)
|
||||||
|
|
||||||
|
getCols : List Char -> Int -> Int
|
||||||
|
getCols x c
|
||||||
|
= case span (/= '\n') x of
|
||||||
|
(incol, []) => c + cast (length incol)
|
||||||
|
(incol, _) => cast (length incol)
|
||||||
|
|
||||||
|
-- get the next lexeme using the `Lexer` in argument, its position and the input
|
||||||
|
-- Returns the new position, the lexeme parsed and the rest of the input
|
||||||
|
-- If parsing the lexer fails, this returns `Nothing`
|
||||||
|
getNext : (lexer : Lexer) -> (line, col : Int) -> (input : List Char) -> Maybe (String, Int, Int, List Char)
|
||||||
|
getNext lexer line col str =
|
||||||
|
let Just (token, rest) = scan lexer [] str
|
||||||
|
| _ => Nothing
|
||||||
|
line' = line + cast (countNLs token)
|
||||||
|
col' = getCols token col
|
||||||
|
tokenStr = fastPack $ reverse token
|
||||||
|
in pure (tokenStr, line', col', rest)
|
||||||
|
|
||||||
|
getFirstMatch : Tokenizer a -> List Char ->
|
||||||
|
Either StopReason (List (WithBounds a), Int, Int, List Char)
|
||||||
|
getFirstMatch (Match lex fn) str
|
||||||
|
= let Just (tok, line', col', rest) = getNext lex line col str
|
||||||
|
| _ => Left NoRuleApply
|
||||||
|
tok' = MkBounded (fn tok) False (MkBounds line col line' col')
|
||||||
|
in Right ([tok'], line', col', rest)
|
||||||
|
getFirstMatch (Compose begin mapBegin tagger middleFn endFn mapEnd) str
|
||||||
|
= let Just (beginTok', line', col' , rest) = getNext begin line col str
|
||||||
|
| Nothing => Left NoRuleApply
|
||||||
|
tag = tagger beginTok'
|
||||||
|
middle = middleFn tag
|
||||||
|
end = endFn tag
|
||||||
|
beginTok'' = MkBounded (mapBegin beginTok') False (MkBounds line col line' col')
|
||||||
|
(midToks, (reason, line'', col'', rest'')) =
|
||||||
|
tokenise end middle line' col' [] rest
|
||||||
|
in case reason of
|
||||||
|
(ComposeNotClosing _ _) => Left reason
|
||||||
|
_ => let Just (endTok', lineEnd, colEnd, restEnd) =
|
||||||
|
getNext end line'' col'' rest''
|
||||||
|
| _ => Left $ ComposeNotClosing (line, col) (line', col')
|
||||||
|
endTok'' = MkBounded (mapEnd endTok') False (MkBounds line'' col'' lineEnd colEnd)
|
||||||
|
in Right ([endTok''] ++ reverse midToks ++ [beginTok''], lineEnd, colEnd, restEnd)
|
||||||
|
getFirstMatch (Alt t1 t2) str
|
||||||
|
= case getFirstMatch t1 str of
|
||||||
|
Right result => Right result
|
||||||
|
Left reason@(ComposeNotClosing _ _) => Left reason
|
||||||
|
Left _ => getFirstMatch t2 str
|
||||||
|
|
||||||
|
export
|
||||||
|
lexTo : Lexer ->
|
||||||
|
Tokenizer a ->
|
||||||
|
String ->
|
||||||
|
(List (WithBounds a), (StopReason, Int, Int, String))
|
||||||
|
lexTo reject tokenizer str
|
||||||
|
= let (ts, reason, (l, c, str')) =
|
||||||
|
tokenise reject tokenizer 0 0 [] (fastUnpack str) in
|
||||||
|
(ts, reason, (l, c, fastPack str'))
|
||||||
|
|
||||||
|
||| Given a tokenizer and an input string, return a list of recognised tokens,
|
||||||
|
||| and the line, column, and remainder of the input at the first point in the string
|
||||||
|
||| where there are no recognised tokens.
|
||||||
|
export
|
||||||
|
lex : Tokenizer a -> String -> (List (WithBounds a), (StopReason, Int, Int, String))
|
||||||
|
lex tokenizer str = lexTo (pred $ const False) tokenizer str
|
234
libs/contrib/Text/Literate.idr
Normal file
234
libs/contrib/Text/Literate.idr
Normal file
@ -0,0 +1,234 @@
|
|||||||
|
||| A simple module to process 'literate' documents.
|
||||||
|
|||
|
||||||
|
||| The module uses a lexer to split the document into code blocks,
|
||||||
|
||| delineated by user-defined markers, and code lines that are
|
||||||
|
||| indicated be a line marker. The lexer returns a document stripped
|
||||||
|
||| of non-code elements but preserving the original document's line
|
||||||
|
||| count. Column numbering of code lines are not preserved.
|
||||||
|
|||
|
||||||
|
||| The underlying tokeniser is greedy.
|
||||||
|
|||
|
||||||
|
||| Once it identifies a line marker it reads a prettifying space then
|
||||||
|
||| consumes until the end of line. Once identifies a starting code
|
||||||
|
||| block marker, the lexer will consume input until the next
|
||||||
|
||| identifiable end block is encountered. Any other content is
|
||||||
|
||| treated as part of the original document.
|
||||||
|
|||
|
||||||
|
||| Thus, the input literate files *must* be well-formed w.r.t
|
||||||
|
||| to code line markers and code blocks.
|
||||||
|
|||
|
||||||
|
||| A further restriction is that literate documents cannot contain
|
||||||
|
||| the markers within the document's main text: This will confuse the
|
||||||
|
||| lexer.
|
||||||
|
|||
|
||||||
|
module Text.Literate
|
||||||
|
|
||||||
|
import Text.Lexer
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
import Data.List1
|
||||||
|
import Data.List.Views
|
||||||
|
import Data.String
|
||||||
|
import Data.String.Extra
|
||||||
|
|
||||||
|
%default total
|
||||||
|
|
||||||
|
untilEOL : Recognise False
|
||||||
|
untilEOL = manyUntil newline any
|
||||||
|
|
||||||
|
line : String -> Lexer
|
||||||
|
line s = exact s <+> (newline <|> space <+> untilEOL)
|
||||||
|
|
||||||
|
block : String -> String -> Lexer
|
||||||
|
block s e = surround (exact s <+> untilEOL) (exact e <+> untilEOL) any
|
||||||
|
|
||||||
|
notCodeLine : Lexer
|
||||||
|
notCodeLine = newline
|
||||||
|
<|> any <+> untilEOL
|
||||||
|
|
||||||
|
data Token = CodeBlock String String String
|
||||||
|
| Any String
|
||||||
|
| CodeLine String String
|
||||||
|
|
||||||
|
Show Token where
|
||||||
|
showPrec d (CodeBlock l r x) = showCon d "CodeBlock" $ showArg l ++ showArg r ++ showArg x
|
||||||
|
showPrec d (Any x) = showCon d "Any" $ showArg x
|
||||||
|
showPrec d (CodeLine m x) = showCon d "CodeLine" $ showArg m ++ showArg x
|
||||||
|
|
||||||
|
rawTokens : (delims : List (String, String))
|
||||||
|
-> (markers : List String)
|
||||||
|
-> TokenMap (Token)
|
||||||
|
rawTokens delims ls =
|
||||||
|
map (\(l,r) => (block l r, CodeBlock (trim l) (trim r))) delims
|
||||||
|
++ map (\m => (line m, CodeLine (trim m))) ls
|
||||||
|
++ [(notCodeLine, Any)]
|
||||||
|
|
||||||
|
namespace Compat
|
||||||
|
-- `reduce` below was depending on the old behaviour of `lines` before #1585
|
||||||
|
-- was merged. That old `lines` function is added here to preserve behaviour
|
||||||
|
-- of `reduce`.
|
||||||
|
lines' : List Char -> List1 (List Char)
|
||||||
|
lines' [] = singleton []
|
||||||
|
lines' s = case break isNL s of
|
||||||
|
(l, s') => l ::: case s' of
|
||||||
|
[] => []
|
||||||
|
_ :: s'' => forget $ lines' (assert_smaller s s'')
|
||||||
|
export
|
||||||
|
lines : String -> List1 String
|
||||||
|
lines s = map pack (lines' (unpack s))
|
||||||
|
|
||||||
|
||| Merge the tokens into a single source file.
|
||||||
|
reduce : List (WithBounds Token) -> List String -> String
|
||||||
|
reduce [] acc = fastAppend (reverse acc)
|
||||||
|
reduce (MkBounded (Any x) _ _ :: rest) acc =
|
||||||
|
-- newline will always be tokenized as a single token
|
||||||
|
if x == "\n"
|
||||||
|
then reduce rest ("\n"::acc)
|
||||||
|
else reduce rest acc
|
||||||
|
|
||||||
|
reduce (MkBounded (CodeLine m src) _ _ :: rest) acc =
|
||||||
|
if m == trim src
|
||||||
|
then reduce rest ("\n"::acc)
|
||||||
|
else reduce rest ((substr (length m + 1) -- remove space to right of marker.
|
||||||
|
(length src)
|
||||||
|
src
|
||||||
|
)::acc)
|
||||||
|
|
||||||
|
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc with (Compat.lines src) -- Strip the deliminators surrounding the block.
|
||||||
|
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: ys) with (snocList ys)
|
||||||
|
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: []) | Empty = reduce rest acc -- 2
|
||||||
|
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: (srcs ++ [f])) | (Snoc f srcs rec) =
|
||||||
|
-- the "\n" counts for the open deliminator; the closing deliminator should always be followed by a (Any "\n"), so we don't add a newline
|
||||||
|
reduce rest (((unlines srcs) ++ "\n") :: "\n" :: acc)
|
||||||
|
|
||||||
|
-- [ NOTE ] 1 & 2 shouldn't happen as code blocks are well formed i.e. have two deliminators.
|
||||||
|
|
||||||
|
|
||||||
|
public export
|
||||||
|
record LiterateError where
|
||||||
|
constructor MkLitErr
|
||||||
|
line : Int
|
||||||
|
column : Int
|
||||||
|
input : String
|
||||||
|
|
||||||
|
||| Description of literate styles.
|
||||||
|
|||
|
||||||
|
||| A 'literate' style comprises of
|
||||||
|
|||
|
||||||
|
||| + a list of code block deliminators (`deliminators`);
|
||||||
|
||| + a list of code line markers (`line_markers`); and
|
||||||
|
||| + a list of known file extensions `file_extensions`.
|
||||||
|
|||
|
||||||
|
||| Some example specifications:
|
||||||
|
|||
|
||||||
|
||| + Bird Style
|
||||||
|
|||
|
||||||
|
|||```
|
||||||
|
|||MkLitStyle Nil [">"] [".lidr"]
|
||||||
|
|||```
|
||||||
|
|||
|
||||||
|
||| + Literate Haskell (for LaTeX)
|
||||||
|
|||
|
||||||
|
|||```
|
||||||
|
|||MkLitStyle [("\\begin{code}", "\\end{code}"),("\\begin{spec}","\\end{spec}")]
|
||||||
|
||| Nil
|
||||||
|
||| [".lhs", ".tex"]
|
||||||
|
|||```
|
||||||
|
|||
|
||||||
|
||| + OrgMode
|
||||||
|
|||
|
||||||
|
|||```
|
||||||
|
|||MkLitStyle [("#+BEGIN_SRC idris","#+END_SRC"), ("#+COMMENT idris","#+END_COMMENT")]
|
||||||
|
||| ["#+IDRIS:"]
|
||||||
|
||| [".org"]
|
||||||
|
|||```
|
||||||
|
|||
|
||||||
|
||| + Common Mark
|
||||||
|
|||
|
||||||
|
|||```
|
||||||
|
|||MkLitStyle [("```idris","```"), ("<!-- idris","--!>")]
|
||||||
|
||| Nil
|
||||||
|
||| [".md", ".markdown"]
|
||||||
|
|||```
|
||||||
|
|||
|
||||||
|
public export
|
||||||
|
record LiterateStyle where
|
||||||
|
constructor MkLitStyle
|
||||||
|
||| The pairs of start and end tags for code blocks.
|
||||||
|
deliminators : List (String, String)
|
||||||
|
|
||||||
|
||| Line markers that indicate a line contains code.
|
||||||
|
line_markers : List String
|
||||||
|
|
||||||
|
||| Recognised file extensions. Not used by the module, but will be
|
||||||
|
||| of use when connecting to code that reads in the original source
|
||||||
|
||| files.
|
||||||
|
file_extensions : List String
|
||||||
|
|
||||||
|
||| Given a 'literate specification' extract the code from the
|
||||||
|
||| literate source file (`litStr`) that follows the presented style.
|
||||||
|
|||
|
||||||
|
||| @specification The literate specification to use.
|
||||||
|
||| @litStr The literate source file.
|
||||||
|
|||
|
||||||
|
||| Returns a `LiterateError` if the literate file contains malformed
|
||||||
|
||| code blocks or code lines.
|
||||||
|
|||
|
||||||
|
export
|
||||||
|
extractCode : (specification : LiterateStyle)
|
||||||
|
-> (litStr : String)
|
||||||
|
-> Either LiterateError String
|
||||||
|
extractCode (MkLitStyle delims markers exts) str =
|
||||||
|
case lex (rawTokens delims markers) str of
|
||||||
|
(toks, (_,_,"")) => Right (reduce toks Nil)
|
||||||
|
(_, (l,c,i)) => Left (MkLitErr l c i)
|
||||||
|
|
||||||
|
||| Synonym for `extractCode`.
|
||||||
|
export
|
||||||
|
unlit : (specification : LiterateStyle)
|
||||||
|
-> (litStr : String)
|
||||||
|
-> Either LiterateError String
|
||||||
|
unlit = extractCode
|
||||||
|
|
||||||
|
||| Is the provided line marked up using a line marker?
|
||||||
|
|||
|
||||||
|
||| If the line is suffixed by any one of the style's set of line
|
||||||
|
||| markers then return length of literate line marker, and the code stripped from the line
|
||||||
|
||| marker. Otherwise, return Nothing and the unmarked line.
|
||||||
|
export
|
||||||
|
isLiterateLine : (specification : LiterateStyle)
|
||||||
|
-> (str : String)
|
||||||
|
-> Pair (Maybe String) String
|
||||||
|
isLiterateLine (MkLitStyle delims markers _) str with (lex (rawTokens delims markers) str)
|
||||||
|
isLiterateLine (MkLitStyle delims markers _) str | ([MkBounded (CodeLine m str') _ _], (_,_, "")) = (Just m, str')
|
||||||
|
isLiterateLine (MkLitStyle delims markers _) str | (_, _) = (Nothing, str)
|
||||||
|
|
||||||
|
||| Given a 'literate specification' embed the given code using the
|
||||||
|
||| literate style provided.
|
||||||
|
|||
|
||||||
|
||| If the style uses deliminators to denote code blocks use the first
|
||||||
|
||| pair of deliminators in the style. Otherwise use first linemarker
|
||||||
|
||| in the style. If there is **no style** return the presented code
|
||||||
|
||| string unembedded.
|
||||||
|
|||
|
||||||
|
|||
|
||||||
|
||| @specification The literate specification to use.
|
||||||
|
||| @code The code to embed,
|
||||||
|
|||
|
||||||
|
|||
|
||||||
|
export
|
||||||
|
embedCode : (specification : LiterateStyle)
|
||||||
|
-> (code : String)
|
||||||
|
-> String
|
||||||
|
embedCode (MkLitStyle ((s,e)::delims) _ _) str = unlines [s,str,e]
|
||||||
|
embedCode (MkLitStyle Nil (m::markers) _) str = unwords [m, str]
|
||||||
|
embedCode (MkLitStyle _ _ _) str = str
|
||||||
|
|
||||||
|
||| Synonm for `embedCode`
|
||||||
|
export
|
||||||
|
relit : (specification : LiterateStyle)
|
||||||
|
-> (code : String)
|
||||||
|
-> String
|
||||||
|
relit = embedCode
|
||||||
|
|
||||||
|
-- --------------------------------------------------------------------- [ EOF ]
|
@ -2,49 +2,50 @@ module Text.Parser
|
|||||||
|
|
||||||
import Data.Bool
|
import Data.Bool
|
||||||
import Data.List
|
import Data.List
|
||||||
import Data.List1
|
|
||||||
import Data.Nat
|
import Data.Nat
|
||||||
import Data.Vect
|
import public Data.List1
|
||||||
|
|
||||||
import public Text.Parser.Core
|
import public Text.Parser.Core
|
||||||
import public Text.Quantity
|
import public Text.Quantity
|
||||||
import public Text.Token
|
import public Text.Token
|
||||||
|
|
||||||
|
%default total
|
||||||
|
|
||||||
||| Parse a terminal based on a kind of token.
|
||| Parse a terminal based on a kind of token.
|
||||||
export
|
export
|
||||||
match : (Eq k, TokenKind k) =>
|
match : (Eq k, TokenKind k) =>
|
||||||
(kind : k) ->
|
(kind : k) ->
|
||||||
Grammar (Token k) True (TokType kind)
|
Grammar state (Token k) True (TokType kind)
|
||||||
match kind = terminal "Unrecognised input" $
|
match k = terminal "Unrecognised input" $
|
||||||
\(Tok kind' text) => if kind' == kind
|
\t => if t.kind == k
|
||||||
then Just $ tokValue kind text
|
then Just $ tokValue k t.text
|
||||||
else Nothing
|
else Nothing
|
||||||
|
|
||||||
||| Optionally parse a thing, with a default value if the grammar doesn't
|
||| Optionally parse a thing, with a default value if the grammar doesn't
|
||||||
||| match. May match the empty input.
|
||| match. May match the empty input.
|
||||||
export
|
export
|
||||||
option : {c : Bool} ->
|
option : {c : Bool} ->
|
||||||
(def : a) -> (p : Grammar tok c a) ->
|
(def : a) -> (p : Grammar state tok c a) ->
|
||||||
Grammar tok False a
|
Grammar state tok False a
|
||||||
option {c = False} def p = p <|> pure def
|
option {c = False} def p = p <|> pure def
|
||||||
option {c = True} def p = p <|> pure def
|
option {c = True} def p = p <|> pure def
|
||||||
|
|
||||||
||| Optionally parse a thing.
|
||| Optionally parse a thing.
|
||||||
||| To provide a default value, use `option` instead.
|
||| To provide a default value, use `option` instead.
|
||||||
export
|
export
|
||||||
optional : {c : _} ->
|
optional : {c : Bool} ->
|
||||||
(p : Grammar tok c a) ->
|
(p : Grammar state tok c a) ->
|
||||||
Grammar tok False (Maybe a)
|
Grammar state tok False (Maybe a)
|
||||||
optional p = option Nothing (map Just p)
|
optional p = option Nothing (map Just p)
|
||||||
|
|
||||||
||| Try to parse one thing or the other, producing a value that indicates
|
||| Try to parse one thing or the other, producing a value that indicates
|
||||||
||| which option succeeded. If both would succeed, the left option
|
||| which option succeeded. If both would succeed, the left option
|
||||||
||| takes priority.
|
||| takes priority.
|
||||||
export
|
export
|
||||||
choose : {c1, c2 : _} ->
|
choose : {c1, c2 : Bool} ->
|
||||||
(l : Grammar tok c1 a) ->
|
(l : Grammar state tok c1 a) ->
|
||||||
(r : Grammar tok c2 b) ->
|
(r : Grammar state tok c2 b) ->
|
||||||
Grammar tok (c1 && c2) (Either a b)
|
Grammar state tok (c1 && c2) (Either a b)
|
||||||
choose l r = map Left l <|> map Right r
|
choose l r = map Left l <|> map Right r
|
||||||
|
|
||||||
||| Produce a grammar by applying a function to each element of a container,
|
||| Produce a grammar by applying a function to each element of a container,
|
||||||
@ -52,10 +53,10 @@ choose l r = map Left l <|> map Right r
|
|||||||
||| container is empty.
|
||| container is empty.
|
||||||
export
|
export
|
||||||
choiceMap : {c : Bool} ->
|
choiceMap : {c : Bool} ->
|
||||||
(a -> Grammar tok c b) ->
|
(a -> Grammar state tok c b) ->
|
||||||
Foldable t => t a ->
|
Foldable t => t a ->
|
||||||
Grammar tok c b
|
Grammar state tok c b
|
||||||
choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
|
choiceMap {c} f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
|
||||||
f x <|> acc)
|
f x <|> acc)
|
||||||
(fail "No more options") xs
|
(fail "No more options") xs
|
||||||
|
|
||||||
@ -64,36 +65,30 @@ choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
|
|||||||
||| Try each grammar in a container until the first one succeeds.
|
||| Try each grammar in a container until the first one succeeds.
|
||||||
||| Fails if the container is empty.
|
||| Fails if the container is empty.
|
||||||
export
|
export
|
||||||
choice : {c : _} ->
|
choice : Foldable t =>
|
||||||
Foldable t => t (Grammar tok c a) ->
|
{c : Bool} ->
|
||||||
Grammar tok c a
|
t (Grammar state tok c a) ->
|
||||||
choice = Parser.choiceMap id
|
Grammar state tok c a
|
||||||
|
choice = choiceMap id
|
||||||
|
|
||||||
mutual
|
mutual
|
||||||
||| Parse one or more things
|
||| Parse one or more things
|
||||||
export
|
export
|
||||||
some : Grammar tok True a ->
|
some : Grammar state tok True a ->
|
||||||
Grammar tok True (List1 a)
|
Grammar state tok True (List1 a)
|
||||||
some p = pure (!p ::: !(many p))
|
some p = pure (!p ::: !(many p))
|
||||||
|
|
||||||
||| Parse zero or more things (may match the empty input)
|
||| Parse zero or more things (may match the empty input)
|
||||||
export
|
export
|
||||||
many : Grammar tok True a ->
|
many : Grammar state tok True a ->
|
||||||
Grammar tok False (List a)
|
Grammar state tok False (List a)
|
||||||
many p = option [] (forget <$> some p)
|
many p = option [] (forget <$> some p)
|
||||||
|
|
||||||
||| Parse one or more instances of `p`, returning the parsed items and proof
|
|
||||||
||| that the resulting list is non-empty.
|
|
||||||
export
|
|
||||||
some' : (p : Grammar tok True a) ->
|
|
||||||
Grammar tok True (xs : List a ** NonEmpty xs)
|
|
||||||
some' p = pure (!p :: !(many p) ** IsNonEmpty)
|
|
||||||
|
|
||||||
mutual
|
mutual
|
||||||
private
|
private
|
||||||
count1 : (q : Quantity) ->
|
count1 : (q : Quantity) ->
|
||||||
(p : Grammar tok True a) ->
|
(p : Grammar state tok True a) ->
|
||||||
Grammar tok True (List a)
|
Grammar state tok True (List a)
|
||||||
count1 q p = do x <- p
|
count1 q p = do x <- p
|
||||||
seq (count q p)
|
seq (count q p)
|
||||||
(\xs => pure (x :: xs))
|
(\xs => pure (x :: xs))
|
||||||
@ -101,31 +96,23 @@ mutual
|
|||||||
||| Parse `p`, repeated as specified by `q`, returning the list of values.
|
||| Parse `p`, repeated as specified by `q`, returning the list of values.
|
||||||
export
|
export
|
||||||
count : (q : Quantity) ->
|
count : (q : Quantity) ->
|
||||||
(p : Grammar tok True a) ->
|
(p : Grammar state tok True a) ->
|
||||||
Grammar tok (isSucc (min q)) (List a)
|
Grammar state tok (isSucc (min q)) (List a)
|
||||||
count (Qty Z Nothing) p = many p
|
count (Qty Z Nothing) p = many p
|
||||||
count (Qty Z (Just Z)) _ = pure []
|
count (Qty Z (Just Z)) _ = pure []
|
||||||
count (Qty Z (Just (S max))) p = option [] $ count1 (atMost max) p
|
count (Qty Z (Just (S max))) p = option [] $ count1 (atMost max) p
|
||||||
count (Qty (S min) Nothing) p = count1 (atLeast min) p
|
count (Qty (S min) Nothing) p = count1 (atLeast min) p
|
||||||
count (Qty (S min) (Just Z)) _ = fail "Quantity out of order"
|
count (Qty (S min) (Just Z)) _ = fail "Quantity out of order"
|
||||||
count (Qty (S min) (Just (S max))) p = count1 (between min max) p
|
count (Qty (S min) (Just (S max))) p = count1 (between (S min) max) p
|
||||||
|
|
||||||
||| Parse `p` `n` times, returning the vector of values.
|
|
||||||
export
|
|
||||||
countExactly : (n : Nat) ->
|
|
||||||
(p : Grammar tok True a) ->
|
|
||||||
Grammar tok (isSucc n) (Vect n a)
|
|
||||||
countExactly Z p = Empty []
|
|
||||||
countExactly (S k) p = [| p :: countExactly k p |]
|
|
||||||
|
|
||||||
mutual
|
mutual
|
||||||
||| Parse one or more instances of `p` until `end` succeeds, returning the
|
||| Parse one or more instances of `p` until `end` succeeds, returning the
|
||||||
||| list of values from `p`. Guaranteed to consume input.
|
||| list of values from `p`. Guaranteed to consume input.
|
||||||
export
|
export
|
||||||
someTill : {c : Bool} ->
|
someTill : {c : Bool} ->
|
||||||
(end : Grammar tok c e) ->
|
(end : Grammar state tok c e) ->
|
||||||
(p : Grammar tok True a) ->
|
(p : Grammar state tok True a) ->
|
||||||
Grammar tok True (List1 a)
|
Grammar state tok True (List1 a)
|
||||||
someTill {c} end p = do x <- p
|
someTill {c} end p = do x <- p
|
||||||
seq (manyTill end p)
|
seq (manyTill end p)
|
||||||
(\xs => pure (x ::: xs))
|
(\xs => pure (x ::: xs))
|
||||||
@ -134,9 +121,9 @@ mutual
|
|||||||
||| list of values from `p`. Guaranteed to consume input if `end` consumes.
|
||| list of values from `p`. Guaranteed to consume input if `end` consumes.
|
||||||
export
|
export
|
||||||
manyTill : {c : Bool} ->
|
manyTill : {c : Bool} ->
|
||||||
(end : Grammar tok c e) ->
|
(end : Grammar state tok c e) ->
|
||||||
(p : Grammar tok True a) ->
|
(p : Grammar state tok True a) ->
|
||||||
Grammar tok c (List a)
|
Grammar state tok c (List a)
|
||||||
manyTill {c} end p = rewrite sym (andTrueNeutral c) in
|
manyTill {c} end p = rewrite sym (andTrueNeutral c) in
|
||||||
map (const []) end <|> (forget <$> someTill end p)
|
map (const []) end <|> (forget <$> someTill end p)
|
||||||
|
|
||||||
@ -145,28 +132,28 @@ mutual
|
|||||||
||| returning its value.
|
||| returning its value.
|
||||||
export
|
export
|
||||||
afterSome : {c : Bool} ->
|
afterSome : {c : Bool} ->
|
||||||
(skip : Grammar tok True s) ->
|
(skip : Grammar state tok True s) ->
|
||||||
(p : Grammar tok c a) ->
|
(p : Grammar state tok c a) ->
|
||||||
Grammar tok True a
|
Grammar state tok True a
|
||||||
afterSome skip p = do ignore skip
|
afterSome skip p = do ignore $ skip
|
||||||
afterMany skip p
|
afterMany skip p
|
||||||
|
|
||||||
||| Parse zero or more instance of `skip` until `p` is encountered,
|
||| Parse zero or more instance of `skip` until `p` is encountered,
|
||||||
||| returning its value.
|
||| returning its value.
|
||||||
export
|
export
|
||||||
afterMany : {c : Bool} ->
|
afterMany : {c : Bool} ->
|
||||||
(skip : Grammar tok True s) ->
|
(skip : Grammar state tok True s) ->
|
||||||
(p : Grammar tok c a) ->
|
(p : Grammar state tok c a) ->
|
||||||
Grammar tok c a
|
Grammar state tok c a
|
||||||
afterMany {c} skip p = rewrite sym (andTrueNeutral c) in
|
afterMany {c} skip p = rewrite sym (andTrueNeutral c) in
|
||||||
p <|> afterSome skip p
|
p <|> afterSome skip p
|
||||||
|
|
||||||
||| Parse one or more things, each separated by another thing.
|
||| Parse one or more things, each separated by another thing.
|
||||||
export
|
export
|
||||||
sepBy1 : {c : Bool} ->
|
sepBy1 : {c : Bool} ->
|
||||||
(sep : Grammar tok True s) ->
|
(sep : Grammar state tok True s) ->
|
||||||
(p : Grammar tok c a) ->
|
(p : Grammar state tok c a) ->
|
||||||
Grammar tok c (List1 a)
|
Grammar state tok c (List1 a)
|
||||||
sepBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
|
sepBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
|
||||||
[| p ::: many (sep *> p) |]
|
[| p ::: many (sep *> p) |]
|
||||||
|
|
||||||
@ -174,18 +161,18 @@ sepBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
|
|||||||
||| match the empty input.
|
||| match the empty input.
|
||||||
export
|
export
|
||||||
sepBy : {c : Bool} ->
|
sepBy : {c : Bool} ->
|
||||||
(sep : Grammar tok True s) ->
|
(sep : Grammar state tok True s) ->
|
||||||
(p : Grammar tok c a) ->
|
(p : Grammar state tok c a) ->
|
||||||
Grammar tok False (List a)
|
Grammar state tok False (List a)
|
||||||
sepBy sep p = option [] $ forget <$> sepBy1 sep p
|
sepBy sep p = option [] $ forget <$> sepBy1 sep p
|
||||||
|
|
||||||
||| Parse one or more instances of `p` separated by and optionally terminated by
|
||| Parse one or more instances of `p` separated by and optionally terminated by
|
||||||
||| `sep`.
|
||| `sep`.
|
||||||
export
|
export
|
||||||
sepEndBy1 : {c : Bool} ->
|
sepEndBy1 : {c : Bool} ->
|
||||||
(sep : Grammar tok True s) ->
|
(sep : Grammar state tok True s) ->
|
||||||
(p : Grammar tok c a) ->
|
(p : Grammar state tok c a) ->
|
||||||
Grammar tok c (List1 a)
|
Grammar state tok c (List1 a)
|
||||||
sepEndBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
|
sepEndBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
|
||||||
sepBy1 sep p <* optional sep
|
sepBy1 sep p <* optional sep
|
||||||
|
|
||||||
@ -193,32 +180,49 @@ sepEndBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
|
|||||||
||| by `sep`. Will not match a separator by itself.
|
||| by `sep`. Will not match a separator by itself.
|
||||||
export
|
export
|
||||||
sepEndBy : {c : Bool} ->
|
sepEndBy : {c : Bool} ->
|
||||||
(sep : Grammar tok True s) ->
|
(sep : Grammar state tok True s) ->
|
||||||
(p : Grammar tok c a) ->
|
(p : Grammar state tok c a) ->
|
||||||
Grammar tok False (List a)
|
Grammar state tok False (List a)
|
||||||
sepEndBy sep p = option [] $ forget <$> sepEndBy1 sep p
|
sepEndBy sep p = option [] $ forget <$> sepEndBy1 sep p
|
||||||
|
|
||||||
||| Parse one or more instances of `p`, separated and terminated by `sep`.
|
||| Parse one or more instances of `p`, separated and terminated by `sep`.
|
||||||
export
|
export
|
||||||
endBy1 : {c : Bool} ->
|
endBy1 : {c : Bool} ->
|
||||||
(sep : Grammar tok True s) ->
|
(sep : Grammar state tok True s) ->
|
||||||
(p : Grammar tok c a) ->
|
(p : Grammar state tok c a) ->
|
||||||
Grammar tok True (List1 a)
|
Grammar state tok True (List1 a)
|
||||||
endBy1 sep p = some $ rewrite sym (orTrueTrue c) in
|
endBy1 {c} sep p = some $ rewrite sym (orTrueTrue c) in
|
||||||
p <* sep
|
p <* sep
|
||||||
|
|
||||||
export
|
export
|
||||||
endBy : {c : Bool} ->
|
endBy : {c : Bool} ->
|
||||||
(sep : Grammar tok True s) ->
|
(sep : Grammar state tok True s) ->
|
||||||
(p : Grammar tok c a) ->
|
(p : Grammar state tok c a) ->
|
||||||
Grammar tok False (List a)
|
Grammar state tok False (List a)
|
||||||
endBy sep p = option [] $ forget <$> endBy1 sep p
|
endBy sep p = option [] $ forget <$> endBy1 sep p
|
||||||
|
|
||||||
||| Parse an instance of `p` that is between `left` and `right`.
|
||| Parse an instance of `p` that is between `left` and `right`.
|
||||||
export
|
export
|
||||||
between : {c : _} ->
|
between : {c : Bool} ->
|
||||||
(left : Grammar tok True l) ->
|
(left : Grammar state tok True l) ->
|
||||||
(right : Inf (Grammar tok True r)) ->
|
(right : Grammar state tok True r) ->
|
||||||
(p : Inf (Grammar tok c a)) ->
|
(p : Grammar state tok c a) ->
|
||||||
Grammar tok True a
|
Grammar state tok True a
|
||||||
between left right contents = left *> contents <* right
|
between left right contents = left *> contents <* right
|
||||||
|
|
||||||
|
export
|
||||||
|
location : Grammar state token False (Int, Int)
|
||||||
|
location = startBounds <$> position
|
||||||
|
|
||||||
|
export
|
||||||
|
endLocation : Grammar state token False (Int, Int)
|
||||||
|
endLocation = endBounds <$> position
|
||||||
|
|
||||||
|
export
|
||||||
|
column : Grammar state token False Int
|
||||||
|
column = snd <$> location
|
||||||
|
|
||||||
|
public export
|
||||||
|
when : Bool -> Lazy (Grammar state token False ()) -> Grammar state token False ()
|
||||||
|
when True y = y
|
||||||
|
when False y = pure ()
|
||||||
|
@ -1,45 +1,66 @@
|
|||||||
module Text.Parser.Core
|
module Text.Parser.Core
|
||||||
|
|
||||||
import public Control.Delayed
|
import Data.Bool
|
||||||
import Data.List
|
import Data.List
|
||||||
|
import Data.List1
|
||||||
|
|
||||||
|
import public Control.Delayed
|
||||||
|
import public Text.Bounded
|
||||||
|
|
||||||
|
%default total
|
||||||
|
|
||||||
|
-- TODO: Add some primitives for helping with error messages.
|
||||||
|
-- e.g. perhaps set a string to state what we're currently trying to
|
||||||
|
-- parse, or to say what the next expected token is in words
|
||||||
|
|
||||||
||| Description of a language's grammar. The `tok` parameter is the type
|
||| Description of a language's grammar. The `tok` parameter is the type
|
||||||
||| of tokens, and the `consumes` flag is True if the language is guaranteed
|
||| of tokens, and the `consumes` flag is True if the language is guaranteed
|
||||||
||| to be non-empty - that is, successfully parsing the language is guaranteed
|
||| to be non-empty - that is, successfully parsing the language is guaranteed
|
||||||
||| to consume some input.
|
||| to consume some input.
|
||||||
public export
|
export
|
||||||
data Grammar : (tok : Type) -> (consumes : Bool) -> Type -> Type where
|
data Grammar : (state : Type) -> (tok : Type) -> (consumes : Bool) -> Type -> Type where
|
||||||
Empty : (val : ty) -> Grammar tok False ty
|
Empty : (val : ty) -> Grammar state tok False ty
|
||||||
Terminal : String -> (tok -> Maybe a) -> Grammar tok True a
|
Terminal : String -> (tok -> Maybe a) -> Grammar state tok True a
|
||||||
NextIs : String -> (tok -> Bool) -> Grammar tok False tok
|
NextIs : String -> (tok -> Bool) -> Grammar state tok False tok
|
||||||
EOF : Grammar tok False ()
|
EOF : Grammar state tok False ()
|
||||||
|
|
||||||
Fail : Bool -> String -> Grammar tok c ty
|
Fail : (location : Maybe Bounds) -> (fatal : Bool) -> String -> Grammar state tok c ty
|
||||||
Commit : Grammar tok False ()
|
Try : Grammar state tok c ty -> Grammar state tok c ty
|
||||||
MustWork : Grammar tok c a -> Grammar tok c a
|
|
||||||
|
|
||||||
SeqEat : {c2 : _} -> Grammar tok True a -> Inf (a -> Grammar tok c2 b) ->
|
Commit : Grammar state tok False ()
|
||||||
Grammar tok True b
|
MustWork : Grammar state tok c a -> Grammar state tok c a
|
||||||
SeqEmpty : {c1, c2 : _} -> Grammar tok c1 a -> (a -> Grammar tok c2 b) ->
|
|
||||||
Grammar tok (c1 || c2) b
|
|
||||||
|
|
||||||
ThenEat : {c2 : _} -> Grammar tok True () -> Inf (Grammar tok c2 b) ->
|
SeqEat : {c2 : Bool} ->
|
||||||
Grammar tok True b
|
Grammar state tok True a -> Inf (a -> Grammar state tok c2 b) ->
|
||||||
ThenEmpty : {c1, c2 : _} -> Grammar tok c1 () -> Grammar tok c2 b ->
|
Grammar state tok True b
|
||||||
Grammar tok (c1 || c2) b
|
SeqEmpty : {c1, c2 : Bool} ->
|
||||||
|
Grammar state tok c1 a -> (a -> Grammar state tok c2 b) ->
|
||||||
|
Grammar state tok (c1 || c2) b
|
||||||
|
|
||||||
Alt : {c1, c2 : _} -> Grammar tok c1 ty -> Grammar tok c2 ty ->
|
ThenEat : {c2 : Bool} ->
|
||||||
Grammar tok (c1 && c2) ty
|
Grammar state tok True () -> Inf (Grammar state tok c2 a) ->
|
||||||
|
Grammar state tok True a
|
||||||
|
ThenEmpty : {c1, c2 : Bool} ->
|
||||||
|
Grammar state tok c1 () -> Grammar state tok c2 a ->
|
||||||
|
Grammar state tok (c1 || c2) a
|
||||||
|
|
||||||
|
Alt : {c1, c2 : Bool} ->
|
||||||
|
Grammar state tok c1 ty -> Lazy (Grammar state tok c2 ty) ->
|
||||||
|
Grammar state tok (c1 && c2) ty
|
||||||
|
Bounds : Grammar state tok c ty -> Grammar state tok c (WithBounds ty)
|
||||||
|
Position : Grammar state tok False Bounds
|
||||||
|
|
||||||
|
Act : state -> Grammar state tok False ()
|
||||||
|
|
||||||
||| Sequence two grammars. If either consumes some input, the sequence is
|
||| Sequence two grammars. If either consumes some input, the sequence is
|
||||||
||| guaranteed to consume some input. If the first one consumes input, the
|
||| guaranteed to consume some input. If the first one consumes input, the
|
||||||
||| second is allowed to be recursive (because it means some input has been
|
||| second is allowed to be recursive (because it means some input has been
|
||||||
||| consumed and therefore the input is smaller)
|
||| consumed and therefore the input is smaller)
|
||||||
public export %inline %tcinline
|
export %inline
|
||||||
(>>=) : {c1, c2 : Bool} ->
|
(>>=) : {c1, c2 : Bool} ->
|
||||||
Grammar tok c1 a ->
|
Grammar state tok c1 a ->
|
||||||
inf c1 (a -> Grammar tok c2 b) ->
|
inf c1 (a -> Grammar state tok c2 b) ->
|
||||||
Grammar tok (c1 || c2) b
|
Grammar state tok (c1 || c2) b
|
||||||
(>>=) {c1 = False} = SeqEmpty
|
(>>=) {c1 = False} = SeqEmpty
|
||||||
(>>=) {c1 = True} = SeqEat
|
(>>=) {c1 = True} = SeqEat
|
||||||
|
|
||||||
@ -49,97 +70,117 @@ public export %inline %tcinline
|
|||||||
||| consumed and therefore the input is smaller)
|
||| consumed and therefore the input is smaller)
|
||||||
public export %inline %tcinline
|
public export %inline %tcinline
|
||||||
(>>) : {c1, c2 : Bool} ->
|
(>>) : {c1, c2 : Bool} ->
|
||||||
Grammar tok c1 () ->
|
Grammar state tok c1 () ->
|
||||||
inf c1 (Grammar tok c2 a) ->
|
inf c1 (Grammar state tok c2 a) ->
|
||||||
Grammar tok (c1 || c2) a
|
Grammar state tok (c1 || c2) a
|
||||||
(>>) {c1 = False} = ThenEmpty
|
(>>) {c1 = False} = ThenEmpty
|
||||||
(>>) {c1 = True} = ThenEat
|
(>>) {c1 = True} = ThenEat
|
||||||
|
|
||||||
||| Sequence two grammars. If either consumes some input, the sequence is
|
||| Sequence two grammars. If either consumes some input, the sequence is
|
||||||
||| guaranteed to consume input. This is an explicitly non-infinite version
|
||| guaranteed to consume input. This is an explicitly non-infinite version
|
||||||
||| of `>>=`.
|
||| of `>>=`.
|
||||||
export
|
export %inline
|
||||||
seq : {c1, c2 : _} -> Grammar tok c1 a ->
|
seq : {c1,c2 : Bool} ->
|
||||||
(a -> Grammar tok c2 b) ->
|
Grammar state tok c1 a ->
|
||||||
Grammar tok (c1 || c2) b
|
(a -> Grammar state tok c2 b) ->
|
||||||
|
Grammar state tok (c1 || c2) b
|
||||||
seq = SeqEmpty
|
seq = SeqEmpty
|
||||||
|
|
||||||
||| Sequence a grammar followed by the grammar it returns.
|
||| Sequence a grammar followed by the grammar it returns.
|
||||||
export
|
export %inline
|
||||||
join : {c1, c2 : Bool} ->
|
join : {c1,c2 : Bool} ->
|
||||||
Grammar tok c1 (Grammar tok c2 a) ->
|
Grammar state tok c1 (Grammar state tok c2 a) ->
|
||||||
Grammar tok (c1 || c2) a
|
Grammar state tok (c1 || c2) a
|
||||||
join {c1 = False} p = SeqEmpty p id
|
join {c1 = False} p = SeqEmpty p id
|
||||||
join {c1 = True} p = SeqEat p id
|
join {c1 = True} p = SeqEat p id
|
||||||
|
|
||||||
||| Give two alternative grammars. If both consume, the combination is
|
|
||||||
||| guaranteed to consume.
|
|
||||||
export
|
|
||||||
(<|>) : {c1, c2 : _} ->
|
|
||||||
Grammar tok c1 ty ->
|
|
||||||
Grammar tok c2 ty ->
|
|
||||||
Grammar tok (c1 && c2) ty
|
|
||||||
(<|>) = Alt
|
|
||||||
|
|
||||||
||| Allows the result of a grammar to be mapped to a different value.
|
||| Allows the result of a grammar to be mapped to a different value.
|
||||||
export
|
export
|
||||||
{c : _} -> Functor (Grammar tok c) where
|
{c : _} ->
|
||||||
|
Functor (Grammar state tok c) where
|
||||||
map f (Empty val) = Empty (f val)
|
map f (Empty val) = Empty (f val)
|
||||||
map f (Fail fatal msg) = Fail fatal msg
|
map f (Fail bd fatal msg) = Fail bd fatal msg
|
||||||
|
map f (Try g) = Try (map f g)
|
||||||
map f (MustWork g) = MustWork (map f g)
|
map f (MustWork g) = MustWork (map f g)
|
||||||
map f (Terminal msg g) = Terminal msg (\t => map f (g t))
|
map f (Terminal msg g) = Terminal msg (map f . g)
|
||||||
map f (Alt x y) = Alt (map f x) (map f y)
|
map f (Alt x y) = Alt (map f x) (map f y)
|
||||||
map f (SeqEat act next)
|
map f (SeqEat act next)
|
||||||
= SeqEat act (\val => map f (next val))
|
= SeqEat act (\val => map f (next val))
|
||||||
map f (SeqEmpty act next)
|
map f (SeqEmpty act next)
|
||||||
= SeqEmpty act (\val => map f (next val))
|
= SeqEmpty act (\ val => map f (next val))
|
||||||
map f (ThenEat act next)
|
map f (ThenEat act next)
|
||||||
= ThenEat act (map f next)
|
= ThenEat act (map f next)
|
||||||
map f (ThenEmpty act next)
|
map f (ThenEmpty act next)
|
||||||
= ThenEmpty act (map f next)
|
= ThenEmpty act (map f next)
|
||||||
|
map {c} f (Bounds act)
|
||||||
|
= rewrite sym $ orFalseNeutral c in
|
||||||
|
SeqEmpty (Bounds act) (Empty . f) -- Bounds (map f act)
|
||||||
-- The remaining constructors (NextIs, EOF, Commit) have a fixed type,
|
-- The remaining constructors (NextIs, EOF, Commit) have a fixed type,
|
||||||
-- so a sequence must be used.
|
-- so a sequence must be used.
|
||||||
map {c = False} f p = SeqEmpty p (Empty . f)
|
map {c = False} f p = SeqEmpty p (Empty . f)
|
||||||
|
|
||||||
|
||| Give two alternative grammars. If both consume, the combination is
|
||||||
|
||| guaranteed to consume.
|
||||||
|
export %inline
|
||||||
|
(<|>) : {c1,c2 : Bool} ->
|
||||||
|
Grammar state tok c1 ty ->
|
||||||
|
Lazy (Grammar state tok c2 ty) ->
|
||||||
|
Grammar state tok (c1 && c2) ty
|
||||||
|
(<|>) = Alt
|
||||||
|
|
||||||
|
infixr 2 <||>
|
||||||
|
||| Take the tagged disjunction of two grammars. If both consume, the
|
||||||
|
||| combination is guaranteed to consume.
|
||||||
|
export
|
||||||
|
(<||>) : {c1,c2 : Bool} ->
|
||||||
|
Grammar state tok c1 a ->
|
||||||
|
Lazy (Grammar state tok c2 b) ->
|
||||||
|
Grammar state tok (c1 && c2) (Either a b)
|
||||||
|
(<||>) p q = (Left <$> p) <|> (Right <$> q)
|
||||||
|
|
||||||
||| Sequence a grammar with value type `a -> b` and a grammar
|
||| Sequence a grammar with value type `a -> b` and a grammar
|
||||||
||| with value type `a`. If both succeed, apply the function
|
||| with value type `a`. If both succeed, apply the function
|
||||||
||| from the first grammar to the value from the second grammar.
|
||| from the first grammar to the value from the second grammar.
|
||||||
||| Guaranteed to consume if either grammar consumes.
|
||| Guaranteed to consume if either grammar consumes.
|
||||||
export
|
export %inline
|
||||||
(<*>) : {c1, c2 : _} ->
|
(<*>) : {c1, c2 : Bool} ->
|
||||||
Grammar tok c1 (a -> b) ->
|
Grammar state tok c1 (a -> b) ->
|
||||||
Inf (Grammar tok c2 a) ->
|
Grammar state tok c2 a ->
|
||||||
Grammar tok (c1 || c2) b
|
Grammar state tok (c1 || c2) b
|
||||||
(<*>) {c1 = False} x y = SeqEmpty x (\f => map f y)
|
(<*>) x y = SeqEmpty x (\f => map f y)
|
||||||
(<*>) {c1 = True } x y = SeqEmpty x (\f => map f (Force y))
|
|
||||||
|
|
||||||
||| Sequence two grammars. If both succeed, use the value of the first one.
|
||| Sequence two grammars. If both succeed, use the value of the first one.
|
||||||
||| Guaranteed to consume if either grammar consumes.
|
||| Guaranteed to consume if either grammar consumes.
|
||||||
export
|
export %inline
|
||||||
(<*) : {c1, c2 : _} ->
|
(<*) : {c1,c2 : Bool} ->
|
||||||
Grammar tok c1 a ->
|
Grammar state tok c1 a ->
|
||||||
Inf (Grammar tok c2 b) ->
|
Grammar state tok c2 b ->
|
||||||
Grammar tok (c1 || c2) a
|
Grammar state tok (c1 || c2) a
|
||||||
(<*) x y = map const x <*> y
|
(<*) x y = map const x <*> y
|
||||||
|
|
||||||
||| Sequence two grammars. If both succeed, use the value of the second one.
|
||| Sequence two grammars. If both succeed, use the value of the second one.
|
||||||
||| Guaranteed to consume if either grammar consumes.
|
||| Guaranteed to consume if either grammar consumes.
|
||||||
export
|
export %inline
|
||||||
(*>) : {c1, c2 : _} ->
|
(*>) : {c1,c2 : Bool} ->
|
||||||
Grammar tok c1 a ->
|
Grammar state tok c1 a ->
|
||||||
Inf (Grammar tok c2 b) ->
|
Grammar state tok c2 b ->
|
||||||
Grammar tok (c1 || c2) b
|
Grammar state tok (c1 || c2) b
|
||||||
(*>) x y = map (const id) x <*> y
|
(*>) x y = map (const id) x <*> y
|
||||||
|
|
||||||
|
export %inline
|
||||||
|
act : state -> Grammar state tok False ()
|
||||||
|
act = Act
|
||||||
|
|
||||||
||| Produce a grammar that can parse a different type of token by providing a
|
||| Produce a grammar that can parse a different type of token by providing a
|
||||||
||| function converting the new token type into the original one.
|
||| function converting the new token type into the original one.
|
||||||
export
|
export
|
||||||
mapToken : (a -> b) -> Grammar b c ty -> Grammar a c ty
|
mapToken : (a -> b) -> Grammar state b c ty -> Grammar state a c ty
|
||||||
mapToken f (Empty val) = Empty val
|
mapToken f (Empty val) = Empty val
|
||||||
mapToken f (Terminal msg g) = Terminal msg (g . f)
|
mapToken f (Terminal msg g) = Terminal msg (g . f)
|
||||||
mapToken f (NextIs msg g) = SeqEmpty (NextIs msg (g . f)) (Empty . f)
|
mapToken f (NextIs msg g) = SeqEmpty (NextIs msg (g . f)) (Empty . f)
|
||||||
mapToken f EOF = EOF
|
mapToken f EOF = EOF
|
||||||
mapToken f (Fail fatal msg) = Fail fatal msg
|
mapToken f (Fail bd fatal msg) = Fail bd fatal msg
|
||||||
|
mapToken f (Try g) = Try (mapToken f g)
|
||||||
mapToken f (MustWork g) = MustWork (mapToken f g)
|
mapToken f (MustWork g) = MustWork (mapToken f g)
|
||||||
mapToken f Commit = Commit
|
mapToken f Commit = Commit
|
||||||
mapToken f (SeqEat act next)
|
mapToken f (SeqEat act next)
|
||||||
@ -151,177 +192,183 @@ mapToken f (ThenEat act next)
|
|||||||
mapToken f (ThenEmpty act next)
|
mapToken f (ThenEmpty act next)
|
||||||
= ThenEmpty (mapToken f act) (mapToken f next)
|
= ThenEmpty (mapToken f act) (mapToken f next)
|
||||||
mapToken f (Alt x y) = Alt (mapToken f x) (mapToken f y)
|
mapToken f (Alt x y) = Alt (mapToken f x) (mapToken f y)
|
||||||
|
mapToken f (Bounds act) = Bounds (mapToken f act)
|
||||||
|
mapToken f Position = Position
|
||||||
|
mapToken f (Act action) = Act action
|
||||||
|
|
||||||
||| Always succeed with the given value.
|
||| Always succeed with the given value.
|
||||||
export
|
export %inline
|
||||||
pure : (val : ty) -> Grammar tok False ty
|
pure : (val : ty) -> Grammar state tok False ty
|
||||||
pure = Empty
|
pure = Empty
|
||||||
|
|
||||||
||| Check whether the next token satisfies a predicate
|
||| Check whether the next token satisfies a predicate
|
||||||
export
|
export %inline
|
||||||
nextIs : String -> (tok -> Bool) -> Grammar tok False tok
|
nextIs : String -> (tok -> Bool) -> Grammar state tok False tok
|
||||||
nextIs = NextIs
|
nextIs = NextIs
|
||||||
|
|
||||||
||| Look at the next token in the input
|
||| Look at the next token in the input
|
||||||
export
|
export %inline
|
||||||
peek : Grammar tok False tok
|
peek : Grammar state tok False tok
|
||||||
peek = nextIs "Unrecognised token" (const True)
|
peek = nextIs "Unrecognised token" (const True)
|
||||||
|
|
||||||
||| Succeeds if running the predicate on the next token returns Just x,
|
||| Succeeds if running the predicate on the next token returns Just x,
|
||||||
||| returning x. Otherwise fails.
|
||| returning x. Otherwise fails.
|
||||||
export
|
export %inline
|
||||||
terminal : String -> (tok -> Maybe a) -> Grammar tok True a
|
terminal : String -> (tok -> Maybe a) -> Grammar state tok True a
|
||||||
terminal = Terminal
|
terminal = Terminal
|
||||||
|
|
||||||
||| Always fail with a message
|
||| Always fail with a message
|
||||||
export
|
export %inline
|
||||||
fail : String -> Grammar tok c ty
|
fail : String -> Grammar state tok c ty
|
||||||
fail = Fail False
|
fail = Fail Nothing False
|
||||||
|
|
||||||
export
|
||| Always fail with a message and a location
|
||||||
fatalError : String -> Grammar tok c ty
|
export %inline
|
||||||
fatalError = Fail True
|
failLoc : Bounds -> String -> Grammar state tok c ty
|
||||||
|
failLoc b = Fail (Just b) False
|
||||||
|
|
||||||
|
||| Fail with no possibility for recovery (i.e.
|
||||||
|
||| no alternative parsing can succeed).
|
||||||
|
export %inline
|
||||||
|
fatalError : String -> Grammar state tok c ty
|
||||||
|
fatalError = Fail Nothing True
|
||||||
|
|
||||||
|
||| Fail with no possibility for recovery (i.e.
|
||||||
|
||| no alternative parsing can succeed).
|
||||||
|
export %inline
|
||||||
|
fatalLoc : Bounds -> String -> Grammar state tok c ty
|
||||||
|
fatalLoc b = Fail (Just b) True
|
||||||
|
|
||||||
|
||| Catch a fatal error
|
||||||
|
export %inline
|
||||||
|
try : Grammar state tok c ty -> Grammar state tok c ty
|
||||||
|
try = Try
|
||||||
|
|
||||||
||| Succeed if the input is empty
|
||| Succeed if the input is empty
|
||||||
export
|
export %inline
|
||||||
eof : Grammar tok False ()
|
eof : Grammar state tok False ()
|
||||||
eof = EOF
|
eof = EOF
|
||||||
|
|
||||||
||| Commit to an alternative; if the current branch of an alternative
|
||| Commit to an alternative; if the current branch of an alternative
|
||||||
||| fails to parse, no more branches will be tried
|
||| fails to parse, no more branches will be tried
|
||||||
export
|
export %inline
|
||||||
commit : Grammar tok False ()
|
commit : Grammar state tok False ()
|
||||||
commit = Commit
|
commit = Commit
|
||||||
|
|
||||||
||| If the parser fails, treat it as a fatal error
|
||| If the parser fails, treat it as a fatal error
|
||||||
export
|
export %inline
|
||||||
mustWork : Grammar tok c ty -> Grammar tok c ty
|
mustWork : {c : Bool} -> Grammar state tok c ty -> Grammar state tok c ty
|
||||||
mustWork = MustWork
|
mustWork = MustWork
|
||||||
|
|
||||||
data ParseResult : List tok -> (consumes : Bool) -> Type -> Type where
|
export %inline
|
||||||
Failure : {xs : List tok} ->
|
bounds : Grammar state tok c ty -> Grammar state tok c (WithBounds ty)
|
||||||
(committed : Bool) -> (fatal : Bool) ->
|
bounds = Bounds
|
||||||
(err : String) -> (rest : List tok) -> ParseResult xs c ty
|
|
||||||
EmptyRes : (committed : Bool) ->
|
|
||||||
(val : ty) -> (more : List tok) -> ParseResult more False ty
|
|
||||||
NonEmptyRes : {xs : List tok} ->
|
|
||||||
(committed : Bool) ->
|
|
||||||
(val : ty) -> (more : List tok) ->
|
|
||||||
ParseResult (x :: xs ++ more) c ty
|
|
||||||
|
|
||||||
-- Take the result of an alternative branch, reset the commit flag to
|
export %inline
|
||||||
-- the commit flag from the outer alternative, and weaken the 'consumes'
|
position : Grammar state tok False Bounds
|
||||||
-- flag to take both alternatives into account
|
position = Position
|
||||||
weakenRes : {whatever, c : Bool} -> {xs : List tok} ->
|
|
||||||
(com' : Bool) ->
|
|
||||||
ParseResult xs c ty -> ParseResult xs (whatever && c) ty
|
|
||||||
weakenRes com' (Failure com fatal msg ts) = Failure com' fatal msg ts
|
|
||||||
weakenRes {whatever=True} com' (EmptyRes com val xs) = EmptyRes com' val xs
|
|
||||||
weakenRes {whatever=False} com' (EmptyRes com val xs) = EmptyRes com' val xs
|
|
||||||
weakenRes com' (NonEmptyRes {xs} com val more) = NonEmptyRes {xs} com' val more
|
|
||||||
|
|
||||||
doParse : (commit : Bool) ->
|
public export
|
||||||
(act : Grammar tok c ty) ->
|
data ParsingError tok = Error String (Maybe Bounds)
|
||||||
(xs : List tok) ->
|
|
||||||
ParseResult xs c ty
|
data ParseResult : Type -> Type -> Type -> Type where
|
||||||
-- doParse com xs act with (sizeAccessible xs)
|
Failure : (committed : Bool) -> (fatal : Bool) ->
|
||||||
doParse com (Empty val) xs = EmptyRes com val xs
|
List1 (ParsingError tok) -> ParseResult state tok ty
|
||||||
doParse com (Fail fatal str) [] = Failure com fatal str []
|
Res : state -> (committed : Bool) ->
|
||||||
doParse com (Fail fatal str) (x :: xs) = Failure com fatal str (x :: xs)
|
(val : WithBounds ty) -> (more : List (WithBounds tok)) -> ParseResult state tok ty
|
||||||
doParse com Commit xs = EmptyRes True () xs
|
|
||||||
doParse com (MustWork g) xs =
|
mergeWith : WithBounds ty -> ParseResult state tok sy -> ParseResult state tok sy
|
||||||
let p' = doParse com g xs in
|
mergeWith x (Res s committed val more) = Res s committed (mergeBounds x val) more
|
||||||
case p' of
|
mergeWith x v = v
|
||||||
Failure com' _ msg ts => Failure com' True msg ts
|
|
||||||
|
doParse : Semigroup state => state -> (commit : Bool) ->
|
||||||
|
(act : Grammar state tok c ty) ->
|
||||||
|
(xs : List (WithBounds tok)) ->
|
||||||
|
ParseResult state tok ty
|
||||||
|
doParse s com (Empty val) xs = Res s com (irrelevantBounds val) xs
|
||||||
|
doParse s com (Fail location fatal str) xs
|
||||||
|
= Failure com fatal (Error str (location <|> (bounds <$> head' xs)) ::: Nil)
|
||||||
|
doParse s com (Try g) xs = case doParse s com g xs of
|
||||||
|
-- recover from fatal match but still propagate the 'commit'
|
||||||
|
Failure com _ errs => Failure com False errs
|
||||||
res => res
|
res => res
|
||||||
doParse com (Terminal err f) [] = Failure com False "End of input" []
|
doParse s com Commit xs = Res s True (irrelevantBounds ()) xs
|
||||||
doParse com (Terminal err f) (x :: xs)
|
doParse s com (MustWork g) xs =
|
||||||
= maybe
|
case assert_total (doParse s com g xs) of
|
||||||
(Failure com False err (x :: xs))
|
Failure com' _ errs => Failure com' True errs
|
||||||
(\a => NonEmptyRes com {xs=[]} a xs)
|
res => res
|
||||||
(f x)
|
doParse s com (Terminal err f) [] = Failure com False (Error "End of input" Nothing ::: Nil)
|
||||||
doParse com EOF [] = EmptyRes com () []
|
doParse s com (Terminal err f) (x :: xs) =
|
||||||
doParse com EOF (x :: xs)
|
case f x.val of
|
||||||
= Failure com False "Expected end of input" (x :: xs)
|
Nothing => Failure com False (Error err (Just x.bounds) ::: Nil)
|
||||||
doParse com (NextIs err f) [] = Failure com False "End of input" []
|
Just a => Res s com (const a <$> x) xs
|
||||||
doParse com (NextIs err f) (x :: xs)
|
doParse s com EOF [] = Res s com (irrelevantBounds ()) []
|
||||||
= if f x
|
doParse s com EOF (x :: xs) = Failure com False (Error "Expected end of input" (Just x.bounds) ::: Nil)
|
||||||
then EmptyRes com x (x :: xs)
|
doParse s com (NextIs err f) [] = Failure com False (Error "End of input" Nothing ::: Nil)
|
||||||
else Failure com False err (x :: xs)
|
doParse s com (NextIs err f) (x :: xs)
|
||||||
doParse com (Alt {c1} {c2} x y) xs
|
= if f x.val
|
||||||
= let p' = doParse False x xs in
|
then Res s com (removeIrrelevance x) (x :: xs)
|
||||||
case p' of
|
else Failure com False (Error err (Just x.bounds) ::: Nil)
|
||||||
Failure com' fatal msg ts
|
doParse s com (Alt {c1} {c2} x y) xs
|
||||||
|
= case doParse s False x xs of
|
||||||
|
Failure com' fatal errs
|
||||||
=> if com' || fatal
|
=> if com' || fatal
|
||||||
-- If the alternative had committed, don't try the
|
-- If the alternative had committed, don't try the
|
||||||
-- other branch (and reset commit flag)
|
-- other branch (and reset commit flag)
|
||||||
then Failure com fatal msg ts
|
then Failure com fatal errs
|
||||||
else weakenRes {whatever = c1} com (doParse False y xs)
|
else case (assert_total doParse s False y xs) of
|
||||||
|
(Failure com'' fatal' errs') => if com'' || fatal'
|
||||||
|
-- Only add the errors together if the second branch
|
||||||
|
-- is also non-committed and non-fatal.
|
||||||
|
then Failure com'' fatal' errs'
|
||||||
|
else Failure False False (errs ++ errs')
|
||||||
|
(Res s _ val xs) => Res s com val xs
|
||||||
-- Successfully parsed the first option, so use the outer commit flag
|
-- Successfully parsed the first option, so use the outer commit flag
|
||||||
EmptyRes _ val xs => EmptyRes com val xs
|
Res s _ val xs => Res s com val xs
|
||||||
NonEmptyRes {xs=xs'} _ val more => NonEmptyRes {xs=xs'} com val more
|
doParse s com (SeqEmpty act next) xs
|
||||||
doParse com (SeqEmpty {c1} {c2} act next) xs
|
= case assert_total (doParse s com act xs) of
|
||||||
= let p' = assert_total (doParse {c = c1} com act xs) in
|
Failure com fatal errs => Failure com fatal errs
|
||||||
case p' of
|
Res s com v xs =>
|
||||||
Failure com fatal msg ts => Failure com fatal msg ts
|
mergeWith v (assert_total $ doParse s com (next v.val) xs)
|
||||||
EmptyRes com val xs => assert_total (doParse com (next val) xs)
|
doParse s com (SeqEat act next) xs
|
||||||
NonEmptyRes {x} {xs=ys} com val more =>
|
= case assert_total (doParse s com act xs) of
|
||||||
case (assert_total (doParse com (next val) more)) of
|
Failure com fatal errs => Failure com fatal errs
|
||||||
Failure com' fatal msg ts => Failure com' fatal msg ts
|
Res s com v xs =>
|
||||||
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
|
mergeWith v (assert_total $ doParse s com (next v.val) xs)
|
||||||
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
|
doParse s com (ThenEmpty act next) xs
|
||||||
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
|
= case assert_total (doParse s com act xs) of
|
||||||
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
|
Failure com fatal errs => Failure com fatal errs
|
||||||
doParse com (SeqEat act next) xs with (doParse com act xs)
|
Res s com v xs =>
|
||||||
doParse com (SeqEat act next) xs | Failure com' fatal msg ts
|
mergeWith v (assert_total $ doParse s com next xs)
|
||||||
= Failure com' fatal msg ts
|
doParse s com (ThenEat act next) xs
|
||||||
doParse com (SeqEat act next) (x :: (ys ++ more)) | (NonEmptyRes {xs=ys} com' val more)
|
= case assert_total (doParse s com act xs) of
|
||||||
= let p' = assert_total (doParse com' (next val) more) in
|
Failure com fatal errs => Failure com fatal errs
|
||||||
case p' of
|
Res s com v xs =>
|
||||||
Failure com' fatal msg ts => Failure com' fatal msg ts
|
mergeWith v (assert_total $ doParse s com next xs)
|
||||||
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
|
doParse s com (Bounds act) xs
|
||||||
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
|
= case assert_total (doParse s com act xs) of
|
||||||
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
|
Failure com fatal errs => Failure com fatal errs
|
||||||
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
|
Res s com v xs => Res s com (const v <$> v) xs
|
||||||
doParse com (ThenEmpty {c1} {c2} act next) xs
|
doParse s com Position [] = Failure com False (Error "End of input" Nothing ::: Nil)
|
||||||
= let p' = assert_total (doParse {c = c1} com act xs) in
|
doParse s com Position (x :: xs)
|
||||||
case p' of
|
= Res s com (irrelevantBounds x.bounds) (x :: xs)
|
||||||
Failure com fatal msg ts => Failure com fatal msg ts
|
doParse s com (Act action) xs = Res (s <+> action) com (irrelevantBounds ()) xs
|
||||||
EmptyRes com val xs => assert_total (doParse com next xs)
|
|
||||||
NonEmptyRes {x} {xs=ys} com val more =>
|
|
||||||
case (assert_total (doParse com next more)) of
|
|
||||||
Failure com' fatal msg ts => Failure com' fatal msg ts
|
|
||||||
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
|
|
||||||
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
|
|
||||||
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
|
|
||||||
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
|
|
||||||
doParse com (ThenEat act next) xs with (doParse com act xs)
|
|
||||||
doParse com (ThenEat act next) xs | Failure com' fatal msg ts
|
|
||||||
= Failure com' fatal msg ts
|
|
||||||
doParse com (ThenEat act next) (x :: (ys ++ more)) | (NonEmptyRes {xs=ys} com' val more)
|
|
||||||
= let p' = assert_total (doParse com' next more) in
|
|
||||||
case p' of
|
|
||||||
Failure com' fatal msg ts => Failure com' fatal msg ts
|
|
||||||
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
|
|
||||||
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
|
|
||||||
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
|
|
||||||
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
|
|
||||||
|
|
||||||
-- This next line is not strictly necessary, but it stops the coverage
|
|
||||||
-- checker taking a really long time and eating lots of memory...
|
|
||||||
-- doParse _ _ _ = Failure True True "Help the coverage checker!" []
|
|
||||||
|
|
||||||
public export
|
|
||||||
data ParseError tok = Error String (List tok)
|
|
||||||
|
|
||||||
||| Parse a list of tokens according to the given grammar. If successful,
|
||| Parse a list of tokens according to the given grammar. If successful,
|
||||||
||| returns a pair of the parse result and the unparsed tokens (the remaining
|
||| returns a pair of the parse result and the unparsed tokens (the remaining
|
||||||
||| input).
|
||| input).
|
||||||
export
|
export
|
||||||
parse : {c : _} -> (act : Grammar tok c ty) -> (xs : List tok) ->
|
parse : {c : Bool} -> (act : Grammar () tok c ty) -> (xs : List (WithBounds tok)) ->
|
||||||
Either (ParseError tok) (ty, List tok)
|
Either (List1 (ParsingError tok)) (ty, List (WithBounds tok))
|
||||||
parse act xs
|
parse act xs
|
||||||
= case doParse False act xs of
|
= case doParse neutral False act xs of
|
||||||
Failure _ _ msg ts => Left (Error msg ts)
|
Failure _ _ errs => Left errs
|
||||||
EmptyRes _ val rest => pure (val, rest)
|
Res _ _ v rest => Right (v.val, rest)
|
||||||
NonEmptyRes _ val rest => pure (val, rest)
|
|
||||||
|
export
|
||||||
|
parseWith : Monoid state => {c : Bool} -> (act : Grammar state tok c ty) -> (xs : List (WithBounds tok)) ->
|
||||||
|
Either (List1 (ParsingError tok)) (state, ty, List (WithBounds tok))
|
||||||
|
parseWith act xs
|
||||||
|
= case doParse neutral False act xs of
|
||||||
|
Failure _ _ errs => Left errs
|
||||||
|
Res s _ v rest => Right (s, v.val, rest)
|
||||||
|
@ -413,10 +413,6 @@ Pretty Char where
|
|||||||
export Pretty Nat where pretty = unsafeTextWithoutNewLines . show
|
export Pretty Nat where pretty = unsafeTextWithoutNewLines . show
|
||||||
export Pretty Int where pretty = unsafeTextWithoutNewLines . show
|
export Pretty Int where pretty = unsafeTextWithoutNewLines . show
|
||||||
export Pretty Integer where pretty = unsafeTextWithoutNewLines . show
|
export Pretty Integer where pretty = unsafeTextWithoutNewLines . show
|
||||||
export Pretty Int8 where pretty = unsafeTextWithoutNewLines . show
|
|
||||||
export Pretty Int16 where pretty = unsafeTextWithoutNewLines . show
|
|
||||||
export Pretty Int32 where pretty = unsafeTextWithoutNewLines . show
|
|
||||||
export Pretty Int64 where pretty = unsafeTextWithoutNewLines . show
|
|
||||||
export Pretty Double where pretty = unsafeTextWithoutNewLines . show
|
export Pretty Double where pretty = unsafeTextWithoutNewLines . show
|
||||||
export Pretty Bits8 where pretty = unsafeTextWithoutNewLines . show
|
export Pretty Bits8 where pretty = unsafeTextWithoutNewLines . show
|
||||||
export Pretty Bits16 where pretty = unsafeTextWithoutNewLines . show
|
export Pretty Bits16 where pretty = unsafeTextWithoutNewLines . show
|
||||||
|
@ -3,6 +3,8 @@ module Text.PrettyPrint.Prettyprinter.Render.HTML
|
|||||||
import Data.List
|
import Data.List
|
||||||
import Data.String
|
import Data.String
|
||||||
|
|
||||||
|
%default covering
|
||||||
|
|
||||||
export
|
export
|
||||||
htmlEscape : String -> String
|
htmlEscape : String -> String
|
||||||
htmlEscape s = fastAppend $ reverse $ go [] s
|
htmlEscape s = fastAppend $ reverse $ go [] s
|
||||||
|
@ -6,7 +6,7 @@ import public Control.ANSI
|
|||||||
import Control.Monad.ST
|
import Control.Monad.ST
|
||||||
import Text.PrettyPrint.Prettyprinter.Doc
|
import Text.PrettyPrint.Prettyprinter.Doc
|
||||||
|
|
||||||
%default total
|
%default covering
|
||||||
|
|
||||||
public export
|
public export
|
||||||
AnsiStyle : Type
|
AnsiStyle : Type
|
||||||
|
@ -68,6 +68,7 @@ sdocToTreeParser (SLine i rest) = case sdocToTreeParser rest of
|
|||||||
(Just tree, rest') => (Just $ STConcat [STLine i, tree], rest')
|
(Just tree, rest') => (Just $ STConcat [STLine i, tree], rest')
|
||||||
(Nothing, rest') => (Just $ STLine i, rest')
|
(Nothing, rest') => (Just $ STLine i, rest')
|
||||||
sdocToTreeParser (SAnnPush ann rest) = case sdocToTreeParser rest of
|
sdocToTreeParser (SAnnPush ann rest) = case sdocToTreeParser rest of
|
||||||
|
(tree, Nothing) => (Nothing, Nothing)
|
||||||
(Just tree, Nothing) => (Just $ STAnn ann tree, Nothing)
|
(Just tree, Nothing) => (Just $ STAnn ann tree, Nothing)
|
||||||
(Just tree, Just rest') => case sdocToTreeParser rest' of
|
(Just tree, Just rest') => case sdocToTreeParser rest' of
|
||||||
(Just tree', rest'') => (Just $ STConcat [STAnn ann tree, tree'], rest'')
|
(Just tree', rest'') => (Just $ STConcat [STAnn ann tree, tree'], rest'')
|
||||||
|
@ -101,8 +101,10 @@ angles : Doc ann -> Doc ann
|
|||||||
angles = enclose langle rangle
|
angles = enclose langle rangle
|
||||||
|
|
||||||
export
|
export
|
||||||
brackets : Doc ann -> Doc ann
|
brackets : {default lbracket ldelim : Doc ann} ->
|
||||||
brackets = enclose lbracket rbracket
|
{default rbracket rdelim : Doc ann} ->
|
||||||
|
Doc ann -> Doc ann
|
||||||
|
brackets {ldelim, rdelim} = enclose ldelim rdelim
|
||||||
|
|
||||||
export
|
export
|
||||||
braces : Doc ann -> Doc ann
|
braces : Doc ann -> Doc ann
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
module Text.Quantity
|
module Text.Quantity
|
||||||
|
|
||||||
|
%default total
|
||||||
|
|
||||||
public export
|
public export
|
||||||
record Quantity where
|
record Quantity where
|
||||||
constructor Qty
|
constructor Qty
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
module Text.Token
|
module Text.Token
|
||||||
|
|
||||||
|
%default total
|
||||||
|
|
||||||
||| For a type `kind`, specify a way of converting the recognised
|
||| For a type `kind`, specify a way of converting the recognised
|
||||||
||| string into a value.
|
||| string into a value.
|
||||||
|||
|
|||
|
||||||
|
@ -159,17 +159,20 @@ modules = Control.ANSI,
|
|||||||
System.Random,
|
System.Random,
|
||||||
System.Path,
|
System.Path,
|
||||||
|
|
||||||
Text.Token,
|
Text.Bounded,
|
||||||
Text.Quantity,
|
|
||||||
Text.Parser,
|
|
||||||
Text.Lexer,
|
Text.Lexer,
|
||||||
Text.Parser.Core,
|
|
||||||
Text.Lexer.Core,
|
Text.Lexer.Core,
|
||||||
|
Text.Lexer.Tokenizer,
|
||||||
|
Text.Literate,
|
||||||
|
Text.Parser,
|
||||||
|
Text.Parser.Core,
|
||||||
Text.PrettyPrint.Prettyprinter,
|
Text.PrettyPrint.Prettyprinter,
|
||||||
Text.PrettyPrint.Prettyprinter.Doc,
|
Text.PrettyPrint.Prettyprinter.Doc,
|
||||||
Text.PrettyPrint.Prettyprinter.Symbols,
|
|
||||||
Text.PrettyPrint.Prettyprinter.Util,
|
|
||||||
Text.PrettyPrint.Prettyprinter.SimpleDocTree,
|
|
||||||
Text.PrettyPrint.Prettyprinter.Render.HTML,
|
Text.PrettyPrint.Prettyprinter.Render.HTML,
|
||||||
Text.PrettyPrint.Prettyprinter.Render.String,
|
Text.PrettyPrint.Prettyprinter.Render.String,
|
||||||
Text.PrettyPrint.Prettyprinter.Render.Terminal
|
Text.PrettyPrint.Prettyprinter.Render.Terminal,
|
||||||
|
Text.PrettyPrint.Prettyprinter.SimpleDocTree,
|
||||||
|
Text.PrettyPrint.Prettyprinter.Symbols,
|
||||||
|
Text.PrettyPrint.Prettyprinter.Util,
|
||||||
|
Text.Quantity,
|
||||||
|
Text.Token
|
||||||
|
Loading…
Reference in New Issue
Block a user