Update contrib Text.Parser to match Library.Text.Parser (#1808)

Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org>
Alex Humphreys 2021-08-06 09:03:13 +00:00 committed by GitHub
parent ca95781a74
commit f3855d7100
24 changed files with 1054 additions and 437 deletions

View File

@@ -7,7 +7,6 @@ import Data.String
%default total
infixl 5 +>
infixr 5 <+
@@ -94,20 +93,14 @@ public export
indentLines : (n : Nat) -> String -> String
indentLines n str = unlines $ map (indent n) $ lines str
||| Return a string of the given character repeated
||| `n` times.
export
fastReplicate : (n : Nat) -> Char -> String
fastReplicate n c = fastPack $ replicate n c
||| Left-justify a string to the given length, using the
||| specified fill character on the right.
export
justifyLeft : Nat -> Char -> String -> String
justifyLeft n c s = s ++ fastReplicate (n `minus` length s) c
justifyLeft n c s = s ++ replicate (n `minus` length s) c
||| Right-justify a string to the given length, using the
||| specified fill character on the left.
export
justifyRight : Nat -> Char -> String -> String
justifyRight n c s = fastReplicate (n `minus` length s) c ++ s
justifyRight n c s = replicate (n `minus` length s) c ++ s
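Behaviour is unchanged by dropping `fastReplicate`, since it was a thin wrapper over `replicate`. A minimal sketch of the two combinators (assuming contrib's `Data.String.Extra` is where they live):

```
import Data.String.Extra

-- Pad "abc" to width 6 with '.' on either side.
main : IO ()
main = do
  printLn (justifyLeft 6 '.' "abc")   -- "abc..."
  printLn (justifyRight 6 '.' "abc")  -- "...abc"
```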

View File

@@ -6,6 +6,8 @@ import Language.JSON.Parser
import public Language.JSON.Data
import public Text.Bounded
%default total
||| Parse a JSON string.

View File

@@ -34,8 +34,8 @@ jsonTokenMap = toTokenMap $
]
export
lexJSON : String -> Maybe (List JSONToken)
lexJSON : String -> Maybe (List (WithBounds JSONToken))
lexJSON str
= case lex jsonTokenMap str of
(tokens, _, _, "") => Just $ map TokenData.tok tokens
(tokens, _, _, "") => Just $ tokens
_ => Nothing
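A sketch of the new output shape (assuming contrib's `Language.JSON.Lexer` and the `Text.Bounded` record shown later in this diff): every token now carries its source span.

```
import Language.JSON.Lexer
import Text.Bounded

-- Bounds of the first token of a lexed document, if any.
firstSpan : Maybe Bounds
firstSpan = case lexJSON "[1, 2]" of
              Just (t :: _) => Just t.bounds
              _             => Nothing
```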

View File

@@ -10,20 +10,20 @@ import public Language.JSON.Tokens
%default total
private
punct : Punctuation -> Grammar JSONToken True ()
punct : Punctuation -> Grammar state JSONToken True ()
punct p = match $ JTPunct p
private
rawString : Grammar JSONToken True String
rawString : Grammar state JSONToken True String
rawString = do mstr <- match JTString
the (Grammar _ False _) $
the (Grammar _ _ False _) $
case mstr of
Just str => pure str
Nothing => fail "invalid string"
mutual
private
json : Grammar JSONToken True JSON
json : Grammar state JSONToken True JSON
json = object
<|> array
<|> string
@@ -32,14 +32,14 @@ mutual
<|> null
private
object : Grammar JSONToken True JSON
object : Grammar state JSONToken True JSON
object = do punct $ Curly Open
commit
props <- properties
punct $ Curly Close
pure $ JObject props
where
properties : Grammar JSONToken False (List (String, JSON))
properties : Grammar state JSONToken False (List (String, JSON))
properties = sepBy (punct Comma) $
do key <- rawString
punct Colon
@@ -47,34 +47,34 @@ mutual
pure (key, value)
private
array : Grammar JSONToken True JSON
array : Grammar state JSONToken True JSON
array = do punct (Square Open)
commit
vals <- values
punct (Square Close)
pure (JArray vals)
where
values : Grammar JSONToken False (List JSON)
values : Grammar state JSONToken False (List JSON)
values = sepBy (punct Comma) json
private
string : Grammar JSONToken True JSON
string : Grammar state JSONToken True JSON
string = map JString rawString
private
boolean : Grammar JSONToken True JSON
boolean : Grammar state JSONToken True JSON
boolean = map JBoolean $ match JTBoolean
private
number : Grammar JSONToken True JSON
number : Grammar state JSONToken True JSON
number = map JNumber $ match JTNumber
private
null : Grammar JSONToken True JSON
null : Grammar state JSONToken True JSON
null = map (const JNull) $ match JTNull
export
parseJSON : List JSONToken -> Maybe JSON
parseJSON : List (WithBounds JSONToken) -> Maybe JSON
parseJSON toks = case parse json $ filter (not . ignored) toks of
Right (j, []) => Just j
_ => Nothing
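`lexJSON` now produces exactly the `WithBounds` list that `parseJSON` consumes, so the two still compose directly; a hedged end-to-end sketch:

```
import Language.JSON.Data
import Language.JSON.Lexer
import Language.JSON.Parser

-- Lex, then parse; parseJSON itself filters out the ignored tokens.
parseDoc : String -> Maybe JSON
parseDoc str = lexJSON str >>= parseJSON
```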

View File

@@ -12,7 +12,7 @@ quo = is '"'
export
esc : Lexer -> Lexer
esc = escape '\\'
esc = escape (is '\\')
private
unicodeEscape : Lexer
@@ -36,7 +36,7 @@ jsonStringTokenMap = toTokenMap $
]
export
lexString : String -> Maybe (List JSONStringToken)
lexString : String -> Maybe (List (WithBounds JSONStringToken))
lexString x = case lex jsonStringTokenMap x of
(toks, _, _, "") => Just $ map TokenData.tok toks
(toks, _, _, "") => Just $ toks
_ => Nothing

View File

@@ -7,20 +7,20 @@ import Text.Parser
%default total
private
stringChar : Grammar JSONStringToken True Char
stringChar : Grammar state JSONStringToken True Char
stringChar = match JSTChar
<|> match JSTSimpleEscape
<|> match JSTUnicodeEscape
private
quotedString : Grammar JSONStringToken True String
quotedString : Grammar state JSONStringToken True String
quotedString = let q = match JSTQuote in
do chars <- between q q (many stringChar)
eof
pure $ pack chars
export
parseString : List JSONStringToken -> Maybe String
parseString : List (WithBounds JSONStringToken) -> Maybe String
parseString toks = case parse quotedString toks of
Right (str, []) => Just str
_ => Nothing

View File

@@ -2,6 +2,7 @@ module Language.JSON.Tokens
import Language.JSON.String
import Text.Token
import Text.Bounded
%default total
@@ -76,6 +77,6 @@ TokenKind JSONTokenKind where
tokValue JTIgnore _ = ()
export
ignored : JSONToken -> Bool
ignored (Tok JTIgnore _) = True
ignored : WithBounds JSONToken -> Bool
ignored (MkBounded (Tok JTIgnore _) _ _) = True
ignored _ = False
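For illustration, a small check of the updated `ignored`, using `irrelevantBounds` from `Text.Bounded` (this assumes `JTIgnore` and the `Tok` constructor are publicly visible, as the code above suggests):

```
import Language.JSON.Tokens
import Text.Bounded

-- A whitespace token is still skipped; the check now unwraps the bounds.
skipDemo : Bool
skipDemo = ignored (irrelevantBounds (Tok JTIgnore " "))
-- skipDemo == True
```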

View File

@@ -153,15 +153,15 @@ pathTokenMap = toTokenMap $
, (some $ non $ oneOf "/\\:?", PTText)
]
lexPath : String -> List PathToken
lexPath : String -> List (WithBounds PathToken)
lexPath str =
let
(tokens, _, _, _) = lex pathTokenMap str
in
map TokenData.tok tokens
tokens -- TokenData.tok tokens
-- match both '/' and '\\' regardless of the platform.
bodySeparator : Grammar PathToken True ()
bodySeparator : Grammar state PathToken True ()
bodySeparator = (match $ PTPunct '\\') <|> (match $ PTPunct '/')
-- Windows will automatically translate '/' to '\\'. And the verbatim prefix,
@@ -169,7 +169,7 @@ bodySeparator = (match $ PTPunct '\\') <|> (match $ PTPunct '/')
-- However, we just parse it and ignore it.
--
-- Example: \\?\
verbatim : Grammar PathToken True ()
verbatim : Grammar state PathToken True ()
verbatim =
do
ignore $ count (exactly 2) $ match $ PTPunct '\\'
@@ -178,7 +178,7 @@ verbatim =
pure ()
-- Example: \\server\share
unc : Grammar PathToken True Volume
unc : Grammar state PathToken True Volume
unc =
do
ignore $ count (exactly 2) $ match $ PTPunct '\\'
@@ -188,7 +188,7 @@ unc =
pure $ UNC server share
-- Example: \\?\server\share
verbatimUnc : Grammar PathToken True Volume
verbatimUnc : Grammar state PathToken True Volume
verbatimUnc =
do
verbatim
@@ -198,7 +198,7 @@ verbatimUnc =
pure $ UNC server share
-- Example: C:
disk : Grammar PathToken True Volume
disk : Grammar state PathToken True Volume
disk =
do
text <- match PTText
@@ -209,21 +209,21 @@ disk =
pure $ Disk (toUpper disk)
-- Example: \\?\C:
verbatimDisk : Grammar PathToken True Volume
verbatimDisk : Grammar state PathToken True Volume
verbatimDisk =
do
verbatim
disk <- disk
pure disk
parseVolume : Grammar PathToken True Volume
parseVolume : Grammar state PathToken True Volume
parseVolume =
verbatimUnc
<|> verbatimDisk
<|> unc
<|> disk
parseBody : Grammar PathToken True Body
parseBody : Grammar state PathToken True Body
parseBody =
do
text <- match PTText
@@ -232,7 +232,7 @@ parseBody =
"." => CurDir
normal => Normal normal
parsePath : Grammar PathToken False Path
parsePath : Grammar state PathToken False Path
parsePath =
do
vol <- optional parseVolume

View File

@@ -0,0 +1,91 @@
module Text.Bounded
%default total
public export
record Bounds where
constructor MkBounds
startLine : Int
startCol : Int
endLine : Int
endCol : Int
Show Bounds where
showPrec d (MkBounds sl sc el ec) =
showCon d "MkBounds" (concat [showArg sl, showArg sc, showArg el, showArg ec])
export
startBounds : Bounds -> (Int, Int)
startBounds b = (b.startLine, b.startCol)
export
endBounds : Bounds -> (Int, Int)
endBounds b = (b.endLine, b.endCol)
export
Eq Bounds where
(MkBounds sl sc el ec) == (MkBounds sl' sc' el' ec') =
sl == sl'
&& sc == sc'
&& el == el'
&& ec == ec'
public export
record WithBounds ty where
constructor MkBounded
val : ty
isIrrelevant : Bool
bounds : Bounds
export
start : WithBounds ty -> (Int, Int)
start = startBounds . bounds
export
end : WithBounds ty -> (Int, Int)
end = endBounds . bounds
export
Eq ty => Eq (WithBounds ty) where
(MkBounded val ir bd) == (MkBounded val' ir' bd') =
val == val' && ir == ir' && bd == bd'
export
Show ty => Show (WithBounds ty) where
showPrec d (MkBounded val ir bd) =
showCon d "MkBounded" (concat [showArg ir, showArg val, showArg bd])
export
Functor WithBounds where
map f (MkBounded val ir bd) = MkBounded (f val) ir bd
export
Foldable WithBounds where
foldr c n b = c b.val n
export
Traversable WithBounds where
traverse f (MkBounded v a bd) = (\ v => MkBounded v a bd) <$> f v
export
irrelevantBounds : ty -> WithBounds ty
irrelevantBounds x = MkBounded x True (MkBounds (-1) (-1) (-1) (-1))
export
removeIrrelevance : WithBounds ty -> WithBounds ty
removeIrrelevance (MkBounded val ir bd) = MkBounded val True bd
export
mergeBounds : WithBounds ty -> WithBounds ty' -> WithBounds ty'
mergeBounds (MkBounded _ True _) (MkBounded val True _) = irrelevantBounds val
mergeBounds (MkBounded _ True _) b2 = b2
mergeBounds b1 (MkBounded val True _) = const val <$> b1
mergeBounds b1 b2 =
let (ur, uc) = min (start b1) (start b2)
(lr, lc) = max (end b1) (end b2) in
MkBounded b2.val False (MkBounds ur uc lr lc)
export
joinBounds : WithBounds (WithBounds ty) -> WithBounds ty
joinBounds b = mergeBounds b b.val
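A small sketch of the merge semantics defined above: merging two relevant spans keeps the second value, the smallest start, and the largest end.

```
import Text.Bounded

combined : WithBounds ()
combined = mergeBounds (MkBounded () False (MkBounds 0 0 0 5))
                       (MkBounded () False (MkBounds 1 2 1 7))
-- combined.bounds == MkBounds 0 0 1 7
```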

View File

@@ -0,0 +1,69 @@
module Text.Distance.Levenshtein
import Data.List
import Data.Maybe
import Data.String
import Data.IOMatrix
import Data.List.Extra
%default total
||| Self-evidently correct but O(3 ^ (min m n)) complexity
spec : String -> String -> Nat
spec a b = loop (fastUnpack a) (fastUnpack b) where
loop : List Char -> List Char -> Nat
loop [] ys = length ys -- deletions
loop xs [] = length xs -- insertions
loop (x :: xs) (y :: ys)
= if x == y then loop xs ys -- match
else 1 + minimum [ loop (x :: xs) ys -- insert y
, loop xs (y :: ys) -- delete x
, loop xs ys -- substitute y for x
]
||| Dynamic programming
export
compute : HasIO io => String -> String -> io Nat
compute a b = do
let w = strLength a
let h = strLength b
-- In mat[i][j], we store the distance between
-- * the suffix of a of size i
-- * the suffix of b of size j
-- So we need a matrix of size (|a|+1) * (|b|+1)
mat <- new (w+1) (h+1)
-- Whenever one of the two suffixes of interest is empty, the only
-- winning move is to:
-- * delete all of the first
-- * insert all of the second
-- i.e. the cost is the length of the non-zero suffix
for_ [0..w] $ \ i => write mat i 0 i -- deletions
for_ [0..h] $ \ j => write mat 0 j j -- insertions
-- We introduce a specialised `read` for ease of use
let get = \i, j => case !(read {io} mat i j) of
Nothing => assert_total $
idris_crash "INTERNAL ERROR: Badly initialised matrix"
Just n => pure n
-- We fill the matrix from the bottom up, using the same formula we used
-- in the specification's `loop`.
for_ [1..h] $ \ j => do
for_ [1..w] $ \ i => do
-- here we change Levenshtein slightly so that we may only substitute
-- alpha / numerical characters for similar ones. This avoids suggesting
-- "#" as a replacement for an out of scope "n".
let cost = let c = assert_total $ strIndex a (i-1)
d = assert_total $ strIndex b (j-1)
in if c == d then 0 else
if isAlpha c && isAlpha d then 1 else
if isDigit c && isDigit d then 1 else 2
write mat i j $
minimum [ 1 + !(get i (j-1)) -- insert y
, 1 + !(get (i-1) j) -- delete x
, cost + !(get (i-1) (j-1)) -- equal or substitute y for x
]
-- Once the matrix is fully filled, we can simply read the top right corner
integerToNat . cast <$> get w h
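Usage is a one-liner in any `HasIO` context; with the modified cost function above, all-letter edits behave like plain Levenshtein:

```
import Text.Distance.Levenshtein

main : IO ()
main = do
  d <- Levenshtein.compute "kitten" "sitting"
  printLn d  -- 3: two letter-for-letter substitutions plus one insertion
```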

View File

@@ -8,6 +8,8 @@ import public Text.Lexer.Core
import public Text.Quantity
import public Text.Token
%default total
export
toTokenMap : List (Lexer, k) -> TokenMap (Token k)
toTokenMap = map $ \(l, kind) => (l, Tok kind)
@@ -37,22 +39,22 @@ non l = reject l <+> any
export
choiceMap : {c : Bool} ->
Foldable t => (a -> Recognise c) -> t a -> Recognise c
choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
f x <|> acc)
fail xs
choiceMap {c} f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
f x <|> acc)
fail xs
||| Recognise the first matching recogniser in a container. Consumes input if
||| recognisers in the list consume. Fails if the container is empty.
export
choice : {c : _} ->
Foldable t => t (Recognise c) -> Recognise c
choice = Lexer.choiceMap id
choice = choiceMap id
||| Sequence a list of recognisers. Guaranteed to consume input if the list is
||| non-empty and the recognisers consume.
export
concat : {c : _} ->
(xs : List (Recognise c)) -> Recognise (c && isCons xs)
(xs : List (Recognise c)) -> Recognise (isCons xs && c)
concat = Lexer.Core.concatMap id
||| Recognise a specific character.
@@ -124,6 +126,13 @@ mutual
many : Lexer -> Recognise False
many l = opt (some l)
||| Repeat the sub-lexer `l` one or more times until the lexer
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
||| /((?!`stopBefore`)`l`)\+/
export
someUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Lexer
someUntil stopBefore l = some (reject stopBefore <+> l)
||| Repeat the sub-lexer `l` zero or more times until the lexer
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
||| Not guaranteed to consume input.
@@ -140,14 +149,6 @@ export
manyThen : (stopAfter : Recognise c) -> (l : Lexer) -> Recognise c
manyThen stopAfter l = manyUntil stopAfter l <+> stopAfter
||| Recognise many instances of `l` until an instance of `end` is
||| encountered.
|||
||| Useful for defining comments.
export
manyTill : (l : Lexer) -> (end : Lexer) -> Recognise False
manyTill l end = end <|> opt (l <+> manyTill l end)
||| Recognise a sub-lexer repeated as specified by `q`. Fails if `q` has
||| `min` and `max` in the wrong order. Consumes input unless `min q` is zero.
||| /`l`{`q`}/
@@ -172,6 +173,18 @@ export
digits : Lexer
digits = some digit
||| Recognise a single binary digit
||| /[0-1]/
export
binDigit : Lexer
binDigit = pred (\c => c == '0' || c == '1')
||| Recognise one or more binary digits
||| /[0-1]+/
export
binDigits : Lexer
binDigits = some binDigit
||| Recognise a single hexadecimal digit
||| /[0-9A-Fa-f]/
export
@@ -188,13 +201,13 @@ hexDigits = some hexDigit
||| /[0-7]/
export
octDigit : Lexer
octDigit = pred isHexDigit
octDigit = pred isOctDigit
||| Recognise one or more octal digits
||| /[0-7]+/
export
octDigits : Lexer
octDigits = some hexDigit
octDigits = some octDigit
||| Recognise a single alpha character
||| /[A-Za-z]/
@@ -307,18 +320,19 @@ export
quote : (q : Lexer) -> (l : Lexer) -> Lexer
quote q l = surround q q l
||| Recognise an escape character (often '\\') followed by a sub-lexer
||| Recognise an escape sub-lexer (often '\\') followed by
||| another sub-lexer
||| /[`esc`]`l`/
export
escape : (esc : Char) -> Lexer -> Lexer
escape esc l = is esc <+> l
escape : (esc : Lexer) -> Lexer -> Lexer
escape esc l = esc <+> l
||| Recognise a string literal, including escaped characters.
||| (Note: doesn't yet handle escape sequences such as \123)
||| /"(\\\\.|.)\*?"/
export
stringLit : Lexer
stringLit = quote (is '"') (escape '\\' any <|> any)
stringLit = quote (is '"') (escape (is '\\') any <|> any)
||| Recognise a character literal, including escaped characters.
||| (Note: doesn't yet handle escape sequences such as \123)
@@ -326,7 +340,7 @@ stringLit = quote (is '"') (escape '\\' any <|> any)
export
charLit : Lexer
charLit = let q = '\'' in
is q <+> (escape '\\' (control <|> any) <|> isNot q) <+> is q
is q <+> (escape (is '\\') (control <|> any) <|> isNot q) <+> is q
where
lexStr : List String -> Lexer
lexStr [] = fail
@@ -348,12 +362,40 @@ export
intLit : Lexer
intLit = opt (is '-') <+> digits
||| Recognise a binary literal, prefixed by "0b"
||| /0b[0-1]+/
export
binLit : Lexer
binLit = exact "0b" <+> binDigits
||| Recognise a hexadecimal literal, prefixed by "0x" or "0X"
||| /0[Xx][0-9A-Fa-f]+/
export
hexLit : Lexer
hexLit = approx "0x" <+> hexDigits
||| Recognise an octal literal, prefixed by "0o"
||| /0o[0-7]+/
export
octLit : Lexer
octLit = exact "0o" <+> octDigits
export
digitsUnderscoredLit : Lexer
digitsUnderscoredLit = digits <+> many (is '_' <+> digits)
export
binUnderscoredLit : Lexer
binUnderscoredLit = binLit <+> many (is '_' <+> binDigits)
export
hexUnderscoredLit : Lexer
hexUnderscoredLit = hexLit <+> many (is '_' <+> hexDigits)
export
octUnderscoredLit : Lexer
octUnderscoredLit = octLit <+> many (is '_' <+> octDigits)
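A sketch of the new literal lexers in action, using the raw `TokenMap` form (a list of lexer/function pairs) so that no token kind is needed; `id` keeps the matched text as the token:

```
import Text.Lexer

lexBin : String -> (List (WithBounds String), (Int, Int, String))
lexBin = lex [(binUnderscoredLit, id)]
-- lexBin "0b1010_1111" yields one token whose val is "0b1010_1111"
```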
||| Recognise `start`, then recognise all input until a newline is encountered,
||| and consume the newline. Will succeed if end-of-input is encountered before
||| a newline.

View File

@@ -1,15 +1,19 @@
module Text.Lexer.Core
import public Control.Delayed
import Data.Bool
import Data.List
import Data.Maybe
import Data.Nat
import Data.String
import public Control.Delayed
import public Text.Bounded
%default total
||| A language of token recognisers.
||| @ consumes If `True`, this recogniser is guaranteed to consume at
||| least one character of input when it succeeds.
public export
export
data Recognise : (consumes : Bool) -> Type where
Empty : Recognise False
Fail : Recognise c
@@ -17,6 +21,7 @@ data Recognise : (consumes : Bool) -> Type where
Pred : (Char -> Bool) -> Recognise True
SeqEat : Recognise True -> Inf (Recognise e) -> Recognise True
SeqEmpty : Recognise e1 -> Recognise e2 -> Recognise (e1 || e2)
SeqSame : Recognise e -> Recognise e -> Recognise e
Alt : Recognise e1 -> Recognise e2 -> Recognise (e1 && e2)
||| A token recogniser. Guaranteed to consume at least one character.
@@ -24,8 +29,6 @@ public export
Lexer : Type
Lexer = Recognise True
-- %allow_overloads (<+>)
||| Sequence two recognisers. If either consumes a character, the sequence
||| is guaranteed to consume a character.
export %inline
@@ -34,11 +37,8 @@ export %inline
(<+>) {c1 = False} = SeqEmpty
(<+>) {c1 = True} = SeqEat
%allow_overloads (<|>)
||| Alternative recognisers. If both consume, the combination is guaranteed
||| to consumer a character.
%inline
||| to consume a character.
export
(<|>) : Recognise c1 -> Recognise c2 -> Recognise (c1 && c2)
(<|>) = Alt
@@ -49,13 +49,11 @@ fail : Recognise c
fail = Fail
||| Recognise no input (doesn't consume any input)
%inline
export
empty : Recognise False
empty = Empty
||| Recognise a character that matches a predicate
%inline
export
pred : (Char -> Bool) -> Lexer
pred = Pred
@@ -70,33 +68,25 @@ export
reject : Recognise c -> Recognise False
reject = Lookahead False
%allow_overloads concatMap
||| Sequence the recognisers resulting from applying a function to each element
||| of a list. The resulting recogniser will consume input if the produced
||| recognisers consume and the list is non-empty.
export
concatMap : (a -> Recognise c) -> (xs : List a) ->
Recognise (c && (isCons xs))
concatMap _ [] = rewrite andFalseFalse c in Empty
concatMap f (x :: xs)
= rewrite andTrueNeutral c in
rewrite sym (orSameAndRightNeutral c (isCons xs)) in
SeqEmpty (f x) (Core.concatMap f xs)
concatMap : (a -> Recognise c) -> (xs : List a) -> Recognise (isCons xs && c)
concatMap _ [] = Empty
concatMap f (x :: []) = f x
concatMap f (x :: xs@(_ :: _)) = SeqSame (f x) (concatMap f xs)
data StrLen : Type where
MkStrLen : String -> Nat -> StrLen
Show StrLen where
show (MkStrLen str n) = str ++ "(" ++ show n ++ ")"
getString : StrLen -> String
getString (MkStrLen str n) = str
strIndex : StrLen -> Nat -> Maybe Char
strIndex (MkStrLen str len) i
= if cast {to = Integer} i >= cast len then Nothing
else Just (assert_total (prim__strIndex str (cast i)))
else Just (assert_total (prim__strIndex str (cast i)))
mkStr : String -> StrLen
mkStr str = MkStrLen str (length str)
@@ -105,19 +95,9 @@ strTail : Nat -> StrLen -> StrLen
strTail start (MkStrLen str len)
= MkStrLen (substr start len str) (minus len start)
isJust : Maybe a -> Bool
isJust Nothing = False
isJust (Just x) = True
export
unpack' : String -> List Char
unpack' str
= case strUncons str of
Nothing => []
Just (x, xs) => x :: unpack' xs
-- If the string is recognised, returns the consumed characters
-- (in reverse order) and the remaining input
export
scan : Recognise c -> List Char -> List Char -> Maybe (List Char, List Char)
scan Empty tok str = pure (tok, str)
scan Fail tok str = Nothing
@@ -137,6 +117,9 @@ scan (SeqEat r1 r2) tok str
scan (SeqEmpty r1 r2) tok str
= do (tok', rest) <- scan r1 tok str
scan r2 tok' rest
scan (SeqSame r1 r2) tok str
= do (tok', rest) <- scan r1 tok str
scan r2 tok' rest
scan (Alt r1 r2) tok str
= maybe (scan r2 tok str) Just (scan r1 tok str)
@@ -148,27 +131,15 @@ public export
TokenMap : (tokenType : Type) -> Type
TokenMap tokenType = List (Lexer, String -> tokenType)
||| A token, and the line and column where it was in the input
public export
record TokenData a where
constructor MkToken
line : Int
col : Int
tok : a
export
Show a => Show (TokenData a) where
show t = show (line t) ++ ":" ++ show (col t) ++ ":" ++ show (tok t)
tokenise : (TokenData a -> Bool) ->
tokenise : (a -> Bool) ->
(line : Int) -> (col : Int) ->
List (TokenData a) -> TokenMap a ->
List Char -> (List (TokenData a), (Int, Int, List Char))
List (WithBounds a) -> TokenMap a ->
List Char -> (List (WithBounds a), (Int, Int, List Char))
tokenise pred line col acc tmap str
= case getFirstToken tmap str of
Just (tok, line', col', rest) =>
-- assert total because getFirstToken must consume something
if pred tok
if pred tok.val
then (reverse acc, (line, col, []))
else assert_total (tokenise pred line' col' (tok :: acc) tmap rest)
Nothing => (reverse acc, (line, col, str))
@@ -183,13 +154,15 @@ tokenise pred line col acc tmap str
(incol, _) => cast (length incol)
getFirstToken : TokenMap a -> List Char ->
Maybe (TokenData a, Int, Int, List Char)
Maybe (WithBounds a, Int, Int, List Char)
getFirstToken [] str = Nothing
getFirstToken ((lex, fn) :: ts) str
= case scan lex [] str of
Just (tok, rest) => Just (MkToken line col (fn (pack (reverse tok))),
line + cast (countNLs tok),
getCols tok col, rest)
Just (tok, rest) =>
let line' = line + cast (countNLs tok)
col' = getCols tok col in
Just (MkBounded (fn (fastPack (reverse tok))) False (MkBounds line col line' col'),
line', col', rest)
Nothing => getFirstToken ts str
||| Given a mapping from lexers to token generating functions (the
@@ -197,14 +170,14 @@ tokenise pred line col acc tmap str
||| and the line, column, and remainder of the input at the first point in the
||| string where there are no recognised tokens.
export
lex : TokenMap a -> String -> (List (TokenData a), (Int, Int, String))
lex : TokenMap a -> String -> (List (WithBounds a), (Int, Int, String))
lex tmap str
= let (ts, (l, c, str')) = tokenise (const False) 0 0 [] tmap (fastUnpack str) in
(ts, (l, c, pack str'))
= let (ts, (l, c, str')) = tokenise (const False) 0 0 [] tmap (unpack str) in
(ts, (l, c, fastPack str'))
export
lexTo : (TokenData a -> Bool) ->
TokenMap a -> String -> (List (TokenData a), (Int, Int, String))
lexTo : (a -> Bool) ->
TokenMap a -> String -> (List (WithBounds a), (Int, Int, String))
lexTo pred tmap str
= let (ts, (l, c, str')) = tokenise pred 0 0 [] tmap (fastUnpack str) in
(ts, (l, c, pack str'))
= let (ts, (l, c, str')) = tokenise pred 0 0 [] tmap (unpack str) in
(ts, (l, c, fastPack str'))
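The exported `scan` can also be probed directly; as `tokenise` above shows (it reverses before packing), the consumed characters come back in reverse order:

```
import Text.Lexer

scanDemo : Maybe (List Char, List Char)
scanDemo = scan (exact "ab") [] (unpack "abc")
-- Just (['b', 'a'], ['c'])
```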

View File

@@ -0,0 +1,153 @@
module Text.Lexer.Tokenizer
import Data.List
import Data.Either
import Data.Nat
import Data.String
import Data.String.Extra
import Text.Lexer.Core
import Text.Lexer
import Text.PrettyPrint.Prettyprinter
import Text.PrettyPrint.Prettyprinter.Util
import public Control.Delayed
import public Text.Bounded
%default total
||| Description of a language's tokenization rule.
export
data Tokenizer : (tokenType : Type) -> Type where
Match : Lexer -> (String -> tokenType) -> Tokenizer tokenType
Compose : (begin : Lexer) ->
(mapBegin : String -> tokenType) ->
(tagger : String -> tag) ->
(middle : Inf (tag -> Tokenizer tokenType)) ->
(end : tag -> Lexer) ->
(mapEnd : String -> tokenType) ->
Tokenizer tokenType
Alt : Tokenizer tokenType -> Lazy (Tokenizer tokenType) -> Tokenizer tokenType
||| Alternative tokenizer rules.
export
(<|>) : Tokenizer t -> Lazy (Tokenizer t) -> Tokenizer t
(<|>) = Alt
||| Match on a recogniser and cast the string to a token.
export
match : Lexer -> (String -> a) -> Tokenizer a
match = Match
||| Compose another tokenizer. The composed language is quoted between
||| a begin lexer and an end lexer. The begin token can be used to generate
||| the inner tokenizer and the end lexer.
export
compose : (begin : Lexer) ->
(mapBegin : String -> a) ->
(tagger : String -> tag) ->
(middle : Inf (tag -> Tokenizer a)) ->
(end : tag -> Lexer) ->
(mapEnd : String -> a) ->
Tokenizer a
compose = Compose
||| The reason why the tokenizer stopped making progress.
||| @ ComposeNotClosing carries the span of the unclosed begin token in the
||| form of `(startLine, startCol), (endLine, endCol)`.
public export
data StopReason = EndInput | NoRuleApply | ComposeNotClosing (Int, Int) (Int, Int)
export
Show StopReason where
show EndInput = "EndInput"
show NoRuleApply = "NoRuleApply"
show (ComposeNotClosing start end) = "ComposeNotClosing " ++ show start ++ " " ++ show end
export
Pretty StopReason where
pretty EndInput = pretty "EndInput"
pretty NoRuleApply = pretty "NoRuleApply"
pretty (ComposeNotClosing start end) = "ComposeNotClosing" <++> pretty start <++> pretty end
tokenise : Lexer ->
Tokenizer a ->
(line, col : Int) -> List (WithBounds a) ->
List Char ->
(List (WithBounds a), (StopReason, Int, Int, List Char))
tokenise reject tokenizer line col acc [] = (reverse acc, EndInput, (line, col, []))
tokenise reject tokenizer line col acc str
= case scan reject [] str of
Just _ => (reverse acc, (EndInput, line, col, str))
Nothing => case getFirstMatch tokenizer str of
Right (toks, line', col', rest) =>
-- assert total because getFirstMatch must consume something
assert_total (tokenise reject tokenizer line' col' (toks ++ acc) rest)
Left reason => (reverse acc, reason, (line, col, str))
where
countNLs : List Char -> Nat
countNLs str = List.length (filter (== '\n') str)
getCols : List Char -> Int -> Int
getCols x c
= case span (/= '\n') x of
(incol, []) => c + cast (length incol)
(incol, _) => cast (length incol)
-- get the next lexeme using the given `Lexer`, its position and the input
-- Returns the lexeme, the new position and the rest of the input
-- If the lexer fails, this returns `Nothing`
getNext : (lexer : Lexer) -> (line, col : Int) -> (input : List Char) -> Maybe (String, Int, Int, List Char)
getNext lexer line col str =
let Just (token, rest) = scan lexer [] str
| _ => Nothing
line' = line + cast (countNLs token)
col' = getCols token col
tokenStr = fastPack $ reverse token
in pure (tokenStr, line', col', rest)
getFirstMatch : Tokenizer a -> List Char ->
Either StopReason (List (WithBounds a), Int, Int, List Char)
getFirstMatch (Match lex fn) str
= let Just (tok, line', col', rest) = getNext lex line col str
| _ => Left NoRuleApply
tok' = MkBounded (fn tok) False (MkBounds line col line' col')
in Right ([tok'], line', col', rest)
getFirstMatch (Compose begin mapBegin tagger middleFn endFn mapEnd) str
= let Just (beginTok', line', col' , rest) = getNext begin line col str
| Nothing => Left NoRuleApply
tag = tagger beginTok'
middle = middleFn tag
end = endFn tag
beginTok'' = MkBounded (mapBegin beginTok') False (MkBounds line col line' col')
(midToks, (reason, line'', col'', rest'')) =
tokenise end middle line' col' [] rest
in case reason of
(ComposeNotClosing _ _) => Left reason
_ => let Just (endTok', lineEnd, colEnd, restEnd) =
getNext end line'' col'' rest''
| _ => Left $ ComposeNotClosing (line, col) (line', col')
endTok'' = MkBounded (mapEnd endTok') False (MkBounds line'' col'' lineEnd colEnd)
in Right ([endTok''] ++ reverse midToks ++ [beginTok''], lineEnd, colEnd, restEnd)
getFirstMatch (Alt t1 t2) str
= case getFirstMatch t1 str of
Right result => Right result
Left reason@(ComposeNotClosing _ _) => Left reason
Left _ => getFirstMatch t2 str
export
lexTo : Lexer ->
Tokenizer a ->
String ->
(List (WithBounds a), (StopReason, Int, Int, String))
lexTo reject tokenizer str
= let (ts, reason, (l, c, str')) =
tokenise reject tokenizer 0 0 [] (fastUnpack str) in
(ts, reason, (l, c, fastPack str'))
||| Given a tokenizer and an input string, return a list of recognised tokens,
||| and the line, column, and remainder of the input at the first point in the string
||| where there are no recognised tokens.
export
lex : Tokenizer a -> String -> (List (WithBounds a), (StopReason, Int, Int, String))
lex tokenizer str = lexTo (pred $ const False) tokenizer str
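A hypothetical two-rule `Tokenizer` built from the combinators above, with `digits` and `spaces` being the usual `Text.Lexer` recognisers; `lex` is qualified to pick the `Tokenizer` variant:

```
import Text.Lexer
import Text.Lexer.Tokenizer

data Tok = Num String | Sp

myTokenizer : Tokenizer Tok
myTokenizer = match digits Num
          <|> match spaces (const Sp)

demo : (List (WithBounds Tok), (StopReason, Int, Int, String))
demo = Tokenizer.lex myTokenizer "12 34"
```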

View File

@@ -0,0 +1,234 @@
||| A simple module to process 'literate' documents.
|||
||| The module uses a lexer to split the document into code blocks,
||| delineated by user-defined markers, and code lines that are
||| indicated by a line marker. The lexer returns a document stripped
||| of non-code elements but preserving the original document's line
||| count. Column numbering of code lines is not preserved.
|||
||| The underlying tokeniser is greedy.
|||
||| Once it identifies a line marker, it reads a prettifying space and then
||| consumes until the end of the line. Once it identifies a starting code
||| block marker, the lexer will consume input until the next
||| identifiable end block is encountered. Any other content is
||| treated as part of the original document.
|||
||| Thus, the input literate files *must* be well-formed with respect
||| to code line markers and code blocks.
|||
||| A further restriction is that literate documents cannot contain
||| the markers within the document's main text: This will confuse the
||| lexer.
|||
module Text.Literate
import Text.Lexer
import Data.List
import Data.List1
import Data.List.Views
import Data.String
import Data.String.Extra
%default total
untilEOL : Recognise False
untilEOL = manyUntil newline any
line : String -> Lexer
line s = exact s <+> (newline <|> space <+> untilEOL)
block : String -> String -> Lexer
block s e = surround (exact s <+> untilEOL) (exact e <+> untilEOL) any
notCodeLine : Lexer
notCodeLine = newline
<|> any <+> untilEOL
data Token = CodeBlock String String String
| Any String
| CodeLine String String
Show Token where
showPrec d (CodeBlock l r x) = showCon d "CodeBlock" $ showArg l ++ showArg r ++ showArg x
showPrec d (Any x) = showCon d "Any" $ showArg x
showPrec d (CodeLine m x) = showCon d "CodeLine" $ showArg m ++ showArg x
rawTokens : (delims : List (String, String))
-> (markers : List String)
-> TokenMap (Token)
rawTokens delims ls =
map (\(l,r) => (block l r, CodeBlock (trim l) (trim r))) delims
++ map (\m => (line m, CodeLine (trim m))) ls
++ [(notCodeLine, Any)]
namespace Compat
-- `reduce` below was depending on the old behaviour of `lines` before #1585
-- was merged. That old `lines` function is added here to preserve behaviour
-- of `reduce`.
lines' : List Char -> List1 (List Char)
lines' [] = singleton []
lines' s = case break isNL s of
(l, s') => l ::: case s' of
[] => []
_ :: s'' => forget $ lines' (assert_smaller s s'')
export
lines : String -> List1 String
lines s = map pack (lines' (unpack s))
||| Merge the tokens into a single source file.
reduce : List (WithBounds Token) -> List String -> String
reduce [] acc = fastAppend (reverse acc)
reduce (MkBounded (Any x) _ _ :: rest) acc =
-- newline will always be tokenized as a single token
if x == "\n"
then reduce rest ("\n"::acc)
else reduce rest acc
reduce (MkBounded (CodeLine m src) _ _ :: rest) acc =
if m == trim src
then reduce rest ("\n"::acc)
else reduce rest ((substr (length m + 1) -- remove space to right of marker.
(length src)
src
)::acc)
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc with (Compat.lines src) -- Strip the delimiters surrounding the block.
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: ys) with (snocList ys)
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: []) | Empty = reduce rest acc -- 2
reduce (MkBounded (CodeBlock l r src) _ _ :: rest) acc | (s ::: (srcs ++ [f])) | (Snoc f srcs rec) =
-- the "\n" counts for the open deliminator; the closing deliminator should always be followed by a (Any "\n"), so we don't add a newline
reduce rest (((unlines srcs) ++ "\n") :: "\n" :: acc)
-- [ NOTE ] 1 & 2 shouldn't happen as code blocks are well formed i.e. have two deliminators.
public export
record LiterateError where
constructor MkLitErr
line : Int
column : Int
input : String
||| Description of literate styles.
|||
||| A 'literate' style comprises
|||
||| + a list of code block delimiters (`deliminators`);
||| + a list of code line markers (`line_markers`); and
||| + a list of known file extensions `file_extensions`.
|||
||| Some example specifications:
|||
||| + Bird Style
|||
|||```
|||MkLitStyle Nil [">"] [".lidr"]
|||```
|||
||| + Literate Haskell (for LaTeX)
|||
|||```
|||MkLitStyle [("\\begin{code}", "\\end{code}"),("\\begin{spec}","\\end{spec}")]
||| Nil
||| [".lhs", ".tex"]
|||```
|||
||| + OrgMode
|||
|||```
|||MkLitStyle [("#+BEGIN_SRC idris","#+END_SRC"), ("#+COMMENT idris","#+END_COMMENT")]
||| ["#+IDRIS:"]
||| [".org"]
|||```
|||
||| + Common Mark
|||
|||```
|||MkLitStyle [("```idris","```"), ("<!-- idris","--!>")]
||| Nil
||| [".md", ".markdown"]
|||```
|||
public export
record LiterateStyle where
constructor MkLitStyle
||| The pairs of start and end tags for code blocks.
deliminators : List (String, String)
||| Line markers that indicate a line contains code.
line_markers : List String
||| Recognised file extensions. Not used by the module, but will be
||| of use when connecting to code that reads in the original source
||| files.
file_extensions : List String
||| Given a 'literate specification' extract the code from the
||| literate source file (`litStr`) that follows the presented style.
|||
||| @specification The literate specification to use.
||| @litStr The literate source file.
|||
||| Returns a `LiterateError` if the literate file contains malformed
||| code blocks or code lines.
|||
export
extractCode : (specification : LiterateStyle)
-> (litStr : String)
-> Either LiterateError String
extractCode (MkLitStyle delims markers exts) str =
case lex (rawTokens delims markers) str of
(toks, (_,_,"")) => Right (reduce toks Nil)
(_, (l,c,i)) => Left (MkLitErr l c i)
||| Synonym for `extractCode`.
export
unlit : (specification : LiterateStyle)
-> (litStr : String)
-> Either LiterateError String
unlit = extractCode
||| Is the provided line marked up using a line marker?
|||
||| If the line begins with any one of the style's line markers, return
||| the marker and the line stripped of that marker. Otherwise, return
||| Nothing and the unmarked line.
export
isLiterateLine : (specification : LiterateStyle)
-> (str : String)
-> Pair (Maybe String) String
isLiterateLine (MkLitStyle delims markers _) str with (lex (rawTokens delims markers) str)
isLiterateLine (MkLitStyle delims markers _) str | ([MkBounded (CodeLine m str') _ _], (_,_, "")) = (Just m, str')
isLiterateLine (MkLitStyle delims markers _) str | (_, _) = (Nothing, str)
||| Given a 'literate specification' embed the given code using the
||| literate style provided.
|||
||| If the style uses delimiters to denote code blocks, use the first
||| pair of delimiters in the style. Otherwise use the first line marker
||| in the style. If there is **no style** return the presented code
||| string unembedded.
|||
|||
||| @specification The literate specification to use.
||| @code The code to embed,
|||
|||
export
embedCode : (specification : LiterateStyle)
-> (code : String)
-> String
embedCode (MkLitStyle ((s,e)::delims) _ _) str = unlines [s,str,e]
embedCode (MkLitStyle Nil (m::markers) _) str = unwords [m, str]
embedCode (MkLitStyle _ _ _) str = str
||| Synonym for `embedCode`
export
relit : (specification : LiterateStyle)
-> (code : String)
-> String
relit = embedCode
-- --------------------------------------------------------------------- [ EOF ]
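Putting the module to work with the Bird style quoted in the doc comment above; a minimal sketch:

```
import Text.Literate

birdStyle : LiterateStyle
birdStyle = MkLitStyle Nil [">"] [".lidr"]

unlitDemo : Either LiterateError String
unlitDemo = extractCode birdStyle "Some prose.\n> x : Nat\n> x = 2\n"
```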

View File

@@ -2,49 +2,50 @@ module Text.Parser
import Data.Bool
import Data.List
import Data.List1
import Data.Nat
import Data.Vect
import public Data.List1
import public Text.Parser.Core
import public Text.Quantity
import public Text.Token
%default total
||| Parse a terminal based on a kind of token.
export
match : (Eq k, TokenKind k) =>
(kind : k) ->
Grammar (Token k) True (TokType kind)
match kind = terminal "Unrecognised input" $
\(Tok kind' text) => if kind' == kind
then Just $ tokValue kind text
else Nothing
Grammar state (Token k) True (TokType kind)
match k = terminal "Unrecognised input" $
\t => if t.kind == k
then Just $ tokValue k t.text
else Nothing
||| Optionally parse a thing, with a default value if the grammar doesn't
||| match. May match the empty input.
export
option : {c : Bool} ->
(def : a) -> (p : Grammar tok c a) ->
Grammar tok False a
(def : a) -> (p : Grammar state tok c a) ->
Grammar state tok False a
option {c = False} def p = p <|> pure def
option {c = True} def p = p <|> pure def
||| Optionally parse a thing.
||| To provide a default value, use `option` instead.
export
optional : {c : _} ->
(p : Grammar tok c a) ->
Grammar tok False (Maybe a)
optional : {c : Bool} ->
(p : Grammar state tok c a) ->
Grammar state tok False (Maybe a)
optional p = option Nothing (map Just p)
||| Try to parse one thing or the other, producing a value that indicates
||| which option succeeded. If both would succeed, the left option
||| takes priority.
export
choose : {c1, c2 : _} ->
(l : Grammar tok c1 a) ->
(r : Grammar tok c2 b) ->
Grammar tok (c1 && c2) (Either a b)
choose : {c1, c2 : Bool} ->
(l : Grammar state tok c1 a) ->
(r : Grammar state tok c2 b) ->
Grammar state tok (c1 && c2) (Either a b)
choose l r = map Left l <|> map Right r
||| Produce a grammar by applying a function to each element of a container,
@@ -52,48 +53,42 @@ choose l r = map Left l <|> map Right r
||| container is empty.
export
choiceMap : {c : Bool} ->
(a -> Grammar tok c b) ->
(a -> Grammar state tok c b) ->
Foldable t => t a ->
Grammar tok c b
choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
f x <|> acc)
(fail "No more options") xs
Grammar state tok c b
choiceMap {c} f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
f x <|> acc)
(fail "No more options") xs
%hide Prelude.(>>=)
||| Try each grammar in a container until the first one succeeds.
||| Fails if the container is empty.
export
choice : {c : _} ->
Foldable t => t (Grammar tok c a) ->
Grammar tok c a
choice = Parser.choiceMap id
choice : Foldable t =>
{c : Bool} ->
t (Grammar state tok c a) ->
Grammar state tok c a
choice = choiceMap id
mutual
||| Parse one or more things
export
some : Grammar tok True a ->
Grammar tok True (List1 a)
some : Grammar state tok True a ->
Grammar state tok True (List1 a)
some p = pure (!p ::: !(many p))
||| Parse zero or more things (may match the empty input)
export
many : Grammar tok True a ->
Grammar tok False (List a)
many : Grammar state tok True a ->
Grammar state tok False (List a)
many p = option [] (forget <$> some p)
||| Parse one or more instances of `p`, returning the parsed items and proof
||| that the resulting list is non-empty.
export
some' : (p : Grammar tok True a) ->
Grammar tok True (xs : List a ** NonEmpty xs)
some' p = pure (!p :: !(many p) ** IsNonEmpty)
mutual
private
count1 : (q : Quantity) ->
(p : Grammar tok True a) ->
Grammar tok True (List a)
(p : Grammar state tok True a) ->
Grammar state tok True (List a)
count1 q p = do x <- p
seq (count q p)
(\xs => pure (x :: xs))
@@ -101,31 +96,23 @@ mutual
||| Parse `p`, repeated as specified by `q`, returning the list of values.
export
count : (q : Quantity) ->
(p : Grammar tok True a) ->
Grammar tok (isSucc (min q)) (List a)
(p : Grammar state tok True a) ->
Grammar state tok (isSucc (min q)) (List a)
count (Qty Z Nothing) p = many p
count (Qty Z (Just Z)) _ = pure []
count (Qty Z (Just (S max))) p = option [] $ count1 (atMost max) p
count (Qty (S min) Nothing) p = count1 (atLeast min) p
count (Qty (S min) (Just Z)) _ = fail "Quantity out of order"
count (Qty (S min) (Just (S max))) p = count1 (between min max) p
||| Parse `p` `n` times, returning the vector of values.
export
countExactly : (n : Nat) ->
(p : Grammar tok True a) ->
Grammar tok (isSucc n) (Vect n a)
countExactly Z p = Empty []
countExactly (S k) p = [| p :: countExactly k p |]
count (Qty (S min) (Just (S max))) p = count1 (between (S min) max) p
mutual
||| Parse one or more instances of `p` until `end` succeeds, returning the
||| list of values from `p`. Guaranteed to consume input.
export
someTill : {c : Bool} ->
(end : Grammar tok c e) ->
(p : Grammar tok True a) ->
Grammar tok True (List1 a)
(end : Grammar state tok c e) ->
(p : Grammar state tok True a) ->
Grammar state tok True (List1 a)
someTill {c} end p = do x <- p
seq (manyTill end p)
(\xs => pure (x ::: xs))
@@ -134,9 +121,9 @@ mutual
||| list of values from `p`. Guaranteed to consume input if `end` consumes.
export
manyTill : {c : Bool} ->
(end : Grammar tok c e) ->
(p : Grammar tok True a) ->
Grammar tok c (List a)
(end : Grammar state tok c e) ->
(p : Grammar state tok True a) ->
Grammar state tok c (List a)
manyTill {c} end p = rewrite sym (andTrueNeutral c) in
map (const []) end <|> (forget <$> someTill end p)
@@ -145,28 +132,28 @@ mutual
||| returning its value.
export
afterSome : {c : Bool} ->
(skip : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok True a
afterSome skip p = do ignore skip
(skip : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok True a
afterSome skip p = do ignore $ skip
afterMany skip p
||| Parse zero or more instance of `skip` until `p` is encountered,
||| returning its value.
export
afterMany : {c : Bool} ->
(skip : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok c a
(skip : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok c a
afterMany {c} skip p = rewrite sym (andTrueNeutral c) in
p <|> afterSome skip p
||| Parse one or more things, each separated by another thing.
export
sepBy1 : {c : Bool} ->
(sep : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok c (List1 a)
(sep : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok c (List1 a)
sepBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
[| p ::: many (sep *> p) |]
@@ -174,18 +161,18 @@ sepBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
||| match the empty input.
export
sepBy : {c : Bool} ->
(sep : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok False (List a)
(sep : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok False (List a)
sepBy sep p = option [] $ forget <$> sepBy1 sep p
||| Parse one or more instances of `p` separated by and optionally terminated by
||| `sep`.
export
sepEndBy1 : {c : Bool} ->
(sep : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok c (List1 a)
(sep : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok c (List1 a)
sepEndBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
sepBy1 sep p <* optional sep
@@ -193,32 +180,49 @@ sepEndBy1 {c} sep p = rewrite sym (orFalseNeutral c) in
||| by `sep`. Will not match a separator by itself.
export
sepEndBy : {c : Bool} ->
(sep : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok False (List a)
(sep : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok False (List a)
sepEndBy sep p = option [] $ forget <$> sepEndBy1 sep p
||| Parse one or more instances of `p`, separated and terminated by `sep`.
export
endBy1 : {c : Bool} ->
(sep : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok True (List1 a)
endBy1 sep p = some $ rewrite sym (orTrueTrue c) in
p <* sep
(sep : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok True (List1 a)
endBy1 {c} sep p = some $ rewrite sym (orTrueTrue c) in
p <* sep
export
endBy : {c : Bool} ->
(sep : Grammar tok True s) ->
(p : Grammar tok c a) ->
Grammar tok False (List a)
(sep : Grammar state tok True s) ->
(p : Grammar state tok c a) ->
Grammar state tok False (List a)
endBy sep p = option [] $ forget <$> endBy1 sep p
||| Parse an instance of `p` that is between `left` and `right`.
export
between : {c : _} ->
(left : Grammar tok True l) ->
(right : Inf (Grammar tok True r)) ->
(p : Inf (Grammar tok c a)) ->
Grammar tok True a
between : {c : Bool} ->
(left : Grammar state tok True l) ->
(right : Grammar state tok True r) ->
(p : Grammar state tok c a) ->
Grammar state tok True a
between left right contents = left *> contents <* right
export
location : Grammar state token False (Int, Int)
location = startBounds <$> position
export
endLocation : Grammar state token False (Int, Int)
endLocation = endBounds <$> position
export
column : Grammar state token False Int
column = snd <$> location
public export
when : Bool -> Lazy (Grammar state token False ()) -> Grammar state token False ()
when True y = y
when False y = pure ()
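Finally, a sketch of running a `Grammar` under the new signature: the `state` parameter is instantiated to `()`, raw `Char` tokens are wrapped with `irrelevantBounds`, and `parse` is assumed to return `Right (value, leftoverTokens)` on success, as `parseJSON` above suggests.

```
import Text.Parser
import Text.Bounded

digitG : Grammar () Char True Char
digitG = terminal "expected digit"
                  (\c => if isDigit c then Just c else Nothing)

runDigits : String -> Maybe (List1 Char)
runDigits s = case parse (some digitG) (map irrelevantBounds (unpack s)) of
                Right (ds, []) => Just ds
                _              => Nothing
```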

View File

@@ -1,47 +1,68 @@
module Text.Parser.Core
import public Control.Delayed
import Data.Bool
import Data.List
import Data.List1
import public Control.Delayed
import public Text.Bounded
%default total
-- TODO: Add some primitives for helping with error messages.
-- e.g. perhaps set a string to state what we're currently trying to
-- parse, or to say what the next expected token is in words
||| Description of a language's grammar. The `tok` parameter is the type
||| of tokens, and the `consumes` flag is True if the language is guaranteed
||| to be non-empty - that is, successfully parsing the language is guaranteed
||| to consume some input.
public export
data Grammar : (tok : Type) -> (consumes : Bool) -> Type -> Type where
Empty : (val : ty) -> Grammar tok False ty
Terminal : String -> (tok -> Maybe a) -> Grammar tok True a
NextIs : String -> (tok -> Bool) -> Grammar tok False tok
EOF : Grammar tok False ()
export
data Grammar : (state : Type) -> (tok : Type) -> (consumes : Bool) -> Type -> Type where
Empty : (val : ty) -> Grammar state tok False ty
Terminal : String -> (tok -> Maybe a) -> Grammar state tok True a
NextIs : String -> (tok -> Bool) -> Grammar state tok False tok
EOF : Grammar state tok False ()
Fail : Bool -> String -> Grammar tok c ty
Commit : Grammar tok False ()
MustWork : Grammar tok c a -> Grammar tok c a
Fail : (location : Maybe Bounds) -> (fatal : Bool) -> String -> Grammar state tok c ty
Try : Grammar state tok c ty -> Grammar state tok c ty
SeqEat : {c2 : _} -> Grammar tok True a -> Inf (a -> Grammar tok c2 b) ->
Grammar tok True b
SeqEmpty : {c1, c2 : _} -> Grammar tok c1 a -> (a -> Grammar tok c2 b) ->
Grammar tok (c1 || c2) b
Commit : Grammar state tok False ()
MustWork : Grammar state tok c a -> Grammar state tok c a
ThenEat : {c2 : _} -> Grammar tok True () -> Inf (Grammar tok c2 b) ->
Grammar tok True b
ThenEmpty : {c1, c2 : _} -> Grammar tok c1 () -> Grammar tok c2 b ->
Grammar tok (c1 || c2) b
SeqEat : {c2 : Bool} ->
Grammar state tok True a -> Inf (a -> Grammar state tok c2 b) ->
Grammar state tok True b
SeqEmpty : {c1, c2 : Bool} ->
Grammar state tok c1 a -> (a -> Grammar state tok c2 b) ->
Grammar state tok (c1 || c2) b
Alt : {c1, c2 : _} -> Grammar tok c1 ty -> Grammar tok c2 ty ->
Grammar tok (c1 && c2) ty
ThenEat : {c2 : Bool} ->
Grammar state tok True () -> Inf (Grammar state tok c2 a) ->
Grammar state tok True a
ThenEmpty : {c1, c2 : Bool} ->
Grammar state tok c1 () -> Grammar state tok c2 a ->
Grammar state tok (c1 || c2) a
Alt : {c1, c2 : Bool} ->
Grammar state tok c1 ty -> Lazy (Grammar state tok c2 ty) ->
Grammar state tok (c1 && c2) ty
Bounds : Grammar state tok c ty -> Grammar state tok c (WithBounds ty)
Position : Grammar state tok False Bounds
Act : state -> Grammar state tok False ()
||| Sequence two grammars. If either consumes some input, the sequence is
||| guaranteed to consume some input. If the first one consumes input, the
||| second is allowed to be recursive (because it means some input has been
||| consumed and therefore the input is smaller)
public export %inline %tcinline
export %inline
(>>=) : {c1, c2 : Bool} ->
Grammar tok c1 a ->
inf c1 (a -> Grammar tok c2 b) ->
Grammar tok (c1 || c2) b
Grammar state tok c1 a ->
inf c1 (a -> Grammar state tok c2 b) ->
Grammar state tok (c1 || c2) b
(>>=) {c1 = False} = SeqEmpty
(>>=) {c1 = True} = SeqEat
(>>=) {c1 = True} = SeqEat
||| Sequence two grammars. If either consumes some input, the sequence is
||| guaranteed to consume some input. If the first one consumes input, the
@@ -49,279 +70,305 @@ public export %inline %tcinline
||| consumed and therefore the input is smaller)
public export %inline %tcinline
(>>) : {c1, c2 : Bool} ->
Grammar tok c1 () ->
inf c1 (Grammar tok c2 a) ->
Grammar tok (c1 || c2) a
Grammar state tok c1 () ->
inf c1 (Grammar state tok c2 a) ->
Grammar state tok (c1 || c2) a
(>>) {c1 = False} = ThenEmpty
(>>) {c1 = True} = ThenEat
||| Sequence two grammars. If either consumes some input, the sequence is
||| guaranteed to consume input. This is an explicitly non-infinite version
||| of `>>=`.
export
seq : {c1, c2 : _} -> Grammar tok c1 a ->
(a -> Grammar tok c2 b) ->
Grammar tok (c1 || c2) b
export %inline
seq : {c1,c2 : Bool} ->
Grammar state tok c1 a ->
(a -> Grammar state tok c2 b) ->
Grammar state tok (c1 || c2) b
seq = SeqEmpty
||| Sequence a grammar followed by the grammar it returns.
export
join : {c1, c2 : Bool} ->
Grammar tok c1 (Grammar tok c2 a) ->
Grammar tok (c1 || c2) a
export %inline
join : {c1,c2 : Bool} ->
Grammar state tok c1 (Grammar state tok c2 a) ->
Grammar state tok (c1 || c2) a
join {c1 = False} p = SeqEmpty p id
join {c1 = True} p = SeqEat p id
||| Give two alternative grammars. If both consume, the combination is
||| guaranteed to consume.
export
(<|>) : {c1, c2 : _} ->
Grammar tok c1 ty ->
Grammar tok c2 ty ->
Grammar tok (c1 && c2) ty
(<|>) = Alt
||| Allows the result of a grammar to be mapped to a different value.
export
{c : _} -> Functor (Grammar tok c) where
{c : _} ->
Functor (Grammar state tok c) where
map f (Empty val) = Empty (f val)
map f (Fail fatal msg) = Fail fatal msg
map f (Fail bd fatal msg) = Fail bd fatal msg
map f (Try g) = Try (map f g)
map f (MustWork g) = MustWork (map f g)
map f (Terminal msg g) = Terminal msg (\t => map f (g t))
map f (Terminal msg g) = Terminal msg (map f . g)
map f (Alt x y) = Alt (map f x) (map f y)
map f (SeqEat act next)
= SeqEat act (\val => map f (next val))
map f (SeqEmpty act next)
= SeqEmpty act (\val => map f (next val))
= SeqEmpty act (\ val => map f (next val))
map f (ThenEat act next)
= ThenEat act (map f next)
map f (ThenEmpty act next)
= ThenEmpty act (map f next)
map {c} f (Bounds act)
= rewrite sym $ orFalseNeutral c in
SeqEmpty (Bounds act) (Empty . f) -- Bounds (map f act)
-- The remaining constructors (NextIs, EOF, Commit) have a fixed type,
-- so a sequence must be used.
map {c = False} f p = SeqEmpty p (Empty . f)
||| Give two alternative grammars. If both consume, the combination is
||| guaranteed to consume.
export %inline
(<|>) : {c1,c2 : Bool} ->
Grammar state tok c1 ty ->
Lazy (Grammar state tok c2 ty) ->
Grammar state tok (c1 && c2) ty
(<|>) = Alt
infixr 2 <||>
||| Take the tagged disjunction of two grammars. If both consume, the
||| combination is guaranteed to consume.
export
(<||>) : {c1,c2 : Bool} ->
Grammar state tok c1 a ->
Lazy (Grammar state tok c2 b) ->
Grammar state tok (c1 && c2) (Either a b)
(<||>) p q = (Left <$> p) <|> (Right <$> q)
||| Sequence a grammar with value type `a -> b` and a grammar
||| with value type `a`. If both succeed, apply the function
||| from the first grammar to the value from the second grammar.
||| Guaranteed to consume if either grammar consumes.
export
(<*>) : {c1, c2 : _} ->
Grammar tok c1 (a -> b) ->
Inf (Grammar tok c2 a) ->
Grammar tok (c1 || c2) b
(<*>) {c1 = False} x y = SeqEmpty x (\f => map f y)
(<*>) {c1 = True } x y = SeqEmpty x (\f => map f (Force y))
export %inline
(<*>) : {c1, c2 : Bool} ->
Grammar state tok c1 (a -> b) ->
Grammar state tok c2 a ->
Grammar state tok (c1 || c2) b
(<*>) x y = SeqEmpty x (\f => map f y)
||| Sequence two grammars. If both succeed, use the value of the first one.
||| Guaranteed to consume if either grammar consumes.
export
(<*) : {c1, c2 : _} ->
Grammar tok c1 a ->
Inf (Grammar tok c2 b) ->
Grammar tok (c1 || c2) a
export %inline
(<*) : {c1,c2 : Bool} ->
Grammar state tok c1 a ->
Grammar state tok c2 b ->
Grammar state tok (c1 || c2) a
(<*) x y = map const x <*> y
||| Sequence two grammars. If both succeed, use the value of the second one.
||| Guaranteed to consume if either grammar consumes.
export
(*>) : {c1, c2 : _} ->
Grammar tok c1 a ->
Inf (Grammar tok c2 b) ->
Grammar tok (c1 || c2) b
export %inline
(*>) : {c1,c2 : Bool} ->
Grammar state tok c1 a ->
Grammar state tok c2 b ->
Grammar state tok (c1 || c2) b
(*>) x y = map (const id) x <*> y
export %inline
act : state -> Grammar state tok False ()
act = Act
||| Produce a grammar that can parse a different type of token by providing a
||| function converting the new token type into the original one.
export
mapToken : (a -> b) -> Grammar b c ty -> Grammar a c ty
mapToken : (a -> b) -> Grammar state b c ty -> Grammar state a c ty
mapToken f (Empty val) = Empty val
mapToken f (Terminal msg g) = Terminal msg (g . f)
mapToken f (NextIs msg g) = SeqEmpty (NextIs msg (g . f)) (Empty . f)
mapToken f EOF = EOF
mapToken f (Fail fatal msg) = Fail fatal msg
mapToken f (Fail bd fatal msg) = Fail bd fatal msg
mapToken f (Try g) = Try (mapToken f g)
mapToken f (MustWork g) = MustWork (mapToken f g)
mapToken f Commit = Commit
mapToken f (SeqEat act next)
= SeqEat (mapToken f act) (\x => mapToken f (next x))
= SeqEat (mapToken f act) (\x => mapToken f (next x))
mapToken f (SeqEmpty act next)
= SeqEmpty (mapToken f act) (\x => mapToken f (next x))
= SeqEmpty (mapToken f act) (\x => mapToken f (next x))
mapToken f (ThenEat act next)
= ThenEat (mapToken f act) (mapToken f next)
= ThenEat (mapToken f act) (mapToken f next)
mapToken f (ThenEmpty act next)
= ThenEmpty (mapToken f act) (mapToken f next)
= ThenEmpty (mapToken f act) (mapToken f next)
mapToken f (Alt x y) = Alt (mapToken f x) (mapToken f y)
mapToken f (Bounds act) = Bounds (mapToken f act)
mapToken f Position = Position
mapToken f (Act action) = Act action
||| Always succeed with the given value.
export
pure : (val : ty) -> Grammar tok False ty
export %inline
pure : (val : ty) -> Grammar state tok False ty
pure = Empty
||| Check whether the next token satisfies a predicate
export
nextIs : String -> (tok -> Bool) -> Grammar tok False tok
export %inline
nextIs : String -> (tok -> Bool) -> Grammar state tok False tok
nextIs = NextIs
||| Look at the next token in the input
export
peek : Grammar tok False tok
export %inline
peek : Grammar state tok False tok
peek = nextIs "Unrecognised token" (const True)
||| Succeeds if running the predicate on the next token returns Just x,
||| returning x. Otherwise fails.
export
terminal : String -> (tok -> Maybe a) -> Grammar tok True a
export %inline
terminal : String -> (tok -> Maybe a) -> Grammar state tok True a
terminal = Terminal
||| Always fail with a message
export
fail : String -> Grammar tok c ty
fail = Fail False
export %inline
fail : String -> Grammar state tok c ty
fail = Fail Nothing False
export
fatalError : String -> Grammar tok c ty
fatalError = Fail True
||| Always fail with a message and a location
export %inline
failLoc : Bounds -> String -> Grammar state tok c ty
failLoc b = Fail (Just b) False
||| Fail with no possibility for recovery (i.e.
||| no alternative parsing can succeed).
export %inline
fatalError : String -> Grammar state tok c ty
fatalError = Fail Nothing True
||| Fail at the given location with no possibility for recovery (i.e.
||| no alternative parsing can succeed).
export %inline
fatalLoc : Bounds -> String -> Grammar state tok c ty
fatalLoc b = Fail (Just b) True
||| Catch a fatal error
export %inline
try : Grammar state tok c ty -> Grammar state tok c ty
try = Try
||| Succeed if the input is empty
export
eof : Grammar tok False ()
export %inline
eof : Grammar state tok False ()
eof = EOF
||| Commit to an alternative; if the current branch of an alternative
||| fails to parse, no more branches will be tried
export
commit : Grammar tok False ()
export %inline
commit : Grammar state tok False ()
commit = Commit
||| If the parser fails, treat it as a fatal error
export
mustWork : Grammar tok c ty -> Grammar tok c ty
export %inline
mustWork : {c : Bool} -> Grammar state tok c ty -> Grammar state tok c ty
mustWork = MustWork
data ParseResult : List tok -> (consumes : Bool) -> Type -> Type where
Failure : {xs : List tok} ->
(committed : Bool) -> (fatal : Bool) ->
(err : String) -> (rest : List tok) -> ParseResult xs c ty
EmptyRes : (committed : Bool) ->
(val : ty) -> (more : List tok) -> ParseResult more False ty
NonEmptyRes : {xs : List tok} ->
(committed : Bool) ->
(val : ty) -> (more : List tok) ->
ParseResult (x :: xs ++ more) c ty
export %inline
bounds : Grammar state tok c ty -> Grammar state tok c (WithBounds ty)
bounds = Bounds
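-- Not part of the diff: `bounds` attaches source positions to a result,
-- so callers can report located errors or highlight spans.
locatedChar : Grammar state Char True (WithBounds Char)
locatedChar = bounds (terminal "Expected any token" Just)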
-- Take the result of an alternative branch, reset the commit flag to
-- the commit flag from the outer alternative, and weaken the 'consumes'
-- flag to take both alternatives into account
weakenRes : {whatever, c : Bool} -> {xs : List tok} ->
(com' : Bool) ->
ParseResult xs c ty -> ParseResult xs (whatever && c) ty
weakenRes com' (Failure com fatal msg ts) = Failure com' fatal msg ts
weakenRes {whatever=True} com' (EmptyRes com val xs) = EmptyRes com' val xs
weakenRes {whatever=False} com' (EmptyRes com val xs) = EmptyRes com' val xs
weakenRes com' (NonEmptyRes {xs} com val more) = NonEmptyRes {xs} com' val more
doParse : (commit : Bool) ->
(act : Grammar tok c ty) ->
(xs : List tok) ->
ParseResult xs c ty
-- doParse com act xs with (sizeAccessible xs)
doParse com (Empty val) xs = EmptyRes com val xs
doParse com (Fail fatal str) [] = Failure com fatal str []
doParse com (Fail fatal str) (x :: xs) = Failure com fatal str (x :: xs)
doParse com Commit xs = EmptyRes True () xs
doParse com (MustWork g) xs =
let p' = doParse com g xs in
case p' of
Failure com' _ msg ts => Failure com' True msg ts
res => res
doParse com (Terminal err f) [] = Failure com False "End of input" []
doParse com (Terminal err f) (x :: xs)
= maybe
(Failure com False err (x :: xs))
(\a => NonEmptyRes com {xs=[]} a xs)
(f x)
doParse com EOF [] = EmptyRes com () []
doParse com EOF (x :: xs)
= Failure com False "Expected end of input" (x :: xs)
doParse com (NextIs err f) [] = Failure com False "End of input" []
doParse com (NextIs err f) (x :: xs)
= if f x
then EmptyRes com x (x :: xs)
else Failure com False err (x :: xs)
doParse com (Alt {c1} {c2} x y) xs
= let p' = doParse False x xs in
case p' of
Failure com' fatal msg ts
=> if com' || fatal
-- If the alternative had committed, don't try the
-- other branch (and reset commit flag)
then Failure com fatal msg ts
else weakenRes {whatever = c1} com (doParse False y xs)
-- Successfully parsed the first option, so use the outer commit flag
EmptyRes _ val xs => EmptyRes com val xs
NonEmptyRes {xs=xs'} _ val more => NonEmptyRes {xs=xs'} com val more
doParse com (SeqEmpty {c1} {c2} act next) xs
= let p' = assert_total (doParse {c = c1} com act xs) in
case p' of
Failure com fatal msg ts => Failure com fatal msg ts
EmptyRes com val xs => assert_total (doParse com (next val) xs)
NonEmptyRes {x} {xs=ys} com val more =>
case (assert_total (doParse com (next val) more)) of
Failure com' fatal msg ts => Failure com' fatal msg ts
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
doParse com (SeqEat act next) xs with (doParse com act xs)
doParse com (SeqEat act next) xs | Failure com' fatal msg ts
= Failure com' fatal msg ts
doParse com (SeqEat act next) (x :: (ys ++ more)) | (NonEmptyRes {xs=ys} com' val more)
= let p' = assert_total (doParse com' (next val) more) in
case p' of
Failure com' fatal msg ts => Failure com' fatal msg ts
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
doParse com (ThenEmpty {c1} {c2} act next) xs
= let p' = assert_total (doParse {c = c1} com act xs) in
case p' of
Failure com fatal msg ts => Failure com fatal msg ts
EmptyRes com val xs => assert_total (doParse com next xs)
NonEmptyRes {x} {xs=ys} com val more =>
case (assert_total (doParse com next more)) of
Failure com' fatal msg ts => Failure com' fatal msg ts
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
doParse com (ThenEat act next) xs with (doParse com act xs)
doParse com (ThenEat act next) xs | Failure com' fatal msg ts
= Failure com' fatal msg ts
doParse com (ThenEat act next) (x :: (ys ++ more)) | (NonEmptyRes {xs=ys} com' val more)
= let p' = assert_total (doParse com' next more) in
case p' of
Failure com' fatal msg ts => Failure com' fatal msg ts
EmptyRes com' val _ => NonEmptyRes {xs=ys} com' val more
NonEmptyRes {x=x1} {xs=xs1} com' val more' =>
rewrite appendAssociative (x :: ys) (x1 :: xs1) more' in
NonEmptyRes {xs = ys ++ (x1 :: xs1)} com' val more'
-- This next line is not strictly necessary, but it stops the coverage
-- checker taking a really long time and eating lots of memory...
-- doParse _ _ _ = Failure True True "Help the coverage checker!" []
export %inline
position : Grammar state tok False Bounds
position = Position
public export
data ParseError tok = Error String (List tok)
data ParsingError tok = Error String (Maybe Bounds)
data ParseResult : Type -> Type -> Type -> Type where
Failure : (committed : Bool) -> (fatal : Bool) ->
List1 (ParsingError tok) -> ParseResult state tok ty
Res : state -> (committed : Bool) ->
(val : WithBounds ty) -> (more : List (WithBounds tok)) -> ParseResult state tok ty
mergeWith : WithBounds ty -> ParseResult state tok sy -> ParseResult state tok sy
mergeWith x (Res s committed val more) = Res s committed (mergeBounds x val) more
mergeWith x v = v
doParse : Semigroup state => state -> (commit : Bool) ->
(act : Grammar state tok c ty) ->
(xs : List (WithBounds tok)) ->
ParseResult state tok ty
doParse s com (Empty val) xs = Res s com (irrelevantBounds val) xs
doParse s com (Fail location fatal str) xs
= Failure com fatal (Error str (location <|> (bounds <$> head' xs)) ::: Nil)
doParse s com (Try g) xs = case doParse s com g xs of
-- recover from fatal match but still propagate the 'commit'
Failure com _ errs => Failure com False errs
res => res
doParse s com Commit xs = Res s True (irrelevantBounds ()) xs
doParse s com (MustWork g) xs =
case assert_total (doParse s com g xs) of
Failure com' _ errs => Failure com' True errs
res => res
doParse s com (Terminal err f) [] = Failure com False (Error "End of input" Nothing ::: Nil)
doParse s com (Terminal err f) (x :: xs) =
case f x.val of
Nothing => Failure com False (Error err (Just x.bounds) ::: Nil)
Just a => Res s com (const a <$> x) xs
doParse s com EOF [] = Res s com (irrelevantBounds ()) []
doParse s com EOF (x :: xs) = Failure com False (Error "Expected end of input" (Just x.bounds) ::: Nil)
doParse s com (NextIs err f) [] = Failure com False (Error "End of input" Nothing ::: Nil)
doParse s com (NextIs err f) (x :: xs)
= if f x.val
then Res s com (removeIrrelevance x) (x :: xs)
else Failure com False (Error err (Just x.bounds) ::: Nil)
doParse s com (Alt {c1} {c2} x y) xs
= case doParse s False x xs of
Failure com' fatal errs
=> if com' || fatal
-- If the alternative had committed, don't try the
-- other branch (and reset commit flag)
then Failure com fatal errs
else case (assert_total doParse s False y xs) of
(Failure com'' fatal' errs') => if com'' || fatal'
-- Only add the errors together if the second branch
-- is also non-committed and non-fatal.
then Failure com'' fatal' errs'
else Failure False False (errs ++ errs')
(Res s _ val xs) => Res s com val xs
-- Successfully parsed the first option, so use the outer commit flag
Res s _ val xs => Res s com val xs
doParse s com (SeqEmpty act next) xs
= case assert_total (doParse s com act xs) of
Failure com fatal errs => Failure com fatal errs
Res s com v xs =>
mergeWith v (assert_total $ doParse s com (next v.val) xs)
doParse s com (SeqEat act next) xs
= case assert_total (doParse s com act xs) of
Failure com fatal errs => Failure com fatal errs
Res s com v xs =>
mergeWith v (assert_total $ doParse s com (next v.val) xs)
doParse s com (ThenEmpty act next) xs
= case assert_total (doParse s com act xs) of
Failure com fatal errs => Failure com fatal errs
Res s com v xs =>
mergeWith v (assert_total $ doParse s com next xs)
doParse s com (ThenEat act next) xs
= case assert_total (doParse s com act xs) of
Failure com fatal errs => Failure com fatal errs
Res s com v xs =>
mergeWith v (assert_total $ doParse s com next xs)
doParse s com (Bounds act) xs
= case assert_total (doParse s com act xs) of
Failure com fatal errs => Failure com fatal errs
Res s com v xs => Res s com (const v <$> v) xs
doParse s com Position [] = Failure com False (Error "End of input" Nothing ::: Nil)
doParse s com Position (x :: xs)
= Res s com (irrelevantBounds x.bounds) (x :: xs)
doParse s com (Act action) xs = Res (s <+> action) com (irrelevantBounds ()) xs
||| Parse a list of tokens according to the given grammar. If successful,
||| returns a pair of the parse result and the unparsed tokens (the remaining
||| input).
export
parse : {c : _} -> (act : Grammar tok c ty) -> (xs : List tok) ->
Either (ParseError tok) (ty, List tok)
parse : {c : Bool} -> (act : Grammar () tok c ty) -> (xs : List (WithBounds tok)) ->
Either (List1 (ParsingError tok)) (ty, List (WithBounds tok))
parse act xs
= case doParse False act xs of
Failure _ _ msg ts => Left (Error msg ts)
EmptyRes _ val rest => pure (val, rest)
NonEmptyRes _ val rest => pure (val, rest)
= case doParse neutral False act xs of
Failure _ _ errs => Left errs
Res _ _ v rest => Right (v.val, rest)
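-- Not part of the diff: driving the new `parse` entry point. Plain
-- tokens are wrapped with `irrelevantBounds` (from Text.Bounded, as in
-- `doParse` above); when both uncommitted alternatives fail, the List1
-- carries both errors.
runAB : String -> Either (List1 (ParsingError Char)) (Char, List (WithBounds Char))
runAB str = parse (exactly 'a' <|> exactly 'b')
                  (map irrelevantBounds (unpack str))
  where
    exactly : Char -> Grammar () Char True Char
    exactly want = terminal ("Expected " ++ show want)
                            (\c => if c == want then Just c else Nothing)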
export
parseWith : Monoid state => {c : Bool} -> (act : Grammar state tok c ty) -> (xs : List (WithBounds tok)) ->
Either (List1 (ParsingError tok)) (state, ty, List (WithBounds tok))
parseWith act xs
= case doParse neutral False act xs of
Failure _ _ errs => Left errs
Res s _ v rest => Right (s, v.val, rest)
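-- Not part of the diff: `parseWith` additionally returns the Monoid
-- state threaded through the run. Assuming the `Act` constructor is
-- exposed as an `act` combinator (as in the compiler's library),
-- grammars can accumulate into that state:
logged : Grammar (List String) Char True Char
logged = do c <- terminal "Expected any token" Just
            act ["saw a token"]
            pure c
-- parseWith logged (map irrelevantBounds (unpack "x"))
--   ===> Right (["saw a token"], 'x', [])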

View File

@ -413,10 +413,6 @@ Pretty Char where
export Pretty Nat where pretty = unsafeTextWithoutNewLines . show
export Pretty Int where pretty = unsafeTextWithoutNewLines . show
export Pretty Integer where pretty = unsafeTextWithoutNewLines . show
export Pretty Int8 where pretty = unsafeTextWithoutNewLines . show
export Pretty Int16 where pretty = unsafeTextWithoutNewLines . show
export Pretty Int32 where pretty = unsafeTextWithoutNewLines . show
export Pretty Int64 where pretty = unsafeTextWithoutNewLines . show
export Pretty Double where pretty = unsafeTextWithoutNewLines . show
export Pretty Bits8 where pretty = unsafeTextWithoutNewLines . show
export Pretty Bits16 where pretty = unsafeTextWithoutNewLines . show

View File

@ -3,6 +3,8 @@ module Text.PrettyPrint.Prettyprinter.Render.HTML
import Data.List
import Data.String
%default covering
export
htmlEscape : String -> String
htmlEscape s = fastAppend $ reverse $ go [] s

View File

@ -6,7 +6,7 @@ import public Control.ANSI
import Control.Monad.ST
import Text.PrettyPrint.Prettyprinter.Doc
%default total
%default covering
public export
AnsiStyle : Type

View File

@ -68,6 +68,7 @@ sdocToTreeParser (SLine i rest) = case sdocToTreeParser rest of
(Just tree, rest') => (Just $ STConcat [STLine i, tree], rest')
(Nothing, rest') => (Just $ STLine i, rest')
sdocToTreeParser (SAnnPush ann rest) = case sdocToTreeParser rest of
(tree, Nothing) => (Nothing, Nothing)
(Just tree, Nothing) => (Just $ STAnn ann tree, Nothing)
(Just tree, Just rest') => case sdocToTreeParser rest' of
(Just tree', rest'') => (Just $ STConcat [STAnn ann tree, tree'], rest'')

View File

@ -101,8 +101,10 @@ angles : Doc ann -> Doc ann
angles = enclose langle rangle
export
brackets : Doc ann -> Doc ann
brackets = enclose lbracket rbracket
brackets : {default lbracket ldelim : Doc ann} ->
{default rbracket rdelim : Doc ann} ->
Doc ann -> Doc ann
brackets {ldelim, rdelim} = enclose ldelim rdelim
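-- Not part of the diff: the default-valued implicits keep existing call
-- sites unchanged while allowing custom delimiters (sketch):
plain : Doc ann
plain = brackets (pretty "xs")          -- renders as [xs]
custom : Doc ann
custom = brackets {ldelim = pretty "(|", rdelim = pretty "|)"} (pretty "xs")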
export
braces : Doc ann -> Doc ann

View File

@ -1,5 +1,7 @@
module Text.Quantity
%default total
public export
record Quantity where
constructor Qty

View File

@ -1,5 +1,7 @@
module Text.Token
%default total
||| For a type `kind`, specify a way of converting the recognised
||| string into a value.
|||

View File

@ -159,17 +159,20 @@ modules = Control.ANSI,
System.Random,
System.Path,
Text.Token,
Text.Quantity,
Text.Parser,
Text.Bounded,
Text.Lexer,
Text.Parser.Core,
Text.Lexer.Core,
Text.Lexer.Tokenizer,
Text.Literate,
Text.Parser,
Text.Parser.Core,
Text.PrettyPrint.Prettyprinter,
Text.PrettyPrint.Prettyprinter.Doc,
Text.PrettyPrint.Prettyprinter.Symbols,
Text.PrettyPrint.Prettyprinter.Util,
Text.PrettyPrint.Prettyprinter.SimpleDocTree,
Text.PrettyPrint.Prettyprinter.Render.HTML,
Text.PrettyPrint.Prettyprinter.Render.String,
Text.PrettyPrint.Prettyprinter.Render.Terminal
Text.PrettyPrint.Prettyprinter.Render.Terminal,
Text.PrettyPrint.Prettyprinter.SimpleDocTree,
Text.PrettyPrint.Prettyprinter.Symbols,
Text.PrettyPrint.Prettyprinter.Util,
Text.Quantity,
Text.Token