Idris2/libs/contrib/Text/Lexer.idr

module Text.Lexer

import Data.Bool
import Data.List
import Data.Nat

import public Text.Lexer.Core
import public Text.Quantity
import public Text.Token

%default total

export
toTokenMap : List (Lexer, k) -> TokenMap (Token k)
toTokenMap = map $ \(l, kind) => (l, Tok kind)

||| Recognise any character.
||| /./
export
any : Lexer
any = pred (const True)

||| Recognise a lexer or recognise no input. This is not guaranteed
||| to consume input.
||| /`l`?/
export
opt : (l : Lexer) -> Recognise False
opt l = l <|> empty

||| Recognise any character if the sub-lexer `l` fails.
||| /(?!`l`)./
export
non : (l : Lexer) -> Lexer
non l = reject l <+> any

||| Produce recognisers by applying a function to elements of a container, and
||| recognise the first match. Consumes input if the function produces consuming
||| recognisers. Fails if the container is empty.
export
choiceMap : {c : Bool} ->
            Foldable t => (a -> Recognise c) -> t a -> Recognise c
choiceMap {c} f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
                                               f x <|> acc)
                           fail xs

||| Recognise the first matching recogniser in a container. Consumes input if
||| recognisers in the list consume. Fails if the container is empty.
export
choice : {c : _} ->
         Foldable t => t (Recognise c) -> Recognise c
choice = choiceMap id

||| Sequence a list of recognisers. Guaranteed to consume input if the list is
||| non-empty and the recognisers consume.
export
concat : {c : _} ->
         (xs : List (Recognise c)) -> Recognise (isCons xs && c)
concat = Lexer.Core.concatMap id

||| Recognise a specific character.
||| /[`x`]/
export
is : (x : Char) -> Lexer
is x = pred (==x)

||| Recognise anything but the given character.
||| /[\^`x`]/
export
isNot : (x : Char) -> Lexer
isNot x = pred (/=x)

||| Recognise a specific character (case-insensitive).
||| /[`x`]/i
export
like : (x : Char) -> Lexer
like x = pred (\y => toUpper x == toUpper y)

||| Recognise anything but the given character (case-insensitive).
||| /[\^`x`]/i
export
notLike : (x : Char) -> Lexer
notLike x = pred (\y => toUpper x /= toUpper y)

||| Recognise a specific string.
||| Fails if the string is empty.
||| /`str`/
export
exact : (str : String) -> Lexer
exact str = case unpack str of
                 [] => fail
                 (x :: xs) => concatMap is (x :: xs)

||| Recognise a specific string (case-insensitive).
||| Fails if the string is empty.
||| /`str`/i
export
approx : (str : String) -> Lexer
approx str = case unpack str of
                  [] => fail
                  (x :: xs) => concatMap like (x :: xs)

||| Recognise any of the characters in the given string.
||| /[`chars`]/
export
oneOf : (chars : String) -> Lexer
oneOf chars = pred (\x => x `elem` unpack chars)

||| Recognise a character range. Also works in reverse!
||| /[`start`-`end`]/
export
range : (start : Char) -> (end : Char) -> Lexer
range start end = pred (\x => (x >= min start end)
                           && (x <= max start end))

mutual
  ||| Recognise a sequence of at least one sub-lexers
  ||| /`l`+/
  export
  some : Lexer -> Lexer
  some l = l <+> many l

  ||| Recognise a sequence of at zero or more sub-lexers. This is not
  ||| guaranteed to consume input
  ||| /`l`\*/
  export
  many : Lexer -> Recognise False
  many l = opt (some l)

||| Repeat the sub-lexer `l` one or more times until the lexer
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
||| /((?!`stopBefore`)`l`)\+/
export
someUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Lexer
someUntil stopBefore l = some (reject stopBefore <+> l)

||| Repeat the sub-lexer `l` zero or more times until the lexer
||| `stopBefore` is encountered. `stopBefore` will not be consumed.
||| Not guaranteed to consume input.
||| /((?!`stopBefore`)`l`)\*/
export
manyUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Recognise False
manyUntil stopBefore l = many (reject stopBefore <+> l)

||| Repeat the sub-lexer `l` zero or more times until the lexer
||| `stopAfter` is encountered, and consume it. Guaranteed to
||| consume if `stopAfter` consumes.
||| /`l`\*?`stopAfter`/
export
manyThen : (stopAfter : Recognise c) -> (l : Lexer) -> Recognise c
manyThen stopAfter l = manyUntil stopAfter l <+> stopAfter

||| Recognise a sub-lexer repeated as specified by `q`. Fails if `q` has
||| `min` and `max` in the wrong order. Consumes input unless `min q` is zero.
||| /`l`{`q`}/
export
count : (q : Quantity) -> (l : Lexer) -> Recognise (isSucc (min q))
count (Qty Z Nothing) l = many l
count (Qty Z (Just Z)) _ = empty
count (Qty Z (Just (S max))) l = opt $ l <+> count (atMost max) l
count (Qty (S min) Nothing) l = l <+> count (atLeast min) l
count (Qty (S min) (Just Z)) _ = fail
count (Qty (S min) (Just (S max))) l = l <+> count (between min max) l

||| Recognise a single digit 0-9
||| /[0-9]/
export
digit : Lexer
digit = pred isDigit

||| Recognise one or more digits
||| /[0-9]+/
export
digits : Lexer
digits = some digit

||| Recognise a single binary digit
||| /[0-1]/
export
binDigit : Lexer
binDigit = pred (\c => c == '0' || c == '1')

||| Recognise one or more binary digits
||| /[0-1]+/
export
binDigits : Lexer
binDigits = some binDigit

||| Recognise a single hexidecimal digit
||| /[0-9A-Fa-f]/
export
hexDigit : Lexer
hexDigit = pred isHexDigit

||| Recognise one or more hexidecimal digits
||| /[0-9A-Fa-f]+/
export
hexDigits : Lexer
hexDigits = some hexDigit

||| Recognise a single octal digit
||| /[0-8]/
export
octDigit : Lexer
octDigit = pred isOctDigit

||| Recognise one or more octal digits
||| /[0-8]+/
export
octDigits : Lexer
octDigits = some octDigit

||| Recognise a single alpha character
||| /[A-Za-z]/
export
alpha : Lexer
alpha = pred isAlpha

||| Recognise one or more alpha characters
||| /[A-Za-z]+/
export
alphas : Lexer
alphas = some alpha

||| Recognise a lowercase alpha character
||| /[a-z]/
export
lower : Lexer
lower = pred isLower

||| Recognise one or more lowercase alpha characters
||| /[a-z]+/
export
lowers : Lexer
lowers = some lower

||| Recognise an uppercase alpha character
||| /[A-Z]/
export
upper : Lexer
upper = pred isUpper

||| Recognise one or more uppercase alpha characters
||| /[A-Z]+/
export
uppers : Lexer
uppers = some upper

||| Recognise an alphanumeric character
||| /[A-Za-z0-9]/
export
alphaNum : Lexer
alphaNum = pred isAlphaNum

||| Recognise one or more alphanumeric characters
||| /[A-Za-z0-9]+/
export
alphaNums : Lexer
alphaNums = some alphaNum

||| Recognise a single whitespace character
||| /\\s/
export
space : Lexer
space = pred isSpace

||| Recognise one or more whitespace characters
||| /\\s+/
export
spaces : Lexer
spaces = some space

||| Recognise a single newline sequence. Understands CRLF, CR, and LF
||| /\\r\\n|[\\r\\n]/
export
newline : Lexer
newline = let crlf = "\r\n" in
              exact crlf <|> oneOf crlf

||| Recognise one or more newline sequences. Understands CRLF, CR, and LF
||| /(\\r\\n|[\\r\\n])+)/
export
newlines : Lexer
newlines = some newline

||| Recognise a single non-whitespace, non-alphanumeric character
||| /[\^\\sA-Za-z0-9]/
export
symbol : Lexer
symbol = pred (\x => not (isSpace x || isAlphaNum x))

||| Recognise one or more non-whitespace, non-alphanumeric characters
||| /[\^\\sA-Za-z0-9]+/
export
symbols : Lexer
symbols = some symbol

||| Recognise a single control character
||| /[\\x00-\\x1f\\x7f-\\x9f]/
export
control : Lexer
control = pred isControl

||| Recognise one or more control characters
||| /[\\x00-\\x1f\\x7f-\\x9f]+/
export
controls : Lexer
controls = some control

||| Recognise zero or more occurrences of a sub-lexer between
||| delimiting lexers
||| /`start`(`l`)\*?`end`/
export
surround : (start : Lexer) -> (end : Lexer) -> (l : Lexer) -> Lexer
surround start end l = start <+> manyThen end l

||| Recognise zero or more occurrences of a sub-lexer surrounded
||| by the same quote lexer on both sides (useful for strings)
||| /`q`(`l`)\*?`q`/
export
quote : (q : Lexer) -> (l : Lexer) -> Lexer
quote q l = surround q q l

||| Recognise an escape sub-lexer (often '\\') followed by
||| another sub-lexer
||| /[`esc`]`l`/
export
escape : (esc : Lexer) -> Lexer -> Lexer
escape esc l = esc <+> l

||| Recognise a string literal, including escaped characters.
||| (Note: doesn't yet handle escape sequences such as \123)
||| /"(\\\\.|.)\*?"/
export
stringLit : Lexer
stringLit = quote (is '"') (escape (is '\\') any <|> any)

||| Recognise a character literal, including escaped characters.
||| (Note: doesn't yet handle escape sequences such as \123)
||| /'(\\\\.|[\^'])'/
export
charLit : Lexer
charLit = let q = '\'' in
              is q <+> (escape (is '\\') (control <|> any) <|> isNot q) <+> is q
  where
    lexStr : List String -> Lexer
    lexStr [] = fail
    lexStr (t :: ts) = exact t <|> lexStr ts

    control : Lexer
    control = lexStr ["NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
                      "BS",  "HT",  "LF",  "VT",  "FF",  "CR",  "SO",  "SI",
                      "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
                      "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
                      "SP",  "DEL"]
                <|> (is 'x' <+> hexDigits)
                <|> (is 'o' <+> octDigits)
                <|> digits

||| Recognise an integer literal (possibly with a '-' prefix)
||| /-?[0-9]+/
export
intLit : Lexer
intLit = opt (is '-') <+> digits

||| Recognise a binary literal, prefixed by "0b"
||| /0b[0-1]+/
export
binLit : Lexer
binLit = exact "0b" <+> binDigits

||| Recognise a hexidecimal literal, prefixed by "0x" or "0X"
||| /0[Xx][0-9A-Fa-f]+/
export
hexLit : Lexer
hexLit = approx "0x" <+> hexDigits

||| Recognise an octal literal, prefixed by "0o"
||| /0o[0-9A-Fa-f]+/
export
octLit : Lexer
octLit = exact "0o" <+> octDigits

export
digitsUnderscoredLit : Lexer
digitsUnderscoredLit = digits <+> many (is '_' <+> digits)

export
binUnderscoredLit : Lexer
binUnderscoredLit = binLit <+> many (is '_' <+> binDigits)

export
hexUnderscoredLit : Lexer
hexUnderscoredLit = hexLit <+> many (is '_' <+> hexDigits)

export
octUnderscoredLit : Lexer
octUnderscoredLit = octLit <+> many (is '_' <+> octDigits)

||| Recognise `start`, then recognise all input until a newline is encountered,
||| and consume the newline. Will succeed if end-of-input is encountered before
||| a newline.
||| /`start`[\^\\r\\n]+(\\r\\n|[\\r\\n])?/
export
lineComment : (start : Lexer) -> Lexer
lineComment start = start <+> manyUntil newline any <+> opt newline

||| Recognise all input between `start` and `end` lexers.
||| Supports balanced nesting.
|||
||| For block comments that don't support nesting (such as C-style comments),
||| use `surround`
export
blockComment : (start : Lexer) -> (end : Lexer) -> Lexer
blockComment start end = start <+> middle <+> end
  where
    middle : Recognise False
    middle = manyUntil end (blockComment start end <|> any)
Add libraries 2020-05-18 15:59:07 +03:00			`module Text.Lexer`

various stdlib updates 2020-06-12 00:14:11 +03:00			`import Data.Bool`
Add libraries 2020-05-18 15:59:07 +03:00			`import Data.List`
			`import Data.Nat`

			`import public Text.Lexer.Core`
			`import public Text.Quantity`
			`import public Text.Token`

Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			`%default total`

Add libraries 2020-05-18 15:59:07 +03:00			`export`
			`toTokenMap : List (Lexer, k) -> TokenMap (Token k)`
			`toTokenMap = map $ \(l, kind) => (l, Tok kind)`

			`\|\|\| Recognise any character.`
			`\|\|\| /./`
			`export`
			`any : Lexer`
			`any = pred (const True)`

			`\|\|\| Recognise a lexer or recognise no input. This is not guaranteed`
			`\|\|\| to consume input.`
			\|\|\| /`l`?/
			`export`
			`opt : (l : Lexer) -> Recognise False`
			`opt l = l <\|> empty`

			\|\|\| Recognise any character if the sub-lexer `l` fails.
			\|\|\| /(?!`l`)./
			`export`
			`non : (l : Lexer) -> Lexer`
			`non l = reject l <+> any`

			`\|\|\| Produce recognisers by applying a function to elements of a container, and`
			`\|\|\| recognise the first match. Consumes input if the function produces consuming`
			`\|\|\| recognisers. Fails if the container is empty.`
			`export`
			`choiceMap : {c : Bool} ->`
			`Foldable t => (a -> Recognise c) -> t a -> Recognise c`
Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			`choiceMap {c} f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in`
			`f x <\|> acc)`
			`fail xs`
Add libraries 2020-05-18 15:59:07 +03:00
			`\|\|\| Recognise the first matching recogniser in a container. Consumes input if`
			`\|\|\| recognisers in the list consume. Fails if the container is empty.`
			`export`
			`choice : {c : _} ->`
			`Foldable t => t (Recognise c) -> Recognise c`
Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			`choice = choiceMap id`
Add libraries 2020-05-18 15:59:07 +03:00
			`\|\|\| Sequence a list of recognisers. Guaranteed to consume input if the list is`
			`\|\|\| non-empty and the recognisers consume.`
			`export`
			`concat : {c : _} ->`
Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			`(xs : List (Recognise c)) -> Recognise (isCons xs && c)`
Add libraries 2020-05-18 15:59:07 +03:00			`concat = Lexer.Core.concatMap id`

			`\|\|\| Recognise a specific character.`
			\|\|\| /[`x`]/
			`export`
			`is : (x : Char) -> Lexer`
			`is x = pred (==x)`

			`\|\|\| Recognise anything but the given character.`
			\|\|\| /[\^`x`]/
			`export`
			`isNot : (x : Char) -> Lexer`
			`isNot x = pred (/=x)`

			`\|\|\| Recognise a specific character (case-insensitive).`
			\|\|\| /[`x`]/i
			`export`
			`like : (x : Char) -> Lexer`
			`like x = pred (\y => toUpper x == toUpper y)`

			`\|\|\| Recognise anything but the given character (case-insensitive).`
			\|\|\| /[\^`x`]/i
			`export`
			`notLike : (x : Char) -> Lexer`
			`notLike x = pred (\y => toUpper x /= toUpper y)`

			`\|\|\| Recognise a specific string.`
			`\|\|\| Fails if the string is empty.`
			\|\|\| /`str`/
			`export`
			`exact : (str : String) -> Lexer`
			`exact str = case unpack str of`
			`[] => fail`
			`(x :: xs) => concatMap is (x :: xs)`

			`\|\|\| Recognise a specific string (case-insensitive).`
			`\|\|\| Fails if the string is empty.`
			\|\|\| /`str`/i
			`export`
			`approx : (str : String) -> Lexer`
			`approx str = case unpack str of`
			`[] => fail`
			`(x :: xs) => concatMap like (x :: xs)`

			`\|\|\| Recognise any of the characters in the given string.`
			\|\|\| /[`chars`]/
			`export`
			`oneOf : (chars : String) -> Lexer`
			oneOf chars = pred (\x => x `elem` unpack chars)

			`\|\|\| Recognise a character range. Also works in reverse!`
			\|\|\| /[`start`-`end`]/
			`export`
			`range : (start : Char) -> (end : Char) -> Lexer`
			`range start end = pred (\x => (x >= min start end)`
			`&& (x <= max start end))`

			`mutual`
			`\|\|\| Recognise a sequence of at least one sub-lexers`
			\|\|\| /`l`+/
			`export`
			`some : Lexer -> Lexer`
			`some l = l <+> many l`

			`\|\|\| Recognise a sequence of at zero or more sub-lexers. This is not`
			`\|\|\| guaranteed to consume input`
			\|\|\| /`l`\*/
			`export`
			`many : Lexer -> Recognise False`
			`many l = opt (some l)`

Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			\|\|\| Repeat the sub-lexer `l` one or more times until the lexer
			\|\|\| `stopBefore` is encountered. `stopBefore` will not be consumed.
			\|\|\| /((?!`stopBefore`)`l`)\+/
			`export`
			`someUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Lexer`
			`someUntil stopBefore l = some (reject stopBefore <+> l)`

Add libraries 2020-05-18 15:59:07 +03:00			\|\|\| Repeat the sub-lexer `l` zero or more times until the lexer
			\|\|\| `stopBefore` is encountered. `stopBefore` will not be consumed.
			`\|\|\| Not guaranteed to consume input.`
			\|\|\| /((?!`stopBefore`)`l`)\*/
			`export`
			`manyUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Recognise False`
			`manyUntil stopBefore l = many (reject stopBefore <+> l)`

			\|\|\| Repeat the sub-lexer `l` zero or more times until the lexer
			\|\|\| `stopAfter` is encountered, and consume it. Guaranteed to
			\|\|\| consume if `stopAfter` consumes.
			\|\|\| /`l`\*?`stopAfter`/
			`export`
			`manyThen : (stopAfter : Recognise c) -> (l : Lexer) -> Recognise c`
			`manyThen stopAfter l = manyUntil stopAfter l <+> stopAfter`

			\|\|\| Recognise a sub-lexer repeated as specified by `q`. Fails if `q` has
			\|\|\| `min` and `max` in the wrong order. Consumes input unless `min q` is zero.
			\|\|\| /`l`{`q`}/
			`export`
			`count : (q : Quantity) -> (l : Lexer) -> Recognise (isSucc (min q))`
			`count (Qty Z Nothing) l = many l`
			`count (Qty Z (Just Z)) _ = empty`
			`count (Qty Z (Just (S max))) l = opt $ l <+> count (atMost max) l`
			`count (Qty (S min) Nothing) l = l <+> count (atLeast min) l`
			`count (Qty (S min) (Just Z)) _ = fail`
			`count (Qty (S min) (Just (S max))) l = l <+> count (between min max) l`

			`\|\|\| Recognise a single digit 0-9`
			`\|\|\| /[0-9]/`
			`export`
			`digit : Lexer`
			`digit = pred isDigit`

			`\|\|\| Recognise one or more digits`
			`\|\|\| /[0-9]+/`
			`export`
			`digits : Lexer`
			`digits = some digit`

Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			`\|\|\| Recognise a single binary digit`
			`\|\|\| /[0-1]/`
			`export`
			`binDigit : Lexer`
			`binDigit = pred (\c => c == '0' \|\| c == '1')`

			`\|\|\| Recognise one or more binary digits`
			`\|\|\| /[0-1]+/`
			`export`
			`binDigits : Lexer`
			`binDigits = some binDigit`

Add libraries 2020-05-18 15:59:07 +03:00			`\|\|\| Recognise a single hexidecimal digit`
			`\|\|\| /[0-9A-Fa-f]/`
			`export`
			`hexDigit : Lexer`
			`hexDigit = pred isHexDigit`

			`\|\|\| Recognise one or more hexidecimal digits`
			`\|\|\| /[0-9A-Fa-f]+/`
			`export`
			`hexDigits : Lexer`
			`hexDigits = some hexDigit`

			`\|\|\| Recognise a single octal digit`
			`\|\|\| /[0-8]/`
			`export`
			`octDigit : Lexer`
Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			`octDigit = pred isOctDigit`
Add libraries 2020-05-18 15:59:07 +03:00
			`\|\|\| Recognise one or more octal digits`
			`\|\|\| /[0-8]+/`
			`export`
			`octDigits : Lexer`
Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			`octDigits = some octDigit`
Add libraries 2020-05-18 15:59:07 +03:00
			`\|\|\| Recognise a single alpha character`
			`\|\|\| /[A-Za-z]/`
			`export`
			`alpha : Lexer`
			`alpha = pred isAlpha`

			`\|\|\| Recognise one or more alpha characters`
			`\|\|\| /[A-Za-z]+/`
			`export`
			`alphas : Lexer`
			`alphas = some alpha`

			`\|\|\| Recognise a lowercase alpha character`
			`\|\|\| /[a-z]/`
			`export`
			`lower : Lexer`
			`lower = pred isLower`

			`\|\|\| Recognise one or more lowercase alpha characters`
			`\|\|\| /[a-z]+/`
			`export`
			`lowers : Lexer`
			`lowers = some lower`

			`\|\|\| Recognise an uppercase alpha character`
			`\|\|\| /[A-Z]/`
			`export`
			`upper : Lexer`
			`upper = pred isUpper`

			`\|\|\| Recognise one or more uppercase alpha characters`
			`\|\|\| /[A-Z]+/`
			`export`
			`uppers : Lexer`
			`uppers = some upper`

			`\|\|\| Recognise an alphanumeric character`
			`\|\|\| /[A-Za-z0-9]/`
			`export`
			`alphaNum : Lexer`
			`alphaNum = pred isAlphaNum`

			`\|\|\| Recognise one or more alphanumeric characters`
			`\|\|\| /[A-Za-z0-9]+/`
			`export`
			`alphaNums : Lexer`
			`alphaNums = some alphaNum`

			`\|\|\| Recognise a single whitespace character`
			`\|\|\| /\\s/`
			`export`
			`space : Lexer`
			`space = pred isSpace`

			`\|\|\| Recognise one or more whitespace characters`
			`\|\|\| /\\s+/`
			`export`
			`spaces : Lexer`
			`spaces = some space`

			`\|\|\| Recognise a single newline sequence. Understands CRLF, CR, and LF`
			`\|\|\| /\\r\\n\|[\\r\\n]/`
			`export`
			`newline : Lexer`
			`newline = let crlf = "\r\n" in`
			`exact crlf <\|> oneOf crlf`

			`\|\|\| Recognise one or more newline sequences. Understands CRLF, CR, and LF`
			`\|\|\| /(\\r\\n\|[\\r\\n])+)/`
			`export`
			`newlines : Lexer`
			`newlines = some newline`

			`\|\|\| Recognise a single non-whitespace, non-alphanumeric character`
			`\|\|\| /[\^\\sA-Za-z0-9]/`
			`export`
			`symbol : Lexer`
			`symbol = pred (\x => not (isSpace x \|\| isAlphaNum x))`

			`\|\|\| Recognise one or more non-whitespace, non-alphanumeric characters`
			`\|\|\| /[\^\\sA-Za-z0-9]+/`
			`export`
			`symbols : Lexer`
			`symbols = some symbol`

			`\|\|\| Recognise a single control character`
			`\|\|\| /[\\x00-\\x1f\\x7f-\\x9f]/`
			`export`
			`control : Lexer`
			`control = pred isControl`

			`\|\|\| Recognise one or more control characters`
			`\|\|\| /[\\x00-\\x1f\\x7f-\\x9f]+/`
			`export`
			`controls : Lexer`
			`controls = some control`

			`\|\|\| Recognise zero or more occurrences of a sub-lexer between`
			`\|\|\| delimiting lexers`
			\|\|\| /`start`(`l`)\*?`end`/
			`export`
			`surround : (start : Lexer) -> (end : Lexer) -> (l : Lexer) -> Lexer`
			`surround start end l = start <+> manyThen end l`

			`\|\|\| Recognise zero or more occurrences of a sub-lexer surrounded`
			`\|\|\| by the same quote lexer on both sides (useful for strings)`
			\|\|\| /`q`(`l`)\*?`q`/
			`export`
			`quote : (q : Lexer) -> (l : Lexer) -> Lexer`
			`quote q l = surround q q l`

Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			`\|\|\| Recognise an escape sub-lexer (often '\\') followed by`
			`\|\|\| another sub-lexer`
Add libraries 2020-05-18 15:59:07 +03:00			\|\|\| /[`esc`]`l`/
			`export`
Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			`escape : (esc : Lexer) -> Lexer -> Lexer`
			`escape esc l = esc <+> l`
Add libraries 2020-05-18 15:59:07 +03:00
			`\|\|\| Recognise a string literal, including escaped characters.`
			`\|\|\| (Note: doesn't yet handle escape sequences such as \123)`
			`\|\|\| /"(\\\\.\|.)\*?"/`
			`export`
			`stringLit : Lexer`
Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			`stringLit = quote (is '"') (escape (is '\\') any <\|> any)`
Add libraries 2020-05-18 15:59:07 +03:00
			`\|\|\| Recognise a character literal, including escaped characters.`
			`\|\|\| (Note: doesn't yet handle escape sequences such as \123)`
			`\|\|\| /'(\\\\.\|[\^'])'/`
			`export`
			`charLit : Lexer`
			`charLit = let q = '\'' in`
Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			`is q <+> (escape (is '\\') (control <\|> any) <\|> isNot q) <+> is q`
Add libraries 2020-05-18 15:59:07 +03:00			`where`
			`lexStr : List String -> Lexer`
			`lexStr [] = fail`
			`lexStr (t :: ts) = exact t <\|> lexStr ts`

			`control : Lexer`
			`control = lexStr ["NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",`
			`"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",`
			`"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",`
			`"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",`
			`"SP", "DEL"]`
			`<\|> (is 'x' <+> hexDigits)`
			`<\|> (is 'o' <+> octDigits)`
			`<\|> digits`

			`\|\|\| Recognise an integer literal (possibly with a '-' prefix)`
			`\|\|\| /-?[0-9]+/`
			`export`
			`intLit : Lexer`
			`intLit = opt (is '-') <+> digits`

Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			`\|\|\| Recognise a binary literal, prefixed by "0b"`
			`\|\|\| /0b[0-1]+/`
			`export`
			`binLit : Lexer`
			`binLit = exact "0b" <+> binDigits`

Add libraries 2020-05-18 15:59:07 +03:00			`\|\|\| Recognise a hexidecimal literal, prefixed by "0x" or "0X"`
			`\|\|\| /0[Xx][0-9A-Fa-f]+/`
			`export`
			`hexLit : Lexer`
			`hexLit = approx "0x" <+> hexDigits`

Update contrib Text.Parser to match Library.Text.Parser (#1808) Co-authored-by: Guillaume ALLAIS <guillaume.allais@ens-lyon.org> 2021-08-06 12:03:13 +03:00			`\|\|\| Recognise an octal literal, prefixed by "0o"`
			`\|\|\| /0o[0-9A-Fa-f]+/`
			`export`
			`octLit : Lexer`
			`octLit = exact "0o" <+> octDigits`

			`export`
			`digitsUnderscoredLit : Lexer`
			`digitsUnderscoredLit = digits <+> many (is '_' <+> digits)`

			`export`
			`binUnderscoredLit : Lexer`
			`binUnderscoredLit = binLit <+> many (is '_' <+> binDigits)`

			`export`
			`hexUnderscoredLit : Lexer`
			`hexUnderscoredLit = hexLit <+> many (is '_' <+> hexDigits)`

			`export`
			`octUnderscoredLit : Lexer`
			`octUnderscoredLit = octLit <+> many (is '_' <+> octDigits)`

Add libraries 2020-05-18 15:59:07 +03:00			\|\|\| Recognise `start`, then recognise all input until a newline is encountered,
			`\|\|\| and consume the newline. Will succeed if end-of-input is encountered before`
			`\|\|\| a newline.`
			\|\|\| /`start`[\^\\r\\n]+(\\r\\n\|[\\r\\n])?/
			`export`
			`lineComment : (start : Lexer) -> Lexer`
			`lineComment start = start <+> manyUntil newline any <+> opt newline`

			\|\|\| Recognise all input between `start` and `end` lexers.
			`\|\|\| Supports balanced nesting.`
			`\|\|\|`
			`\|\|\| For block comments that don't support nesting (such as C-style comments),`
			\|\|\| use `surround`
			`export`
			`blockComment : (start : Lexer) -> (end : Lexer) -> Lexer`
			`blockComment start end = start <+> middle <+> end`
			`where`
			`middle : Recognise False`
			`middle = manyUntil end (blockComment start end <\|> any)`