[ refactor ] S-Exp protocols to depend on fewer Idris modules (#3060)

This commit is contained in:
Ohad Kammar 2023-08-31 11:53:14 +01:00 committed by GitHub
parent e3214586a5
commit d3dc9b7c44
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 306 additions and 185 deletions

1
.gitignore vendored
View File

@ -15,6 +15,7 @@ idris2docs_venv
/docs/build
/libs/**/build
/ipkg/build
/tests/**/build
/tests/**/output*

View File

@ -107,6 +107,11 @@
* Fixed a bug that caused `f` to sometimes be replaced by `fx` after matching `fx = f x`.
* Refactor the idris2protocols package to depend on fewer Idris2 modules.
We can now export the package independently.
To avoid confusing tooling about which `.ipkg` to use, the
package file is under the newly added `ipkg` sub-directory.
### Library changes
#### Prelude

View File

@ -224,6 +224,7 @@ modules =
Parser.Package,
Parser.Source,
Parser.Support,
Parser.Support.Escaping,
Parser.Unlit,
Parser.Lexer.Common,

View File

@ -3,33 +3,37 @@ version = 0.0.1
modules
= Protocol.Hex
-- , Protocol.IDE
, Protocol.IDE
, Protocol.IDE.Command
, Protocol.IDE.Decoration
, Protocol.SExp
-- TODO: want to add this module, but the parser library introduces
-- dependencies on core idris
-- , Protocol.SExp.Parser
, Libraries.Data.Span
, Libraries.Text.Lexer
, Libraries.Control.Delayed
, Libraries.Text.Bounded
, Libraries.Text.Lexer.Core
, Libraries.Text.Quantity
, Libraries.Text.Token
, Libraries.Text.Lexer.Tokenizer
, Libraries.Text.Lexer
, Parser.Lexer.Common
, Libraries.Data.Span
, Libraries.Data.String.Extra
, Libraries.Text.PrettyPrint.Prettyprinter.Doc
, Libraries.Text.PrettyPrint.Prettyprinter.Symbols
, Libraries.Text.PrettyPrint.Prettyprinter.Util
, Libraries.Text.PrettyPrint.Prettyprinter.Render.String
, Libraries.Text.PrettyPrint.Prettyprinter
, Libraries.Text.Lexer.Tokenizer
, Libraries.Text.Parser.Core
, Libraries.Text.Parser
, Parser.Rule.Source
-- , Protocol.IDE.FileContext
-- , Protocol.IDE.Formatting
-- , Protocol.IDE.Holes
-- , Protocol.IDE.Result
-- , Protocol.IDE.Highlight
, Parser.Support.Escaping
, Protocol.SExp.Parser
, Protocol.IDE.Decoration
, Protocol.IDE.Command
, Protocol.Hex
, Protocol.IDE.FileContext
, Protocol.IDE.Formatting
, Protocol.IDE.Holes
, Protocol.IDE.Result
, Protocol.IDE.Highlight
--depends =

View File

@ -10,6 +10,7 @@ import Core.TT
import Core.Core
import Data.List
import Parser.Unlit
import public Parser.Support.Escaping
%default total
@ -39,146 +40,6 @@ fromParsingErrors origin = ParseFail . (map fromError)
else MkFC origin start end
in (fc, msg +> '.')
export
hex : Char -> Maybe Int
hex '0' = Just 0
hex '1' = Just 1
hex '2' = Just 2
hex '3' = Just 3
hex '4' = Just 4
hex '5' = Just 5
hex '6' = Just 6
hex '7' = Just 7
hex '8' = Just 8
hex '9' = Just 9
hex 'a' = Just 10
hex 'b' = Just 11
hex 'c' = Just 12
hex 'd' = Just 13
hex 'e' = Just 14
hex 'f' = Just 15
hex _ = Nothing
export
dec : Char -> Maybe Int
dec '0' = Just 0
dec '1' = Just 1
dec '2' = Just 2
dec '3' = Just 3
dec '4' = Just 4
dec '5' = Just 5
dec '6' = Just 6
dec '7' = Just 7
dec '8' = Just 8
dec '9' = Just 9
dec _ = Nothing
export
oct : Char -> Maybe Int
oct '0' = Just 0
oct '1' = Just 1
oct '2' = Just 2
oct '3' = Just 3
oct '4' = Just 4
oct '5' = Just 5
oct '6' = Just 6
oct '7' = Just 7
oct _ = Nothing
export
getEsc : String -> Maybe Char
getEsc "NUL" = Just '\NUL'
getEsc "SOH" = Just '\SOH'
getEsc "STX" = Just '\STX'
getEsc "ETX" = Just '\ETX'
getEsc "EOT" = Just '\EOT'
getEsc "ENQ" = Just '\ENQ'
getEsc "ACK" = Just '\ACK'
getEsc "BEL" = Just '\BEL'
getEsc "BS" = Just '\BS'
getEsc "HT" = Just '\HT'
getEsc "LF" = Just '\LF'
getEsc "VT" = Just '\VT'
getEsc "FF" = Just '\FF'
getEsc "CR" = Just '\CR'
getEsc "SO" = Just '\SO'
getEsc "SI" = Just '\SI'
getEsc "DLE" = Just '\DLE'
getEsc "DC1" = Just '\DC1'
getEsc "DC2" = Just '\DC2'
getEsc "DC3" = Just '\DC3'
getEsc "DC4" = Just '\DC4'
getEsc "NAK" = Just '\NAK'
getEsc "SYN" = Just '\SYN'
getEsc "ETB" = Just '\ETB'
getEsc "CAN" = Just '\CAN'
getEsc "EM" = Just '\EM'
getEsc "SUB" = Just '\SUB'
getEsc "ESC" = Just '\ESC'
getEsc "FS" = Just '\FS'
getEsc "GS" = Just '\GS'
getEsc "RS" = Just '\RS'
getEsc "US" = Just '\US'
getEsc "SP" = Just '\SP'
getEsc "DEL" = Just '\DEL'
getEsc str = Nothing
unescape' : List Char -> List Char -> Maybe (List Char)
unescape' _ [] = pure []
unescape' escapeChars (x::xs)
= assert_total $ if escapeChars `isPrefixOf` (x::xs)
then case drop (length escapeChars) (x::xs) of
('\\' :: xs) => pure $ '\\' :: !(unescape' escapeChars xs)
('\n' :: xs) => pure !(unescape' escapeChars xs)
('&' :: xs) => pure !(unescape' escapeChars xs)
('a' :: xs) => pure $ '\a' :: !(unescape' escapeChars xs)
('b' :: xs) => pure $ '\b' :: !(unescape' escapeChars xs)
('f' :: xs) => pure $ '\f' :: !(unescape' escapeChars xs)
('n' :: xs) => pure $ '\n' :: !(unescape' escapeChars xs)
('r' :: xs) => pure $ '\r' :: !(unescape' escapeChars xs)
('t' :: xs) => pure $ '\t' :: !(unescape' escapeChars xs)
('v' :: xs) => pure $ '\v' :: !(unescape' escapeChars xs)
('\'' :: xs) => pure $ '\'' :: !(unescape' escapeChars xs)
('"' :: xs) => pure $ '"' :: !(unescape' escapeChars xs)
('x' :: xs) => case span isHexDigit xs of
([], rest) => unescape' escapeChars rest
(ds, rest) => pure $ cast !(toHex 1 (reverse ds)) ::
!(unescape' escapeChars rest)
('o' :: xs) => case span isOctDigit xs of
([], rest) => unescape' escapeChars rest
(ds, rest) => pure $ cast !(toOct 1 (reverse ds)) ::
!(unescape' escapeChars rest)
xs => case span isDigit xs of
([], (a :: b :: c :: rest)) =>
case getEsc (fastPack [a, b, c]) of
Just v => Just (v :: !(unescape' escapeChars rest))
Nothing => case getEsc (fastPack [a, b]) of
Just v => Just (v :: !(unescape' escapeChars (c :: rest)))
Nothing => unescape' escapeChars xs
([], (a :: b :: [])) =>
case getEsc (fastPack [a, b]) of
Just v => Just (v :: [])
Nothing => unescape' escapeChars xs
([], rest) => unescape' escapeChars rest
(ds, rest) => Just $ cast (cast {to=Int} (fastPack ds)) ::
!(unescape' escapeChars rest)
else Just $ x :: !(unescape' escapeChars xs)
where
toHex : Int -> List Char -> Maybe Int
toHex _ [] = Just 0
toHex m (d :: ds)
= pure $ !(hex (toLower d)) * m + !(toHex (m*16) ds)
toOct : Int -> List Char -> Maybe Int
toOct _ [] = Just 0
toOct m (d :: ds)
= pure $ !(oct (toLower d)) * m + !(toOct (m*8) ds)
export
unescape : Nat -> String -> Maybe String
unescape hashtag x = let escapeChars = '\\' :: replicate hashtag '#' in
fastPack <$> (unescape' escapeChars (unpack x))
export
getCharLit : String -> Maybe Char
getCharLit str

View File

@ -0,0 +1,144 @@
module Parser.Support.Escaping
import Libraries.Data.String.Extra
import Data.List
export
hex : Char -> Maybe Int
hex '0' = Just 0
hex '1' = Just 1
hex '2' = Just 2
hex '3' = Just 3
hex '4' = Just 4
hex '5' = Just 5
hex '6' = Just 6
hex '7' = Just 7
hex '8' = Just 8
hex '9' = Just 9
hex 'a' = Just 10
hex 'b' = Just 11
hex 'c' = Just 12
hex 'd' = Just 13
hex 'e' = Just 14
hex 'f' = Just 15
hex _ = Nothing
export
dec : Char -> Maybe Int
dec '0' = Just 0
dec '1' = Just 1
dec '2' = Just 2
dec '3' = Just 3
dec '4' = Just 4
dec '5' = Just 5
dec '6' = Just 6
dec '7' = Just 7
dec '8' = Just 8
dec '9' = Just 9
dec _ = Nothing
export
oct : Char -> Maybe Int
oct '0' = Just 0
oct '1' = Just 1
oct '2' = Just 2
oct '3' = Just 3
oct '4' = Just 4
oct '5' = Just 5
oct '6' = Just 6
oct '7' = Just 7
oct _ = Nothing
export
getEsc : String -> Maybe Char
getEsc "NUL" = Just '\NUL'
getEsc "SOH" = Just '\SOH'
getEsc "STX" = Just '\STX'
getEsc "ETX" = Just '\ETX'
getEsc "EOT" = Just '\EOT'
getEsc "ENQ" = Just '\ENQ'
getEsc "ACK" = Just '\ACK'
getEsc "BEL" = Just '\BEL'
getEsc "BS" = Just '\BS'
getEsc "HT" = Just '\HT'
getEsc "LF" = Just '\LF'
getEsc "VT" = Just '\VT'
getEsc "FF" = Just '\FF'
getEsc "CR" = Just '\CR'
getEsc "SO" = Just '\SO'
getEsc "SI" = Just '\SI'
getEsc "DLE" = Just '\DLE'
getEsc "DC1" = Just '\DC1'
getEsc "DC2" = Just '\DC2'
getEsc "DC3" = Just '\DC3'
getEsc "DC4" = Just '\DC4'
getEsc "NAK" = Just '\NAK'
getEsc "SYN" = Just '\SYN'
getEsc "ETB" = Just '\ETB'
getEsc "CAN" = Just '\CAN'
getEsc "EM" = Just '\EM'
getEsc "SUB" = Just '\SUB'
getEsc "ESC" = Just '\ESC'
getEsc "FS" = Just '\FS'
getEsc "GS" = Just '\GS'
getEsc "RS" = Just '\RS'
getEsc "US" = Just '\US'
getEsc "SP" = Just '\SP'
getEsc "DEL" = Just '\DEL'
getEsc str = Nothing
unescape' : List Char -> List Char -> Maybe (List Char)
unescape' _ [] = pure []
unescape' escapeChars (x::xs)
= assert_total $ if escapeChars `isPrefixOf` (x::xs)
then case drop (length escapeChars) (x::xs) of
('\\' :: xs) => pure $ '\\' :: !(unescape' escapeChars xs)
('\n' :: xs) => pure !(unescape' escapeChars xs)
('&' :: xs) => pure !(unescape' escapeChars xs)
('a' :: xs) => pure $ '\a' :: !(unescape' escapeChars xs)
('b' :: xs) => pure $ '\b' :: !(unescape' escapeChars xs)
('f' :: xs) => pure $ '\f' :: !(unescape' escapeChars xs)
('n' :: xs) => pure $ '\n' :: !(unescape' escapeChars xs)
('r' :: xs) => pure $ '\r' :: !(unescape' escapeChars xs)
('t' :: xs) => pure $ '\t' :: !(unescape' escapeChars xs)
('v' :: xs) => pure $ '\v' :: !(unescape' escapeChars xs)
('\'' :: xs) => pure $ '\'' :: !(unescape' escapeChars xs)
('"' :: xs) => pure $ '"' :: !(unescape' escapeChars xs)
('x' :: xs) => case span isHexDigit xs of
([], rest) => unescape' escapeChars rest
(ds, rest) => pure $ cast !(toHex 1 (reverse ds)) ::
!(unescape' escapeChars rest)
('o' :: xs) => case span isOctDigit xs of
([], rest) => unescape' escapeChars rest
(ds, rest) => pure $ cast !(toOct 1 (reverse ds)) ::
!(unescape' escapeChars rest)
xs => case span isDigit xs of
([], (a :: b :: c :: rest)) =>
case getEsc (fastPack [a, b, c]) of
Just v => Just (v :: !(unescape' escapeChars rest))
Nothing => case getEsc (fastPack [a, b]) of
Just v => Just (v :: !(unescape' escapeChars (c :: rest)))
Nothing => unescape' escapeChars xs
([], (a :: b :: [])) =>
case getEsc (fastPack [a, b]) of
Just v => Just (v :: [])
Nothing => unescape' escapeChars xs
([], rest) => unescape' escapeChars rest
(ds, rest) => Just $ cast (cast {to=Int} (fastPack ds)) ::
!(unescape' escapeChars rest)
else Just $ x :: !(unescape' escapeChars xs)
where
toHex : Int -> List Char -> Maybe Int
toHex _ [] = Just 0
toHex m (d :: ds)
= pure $ !(hex (toLower d)) * m + !(toHex (m*16) ds)
toOct : Int -> List Char -> Maybe Int
toOct _ [] = Just 0
toOct m (d :: ds)
= pure $ !(oct (toLower d)) * m + !(toOct (m*8) ds)
export
unescape : Nat -> String -> Maybe String
unescape hashtag x = let escapeChars = '\\' :: replicate hashtag '#' in
fastPack <$> (unescape' escapeChars (unpack x))

View File

@ -1,69 +1,174 @@
module Protocol.SExp.Parser
import Idris.IDEMode.Commands
import Data.List
import Libraries.Text.Lexer
import Protocol.SExp
import Parser.Lexer.Common
import Parser.Source
import Parser.Lexer.Source
import Libraries.Text.Token
import Libraries.Text.Lexer.Tokenizer
import Libraries.Text.Parser
import Libraries.Text.PrettyPrint.Prettyprinter.Symbols
import Libraries.Text.PrettyPrint.Prettyprinter.Util
import Libraries.Text.PrettyPrint.Prettyprinter.Doc
import Core.Metadata
import Parser.Support.Escaping
%default total
%hide Text.Lexer.symbols
%hide Parser.Lexer.Source.symbols
%hide Parser.Rule.Source.symbol
{- To decouple parsing of S-expressions, we reproduce some
functionality from the source-file parser. We duplicate it since we
don't need the full blown functionality of the source-code token.
-}
public export
data SExpToken
= StringLit String
| IntegerLit Integer
| Symbol String
| Ident String
| StringBegin Nat
| StringEnd
| Whitespace
| EndInput
export
Show SExpToken where
-- Literals
show (IntegerLit x) = "literal " ++ show x
-- String
show (StringBegin hashtag) = "string begin"
show StringEnd = "string end"
show (StringLit x) = "string " ++ show x
-- Identifiers
show (Ident x) = "identifier " ++ x
show (Symbol x) = "symbol " ++ x
show Whitespace = " "
show EndInput = "end of input"
export
Pretty Void SExpToken where
-- Literals
pretty (IntegerLit x) = pretty "literal" <++> pretty (show x)
-- String
pretty (StringBegin hashtag) = reflow "string begin"
pretty StringEnd = reflow "string end"
pretty (StringLit x) = pretty "string" <++> dquotes (pretty x)
-- Identifiers
pretty (Ident x) = pretty "identifier" <++> pretty x
pretty (Symbol x) = pretty "symbol" <++> pretty x
-- Special
pretty Whitespace = pretty "space"
pretty EndInput = reflow "end of input"
SExpRule : Type -> Type
SExpRule = Grammar () SExpToken True
EmptySExpRule : Type -> Type
EmptySExpRule = Grammar () SExpToken False
symbols : List String
symbols = ["(", ":", ")"]
symbol : String -> Rule ()
symbol : String -> SExpRule ()
symbol req
= terminal ("Expected '" ++ req ++ "'") $
\case
Symbol s => guard (s == req)
_ => Nothing
space : SExpRule ()
space = terminal ("Expected a space") $
\case
Whitespace => Just ()
_ => Nothing
stringTokens : Tokenizer Token
exactIdent : String -> SExpRule ()
exactIdent req
= terminal ("Expected " ++ req) $
\case
Ident s => guard (s == req)
_ => Nothing
simpleStrLit : SExpRule String
simpleStrLit
= terminal "Expected string literal" $
\case
StringLit s => unescape 0 s
_ => Nothing
strBegin : SExpRule Nat
strBegin = terminal "Expected string begin" $
\case
StringBegin hashtag => Just hashtag
_ => Nothing
strEnd : SExpRule ()
strEnd = terminal "Expected string end" $
\case
StringEnd => Just ()
_ => Nothing
simpleStr : SExpRule String
simpleStr = strBegin *> commit *> (option "" simpleStrLit) <* strEnd
stringTokens : Tokenizer SExpToken
stringTokens
= match (someUntil (is '"') (escape (is '\\') any <|> any)) StringLit
ideTokens : Tokenizer Token
ideTokens : Tokenizer SExpToken
ideTokens =
match (choice $ exact <$> symbols) Symbol
<|> match digits (IntegerLit . cast)
<|> compose (is '"')
(const $ StringBegin 0 Single)
(const $ StringBegin 0)
(const ())
(const stringTokens)
(const $ is '"')
(const StringEnd)
<|> match (some $ pred isSpace) (const Whitespace)
<|> match identAllowDashes Ident
<|> match space (const Comment)
idelex : String -> Either (StopReason, Int, Int, String) (List (WithBounds Token))
idelex str
= case lex ideTokens str of
-- Add the EndInput token so that we'll have a line and column
-- number to read when storing spans in the file
(tok, (EndInput, l, c, _)) => Right (filter notComment tok ++
[MkBounded EndInput False (MkBounds l c l c)])
(_, fail) => Left fail
where
notComment : WithBounds Token -> Bool
notComment t = case t.val of
Comment => False
_ => True
notWhitespace : WithBounds SExpToken -> Bool
notWhitespace btok = case btok.val of
Whitespace => False
_ => True
idelex : String ->
Either (StopReason, Int, Int, String)
(List (WithBounds SExpToken))
idelex str = case lex ideTokens str of
-- Add the EndInput token so that we'll have a line and column
-- number to read when storing spans in the file
(tok, (EndInput, l, c, _)) => Right (filter notWhitespace tok ++
[MkBounded EndInput False (MkBounds l c l c)])
(_, fail) => Left fail
identifierSExp : SExpRule String
identifierSExp
= terminal "Expected name" $
\case
Ident str => Just str
_ => Nothing
intLit : SExpRule Integer
intLit
= terminal "Expected integer literal" $
\case
IntegerLit i => Just i
_ => Nothing
eoi : EmptySExpRule ()
eoi = ignore $ nextIs "Expected end of input" isEOI
where
isEOI : SExpToken -> Bool
isEOI EndInput = True
isEOI _ = False
covering
sexp : Rule SExp
sexp : SExpRule SExp
sexp
= do symbol ":"; exactIdent "True"
pure (BoolAtom True)
@ -73,20 +178,20 @@ sexp
pure (IntegerAtom i)
<|> do str <- simpleStr
pure (StringAtom str)
<|> do symbol ":"; x <- unqualifiedName
<|> do symbol ":"; x <- identifierSExp
pure (SymbolAtom x)
<|> do Parser.symbol "("
xs <- Parser.many sexp
xs <- many sexp
symbol ")"
pure (SExpList xs)
public export
data SExpError =
LexError (StopReason, Int, Int, String)
| ParseErrors (List1 $ ParsingError Token)
| ParseErrors (List1 $ ParsingError SExpToken)
ideParser : {e : _} ->
String -> Grammar ParsingState Token e ty -> Either SExpError ty
String -> Grammar () SExpToken e ty -> Either SExpError ty
ideParser str p
= do toks <- mapFst LexError $ idelex str
(_, _, (parsed, _)) <- mapFst ParseErrors $ parseWith p toks