[ refactor ] S-Exp protocols to depend on fewer Idris modules (#3060)

2024-10-04 00:48:36 +03:00 · 2023-08-31 11:53:14 +01:00 · 2023-08-31 11:53:14 +01:00 · d3dc9b7c44
commit d3dc9b7c44
parent e3214586a5
7 changed files with 306 additions and 185 deletions
--- a/.gitignore
+++ b/.gitignore
@ -15,6 +15,7 @@ idris2docs_venv
 /docs/build

 /libs/**/build
+/ipkg/build

 /tests/**/build
 /tests/**/output*
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -107,6 +107,11 @@

 * Fixed a bug that caused `f` to sometimes be replaced by `fx` after matching `fx = f x`.

+* Refactor the idris2protocols package to depend on fewer Idris2 modules.
+  We can now export the package independently.
+  To avoid confusing tooling about which `.ipkg` to use, the
+  package file is under the newly added `ipkg` sub-directory.
+
 ### Library changes

 #### Prelude
--- a/idris2api.ipkg
+++ b/idris2api.ipkg
@ -224,6 +224,7 @@ modules =
    Parser.Package,
    Parser.Source,
    Parser.Support,
+    Parser.Support.Escaping,
    Parser.Unlit,

    Parser.Lexer.Common,
--- a/ipkg/idris2protocols.ipkg
+++ b/ipkg/idris2protocols.ipkg
@ -3,33 +3,37 @@ version = 0.0.1

 modules
  = Protocol.Hex
-  -- , Protocol.IDE
+  , Protocol.IDE
  , Protocol.IDE.Command
  , Protocol.IDE.Decoration
  , Protocol.SExp
-  -- TODO: want to add this module, but the parser library introduces
-  -- dependencies on core idris
-  -- , Protocol.SExp.Parser
-  , Libraries.Data.Span
-  , Libraries.Text.Lexer
  , Libraries.Control.Delayed
  , Libraries.Text.Bounded
  , Libraries.Text.Lexer.Core
  , Libraries.Text.Quantity
  , Libraries.Text.Token
-  , Libraries.Text.Lexer.Tokenizer
+  , Libraries.Text.Lexer
+  , Parser.Lexer.Common
+  , Libraries.Data.Span
  , Libraries.Data.String.Extra
  , Libraries.Text.PrettyPrint.Prettyprinter.Doc
  , Libraries.Text.PrettyPrint.Prettyprinter.Symbols
+  , Libraries.Text.PrettyPrint.Prettyprinter.Util
+  , Libraries.Text.PrettyPrint.Prettyprinter.Render.String
  , Libraries.Text.PrettyPrint.Prettyprinter
+  , Libraries.Text.Lexer.Tokenizer
  , Libraries.Text.Parser.Core
  , Libraries.Text.Parser
-  , Parser.Rule.Source
-  -- , Protocol.IDE.FileContext
-  -- , Protocol.IDE.Formatting
-  -- , Protocol.IDE.Holes
-  -- , Protocol.IDE.Result
-  -- , Protocol.IDE.Highlight
+  , Parser.Support.Escaping
+  , Protocol.SExp.Parser
+  , Protocol.IDE.Decoration
+  , Protocol.IDE.Command
+  , Protocol.Hex
+  , Protocol.IDE.FileContext
+  , Protocol.IDE.Formatting
+  , Protocol.IDE.Holes
+  , Protocol.IDE.Result
+  , Protocol.IDE.Highlight

 --depends =

--- a/src/Parser/Support.idr
+++ b/src/Parser/Support.idr
@ -10,6 +10,7 @@ import Core.TT
 import Core.Core
 import Data.List
 import Parser.Unlit
+import public Parser.Support.Escaping

 %default total

@ -39,146 +40,6 @@ fromParsingErrors origin = ParseFail . (map fromError)
                      else MkFC origin start end
            in (fc, msg +> '.')

-export
-hex : Char -> Maybe Int
-hex '0' = Just 0
-hex '1' = Just 1
-hex '2' = Just 2
-hex '3' = Just 3
-hex '4' = Just 4
-hex '5' = Just 5
-hex '6' = Just 6
-hex '7' = Just 7
-hex '8' = Just 8
-hex '9' = Just 9
-hex 'a' = Just 10
-hex 'b' = Just 11
-hex 'c' = Just 12
-hex 'd' = Just 13
-hex 'e' = Just 14
-hex 'f' = Just 15
-hex _ = Nothing
-
-export
-dec : Char -> Maybe Int
-dec '0' = Just 0
-dec '1' = Just 1
-dec '2' = Just 2
-dec '3' = Just 3
-dec '4' = Just 4
-dec '5' = Just 5
-dec '6' = Just 6
-dec '7' = Just 7
-dec '8' = Just 8
-dec '9' = Just 9
-dec _ = Nothing
-
-export
-oct : Char -> Maybe Int
-oct '0' = Just 0
-oct '1' = Just 1
-oct '2' = Just 2
-oct '3' = Just 3
-oct '4' = Just 4
-oct '5' = Just 5
-oct '6' = Just 6
-oct '7' = Just 7
-oct _ = Nothing
-
-export
-getEsc : String -> Maybe Char
-getEsc "NUL" = Just '\NUL'
-getEsc "SOH" = Just '\SOH'
-getEsc "STX" = Just '\STX'
-getEsc "ETX" = Just '\ETX'
-getEsc "EOT" = Just '\EOT'
-getEsc "ENQ" = Just '\ENQ'
-getEsc "ACK" = Just '\ACK'
-getEsc "BEL" = Just '\BEL'
-getEsc "BS" = Just '\BS'
-getEsc "HT" = Just '\HT'
-getEsc "LF" = Just '\LF'
-getEsc "VT" = Just '\VT'
-getEsc "FF" = Just '\FF'
-getEsc "CR" = Just '\CR'
-getEsc "SO" = Just '\SO'
-getEsc "SI" = Just '\SI'
-getEsc "DLE" = Just '\DLE'
-getEsc "DC1" = Just '\DC1'
-getEsc "DC2" = Just '\DC2'
-getEsc "DC3" = Just '\DC3'
-getEsc "DC4" = Just '\DC4'
-getEsc "NAK" = Just '\NAK'
-getEsc "SYN" = Just '\SYN'
-getEsc "ETB" = Just '\ETB'
-getEsc "CAN" = Just '\CAN'
-getEsc "EM" = Just '\EM'
-getEsc "SUB" = Just '\SUB'
-getEsc "ESC" = Just '\ESC'
-getEsc "FS" = Just '\FS'
-getEsc "GS" = Just '\GS'
-getEsc "RS" = Just '\RS'
-getEsc "US" = Just '\US'
-getEsc "SP" = Just '\SP'
-getEsc "DEL" = Just '\DEL'
-getEsc str = Nothing
-
-unescape' : List Char -> List Char -> Maybe (List Char)
-unescape' _ [] = pure []
-unescape' escapeChars (x::xs)
-    = assert_total $ if escapeChars `isPrefixOf` (x::xs)
-         then case drop (length escapeChars) (x::xs) of
-                   ('\\' :: xs) => pure $ '\\' :: !(unescape' escapeChars xs)
-                   ('\n' :: xs) => pure !(unescape' escapeChars xs)
-                   ('&' :: xs) => pure !(unescape' escapeChars xs)
-                   ('a' :: xs) => pure $ '\a' :: !(unescape' escapeChars xs)
-                   ('b' :: xs) => pure $ '\b' :: !(unescape' escapeChars xs)
-                   ('f' :: xs) => pure $ '\f' :: !(unescape' escapeChars xs)
-                   ('n' :: xs) => pure $ '\n' :: !(unescape' escapeChars xs)
-                   ('r' :: xs) => pure $ '\r' :: !(unescape' escapeChars xs)
-                   ('t' :: xs) => pure $ '\t' :: !(unescape' escapeChars xs)
-                   ('v' :: xs) => pure $ '\v' :: !(unescape' escapeChars xs)
-                   ('\'' :: xs) => pure $ '\'' :: !(unescape' escapeChars xs)
-                   ('"' :: xs) => pure $ '"' :: !(unescape' escapeChars xs)
-                   ('x' :: xs) => case span isHexDigit xs of
-                                       ([], rest) => unescape' escapeChars rest
-                                       (ds, rest) => pure $ cast !(toHex 1 (reverse ds)) ::
-                                                             !(unescape' escapeChars rest)
-                   ('o' :: xs) => case span isOctDigit xs of
-                                       ([], rest) => unescape' escapeChars rest
-                                       (ds, rest) => pure $ cast !(toOct 1 (reverse ds)) ::
-                                                             !(unescape' escapeChars rest)
-                   xs => case span isDigit xs of
-                              ([], (a :: b :: c :: rest)) =>
-                                case getEsc (fastPack [a, b, c]) of
-                                     Just v => Just (v :: !(unescape' escapeChars rest))
-                                     Nothing => case getEsc (fastPack [a, b]) of
-                                                     Just v => Just (v :: !(unescape' escapeChars (c :: rest)))
-                                                     Nothing => unescape' escapeChars xs
-                              ([], (a :: b :: [])) =>
-                                case getEsc (fastPack [a, b]) of
-                                     Just v => Just (v :: [])
-                                     Nothing => unescape' escapeChars xs
-                              ([], rest) => unescape' escapeChars rest
-                              (ds, rest) => Just $ cast (cast {to=Int} (fastPack ds)) ::
-                                              !(unescape' escapeChars rest)
-         else Just $ x :: !(unescape' escapeChars xs)
-  where
-    toHex : Int -> List Char -> Maybe Int
-    toHex _ [] = Just 0
-    toHex m (d :: ds)
-        = pure $ !(hex (toLower d)) * m + !(toHex (m*16) ds)
-
-    toOct : Int -> List Char -> Maybe Int
-    toOct _ [] = Just 0
-    toOct m (d :: ds)
-        = pure $ !(oct (toLower d)) * m + !(toOct (m*8) ds)
-
-export
-unescape : Nat -> String -> Maybe String
-unescape hashtag x = let escapeChars = '\\' :: replicate hashtag '#' in
-                       fastPack <$> (unescape' escapeChars (unpack x))
-
 export
 getCharLit : String -> Maybe Char
 getCharLit str
--- a/src/Parser/Support/Escaping.idr
+++ b/src/Parser/Support/Escaping.idr
@ -0,0 +1,144 @@
+module Parser.Support.Escaping
+
+import Libraries.Data.String.Extra
+import Data.List
+
+export
+hex : Char -> Maybe Int
+hex '0' = Just 0
+hex '1' = Just 1
+hex '2' = Just 2
+hex '3' = Just 3
+hex '4' = Just 4
+hex '5' = Just 5
+hex '6' = Just 6
+hex '7' = Just 7
+hex '8' = Just 8
+hex '9' = Just 9
+hex 'a' = Just 10
+hex 'b' = Just 11
+hex 'c' = Just 12
+hex 'd' = Just 13
+hex 'e' = Just 14
+hex 'f' = Just 15
+hex _ = Nothing
+
+export
+dec : Char -> Maybe Int
+dec '0' = Just 0
+dec '1' = Just 1
+dec '2' = Just 2
+dec '3' = Just 3
+dec '4' = Just 4
+dec '5' = Just 5
+dec '6' = Just 6
+dec '7' = Just 7
+dec '8' = Just 8
+dec '9' = Just 9
+dec _ = Nothing
+
+export
+oct : Char -> Maybe Int
+oct '0' = Just 0
+oct '1' = Just 1
+oct '2' = Just 2
+oct '3' = Just 3
+oct '4' = Just 4
+oct '5' = Just 5
+oct '6' = Just 6
+oct '7' = Just 7
+oct _ = Nothing
+
+export
+getEsc : String -> Maybe Char
+getEsc "NUL" = Just '\NUL'
+getEsc "SOH" = Just '\SOH'
+getEsc "STX" = Just '\STX'
+getEsc "ETX" = Just '\ETX'
+getEsc "EOT" = Just '\EOT'
+getEsc "ENQ" = Just '\ENQ'
+getEsc "ACK" = Just '\ACK'
+getEsc "BEL" = Just '\BEL'
+getEsc "BS" = Just '\BS'
+getEsc "HT" = Just '\HT'
+getEsc "LF" = Just '\LF'
+getEsc "VT" = Just '\VT'
+getEsc "FF" = Just '\FF'
+getEsc "CR" = Just '\CR'
+getEsc "SO" = Just '\SO'
+getEsc "SI" = Just '\SI'
+getEsc "DLE" = Just '\DLE'
+getEsc "DC1" = Just '\DC1'
+getEsc "DC2" = Just '\DC2'
+getEsc "DC3" = Just '\DC3'
+getEsc "DC4" = Just '\DC4'
+getEsc "NAK" = Just '\NAK'
+getEsc "SYN" = Just '\SYN'
+getEsc "ETB" = Just '\ETB'
+getEsc "CAN" = Just '\CAN'
+getEsc "EM" = Just '\EM'
+getEsc "SUB" = Just '\SUB'
+getEsc "ESC" = Just '\ESC'
+getEsc "FS" = Just '\FS'
+getEsc "GS" = Just '\GS'
+getEsc "RS" = Just '\RS'
+getEsc "US" = Just '\US'
+getEsc "SP" = Just '\SP'
+getEsc "DEL" = Just '\DEL'
+getEsc str = Nothing
+
+unescape' : List Char -> List Char -> Maybe (List Char)
+unescape' _ [] = pure []
+unescape' escapeChars (x::xs)
+    = assert_total $ if escapeChars `isPrefixOf` (x::xs)
+         then case drop (length escapeChars) (x::xs) of
+                   ('\\' :: xs) => pure $ '\\' :: !(unescape' escapeChars xs)
+                   ('\n' :: xs) => pure !(unescape' escapeChars xs)
+                   ('&' :: xs) => pure !(unescape' escapeChars xs)
+                   ('a' :: xs) => pure $ '\a' :: !(unescape' escapeChars xs)
+                   ('b' :: xs) => pure $ '\b' :: !(unescape' escapeChars xs)
+                   ('f' :: xs) => pure $ '\f' :: !(unescape' escapeChars xs)
+                   ('n' :: xs) => pure $ '\n' :: !(unescape' escapeChars xs)
+                   ('r' :: xs) => pure $ '\r' :: !(unescape' escapeChars xs)
+                   ('t' :: xs) => pure $ '\t' :: !(unescape' escapeChars xs)
+                   ('v' :: xs) => pure $ '\v' :: !(unescape' escapeChars xs)
+                   ('\'' :: xs) => pure $ '\'' :: !(unescape' escapeChars xs)
+                   ('"' :: xs) => pure $ '"' :: !(unescape' escapeChars xs)
+                   ('x' :: xs) => case span isHexDigit xs of
+                                       ([], rest) => unescape' escapeChars rest
+                                       (ds, rest) => pure $ cast !(toHex 1 (reverse ds)) ::
+                                                             !(unescape' escapeChars rest)
+                   ('o' :: xs) => case span isOctDigit xs of
+                                       ([], rest) => unescape' escapeChars rest
+                                       (ds, rest) => pure $ cast !(toOct 1 (reverse ds)) ::
+                                                             !(unescape' escapeChars rest)
+                   xs => case span isDigit xs of
+                              ([], (a :: b :: c :: rest)) =>
+                                case getEsc (fastPack [a, b, c]) of
+                                     Just v => Just (v :: !(unescape' escapeChars rest))
+                                     Nothing => case getEsc (fastPack [a, b]) of
+                                                     Just v => Just (v :: !(unescape' escapeChars (c :: rest)))
+                                                     Nothing => unescape' escapeChars xs
+                              ([], (a :: b :: [])) =>
+                                case getEsc (fastPack [a, b]) of
+                                     Just v => Just (v :: [])
+                                     Nothing => unescape' escapeChars xs
+                              ([], rest) => unescape' escapeChars rest
+                              (ds, rest) => Just $ cast (cast {to=Int} (fastPack ds)) ::
+                                              !(unescape' escapeChars rest)
+         else Just $ x :: !(unescape' escapeChars xs)
+  where
+    toHex : Int -> List Char -> Maybe Int
+    toHex _ [] = Just 0
+    toHex m (d :: ds)
+        = pure $ !(hex (toLower d)) * m + !(toHex (m*16) ds)
+
+    toOct : Int -> List Char -> Maybe Int
+    toOct _ [] = Just 0
+    toOct m (d :: ds)
+        = pure $ !(oct (toLower d)) * m + !(toOct (m*8) ds)
+
+export
+unescape : Nat -> String -> Maybe String
+unescape hashtag x = let escapeChars = '\\' :: replicate hashtag '#' in
+                       fastPack <$> (unescape' escapeChars (unpack x))
--- a/src/Protocol/SExp/Parser.idr
+++ b/src/Protocol/SExp/Parser.idr
@ -1,69 +1,174 @@
 module Protocol.SExp.Parser

-import Idris.IDEMode.Commands
-
 import Data.List

-import Libraries.Text.Lexer
+import Protocol.SExp
+
 import Parser.Lexer.Common
-import Parser.Source
-import Parser.Lexer.Source
 import Libraries.Text.Token
 import Libraries.Text.Lexer.Tokenizer
 import Libraries.Text.Parser
+import Libraries.Text.PrettyPrint.Prettyprinter.Symbols
+import Libraries.Text.PrettyPrint.Prettyprinter.Util
+import Libraries.Text.PrettyPrint.Prettyprinter.Doc

-import Core.Metadata
+import Parser.Support.Escaping

 %default total

-%hide Text.Lexer.symbols
-%hide Parser.Lexer.Source.symbols
-%hide Parser.Rule.Source.symbol
+{- To decouple parsing of S-expressions, we reproduce some
+   functionality from the source-file parser. We duplicate it since we
+   don't need the full blown functionality of the source-code token.
+-}
+
+public export
+data SExpToken
+  = StringLit String
+  | IntegerLit Integer
+  | Symbol String
+  | Ident String
+  | StringBegin Nat
+  | StringEnd
+  | Whitespace
+  | EndInput
+
+export
+Show SExpToken where
+  -- Literals
+  show (IntegerLit x) = "literal " ++ show x
+  -- String
+  show (StringBegin hashtag) = "string begin"
+  show StringEnd = "string end"
+  show (StringLit x) = "string " ++ show x
+  -- Identifiers
+  show (Ident x) = "identifier " ++ x
+  show (Symbol x) = "symbol " ++ x
+  show Whitespace = " "
+  show EndInput = "end of input"
+
+export
+Pretty Void SExpToken where
+  -- Literals
+  pretty (IntegerLit x) = pretty "literal" <++> pretty (show x)
+  -- String
+  pretty (StringBegin hashtag) = reflow "string begin"
+  pretty StringEnd = reflow "string end"
+  pretty (StringLit x) = pretty "string" <++> dquotes (pretty x)
+  -- Identifiers
+  pretty (Ident x) = pretty "identifier" <++> pretty x
+  pretty (Symbol x) = pretty "symbol" <++> pretty x
+  -- Special
+  pretty Whitespace = pretty "space"
+  pretty EndInput = reflow "end of input"
+
+SExpRule : Type -> Type
+SExpRule = Grammar () SExpToken True
+
+EmptySExpRule : Type -> Type
+EmptySExpRule = Grammar () SExpToken False

 symbols : List String
 symbols = ["(", ":", ")"]

-symbol : String -> Rule ()
+symbol : String -> SExpRule ()
 symbol req
    = terminal ("Expected '" ++ req ++ "'") $
               \case
                 Symbol s => guard (s == req)
                 _ => Nothing

+space : SExpRule ()
+space = terminal ("Expected a space") $
+               \case
+                 Whitespace => Just ()
+                 _ => Nothing

-stringTokens : Tokenizer Token
+exactIdent : String -> SExpRule ()
+exactIdent req
+    = terminal ("Expected " ++ req) $
+               \case
+                 Ident s => guard (s == req)
+                 _ => Nothing
+
+simpleStrLit : SExpRule String
+simpleStrLit
+    = terminal "Expected string literal" $
+               \case
+                 StringLit s => unescape 0 s
+                 _ => Nothing
+
+strBegin : SExpRule Nat
+strBegin = terminal "Expected string begin" $
+                    \case
+                      StringBegin hashtag => Just hashtag
+                      _ => Nothing
+
+strEnd : SExpRule ()
+strEnd = terminal "Expected string end" $
+                  \case
+                    StringEnd => Just ()
+                    _ => Nothing
+
+simpleStr : SExpRule String
+simpleStr = strBegin *> commit *> (option "" simpleStrLit) <* strEnd
+
+stringTokens : Tokenizer SExpToken
 stringTokens
    = match (someUntil (is '"') (escape (is '\\') any <|> any)) StringLit

-ideTokens : Tokenizer Token
+ideTokens : Tokenizer SExpToken
 ideTokens =
      match (choice $ exact <$> symbols) Symbol
  <|> match digits (IntegerLit . cast)
  <|> compose (is '"')
-              (const $ StringBegin 0 Single)
+              (const $ StringBegin 0)
              (const ())
              (const stringTokens)
              (const $ is '"')
              (const StringEnd)
+  <|> match (some $ pred isSpace) (const Whitespace)
  <|> match identAllowDashes Ident
-  <|> match space (const Comment)

-idelex : String -> Either (StopReason, Int, Int, String) (List (WithBounds Token))
-idelex str
-    = case lex ideTokens str of
-           -- Add the EndInput token so that we'll have a line and column
-           -- number to read when storing spans in the file
-           (tok, (EndInput, l, c, _)) => Right (filter notComment tok ++
-                                               [MkBounded EndInput False (MkBounds l c l c)])
-           (_, fail) => Left fail
-    where
-      notComment : WithBounds Token -> Bool
-      notComment t = case t.val of
-                          Comment => False
-                          _ => True
+notWhitespace : WithBounds SExpToken -> Bool
+notWhitespace btok = case btok.val of
+  Whitespace => False
+  _ => True
+
+idelex : String ->
+  Either (StopReason, Int, Int, String)
+         (List (WithBounds SExpToken))
+idelex str = case lex ideTokens str of
+  -- Add the EndInput token so that we'll have a line and column
+  -- number to read when storing spans in the file
+  (tok, (EndInput, l, c, _)) => Right (filter notWhitespace tok ++
+                                      [MkBounded EndInput False (MkBounds l c l c)])
+  (_, fail) => Left fail
+
+
+identifierSExp : SExpRule String
+identifierSExp
+    = terminal "Expected name" $
+               \case
+                 Ident str => Just str
+                 _ => Nothing
+
+intLit : SExpRule Integer
+intLit
+    = terminal "Expected integer literal" $
+               \case
+                 IntegerLit i => Just i
+                 _ => Nothing
+
+
+eoi : EmptySExpRule ()
+eoi = ignore $ nextIs "Expected end of input" isEOI
+  where
+    isEOI : SExpToken -> Bool
+    isEOI EndInput = True
+    isEOI _ = False

 covering
-sexp : Rule SExp
+sexp : SExpRule SExp
 sexp
    = do symbol ":"; exactIdent "True"
         pure (BoolAtom True)
@ -73,20 +178,20 @@ sexp
         pure (IntegerAtom i)
  <|> do str <- simpleStr
         pure (StringAtom str)
-  <|> do symbol ":"; x <- unqualifiedName
+  <|> do symbol ":"; x <- identifierSExp
         pure (SymbolAtom x)
  <|> do Parser.symbol "("
-         xs <- Parser.many sexp
+         xs <- many sexp
         symbol ")"
         pure (SExpList xs)

 public export
 data SExpError =
    LexError (StopReason, Int, Int, String)
-  | ParseErrors (List1 $ ParsingError Token)
+  | ParseErrors (List1 $ ParsingError SExpToken)

 ideParser : {e : _} ->
-            String -> Grammar ParsingState Token e ty -> Either SExpError ty
+            String -> Grammar () SExpToken e ty -> Either SExpError ty
 ideParser str p
    = do toks   <- mapFst LexError $ idelex str
         (_, _, (parsed, _)) <- mapFst ParseErrors $ parseWith p toks