Insert parsed comments back into the AST and pretty-print with comments (#1845)

This PR does three main things:
1. Insert parsed comments into the AST
2. Extend pretty-printing to include comments
3. Extend the `format` subcommand with a few additional options and move the code to `Swarm.Language.Format`.

The pretty-printed code-with-comments is OK but not great.  It does a reasonable job with comments in standard-ish places; for example, it turns
```
// This function increments a number
def incr : int -> int =
  \n. n + 1
end

/* This command does some stuff.  It is super
   cool and important. */
def stuff : cmd unit =
  move;
  move;
  move;  // the third move is important
  move;
end
```
into
```

// This function increments a number
def incr: int -> int = \n. n + 1 end;

/* This command does some stuff.  It is super
   cool and important. */
def stuff: cmd unit =
  move;
  move;
  move // the third move is important
  ;
  move
end
```
which is good other than the fact that it moves the inline comment after `move;` to before the semicolon.

However, it turns this:
```
// This function does a cool math thing
def foo : int -> int =     // This is an optional type signature
  // pre
  \n. n + 1    // add one
end

/* This is a
   block comment which
   spans multiple lines */

def bar : int -> int   // Another type signature, = on the next line
  = \x. foo /* very important to use foo here */ (foo x)   // very cool implementation
end

def baz : cmd unit =
  move;
  move;
  turn left;   // don't forget to turn left!
  move
end

// And one last thing
```
into this:
```

// This function does a cool math thing
def foo: int -> int 
  = \n.
  n + 1 // add one

end // This is an optional type signature
;

/* This is a
   block comment which
   spans multiple lines */
def bar: int -> int 
  = \x.
  foo /* very important to use foo here */ (
    foo x // very cool implementation

  )
end // Another type signature, = on the next line
```
which has several obvious problems.  I think I know what the problem is in most cases; it will just require more engineering and special cases to get the output to look nicer, but I honestly don't really want to spend more time on this right now.  I'm hoping we can merge this as is (since it is still better than the status quo, namely, deleting all comments) and continue to improve it in the future.

The important point is that I ran the code formatter on every single `.sw` file in the repository and then re-ran the test suite; all the tests passed. So at least `swarm format` does not seem to break anything even if the output does not look the best.

Closes #1467 .
This commit is contained in:
Brent Yorgey 2024-05-14 06:32:03 -05:00 committed by GitHub
parent 79bf8ebcf5
commit 76958a4639
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 406 additions and 178 deletions

View File

@ -7,23 +7,15 @@
module Main where
import Data.Foldable qualified
import Data.Text (Text, pack)
import Data.Text.IO qualified as Text
import GitHash (GitInfo, giBranch, giHash, tGitInfoCwdTry)
import Options.Applicative
import Prettyprinter
import Prettyprinter.Render.Text qualified as RT
import Swarm.App (appMain)
import Swarm.Language.Format
import Swarm.Language.LSP (lspMain)
import Swarm.Language.Parser (readTerm)
import Swarm.Language.Pretty (ppr)
import Swarm.TUI.Model (AppOpts (..), ColorMode (..))
import Swarm.TUI.Model.UI (defaultInitLgTicksPerSecond)
import Swarm.Util ((?))
import Swarm.Version
import Swarm.Web (defaultPort)
import System.Console.Terminal.Size qualified as Term
import System.Exit (exitFailure)
import System.IO (hPrint, stderr)
import Text.Read (readMaybe)
@ -35,11 +27,9 @@ commitInfo = case gitInfo of
Nothing -> ""
Just git -> " (" <> giBranch git <> "@" <> take 10 (giHash git) <> ")"
type Width = Int
data CLI
= Run AppOpts
| Format Input (Maybe Width)
| Format FormatInput FormatOutput (Maybe FormatWidth)
| LSP
| Version
@ -47,7 +37,7 @@ cliParser :: Parser CLI
cliParser =
subparser
( mconcat
[ command "format" (info (Format <$> format <*> optional widthOpt <**> helper) (progDesc "Format a file"))
[ command "format" (info (Format <$> input <*> output <*> optional widthOpt <**> helper) (progDesc "Format a file"))
, command "lsp" (info (pure LSP) (progDesc "Start the LSP"))
, command "version" (info (pure Version) (progDesc "Get current and upstream version."))
]
@ -65,12 +55,18 @@ cliParser =
<*> pure gitInfo
)
where
format :: Parser Input
format =
input :: Parser FormatInput
input =
flag' Stdin (long "stdin" <> help "Read code from stdin")
<|> (File <$> strArgument (metavar "FILE"))
<|> (InputFile <$> strArgument (metavar "FILE"))
widthOpt :: Parser Width
output :: Parser FormatOutput
output =
flag Stdout Stdout (long "stdout" <> help "Write formatted code to stdout (default)")
<|> (OutputFile <$> strOption (long "output" <> short 'o' <> metavar "FILE" <> help "Write formatted code to an output file"))
<|> flag' Inplace (long "inplace" <> short 'i' <> help "Format file in place")
widthOpt :: Parser FormatWidth
widthOpt = option auto (long "width" <> metavar "COLUMNS" <> help "Use layout with maximum width")
seed :: Parser (Maybe Int)
@ -116,34 +112,6 @@ cliInfo =
<> fullDesc
)
data Input = Stdin | File FilePath
getInput :: Input -> IO Text
getInput Stdin = Text.getContents
getInput (File fp) = Text.readFile fp
showInput :: Input -> Text
showInput Stdin = "(input)"
showInput (File fp) = pack fp
-- | Utility function to validate and format swarm-lang code
formatFile :: Input -> Maybe Width -> IO ()
formatFile input mWidth = do
content <- getInput input
case readTerm content of
Right Nothing -> Text.putStrLn ""
Right (Just ast) -> do
mWindow <- Term.size
let mkOpt w = LayoutOptions (AvailablePerLine w 1.0)
let opt =
fmap mkOpt mWidth
? fmap (\(Term.Window _h w) -> mkOpt w) mWindow
? defaultLayoutOptions
Text.putStrLn . RT.renderStrict . layoutPretty opt $ ppr ast
Left e -> do
Text.hPutStrLn stderr $ showInput input <> ":" <> e
exitFailure
showVersion :: IO ()
showVersion = do
putStrLn $ "Swarm game - " <> version <> commitInfo
@ -155,6 +123,6 @@ main = do
cli <- execParser cliInfo
case cli of
Run opts -> appMain opts
Format fo w -> formatFile fo w
Format fi fo w -> formatSwarmIO fi fo w
LSP -> lspMain
Version -> showVersion

View File

@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternSynonyms #-}
-- |
-- SPDX-License-Identifier: BSD-3-Clause
@ -6,7 +7,7 @@
-- Term elaboration which happens after type checking.
module Swarm.Language.Elaborate where
import Control.Lens (transform, (%~), (^.))
import Control.Lens (transform, (%~), (^.), pattern Empty)
import Swarm.Language.Syntax
import Swarm.Language.Types
@ -53,4 +54,4 @@ wrapForce x = mapFreeS x (\s@(Syntax' l _ ty cs) -> Syntax' l (SApp sForce s) ty
-- Note, TyUnit is not the right type, but I don't want to bother
sForce :: Syntax' Polytype
sForce = Syntax' NoLoc (TConst Force) Nothing (Forall ["a"] (TyDelay (TyVar "a") :->: TyVar "a"))
sForce = Syntax' NoLoc (TConst Force) Empty (Forall ["a"] (TyDelay (TyVar "a") :->: TyVar "a"))

View File

@ -0,0 +1,62 @@
{-# LANGUAGE OverloadedStrings #-}
-- |
-- SPDX-License-Identifier: BSD-3-Clause
--
-- Formatting Swarm language code.
module Swarm.Language.Format where
import Control.Applicative ((<|>))
import Data.Text (Text)
import Data.Text qualified as T
import Data.Text.IO qualified as T
import Prettyprinter
import Prettyprinter.Render.Text qualified as RT
import Swarm.Language.Parser (readTerm)
import Swarm.Language.Pretty
import Swarm.Util ((?))
import System.Console.Terminal.Size qualified as Term
import System.Exit (exitFailure)
import System.IO (stderr)
type FormatWidth = Int
-- | From where should the input be taken?
data FormatInput = Stdin | InputFile FilePath
-- | Where should the formatted code be output?
data FormatOutput = Stdout | OutputFile FilePath | Inplace
getInput :: FormatInput -> IO Text
getInput Stdin = T.getContents
getInput (InputFile fp) = T.readFile fp
showInput :: FormatInput -> Text
showInput Stdin = "(input)"
showInput (InputFile fp) = T.pack fp
-- | Validate and format swarm-lang code.
formatSwarmIO :: FormatInput -> FormatOutput -> Maybe FormatWidth -> IO ()
formatSwarmIO input output mWidth = do
content <- getInput input
mWindowWidth <- (fmap . fmap) Term.width Term.size
let w = mWidth <|> case output of Stdout -> mWindowWidth; _ -> Nothing
case formatSwarm w content of
Right fmt -> case output of
Stdout -> T.putStrLn fmt
OutputFile outFile -> T.writeFile outFile fmt
Inplace -> case input of
Stdin -> T.putStrLn fmt
InputFile inFile -> T.writeFile inFile fmt
Left e -> do
T.hPutStrLn stderr $ showInput input <> ":" <> e
exitFailure
formatSwarm :: Maybe FormatWidth -> Text -> Either Text Text
formatSwarm mWidth content = case readTerm content of
Right Nothing -> Right ""
Right (Just ast) ->
let mkOpt w = LayoutOptions (AvailablePerLine w 1.0)
opt = (mkOpt <$> mWidth) ? defaultLayoutOptions
in Right . RT.renderStrict . layoutPretty opt $ ppr ast
Left e -> Left e

View File

@ -82,8 +82,8 @@ validateSwarmCode doc version content = do
flushDiagnosticsBySource 0 (Just diagnosticSourcePrefix)
let (parsingErrs, unusedVarWarnings) = case readTerm' content of
Right (Nothing, _) -> ([], [])
Right (Just term, _) -> (parsingErrors, unusedWarnings)
Right Nothing -> ([], [])
Right (Just term) -> (parsingErrors, unusedWarnings)
where
VU.Usage _ problems = VU.getUsage mempty term
unusedWarnings = mapMaybe (VU.toErrPos content) problems

View File

@ -58,10 +58,14 @@ showHoverInfo ::
VirtualFile ->
Maybe (Text, Maybe J.Range)
showHoverInfo _ p vf@(VirtualFile _ _ myRope) =
case readTerm' content of
Left _ -> Nothing
Right (Nothing, _) -> Nothing
Right (Just stx, _) -> Just $ case processParsedTerm stx of
either (const Nothing) (fmap genHoverInfo) (readTerm' content)
where
content = virtualFileText vf
absolutePos =
R.charLength . fst $ R.charSplitAtPosition (lspToRopePosition p) myRope
genHoverInfo stx =
case processParsedTerm stx of
Left _e ->
let found@(Syntax foundSloc _) = narrowToPosition stx $ fromIntegral absolutePos
finalPos = posToRange myRope foundSloc
@ -70,10 +74,6 @@ showHoverInfo _ p vf@(VirtualFile _ _ myRope) =
let found@(Syntax' foundSloc _ _ _) = narrowToPosition (moduleAST modul) $ fromIntegral absolutePos
finalPos = posToRange myRope foundSloc
in (,finalPos) . treeToMarkdown 0 $ explain found
where
content = virtualFileText vf
absolutePos =
R.charLength . fst $ R.charSplitAtPosition (lspToRopePosition p) myRope
posToRange :: R.Rope -> SrcLoc -> Maybe J.Range
posToRange myRope foundSloc = do

View File

@ -13,14 +13,15 @@ module Swarm.Language.Parser (
readTerm',
) where
import Data.Bifunctor (bimap)
import Data.Bifunctor (first, second)
import Data.Sequence (Seq)
import Data.Text (Text)
import Swarm.Language.Parser.Comment (populateComments)
import Swarm.Language.Parser.Core (ParserError, runParser)
import Swarm.Language.Parser.Lex (sc)
import Swarm.Language.Parser.Term (parseTerm)
import Swarm.Language.Parser.Util (fullyMaybe)
import Swarm.Language.Syntax (Comment, Syntax)
import Swarm.Util.Parse (fullyMaybe)
import Text.Megaparsec.Error (errorBundlePretty)
import Witch (from)
@ -30,9 +31,12 @@ import Witch (from)
-- 'Nothing' if the input was only whitespace) or a pretty-printed
-- parse error message.
readTerm :: Text -> Either Text (Maybe Syntax)
readTerm = bimap (from . errorBundlePretty) fst . runParser (fullyMaybe sc parseTerm)
readTerm = first (from . errorBundlePretty) . readTerm'
-- | A lower-level `readTerm` which returns the megaparsec bundle error
-- for precise error reporting, as well as the parsed comments.
readTerm' :: Text -> Either ParserError (Maybe Syntax, Seq Comment)
readTerm' = runParser (fullyMaybe sc parseTerm)
-- for precise error reporting.
readTerm' :: Text -> Either ParserError (Maybe Syntax)
readTerm' = second handleComments . runParser (fullyMaybe sc parseTerm)
where
handleComments :: (Maybe Syntax, Seq Comment) -> Maybe Syntax
handleComments (s, cs) = populateComments cs <$> s

View File

@ -0,0 +1,119 @@
-- |
-- SPDX-License-Identifier: BSD-3-Clause
--
-- Facilities for re-inserting parsed comments back into an AST.
-- Actual parsing of comments is handled in "Swarm.Language.Parser.Lex".
module Swarm.Language.Parser.Comment (
-- * Comment AST insertion
populateComments,
populateStandaloneComments,
populateSuffixComments,
-- * Generic tree traversals
preorder,
revpostorder,
) where
import Control.Lens (backwards, mapMOf, (%~))
import Control.Lens.Plated (Plated, plate)
import Control.Monad ((>=>))
import Control.Monad.State (MonadState (..), State, evalState)
import Data.Foldable qualified as F
import Data.List (partition)
import Data.Sequence (Seq, (<|), (|>))
import Swarm.Language.Syntax
------------------------------------------------------------
-- Comment insertion
------------------------------------------------------------
-- The approach for preserving comments is taken from
-- https://www.reddit.com/r/haskell/comments/ni4gpm/comment/gz0ipmp/ . In short:
--
-- (1) Parse all comments out-of-band and record a source span for
-- each (this is done in "Swarm.Language.Parser.Lex").
--
-- (2) For each standalone comment (i.e. comments on a line by
-- themselves), attach them to the earliest node in a preorder
-- traversal which begins after the comment.
--
-- (3) For each suffix comment (i.e. comments after something else
-- at the end of a line, or in the middle of a line), attach
-- them to the latest node in a postorder traversal which begins
-- before the comment.
-- | Re-insert parsed comments into an AST. Prerequisite: the sequence of comments
-- must be in order by 'SrcLoc'.
populateComments :: Seq Comment -> Syntax -> Syntax
populateComments cmts = populateStandaloneComments standalone . populateSuffixComments suffix
where
(standalone, suffix) = partition isStandalone (F.toList cmts)
-- | Insert comments from the state at the current AST node (using the
-- provided insertion function) as long as the custom comparison
-- function returns 'True' when applied to the 'SrcLoc's of the next
-- comment and the AST node (in that order).
insertComments ::
(SrcLoc -> SrcLoc -> Bool) ->
(Comment -> Comments -> Comments) ->
Syntax ->
State [Comment] Syntax
insertComments cmpLoc ins = go
where
go s@(CSyntax l t cs) = do
curCmts <- get
case curCmts of
[] -> return s
(nextCmt : restCmts) -> case commentSrcLoc nextCmt `cmpLoc` l of
True -> put restCmts >> go (CSyntax l t (ins nextCmt cs))
False -> return s
-- | Given a list of standalone comments sorted by 'SrcLoc', insert
-- them into the given AST, attaching each comment to the earliest
-- node in a preorder traversal which begins after it.
populateStandaloneComments :: [Comment] -> Syntax -> Syntax
populateStandaloneComments cmts =
flip evalState cmts
. preorder (insertComments srcLocBefore (\c -> beforeComments %~ (|> c)))
-- | Given a list of suffix comments sorted by 'SrcLoc', insert
-- them into the given AST, attaching each comment to the latest
-- node in a postorder traversal which begins before it.
populateSuffixComments :: [Comment] -> Syntax -> Syntax
populateSuffixComments cmts =
flip evalState (reverse cmts)
. revpostorder (insertComments (flip srcLocBefore) (\c -> afterComments %~ (c <|)))
------------------------------------------------------------
-- Traversals
------------------------------------------------------------
-- $setup
-- >>> import Control.Monad.State
-- >>> import Data.Tree
-- >>> import Data.List (intercalate)
-- >>> next :: Tree Int -> State Int (Tree Int); next (Node _ cs) = do { i <- get; put (i+1); return (Node i cs) }
-- >>> showTree :: Show a => Tree a -> String; showTree = foldTree (\n cs -> show n ++ case cs of { [] -> ""; _ -> "(" ++ intercalate " " cs ++ ")" })
-- >>> exampleTree = Node 0 [Node 0 [], Node 0 [Node 0 [], Node 0 [], Node 0 []], Node 0 [Node 0 []]]
-- | Preorder traversal of a 'Plated' structure with a monadic
-- transformation. Apply the transformation at the root, then
-- recursively transform each of the children.
--
-- >>> showTree (evalState (preorder next exampleTree) 0)
-- "0(1 2(3 4 5) 6(7))"
preorder :: (Plated a, Monad m) => (a -> m a) -> (a -> m a)
preorder g = go
where
go = g >=> mapMOf plate go
-- | Reverse postorder traversal of a 'Plated' structure with a
-- monadic transformation. Apply the transformation recursively to
-- all the children in reverse order, then transform the root.
--
-- >>> showTree (evalState (revpostorder next exampleTree) 0)
-- "7(6 5(4 3 2) 1(0))"
revpostorder :: (Plated a, Monad m) => (a -> m a) -> (a -> m a)
revpostorder g = go
where
go = mapMOf (backwards plate) go >=> g

View File

@ -10,8 +10,8 @@ import Language.Haskell.TH.Quote
import Swarm.Language.Parser.Core (runParserTH)
import Swarm.Language.Parser.Lex (sc)
import Swarm.Language.Parser.Type (parsePolytype)
import Swarm.Language.Parser.Util (fully)
import Swarm.Util (liftText)
import Swarm.Util.Parse (fully)
------------------------------------------------------------
-- Quasiquoters

View File

@ -28,7 +28,7 @@ parsePolytype :: Parser Polytype
parsePolytype =
join $
( quantify . fromMaybe []
<$> optional (reserved "forall" *> some identifier <* symbol ".")
<$> optional ((reserved "forall" <|> reserved "") *> some identifier <* symbol ".")
)
<*> parseType
where

View File

@ -3,6 +3,8 @@
--
-- A few utilities for use in conjunction with the parser.
module Swarm.Language.Parser.Util (
fully,
fullyMaybe,
showShortError,
showErrorPos,
getLocRange,
@ -16,6 +18,17 @@ import Text.Megaparsec
import Text.Megaparsec.Pos qualified as Pos
import Witch (from)
-- | Run a parser "fully", consuming leading whitespace and ensuring
-- that the parser extends all the way to eof.
fully :: (MonadParsec e s f) => f () -> f a -> f a
fully sc p = sc *> p <* eof
-- | Run a parser "fully", consuming leading whitespace (including the
-- possibility that the input is nothing but whitespace) and
-- ensuring that the parser extends all the way to eof.
fullyMaybe :: (MonadParsec e s f) => f () -> f a -> f (Maybe a)
fullyMaybe sc = fully sc . optional
-- | A utility for converting a 'ParserError' into a one line message:
-- @<line-nr>: <error-msg>@
showShortError :: ParserError -> String

View File

@ -10,12 +10,12 @@ import Language.Haskell.TH.Quote
import Swarm.Language.Parser.Core (runParserTH)
import Swarm.Language.Parser.Lex (sc)
import Swarm.Language.Parser.Term (parseTerm)
import Swarm.Language.Parser.Util (fully)
import Swarm.Language.Pipeline
import Swarm.Language.Pretty
import Swarm.Language.Syntax
import Swarm.Language.Types (Polytype)
import Swarm.Util (failT, liftText)
import Swarm.Util.Parse (fully)
import Witch (from)
-- | A quasiquoter for Swarm language terms, so we can conveniently

View File

@ -13,9 +13,11 @@ import Control.Lens.Combinators (pattern Empty)
import Control.Monad.Free (Free (..))
import Data.Bool (bool)
import Data.Fix
import Data.Foldable qualified as F
import Data.List.NonEmpty ((<|))
import Data.List.NonEmpty qualified as NE
import Data.Map.Strict qualified as M
import Data.Sequence qualified as Seq
import Data.Set (Set)
import Data.Set qualified as S
import Data.String (fromString)
@ -191,10 +193,35 @@ instance PrettyPrec Capability where
instance PrettyPrec Const where
prettyPrec p c = pparens (p > fixity (constInfo c)) $ pretty . syntax . constInfo $ c
-- | Pretty-print a syntax node with comments.
instance PrettyPrec (Syntax' ty) where
prettyPrec p = prettyPrec p . eraseS
prettyPrec p (Syntax' _ t (Comments before after) _) = case before of
Empty -> t'
_ ->
-- Print out any comments before the node, with a blank line before
mconcat
[ hardline
, vsep (map ppr (F.toList before))
, hardline
, t'
]
where
-- Print the node itself, possibly with suffix comments on the same line
t' = case Seq.viewr after of
Seq.EmptyR -> prettyPrec p t
_ Seq.:> lst -> case commentType lst of
-- Output a newline after a line comment, but not after a block comment
BlockComment -> tWithComments
LineComment -> tWithComments <> hardline
where
-- The pretty-printed node with suffix comments
tWithComments = prettyPrec p t <+> hsep (map ppr (F.toList after))
instance PrettyPrec Term where
instance PrettyPrec Comment where
prettyPrec _ (Comment _ LineComment _ txt) = "//" <> pretty txt
prettyPrec _ (Comment _ BlockComment _ txt) = "/*" <> pretty txt <> "*/"
instance PrettyPrec (Term' ty) where
prettyPrec _ TUnit = "()"
prettyPrec p (TConst c) = prettyPrec p c
prettyPrec _ (TDir d) = ppr d
@ -207,15 +234,15 @@ instance PrettyPrec Term where
prettyPrec _ (TRef r) = "@" <> pretty r
prettyPrec p (TRequireDevice d) = pparens (p > 10) $ "require" <+> ppr @Term (TText d)
prettyPrec p (TRequire n e) = pparens (p > 10) $ "require" <+> pretty n <+> ppr @Term (TText e)
prettyPrec p (TRequirements _ e) = pparens (p > 10) $ "requirements" <+> ppr e
prettyPrec p (SRequirements _ e) = pparens (p > 10) $ "requirements" <+> ppr e
prettyPrec _ (TVar s) = pretty s
prettyPrec _ (TDelay _ t) = group . encloseWithIndent 2 lbrace rbrace $ ppr t
prettyPrec _ t@TPair {} = prettyTuple t
prettyPrec p t@(TLam {}) =
prettyPrec _ (SDelay _ t) = group . encloseWithIndent 2 lbrace rbrace $ ppr t
prettyPrec _ t@SPair {} = prettyTuple t
prettyPrec p t@(SLam {}) =
pparens (p > 9) $
prettyLambdas t
-- Special handling of infix operators - ((+) 2) 3 --> 2 + 3
prettyPrec p (TApp t@(TApp (TConst c) l) r) =
prettyPrec p (SApp t@(Syntax' _ (SApp (Syntax' _ (TConst c) _ _) l) _ _) r) =
let ci = constInfo c
pC = fixity ci
in case constMeta ci of
@ -227,8 +254,8 @@ instance PrettyPrec Term where
, prettyPrec (pC + fromEnum (assoc == L)) r
]
_ -> prettyPrecApp p t r
prettyPrec p (TApp t1 t2) = case t1 of
TConst c ->
prettyPrec p (SApp t1 t2) = case t1 of
Syntax' _ (TConst c) _ _ ->
let ci = constInfo c
pC = fixity ci
in case constMeta ci of
@ -236,25 +263,25 @@ instance PrettyPrec Term where
ConstMUnOp S -> pparens (p > pC) $ prettyPrec (succ pC) t2 <> ppr t1
_ -> prettyPrecApp p t1 t2
_ -> prettyPrecApp p t1 t2
prettyPrec _ (TLet _ x mty t1 t2) =
prettyPrec _ (SLet _ (LV _ x) mty t1 t2) =
sep
[ prettyDefinition "let" x mty t1 <+> "in"
, ppr t2
]
prettyPrec _ (TDef _ x mty t1) =
prettyPrec _ (SDef _ (LV _ x) mty t1) =
sep
[ prettyDefinition "def" x mty t1
, "end"
]
prettyPrec p (TBind Nothing t1 t2) =
prettyPrec p (SBind Nothing t1 t2) =
pparens (p > 0) $
prettyPrec 1 t1 <> ";" <> line <> prettyPrec 0 t2
prettyPrec p (TBind (Just x) t1 t2) =
prettyPrec p (SBind (Just (LV _ x)) t1 t2) =
pparens (p > 0) $
pretty x <+> "<-" <+> prettyPrec 1 t1 <> ";" <> line <> prettyPrec 0 t2
prettyPrec _ (TRcd m) = brackets $ hsep (punctuate "," (map prettyEquality (M.assocs m)))
prettyPrec _ (TProj t x) = prettyPrec 11 t <> "." <> pretty x
prettyPrec p (TAnnotate t pt) =
prettyPrec _ (SRcd m) = brackets $ hsep (punctuate "," (map prettyEquality (M.assocs m)))
prettyPrec _ (SProj t x) = prettyPrec 11 t <> "." <> pretty x
prettyPrec p (SAnnotate t pt) =
pparens (p > 0) $
prettyPrec 1 t <+> ":" <+> ppr pt
@ -262,13 +289,7 @@ prettyEquality :: (Pretty a, PrettyPrec b) => (a, Maybe b) -> Doc ann
prettyEquality (x, Nothing) = pretty x
prettyEquality (x, Just t) = pretty x <+> "=" <+> ppr t
prettyTuple :: Term -> Doc a
prettyTuple = tupled . map ppr . unnestTuple
where
unnestTuple (TPair t1 t2) = t1 : unnestTuple t2
unnestTuple t = [t]
prettyDefinition :: Doc ann -> Var -> Maybe Polytype -> Term -> Doc ann
prettyDefinition :: Doc ann -> Var -> Maybe Polytype -> Syntax' ty -> Doc ann
prettyDefinition defName x mty t1 =
nest 2 . sep $
[ flatAlt
@ -284,7 +305,7 @@ prettyDefinition defName x mty t1 =
defEqLambdas = hsep ("=" : map prettyLambda defLambdaList)
eqAndLambdaLine = if null defLambdaList then "=" else line <> defEqLambdas
prettyPrecApp :: Int -> Term -> Term -> Doc a
prettyPrecApp :: Int -> Syntax' ty -> Syntax' ty -> Doc a
prettyPrecApp p t1 t2 =
pparens (p > 10) $
prettyPrec 10 t1 <+> prettyPrec 11 t2
@ -295,14 +316,17 @@ appliedTermPrec (TApp f _) = case f of
_ -> appliedTermPrec f
appliedTermPrec _ = 10
prettyLambdas :: Term -> Doc a
prettyTuple :: Term' ty -> Doc a
prettyTuple = tupled . map ppr . unTuple . STerm . erase
prettyLambdas :: Term' ty -> Doc a
prettyLambdas t = hsep (prettyLambda <$> lms) <> softline <> ppr rest
where
(rest, lms) = unchainLambdas t
(rest, lms) = unchainLambdas (STerm (erase t))
unchainLambdas :: Term -> (Term, [(Var, Maybe Type)])
unchainLambdas :: Syntax' ty -> (Syntax' ty, [(Var, Maybe Type)])
unchainLambdas = \case
TLam x mty body -> ((x, mty) :) <$> unchainLambdas body
Syntax' _ (SLam (LV _ x) mty body) _ _ -> ((x, mty) :) <$> unchainLambdas body
body -> (body, [])
prettyLambda :: (Pretty a1, PrettyPrec a2) => (a1, Maybe a2) -> Doc ann

View File

@ -45,6 +45,20 @@ module Swarm.Language.Syntax (
maxPathRange,
globalMaxVolume,
-- * SrcLoc
SrcLoc (..),
srcLocBefore,
noLoc,
-- * Comments
CommentType (..),
CommentSituation (..),
isStandalone,
Comment (..),
Comments (..),
beforeComments,
afterComments,
-- * Syntax
Syntax' (..),
sLoc,
@ -55,8 +69,6 @@ module Swarm.Language.Syntax (
pattern Syntax,
pattern CSyntax,
LocVar (..),
SrcLoc (..),
noLoc,
pattern STerm,
pattern TRequirements,
pattern TPair,
@ -71,11 +83,6 @@ module Swarm.Language.Syntax (
pattern TProj,
pattern TAnnotate,
-- * Comments
CommentType (..),
CommentSituation (..),
Comment (..),
-- * Terms
Var,
DelayType (..),
@ -88,6 +95,7 @@ module Swarm.Language.Syntax (
unTuple,
-- * Erasure
erase,
eraseS,
-- * Term traversal
@ -100,7 +108,7 @@ module Swarm.Language.Syntax (
measureAstSize,
) where
import Control.Lens (Plated (..), Traversal', makeLenses, para, universe, (%~), (^.))
import Control.Lens (AsEmpty, Plated (..), Traversal', makeLenses, para, universe, (%~), (^.), pattern Empty)
import Control.Monad (void)
import Data.Aeson.Types hiding (Key)
import Data.Data (Data)
@ -990,6 +998,83 @@ constInfo c = case c of
lowShow :: Show a => a -> Text
lowShow a = toLower (from (show a))
------------------------------------------------------------
-- SrcLoc
------------------------------------------------------------
-- | The location of something in the textual source code (recorded as
-- an interval measured in terms of indices into the input stream).
data SrcLoc
= NoLoc
| -- | Half-open interval from start (inclusive) to end (exclusive)
SrcLoc Int Int
deriving (Eq, Ord, Show, Data, Generic, FromJSON, ToJSON)
-- | @x <> y@ is the smallest 'SrcLoc' that subsumes both @x@ and @y@.
instance Semigroup SrcLoc where
NoLoc <> l = l
l <> NoLoc = l
SrcLoc s1 e1 <> SrcLoc s2 e2 = SrcLoc (min s1 s2) (max e1 e2)
-- | @mempty@ is a special value which means we have no location
-- information.
instance Monoid SrcLoc where
mempty = NoLoc
-- | Check whether one @SrcLoc@ starts at or before another one,
-- /i.e./ compare their starting indices to see if the first is @<=@
-- the second.
srcLocBefore :: SrcLoc -> SrcLoc -> Bool
srcLocBefore (SrcLoc a _) (SrcLoc b _) = a <= b
srcLocBefore _ _ = False
------------------------------------------------------------
-- Comments
------------------------------------------------------------
-- | Line vs block comments.
data CommentType = LineComment | BlockComment
deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, ToJSON, FromJSON)
-- | Was a comment all by itself on a line, or did it occur after some
-- other tokens on a line?
data CommentSituation = StandaloneComment | SuffixComment
deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, ToJSON, FromJSON)
-- | Test whether a comment is a standalone comment or not.
isStandalone :: Comment -> Bool
isStandalone = (== StandaloneComment) . commentSituation
-- | A comment is retained as some text plus metadata (source
-- location, comment type, + comment situation). While parsing we
-- record all comments out-of-band, for later re-insertion into the
-- AST.
data Comment = Comment
{ commentSrcLoc :: SrcLoc
, commentType :: CommentType
, commentSituation :: CommentSituation
, commentText :: Text
}
deriving (Eq, Show, Generic, Data, ToJSON, FromJSON)
-- | Comments which can be attached to a particular AST node. Some
-- comments come textually before the node and some come after.
data Comments = Comments
{ _beforeComments :: Seq Comment
, _afterComments :: Seq Comment
}
deriving (Eq, Show, Generic, Data, ToJSON, FromJSON)
makeLenses ''Comments
instance Semigroup Comments where
Comments b1 a1 <> Comments b2 a2 = Comments (b1 <> b2) (a1 <> a2)
instance Monoid Comments where
mempty = Comments mempty mempty
instance AsEmpty Comments
------------------------------------------------------------
-- Basic terms
------------------------------------------------------------
@ -1018,7 +1103,7 @@ data LocVar = LV {lvSrcLoc :: SrcLoc, lvVar :: Var}
deriving (Eq, Ord, Show, Data, Generic, FromJSON, ToJSON)
locVarToSyntax' :: LocVar -> ty -> Syntax' ty
locVarToSyntax' (LV s v) = Syntax' s (TVar v) Nothing
locVarToSyntax' (LV s v) = Syntax' s (TVar v) Empty
-- | Terms of the Swarm language.
data Term' ty
@ -1116,14 +1201,14 @@ instance Data ty => Plated (Term' ty) where
plate = uniplate
------------------------------------------------------------
-- Syntax: annotation on top of Terms with SrcLoc and type
-- Syntax: annotation on top of Terms with SrcLoc, comments, + type
------------------------------------------------------------
-- | The surface syntax for the language, with location and type annotations.
data Syntax' ty = Syntax'
{ _sLoc :: SrcLoc
, _sTerm :: Term' ty
, _sComments :: Maybe (Seq Comment)
, _sComments :: Comments
, _sType :: ty
}
deriving (Eq, Show, Functor, Foldable, Traversable, Data, Generic, FromJSON, ToJSON)
@ -1131,45 +1216,6 @@ data Syntax' ty = Syntax'
instance Data ty => Plated (Syntax' ty) where
plate = uniplate
data SrcLoc
= NoLoc
| -- | Half-open interval from start (inclusive) to end (exclusive)
SrcLoc Int Int
deriving (Eq, Ord, Show, Data, Generic, FromJSON, ToJSON)
instance Semigroup SrcLoc where
NoLoc <> l = l
l <> NoLoc = l
SrcLoc s1 e1 <> SrcLoc s2 e2 = SrcLoc (min s1 s2) (max e1 e2)
instance Monoid SrcLoc where
mempty = NoLoc
------------------------------------------------------------
-- Comments
------------------------------------------------------------
-- | Line vs block comments.
data CommentType = LineComment | BlockComment
deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, ToJSON, FromJSON)
-- | Was a comment all by itself on a line, or did it occur after some
-- other tokens on a line?
data CommentSituation = StandaloneComment | SuffixComment
deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, ToJSON, FromJSON)
-- | A comment is retained as some text plus metadata (source
-- location, comment type, + comment situation). While parsing we
-- record all comments out-of-band, for later re-insertion into the
-- AST.
data Comment = Comment
{ commentSrcLoc :: SrcLoc
, commentType :: CommentType
, commentSituation :: CommentSituation
, commentText :: Text
}
deriving (Eq, Show, Generic, Data, ToJSON, FromJSON)
------------------------------------------------------------
-- Pattern synonyms for untyped terms
------------------------------------------------------------
@ -1179,12 +1225,12 @@ type Syntax = Syntax' ()
-- | Raw parsed syntax, without comments or type annotations.
pattern Syntax :: SrcLoc -> Term -> Syntax
pattern Syntax l t = Syntax' l t Nothing ()
pattern Syntax l t = Syntax' l t Empty ()
{-# COMPLETE Syntax #-}
-- | Untyped syntax with assocated comments.
pattern CSyntax :: SrcLoc -> Term -> Maybe (Seq Comment) -> Syntax
pattern CSyntax :: SrcLoc -> Term -> Comments -> Syntax
pattern CSyntax l t cs = Syntax' l t cs ()
{-# COMPLETE CSyntax #-}
@ -1197,7 +1243,7 @@ noLoc = Syntax mempty
-- | Match an untyped term without annotations.
pattern STerm :: Term -> Syntax
pattern STerm t <-
Syntax _ t
CSyntax _ t _
where
STerm t = Syntax mempty t
@ -1307,12 +1353,16 @@ unTuple = \case
s -> [s]
--------------------------------------------------
-- Erasure
-- Type erasure
-- | Erase a 'Syntax' tree annotated with type and comment information
-- to a bare unannotated 'Term'.
-- | Erase the type annotations from a 'Syntax' or 'Term' tree.
erase :: Functor t => t ty -> t ()
erase = void
-- | Erase all annotations from a 'Syntax' node, turning it into a
-- bare 'Term'.
eraseS :: Syntax' ty -> Term
eraseS (Syntax' _ t _ _) = void t
eraseS (Syntax' _ t _ _) = erase t
------------------------------------------------------------
-- Free variable traversals

View File

@ -1,5 +1,6 @@
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE PatternSynonyms #-}
-- |
-- SPDX-License-Identifier: BSD-3-Clause
@ -16,6 +17,7 @@ module Swarm.Language.Value (
Env,
) where
import Control.Lens (pattern Empty)
import Data.Aeson (FromJSON, ToJSON)
import Data.Bool (bool)
import Data.List (foldl')
@ -113,7 +115,7 @@ valueToTerm (VClo x t e) =
M.foldrWithKey
(\y v -> TLet False y Nothing (valueToTerm v))
(TLam x Nothing t)
(M.restrictKeys (unCtx e) (S.delete x (setOf freeVarsV (Syntax' NoLoc t Nothing ()))))
(M.restrictKeys (unCtx e) (S.delete x (setOf freeVarsV (Syntax' NoLoc t Empty ()))))
valueToTerm (VCApp c vs) = foldl' TApp (TConst c) (reverse (map valueToTerm vs))
valueToTerm (VDef r x t _) = TDef r x Nothing t
valueToTerm (VResult v _) = valueToTerm v

View File

@ -18,8 +18,8 @@ import Data.Text qualified as T
import Data.Void (Void)
import Data.Yaml (FromJSON (parseJSON), withText)
import Swarm.Game.World.Syntax
import Swarm.Language.Parser.Util (fully)
import Swarm.Util (failT, showT, squote)
import Swarm.Util.Parse (fully)
import Text.Megaparsec hiding (runParser)
import Text.Megaparsec.Char
import Text.Megaparsec.Char.Lexer qualified as L

View File

@ -1,19 +0,0 @@
-- |
-- SPDX-License-Identifier: BSD-3-Clause
--
-- Parsing utilities for Swarm.
module Swarm.Util.Parse where
import Control.Applicative (optional)
import Text.Megaparsec (MonadParsec, eof)
-- | Run a parser "fully", consuming leading whitespace and ensuring
-- that the parser extends all the way to eof.
fully :: (MonadParsec e s f) => f () -> f a -> f a
fully sc p = sc *> p <* eof
-- | Run a parser "fully", consuming leading whitespace (including the
-- possibility that the input is nothing but whitespace) and
-- ensuring that the parser extends all the way to eof.
fullyMaybe :: (MonadParsec e s f) => f () -> f a -> f (Maybe a)
fullyMaybe sc = fully sc . optional

View File

@ -128,12 +128,14 @@ library swarm-lang
Swarm.Language.Context
Swarm.Language.Direction
Swarm.Language.Elaborate
Swarm.Language.Format
Swarm.Language.Key
Swarm.Language.LSP
Swarm.Language.LSP.Hover
Swarm.Language.LSP.VarUsage
Swarm.Language.Module
Swarm.Language.Parser
Swarm.Language.Parser.Comment
Swarm.Language.Parser.Core
Swarm.Language.Parser.Lex
Swarm.Language.Parser.QQ
@ -176,6 +178,7 @@ library swarm-lang
split,
syb >=0.7 && <0.8,
template-haskell,
terminal-size >=0.3 && <1.0,
text,
text-rope >=0.2 && <0.3,
vector,
@ -511,7 +514,6 @@ library swarm-util
Swarm.Util.Erasable
Swarm.Util.Lens
Swarm.Util.OccurrenceEncoder
Swarm.Util.Parse
Swarm.Util.ReadableIORef
Swarm.Util.RingBuffer
Swarm.Util.UnitInterval
@ -531,7 +533,6 @@ library swarm-util
filepath >=1.4 && <1.5,
fused-effects >=1.1.1.1 && <1.2,
lens >=4.19 && <5.4,
megaparsec >=9.6.1 && <9.7,
minimorph >=0.3 && <0.4,
mtl >=2.2.2 && <2.4,
servant-docs >=0.12 && <0.14,

View File

@ -85,7 +85,7 @@ testLSP =
getWarnings :: Text -> [UnusedVar]
getWarnings content =
case readTerm' content of
Right (Just term, _) -> map simplifyWarning problems
Right (Just term) -> map simplifyWarning problems
where
VU.Usage _ problems = VU.getUsage mempty term
_ -> []

View File

@ -9,7 +9,10 @@ module TestParse where
import Data.Foldable qualified as F
import Data.Text (Text)
import Swarm.Language.Parser
import Swarm.Language.Parser.Core (runParser)
import Swarm.Language.Parser.Lex (sc)
import Swarm.Language.Parser.Term (parseTerm)
import Swarm.Language.Parser.Util (fullyMaybe)
import Swarm.Language.Syntax
import Test.Tasty
import Test.Tasty.HUnit (Assertion, assertEqual, assertFailure, testCase)
@ -61,6 +64,6 @@ testParse =
]
expectParsedComments :: Text -> [Comment] -> Assertion
expectParsedComments input ex = case readTerm' input of
expectParsedComments input ex = case runParser (fullyMaybe sc parseTerm) input of
Left err -> assertFailure (into @String $ errorBundlePretty err)
Right (_, res) -> assertEqual "Expected parsed comments" ex (F.toList res)