Parse comments (#1838)

Towards #1467. This is incomplete but I thought it would be useful to split up the work into multiple PRs to avoid having a really massive one at the end. This PR accomplishes a few things: - Creates a new data type `Comment` to record the text of a comment, along with a bit of metadata giving its source location, whether it is a line or block comment, and whether it was on a line by itself or at the end of a line with some other non-comment tokens (this information will be used to decide which AST node to associate the comment with). - Adds a field to store comments in every `Syntax'` node. Note this is currently unused, since we haven't yet implemented the logic to insert comments into appropriate AST nodes (that will come in a later PR). - Note it's a bit annoying that the number of fields of each `Syntax'` node is growing. I did look into consolidating and generalizing `Syntax'` to just have a single field for arbitrary annotations, but the changes required seemed annoying enough that I didn't want to bother.
2024-10-26 17:38:34 +03:00 · 2024-05-11 19:32:21 -05:00 · 2024-05-11 19:32:21 -05:00 · 4bd409dd69
commit 4bd409dd69
parent f9c5df90f6
19 changed files with 315 additions and 124 deletions
--- a/feedback.yaml
+++ b/feedback.yaml
@ -1,3 +1,3 @@
 loops:
  test: cabal test -j -O0 --test-show-details=direct swarm:swarm-integration swarm:swarm-unit
-  unit: cabal test -j -O0 --test-show-details=direct swarm:swarm-unit --fast
+  unit: cabal test -j -O0 --test-show-details=direct swarm:swarm-unit
--- a/src/swarm-doc/Swarm/Doc/Pedagogy.hs
+++ b/src/swarm-doc/Swarm/Doc/Pedagogy.hs
@ -136,7 +136,7 @@ getCommands (Just (ProcessedTerm (Module stx _) _ _)) =
 where
  nodelist :: [Syntax' Polytype]
  nodelist = universe stx
-  isCommand (Syntax' sloc t _) = case t of
+  isCommand (Syntax' sloc t _ _) = case t of
    TConst c -> guard (isConsidered c) >> Just (c, [sloc])
    _ -> Nothing

--- a/src/swarm-engine/Swarm/Game/Scenario/Scoring/CodeSize.hs
+++ b/src/swarm-engine/Swarm/Game/Scenario/Scoring/CodeSize.hs
@ -29,7 +29,7 @@ codeMetricsFromSyntax ::
  Data a =>
  Syntax' a ->
  ScenarioCodeMetrics
-codeMetricsFromSyntax s@(Syntax' srcLoc _ _) =
+codeMetricsFromSyntax s@(Syntax' srcLoc _ _ _) =
  ScenarioCodeMetrics (charCount srcLoc) (measureAstSize s)
 where
  charCount :: SrcLoc -> Int
--- a/src/swarm-engine/Swarm/Game/State.hs
+++ b/src/swarm-engine/Swarm/Game/State.hs
@ -167,7 +167,7 @@ parseCodeFile ::
  m CodeToRun
 parseCodeFile filepath = do
  contents <- sendIO $ TIO.readFile filepath
-  pt@(ProcessedTerm (Module (Syntax' srcLoc _ _) _) _ _) <-
+  pt@(ProcessedTerm (Module (Syntax' srcLoc _ _ _) _) _ _) <-
    either (throwError . CustomFailure) return (processTermEither contents)
  let strippedText = stripSrc srcLoc contents
      programBytestring = TL.encodeUtf8 $ TL.fromStrict strippedText
--- a/src/swarm-engine/Swarm/Game/Step/Const.hs
+++ b/src/swarm-engine/Swarm/Game/Step/Const.hs
@ -89,7 +89,6 @@ import Swarm.Game.Value
 import Swarm.Language.Capability
 import Swarm.Language.Context hiding (delete)
 import Swarm.Language.Key (parseKeyComboFull)
-import Swarm.Language.Parse (runParser)
 import Swarm.Language.Pipeline
 import Swarm.Language.Pretty (prettyText)
 import Swarm.Language.Requirement qualified as R
@ -100,6 +99,7 @@ import Swarm.Log
 import Swarm.Util hiding (both)
 import Swarm.Util.Effect (throwToMaybe)
 import Swarm.Util.Lens (inherit)
+import Text.Megaparsec (runParser)
 import Witch (From (from), into)
 import Prelude hiding (Applicative (..), lookup)

@ -1001,7 +1001,7 @@ execConst runChildProg c vs s k = do
      [VText msg] -> return $ Up (User msg) s k
      _ -> badConst
    Key -> case vs of
-      [VText ktxt] -> case runParser parseKeyComboFull ktxt of
+      [VText ktxt] -> case runParser parseKeyComboFull "" ktxt of
        Right kc -> return $ Out (VKey kc) s k
        Left _ -> return $ Up (CmdFailed Key (T.unwords ["Unknown key", quote ktxt]) Nothing) s k
      _ -> badConst
--- a/src/swarm-lang/Swarm/Language/Elaborate.hs
+++ b/src/swarm-lang/Swarm/Language/Elaborate.hs
@ -22,12 +22,12 @@ elaborate =
  -- Wrap all *free* variables in 'Force'.  Free variables must be
  -- referring to a previous definition, which are all wrapped in
  -- 'TDelay'.
-  (freeVarsS %~ \s -> Syntax' (s ^. sLoc) (SApp sForce s) (s ^. sType))
+  (freeVarsS %~ \s -> Syntax' (s ^. sLoc) (SApp sForce s) (s ^. sComments) (s ^. sType))
    -- Now do additional rewriting on all subterms.
    . transform rewrite
 where
  rewrite :: Syntax' Polytype -> Syntax' Polytype
-  rewrite (Syntax' l t ty) = Syntax' l (rewriteTerm t) ty
+  rewrite (Syntax' l t ty cs) = Syntax' l (rewriteTerm t) ty cs

  rewriteTerm :: Term' Polytype -> Term' Polytype
  rewriteTerm = \case
@ -43,14 +43,14 @@ elaborate =
    -- bound by 'def'.
    SDef True x ty t1 -> SDef True x ty (wrapForce (lvVar x) t1)
    -- Rewrite @f $ x@ to @f x@.
-    SApp (Syntax' _ (SApp (Syntax' _ (TConst AppF) _) l) _) r -> SApp l r
+    SApp (Syntax' _ (SApp (Syntax' _ (TConst AppF) _ _) l) _ _) r -> SApp l r
    -- Leave any other subterms alone.
    t -> t

 wrapForce :: Var -> Syntax' Polytype -> Syntax' Polytype
-wrapForce x = mapFreeS x (\s@(Syntax' l _ ty) -> Syntax' l (SApp sForce s) ty)
+wrapForce x = mapFreeS x (\s@(Syntax' l _ ty cs) -> Syntax' l (SApp sForce s) ty cs)

 -- Note, TyUnit is not the right type, but I don't want to bother

 sForce :: Syntax' Polytype
-sForce = Syntax' NoLoc (TConst Force) (Forall ["a"] (TyDelay (TyVar "a") :->: TyVar "a"))
+sForce = Syntax' NoLoc (TConst Force) Nothing (Forall ["a"] (TyDelay (TyVar "a") :->: TyVar "a"))
--- a/src/swarm-lang/Swarm/Language/Key.hs
+++ b/src/swarm-lang/Swarm/Language/Key.hs
@ -26,9 +26,9 @@ import Data.Set (Set)
 import Data.Set qualified as S
 import Data.Text (Text)
 import Data.Text qualified as T
+import Data.Void
 import GHC.Generics hiding (from)
 import Graphics.Vty.Input.Events qualified as V
-import Swarm.Language.Parse
 import Text.Megaparsec
 import Text.Megaparsec.Char (char, string)
 import Text.Megaparsec.Char.Lexer (decimal)
@ -47,27 +47,29 @@ deriving instance FromJSON V.Modifier
 deriving instance ToJSON V.Key
 deriving instance ToJSON V.Modifier

+type SParser = Parsec Void Text
+
 -- | Smart constructor for 'KeyCombo'.
 mkKeyCombo :: [V.Modifier] -> V.Key -> KeyCombo
 mkKeyCombo mods k = KeyCombo k (sort mods)

 -- | Parse a key combo with nothing after it.
-parseKeyComboFull :: Parser KeyCombo
+parseKeyComboFull :: SParser KeyCombo
 parseKeyComboFull = parseKeyCombo <* eof

 -- | Parse a key combo like @\"M-C-F5\"@, @\"Down\"@, or @\"C-x\"@.
-parseKeyCombo :: Parser KeyCombo
+parseKeyCombo :: SParser KeyCombo
 parseKeyCombo =
  mkKeyCombo <$> many (try (parseModifier <* char '-')) <*> parseKey

-parseModifier :: Parser V.Modifier
+parseModifier :: SParser V.Modifier
 parseModifier =
  V.MShift <$ string "S"
    <|> V.MCtrl <$ string "C"
    <|> V.MMeta <$ string "M"
    <|> V.MAlt <$ string "A"

-parseKey :: Parser V.Key
+parseKey :: SParser V.Key
 parseKey =
  -- For an explanation of the 'reverse', see Note [Key names are not prefix-free]
  (asum . map specialKeyParser . reverse . S.toList $ specialKeyNames)
@ -90,13 +92,13 @@ parseKey =
 -- of key names (which are sorted alphabetically), it guarantees that
 -- longer names will come before names which are prefixes of them.

-parseFunctionKey :: Parser V.Key
+parseFunctionKey :: SParser V.Key
 parseFunctionKey = V.KFun <$> try (char 'F' *> decimal)

-parseCharKey :: Parser V.Key
+parseCharKey :: SParser V.Key
 parseCharKey = V.KChar <$> anySingle

-specialKeyParser :: Text -> Parser V.Key
+specialKeyParser :: Text -> SParser V.Key
 specialKeyParser t = read . ('K' :) . from @Text <$> string t

 -- https://stackoverflow.com/questions/51848587/list-constructor-names-using-generics-in-haskell
--- a/src/swarm-lang/Swarm/Language/LSP.hs
+++ b/src/swarm-lang/Swarm/Language/LSP.hs
@ -81,8 +81,8 @@ validateSwarmCode doc version content = do
  flushDiagnosticsBySource 0 (Just diagnosticSourcePrefix)

  let (parsingErrs, unusedVarWarnings) = case readTerm' content of
-        Right Nothing -> ([], [])
-        Right (Just term) -> (parsingErrors, unusedWarnings)
+        Right (Nothing, _) -> ([], [])
+        Right (Just term, _) -> (parsingErrors, unusedWarnings)
         where
          VU.Usage _ problems = VU.getUsage mempty term
          unusedWarnings = mapMaybe (VU.toErrPos content) problems
--- a/src/swarm-lang/Swarm/Language/LSP/Hover.hs
+++ b/src/swarm-lang/Swarm/Language/LSP/Hover.hs
@ -60,14 +60,14 @@ showHoverInfo ::
 showHoverInfo _ p vf@(VirtualFile _ _ myRope) =
  case readTerm' content of
    Left _ -> Nothing
-    Right Nothing -> Nothing
-    Right (Just stx) -> Just $ case processParsedTerm stx of
+    Right (Nothing, _) -> Nothing
+    Right (Just stx, _) -> Just $ case processParsedTerm stx of
      Left _e ->
        let found@(Syntax foundSloc _) = narrowToPosition stx $ fromIntegral absolutePos
            finalPos = posToRange myRope foundSloc
         in (,finalPos) . treeToMarkdown 0 $ explain found
      Right (ProcessedTerm modul _req _reqCtx) ->
-        let found@(Syntax' foundSloc _ _) = narrowToPosition (moduleAST modul) $ fromIntegral absolutePos
+        let found@(Syntax' foundSloc _ _ _) = narrowToPosition (moduleAST modul) $ fromIntegral absolutePos
            finalPos = posToRange myRope foundSloc
         in (,finalPos) . treeToMarkdown 0 $ explain found
 where
@ -94,7 +94,7 @@ descend ::
  -- | next element to inspect
  Syntax' ty ->
  Maybe (Syntax' ty)
-descend pos s1@(Syntax' l1 _ _) = do
+descend pos s1@(Syntax' l1 _ _ _) = do
  guard $ withinBound pos l1
  return $ narrowToPosition s1 pos

@ -107,12 +107,12 @@ narrowToPosition ::
  -- | absolute offset within the file
  Int ->
  Syntax' ty
-narrowToPosition s0@(Syntax' _ t ty) pos = fromMaybe s0 $ case t of
+narrowToPosition s0@(Syntax' _ t _ ty) pos = fromMaybe s0 $ case t of
  SLam lv _ s -> d (locVarToSyntax' lv $ getInnerType ty) <|> d s
  SApp s1 s2 -> d s1 <|> d s2
-  SLet _ lv _ s1@(Syntax' _ _ lty) s2 -> d (locVarToSyntax' lv lty) <|> d s1 <|> d s2
-  SDef _ lv _ s@(Syntax' _ _ lty) -> d (locVarToSyntax' lv lty) <|> d s
-  SBind mlv s1@(Syntax' _ _ lty) s2 -> (mlv >>= d . flip locVarToSyntax' (getInnerType lty)) <|> d s1 <|> d s2
+  SLet _ lv _ s1@(Syntax' _ _ _ lty) s2 -> d (locVarToSyntax' lv lty) <|> d s1 <|> d s2
+  SDef _ lv _ s@(Syntax' _ _ _ lty) -> d (locVarToSyntax' lv lty) <|> d s
+  SBind mlv s1@(Syntax' _ _ _ lty) s2 -> (mlv >>= d . flip locVarToSyntax' (getInnerType lty)) <|> d s1 <|> d s2
  SPair s1 s2 -> d s1 <|> d s2
  SDelay _ s -> d s
  SRcd m -> asum . map d . catMaybes . M.elems $ m
@ -244,8 +244,8 @@ explain trm = case trm ^. sTerm of
 explainFunction :: ExplainableType ty => Syntax' ty -> Tree Text
 explainFunction s =
  case unfoldApps s of
-    (Syntax' _ (TConst Force) _ :| [innerT]) -> explain innerT
-    (Syntax' _ (TConst Force) _ :| f : params) -> explainF f params
+    (Syntax' _ (TConst Force) _ _ :| [innerT]) -> explain innerT
+    (Syntax' _ (TConst Force) _ _ :| f : params) -> explainF f params
    (f :| params) -> explainF f params
 where
  explainF f params =
--- a/src/swarm-lang/Swarm/Language/Parse.hs
+++ b/src/swarm-lang/Swarm/Language/Parse.hs
@ -1,4 +1,5 @@
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TemplateHaskell #-}
 {-# LANGUAGE TypeFamilies #-}
 {-# LANGUAGE ViewPatterns #-}

@ -33,13 +34,15 @@ module Swarm.Language.Parse (
  unTuple,
 ) where

-import Control.Lens (view, (^.))
-import Control.Monad (guard, join)
+import Control.Arrow (right)
+import Control.Lens (makeLenses, use, view, (%=), (.=), (^.))
+import Control.Monad (guard, join, void)
 import Control.Monad.Combinators.Expr
 import Control.Monad.Reader (
  MonadReader (ask),
  ReaderT (runReaderT),
 )
+import Control.Monad.State (StateT, runStateT)
 import Data.Bifunctor
 import Data.Foldable (asum)
 import Data.List (foldl', nub)
@ -47,8 +50,11 @@ import Data.List.NonEmpty qualified (head)
 import Data.Map.Strict (Map)
 import Data.Map.Strict qualified as Map
 import Data.Maybe (fromMaybe, mapMaybe)
+import Data.Sequence (Seq)
+import Data.Sequence qualified as Seq
 import Data.Set qualified as S
 import Data.Set.Lens (setOf)
+import Data.String (fromString)
 import Data.Text (Text, index, toLower)
 import Data.Text qualified as T
 import Data.Void
@ -76,7 +82,22 @@ import Witch
 data Antiquoting = AllowAntiquoting | DisallowAntiquoting
  deriving (Eq, Ord, Show)

-type Parser = ReaderT Antiquoting (Parsec Void Text)
+data CommentState = CS
+  { _freshLine :: Bool
+  -- ^ Are we currently on a (so far) blank line, i.e. have there been
+  --   no nontrivial tokens since the most recent newline?  This field
+  --   is updated every time we parse a lexeme or symbol (set to
+  --   false), or a newline (set to true).
+  , _comments :: Seq Comment
+  -- ^ The actual sequence of comments, in the order they were encountered
+  }
+
+makeLenses ''CommentState
+
+initCommentState :: CommentState
+initCommentState = CS {_freshLine = True, _comments = Seq.empty}
+
+type Parser = ReaderT Antiquoting (StateT CommentState (Parsec Void Text))

 type ParserError = ParseErrorBundle Text Void

@ -109,24 +130,61 @@ reservedWords =
       , "requirements"
       ]

+-- Approach for preserving comments taken from https://www.reddit.com/r/haskell/comments/ni4gpm/comment/gz0ipmp/
+
+-- | If we see a comment starting now, is it the first non-whitespace
+--   thing on the current line so far, or were there other
+--   non-whitespace tokens previously?
+getCommentSituation :: Parser CommentSituation
+getCommentSituation = do
+  fl <- use freshLine
+  return $ if fl then StandaloneComment else SuffixComment
+
+-- | Parse a line comment, while appending it out-of-band to the list of
+--   comments saved in the custom state.
+lineComment :: Text -> Parser ()
+lineComment start = do
+  cs <- getCommentSituation
+  (loc, t) <- parseLocG $ do
+    string start *> takeWhileP (Just "character") (/= '\n')
+  comments %= (Seq.|> Comment loc LineComment cs t)
+
+-- | Parse a block comment, while appending it out-of-band to the list of
+--   comments saved in the custom state.
+blockComment :: Text -> Text -> Parser ()
+blockComment start end = do
+  cs <- getCommentSituation
+  (loc, t) <- parseLocG $ do
+    void $ string start
+    manyTill anySingle (string end)
+  comments %= (Seq.|> Comment loc BlockComment cs (fromString t))
+
 -- | Skip spaces and comments.
 sc :: Parser ()
 sc =
-  L.space
-    space1
-    (L.skipLineComment "//")
-    (L.skipBlockComment "/*" "*/")
+  -- Typically we would use L.space here, but we have to inline its
+  -- definition and use our own slight variant, since we need to treat
+  -- end-of-line specially.
+  skipMany . choice . map hidden $
+    [ hspace1
+    , eol *> (freshLine .= True) -- If we see a newline, reset freshLine to True.
+    , lineComment "//"
+    , blockComment "/*" "*/"
+    ]

 -- | In general, we follow the convention that every token parser
 --   assumes no leading whitespace and consumes all trailing
 --   whitespace.  Concretely, we achieve this by wrapping every token
 --   parser using 'lexeme'.
+--
+--   Also sets freshLine to False every time we see a non-whitespace
+--   token.
 lexeme :: Parser a -> Parser a
-lexeme = L.lexeme sc
+lexeme p = (freshLine .= False) *> L.lexeme sc p

 -- | A lexeme consisting of a literal string.
 symbol :: Text -> Parser Text
-symbol = L.symbol sc
+symbol s = (freshLine .= False) *> L.symbol sc s

 -- | Parse a case-insensitive reserved word, making sure it is not a
 --   prefix of a longer variable name, and allowing the parser to
@ -329,7 +387,7 @@ mkTuple (x : xs) = let r = mkTuple xs in loc x r $ SPair x r

 unTuple :: Syntax' ty -> [Syntax' ty]
 unTuple = \case
-  Syntax' _ (SPair s1 s2) _ -> s1 : unTuple s2
+  Syntax' _ (SPair s1 s2) _ _ -> s1 : unTuple s2
  s -> [s]

 -- | Construct an 'SLet', automatically filling in the Boolean field
@ -473,19 +531,31 @@ operatorSymbol = T.singleton <$> oneOf opChars
 --------------------------------------------------
 -- Utilities

-- | Run a parser on some input text, returning either the result or a
--   pretty-printed parse error message.
-runParser :: Parser a -> Text -> Either Text a
-runParser p t = first (from . errorBundlePretty) (parse (runReaderT p DisallowAntiquoting) "" t)
+-- | Run a parser on some input text, returning either the result +
+--   all collected comments, or a pretty-printed parse error message.
+runParser :: Parser a -> Text -> Either Text (a, Seq Comment)
+runParser p t =
+  first (from . errorBundlePretty)
+    . (\pt -> parse pt "" t)
+    . fmap (second (^. comments))
+    . flip runStateT initCommentState
+    . flip runReaderT DisallowAntiquoting
+    $ p

 -- | A utility for running a parser in an arbitrary 'MonadFail' (which
 --   is going to be the TemplateHaskell 'Language.Haskell.TH.Q' monad --- see
 --   "Swarm.Language.Parse.QQ"), with a specified source position.
 runParserTH :: (Monad m, MonadFail m) => (String, Int, Int) -> Parser a -> String -> m a
 runParserTH (file, line, col) p s =
-  case snd (runParser' (runReaderT (fully sc p) AllowAntiquoting) initState) of
+  let (_, res) =
+        flip runParser' initState
+          . flip runStateT initCommentState
+          . flip runReaderT AllowAntiquoting
+          . fully sc
+          $ p
+   in case res of
        Left err -> fail $ errorBundlePretty err
-    Right e -> return e
+        Right e -> return $ fst e
 where
  initState :: State Text Void
  initState =
@ -502,12 +572,17 @@ runParserTH (file, line, col) p s =
 --   'Nothing' if the input was only whitespace) or a pretty-printed
 --   parse error message.
 readTerm :: Text -> Either Text (Maybe Syntax)
-readTerm = runParser (fullyMaybe sc parseTerm)
+readTerm = right fst . runParser (fullyMaybe sc parseTerm)

 -- | A lower-level `readTerm` which returns the megaparsec bundle error
--   for precise error reporting.
-readTerm' :: Text -> Either ParserError (Maybe Syntax)
-readTerm' = parse (runReaderT (fullyMaybe sc parseTerm) DisallowAntiquoting) ""
+--   for precise error reporting, as well as the parsed comments.
+readTerm' :: Text -> Either ParserError (Maybe Syntax, Seq Comment)
+readTerm' t =
+  (\pt -> parse pt "" t)
+    . fmap (second (^. comments))
+    . flip runStateT initCommentState
+    . flip runReaderT DisallowAntiquoting
+    $ fullyMaybe sc parseTerm

 -- | A utility for converting a ParserError into a one line message:
 --   @<line-nr>: <error-msg>@
--- a/src/swarm-lang/Swarm/Language/Syntax.hs
+++ b/src/swarm-lang/Swarm/Language/Syntax.hs
@ -50,8 +50,10 @@ module Swarm.Language.Syntax (
  sLoc,
  sTerm,
  sType,
+  sComments,
  Syntax,
  pattern Syntax,
+  pattern CSyntax,
  LocVar (..),
  SrcLoc (..),
  noLoc,
@ -69,6 +71,11 @@ module Swarm.Language.Syntax (
  pattern TProj,
  pattern TAnnotate,

+  -- * Comments
+  CommentType (..),
+  CommentSituation (..),
+  Comment (..),
+
  -- * Terms
  Var,
  DelayType (..),
@ -100,6 +107,7 @@ import Data.Int (Int32)
 import Data.List.NonEmpty (NonEmpty)
 import Data.List.NonEmpty qualified as NonEmpty
 import Data.Map.Strict (Map)
+import Data.Sequence (Seq)
 import Data.Set (Set)
 import Data.Set qualified as S
 import Data.Set qualified as Set
@ -1008,7 +1016,7 @@ data LocVar = LV {lvSrcLoc :: SrcLoc, lvVar :: Var}
  deriving (Eq, Ord, Show, Data, Generic, FromJSON, ToJSON)

 locVarToSyntax' :: LocVar -> ty -> Syntax' ty
-locVarToSyntax' (LV s v) = Syntax' s (TVar v)
+locVarToSyntax' (LV s v) = Syntax' s (TVar v) Nothing

 -- | Terms of the Swarm language.
 data Term' ty
@ -1113,6 +1121,7 @@ instance Data ty => Plated (Term' ty) where
 data Syntax' ty = Syntax'
  { _sLoc :: SrcLoc
  , _sTerm :: Term' ty
+  , _sComments :: Maybe (Seq Comment)
  , _sType :: ty
  }
  deriving (Eq, Show, Functor, Foldable, Traversable, Data, Generic, FromJSON, ToJSON)
@ -1134,23 +1143,56 @@ instance Semigroup SrcLoc where
 instance Monoid SrcLoc where
  mempty = NoLoc

+------------------------------------------------------------
+-- Comments
+------------------------------------------------------------
+
+-- | Line vs block comments.
+data CommentType = LineComment | BlockComment
+  deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, ToJSON, FromJSON)
+
+-- | Was a comment all by itself on a line, or did it occur after some
+--   other tokens on a line?
+data CommentSituation = StandaloneComment | SuffixComment
+  deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, ToJSON, FromJSON)
+
+-- | A comment is retained as some text plus metadata (source
+--   location, comment type, + comment situation).  While parsing we
+--   record all comments out-of-band, for later re-insertion into the
+--   AST.
+data Comment = Comment
+  { commentSrcLoc :: SrcLoc
+  , commentType :: CommentType
+  , commentSituation :: CommentSituation
+  , commentText :: Text
+  }
+  deriving (Eq, Show, Generic, Data, ToJSON, FromJSON)
+
 ------------------------------------------------------------
 -- Pattern synonyms for untyped terms
 ------------------------------------------------------------

+-- | Syntax without type annotations.
 type Syntax = Syntax' ()

+-- | Raw parsed syntax, without comments or type annotations.
 pattern Syntax :: SrcLoc -> Term -> Syntax
-pattern Syntax l t = Syntax' l t ()
+pattern Syntax l t = Syntax' l t Nothing ()

 {-# COMPLETE Syntax #-}

+-- | Untyped syntax with assocated comments.
+pattern CSyntax :: SrcLoc -> Term -> Maybe (Seq Comment) -> Syntax
+pattern CSyntax l t cs = Syntax' l t cs ()
+
+{-# COMPLETE CSyntax #-}
+
 makeLenses ''Syntax'

 noLoc :: Term -> Syntax
 noLoc = Syntax mempty

-- | Match an untyped term without its 'SrcLoc'.
+-- | Match an untyped term without annotations.
 pattern STerm :: Term -> Syntax
 pattern STerm t <-
  Syntax _ t
@ -1160,17 +1202,17 @@ pattern STerm t <-
 pattern TRequirements :: Text -> Term -> Term
 pattern TRequirements x t = SRequirements x (STerm t)

-- | Match a TPair without syntax
+-- | Match a TPair without annotations.
 pattern TPair :: Term -> Term -> Term
 pattern TPair t1 t2 = SPair (STerm t1) (STerm t2)

-- | Match a TLam without syntax
+-- | Match a TLam without annotations.
 pattern TLam :: Var -> Maybe Type -> Term -> Term
 pattern TLam v ty t <- SLam (lvVar -> v) ty (STerm t)
  where
    TLam v ty t = SLam (LV NoLoc v) ty (STerm t)

-- | Match a TApp without syntax
+-- | Match a TApp without annotations.
 pattern TApp :: Term -> Term -> Term
 pattern TApp t1 t2 = SApp (STerm t1) (STerm t2)

@ -1180,29 +1222,29 @@ infixl 0 :$:
 pattern (:$:) :: Term -> Syntax -> Term
 pattern (:$:) t1 s2 = SApp (STerm t1) s2

-- | Match a TLet without syntax
+-- | Match a TLet without annotations.
 pattern TLet :: Bool -> Var -> Maybe Polytype -> Term -> Term -> Term
 pattern TLet r v pt t1 t2 <- SLet r (lvVar -> v) pt (STerm t1) (STerm t2)
  where
    TLet r v pt t1 t2 = SLet r (LV NoLoc v) pt (STerm t1) (STerm t2)

-- | Match a TDef without syntax
+-- | Match a TDef without annotations.
 pattern TDef :: Bool -> Var -> Maybe Polytype -> Term -> Term
 pattern TDef r v pt t <- SDef r (lvVar -> v) pt (STerm t)
  where
    TDef r v pt t = SDef r (LV NoLoc v) pt (STerm t)

-- | Match a TBind without syntax
+-- | Match a TBind without annotations.
 pattern TBind :: Maybe Var -> Term -> Term -> Term
 pattern TBind mv t1 t2 <- SBind (fmap lvVar -> mv) (STerm t1) (STerm t2)
  where
    TBind mv t1 t2 = SBind (LV NoLoc <$> mv) (STerm t1) (STerm t2)

-- | Match a TDelay without syntax
+-- | Match a TDelay without annotations.
 pattern TDelay :: DelayType -> Term -> Term
 pattern TDelay m t = SDelay m (STerm t)

-- | Match a TRcd without syntax
+-- | Match a TRcd without annotations.
 pattern TRcd :: Map Var (Maybe Term) -> Term
 pattern TRcd m <- SRcd ((fmap . fmap) _sTerm -> m)
  where
@ -1211,15 +1253,16 @@ pattern TRcd m <- SRcd ((fmap . fmap) _sTerm -> m)
 pattern TProj :: Term -> Var -> Term
 pattern TProj t x = SProj (STerm t) x

-- | Match a TAnnotate without syntax
+-- | Match a TAnnotate without annotations.
 pattern TAnnotate :: Term -> Polytype -> Term
 pattern TAnnotate t pt = SAnnotate (STerm t) pt

-- | COMPLETE pragma tells GHC using this set of pattern is complete for Term
+-- COMPLETE pragma tells GHC using this set of patterns is complete for Term
+
 {-# COMPLETE TUnit, TConst, TDir, TInt, TAntiInt, TText, TAntiText, TBool, TRequireDevice, TRequire, TRequirements, TVar, TPair, TLam, TApp, TLet, TDef, TBind, TDelay, TRcd, TProj, TAnnotate #-}

-- | Make infix operation (e.g. @2 + 3@) a curried function
--   application (@((+) 2) 3@).
+-- | Make an infix operation (e.g. @2 + 3@) a curried function
+--   application (e.g. @((+) 2) 3@).
 mkOp :: Const -> Syntax -> Syntax -> Syntax
 mkOp c s1@(Syntax l1 _) s2@(Syntax l2 _) = Syntax newLoc newTerm
 where
@ -1230,7 +1273,7 @@ mkOp c s1@(Syntax l1 _) s2@(Syntax l2 _) = Syntax newLoc newTerm
  sop = noLoc (TConst c)
  newTerm = SApp (Syntax l1 $ SApp sop s1) s2

-- | Make infix operation, discarding any syntax related location
+-- | Make an infix operation, discarding any location information
 mkOp' :: Const -> Term -> Term -> Term
 mkOp' c t1 = TApp (TApp (TConst c) t1)

@ -1244,16 +1287,16 @@ mkOp' c t1 = TApp (TApp (TConst c) t1)
 -- TConst Mul :| [TInt 1,TInt 2]
 unfoldApps :: Syntax' ty -> NonEmpty (Syntax' ty)
 unfoldApps trm = NonEmpty.reverse . flip NonEmpty.unfoldr trm $ \case
-  Syntax' _ (SApp s1 s2) _ -> (s2, Just s1)
+  Syntax' _ (SApp s1 s2) _ _ -> (s2, Just s1)
  s -> (s, Nothing)

 --------------------------------------------------
 -- Erasure

-- | Erase a 'Syntax' tree annotated with type
--   information to a bare unannotated 'Term'.
+-- | Erase a 'Syntax' tree annotated with type and comment information
+--   to a bare unannotated 'Term'.
 eraseS :: Syntax' ty -> Term
-eraseS (Syntax' _ t _) = void t
+eraseS (Syntax' _ t _ _) = void t

 ------------------------------------------------------------
 -- Free variable traversals
@ -1270,7 +1313,7 @@ freeVarsS :: forall ty. Traversal' (Syntax' ty) (Syntax' ty)
 freeVarsS f = go S.empty
 where
  -- go :: Applicative f => Set Var -> Syntax' ty -> f (Syntax' ty)
-  go bound s@(Syntax' l t ty) = case t of
+  go bound s@(Syntax' l t ty cmts) = case t of
    TUnit -> pure s
    TConst {} -> pure s
    TDir {} -> pure s
@ -1300,30 +1343,30 @@ freeVarsS f = go S.empty
    SProj s1 x -> rewrap $ SProj <$> go bound s1 <*> pure x
    SAnnotate s1 pty -> rewrap $ SAnnotate <$> go bound s1 <*> pure pty
   where
-    rewrap s' = Syntax' l <$> s' <*> pure ty
+    rewrap s' = Syntax' l <$> s' <*> pure ty <*> pure cmts

 -- | Like 'freeVarsS', but traverse over the 'Term's containing free
 --   variables.  More direct if you don't need to know the types or
 --   source locations of the variables.  Note that if you want to get
--   the list of all `Term`s representing free variables, you can do so via
--   @'toListOf' 'freeVarsT'@.
+--   the list of all `Term`s representing free variables, you can do
+--   so via @'toListOf' 'freeVarsT'@.
 freeVarsT :: forall ty. Traversal' (Syntax' ty) (Term' ty)
 freeVarsT = freeVarsS . sTerm

 -- | Traversal over the free variables of a term.  Like 'freeVarsS'
--   and 'freeVarsT', but traverse over the variable names
--   themselves.  Note that if you want to get the set of all free
--   variable names, you can do so via @'Data.Set.Lens.setOf'
--   'freeVarsV'@.
+--   and 'freeVarsT', but traverse over the variable names themselves.
+--   Note that if you want to get the set of all free variable names,
+--   you can do so via @'Data.Set.Lens.setOf' 'freeVarsV'@.
 freeVarsV :: Traversal' (Syntax' ty) Var
 freeVarsV = freeVarsT . (\f -> \case TVar x -> TVar <$> f x; t -> pure t)

-- | Apply a function to all free occurrences of a particular variable.
+-- | Apply a function to all free occurrences of a particular
+--   variable.
 mapFreeS :: Var -> (Syntax' ty -> Syntax' ty) -> Syntax' ty -> Syntax' ty
 mapFreeS x f = freeVarsS %~ (\t -> case t ^. sTerm of TVar y | y == x -> f t; _ -> t)

-- | Transform the AST into a Tree datatype.
-- Useful for pretty-printing (e.g. via "Data.Tree.drawTree").
+-- | Transform the AST into a Tree datatype.  Useful for
+--   pretty-printing (e.g. via "Data.Tree.drawTree").
 asTree :: Data a => Syntax' a -> Tree (Syntax' a)
 asTree = para Node

--- a/src/swarm-lang/Swarm/Language/Typecheck.hs
+++ b/src/swarm-lang/Swarm/Language/Typecheck.hs
@ -332,7 +332,7 @@ instance (HasBindings u, Data u) => HasBindings (Term' u) where
  applyBindings = gmapM (mkM (applyBindings @(Syntax' u)))

 instance (HasBindings u, Data u) => HasBindings (Syntax' u) where
-  applyBindings (Syntax' l t u) = Syntax' l <$> applyBindings t <*> applyBindings u
+  applyBindings (Syntax' l t cs u) = Syntax' l <$> applyBindings t <*> pure cs <*> applyBindings u

 instance HasBindings UModule where
  applyBindings (Module u uctx) = Module <$> applyBindings u <*> applyBindings uctx
@ -556,7 +556,7 @@ inferModule ::
  ) =>
  Syntax ->
  m UModule
-inferModule s@(Syntax l t) = addLocToTypeErr l $ case t of
+inferModule s@(CSyntax l t cs) = addLocToTypeErr l $ case t of
  -- For definitions with no type signature, make up a fresh type
  -- variable for the body, infer the body under an extended context,
  -- and unify the two.  Then generalize the type and return an
@ -566,7 +566,7 @@ inferModule s@(Syntax l t) = addLocToTypeErr l $ case t of
    t1' <- withBinding (lvVar x) (Forall [] xTy) $ infer t1
    _ <- unify (Just t1) (joined xTy (t1' ^. sType))
    pty <- generalize (t1' ^. sType)
-    return $ Module (Syntax' l (SDef r x Nothing t1') (UTyCmd UTyUnit)) (singleton (lvVar x) pty)
+    return $ Module (Syntax' l (SDef r x Nothing t1') cs (UTyCmd UTyUnit)) (singleton (lvVar x) pty)

  -- If a (poly)type signature has been provided, skolemize it and
  -- check the definition.
@ -574,7 +574,7 @@ inferModule s@(Syntax l t) = addLocToTypeErr l $ case t of
    let upty = toU pty
    uty <- skolemize upty
    t1' <- withBinding (lvVar x) upty $ check t1 uty
-    return $ Module (Syntax' l (SDef r x (Just pty) t1') (UTyCmd UTyUnit)) (singleton (lvVar x) upty)
+    return $ Module (Syntax' l (SDef r x (Just pty) t1') cs (UTyCmd UTyUnit)) (singleton (lvVar x) upty)

  -- To handle a 'TBind', infer the types of both sides, combining the
  -- returned modules appropriately.  Have to be careful to use the
@ -624,7 +624,7 @@ inferModule s@(Syntax l t) = addLocToTypeErr l $ case t of
        let ctxX = maybe Ctx.empty ((`Ctx.singleton` genA) . lvVar) mx
        return $
          Module
-            (Syntax' l (SBind mx c1' c2') (c2' ^. sType))
+            (Syntax' l (SBind mx c1' c2') cs (c2' ^. sType))
            (ctx1 `Ctx.union` ctxX `Ctx.union` ctx2)

  -- In all other cases, there can no longer be any definitions in the
@ -648,31 +648,31 @@ infer ::
  ) =>
  Syntax ->
  m (Syntax' UType)
-infer s@(Syntax l t) = addLocToTypeErr l $ case t of
+infer s@(CSyntax l t cs) = addLocToTypeErr l $ case t of
  -- Primitives, i.e. things for which we immediately know the only
  -- possible correct type, and knowing an expected type would provide
  -- no extra information.
-  TUnit -> return $ Syntax' l TUnit UTyUnit
-  TConst c -> Syntax' l (TConst c) <$> (instantiate . toU $ inferConst c)
-  TDir d -> return $ Syntax' l (TDir d) UTyDir
-  TInt n -> return $ Syntax' l (TInt n) UTyInt
-  TAntiInt x -> return $ Syntax' l (TAntiInt x) UTyInt
-  TText x -> return $ Syntax' l (TText x) UTyText
-  TAntiText x -> return $ Syntax' l (TAntiText x) UTyText
-  TBool b -> return $ Syntax' l (TBool b) UTyBool
-  TRobot r -> return $ Syntax' l (TRobot r) UTyActor
-  TRequireDevice d -> return $ Syntax' l (TRequireDevice d) (UTyCmd UTyUnit)
-  TRequire n d -> return $ Syntax' l (TRequire n d) (UTyCmd UTyUnit)
+  TUnit -> return $ Syntax' l TUnit cs UTyUnit
+  TConst c -> Syntax' l (TConst c) cs <$> (instantiate . toU $ inferConst c)
+  TDir d -> return $ Syntax' l (TDir d) cs UTyDir
+  TInt n -> return $ Syntax' l (TInt n) cs UTyInt
+  TAntiInt x -> return $ Syntax' l (TAntiInt x) cs UTyInt
+  TText x -> return $ Syntax' l (TText x) cs UTyText
+  TAntiText x -> return $ Syntax' l (TAntiText x) cs UTyText
+  TBool b -> return $ Syntax' l (TBool b) cs UTyBool
+  TRobot r -> return $ Syntax' l (TRobot r) cs UTyActor
+  TRequireDevice d -> return $ Syntax' l (TRequireDevice d) cs (UTyCmd UTyUnit)
+  TRequire n d -> return $ Syntax' l (TRequire n d) cs (UTyCmd UTyUnit)
  SRequirements x t1 -> do
    t1' <- infer t1
-    return $ Syntax' l (SRequirements x t1') (UTyCmd UTyUnit)
+    return $ Syntax' l (SRequirements x t1') cs (UTyCmd UTyUnit)

  -- We should never encounter a TRef since they do not show up in
  -- surface syntax, only as values while evaluating (*after*
  -- typechecking).
  TRef _ -> throwTypeErr l $ CantInfer t
  -- Just look up variables in the context.
-  TVar x -> Syntax' l (TVar x) <$> lookup l x
+  TVar x -> Syntax' l (TVar x) cs <$> lookup l x
  -- It is helpful to handle lambdas in inference mode as well as
  -- checking mode; in particular, we can handle lambdas with an
  -- explicit type annotation on the argument.  Just infer the body
@ -681,7 +681,7 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of
  SLam x (Just argTy) body -> do
    let uargTy = toU argTy
    body' <- withBinding (lvVar x) (Forall [] uargTy) $ infer body
-    return $ Syntax' l (SLam x (Just argTy) body') (UTyFun uargTy (body' ^. sType))
+    return $ Syntax' l (SLam x (Just argTy) body') cs (UTyFun uargTy (body' ^. sType))

  -- Need special case here for applying 'atomic' or 'instant' so we
  -- don't handle it with the case for generic type application.
@ -722,7 +722,7 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of
    -- unit`).
    resTy' <- applyBindings resTy

-    return $ Syntax' l (SApp f' x') resTy'
+    return $ Syntax' l (SApp f' x') cs resTy'

  -- We handle binds in inference mode for a similar reason to
  -- application.
@ -735,7 +735,7 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of
        . withFrame l TCBindR
        $ infer c2
    _ <- decomposeCmdTy c2 (Actual, c2' ^. sType)
-    return $ Syntax' l (SBind mx c1' c2') (c2' ^. sType)
+    return $ Syntax' l (SBind mx c1' c2') cs (c2' ^. sType)

  -- Handle record projection in inference mode.  Knowing the expected
  -- type of r.x doesn't really help since we must infer the type of r
@ -744,14 +744,14 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of
    t1' <- infer t1
    case t1' ^. sType of
      UTyRcd m -> case M.lookup x m of
-        Just xTy -> return $ Syntax' l (SProj t1' x) xTy
+        Just xTy -> return $ Syntax' l (SProj t1' x) cs xTy
        Nothing -> throwTypeErr l $ UnknownProj x (SProj t1 x)
      _ -> throwTypeErr l $ CantInferProj (SProj t1 x)

  -- See Note [Checking and inference for record literals]
  SRcd m -> do
    m' <- itraverse (\x -> infer . fromMaybe (STerm (TVar x))) m
-    return $ Syntax' l (SRcd (Just <$> m')) (UTyRcd (fmap (^. sType) m'))
+    return $ Syntax' l (SRcd (Just <$> m')) cs (UTyRcd (fmap (^. sType) m'))

  -- To infer a type-annotated term, switch into checking mode.
  -- However, we must be careful to deal properly with polymorphic
@ -769,7 +769,7 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of
    -- following typechecking steps.
    iuty <- instantiate upty
    c' <- check c iuty
-    return $ Syntax' l (SAnnotate c' pty) (c' ^. sType)
+    return $ Syntax' l (SAnnotate c' pty) cs (c' ^. sType)

  -- Fallback: to infer the type of anything else, make up a fresh unification
  -- variable for its type and check against it.
@ -905,7 +905,7 @@ check ::
  Syntax ->
  UType ->
  m (Syntax' UType)
-check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
+check s@(CSyntax l t cs) expected = addLocToTypeErr l $ case t of
  -- if t : ty, then  {t} : {ty}.
  -- Note that in theory, if the @Maybe Var@ component of the @SDelay@
  -- is @Just@, we should typecheck the body under a context extended
@ -918,7 +918,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
  SDelay d s1 -> do
    ty1 <- decomposeDelayTy s (Expected, expected)
    s1' <- check s1 ty1
-    return $ Syntax' l (SDelay d s1') (UTyDelay ty1)
+    return $ Syntax' l (SDelay d s1') cs (UTyDelay ty1)

  -- To check the type of a pair, make sure the expected type is a
  -- product type, and push the two types down into the left and right.
@ -926,7 +926,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
    (ty1, ty2) <- decomposeProdTy s (Expected, expected)
    s1' <- check s1 ty1
    s2' <- check s2 ty2
-    return $ Syntax' l (SPair s1' s2') (UTyProd ty1 ty2)
+    return $ Syntax' l (SPair s1' s2') cs (UTyProd ty1 ty2)

  -- To check a lambda, make sure the expected type is a function type.
  SLam x mxTy body -> do
@ -943,7 +943,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
          Right _ -> return ()
      Nothing -> return ()
    body' <- withBinding (lvVar x) (Forall [] argTy) $ check body resTy
-    return $ Syntax' l (SLam x mxTy body') (UTyFun argTy resTy)
+    return $ Syntax' l (SLam x mxTy body') cs (UTyFun argTy resTy)

  -- Special case for checking the argument to 'atomic' (or
  -- 'instant').  'atomic t' has the same type as 't', which must have
@ -963,7 +963,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
        -- guaranteed to operate within a single tick.  When c is Instant
        -- we skip this check.
        when (c == Atomic) $ validAtomic at
-        return $ Syntax' l (SApp atomic' at') (UTyCmd argTy)
+        return $ Syntax' l (SApp atomic' at') cs (UTyCmd argTy)
  -- Checking the type of a let-expression.
  SLet r x mxTy t1 t2 -> do
    (upty, t1') <- case mxTy of
@ -993,7 +993,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
    ask @UCtx >>= mapM_ (noSkolems l)

    -- Return the annotated let.
-    return $ Syntax' l (SLet r x mxTy t1' t2') expected
+    return $ Syntax' l (SLet r x mxTy t1' t2') cs expected

  -- Definitions can only occur at the top level.
  SDef {} -> throwTypeErr l $ DefNotTopLevel t
@ -1015,13 +1015,13 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
          throwTypeErr l $
            FieldsMismatch (joined expectedFields actualFields)
        m' <- itraverse (\x ms -> check (fromMaybe (STerm (TVar x)) ms) (tyMap ! x)) fields
-        return $ Syntax' l (SRcd (Just <$> m')) expected
+        return $ Syntax' l (SRcd (Just <$> m')) cs expected

  -- Fallback: switch into inference mode, and check that the type we
  -- get is what we expected.
  _ -> do
-    Syntax' l' t' actual <- infer s
-    Syntax' l' t' <$> unify (Just s) (joined expected actual)
+    Syntax' l' t' _ actual <- infer s
+    Syntax' l' t' cs <$> unify (Just s) (joined expected actual)

 -- ~~~~ Note [Checking and inference for record literals]
 --
--- a/src/swarm-lang/Swarm/Language/Value.hs
+++ b/src/swarm-lang/Swarm/Language/Value.hs
@ -113,7 +113,7 @@ valueToTerm (VClo x t e) =
  M.foldrWithKey
    (\y v -> TLet False y Nothing (valueToTerm v))
    (TLam x Nothing t)
-    (M.restrictKeys (unCtx e) (S.delete x (setOf freeVarsV (Syntax' NoLoc t ()))))
+    (M.restrictKeys (unCtx e) (S.delete x (setOf freeVarsV (Syntax' NoLoc t Nothing ()))))
 valueToTerm (VCApp c vs) = foldl' TApp (TConst c) (reverse (map valueToTerm vs))
 valueToTerm (VDef r x t _) = TDef r x Nothing t
 valueToTerm (VResult v _) = valueToTerm v
--- a/src/swarm-web/Swarm/Web.hs
+++ b/src/swarm-web/Swarm/Web.hs
@ -231,7 +231,7 @@ recogFoundHandler appStateRef = do
 codeRenderHandler :: Text -> Handler Text
 codeRenderHandler contents = do
  return $ case processTermEither contents of
-    Right (ProcessedTerm (Module stx@(Syntax' _srcLoc _term _) _) _ _) ->
+    Right (ProcessedTerm (Module stx@(Syntax' _srcLoc _term _ _) _) _ _) ->
      into @Text . drawTree . fmap (T.unpack . prettyTextLine) . para Node $ stx
    Left x -> x

--- a/swarm.cabal
+++ b/swarm.cabal
@ -369,6 +369,7 @@ library swarm-engine
    http-types >=0.12 && <0.13,
    lens >=4.19 && <5.4,
    linear >=1.21.6 && <1.24,
+    megaparsec >=9.6 && <9.7,
    mtl >=2.2.2 && <2.4,
    nonempty-containers >=0.3.4 && <0.3.5,
    prettyprinter >=1.7.0 && <1.8,
@ -775,6 +776,7 @@ test-suite swarm-unit
    TestLanguagePipeline
    TestNotification
    TestOrdering
+    TestParse
    TestPedagogy
    TestPretty
    TestRecipeCoverage
@ -792,6 +794,7 @@ test-suite swarm-unit
    filepath,
    hashable,
    lens,
+    megaparsec,
    mtl,
    tasty >=0.10 && <1.6,
    tasty-expected-failure >=0.12 && <0.13,
--- a/test/unit/Main.hs
+++ b/test/unit/Main.hs
@ -36,6 +36,7 @@ import TestLSP (testLSP)
 import TestLanguagePipeline (testLanguagePipeline)
 import TestNotification (testNotification)
 import TestOrdering (testOrdering)
+import TestParse (testParse)
 import TestPedagogy (testPedagogy)
 import TestPretty (testPrettyConst)
 import TestRecipeCoverage (testDeviceRecipeCoverage)
@ -55,6 +56,7 @@ tests s =
  testGroup
    "Tests"
    [ testLanguagePipeline
+    , testParse
    , testPrettyConst
    , testBoolExpr
    , testCommands
--- a/test/unit/TestCommand.hs
+++ b/test/unit/TestCommand.hs
@ -12,12 +12,12 @@ import Data.Text (Text)
 import Graphics.Vty.Input.Events qualified as V
 import Swarm.Game.Location
 import Swarm.Language.Key
-import Swarm.Language.Parse (runParser)
 import Swarm.Language.Syntax
 import Test.QuickCheck qualified as QC
 import Test.Tasty
 import Test.Tasty.HUnit
 import Test.Tasty.QuickCheck (testProperty)
+import Text.Megaparsec (runParser)
 import Witch

 testCommands :: TestTree
@ -47,7 +47,7 @@ testCommands =
        [ testGroup
            "Parsing"
            ( let parseKeyTest input mods k =
-                    assertEqual "" (runParser parseKeyCombo input) (Right (mkKeyCombo mods k))
+                    assertEqual "" (runParser parseKeyCombo "" input) (Right (mkKeyCombo mods k))
               in [ testCase "parse x" $ parseKeyTest "x" [] (V.KChar 'x')
                  , testCase "parse X" $ parseKeyTest "X" [] (V.KChar 'X')
                  , testCase "parse C" $ parseKeyTest "C" [] (V.KChar 'C')
@ -89,4 +89,4 @@ arbitraryModifiers = QC.sublistOf [V.MAlt, V.MCtrl, V.MMeta, V.MShift]

 prop_parse_pretty_key :: KeyCombo -> Bool
 prop_parse_pretty_key kc =
-  runParser parseKeyCombo (prettyKeyCombo kc) == Right kc
+  runParser parseKeyCombo "" (prettyKeyCombo kc) == Right kc
--- a/test/unit/TestLSP.hs
+++ b/test/unit/TestLSP.hs
@ -85,7 +85,7 @@ testLSP =
  getWarnings :: Text -> [UnusedVar]
  getWarnings content =
    case readTerm' content of
-      Right (Just term) -> map simplifyWarning problems
+      Right (Just term, _) -> map simplifyWarning problems
       where
        VU.Usage _ problems = VU.getUsage mempty term
      _ -> []
--- a/test/unit/TestParse.hs
+++ b/test/unit/TestParse.hs
@ -0,0 +1,66 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TypeApplications #-}
+
+-- |
+-- SPDX-License-Identifier: BSD-3-Clause
+--
+-- Swarm parser tests.
+module TestParse where
+
+import Data.Foldable qualified as F
+import Data.Text (Text)
+import Swarm.Language.Parse
+import Swarm.Language.Syntax
+import Test.Tasty
+import Test.Tasty.HUnit (Assertion, assertEqual, assertFailure, testCase)
+import Text.Megaparsec (errorBundlePretty)
+import Witch (into)
+
+testParse :: TestTree
+testParse =
+  testGroup
+    "Parser - comments"
+    [ testCase "none" $
+        expectParsedComments
+          "1 + 2"
+          []
+    , testCase "suffix" $
+        expectParsedComments
+          "1 + 2 // add"
+          [Comment (SrcLoc 6 12) LineComment SuffixComment " add"]
+    , testCase "standalone" $
+        expectParsedComments
+          "// add\n1 + 2"
+          [Comment (SrcLoc 0 6) LineComment StandaloneComment " add"]
+    , testCase "block suffix" $
+        expectParsedComments
+          "1 + 2 /* add */"
+          [Comment (SrcLoc 6 15) BlockComment SuffixComment " add "]
+    , testCase "block standalone" $
+        expectParsedComments
+          "/* add */\n1 + 2"
+          [Comment (SrcLoc 0 9) BlockComment StandaloneComment " add "]
+    , testCase "block prefix" $
+        expectParsedComments
+          "/* add */ 1 + 2"
+          [Comment (SrcLoc 0 9) BlockComment StandaloneComment " add "]
+    , testCase "block infix" $
+        expectParsedComments
+          "1 + /*add*/ 2"
+          [Comment (SrcLoc 4 11) BlockComment SuffixComment "add"]
+    , testCase "multiline block" $
+        expectParsedComments
+          "/* add \n  some numbers */\n  1 + 2"
+          [Comment (SrcLoc 0 25) BlockComment StandaloneComment " add \n  some numbers "]
+    , testCase "multiple lines" $
+        expectParsedComments
+          "// add\n// some numbers\n  1 + 2"
+          [ Comment (SrcLoc 0 6) LineComment StandaloneComment " add"
+          , Comment (SrcLoc 7 22) LineComment StandaloneComment " some numbers"
+          ]
+    ]
+
+expectParsedComments :: Text -> [Comment] -> Assertion
+expectParsedComments input ex = case readTerm' input of
+  Left err -> assertFailure (into @String $ errorBundlePretty err)
+  Right (_, res) -> assertEqual "Expected parsed comments" ex (F.toList res)