From 4bd409dd690f98d430eef7e94bccd958ca8b3d6d Mon Sep 17 00:00:00 2001 From: Brent Yorgey Date: Sat, 11 May 2024 19:32:21 -0500 Subject: [PATCH] Parse comments (#1838) Towards #1467. This is incomplete but I thought it would be useful to split up the work into multiple PRs to avoid having a really massive one at the end. This PR accomplishes a few things: - Creates a new data type `Comment` to record the text of a comment, along with a bit of metadata giving its source location, whether it is a line or block comment, and whether it was on a line by itself or at the end of a line with some other non-comment tokens (this information will be used to decide which AST node to associate the comment with). - Adds a field to store comments in every `Syntax'` node. Note this is currently unused, since we haven't yet implemented the logic to insert comments into appropriate AST nodes (that will come in a later PR). - Note it's a bit annoying that the number of fields of each `Syntax'` node is growing. I did look into consolidating and generalizing `Syntax'` to just have a single field for arbitrary annotations, but the changes required seemed annoying enough that I didn't want to bother. --- feedback.yaml | 2 +- src/swarm-doc/Swarm/Doc/Pedagogy.hs | 2 +- .../Swarm/Game/Scenario/Scoring/CodeSize.hs | 2 +- src/swarm-engine/Swarm/Game/State.hs | 2 +- src/swarm-engine/Swarm/Game/Step/Const.hs | 4 +- src/swarm-lang/Swarm/Language/Elaborate.hs | 10 +- src/swarm-lang/Swarm/Language/Key.hs | 18 +-- src/swarm-lang/Swarm/Language/LSP.hs | 4 +- src/swarm-lang/Swarm/Language/LSP/Hover.hs | 20 +-- src/swarm-lang/Swarm/Language/Parse.hs | 117 ++++++++++++++---- src/swarm-lang/Swarm/Language/Syntax.hs | 107 +++++++++++----- src/swarm-lang/Swarm/Language/Typecheck.hs | 68 +++++----- src/swarm-lang/Swarm/Language/Value.hs | 2 +- src/swarm-web/Swarm/Web.hs | 2 +- swarm.cabal | 3 + test/unit/Main.hs | 2 + test/unit/TestCommand.hs | 6 +- test/unit/TestLSP.hs | 2 +- test/unit/TestParse.hs | 66 ++++++++++ 19 files changed, 315 insertions(+), 124 deletions(-) create mode 100644 test/unit/TestParse.hs diff --git a/feedback.yaml b/feedback.yaml index cce47722..e40024d7 100644 --- a/feedback.yaml +++ b/feedback.yaml @@ -1,3 +1,3 @@ loops: test: cabal test -j -O0 --test-show-details=direct swarm:swarm-integration swarm:swarm-unit - unit: cabal test -j -O0 --test-show-details=direct swarm:swarm-unit --fast + unit: cabal test -j -O0 --test-show-details=direct swarm:swarm-unit diff --git a/src/swarm-doc/Swarm/Doc/Pedagogy.hs b/src/swarm-doc/Swarm/Doc/Pedagogy.hs index e2f38447..0be6564a 100644 --- a/src/swarm-doc/Swarm/Doc/Pedagogy.hs +++ b/src/swarm-doc/Swarm/Doc/Pedagogy.hs @@ -136,7 +136,7 @@ getCommands (Just (ProcessedTerm (Module stx _) _ _)) = where nodelist :: [Syntax' Polytype] nodelist = universe stx - isCommand (Syntax' sloc t _) = case t of + isCommand (Syntax' sloc t _ _) = case t of TConst c -> guard (isConsidered c) >> Just (c, [sloc]) _ -> Nothing diff --git a/src/swarm-engine/Swarm/Game/Scenario/Scoring/CodeSize.hs b/src/swarm-engine/Swarm/Game/Scenario/Scoring/CodeSize.hs index 6bbfc9ce..7cf98412 100644 --- a/src/swarm-engine/Swarm/Game/Scenario/Scoring/CodeSize.hs +++ b/src/swarm-engine/Swarm/Game/Scenario/Scoring/CodeSize.hs @@ -29,7 +29,7 @@ codeMetricsFromSyntax :: Data a => Syntax' a -> ScenarioCodeMetrics -codeMetricsFromSyntax s@(Syntax' srcLoc _ _) = +codeMetricsFromSyntax s@(Syntax' srcLoc _ _ _) = ScenarioCodeMetrics (charCount srcLoc) (measureAstSize s) where charCount :: SrcLoc -> Int diff --git a/src/swarm-engine/Swarm/Game/State.hs b/src/swarm-engine/Swarm/Game/State.hs index aad43daf..e5ce39a2 100644 --- a/src/swarm-engine/Swarm/Game/State.hs +++ b/src/swarm-engine/Swarm/Game/State.hs @@ -167,7 +167,7 @@ parseCodeFile :: m CodeToRun parseCodeFile filepath = do contents <- sendIO $ TIO.readFile filepath - pt@(ProcessedTerm (Module (Syntax' srcLoc _ _) _) _ _) <- + pt@(ProcessedTerm (Module (Syntax' srcLoc _ _ _) _) _ _) <- either (throwError . CustomFailure) return (processTermEither contents) let strippedText = stripSrc srcLoc contents programBytestring = TL.encodeUtf8 $ TL.fromStrict strippedText diff --git a/src/swarm-engine/Swarm/Game/Step/Const.hs b/src/swarm-engine/Swarm/Game/Step/Const.hs index bd48930b..8b28441e 100644 --- a/src/swarm-engine/Swarm/Game/Step/Const.hs +++ b/src/swarm-engine/Swarm/Game/Step/Const.hs @@ -89,7 +89,6 @@ import Swarm.Game.Value import Swarm.Language.Capability import Swarm.Language.Context hiding (delete) import Swarm.Language.Key (parseKeyComboFull) -import Swarm.Language.Parse (runParser) import Swarm.Language.Pipeline import Swarm.Language.Pretty (prettyText) import Swarm.Language.Requirement qualified as R @@ -100,6 +99,7 @@ import Swarm.Log import Swarm.Util hiding (both) import Swarm.Util.Effect (throwToMaybe) import Swarm.Util.Lens (inherit) +import Text.Megaparsec (runParser) import Witch (From (from), into) import Prelude hiding (Applicative (..), lookup) @@ -1001,7 +1001,7 @@ execConst runChildProg c vs s k = do [VText msg] -> return $ Up (User msg) s k _ -> badConst Key -> case vs of - [VText ktxt] -> case runParser parseKeyComboFull ktxt of + [VText ktxt] -> case runParser parseKeyComboFull "" ktxt of Right kc -> return $ Out (VKey kc) s k Left _ -> return $ Up (CmdFailed Key (T.unwords ["Unknown key", quote ktxt]) Nothing) s k _ -> badConst diff --git a/src/swarm-lang/Swarm/Language/Elaborate.hs b/src/swarm-lang/Swarm/Language/Elaborate.hs index e48a6250..e4b7b476 100644 --- a/src/swarm-lang/Swarm/Language/Elaborate.hs +++ b/src/swarm-lang/Swarm/Language/Elaborate.hs @@ -22,12 +22,12 @@ elaborate = -- Wrap all *free* variables in 'Force'. Free variables must be -- referring to a previous definition, which are all wrapped in -- 'TDelay'. - (freeVarsS %~ \s -> Syntax' (s ^. sLoc) (SApp sForce s) (s ^. sType)) + (freeVarsS %~ \s -> Syntax' (s ^. sLoc) (SApp sForce s) (s ^. sComments) (s ^. sType)) -- Now do additional rewriting on all subterms. . transform rewrite where rewrite :: Syntax' Polytype -> Syntax' Polytype - rewrite (Syntax' l t ty) = Syntax' l (rewriteTerm t) ty + rewrite (Syntax' l t ty cs) = Syntax' l (rewriteTerm t) ty cs rewriteTerm :: Term' Polytype -> Term' Polytype rewriteTerm = \case @@ -43,14 +43,14 @@ elaborate = -- bound by 'def'. SDef True x ty t1 -> SDef True x ty (wrapForce (lvVar x) t1) -- Rewrite @f $ x@ to @f x@. - SApp (Syntax' _ (SApp (Syntax' _ (TConst AppF) _) l) _) r -> SApp l r + SApp (Syntax' _ (SApp (Syntax' _ (TConst AppF) _ _) l) _ _) r -> SApp l r -- Leave any other subterms alone. t -> t wrapForce :: Var -> Syntax' Polytype -> Syntax' Polytype -wrapForce x = mapFreeS x (\s@(Syntax' l _ ty) -> Syntax' l (SApp sForce s) ty) +wrapForce x = mapFreeS x (\s@(Syntax' l _ ty cs) -> Syntax' l (SApp sForce s) ty cs) -- Note, TyUnit is not the right type, but I don't want to bother sForce :: Syntax' Polytype -sForce = Syntax' NoLoc (TConst Force) (Forall ["a"] (TyDelay (TyVar "a") :->: TyVar "a")) +sForce = Syntax' NoLoc (TConst Force) Nothing (Forall ["a"] (TyDelay (TyVar "a") :->: TyVar "a")) diff --git a/src/swarm-lang/Swarm/Language/Key.hs b/src/swarm-lang/Swarm/Language/Key.hs index 4f9471da..788c6ba2 100644 --- a/src/swarm-lang/Swarm/Language/Key.hs +++ b/src/swarm-lang/Swarm/Language/Key.hs @@ -26,9 +26,9 @@ import Data.Set (Set) import Data.Set qualified as S import Data.Text (Text) import Data.Text qualified as T +import Data.Void import GHC.Generics hiding (from) import Graphics.Vty.Input.Events qualified as V -import Swarm.Language.Parse import Text.Megaparsec import Text.Megaparsec.Char (char, string) import Text.Megaparsec.Char.Lexer (decimal) @@ -47,27 +47,29 @@ deriving instance FromJSON V.Modifier deriving instance ToJSON V.Key deriving instance ToJSON V.Modifier +type SParser = Parsec Void Text + -- | Smart constructor for 'KeyCombo'. mkKeyCombo :: [V.Modifier] -> V.Key -> KeyCombo mkKeyCombo mods k = KeyCombo k (sort mods) -- | Parse a key combo with nothing after it. -parseKeyComboFull :: Parser KeyCombo +parseKeyComboFull :: SParser KeyCombo parseKeyComboFull = parseKeyCombo <* eof -- | Parse a key combo like @\"M-C-F5\"@, @\"Down\"@, or @\"C-x\"@. -parseKeyCombo :: Parser KeyCombo +parseKeyCombo :: SParser KeyCombo parseKeyCombo = mkKeyCombo <$> many (try (parseModifier <* char '-')) <*> parseKey -parseModifier :: Parser V.Modifier +parseModifier :: SParser V.Modifier parseModifier = V.MShift <$ string "S" <|> V.MCtrl <$ string "C" <|> V.MMeta <$ string "M" <|> V.MAlt <$ string "A" -parseKey :: Parser V.Key +parseKey :: SParser V.Key parseKey = -- For an explanation of the 'reverse', see Note [Key names are not prefix-free] (asum . map specialKeyParser . reverse . S.toList $ specialKeyNames) @@ -90,13 +92,13 @@ parseKey = -- of key names (which are sorted alphabetically), it guarantees that -- longer names will come before names which are prefixes of them. -parseFunctionKey :: Parser V.Key +parseFunctionKey :: SParser V.Key parseFunctionKey = V.KFun <$> try (char 'F' *> decimal) -parseCharKey :: Parser V.Key +parseCharKey :: SParser V.Key parseCharKey = V.KChar <$> anySingle -specialKeyParser :: Text -> Parser V.Key +specialKeyParser :: Text -> SParser V.Key specialKeyParser t = read . ('K' :) . from @Text <$> string t -- https://stackoverflow.com/questions/51848587/list-constructor-names-using-generics-in-haskell diff --git a/src/swarm-lang/Swarm/Language/LSP.hs b/src/swarm-lang/Swarm/Language/LSP.hs index 69927e7a..0ad1b976 100644 --- a/src/swarm-lang/Swarm/Language/LSP.hs +++ b/src/swarm-lang/Swarm/Language/LSP.hs @@ -81,8 +81,8 @@ validateSwarmCode doc version content = do flushDiagnosticsBySource 0 (Just diagnosticSourcePrefix) let (parsingErrs, unusedVarWarnings) = case readTerm' content of - Right Nothing -> ([], []) - Right (Just term) -> (parsingErrors, unusedWarnings) + Right (Nothing, _) -> ([], []) + Right (Just term, _) -> (parsingErrors, unusedWarnings) where VU.Usage _ problems = VU.getUsage mempty term unusedWarnings = mapMaybe (VU.toErrPos content) problems diff --git a/src/swarm-lang/Swarm/Language/LSP/Hover.hs b/src/swarm-lang/Swarm/Language/LSP/Hover.hs index 031c0753..d7b02cb5 100644 --- a/src/swarm-lang/Swarm/Language/LSP/Hover.hs +++ b/src/swarm-lang/Swarm/Language/LSP/Hover.hs @@ -60,14 +60,14 @@ showHoverInfo :: showHoverInfo _ p vf@(VirtualFile _ _ myRope) = case readTerm' content of Left _ -> Nothing - Right Nothing -> Nothing - Right (Just stx) -> Just $ case processParsedTerm stx of + Right (Nothing, _) -> Nothing + Right (Just stx, _) -> Just $ case processParsedTerm stx of Left _e -> let found@(Syntax foundSloc _) = narrowToPosition stx $ fromIntegral absolutePos finalPos = posToRange myRope foundSloc in (,finalPos) . treeToMarkdown 0 $ explain found Right (ProcessedTerm modul _req _reqCtx) -> - let found@(Syntax' foundSloc _ _) = narrowToPosition (moduleAST modul) $ fromIntegral absolutePos + let found@(Syntax' foundSloc _ _ _) = narrowToPosition (moduleAST modul) $ fromIntegral absolutePos finalPos = posToRange myRope foundSloc in (,finalPos) . treeToMarkdown 0 $ explain found where @@ -94,7 +94,7 @@ descend :: -- | next element to inspect Syntax' ty -> Maybe (Syntax' ty) -descend pos s1@(Syntax' l1 _ _) = do +descend pos s1@(Syntax' l1 _ _ _) = do guard $ withinBound pos l1 return $ narrowToPosition s1 pos @@ -107,12 +107,12 @@ narrowToPosition :: -- | absolute offset within the file Int -> Syntax' ty -narrowToPosition s0@(Syntax' _ t ty) pos = fromMaybe s0 $ case t of +narrowToPosition s0@(Syntax' _ t _ ty) pos = fromMaybe s0 $ case t of SLam lv _ s -> d (locVarToSyntax' lv $ getInnerType ty) <|> d s SApp s1 s2 -> d s1 <|> d s2 - SLet _ lv _ s1@(Syntax' _ _ lty) s2 -> d (locVarToSyntax' lv lty) <|> d s1 <|> d s2 - SDef _ lv _ s@(Syntax' _ _ lty) -> d (locVarToSyntax' lv lty) <|> d s - SBind mlv s1@(Syntax' _ _ lty) s2 -> (mlv >>= d . flip locVarToSyntax' (getInnerType lty)) <|> d s1 <|> d s2 + SLet _ lv _ s1@(Syntax' _ _ _ lty) s2 -> d (locVarToSyntax' lv lty) <|> d s1 <|> d s2 + SDef _ lv _ s@(Syntax' _ _ _ lty) -> d (locVarToSyntax' lv lty) <|> d s + SBind mlv s1@(Syntax' _ _ _ lty) s2 -> (mlv >>= d . flip locVarToSyntax' (getInnerType lty)) <|> d s1 <|> d s2 SPair s1 s2 -> d s1 <|> d s2 SDelay _ s -> d s SRcd m -> asum . map d . catMaybes . M.elems $ m @@ -244,8 +244,8 @@ explain trm = case trm ^. sTerm of explainFunction :: ExplainableType ty => Syntax' ty -> Tree Text explainFunction s = case unfoldApps s of - (Syntax' _ (TConst Force) _ :| [innerT]) -> explain innerT - (Syntax' _ (TConst Force) _ :| f : params) -> explainF f params + (Syntax' _ (TConst Force) _ _ :| [innerT]) -> explain innerT + (Syntax' _ (TConst Force) _ _ :| f : params) -> explainF f params (f :| params) -> explainF f params where explainF f params = diff --git a/src/swarm-lang/Swarm/Language/Parse.hs b/src/swarm-lang/Swarm/Language/Parse.hs index 4b80bea5..f80978c7 100644 --- a/src/swarm-lang/Swarm/Language/Parse.hs +++ b/src/swarm-lang/Swarm/Language/Parse.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ViewPatterns #-} @@ -33,13 +34,15 @@ module Swarm.Language.Parse ( unTuple, ) where -import Control.Lens (view, (^.)) -import Control.Monad (guard, join) +import Control.Arrow (right) +import Control.Lens (makeLenses, use, view, (%=), (.=), (^.)) +import Control.Monad (guard, join, void) import Control.Monad.Combinators.Expr import Control.Monad.Reader ( MonadReader (ask), ReaderT (runReaderT), ) +import Control.Monad.State (StateT, runStateT) import Data.Bifunctor import Data.Foldable (asum) import Data.List (foldl', nub) @@ -47,8 +50,11 @@ import Data.List.NonEmpty qualified (head) import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map import Data.Maybe (fromMaybe, mapMaybe) +import Data.Sequence (Seq) +import Data.Sequence qualified as Seq import Data.Set qualified as S import Data.Set.Lens (setOf) +import Data.String (fromString) import Data.Text (Text, index, toLower) import Data.Text qualified as T import Data.Void @@ -76,7 +82,22 @@ import Witch data Antiquoting = AllowAntiquoting | DisallowAntiquoting deriving (Eq, Ord, Show) -type Parser = ReaderT Antiquoting (Parsec Void Text) +data CommentState = CS + { _freshLine :: Bool + -- ^ Are we currently on a (so far) blank line, i.e. have there been + -- no nontrivial tokens since the most recent newline? This field + -- is updated every time we parse a lexeme or symbol (set to + -- false), or a newline (set to true). + , _comments :: Seq Comment + -- ^ The actual sequence of comments, in the order they were encountered + } + +makeLenses ''CommentState + +initCommentState :: CommentState +initCommentState = CS {_freshLine = True, _comments = Seq.empty} + +type Parser = ReaderT Antiquoting (StateT CommentState (Parsec Void Text)) type ParserError = ParseErrorBundle Text Void @@ -109,24 +130,61 @@ reservedWords = , "requirements" ] +-- Approach for preserving comments taken from https://www.reddit.com/r/haskell/comments/ni4gpm/comment/gz0ipmp/ + +-- | If we see a comment starting now, is it the first non-whitespace +-- thing on the current line so far, or were there other +-- non-whitespace tokens previously? +getCommentSituation :: Parser CommentSituation +getCommentSituation = do + fl <- use freshLine + return $ if fl then StandaloneComment else SuffixComment + +-- | Parse a line comment, while appending it out-of-band to the list of +-- comments saved in the custom state. +lineComment :: Text -> Parser () +lineComment start = do + cs <- getCommentSituation + (loc, t) <- parseLocG $ do + string start *> takeWhileP (Just "character") (/= '\n') + comments %= (Seq.|> Comment loc LineComment cs t) + +-- | Parse a block comment, while appending it out-of-band to the list of +-- comments saved in the custom state. +blockComment :: Text -> Text -> Parser () +blockComment start end = do + cs <- getCommentSituation + (loc, t) <- parseLocG $ do + void $ string start + manyTill anySingle (string end) + comments %= (Seq.|> Comment loc BlockComment cs (fromString t)) + -- | Skip spaces and comments. sc :: Parser () sc = - L.space - space1 - (L.skipLineComment "//") - (L.skipBlockComment "/*" "*/") + -- Typically we would use L.space here, but we have to inline its + -- definition and use our own slight variant, since we need to treat + -- end-of-line specially. + skipMany . choice . map hidden $ + [ hspace1 + , eol *> (freshLine .= True) -- If we see a newline, reset freshLine to True. + , lineComment "//" + , blockComment "/*" "*/" + ] -- | In general, we follow the convention that every token parser -- assumes no leading whitespace and consumes all trailing -- whitespace. Concretely, we achieve this by wrapping every token -- parser using 'lexeme'. +-- +-- Also sets freshLine to False every time we see a non-whitespace +-- token. lexeme :: Parser a -> Parser a -lexeme = L.lexeme sc +lexeme p = (freshLine .= False) *> L.lexeme sc p -- | A lexeme consisting of a literal string. symbol :: Text -> Parser Text -symbol = L.symbol sc +symbol s = (freshLine .= False) *> L.symbol sc s -- | Parse a case-insensitive reserved word, making sure it is not a -- prefix of a longer variable name, and allowing the parser to @@ -329,7 +387,7 @@ mkTuple (x : xs) = let r = mkTuple xs in loc x r $ SPair x r unTuple :: Syntax' ty -> [Syntax' ty] unTuple = \case - Syntax' _ (SPair s1 s2) _ -> s1 : unTuple s2 + Syntax' _ (SPair s1 s2) _ _ -> s1 : unTuple s2 s -> [s] -- | Construct an 'SLet', automatically filling in the Boolean field @@ -473,19 +531,31 @@ operatorSymbol = T.singleton <$> oneOf opChars -------------------------------------------------- -- Utilities --- | Run a parser on some input text, returning either the result or a --- pretty-printed parse error message. -runParser :: Parser a -> Text -> Either Text a -runParser p t = first (from . errorBundlePretty) (parse (runReaderT p DisallowAntiquoting) "" t) +-- | Run a parser on some input text, returning either the result + +-- all collected comments, or a pretty-printed parse error message. +runParser :: Parser a -> Text -> Either Text (a, Seq Comment) +runParser p t = + first (from . errorBundlePretty) + . (\pt -> parse pt "" t) + . fmap (second (^. comments)) + . flip runStateT initCommentState + . flip runReaderT DisallowAntiquoting + $ p -- | A utility for running a parser in an arbitrary 'MonadFail' (which -- is going to be the TemplateHaskell 'Language.Haskell.TH.Q' monad --- see -- "Swarm.Language.Parse.QQ"), with a specified source position. runParserTH :: (Monad m, MonadFail m) => (String, Int, Int) -> Parser a -> String -> m a runParserTH (file, line, col) p s = - case snd (runParser' (runReaderT (fully sc p) AllowAntiquoting) initState) of - Left err -> fail $ errorBundlePretty err - Right e -> return e + let (_, res) = + flip runParser' initState + . flip runStateT initCommentState + . flip runReaderT AllowAntiquoting + . fully sc + $ p + in case res of + Left err -> fail $ errorBundlePretty err + Right e -> return $ fst e where initState :: State Text Void initState = @@ -502,12 +572,17 @@ runParserTH (file, line, col) p s = -- 'Nothing' if the input was only whitespace) or a pretty-printed -- parse error message. readTerm :: Text -> Either Text (Maybe Syntax) -readTerm = runParser (fullyMaybe sc parseTerm) +readTerm = right fst . runParser (fullyMaybe sc parseTerm) -- | A lower-level `readTerm` which returns the megaparsec bundle error --- for precise error reporting. -readTerm' :: Text -> Either ParserError (Maybe Syntax) -readTerm' = parse (runReaderT (fullyMaybe sc parseTerm) DisallowAntiquoting) "" +-- for precise error reporting, as well as the parsed comments. +readTerm' :: Text -> Either ParserError (Maybe Syntax, Seq Comment) +readTerm' t = + (\pt -> parse pt "" t) + . fmap (second (^. comments)) + . flip runStateT initCommentState + . flip runReaderT DisallowAntiquoting + $ fullyMaybe sc parseTerm -- | A utility for converting a ParserError into a one line message: -- @: @ diff --git a/src/swarm-lang/Swarm/Language/Syntax.hs b/src/swarm-lang/Swarm/Language/Syntax.hs index a80a33bd..d6d882e6 100644 --- a/src/swarm-lang/Swarm/Language/Syntax.hs +++ b/src/swarm-lang/Swarm/Language/Syntax.hs @@ -50,8 +50,10 @@ module Swarm.Language.Syntax ( sLoc, sTerm, sType, + sComments, Syntax, pattern Syntax, + pattern CSyntax, LocVar (..), SrcLoc (..), noLoc, @@ -69,6 +71,11 @@ module Swarm.Language.Syntax ( pattern TProj, pattern TAnnotate, + -- * Comments + CommentType (..), + CommentSituation (..), + Comment (..), + -- * Terms Var, DelayType (..), @@ -100,6 +107,7 @@ import Data.Int (Int32) import Data.List.NonEmpty (NonEmpty) import Data.List.NonEmpty qualified as NonEmpty import Data.Map.Strict (Map) +import Data.Sequence (Seq) import Data.Set (Set) import Data.Set qualified as S import Data.Set qualified as Set @@ -1008,7 +1016,7 @@ data LocVar = LV {lvSrcLoc :: SrcLoc, lvVar :: Var} deriving (Eq, Ord, Show, Data, Generic, FromJSON, ToJSON) locVarToSyntax' :: LocVar -> ty -> Syntax' ty -locVarToSyntax' (LV s v) = Syntax' s (TVar v) +locVarToSyntax' (LV s v) = Syntax' s (TVar v) Nothing -- | Terms of the Swarm language. data Term' ty @@ -1113,6 +1121,7 @@ instance Data ty => Plated (Term' ty) where data Syntax' ty = Syntax' { _sLoc :: SrcLoc , _sTerm :: Term' ty + , _sComments :: Maybe (Seq Comment) , _sType :: ty } deriving (Eq, Show, Functor, Foldable, Traversable, Data, Generic, FromJSON, ToJSON) @@ -1134,23 +1143,56 @@ instance Semigroup SrcLoc where instance Monoid SrcLoc where mempty = NoLoc +------------------------------------------------------------ +-- Comments +------------------------------------------------------------ + +-- | Line vs block comments. +data CommentType = LineComment | BlockComment + deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, ToJSON, FromJSON) + +-- | Was a comment all by itself on a line, or did it occur after some +-- other tokens on a line? +data CommentSituation = StandaloneComment | SuffixComment + deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, ToJSON, FromJSON) + +-- | A comment is retained as some text plus metadata (source +-- location, comment type, + comment situation). While parsing we +-- record all comments out-of-band, for later re-insertion into the +-- AST. +data Comment = Comment + { commentSrcLoc :: SrcLoc + , commentType :: CommentType + , commentSituation :: CommentSituation + , commentText :: Text + } + deriving (Eq, Show, Generic, Data, ToJSON, FromJSON) + ------------------------------------------------------------ -- Pattern synonyms for untyped terms ------------------------------------------------------------ +-- | Syntax without type annotations. type Syntax = Syntax' () +-- | Raw parsed syntax, without comments or type annotations. pattern Syntax :: SrcLoc -> Term -> Syntax -pattern Syntax l t = Syntax' l t () +pattern Syntax l t = Syntax' l t Nothing () {-# COMPLETE Syntax #-} +-- | Untyped syntax with assocated comments. +pattern CSyntax :: SrcLoc -> Term -> Maybe (Seq Comment) -> Syntax +pattern CSyntax l t cs = Syntax' l t cs () + +{-# COMPLETE CSyntax #-} + makeLenses ''Syntax' noLoc :: Term -> Syntax noLoc = Syntax mempty --- | Match an untyped term without its 'SrcLoc'. +-- | Match an untyped term without annotations. pattern STerm :: Term -> Syntax pattern STerm t <- Syntax _ t @@ -1160,17 +1202,17 @@ pattern STerm t <- pattern TRequirements :: Text -> Term -> Term pattern TRequirements x t = SRequirements x (STerm t) --- | Match a TPair without syntax +-- | Match a TPair without annotations. pattern TPair :: Term -> Term -> Term pattern TPair t1 t2 = SPair (STerm t1) (STerm t2) --- | Match a TLam without syntax +-- | Match a TLam without annotations. pattern TLam :: Var -> Maybe Type -> Term -> Term pattern TLam v ty t <- SLam (lvVar -> v) ty (STerm t) where TLam v ty t = SLam (LV NoLoc v) ty (STerm t) --- | Match a TApp without syntax +-- | Match a TApp without annotations. pattern TApp :: Term -> Term -> Term pattern TApp t1 t2 = SApp (STerm t1) (STerm t2) @@ -1180,29 +1222,29 @@ infixl 0 :$: pattern (:$:) :: Term -> Syntax -> Term pattern (:$:) t1 s2 = SApp (STerm t1) s2 --- | Match a TLet without syntax +-- | Match a TLet without annotations. pattern TLet :: Bool -> Var -> Maybe Polytype -> Term -> Term -> Term pattern TLet r v pt t1 t2 <- SLet r (lvVar -> v) pt (STerm t1) (STerm t2) where TLet r v pt t1 t2 = SLet r (LV NoLoc v) pt (STerm t1) (STerm t2) --- | Match a TDef without syntax +-- | Match a TDef without annotations. pattern TDef :: Bool -> Var -> Maybe Polytype -> Term -> Term pattern TDef r v pt t <- SDef r (lvVar -> v) pt (STerm t) where TDef r v pt t = SDef r (LV NoLoc v) pt (STerm t) --- | Match a TBind without syntax +-- | Match a TBind without annotations. pattern TBind :: Maybe Var -> Term -> Term -> Term pattern TBind mv t1 t2 <- SBind (fmap lvVar -> mv) (STerm t1) (STerm t2) where TBind mv t1 t2 = SBind (LV NoLoc <$> mv) (STerm t1) (STerm t2) --- | Match a TDelay without syntax +-- | Match a TDelay without annotations. pattern TDelay :: DelayType -> Term -> Term pattern TDelay m t = SDelay m (STerm t) --- | Match a TRcd without syntax +-- | Match a TRcd without annotations. pattern TRcd :: Map Var (Maybe Term) -> Term pattern TRcd m <- SRcd ((fmap . fmap) _sTerm -> m) where @@ -1211,15 +1253,16 @@ pattern TRcd m <- SRcd ((fmap . fmap) _sTerm -> m) pattern TProj :: Term -> Var -> Term pattern TProj t x = SProj (STerm t) x --- | Match a TAnnotate without syntax +-- | Match a TAnnotate without annotations. pattern TAnnotate :: Term -> Polytype -> Term pattern TAnnotate t pt = SAnnotate (STerm t) pt --- | COMPLETE pragma tells GHC using this set of pattern is complete for Term +-- COMPLETE pragma tells GHC using this set of patterns is complete for Term + {-# COMPLETE TUnit, TConst, TDir, TInt, TAntiInt, TText, TAntiText, TBool, TRequireDevice, TRequire, TRequirements, TVar, TPair, TLam, TApp, TLet, TDef, TBind, TDelay, TRcd, TProj, TAnnotate #-} --- | Make infix operation (e.g. @2 + 3@) a curried function --- application (@((+) 2) 3@). +-- | Make an infix operation (e.g. @2 + 3@) a curried function +-- application (e.g. @((+) 2) 3@). mkOp :: Const -> Syntax -> Syntax -> Syntax mkOp c s1@(Syntax l1 _) s2@(Syntax l2 _) = Syntax newLoc newTerm where @@ -1230,7 +1273,7 @@ mkOp c s1@(Syntax l1 _) s2@(Syntax l2 _) = Syntax newLoc newTerm sop = noLoc (TConst c) newTerm = SApp (Syntax l1 $ SApp sop s1) s2 --- | Make infix operation, discarding any syntax related location +-- | Make an infix operation, discarding any location information mkOp' :: Const -> Term -> Term -> Term mkOp' c t1 = TApp (TApp (TConst c) t1) @@ -1244,16 +1287,16 @@ mkOp' c t1 = TApp (TApp (TConst c) t1) -- TConst Mul :| [TInt 1,TInt 2] unfoldApps :: Syntax' ty -> NonEmpty (Syntax' ty) unfoldApps trm = NonEmpty.reverse . flip NonEmpty.unfoldr trm $ \case - Syntax' _ (SApp s1 s2) _ -> (s2, Just s1) + Syntax' _ (SApp s1 s2) _ _ -> (s2, Just s1) s -> (s, Nothing) -------------------------------------------------- -- Erasure --- | Erase a 'Syntax' tree annotated with type --- information to a bare unannotated 'Term'. +-- | Erase a 'Syntax' tree annotated with type and comment information +-- to a bare unannotated 'Term'. eraseS :: Syntax' ty -> Term -eraseS (Syntax' _ t _) = void t +eraseS (Syntax' _ t _ _) = void t ------------------------------------------------------------ -- Free variable traversals @@ -1270,7 +1313,7 @@ freeVarsS :: forall ty. Traversal' (Syntax' ty) (Syntax' ty) freeVarsS f = go S.empty where -- go :: Applicative f => Set Var -> Syntax' ty -> f (Syntax' ty) - go bound s@(Syntax' l t ty) = case t of + go bound s@(Syntax' l t ty cmts) = case t of TUnit -> pure s TConst {} -> pure s TDir {} -> pure s @@ -1300,34 +1343,34 @@ freeVarsS f = go S.empty SProj s1 x -> rewrap $ SProj <$> go bound s1 <*> pure x SAnnotate s1 pty -> rewrap $ SAnnotate <$> go bound s1 <*> pure pty where - rewrap s' = Syntax' l <$> s' <*> pure ty + rewrap s' = Syntax' l <$> s' <*> pure ty <*> pure cmts -- | Like 'freeVarsS', but traverse over the 'Term's containing free -- variables. More direct if you don't need to know the types or -- source locations of the variables. Note that if you want to get --- the list of all `Term`s representing free variables, you can do so via --- @'toListOf' 'freeVarsT'@. +-- the list of all `Term`s representing free variables, you can do +-- so via @'toListOf' 'freeVarsT'@. freeVarsT :: forall ty. Traversal' (Syntax' ty) (Term' ty) freeVarsT = freeVarsS . sTerm -- | Traversal over the free variables of a term. Like 'freeVarsS' --- and 'freeVarsT', but traverse over the variable names --- themselves. Note that if you want to get the set of all free --- variable names, you can do so via @'Data.Set.Lens.setOf' --- 'freeVarsV'@. +-- and 'freeVarsT', but traverse over the variable names themselves. +-- Note that if you want to get the set of all free variable names, +-- you can do so via @'Data.Set.Lens.setOf' 'freeVarsV'@. freeVarsV :: Traversal' (Syntax' ty) Var freeVarsV = freeVarsT . (\f -> \case TVar x -> TVar <$> f x; t -> pure t) --- | Apply a function to all free occurrences of a particular variable. +-- | Apply a function to all free occurrences of a particular +-- variable. mapFreeS :: Var -> (Syntax' ty -> Syntax' ty) -> Syntax' ty -> Syntax' ty mapFreeS x f = freeVarsS %~ (\t -> case t ^. sTerm of TVar y | y == x -> f t; _ -> t) --- | Transform the AST into a Tree datatype. --- Useful for pretty-printing (e.g. via "Data.Tree.drawTree"). +-- | Transform the AST into a Tree datatype. Useful for +-- pretty-printing (e.g. via "Data.Tree.drawTree"). asTree :: Data a => Syntax' a -> Tree (Syntax' a) asTree = para Node -- | Each constructor is a assigned a value of 1, plus --- any recursive syntax it entails. +-- any recursive syntax it entails. measureAstSize :: Data a => Syntax' a -> Int measureAstSize = length . universe diff --git a/src/swarm-lang/Swarm/Language/Typecheck.hs b/src/swarm-lang/Swarm/Language/Typecheck.hs index 2fe99182..f7501c48 100644 --- a/src/swarm-lang/Swarm/Language/Typecheck.hs +++ b/src/swarm-lang/Swarm/Language/Typecheck.hs @@ -332,7 +332,7 @@ instance (HasBindings u, Data u) => HasBindings (Term' u) where applyBindings = gmapM (mkM (applyBindings @(Syntax' u))) instance (HasBindings u, Data u) => HasBindings (Syntax' u) where - applyBindings (Syntax' l t u) = Syntax' l <$> applyBindings t <*> applyBindings u + applyBindings (Syntax' l t cs u) = Syntax' l <$> applyBindings t <*> pure cs <*> applyBindings u instance HasBindings UModule where applyBindings (Module u uctx) = Module <$> applyBindings u <*> applyBindings uctx @@ -556,7 +556,7 @@ inferModule :: ) => Syntax -> m UModule -inferModule s@(Syntax l t) = addLocToTypeErr l $ case t of +inferModule s@(CSyntax l t cs) = addLocToTypeErr l $ case t of -- For definitions with no type signature, make up a fresh type -- variable for the body, infer the body under an extended context, -- and unify the two. Then generalize the type and return an @@ -566,7 +566,7 @@ inferModule s@(Syntax l t) = addLocToTypeErr l $ case t of t1' <- withBinding (lvVar x) (Forall [] xTy) $ infer t1 _ <- unify (Just t1) (joined xTy (t1' ^. sType)) pty <- generalize (t1' ^. sType) - return $ Module (Syntax' l (SDef r x Nothing t1') (UTyCmd UTyUnit)) (singleton (lvVar x) pty) + return $ Module (Syntax' l (SDef r x Nothing t1') cs (UTyCmd UTyUnit)) (singleton (lvVar x) pty) -- If a (poly)type signature has been provided, skolemize it and -- check the definition. @@ -574,7 +574,7 @@ inferModule s@(Syntax l t) = addLocToTypeErr l $ case t of let upty = toU pty uty <- skolemize upty t1' <- withBinding (lvVar x) upty $ check t1 uty - return $ Module (Syntax' l (SDef r x (Just pty) t1') (UTyCmd UTyUnit)) (singleton (lvVar x) upty) + return $ Module (Syntax' l (SDef r x (Just pty) t1') cs (UTyCmd UTyUnit)) (singleton (lvVar x) upty) -- To handle a 'TBind', infer the types of both sides, combining the -- returned modules appropriately. Have to be careful to use the @@ -624,7 +624,7 @@ inferModule s@(Syntax l t) = addLocToTypeErr l $ case t of let ctxX = maybe Ctx.empty ((`Ctx.singleton` genA) . lvVar) mx return $ Module - (Syntax' l (SBind mx c1' c2') (c2' ^. sType)) + (Syntax' l (SBind mx c1' c2') cs (c2' ^. sType)) (ctx1 `Ctx.union` ctxX `Ctx.union` ctx2) -- In all other cases, there can no longer be any definitions in the @@ -648,31 +648,31 @@ infer :: ) => Syntax -> m (Syntax' UType) -infer s@(Syntax l t) = addLocToTypeErr l $ case t of +infer s@(CSyntax l t cs) = addLocToTypeErr l $ case t of -- Primitives, i.e. things for which we immediately know the only -- possible correct type, and knowing an expected type would provide -- no extra information. - TUnit -> return $ Syntax' l TUnit UTyUnit - TConst c -> Syntax' l (TConst c) <$> (instantiate . toU $ inferConst c) - TDir d -> return $ Syntax' l (TDir d) UTyDir - TInt n -> return $ Syntax' l (TInt n) UTyInt - TAntiInt x -> return $ Syntax' l (TAntiInt x) UTyInt - TText x -> return $ Syntax' l (TText x) UTyText - TAntiText x -> return $ Syntax' l (TAntiText x) UTyText - TBool b -> return $ Syntax' l (TBool b) UTyBool - TRobot r -> return $ Syntax' l (TRobot r) UTyActor - TRequireDevice d -> return $ Syntax' l (TRequireDevice d) (UTyCmd UTyUnit) - TRequire n d -> return $ Syntax' l (TRequire n d) (UTyCmd UTyUnit) + TUnit -> return $ Syntax' l TUnit cs UTyUnit + TConst c -> Syntax' l (TConst c) cs <$> (instantiate . toU $ inferConst c) + TDir d -> return $ Syntax' l (TDir d) cs UTyDir + TInt n -> return $ Syntax' l (TInt n) cs UTyInt + TAntiInt x -> return $ Syntax' l (TAntiInt x) cs UTyInt + TText x -> return $ Syntax' l (TText x) cs UTyText + TAntiText x -> return $ Syntax' l (TAntiText x) cs UTyText + TBool b -> return $ Syntax' l (TBool b) cs UTyBool + TRobot r -> return $ Syntax' l (TRobot r) cs UTyActor + TRequireDevice d -> return $ Syntax' l (TRequireDevice d) cs (UTyCmd UTyUnit) + TRequire n d -> return $ Syntax' l (TRequire n d) cs (UTyCmd UTyUnit) SRequirements x t1 -> do t1' <- infer t1 - return $ Syntax' l (SRequirements x t1') (UTyCmd UTyUnit) + return $ Syntax' l (SRequirements x t1') cs (UTyCmd UTyUnit) -- We should never encounter a TRef since they do not show up in -- surface syntax, only as values while evaluating (*after* -- typechecking). TRef _ -> throwTypeErr l $ CantInfer t -- Just look up variables in the context. - TVar x -> Syntax' l (TVar x) <$> lookup l x + TVar x -> Syntax' l (TVar x) cs <$> lookup l x -- It is helpful to handle lambdas in inference mode as well as -- checking mode; in particular, we can handle lambdas with an -- explicit type annotation on the argument. Just infer the body @@ -681,7 +681,7 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of SLam x (Just argTy) body -> do let uargTy = toU argTy body' <- withBinding (lvVar x) (Forall [] uargTy) $ infer body - return $ Syntax' l (SLam x (Just argTy) body') (UTyFun uargTy (body' ^. sType)) + return $ Syntax' l (SLam x (Just argTy) body') cs (UTyFun uargTy (body' ^. sType)) -- Need special case here for applying 'atomic' or 'instant' so we -- don't handle it with the case for generic type application. @@ -722,7 +722,7 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of -- unit`). resTy' <- applyBindings resTy - return $ Syntax' l (SApp f' x') resTy' + return $ Syntax' l (SApp f' x') cs resTy' -- We handle binds in inference mode for a similar reason to -- application. @@ -735,7 +735,7 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of . withFrame l TCBindR $ infer c2 _ <- decomposeCmdTy c2 (Actual, c2' ^. sType) - return $ Syntax' l (SBind mx c1' c2') (c2' ^. sType) + return $ Syntax' l (SBind mx c1' c2') cs (c2' ^. sType) -- Handle record projection in inference mode. Knowing the expected -- type of r.x doesn't really help since we must infer the type of r @@ -744,14 +744,14 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of t1' <- infer t1 case t1' ^. sType of UTyRcd m -> case M.lookup x m of - Just xTy -> return $ Syntax' l (SProj t1' x) xTy + Just xTy -> return $ Syntax' l (SProj t1' x) cs xTy Nothing -> throwTypeErr l $ UnknownProj x (SProj t1 x) _ -> throwTypeErr l $ CantInferProj (SProj t1 x) -- See Note [Checking and inference for record literals] SRcd m -> do m' <- itraverse (\x -> infer . fromMaybe (STerm (TVar x))) m - return $ Syntax' l (SRcd (Just <$> m')) (UTyRcd (fmap (^. sType) m')) + return $ Syntax' l (SRcd (Just <$> m')) cs (UTyRcd (fmap (^. sType) m')) -- To infer a type-annotated term, switch into checking mode. -- However, we must be careful to deal properly with polymorphic @@ -769,7 +769,7 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of -- following typechecking steps. iuty <- instantiate upty c' <- check c iuty - return $ Syntax' l (SAnnotate c' pty) (c' ^. sType) + return $ Syntax' l (SAnnotate c' pty) cs (c' ^. sType) -- Fallback: to infer the type of anything else, make up a fresh unification -- variable for its type and check against it. @@ -905,7 +905,7 @@ check :: Syntax -> UType -> m (Syntax' UType) -check s@(Syntax l t) expected = addLocToTypeErr l $ case t of +check s@(CSyntax l t cs) expected = addLocToTypeErr l $ case t of -- if t : ty, then {t} : {ty}. -- Note that in theory, if the @Maybe Var@ component of the @SDelay@ -- is @Just@, we should typecheck the body under a context extended @@ -918,7 +918,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of SDelay d s1 -> do ty1 <- decomposeDelayTy s (Expected, expected) s1' <- check s1 ty1 - return $ Syntax' l (SDelay d s1') (UTyDelay ty1) + return $ Syntax' l (SDelay d s1') cs (UTyDelay ty1) -- To check the type of a pair, make sure the expected type is a -- product type, and push the two types down into the left and right. @@ -926,7 +926,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of (ty1, ty2) <- decomposeProdTy s (Expected, expected) s1' <- check s1 ty1 s2' <- check s2 ty2 - return $ Syntax' l (SPair s1' s2') (UTyProd ty1 ty2) + return $ Syntax' l (SPair s1' s2') cs (UTyProd ty1 ty2) -- To check a lambda, make sure the expected type is a function type. SLam x mxTy body -> do @@ -943,7 +943,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of Right _ -> return () Nothing -> return () body' <- withBinding (lvVar x) (Forall [] argTy) $ check body resTy - return $ Syntax' l (SLam x mxTy body') (UTyFun argTy resTy) + return $ Syntax' l (SLam x mxTy body') cs (UTyFun argTy resTy) -- Special case for checking the argument to 'atomic' (or -- 'instant'). 'atomic t' has the same type as 't', which must have @@ -963,7 +963,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of -- guaranteed to operate within a single tick. When c is Instant -- we skip this check. when (c == Atomic) $ validAtomic at - return $ Syntax' l (SApp atomic' at') (UTyCmd argTy) + return $ Syntax' l (SApp atomic' at') cs (UTyCmd argTy) -- Checking the type of a let-expression. SLet r x mxTy t1 t2 -> do (upty, t1') <- case mxTy of @@ -993,7 +993,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of ask @UCtx >>= mapM_ (noSkolems l) -- Return the annotated let. - return $ Syntax' l (SLet r x mxTy t1' t2') expected + return $ Syntax' l (SLet r x mxTy t1' t2') cs expected -- Definitions can only occur at the top level. SDef {} -> throwTypeErr l $ DefNotTopLevel t @@ -1015,13 +1015,13 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of throwTypeErr l $ FieldsMismatch (joined expectedFields actualFields) m' <- itraverse (\x ms -> check (fromMaybe (STerm (TVar x)) ms) (tyMap ! x)) fields - return $ Syntax' l (SRcd (Just <$> m')) expected + return $ Syntax' l (SRcd (Just <$> m')) cs expected -- Fallback: switch into inference mode, and check that the type we -- get is what we expected. _ -> do - Syntax' l' t' actual <- infer s - Syntax' l' t' <$> unify (Just s) (joined expected actual) + Syntax' l' t' _ actual <- infer s + Syntax' l' t' cs <$> unify (Just s) (joined expected actual) -- ~~~~ Note [Checking and inference for record literals] -- diff --git a/src/swarm-lang/Swarm/Language/Value.hs b/src/swarm-lang/Swarm/Language/Value.hs index 60674b1c..6a97067a 100644 --- a/src/swarm-lang/Swarm/Language/Value.hs +++ b/src/swarm-lang/Swarm/Language/Value.hs @@ -113,7 +113,7 @@ valueToTerm (VClo x t e) = M.foldrWithKey (\y v -> TLet False y Nothing (valueToTerm v)) (TLam x Nothing t) - (M.restrictKeys (unCtx e) (S.delete x (setOf freeVarsV (Syntax' NoLoc t ())))) + (M.restrictKeys (unCtx e) (S.delete x (setOf freeVarsV (Syntax' NoLoc t Nothing ())))) valueToTerm (VCApp c vs) = foldl' TApp (TConst c) (reverse (map valueToTerm vs)) valueToTerm (VDef r x t _) = TDef r x Nothing t valueToTerm (VResult v _) = valueToTerm v diff --git a/src/swarm-web/Swarm/Web.hs b/src/swarm-web/Swarm/Web.hs index 1c3b91e8..e62a2006 100644 --- a/src/swarm-web/Swarm/Web.hs +++ b/src/swarm-web/Swarm/Web.hs @@ -231,7 +231,7 @@ recogFoundHandler appStateRef = do codeRenderHandler :: Text -> Handler Text codeRenderHandler contents = do return $ case processTermEither contents of - Right (ProcessedTerm (Module stx@(Syntax' _srcLoc _term _) _) _ _) -> + Right (ProcessedTerm (Module stx@(Syntax' _srcLoc _term _ _) _) _ _) -> into @Text . drawTree . fmap (T.unpack . prettyTextLine) . para Node $ stx Left x -> x diff --git a/swarm.cabal b/swarm.cabal index e196b5d7..2ca182e8 100644 --- a/swarm.cabal +++ b/swarm.cabal @@ -369,6 +369,7 @@ library swarm-engine http-types >=0.12 && <0.13, lens >=4.19 && <5.4, linear >=1.21.6 && <1.24, + megaparsec >=9.6 && <9.7, mtl >=2.2.2 && <2.4, nonempty-containers >=0.3.4 && <0.3.5, prettyprinter >=1.7.0 && <1.8, @@ -775,6 +776,7 @@ test-suite swarm-unit TestLanguagePipeline TestNotification TestOrdering + TestParse TestPedagogy TestPretty TestRecipeCoverage @@ -792,6 +794,7 @@ test-suite swarm-unit filepath, hashable, lens, + megaparsec, mtl, tasty >=0.10 && <1.6, tasty-expected-failure >=0.12 && <0.13, diff --git a/test/unit/Main.hs b/test/unit/Main.hs index 7aa1dbf3..53f19ee2 100644 --- a/test/unit/Main.hs +++ b/test/unit/Main.hs @@ -36,6 +36,7 @@ import TestLSP (testLSP) import TestLanguagePipeline (testLanguagePipeline) import TestNotification (testNotification) import TestOrdering (testOrdering) +import TestParse (testParse) import TestPedagogy (testPedagogy) import TestPretty (testPrettyConst) import TestRecipeCoverage (testDeviceRecipeCoverage) @@ -55,6 +56,7 @@ tests s = testGroup "Tests" [ testLanguagePipeline + , testParse , testPrettyConst , testBoolExpr , testCommands diff --git a/test/unit/TestCommand.hs b/test/unit/TestCommand.hs index d1159e3f..4ba30ba8 100644 --- a/test/unit/TestCommand.hs +++ b/test/unit/TestCommand.hs @@ -12,12 +12,12 @@ import Data.Text (Text) import Graphics.Vty.Input.Events qualified as V import Swarm.Game.Location import Swarm.Language.Key -import Swarm.Language.Parse (runParser) import Swarm.Language.Syntax import Test.QuickCheck qualified as QC import Test.Tasty import Test.Tasty.HUnit import Test.Tasty.QuickCheck (testProperty) +import Text.Megaparsec (runParser) import Witch testCommands :: TestTree @@ -47,7 +47,7 @@ testCommands = [ testGroup "Parsing" ( let parseKeyTest input mods k = - assertEqual "" (runParser parseKeyCombo input) (Right (mkKeyCombo mods k)) + assertEqual "" (runParser parseKeyCombo "" input) (Right (mkKeyCombo mods k)) in [ testCase "parse x" $ parseKeyTest "x" [] (V.KChar 'x') , testCase "parse X" $ parseKeyTest "X" [] (V.KChar 'X') , testCase "parse C" $ parseKeyTest "C" [] (V.KChar 'C') @@ -89,4 +89,4 @@ arbitraryModifiers = QC.sublistOf [V.MAlt, V.MCtrl, V.MMeta, V.MShift] prop_parse_pretty_key :: KeyCombo -> Bool prop_parse_pretty_key kc = - runParser parseKeyCombo (prettyKeyCombo kc) == Right kc + runParser parseKeyCombo "" (prettyKeyCombo kc) == Right kc diff --git a/test/unit/TestLSP.hs b/test/unit/TestLSP.hs index 4ad2970a..0f8a720d 100644 --- a/test/unit/TestLSP.hs +++ b/test/unit/TestLSP.hs @@ -85,7 +85,7 @@ testLSP = getWarnings :: Text -> [UnusedVar] getWarnings content = case readTerm' content of - Right (Just term) -> map simplifyWarning problems + Right (Just term, _) -> map simplifyWarning problems where VU.Usage _ problems = VU.getUsage mempty term _ -> [] diff --git a/test/unit/TestParse.hs b/test/unit/TestParse.hs new file mode 100644 index 00000000..55daeb00 --- /dev/null +++ b/test/unit/TestParse.hs @@ -0,0 +1,66 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeApplications #-} + +-- | +-- SPDX-License-Identifier: BSD-3-Clause +-- +-- Swarm parser tests. +module TestParse where + +import Data.Foldable qualified as F +import Data.Text (Text) +import Swarm.Language.Parse +import Swarm.Language.Syntax +import Test.Tasty +import Test.Tasty.HUnit (Assertion, assertEqual, assertFailure, testCase) +import Text.Megaparsec (errorBundlePretty) +import Witch (into) + +testParse :: TestTree +testParse = + testGroup + "Parser - comments" + [ testCase "none" $ + expectParsedComments + "1 + 2" + [] + , testCase "suffix" $ + expectParsedComments + "1 + 2 // add" + [Comment (SrcLoc 6 12) LineComment SuffixComment " add"] + , testCase "standalone" $ + expectParsedComments + "// add\n1 + 2" + [Comment (SrcLoc 0 6) LineComment StandaloneComment " add"] + , testCase "block suffix" $ + expectParsedComments + "1 + 2 /* add */" + [Comment (SrcLoc 6 15) BlockComment SuffixComment " add "] + , testCase "block standalone" $ + expectParsedComments + "/* add */\n1 + 2" + [Comment (SrcLoc 0 9) BlockComment StandaloneComment " add "] + , testCase "block prefix" $ + expectParsedComments + "/* add */ 1 + 2" + [Comment (SrcLoc 0 9) BlockComment StandaloneComment " add "] + , testCase "block infix" $ + expectParsedComments + "1 + /*add*/ 2" + [Comment (SrcLoc 4 11) BlockComment SuffixComment "add"] + , testCase "multiline block" $ + expectParsedComments + "/* add \n some numbers */\n 1 + 2" + [Comment (SrcLoc 0 25) BlockComment StandaloneComment " add \n some numbers "] + , testCase "multiple lines" $ + expectParsedComments + "// add\n// some numbers\n 1 + 2" + [ Comment (SrcLoc 0 6) LineComment StandaloneComment " add" + , Comment (SrcLoc 7 22) LineComment StandaloneComment " some numbers" + ] + ] + +expectParsedComments :: Text -> [Comment] -> Assertion +expectParsedComments input ex = case readTerm' input of + Left err -> assertFailure (into @String $ errorBundlePretty err) + Right (_, res) -> assertEqual "Expected parsed comments" ex (F.toList res)