Parse comments (#1838)

Towards #1467.  This is incomplete but I thought it would be useful to split up the work into multiple PRs to avoid having a really massive one at the end.

This PR accomplishes a few things:
- Creates a new data type `Comment` to record the text of a comment, along with a bit of metadata giving its source location, whether it is a line or block comment, and whether it was on a line by itself or at the end of a line with some other non-comment tokens (this information will be used to decide which AST node to associate the comment with).
- Adds a field to store comments in every `Syntax'` node.  Note this is currently unused, since we haven't yet implemented the logic to insert comments into appropriate AST nodes (that will come in a later PR).
    - Note it's a bit annoying that the number of fields of each `Syntax'` node is growing.  I did look into consolidating and generalizing `Syntax'` to just have a single field for arbitrary annotations, but the changes required seemed annoying enough that I didn't want to bother.
This commit is contained in:
Brent Yorgey 2024-05-11 19:32:21 -05:00 committed by GitHub
parent f9c5df90f6
commit 4bd409dd69
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 315 additions and 124 deletions

View File

@ -1,3 +1,3 @@
loops: loops:
test: cabal test -j -O0 --test-show-details=direct swarm:swarm-integration swarm:swarm-unit test: cabal test -j -O0 --test-show-details=direct swarm:swarm-integration swarm:swarm-unit
unit: cabal test -j -O0 --test-show-details=direct swarm:swarm-unit --fast unit: cabal test -j -O0 --test-show-details=direct swarm:swarm-unit

View File

@ -136,7 +136,7 @@ getCommands (Just (ProcessedTerm (Module stx _) _ _)) =
where where
nodelist :: [Syntax' Polytype] nodelist :: [Syntax' Polytype]
nodelist = universe stx nodelist = universe stx
isCommand (Syntax' sloc t _) = case t of isCommand (Syntax' sloc t _ _) = case t of
TConst c -> guard (isConsidered c) >> Just (c, [sloc]) TConst c -> guard (isConsidered c) >> Just (c, [sloc])
_ -> Nothing _ -> Nothing

View File

@ -29,7 +29,7 @@ codeMetricsFromSyntax ::
Data a => Data a =>
Syntax' a -> Syntax' a ->
ScenarioCodeMetrics ScenarioCodeMetrics
codeMetricsFromSyntax s@(Syntax' srcLoc _ _) = codeMetricsFromSyntax s@(Syntax' srcLoc _ _ _) =
ScenarioCodeMetrics (charCount srcLoc) (measureAstSize s) ScenarioCodeMetrics (charCount srcLoc) (measureAstSize s)
where where
charCount :: SrcLoc -> Int charCount :: SrcLoc -> Int

View File

@ -167,7 +167,7 @@ parseCodeFile ::
m CodeToRun m CodeToRun
parseCodeFile filepath = do parseCodeFile filepath = do
contents <- sendIO $ TIO.readFile filepath contents <- sendIO $ TIO.readFile filepath
pt@(ProcessedTerm (Module (Syntax' srcLoc _ _) _) _ _) <- pt@(ProcessedTerm (Module (Syntax' srcLoc _ _ _) _) _ _) <-
either (throwError . CustomFailure) return (processTermEither contents) either (throwError . CustomFailure) return (processTermEither contents)
let strippedText = stripSrc srcLoc contents let strippedText = stripSrc srcLoc contents
programBytestring = TL.encodeUtf8 $ TL.fromStrict strippedText programBytestring = TL.encodeUtf8 $ TL.fromStrict strippedText

View File

@ -89,7 +89,6 @@ import Swarm.Game.Value
import Swarm.Language.Capability import Swarm.Language.Capability
import Swarm.Language.Context hiding (delete) import Swarm.Language.Context hiding (delete)
import Swarm.Language.Key (parseKeyComboFull) import Swarm.Language.Key (parseKeyComboFull)
import Swarm.Language.Parse (runParser)
import Swarm.Language.Pipeline import Swarm.Language.Pipeline
import Swarm.Language.Pretty (prettyText) import Swarm.Language.Pretty (prettyText)
import Swarm.Language.Requirement qualified as R import Swarm.Language.Requirement qualified as R
@ -100,6 +99,7 @@ import Swarm.Log
import Swarm.Util hiding (both) import Swarm.Util hiding (both)
import Swarm.Util.Effect (throwToMaybe) import Swarm.Util.Effect (throwToMaybe)
import Swarm.Util.Lens (inherit) import Swarm.Util.Lens (inherit)
import Text.Megaparsec (runParser)
import Witch (From (from), into) import Witch (From (from), into)
import Prelude hiding (Applicative (..), lookup) import Prelude hiding (Applicative (..), lookup)
@ -1001,7 +1001,7 @@ execConst runChildProg c vs s k = do
[VText msg] -> return $ Up (User msg) s k [VText msg] -> return $ Up (User msg) s k
_ -> badConst _ -> badConst
Key -> case vs of Key -> case vs of
[VText ktxt] -> case runParser parseKeyComboFull ktxt of [VText ktxt] -> case runParser parseKeyComboFull "" ktxt of
Right kc -> return $ Out (VKey kc) s k Right kc -> return $ Out (VKey kc) s k
Left _ -> return $ Up (CmdFailed Key (T.unwords ["Unknown key", quote ktxt]) Nothing) s k Left _ -> return $ Up (CmdFailed Key (T.unwords ["Unknown key", quote ktxt]) Nothing) s k
_ -> badConst _ -> badConst

View File

@ -22,12 +22,12 @@ elaborate =
-- Wrap all *free* variables in 'Force'. Free variables must be -- Wrap all *free* variables in 'Force'. Free variables must be
-- referring to a previous definition, which are all wrapped in -- referring to a previous definition, which are all wrapped in
-- 'TDelay'. -- 'TDelay'.
(freeVarsS %~ \s -> Syntax' (s ^. sLoc) (SApp sForce s) (s ^. sType)) (freeVarsS %~ \s -> Syntax' (s ^. sLoc) (SApp sForce s) (s ^. sComments) (s ^. sType))
-- Now do additional rewriting on all subterms. -- Now do additional rewriting on all subterms.
. transform rewrite . transform rewrite
where where
rewrite :: Syntax' Polytype -> Syntax' Polytype rewrite :: Syntax' Polytype -> Syntax' Polytype
rewrite (Syntax' l t ty) = Syntax' l (rewriteTerm t) ty rewrite (Syntax' l t ty cs) = Syntax' l (rewriteTerm t) ty cs
rewriteTerm :: Term' Polytype -> Term' Polytype rewriteTerm :: Term' Polytype -> Term' Polytype
rewriteTerm = \case rewriteTerm = \case
@ -43,14 +43,14 @@ elaborate =
-- bound by 'def'. -- bound by 'def'.
SDef True x ty t1 -> SDef True x ty (wrapForce (lvVar x) t1) SDef True x ty t1 -> SDef True x ty (wrapForce (lvVar x) t1)
-- Rewrite @f $ x@ to @f x@. -- Rewrite @f $ x@ to @f x@.
SApp (Syntax' _ (SApp (Syntax' _ (TConst AppF) _) l) _) r -> SApp l r SApp (Syntax' _ (SApp (Syntax' _ (TConst AppF) _ _) l) _ _) r -> SApp l r
-- Leave any other subterms alone. -- Leave any other subterms alone.
t -> t t -> t
wrapForce :: Var -> Syntax' Polytype -> Syntax' Polytype wrapForce :: Var -> Syntax' Polytype -> Syntax' Polytype
wrapForce x = mapFreeS x (\s@(Syntax' l _ ty) -> Syntax' l (SApp sForce s) ty) wrapForce x = mapFreeS x (\s@(Syntax' l _ ty cs) -> Syntax' l (SApp sForce s) ty cs)
-- Note, TyUnit is not the right type, but I don't want to bother -- Note, TyUnit is not the right type, but I don't want to bother
sForce :: Syntax' Polytype sForce :: Syntax' Polytype
sForce = Syntax' NoLoc (TConst Force) (Forall ["a"] (TyDelay (TyVar "a") :->: TyVar "a")) sForce = Syntax' NoLoc (TConst Force) Nothing (Forall ["a"] (TyDelay (TyVar "a") :->: TyVar "a"))

View File

@ -26,9 +26,9 @@ import Data.Set (Set)
import Data.Set qualified as S import Data.Set qualified as S
import Data.Text (Text) import Data.Text (Text)
import Data.Text qualified as T import Data.Text qualified as T
import Data.Void
import GHC.Generics hiding (from) import GHC.Generics hiding (from)
import Graphics.Vty.Input.Events qualified as V import Graphics.Vty.Input.Events qualified as V
import Swarm.Language.Parse
import Text.Megaparsec import Text.Megaparsec
import Text.Megaparsec.Char (char, string) import Text.Megaparsec.Char (char, string)
import Text.Megaparsec.Char.Lexer (decimal) import Text.Megaparsec.Char.Lexer (decimal)
@ -47,27 +47,29 @@ deriving instance FromJSON V.Modifier
deriving instance ToJSON V.Key deriving instance ToJSON V.Key
deriving instance ToJSON V.Modifier deriving instance ToJSON V.Modifier
type SParser = Parsec Void Text
-- | Smart constructor for 'KeyCombo'. -- | Smart constructor for 'KeyCombo'.
mkKeyCombo :: [V.Modifier] -> V.Key -> KeyCombo mkKeyCombo :: [V.Modifier] -> V.Key -> KeyCombo
mkKeyCombo mods k = KeyCombo k (sort mods) mkKeyCombo mods k = KeyCombo k (sort mods)
-- | Parse a key combo with nothing after it. -- | Parse a key combo with nothing after it.
parseKeyComboFull :: Parser KeyCombo parseKeyComboFull :: SParser KeyCombo
parseKeyComboFull = parseKeyCombo <* eof parseKeyComboFull = parseKeyCombo <* eof
-- | Parse a key combo like @\"M-C-F5\"@, @\"Down\"@, or @\"C-x\"@. -- | Parse a key combo like @\"M-C-F5\"@, @\"Down\"@, or @\"C-x\"@.
parseKeyCombo :: Parser KeyCombo parseKeyCombo :: SParser KeyCombo
parseKeyCombo = parseKeyCombo =
mkKeyCombo <$> many (try (parseModifier <* char '-')) <*> parseKey mkKeyCombo <$> many (try (parseModifier <* char '-')) <*> parseKey
parseModifier :: Parser V.Modifier parseModifier :: SParser V.Modifier
parseModifier = parseModifier =
V.MShift <$ string "S" V.MShift <$ string "S"
<|> V.MCtrl <$ string "C" <|> V.MCtrl <$ string "C"
<|> V.MMeta <$ string "M" <|> V.MMeta <$ string "M"
<|> V.MAlt <$ string "A" <|> V.MAlt <$ string "A"
parseKey :: Parser V.Key parseKey :: SParser V.Key
parseKey = parseKey =
-- For an explanation of the 'reverse', see Note [Key names are not prefix-free] -- For an explanation of the 'reverse', see Note [Key names are not prefix-free]
(asum . map specialKeyParser . reverse . S.toList $ specialKeyNames) (asum . map specialKeyParser . reverse . S.toList $ specialKeyNames)
@ -90,13 +92,13 @@ parseKey =
-- of key names (which are sorted alphabetically), it guarantees that -- of key names (which are sorted alphabetically), it guarantees that
-- longer names will come before names which are prefixes of them. -- longer names will come before names which are prefixes of them.
parseFunctionKey :: Parser V.Key parseFunctionKey :: SParser V.Key
parseFunctionKey = V.KFun <$> try (char 'F' *> decimal) parseFunctionKey = V.KFun <$> try (char 'F' *> decimal)
parseCharKey :: Parser V.Key parseCharKey :: SParser V.Key
parseCharKey = V.KChar <$> anySingle parseCharKey = V.KChar <$> anySingle
specialKeyParser :: Text -> Parser V.Key specialKeyParser :: Text -> SParser V.Key
specialKeyParser t = read . ('K' :) . from @Text <$> string t specialKeyParser t = read . ('K' :) . from @Text <$> string t
-- https://stackoverflow.com/questions/51848587/list-constructor-names-using-generics-in-haskell -- https://stackoverflow.com/questions/51848587/list-constructor-names-using-generics-in-haskell

View File

@ -81,8 +81,8 @@ validateSwarmCode doc version content = do
flushDiagnosticsBySource 0 (Just diagnosticSourcePrefix) flushDiagnosticsBySource 0 (Just diagnosticSourcePrefix)
let (parsingErrs, unusedVarWarnings) = case readTerm' content of let (parsingErrs, unusedVarWarnings) = case readTerm' content of
Right Nothing -> ([], []) Right (Nothing, _) -> ([], [])
Right (Just term) -> (parsingErrors, unusedWarnings) Right (Just term, _) -> (parsingErrors, unusedWarnings)
where where
VU.Usage _ problems = VU.getUsage mempty term VU.Usage _ problems = VU.getUsage mempty term
unusedWarnings = mapMaybe (VU.toErrPos content) problems unusedWarnings = mapMaybe (VU.toErrPos content) problems

View File

@ -60,14 +60,14 @@ showHoverInfo ::
showHoverInfo _ p vf@(VirtualFile _ _ myRope) = showHoverInfo _ p vf@(VirtualFile _ _ myRope) =
case readTerm' content of case readTerm' content of
Left _ -> Nothing Left _ -> Nothing
Right Nothing -> Nothing Right (Nothing, _) -> Nothing
Right (Just stx) -> Just $ case processParsedTerm stx of Right (Just stx, _) -> Just $ case processParsedTerm stx of
Left _e -> Left _e ->
let found@(Syntax foundSloc _) = narrowToPosition stx $ fromIntegral absolutePos let found@(Syntax foundSloc _) = narrowToPosition stx $ fromIntegral absolutePos
finalPos = posToRange myRope foundSloc finalPos = posToRange myRope foundSloc
in (,finalPos) . treeToMarkdown 0 $ explain found in (,finalPos) . treeToMarkdown 0 $ explain found
Right (ProcessedTerm modul _req _reqCtx) -> Right (ProcessedTerm modul _req _reqCtx) ->
let found@(Syntax' foundSloc _ _) = narrowToPosition (moduleAST modul) $ fromIntegral absolutePos let found@(Syntax' foundSloc _ _ _) = narrowToPosition (moduleAST modul) $ fromIntegral absolutePos
finalPos = posToRange myRope foundSloc finalPos = posToRange myRope foundSloc
in (,finalPos) . treeToMarkdown 0 $ explain found in (,finalPos) . treeToMarkdown 0 $ explain found
where where
@ -94,7 +94,7 @@ descend ::
-- | next element to inspect -- | next element to inspect
Syntax' ty -> Syntax' ty ->
Maybe (Syntax' ty) Maybe (Syntax' ty)
descend pos s1@(Syntax' l1 _ _) = do descend pos s1@(Syntax' l1 _ _ _) = do
guard $ withinBound pos l1 guard $ withinBound pos l1
return $ narrowToPosition s1 pos return $ narrowToPosition s1 pos
@ -107,12 +107,12 @@ narrowToPosition ::
-- | absolute offset within the file -- | absolute offset within the file
Int -> Int ->
Syntax' ty Syntax' ty
narrowToPosition s0@(Syntax' _ t ty) pos = fromMaybe s0 $ case t of narrowToPosition s0@(Syntax' _ t _ ty) pos = fromMaybe s0 $ case t of
SLam lv _ s -> d (locVarToSyntax' lv $ getInnerType ty) <|> d s SLam lv _ s -> d (locVarToSyntax' lv $ getInnerType ty) <|> d s
SApp s1 s2 -> d s1 <|> d s2 SApp s1 s2 -> d s1 <|> d s2
SLet _ lv _ s1@(Syntax' _ _ lty) s2 -> d (locVarToSyntax' lv lty) <|> d s1 <|> d s2 SLet _ lv _ s1@(Syntax' _ _ _ lty) s2 -> d (locVarToSyntax' lv lty) <|> d s1 <|> d s2
SDef _ lv _ s@(Syntax' _ _ lty) -> d (locVarToSyntax' lv lty) <|> d s SDef _ lv _ s@(Syntax' _ _ _ lty) -> d (locVarToSyntax' lv lty) <|> d s
SBind mlv s1@(Syntax' _ _ lty) s2 -> (mlv >>= d . flip locVarToSyntax' (getInnerType lty)) <|> d s1 <|> d s2 SBind mlv s1@(Syntax' _ _ _ lty) s2 -> (mlv >>= d . flip locVarToSyntax' (getInnerType lty)) <|> d s1 <|> d s2
SPair s1 s2 -> d s1 <|> d s2 SPair s1 s2 -> d s1 <|> d s2
SDelay _ s -> d s SDelay _ s -> d s
SRcd m -> asum . map d . catMaybes . M.elems $ m SRcd m -> asum . map d . catMaybes . M.elems $ m
@ -244,8 +244,8 @@ explain trm = case trm ^. sTerm of
explainFunction :: ExplainableType ty => Syntax' ty -> Tree Text explainFunction :: ExplainableType ty => Syntax' ty -> Tree Text
explainFunction s = explainFunction s =
case unfoldApps s of case unfoldApps s of
(Syntax' _ (TConst Force) _ :| [innerT]) -> explain innerT (Syntax' _ (TConst Force) _ _ :| [innerT]) -> explain innerT
(Syntax' _ (TConst Force) _ :| f : params) -> explainF f params (Syntax' _ (TConst Force) _ _ :| f : params) -> explainF f params
(f :| params) -> explainF f params (f :| params) -> explainF f params
where where
explainF f params = explainF f params =

View File

@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE ViewPatterns #-}
@ -33,13 +34,15 @@ module Swarm.Language.Parse (
unTuple, unTuple,
) where ) where
import Control.Lens (view, (^.)) import Control.Arrow (right)
import Control.Monad (guard, join) import Control.Lens (makeLenses, use, view, (%=), (.=), (^.))
import Control.Monad (guard, join, void)
import Control.Monad.Combinators.Expr import Control.Monad.Combinators.Expr
import Control.Monad.Reader ( import Control.Monad.Reader (
MonadReader (ask), MonadReader (ask),
ReaderT (runReaderT), ReaderT (runReaderT),
) )
import Control.Monad.State (StateT, runStateT)
import Data.Bifunctor import Data.Bifunctor
import Data.Foldable (asum) import Data.Foldable (asum)
import Data.List (foldl', nub) import Data.List (foldl', nub)
@ -47,8 +50,11 @@ import Data.List.NonEmpty qualified (head)
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Maybe (fromMaybe, mapMaybe) import Data.Maybe (fromMaybe, mapMaybe)
import Data.Sequence (Seq)
import Data.Sequence qualified as Seq
import Data.Set qualified as S import Data.Set qualified as S
import Data.Set.Lens (setOf) import Data.Set.Lens (setOf)
import Data.String (fromString)
import Data.Text (Text, index, toLower) import Data.Text (Text, index, toLower)
import Data.Text qualified as T import Data.Text qualified as T
import Data.Void import Data.Void
@ -76,7 +82,22 @@ import Witch
data Antiquoting = AllowAntiquoting | DisallowAntiquoting data Antiquoting = AllowAntiquoting | DisallowAntiquoting
deriving (Eq, Ord, Show) deriving (Eq, Ord, Show)
type Parser = ReaderT Antiquoting (Parsec Void Text) data CommentState = CS
{ _freshLine :: Bool
-- ^ Are we currently on a (so far) blank line, i.e. have there been
-- no nontrivial tokens since the most recent newline? This field
-- is updated every time we parse a lexeme or symbol (set to
-- false), or a newline (set to true).
, _comments :: Seq Comment
-- ^ The actual sequence of comments, in the order they were encountered
}
makeLenses ''CommentState
initCommentState :: CommentState
initCommentState = CS {_freshLine = True, _comments = Seq.empty}
type Parser = ReaderT Antiquoting (StateT CommentState (Parsec Void Text))
type ParserError = ParseErrorBundle Text Void type ParserError = ParseErrorBundle Text Void
@ -109,24 +130,61 @@ reservedWords =
, "requirements" , "requirements"
] ]
-- Approach for preserving comments taken from https://www.reddit.com/r/haskell/comments/ni4gpm/comment/gz0ipmp/
-- | If we see a comment starting now, is it the first non-whitespace
-- thing on the current line so far, or were there other
-- non-whitespace tokens previously?
getCommentSituation :: Parser CommentSituation
getCommentSituation = do
fl <- use freshLine
return $ if fl then StandaloneComment else SuffixComment
-- | Parse a line comment, while appending it out-of-band to the list of
-- comments saved in the custom state.
lineComment :: Text -> Parser ()
lineComment start = do
cs <- getCommentSituation
(loc, t) <- parseLocG $ do
string start *> takeWhileP (Just "character") (/= '\n')
comments %= (Seq.|> Comment loc LineComment cs t)
-- | Parse a block comment, while appending it out-of-band to the list of
-- comments saved in the custom state.
blockComment :: Text -> Text -> Parser ()
blockComment start end = do
cs <- getCommentSituation
(loc, t) <- parseLocG $ do
void $ string start
manyTill anySingle (string end)
comments %= (Seq.|> Comment loc BlockComment cs (fromString t))
-- | Skip spaces and comments. -- | Skip spaces and comments.
sc :: Parser () sc :: Parser ()
sc = sc =
L.space -- Typically we would use L.space here, but we have to inline its
space1 -- definition and use our own slight variant, since we need to treat
(L.skipLineComment "//") -- end-of-line specially.
(L.skipBlockComment "/*" "*/") skipMany . choice . map hidden $
[ hspace1
, eol *> (freshLine .= True) -- If we see a newline, reset freshLine to True.
, lineComment "//"
, blockComment "/*" "*/"
]
-- | In general, we follow the convention that every token parser -- | In general, we follow the convention that every token parser
-- assumes no leading whitespace and consumes all trailing -- assumes no leading whitespace and consumes all trailing
-- whitespace. Concretely, we achieve this by wrapping every token -- whitespace. Concretely, we achieve this by wrapping every token
-- parser using 'lexeme'. -- parser using 'lexeme'.
--
-- Also sets freshLine to False every time we see a non-whitespace
-- token.
lexeme :: Parser a -> Parser a lexeme :: Parser a -> Parser a
lexeme = L.lexeme sc lexeme p = (freshLine .= False) *> L.lexeme sc p
-- | A lexeme consisting of a literal string. -- | A lexeme consisting of a literal string.
symbol :: Text -> Parser Text symbol :: Text -> Parser Text
symbol = L.symbol sc symbol s = (freshLine .= False) *> L.symbol sc s
-- | Parse a case-insensitive reserved word, making sure it is not a -- | Parse a case-insensitive reserved word, making sure it is not a
-- prefix of a longer variable name, and allowing the parser to -- prefix of a longer variable name, and allowing the parser to
@ -329,7 +387,7 @@ mkTuple (x : xs) = let r = mkTuple xs in loc x r $ SPair x r
unTuple :: Syntax' ty -> [Syntax' ty] unTuple :: Syntax' ty -> [Syntax' ty]
unTuple = \case unTuple = \case
Syntax' _ (SPair s1 s2) _ -> s1 : unTuple s2 Syntax' _ (SPair s1 s2) _ _ -> s1 : unTuple s2
s -> [s] s -> [s]
-- | Construct an 'SLet', automatically filling in the Boolean field -- | Construct an 'SLet', automatically filling in the Boolean field
@ -473,19 +531,31 @@ operatorSymbol = T.singleton <$> oneOf opChars
-------------------------------------------------- --------------------------------------------------
-- Utilities -- Utilities
-- | Run a parser on some input text, returning either the result or a -- | Run a parser on some input text, returning either the result +
-- pretty-printed parse error message. -- all collected comments, or a pretty-printed parse error message.
runParser :: Parser a -> Text -> Either Text a runParser :: Parser a -> Text -> Either Text (a, Seq Comment)
runParser p t = first (from . errorBundlePretty) (parse (runReaderT p DisallowAntiquoting) "" t) runParser p t =
first (from . errorBundlePretty)
. (\pt -> parse pt "" t)
. fmap (second (^. comments))
. flip runStateT initCommentState
. flip runReaderT DisallowAntiquoting
$ p
-- | A utility for running a parser in an arbitrary 'MonadFail' (which -- | A utility for running a parser in an arbitrary 'MonadFail' (which
-- is going to be the TemplateHaskell 'Language.Haskell.TH.Q' monad --- see -- is going to be the TemplateHaskell 'Language.Haskell.TH.Q' monad --- see
-- "Swarm.Language.Parse.QQ"), with a specified source position. -- "Swarm.Language.Parse.QQ"), with a specified source position.
runParserTH :: (Monad m, MonadFail m) => (String, Int, Int) -> Parser a -> String -> m a runParserTH :: (Monad m, MonadFail m) => (String, Int, Int) -> Parser a -> String -> m a
runParserTH (file, line, col) p s = runParserTH (file, line, col) p s =
case snd (runParser' (runReaderT (fully sc p) AllowAntiquoting) initState) of let (_, res) =
Left err -> fail $ errorBundlePretty err flip runParser' initState
Right e -> return e . flip runStateT initCommentState
. flip runReaderT AllowAntiquoting
. fully sc
$ p
in case res of
Left err -> fail $ errorBundlePretty err
Right e -> return $ fst e
where where
initState :: State Text Void initState :: State Text Void
initState = initState =
@ -502,12 +572,17 @@ runParserTH (file, line, col) p s =
-- 'Nothing' if the input was only whitespace) or a pretty-printed -- 'Nothing' if the input was only whitespace) or a pretty-printed
-- parse error message. -- parse error message.
readTerm :: Text -> Either Text (Maybe Syntax) readTerm :: Text -> Either Text (Maybe Syntax)
readTerm = runParser (fullyMaybe sc parseTerm) readTerm = right fst . runParser (fullyMaybe sc parseTerm)
-- | A lower-level `readTerm` which returns the megaparsec bundle error -- | A lower-level `readTerm` which returns the megaparsec bundle error
-- for precise error reporting. -- for precise error reporting, as well as the parsed comments.
readTerm' :: Text -> Either ParserError (Maybe Syntax) readTerm' :: Text -> Either ParserError (Maybe Syntax, Seq Comment)
readTerm' = parse (runReaderT (fullyMaybe sc parseTerm) DisallowAntiquoting) "" readTerm' t =
(\pt -> parse pt "" t)
. fmap (second (^. comments))
. flip runStateT initCommentState
. flip runReaderT DisallowAntiquoting
$ fullyMaybe sc parseTerm
-- | A utility for converting a ParserError into a one line message: -- | A utility for converting a ParserError into a one line message:
-- @<line-nr>: <error-msg>@ -- @<line-nr>: <error-msg>@

View File

@ -50,8 +50,10 @@ module Swarm.Language.Syntax (
sLoc, sLoc,
sTerm, sTerm,
sType, sType,
sComments,
Syntax, Syntax,
pattern Syntax, pattern Syntax,
pattern CSyntax,
LocVar (..), LocVar (..),
SrcLoc (..), SrcLoc (..),
noLoc, noLoc,
@ -69,6 +71,11 @@ module Swarm.Language.Syntax (
pattern TProj, pattern TProj,
pattern TAnnotate, pattern TAnnotate,
-- * Comments
CommentType (..),
CommentSituation (..),
Comment (..),
-- * Terms -- * Terms
Var, Var,
DelayType (..), DelayType (..),
@ -100,6 +107,7 @@ import Data.Int (Int32)
import Data.List.NonEmpty (NonEmpty) import Data.List.NonEmpty (NonEmpty)
import Data.List.NonEmpty qualified as NonEmpty import Data.List.NonEmpty qualified as NonEmpty
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import Data.Sequence (Seq)
import Data.Set (Set) import Data.Set (Set)
import Data.Set qualified as S import Data.Set qualified as S
import Data.Set qualified as Set import Data.Set qualified as Set
@ -1008,7 +1016,7 @@ data LocVar = LV {lvSrcLoc :: SrcLoc, lvVar :: Var}
deriving (Eq, Ord, Show, Data, Generic, FromJSON, ToJSON) deriving (Eq, Ord, Show, Data, Generic, FromJSON, ToJSON)
locVarToSyntax' :: LocVar -> ty -> Syntax' ty locVarToSyntax' :: LocVar -> ty -> Syntax' ty
locVarToSyntax' (LV s v) = Syntax' s (TVar v) locVarToSyntax' (LV s v) = Syntax' s (TVar v) Nothing
-- | Terms of the Swarm language. -- | Terms of the Swarm language.
data Term' ty data Term' ty
@ -1113,6 +1121,7 @@ instance Data ty => Plated (Term' ty) where
data Syntax' ty = Syntax' data Syntax' ty = Syntax'
{ _sLoc :: SrcLoc { _sLoc :: SrcLoc
, _sTerm :: Term' ty , _sTerm :: Term' ty
, _sComments :: Maybe (Seq Comment)
, _sType :: ty , _sType :: ty
} }
deriving (Eq, Show, Functor, Foldable, Traversable, Data, Generic, FromJSON, ToJSON) deriving (Eq, Show, Functor, Foldable, Traversable, Data, Generic, FromJSON, ToJSON)
@ -1134,23 +1143,56 @@ instance Semigroup SrcLoc where
instance Monoid SrcLoc where instance Monoid SrcLoc where
mempty = NoLoc mempty = NoLoc
------------------------------------------------------------
-- Comments
------------------------------------------------------------
-- | Line vs block comments.
data CommentType = LineComment | BlockComment
deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, ToJSON, FromJSON)
-- | Was a comment all by itself on a line, or did it occur after some
-- other tokens on a line?
data CommentSituation = StandaloneComment | SuffixComment
deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, ToJSON, FromJSON)
-- | A comment is retained as some text plus metadata (source
-- location, comment type, + comment situation). While parsing we
-- record all comments out-of-band, for later re-insertion into the
-- AST.
data Comment = Comment
{ commentSrcLoc :: SrcLoc
, commentType :: CommentType
, commentSituation :: CommentSituation
, commentText :: Text
}
deriving (Eq, Show, Generic, Data, ToJSON, FromJSON)
------------------------------------------------------------ ------------------------------------------------------------
-- Pattern synonyms for untyped terms -- Pattern synonyms for untyped terms
------------------------------------------------------------ ------------------------------------------------------------
-- | Syntax without type annotations.
type Syntax = Syntax' () type Syntax = Syntax' ()
-- | Raw parsed syntax, without comments or type annotations.
pattern Syntax :: SrcLoc -> Term -> Syntax pattern Syntax :: SrcLoc -> Term -> Syntax
pattern Syntax l t = Syntax' l t () pattern Syntax l t = Syntax' l t Nothing ()
{-# COMPLETE Syntax #-} {-# COMPLETE Syntax #-}
-- | Untyped syntax with assocated comments.
pattern CSyntax :: SrcLoc -> Term -> Maybe (Seq Comment) -> Syntax
pattern CSyntax l t cs = Syntax' l t cs ()
{-# COMPLETE CSyntax #-}
makeLenses ''Syntax' makeLenses ''Syntax'
noLoc :: Term -> Syntax noLoc :: Term -> Syntax
noLoc = Syntax mempty noLoc = Syntax mempty
-- | Match an untyped term without its 'SrcLoc'. -- | Match an untyped term without annotations.
pattern STerm :: Term -> Syntax pattern STerm :: Term -> Syntax
pattern STerm t <- pattern STerm t <-
Syntax _ t Syntax _ t
@ -1160,17 +1202,17 @@ pattern STerm t <-
pattern TRequirements :: Text -> Term -> Term pattern TRequirements :: Text -> Term -> Term
pattern TRequirements x t = SRequirements x (STerm t) pattern TRequirements x t = SRequirements x (STerm t)
-- | Match a TPair without syntax -- | Match a TPair without annotations.
pattern TPair :: Term -> Term -> Term pattern TPair :: Term -> Term -> Term
pattern TPair t1 t2 = SPair (STerm t1) (STerm t2) pattern TPair t1 t2 = SPair (STerm t1) (STerm t2)
-- | Match a TLam without syntax -- | Match a TLam without annotations.
pattern TLam :: Var -> Maybe Type -> Term -> Term pattern TLam :: Var -> Maybe Type -> Term -> Term
pattern TLam v ty t <- SLam (lvVar -> v) ty (STerm t) pattern TLam v ty t <- SLam (lvVar -> v) ty (STerm t)
where where
TLam v ty t = SLam (LV NoLoc v) ty (STerm t) TLam v ty t = SLam (LV NoLoc v) ty (STerm t)
-- | Match a TApp without syntax -- | Match a TApp without annotations.
pattern TApp :: Term -> Term -> Term pattern TApp :: Term -> Term -> Term
pattern TApp t1 t2 = SApp (STerm t1) (STerm t2) pattern TApp t1 t2 = SApp (STerm t1) (STerm t2)
@ -1180,29 +1222,29 @@ infixl 0 :$:
pattern (:$:) :: Term -> Syntax -> Term pattern (:$:) :: Term -> Syntax -> Term
pattern (:$:) t1 s2 = SApp (STerm t1) s2 pattern (:$:) t1 s2 = SApp (STerm t1) s2
-- | Match a TLet without syntax -- | Match a TLet without annotations.
pattern TLet :: Bool -> Var -> Maybe Polytype -> Term -> Term -> Term pattern TLet :: Bool -> Var -> Maybe Polytype -> Term -> Term -> Term
pattern TLet r v pt t1 t2 <- SLet r (lvVar -> v) pt (STerm t1) (STerm t2) pattern TLet r v pt t1 t2 <- SLet r (lvVar -> v) pt (STerm t1) (STerm t2)
where where
TLet r v pt t1 t2 = SLet r (LV NoLoc v) pt (STerm t1) (STerm t2) TLet r v pt t1 t2 = SLet r (LV NoLoc v) pt (STerm t1) (STerm t2)
-- | Match a TDef without syntax -- | Match a TDef without annotations.
pattern TDef :: Bool -> Var -> Maybe Polytype -> Term -> Term pattern TDef :: Bool -> Var -> Maybe Polytype -> Term -> Term
pattern TDef r v pt t <- SDef r (lvVar -> v) pt (STerm t) pattern TDef r v pt t <- SDef r (lvVar -> v) pt (STerm t)
where where
TDef r v pt t = SDef r (LV NoLoc v) pt (STerm t) TDef r v pt t = SDef r (LV NoLoc v) pt (STerm t)
-- | Match a TBind without syntax -- | Match a TBind without annotations.
pattern TBind :: Maybe Var -> Term -> Term -> Term pattern TBind :: Maybe Var -> Term -> Term -> Term
pattern TBind mv t1 t2 <- SBind (fmap lvVar -> mv) (STerm t1) (STerm t2) pattern TBind mv t1 t2 <- SBind (fmap lvVar -> mv) (STerm t1) (STerm t2)
where where
TBind mv t1 t2 = SBind (LV NoLoc <$> mv) (STerm t1) (STerm t2) TBind mv t1 t2 = SBind (LV NoLoc <$> mv) (STerm t1) (STerm t2)
-- | Match a TDelay without syntax -- | Match a TDelay without annotations.
pattern TDelay :: DelayType -> Term -> Term pattern TDelay :: DelayType -> Term -> Term
pattern TDelay m t = SDelay m (STerm t) pattern TDelay m t = SDelay m (STerm t)
-- | Match a TRcd without syntax -- | Match a TRcd without annotations.
pattern TRcd :: Map Var (Maybe Term) -> Term pattern TRcd :: Map Var (Maybe Term) -> Term
pattern TRcd m <- SRcd ((fmap . fmap) _sTerm -> m) pattern TRcd m <- SRcd ((fmap . fmap) _sTerm -> m)
where where
@ -1211,15 +1253,16 @@ pattern TRcd m <- SRcd ((fmap . fmap) _sTerm -> m)
pattern TProj :: Term -> Var -> Term pattern TProj :: Term -> Var -> Term
pattern TProj t x = SProj (STerm t) x pattern TProj t x = SProj (STerm t) x
-- | Match a TAnnotate without syntax -- | Match a TAnnotate without annotations.
pattern TAnnotate :: Term -> Polytype -> Term pattern TAnnotate :: Term -> Polytype -> Term
pattern TAnnotate t pt = SAnnotate (STerm t) pt pattern TAnnotate t pt = SAnnotate (STerm t) pt
-- | COMPLETE pragma tells GHC using this set of pattern is complete for Term -- COMPLETE pragma tells GHC using this set of patterns is complete for Term
{-# COMPLETE TUnit, TConst, TDir, TInt, TAntiInt, TText, TAntiText, TBool, TRequireDevice, TRequire, TRequirements, TVar, TPair, TLam, TApp, TLet, TDef, TBind, TDelay, TRcd, TProj, TAnnotate #-} {-# COMPLETE TUnit, TConst, TDir, TInt, TAntiInt, TText, TAntiText, TBool, TRequireDevice, TRequire, TRequirements, TVar, TPair, TLam, TApp, TLet, TDef, TBind, TDelay, TRcd, TProj, TAnnotate #-}
-- | Make infix operation (e.g. @2 + 3@) a curried function -- | Make an infix operation (e.g. @2 + 3@) a curried function
-- application (@((+) 2) 3@). -- application (e.g. @((+) 2) 3@).
mkOp :: Const -> Syntax -> Syntax -> Syntax mkOp :: Const -> Syntax -> Syntax -> Syntax
mkOp c s1@(Syntax l1 _) s2@(Syntax l2 _) = Syntax newLoc newTerm mkOp c s1@(Syntax l1 _) s2@(Syntax l2 _) = Syntax newLoc newTerm
where where
@ -1230,7 +1273,7 @@ mkOp c s1@(Syntax l1 _) s2@(Syntax l2 _) = Syntax newLoc newTerm
sop = noLoc (TConst c) sop = noLoc (TConst c)
newTerm = SApp (Syntax l1 $ SApp sop s1) s2 newTerm = SApp (Syntax l1 $ SApp sop s1) s2
-- | Make infix operation, discarding any syntax related location -- | Make an infix operation, discarding any location information
mkOp' :: Const -> Term -> Term -> Term mkOp' :: Const -> Term -> Term -> Term
mkOp' c t1 = TApp (TApp (TConst c) t1) mkOp' c t1 = TApp (TApp (TConst c) t1)
@ -1244,16 +1287,16 @@ mkOp' c t1 = TApp (TApp (TConst c) t1)
-- TConst Mul :| [TInt 1,TInt 2] -- TConst Mul :| [TInt 1,TInt 2]
unfoldApps :: Syntax' ty -> NonEmpty (Syntax' ty) unfoldApps :: Syntax' ty -> NonEmpty (Syntax' ty)
unfoldApps trm = NonEmpty.reverse . flip NonEmpty.unfoldr trm $ \case unfoldApps trm = NonEmpty.reverse . flip NonEmpty.unfoldr trm $ \case
Syntax' _ (SApp s1 s2) _ -> (s2, Just s1) Syntax' _ (SApp s1 s2) _ _ -> (s2, Just s1)
s -> (s, Nothing) s -> (s, Nothing)
-------------------------------------------------- --------------------------------------------------
-- Erasure -- Erasure
-- | Erase a 'Syntax' tree annotated with type -- | Erase a 'Syntax' tree annotated with type and comment information
-- information to a bare unannotated 'Term'. -- to a bare unannotated 'Term'.
eraseS :: Syntax' ty -> Term eraseS :: Syntax' ty -> Term
eraseS (Syntax' _ t _) = void t eraseS (Syntax' _ t _ _) = void t
------------------------------------------------------------ ------------------------------------------------------------
-- Free variable traversals -- Free variable traversals
@ -1270,7 +1313,7 @@ freeVarsS :: forall ty. Traversal' (Syntax' ty) (Syntax' ty)
freeVarsS f = go S.empty freeVarsS f = go S.empty
where where
-- go :: Applicative f => Set Var -> Syntax' ty -> f (Syntax' ty) -- go :: Applicative f => Set Var -> Syntax' ty -> f (Syntax' ty)
go bound s@(Syntax' l t ty) = case t of go bound s@(Syntax' l t ty cmts) = case t of
TUnit -> pure s TUnit -> pure s
TConst {} -> pure s TConst {} -> pure s
TDir {} -> pure s TDir {} -> pure s
@ -1300,34 +1343,34 @@ freeVarsS f = go S.empty
SProj s1 x -> rewrap $ SProj <$> go bound s1 <*> pure x SProj s1 x -> rewrap $ SProj <$> go bound s1 <*> pure x
SAnnotate s1 pty -> rewrap $ SAnnotate <$> go bound s1 <*> pure pty SAnnotate s1 pty -> rewrap $ SAnnotate <$> go bound s1 <*> pure pty
where where
rewrap s' = Syntax' l <$> s' <*> pure ty rewrap s' = Syntax' l <$> s' <*> pure ty <*> pure cmts
-- | Like 'freeVarsS', but traverse over the 'Term's containing free -- | Like 'freeVarsS', but traverse over the 'Term's containing free
-- variables. More direct if you don't need to know the types or -- variables. More direct if you don't need to know the types or
-- source locations of the variables. Note that if you want to get -- source locations of the variables. Note that if you want to get
-- the list of all `Term`s representing free variables, you can do so via -- the list of all `Term`s representing free variables, you can do
-- @'toListOf' 'freeVarsT'@. -- so via @'toListOf' 'freeVarsT'@.
freeVarsT :: forall ty. Traversal' (Syntax' ty) (Term' ty) freeVarsT :: forall ty. Traversal' (Syntax' ty) (Term' ty)
freeVarsT = freeVarsS . sTerm freeVarsT = freeVarsS . sTerm
-- | Traversal over the free variables of a term. Like 'freeVarsS' -- | Traversal over the free variables of a term. Like 'freeVarsS'
-- and 'freeVarsT', but traverse over the variable names -- and 'freeVarsT', but traverse over the variable names themselves.
-- themselves. Note that if you want to get the set of all free -- Note that if you want to get the set of all free variable names,
-- variable names, you can do so via @'Data.Set.Lens.setOf' -- you can do so via @'Data.Set.Lens.setOf' 'freeVarsV'@.
-- 'freeVarsV'@.
freeVarsV :: Traversal' (Syntax' ty) Var freeVarsV :: Traversal' (Syntax' ty) Var
freeVarsV = freeVarsT . (\f -> \case TVar x -> TVar <$> f x; t -> pure t) freeVarsV = freeVarsT . (\f -> \case TVar x -> TVar <$> f x; t -> pure t)
-- | Apply a function to all free occurrences of a particular variable. -- | Apply a function to all free occurrences of a particular
-- variable.
mapFreeS :: Var -> (Syntax' ty -> Syntax' ty) -> Syntax' ty -> Syntax' ty mapFreeS :: Var -> (Syntax' ty -> Syntax' ty) -> Syntax' ty -> Syntax' ty
mapFreeS x f = freeVarsS %~ (\t -> case t ^. sTerm of TVar y | y == x -> f t; _ -> t) mapFreeS x f = freeVarsS %~ (\t -> case t ^. sTerm of TVar y | y == x -> f t; _ -> t)
-- | Transform the AST into a Tree datatype. -- | Transform the AST into a Tree datatype. Useful for
-- Useful for pretty-printing (e.g. via "Data.Tree.drawTree"). -- pretty-printing (e.g. via "Data.Tree.drawTree").
asTree :: Data a => Syntax' a -> Tree (Syntax' a) asTree :: Data a => Syntax' a -> Tree (Syntax' a)
asTree = para Node asTree = para Node
-- | Each constructor is a assigned a value of 1, plus -- | Each constructor is a assigned a value of 1, plus
-- any recursive syntax it entails. -- any recursive syntax it entails.
measureAstSize :: Data a => Syntax' a -> Int measureAstSize :: Data a => Syntax' a -> Int
measureAstSize = length . universe measureAstSize = length . universe

View File

@ -332,7 +332,7 @@ instance (HasBindings u, Data u) => HasBindings (Term' u) where
applyBindings = gmapM (mkM (applyBindings @(Syntax' u))) applyBindings = gmapM (mkM (applyBindings @(Syntax' u)))
instance (HasBindings u, Data u) => HasBindings (Syntax' u) where instance (HasBindings u, Data u) => HasBindings (Syntax' u) where
applyBindings (Syntax' l t u) = Syntax' l <$> applyBindings t <*> applyBindings u applyBindings (Syntax' l t cs u) = Syntax' l <$> applyBindings t <*> pure cs <*> applyBindings u
instance HasBindings UModule where instance HasBindings UModule where
applyBindings (Module u uctx) = Module <$> applyBindings u <*> applyBindings uctx applyBindings (Module u uctx) = Module <$> applyBindings u <*> applyBindings uctx
@ -556,7 +556,7 @@ inferModule ::
) => ) =>
Syntax -> Syntax ->
m UModule m UModule
inferModule s@(Syntax l t) = addLocToTypeErr l $ case t of inferModule s@(CSyntax l t cs) = addLocToTypeErr l $ case t of
-- For definitions with no type signature, make up a fresh type -- For definitions with no type signature, make up a fresh type
-- variable for the body, infer the body under an extended context, -- variable for the body, infer the body under an extended context,
-- and unify the two. Then generalize the type and return an -- and unify the two. Then generalize the type and return an
@ -566,7 +566,7 @@ inferModule s@(Syntax l t) = addLocToTypeErr l $ case t of
t1' <- withBinding (lvVar x) (Forall [] xTy) $ infer t1 t1' <- withBinding (lvVar x) (Forall [] xTy) $ infer t1
_ <- unify (Just t1) (joined xTy (t1' ^. sType)) _ <- unify (Just t1) (joined xTy (t1' ^. sType))
pty <- generalize (t1' ^. sType) pty <- generalize (t1' ^. sType)
return $ Module (Syntax' l (SDef r x Nothing t1') (UTyCmd UTyUnit)) (singleton (lvVar x) pty) return $ Module (Syntax' l (SDef r x Nothing t1') cs (UTyCmd UTyUnit)) (singleton (lvVar x) pty)
-- If a (poly)type signature has been provided, skolemize it and -- If a (poly)type signature has been provided, skolemize it and
-- check the definition. -- check the definition.
@ -574,7 +574,7 @@ inferModule s@(Syntax l t) = addLocToTypeErr l $ case t of
let upty = toU pty let upty = toU pty
uty <- skolemize upty uty <- skolemize upty
t1' <- withBinding (lvVar x) upty $ check t1 uty t1' <- withBinding (lvVar x) upty $ check t1 uty
return $ Module (Syntax' l (SDef r x (Just pty) t1') (UTyCmd UTyUnit)) (singleton (lvVar x) upty) return $ Module (Syntax' l (SDef r x (Just pty) t1') cs (UTyCmd UTyUnit)) (singleton (lvVar x) upty)
-- To handle a 'TBind', infer the types of both sides, combining the -- To handle a 'TBind', infer the types of both sides, combining the
-- returned modules appropriately. Have to be careful to use the -- returned modules appropriately. Have to be careful to use the
@ -624,7 +624,7 @@ inferModule s@(Syntax l t) = addLocToTypeErr l $ case t of
let ctxX = maybe Ctx.empty ((`Ctx.singleton` genA) . lvVar) mx let ctxX = maybe Ctx.empty ((`Ctx.singleton` genA) . lvVar) mx
return $ return $
Module Module
(Syntax' l (SBind mx c1' c2') (c2' ^. sType)) (Syntax' l (SBind mx c1' c2') cs (c2' ^. sType))
(ctx1 `Ctx.union` ctxX `Ctx.union` ctx2) (ctx1 `Ctx.union` ctxX `Ctx.union` ctx2)
-- In all other cases, there can no longer be any definitions in the -- In all other cases, there can no longer be any definitions in the
@ -648,31 +648,31 @@ infer ::
) => ) =>
Syntax -> Syntax ->
m (Syntax' UType) m (Syntax' UType)
infer s@(Syntax l t) = addLocToTypeErr l $ case t of infer s@(CSyntax l t cs) = addLocToTypeErr l $ case t of
-- Primitives, i.e. things for which we immediately know the only -- Primitives, i.e. things for which we immediately know the only
-- possible correct type, and knowing an expected type would provide -- possible correct type, and knowing an expected type would provide
-- no extra information. -- no extra information.
TUnit -> return $ Syntax' l TUnit UTyUnit TUnit -> return $ Syntax' l TUnit cs UTyUnit
TConst c -> Syntax' l (TConst c) <$> (instantiate . toU $ inferConst c) TConst c -> Syntax' l (TConst c) cs <$> (instantiate . toU $ inferConst c)
TDir d -> return $ Syntax' l (TDir d) UTyDir TDir d -> return $ Syntax' l (TDir d) cs UTyDir
TInt n -> return $ Syntax' l (TInt n) UTyInt TInt n -> return $ Syntax' l (TInt n) cs UTyInt
TAntiInt x -> return $ Syntax' l (TAntiInt x) UTyInt TAntiInt x -> return $ Syntax' l (TAntiInt x) cs UTyInt
TText x -> return $ Syntax' l (TText x) UTyText TText x -> return $ Syntax' l (TText x) cs UTyText
TAntiText x -> return $ Syntax' l (TAntiText x) UTyText TAntiText x -> return $ Syntax' l (TAntiText x) cs UTyText
TBool b -> return $ Syntax' l (TBool b) UTyBool TBool b -> return $ Syntax' l (TBool b) cs UTyBool
TRobot r -> return $ Syntax' l (TRobot r) UTyActor TRobot r -> return $ Syntax' l (TRobot r) cs UTyActor
TRequireDevice d -> return $ Syntax' l (TRequireDevice d) (UTyCmd UTyUnit) TRequireDevice d -> return $ Syntax' l (TRequireDevice d) cs (UTyCmd UTyUnit)
TRequire n d -> return $ Syntax' l (TRequire n d) (UTyCmd UTyUnit) TRequire n d -> return $ Syntax' l (TRequire n d) cs (UTyCmd UTyUnit)
SRequirements x t1 -> do SRequirements x t1 -> do
t1' <- infer t1 t1' <- infer t1
return $ Syntax' l (SRequirements x t1') (UTyCmd UTyUnit) return $ Syntax' l (SRequirements x t1') cs (UTyCmd UTyUnit)
-- We should never encounter a TRef since they do not show up in -- We should never encounter a TRef since they do not show up in
-- surface syntax, only as values while evaluating (*after* -- surface syntax, only as values while evaluating (*after*
-- typechecking). -- typechecking).
TRef _ -> throwTypeErr l $ CantInfer t TRef _ -> throwTypeErr l $ CantInfer t
-- Just look up variables in the context. -- Just look up variables in the context.
TVar x -> Syntax' l (TVar x) <$> lookup l x TVar x -> Syntax' l (TVar x) cs <$> lookup l x
-- It is helpful to handle lambdas in inference mode as well as -- It is helpful to handle lambdas in inference mode as well as
-- checking mode; in particular, we can handle lambdas with an -- checking mode; in particular, we can handle lambdas with an
-- explicit type annotation on the argument. Just infer the body -- explicit type annotation on the argument. Just infer the body
@ -681,7 +681,7 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of
SLam x (Just argTy) body -> do SLam x (Just argTy) body -> do
let uargTy = toU argTy let uargTy = toU argTy
body' <- withBinding (lvVar x) (Forall [] uargTy) $ infer body body' <- withBinding (lvVar x) (Forall [] uargTy) $ infer body
return $ Syntax' l (SLam x (Just argTy) body') (UTyFun uargTy (body' ^. sType)) return $ Syntax' l (SLam x (Just argTy) body') cs (UTyFun uargTy (body' ^. sType))
-- Need special case here for applying 'atomic' or 'instant' so we -- Need special case here for applying 'atomic' or 'instant' so we
-- don't handle it with the case for generic type application. -- don't handle it with the case for generic type application.
@ -722,7 +722,7 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of
-- unit`). -- unit`).
resTy' <- applyBindings resTy resTy' <- applyBindings resTy
return $ Syntax' l (SApp f' x') resTy' return $ Syntax' l (SApp f' x') cs resTy'
-- We handle binds in inference mode for a similar reason to -- We handle binds in inference mode for a similar reason to
-- application. -- application.
@ -735,7 +735,7 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of
. withFrame l TCBindR . withFrame l TCBindR
$ infer c2 $ infer c2
_ <- decomposeCmdTy c2 (Actual, c2' ^. sType) _ <- decomposeCmdTy c2 (Actual, c2' ^. sType)
return $ Syntax' l (SBind mx c1' c2') (c2' ^. sType) return $ Syntax' l (SBind mx c1' c2') cs (c2' ^. sType)
-- Handle record projection in inference mode. Knowing the expected -- Handle record projection in inference mode. Knowing the expected
-- type of r.x doesn't really help since we must infer the type of r -- type of r.x doesn't really help since we must infer the type of r
@ -744,14 +744,14 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of
t1' <- infer t1 t1' <- infer t1
case t1' ^. sType of case t1' ^. sType of
UTyRcd m -> case M.lookup x m of UTyRcd m -> case M.lookup x m of
Just xTy -> return $ Syntax' l (SProj t1' x) xTy Just xTy -> return $ Syntax' l (SProj t1' x) cs xTy
Nothing -> throwTypeErr l $ UnknownProj x (SProj t1 x) Nothing -> throwTypeErr l $ UnknownProj x (SProj t1 x)
_ -> throwTypeErr l $ CantInferProj (SProj t1 x) _ -> throwTypeErr l $ CantInferProj (SProj t1 x)
-- See Note [Checking and inference for record literals] -- See Note [Checking and inference for record literals]
SRcd m -> do SRcd m -> do
m' <- itraverse (\x -> infer . fromMaybe (STerm (TVar x))) m m' <- itraverse (\x -> infer . fromMaybe (STerm (TVar x))) m
return $ Syntax' l (SRcd (Just <$> m')) (UTyRcd (fmap (^. sType) m')) return $ Syntax' l (SRcd (Just <$> m')) cs (UTyRcd (fmap (^. sType) m'))
-- To infer a type-annotated term, switch into checking mode. -- To infer a type-annotated term, switch into checking mode.
-- However, we must be careful to deal properly with polymorphic -- However, we must be careful to deal properly with polymorphic
@ -769,7 +769,7 @@ infer s@(Syntax l t) = addLocToTypeErr l $ case t of
-- following typechecking steps. -- following typechecking steps.
iuty <- instantiate upty iuty <- instantiate upty
c' <- check c iuty c' <- check c iuty
return $ Syntax' l (SAnnotate c' pty) (c' ^. sType) return $ Syntax' l (SAnnotate c' pty) cs (c' ^. sType)
-- Fallback: to infer the type of anything else, make up a fresh unification -- Fallback: to infer the type of anything else, make up a fresh unification
-- variable for its type and check against it. -- variable for its type and check against it.
@ -905,7 +905,7 @@ check ::
Syntax -> Syntax ->
UType -> UType ->
m (Syntax' UType) m (Syntax' UType)
check s@(Syntax l t) expected = addLocToTypeErr l $ case t of check s@(CSyntax l t cs) expected = addLocToTypeErr l $ case t of
-- if t : ty, then {t} : {ty}. -- if t : ty, then {t} : {ty}.
-- Note that in theory, if the @Maybe Var@ component of the @SDelay@ -- Note that in theory, if the @Maybe Var@ component of the @SDelay@
-- is @Just@, we should typecheck the body under a context extended -- is @Just@, we should typecheck the body under a context extended
@ -918,7 +918,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
SDelay d s1 -> do SDelay d s1 -> do
ty1 <- decomposeDelayTy s (Expected, expected) ty1 <- decomposeDelayTy s (Expected, expected)
s1' <- check s1 ty1 s1' <- check s1 ty1
return $ Syntax' l (SDelay d s1') (UTyDelay ty1) return $ Syntax' l (SDelay d s1') cs (UTyDelay ty1)
-- To check the type of a pair, make sure the expected type is a -- To check the type of a pair, make sure the expected type is a
-- product type, and push the two types down into the left and right. -- product type, and push the two types down into the left and right.
@ -926,7 +926,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
(ty1, ty2) <- decomposeProdTy s (Expected, expected) (ty1, ty2) <- decomposeProdTy s (Expected, expected)
s1' <- check s1 ty1 s1' <- check s1 ty1
s2' <- check s2 ty2 s2' <- check s2 ty2
return $ Syntax' l (SPair s1' s2') (UTyProd ty1 ty2) return $ Syntax' l (SPair s1' s2') cs (UTyProd ty1 ty2)
-- To check a lambda, make sure the expected type is a function type. -- To check a lambda, make sure the expected type is a function type.
SLam x mxTy body -> do SLam x mxTy body -> do
@ -943,7 +943,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
Right _ -> return () Right _ -> return ()
Nothing -> return () Nothing -> return ()
body' <- withBinding (lvVar x) (Forall [] argTy) $ check body resTy body' <- withBinding (lvVar x) (Forall [] argTy) $ check body resTy
return $ Syntax' l (SLam x mxTy body') (UTyFun argTy resTy) return $ Syntax' l (SLam x mxTy body') cs (UTyFun argTy resTy)
-- Special case for checking the argument to 'atomic' (or -- Special case for checking the argument to 'atomic' (or
-- 'instant'). 'atomic t' has the same type as 't', which must have -- 'instant'). 'atomic t' has the same type as 't', which must have
@ -963,7 +963,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
-- guaranteed to operate within a single tick. When c is Instant -- guaranteed to operate within a single tick. When c is Instant
-- we skip this check. -- we skip this check.
when (c == Atomic) $ validAtomic at when (c == Atomic) $ validAtomic at
return $ Syntax' l (SApp atomic' at') (UTyCmd argTy) return $ Syntax' l (SApp atomic' at') cs (UTyCmd argTy)
-- Checking the type of a let-expression. -- Checking the type of a let-expression.
SLet r x mxTy t1 t2 -> do SLet r x mxTy t1 t2 -> do
(upty, t1') <- case mxTy of (upty, t1') <- case mxTy of
@ -993,7 +993,7 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
ask @UCtx >>= mapM_ (noSkolems l) ask @UCtx >>= mapM_ (noSkolems l)
-- Return the annotated let. -- Return the annotated let.
return $ Syntax' l (SLet r x mxTy t1' t2') expected return $ Syntax' l (SLet r x mxTy t1' t2') cs expected
-- Definitions can only occur at the top level. -- Definitions can only occur at the top level.
SDef {} -> throwTypeErr l $ DefNotTopLevel t SDef {} -> throwTypeErr l $ DefNotTopLevel t
@ -1015,13 +1015,13 @@ check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
throwTypeErr l $ throwTypeErr l $
FieldsMismatch (joined expectedFields actualFields) FieldsMismatch (joined expectedFields actualFields)
m' <- itraverse (\x ms -> check (fromMaybe (STerm (TVar x)) ms) (tyMap ! x)) fields m' <- itraverse (\x ms -> check (fromMaybe (STerm (TVar x)) ms) (tyMap ! x)) fields
return $ Syntax' l (SRcd (Just <$> m')) expected return $ Syntax' l (SRcd (Just <$> m')) cs expected
-- Fallback: switch into inference mode, and check that the type we -- Fallback: switch into inference mode, and check that the type we
-- get is what we expected. -- get is what we expected.
_ -> do _ -> do
Syntax' l' t' actual <- infer s Syntax' l' t' _ actual <- infer s
Syntax' l' t' <$> unify (Just s) (joined expected actual) Syntax' l' t' cs <$> unify (Just s) (joined expected actual)
-- ~~~~ Note [Checking and inference for record literals] -- ~~~~ Note [Checking and inference for record literals]
-- --

View File

@ -113,7 +113,7 @@ valueToTerm (VClo x t e) =
M.foldrWithKey M.foldrWithKey
(\y v -> TLet False y Nothing (valueToTerm v)) (\y v -> TLet False y Nothing (valueToTerm v))
(TLam x Nothing t) (TLam x Nothing t)
(M.restrictKeys (unCtx e) (S.delete x (setOf freeVarsV (Syntax' NoLoc t ())))) (M.restrictKeys (unCtx e) (S.delete x (setOf freeVarsV (Syntax' NoLoc t Nothing ()))))
valueToTerm (VCApp c vs) = foldl' TApp (TConst c) (reverse (map valueToTerm vs)) valueToTerm (VCApp c vs) = foldl' TApp (TConst c) (reverse (map valueToTerm vs))
valueToTerm (VDef r x t _) = TDef r x Nothing t valueToTerm (VDef r x t _) = TDef r x Nothing t
valueToTerm (VResult v _) = valueToTerm v valueToTerm (VResult v _) = valueToTerm v

View File

@ -231,7 +231,7 @@ recogFoundHandler appStateRef = do
codeRenderHandler :: Text -> Handler Text codeRenderHandler :: Text -> Handler Text
codeRenderHandler contents = do codeRenderHandler contents = do
return $ case processTermEither contents of return $ case processTermEither contents of
Right (ProcessedTerm (Module stx@(Syntax' _srcLoc _term _) _) _ _) -> Right (ProcessedTerm (Module stx@(Syntax' _srcLoc _term _ _) _) _ _) ->
into @Text . drawTree . fmap (T.unpack . prettyTextLine) . para Node $ stx into @Text . drawTree . fmap (T.unpack . prettyTextLine) . para Node $ stx
Left x -> x Left x -> x

View File

@ -369,6 +369,7 @@ library swarm-engine
http-types >=0.12 && <0.13, http-types >=0.12 && <0.13,
lens >=4.19 && <5.4, lens >=4.19 && <5.4,
linear >=1.21.6 && <1.24, linear >=1.21.6 && <1.24,
megaparsec >=9.6 && <9.7,
mtl >=2.2.2 && <2.4, mtl >=2.2.2 && <2.4,
nonempty-containers >=0.3.4 && <0.3.5, nonempty-containers >=0.3.4 && <0.3.5,
prettyprinter >=1.7.0 && <1.8, prettyprinter >=1.7.0 && <1.8,
@ -775,6 +776,7 @@ test-suite swarm-unit
TestLanguagePipeline TestLanguagePipeline
TestNotification TestNotification
TestOrdering TestOrdering
TestParse
TestPedagogy TestPedagogy
TestPretty TestPretty
TestRecipeCoverage TestRecipeCoverage
@ -792,6 +794,7 @@ test-suite swarm-unit
filepath, filepath,
hashable, hashable,
lens, lens,
megaparsec,
mtl, mtl,
tasty >=0.10 && <1.6, tasty >=0.10 && <1.6,
tasty-expected-failure >=0.12 && <0.13, tasty-expected-failure >=0.12 && <0.13,

View File

@ -36,6 +36,7 @@ import TestLSP (testLSP)
import TestLanguagePipeline (testLanguagePipeline) import TestLanguagePipeline (testLanguagePipeline)
import TestNotification (testNotification) import TestNotification (testNotification)
import TestOrdering (testOrdering) import TestOrdering (testOrdering)
import TestParse (testParse)
import TestPedagogy (testPedagogy) import TestPedagogy (testPedagogy)
import TestPretty (testPrettyConst) import TestPretty (testPrettyConst)
import TestRecipeCoverage (testDeviceRecipeCoverage) import TestRecipeCoverage (testDeviceRecipeCoverage)
@ -55,6 +56,7 @@ tests s =
testGroup testGroup
"Tests" "Tests"
[ testLanguagePipeline [ testLanguagePipeline
, testParse
, testPrettyConst , testPrettyConst
, testBoolExpr , testBoolExpr
, testCommands , testCommands

View File

@ -12,12 +12,12 @@ import Data.Text (Text)
import Graphics.Vty.Input.Events qualified as V import Graphics.Vty.Input.Events qualified as V
import Swarm.Game.Location import Swarm.Game.Location
import Swarm.Language.Key import Swarm.Language.Key
import Swarm.Language.Parse (runParser)
import Swarm.Language.Syntax import Swarm.Language.Syntax
import Test.QuickCheck qualified as QC import Test.QuickCheck qualified as QC
import Test.Tasty import Test.Tasty
import Test.Tasty.HUnit import Test.Tasty.HUnit
import Test.Tasty.QuickCheck (testProperty) import Test.Tasty.QuickCheck (testProperty)
import Text.Megaparsec (runParser)
import Witch import Witch
testCommands :: TestTree testCommands :: TestTree
@ -47,7 +47,7 @@ testCommands =
[ testGroup [ testGroup
"Parsing" "Parsing"
( let parseKeyTest input mods k = ( let parseKeyTest input mods k =
assertEqual "" (runParser parseKeyCombo input) (Right (mkKeyCombo mods k)) assertEqual "" (runParser parseKeyCombo "" input) (Right (mkKeyCombo mods k))
in [ testCase "parse x" $ parseKeyTest "x" [] (V.KChar 'x') in [ testCase "parse x" $ parseKeyTest "x" [] (V.KChar 'x')
, testCase "parse X" $ parseKeyTest "X" [] (V.KChar 'X') , testCase "parse X" $ parseKeyTest "X" [] (V.KChar 'X')
, testCase "parse C" $ parseKeyTest "C" [] (V.KChar 'C') , testCase "parse C" $ parseKeyTest "C" [] (V.KChar 'C')
@ -89,4 +89,4 @@ arbitraryModifiers = QC.sublistOf [V.MAlt, V.MCtrl, V.MMeta, V.MShift]
prop_parse_pretty_key :: KeyCombo -> Bool prop_parse_pretty_key :: KeyCombo -> Bool
prop_parse_pretty_key kc = prop_parse_pretty_key kc =
runParser parseKeyCombo (prettyKeyCombo kc) == Right kc runParser parseKeyCombo "" (prettyKeyCombo kc) == Right kc

View File

@ -85,7 +85,7 @@ testLSP =
getWarnings :: Text -> [UnusedVar] getWarnings :: Text -> [UnusedVar]
getWarnings content = getWarnings content =
case readTerm' content of case readTerm' content of
Right (Just term) -> map simplifyWarning problems Right (Just term, _) -> map simplifyWarning problems
where where
VU.Usage _ problems = VU.getUsage mempty term VU.Usage _ problems = VU.getUsage mempty term
_ -> [] _ -> []

66
test/unit/TestParse.hs Normal file
View File

@ -0,0 +1,66 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
-- |
-- SPDX-License-Identifier: BSD-3-Clause
--
-- Swarm parser tests.
module TestParse where
import Data.Foldable qualified as F
import Data.Text (Text)
import Swarm.Language.Parse
import Swarm.Language.Syntax
import Test.Tasty
import Test.Tasty.HUnit (Assertion, assertEqual, assertFailure, testCase)
import Text.Megaparsec (errorBundlePretty)
import Witch (into)
testParse :: TestTree
testParse =
testGroup
"Parser - comments"
[ testCase "none" $
expectParsedComments
"1 + 2"
[]
, testCase "suffix" $
expectParsedComments
"1 + 2 // add"
[Comment (SrcLoc 6 12) LineComment SuffixComment " add"]
, testCase "standalone" $
expectParsedComments
"// add\n1 + 2"
[Comment (SrcLoc 0 6) LineComment StandaloneComment " add"]
, testCase "block suffix" $
expectParsedComments
"1 + 2 /* add */"
[Comment (SrcLoc 6 15) BlockComment SuffixComment " add "]
, testCase "block standalone" $
expectParsedComments
"/* add */\n1 + 2"
[Comment (SrcLoc 0 9) BlockComment StandaloneComment " add "]
, testCase "block prefix" $
expectParsedComments
"/* add */ 1 + 2"
[Comment (SrcLoc 0 9) BlockComment StandaloneComment " add "]
, testCase "block infix" $
expectParsedComments
"1 + /*add*/ 2"
[Comment (SrcLoc 4 11) BlockComment SuffixComment "add"]
, testCase "multiline block" $
expectParsedComments
"/* add \n some numbers */\n 1 + 2"
[Comment (SrcLoc 0 25) BlockComment StandaloneComment " add \n some numbers "]
, testCase "multiple lines" $
expectParsedComments
"// add\n// some numbers\n 1 + 2"
[ Comment (SrcLoc 0 6) LineComment StandaloneComment " add"
, Comment (SrcLoc 7 22) LineComment StandaloneComment " some numbers"
]
]
expectParsedComments :: Text -> [Comment] -> Assertion
expectParsedComments input ex = case readTerm' input of
Left err -> assertFailure (into @String $ errorBundlePretty err)
Right (_, res) -> assertEqual "Expected parsed comments" ex (F.toList res)