Add format --v0.5 option to port code from older syntax (#1851)

This is a followup on top of #1583 which turns `swarm format` into a tool for porting existing Swarm code into the newest syntax, via an extra `--v0.5` argument.  In particular, this PR:

- Generalizes the parser to take a configuration record, which among other things contains the language version being parsed.
- Adds code to allow the parser to parse either the current syntax or one version ago (when types did not start with capital letter) depending on the version in the configuration.
    - The idea is to have the parser always support the current version and one older version, so we can always upgrade version n to version n+1.
- Adds a new flag `--v0.5` to the `format` subcommand which causes the input to be parsed in v0.5 mode.  However, the output of `format` will always use the latest syntax.  Thus, `swarm format --v0.5` reads code in v0.5 format and prints it in the latest format, so this can be used to automatically port existing `.sw` files.

This PR also makes a few minor improvements to pretty-printing.
This commit is contained in:
Brent Yorgey 2024-05-21 19:09:31 -05:00 committed by GitHub
parent 1a4dcd82f0
commit e071252d72
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 146 additions and 53 deletions

View File

@ -12,6 +12,7 @@ import Options.Applicative
import Swarm.App (appMain)
import Swarm.Language.Format
import Swarm.Language.LSP (lspMain)
import Swarm.Language.Parser.Core (LanguageVersion (..))
import Swarm.TUI.Model (AppOpts (..), ColorMode (..))
import Swarm.TUI.Model.UI (defaultInitLgTicksPerSecond)
import Swarm.Version
@ -29,7 +30,7 @@ commitInfo = case gitInfo of
data CLI
= Run AppOpts
| Format FormatInput FormatOutput (Maybe FormatWidth)
| Format FormatConfig
| LSP
| Version
@ -37,7 +38,7 @@ cliParser :: Parser CLI
cliParser =
subparser
( mconcat
[ command "format" (info (Format <$> input <*> output <*> optional widthOpt <**> helper) (progDesc "Format a file"))
[ command "format" (info (Format <$> parseFormat) (progDesc "Format a file"))
, command "lsp" (info (pure LSP) (progDesc "Start the LSP"))
, command "version" (info (pure Version) (progDesc "Get current and upstream version."))
]
@ -69,6 +70,12 @@ cliParser =
widthOpt :: Parser FormatWidth
widthOpt = option auto (long "width" <> metavar "COLUMNS" <> help "Use layout with maximum width")
langVer :: Parser LanguageVersion
langVer = flag SwarmLangLatest SwarmLang0_5 (long "v0.5" <> help "Read (& convert) code from Swarm version 0.5")
parseFormat :: Parser FormatConfig
parseFormat = FormatConfig <$> input <*> output <*> optional widthOpt <*> langVer <**> helper
seed :: Parser (Maybe Int)
seed = optional $ option auto (long "seed" <> short 's' <> metavar "INT" <> help "Seed to use for world generation")
@ -123,6 +130,6 @@ main = do
cli <- execParser cliInfo
case cli of
Run opts -> appMain opts
Format fi fo w -> formatSwarmIO fi fo w
Format cfg -> formatSwarmIO cfg
LSP -> lspMain
Version -> showVersion

View File

@ -7,26 +7,25 @@
module Swarm.Language.Format where
import Control.Applicative ((<|>))
import Control.Lens ((&), (.~))
import Data.Text (Text)
import Data.Text qualified as T
import Data.Text.IO qualified as T
import Prettyprinter
import Prettyprinter.Render.Text qualified as RT
import Swarm.Language.Parser (readTerm)
import Swarm.Language.Parser (readTerm')
import Swarm.Language.Parser.Core (LanguageVersion, defaultParserConfig, languageVersion)
import Swarm.Language.Pretty
import Swarm.Util ((?))
import System.Console.Terminal.Size qualified as Term
import System.Exit (exitFailure)
import System.IO (stderr)
type FormatWidth = Int
import Text.Megaparsec.Error (errorBundlePretty)
import Witch (into)
-- | From where should the input be taken?
data FormatInput = Stdin | InputFile FilePath
-- | Where should the formatted code be output?
data FormatOutput = Stdout | OutputFile FilePath | Inplace
getInput :: FormatInput -> IO Text
getInput Stdin = T.getContents
getInput (InputFile fp) = T.readFile fp
@ -35,13 +34,25 @@ showInput :: FormatInput -> Text
showInput Stdin = "(input)"
showInput (InputFile fp) = T.pack fp
-- | Where should the formatted code be output?
data FormatOutput = Stdout | OutputFile FilePath | Inplace
type FormatWidth = Int
data FormatConfig = FormatConfig
{ formatInput :: FormatInput
, formatOutput :: FormatOutput
, formatWidth :: Maybe FormatWidth
, formatLanguageVersion :: LanguageVersion
}
-- | Validate and format swarm-lang code.
formatSwarmIO :: FormatInput -> FormatOutput -> Maybe FormatWidth -> IO ()
formatSwarmIO input output mWidth = do
formatSwarmIO :: FormatConfig -> IO ()
formatSwarmIO cfg@(FormatConfig input output mWidth _) = do
content <- getInput input
mWindowWidth <- (fmap . fmap) Term.width Term.size
let w = mWidth <|> case output of Stdout -> mWindowWidth; _ -> Nothing
case formatSwarm w content of
case formatSwarm cfg {formatWidth = w} content of
Right fmt -> case output of
Stdout -> T.putStrLn fmt
OutputFile outFile -> T.writeFile outFile fmt
@ -52,11 +63,13 @@ formatSwarmIO input output mWidth = do
T.hPutStrLn stderr $ showInput input <> ":" <> e
exitFailure
formatSwarm :: Maybe FormatWidth -> Text -> Either Text Text
formatSwarm mWidth content = case readTerm content of
formatSwarm :: FormatConfig -> Text -> Either Text Text
formatSwarm (FormatConfig _ _ mWidth ver) content = case readTerm' cfg content of
Right Nothing -> Right ""
Right (Just ast) ->
let mkOpt w = LayoutOptions (AvailablePerLine w 1.0)
opt = (mkOpt <$> mWidth) ? defaultLayoutOptions
in Right . RT.renderStrict . layoutPretty opt $ ppr ast
Left e -> Left e
Left e -> Left (into @Text $ errorBundlePretty e)
where
cfg = defaultParserConfig & languageVersion .~ ver

View File

@ -23,6 +23,7 @@ import Language.LSP.VFS (VirtualFile (..), virtualFileText)
import Swarm.Language.LSP.Hover qualified as H
import Swarm.Language.LSP.VarUsage qualified as VU
import Swarm.Language.Parser (readTerm')
import Swarm.Language.Parser.Core (defaultParserConfig)
import Swarm.Language.Parser.Util (getLocRange, showErrorPos)
import Swarm.Language.Pipeline (processParsedTerm')
import Swarm.Language.Pretty (prettyText)
@ -81,7 +82,7 @@ validateSwarmCode doc version content = do
-- However, getting rid of this seems to break error highlighting.
flushDiagnosticsBySource 0 (Just diagnosticSourcePrefix)
let (parsingErrs, unusedVarWarnings) = case readTerm' content of
let (parsingErrs, unusedVarWarnings) = case readTerm' defaultParserConfig content of
Right Nothing -> ([], [])
Right (Just term) -> (parsingErrors, unusedWarnings)
where

View File

@ -33,6 +33,7 @@ import Language.LSP.VFS
import Swarm.Language.Context as Ctx
import Swarm.Language.Module (Module (..))
import Swarm.Language.Parser (readTerm')
import Swarm.Language.Parser.Core (defaultParserConfig)
import Swarm.Language.Pipeline (ProcessedTerm (..), processParsedTerm)
import Swarm.Language.Pretty (prettyText, prettyTextLine)
import Swarm.Language.Syntax
@ -58,7 +59,7 @@ showHoverInfo ::
VirtualFile ->
Maybe (Text, Maybe J.Range)
showHoverInfo _ p vf@(VirtualFile _ _ myRope) =
either (const Nothing) (fmap genHoverInfo) (readTerm' content)
either (const Nothing) (fmap genHoverInfo) (readTerm' defaultParserConfig content)
where
content = virtualFileText vf
absolutePos =

View File

@ -17,7 +17,7 @@ import Data.Bifunctor (first, second)
import Data.Sequence (Seq)
import Data.Text (Text)
import Swarm.Language.Parser.Comment (populateComments)
import Swarm.Language.Parser.Core (ParserError, runParser)
import Swarm.Language.Parser.Core (ParserConfig, ParserError, defaultParserConfig, runParser')
import Swarm.Language.Parser.Lex (sc)
import Swarm.Language.Parser.Term (parseTerm)
import Swarm.Language.Parser.Util (fullyMaybe)
@ -31,12 +31,12 @@ import Witch (from)
-- 'Nothing' if the input was only whitespace) or a pretty-printed
-- parse error message.
readTerm :: Text -> Either Text (Maybe Syntax)
readTerm = first (from . errorBundlePretty) . readTerm'
readTerm = first (from . errorBundlePretty) . readTerm' defaultParserConfig
-- | A lower-level `readTerm` which returns the megaparsec bundle error
-- for precise error reporting.
readTerm' :: Text -> Either ParserError (Maybe Syntax)
readTerm' = second handleComments . runParser (fullyMaybe sc parseTerm)
-- | A lower-level `readTerm` which allow configuring the parser and
-- returns the megaparsec bundle error for precise error reporting.
readTerm' :: ParserConfig -> Text -> Either ParserError (Maybe Syntax)
readTerm' cfg = second handleComments . runParser' cfg (fullyMaybe sc parseTerm)
where
handleComments :: (Maybe Syntax, Seq Comment) -> Maybe Syntax
handleComments (s, cs) = populateComments cs <$> s

View File

@ -6,8 +6,13 @@
-- Core data type definitions and utilities for the Swarm language
-- parser.
module Swarm.Language.Parser.Core (
-- * Antiquoting
-- * Parser configuration
Antiquoting (..),
LanguageVersion (..),
ParserConfig,
defaultParserConfig,
antiquoting,
languageVersion,
-- * Comment parsing state
CommentState (..),
@ -20,6 +25,7 @@ module Swarm.Language.Parser.Core (
-- ** Running
runParser,
runParser',
runParserTH,
) where
@ -32,7 +38,8 @@ import Data.Sequence qualified as Seq
import Data.Text (Text)
import Data.Void (Void)
import Swarm.Language.Syntax (Comment)
import Text.Megaparsec hiding (runParser)
import Text.Megaparsec hiding (runParser, runParser')
import Text.Megaparsec qualified as MP
import Text.Megaparsec.State (initialPosState, initialState)
import Witch (from)
@ -47,6 +54,27 @@ import Witch (from)
data Antiquoting = AllowAntiquoting | DisallowAntiquoting
deriving (Eq, Ord, Show)
-- | Which version of the Swarm language are we parsing? As a general
-- rule, we want to support one older version in addition to the
-- current version, to allow for upgrading code via @swarm format@.
data LanguageVersion = SwarmLang0_5 | SwarmLangLatest
deriving (Eq, Ord, Show, Enum, Bounded)
-- | Read-only parser configuration.
data ParserConfig = ParserConfig
{ _antiquoting :: Antiquoting
, _languageVersion :: LanguageVersion
}
makeLenses ''ParserConfig
defaultParserConfig :: ParserConfig
defaultParserConfig =
ParserConfig
{ _antiquoting = DisallowAntiquoting
, _languageVersion = SwarmLangLatest
}
data CommentState = CS
{ _freshLine :: Bool
-- ^ Are we currently on a (so far) blank line, i.e. have there been
@ -65,7 +93,7 @@ initCommentState = CS {_freshLine = True, _comments = Seq.empty}
------------------------------------------------------------
-- Parser types
type Parser = ReaderT Antiquoting (StateT CommentState (Parsec Void Text))
type Parser = ReaderT ParserConfig (StateT CommentState (Parsec Void Text))
type ParserError = ParseErrorBundle Text Void
@ -75,11 +103,16 @@ type ParserError = ParseErrorBundle Text Void
-- | Run a parser on some input text, returning either the result +
-- all collected comments, or a parse error message.
runParser :: Parser a -> Text -> Either ParserError (a, Seq Comment)
runParser p t =
runParser = runParser' defaultParserConfig
-- | Like 'runParser', but allow configuring with an arbitrary
-- 'ParserConfig'.
runParser' :: ParserConfig -> Parser a -> Text -> Either ParserError (a, Seq Comment)
runParser' cfg p t =
(\pt -> parse pt "" t)
. fmap (second (^. comments))
. flip runStateT initCommentState
. flip runReaderT DisallowAntiquoting
. flip runReaderT cfg
$ p
-- | A utility for running a parser in an arbitrary 'MonadFail' (which
@ -89,9 +122,9 @@ runParserTH :: (Monad m, MonadFail m) => (String, Int, Int) -> Parser a -> Strin
runParserTH (file, line, col) p s =
either (fail . errorBundlePretty) (return . fst)
. snd
. flip runParser' initState
. flip MP.runParser' initState
. flip runStateT initCommentState
. flip runReaderT AllowAntiquoting
. flip runReaderT defaultParserConfig {_antiquoting = AllowAntiquoting}
$ p
where
initState :: State Text Void

View File

@ -43,7 +43,7 @@ module Swarm.Language.Parser.Lex (
brackets,
) where
import Control.Lens (use, (%=), (.=))
import Control.Lens (use, view, (%=), (.=))
import Control.Monad (void)
import Data.Char (isUpper)
import Data.Containers.ListUtils (nubOrd)
@ -172,6 +172,12 @@ reservedWords =
++ primitiveTypeNames
++ keywords
-- | Cached version of the reserved words list with everything
-- lowercase, for use in parsing version 0.5 of the language, where
-- types were lowercase instead of uppercase.
lowerReservedWords :: Set Text
lowerReservedWords = S.map T.toLower reservedWords
-- | Parse a reserved word, given a string recognizer (which can
-- /e.g./ be case sensitive or not), making sure it is not a prefix
-- of a longer variable name, and allowing the parser to backtrack
@ -193,19 +199,26 @@ data IdentifierType = IDTyVar | IDTmVar
-- | Parse an identifier together with its source location info.
locIdentifier :: IdentifierType -> Parser LocVar
locIdentifier idTy = uncurry LV <$> parseLocG ((lexeme . try) (p >>= check) <?> "variable name")
locIdentifier idTy = do
ver <- view languageVersion
uncurry LV <$> parseLocG ((lexeme . try) (p >>= check ver) <?> "variable name")
where
p = (:) <$> (letterChar <|> char '_') <*> many (alphaNumChar <|> char '_' <|> char '\'')
check (into @Text -> t)
| t `S.member` reservedWords || T.toLower t `S.member` reservedWords =
failT ["Reserved word", squote t, "cannot be used as a variable name"]
| IDTyVar <- idTy
, T.toTitle t `S.member` reservedWords =
failT ["Reserved type name", squote t, "cannot be used as a type variable name; perhaps you meant", squote (T.toTitle t) <> "?"]
| IDTyVar <- idTy
, isUpper (T.head t) =
failT ["Type variable names must start with a lowercase letter"]
| otherwise = return t
check ver (into @Text -> t) = case ver of
SwarmLang0_5
| T.toLower t `S.member` lowerReservedWords ->
failT ["reserved word", squote t, "cannot be used as variable name"]
| otherwise -> return t
SwarmLangLatest
| t `S.member` reservedWords || T.toLower t `S.member` reservedWords ->
failT ["Reserved word", squote t, "cannot be used as a variable name"]
| IDTyVar <- idTy
, T.toTitle t `S.member` reservedWords ->
failT ["Reserved type name", squote t, "cannot be used as a type variable name; perhaps you meant", squote (T.toTitle t) <> "?"]
| IDTyVar <- idTy
, isUpper (T.head t) ->
failT ["Type variable names must start with a lowercase letter"]
| otherwise -> return t
-- | Parse a term variable together with its source location info.
locTmVar :: Parser LocVar

View File

@ -9,7 +9,6 @@ module Swarm.Language.Parser.Term where
import Control.Lens (view, (^.))
import Control.Monad (guard)
import Control.Monad.Combinators.Expr
import Control.Monad.Reader (ask)
import Data.Foldable (asum)
import Data.List (foldl')
import Data.Map (Map)
@ -98,7 +97,7 @@ parseTermAtom2 =
<|> parseLoc (TDelay SimpleDelay (TConst Noop) <$ try (symbol "{" *> symbol "}"))
<|> parseLoc (SDelay SimpleDelay <$> braces parseTerm)
<|> parseLoc (ask >>= (guard . (== AllowAntiquoting)) >> parseAntiquotation)
<|> parseLoc (view antiquoting >>= (guard . (== AllowAntiquoting)) >> parseAntiquotation)
-- | Construct an 'SLet', automatically filling in the Boolean field
-- indicating whether it is recursive.

View File

@ -9,11 +9,12 @@ module Swarm.Language.Parser.Type (
parseType,
) where
import Control.Lens (view)
import Control.Monad (join)
import Control.Monad.Combinators.Expr (Operator (..), makeExprParser)
import Data.Maybe (fromMaybe)
import Data.Set qualified as S
import Swarm.Language.Parser.Core (Parser)
import Swarm.Language.Parser.Core (LanguageVersion (..), Parser, languageVersion)
import Swarm.Language.Parser.Lex (
braces,
brackets,
@ -68,9 +69,19 @@ parseType = makeExprParser parseTypeAtom table
parseTypeAtom :: Parser Type
parseTypeAtom =
choice (map (\b -> TyBase b <$ reservedCS (baseTyName b)) listEnums)
<|> TyCmd <$> (reservedCS "Cmd" *> parseTypeAtom)
parseTyCon
<|> TyVar <$> tyVar
<|> TyDelay <$> braces parseType
<|> TyRcd <$> brackets (parseRecord (symbol ":" *> parseType))
<|> parens parseType
parseTyCon :: Parser Type
parseTyCon = do
ver <- view languageVersion
let reservedCase = case ver of
-- Version 0.5 of the language accepted type names in any case
SwarmLang0_5 -> reserved
-- The latest version requires them to be uppercase
SwarmLangLatest -> reservedCS
choice (map (\b -> TyBase b <$ reservedCase (baseTyName b)) listEnums)
<|> TyCmd <$> (reservedCase "Cmd" *> parseTypeAtom)

View File

@ -229,6 +229,7 @@ instance PrettyPrec (Term' ty) where
prettyPrec p (TRequire n e) = pparens (p > 10) $ "require" <+> pretty n <+> ppr @Term (TText e)
prettyPrec p (SRequirements _ e) = pparens (p > 10) $ "requirements" <+> ppr e
prettyPrec _ (TVar s) = pretty s
prettyPrec _ (SDelay _ (Syntax' _ (TConst Noop) _ _)) = "{}"
prettyPrec _ (SDelay _ t) = group . encloseWithIndent 2 lbrace rbrace $ ppr t
prettyPrec _ t@SPair {} = prettyTuple t
prettyPrec p t@(SLam {}) =
@ -266,6 +267,11 @@ instance PrettyPrec (Term' ty) where
[ prettyDefinition "def" x mty t1
, "end"
]
-- Special case for printing consecutive defs: don't worry about
-- precedence, and print a blank line with no semicolon
prettyPrec _ (SBind Nothing t1@(Syntax' _ (SDef {}) _ _) t2) =
prettyPrec 0 t1 <> hardline <> hardline <> prettyPrec 0 t2
-- General case for bind
prettyPrec p (SBind Nothing t1 t2) =
pparens (p > 0) $
prettyPrec 1 t1 <> ";" <> line <> prettyPrec 0 t2

View File

@ -60,8 +60,10 @@ import Data.List.NonEmpty (NonEmpty (..))
import Data.List.NonEmpty qualified as NE
import Data.Map qualified as M
import Data.Maybe (fromMaybe, isJust, isNothing, mapMaybe)
import Data.Set (Set)
import Data.Set qualified as S
import Data.String (fromString)
import Data.Text (Text)
import Data.Text qualified as T
import Data.Text.IO qualified as T
import Data.Time (getZonedTime)
@ -1212,6 +1214,15 @@ data CompletionType
newtype CompletionContext = CompletionContext {ctxCreativeMode :: Bool}
deriving (Eq)
-- | Reserved words corresponding to commands that can only be used in
-- creative mode. We only autocomplete to these when in creative mode.
creativeWords :: Set Text
creativeWords =
S.fromList
. map (syntax . constInfo)
. filter (\w -> constCaps w == Just CGod)
$ allConst
-- | Try to complete the last word in a partially-entered REPL prompt using
-- reserved words and names in scope (in the case of function names) or
-- entity names (in the case of string literals).
@ -1261,12 +1272,10 @@ tabComplete CompletionContext {..} names em theRepl = case theRepl ^. replPrompt
EntityName -> (entityNames, (/= '"'))
FunctionName -> (possibleWords, isIdentChar)
creativeWords = map (syntax . constInfo) $ filter (\w -> constCaps w == Just CGod) allConst
possibleWords =
names <> case ctxCreativeMode of
True -> S.toList reservedWords
False -> filter (`notElem` creativeWords) (S.toList reservedWords)
False -> S.toList $ reservedWords `S.difference` creativeWords
entityNames = M.keys $ entitiesByName em

View File

@ -9,7 +9,7 @@ module TestLSP (testLSP) where
import Data.Text (Text)
import Data.Text.IO qualified as TIO
import Swarm.Language.LSP.VarUsage qualified as VU
import Swarm.Language.Parser (readTerm')
import Swarm.Language.Parser (readTerm)
import Swarm.Language.Syntax qualified as S
import System.FilePath ((</>))
import Test.Tasty
@ -84,7 +84,7 @@ testLSP =
getWarnings :: Text -> [UnusedVar]
getWarnings content =
case readTerm' content of
case readTerm content of
Right (Just term) -> map simplifyWarning problems
where
VU.Usage _ problems = VU.getUsage mempty term

View File

@ -211,7 +211,7 @@ testLanguagePipeline =
, testGroup
"json encoding"
[ testCase "simple expr" (roundTripTerm "42 + 43")
, testCase "module def" (roundTripTerm "def x = 41 end;\ndef y = 42 end")
, testCase "module def" (roundTripTerm "def x = 41 end\n\ndef y = 42 end")
]
, testGroup
"atomic - #479"