diff --git a/semantic-core/semantic-core.cabal b/semantic-core/semantic-core.cabal index 5427b6251..a8955c373 100644 --- a/semantic-core/semantic-core.cabal +++ b/semantic-core/semantic-core.cabal @@ -26,22 +26,26 @@ library , Analysis.Typecheck , Control.Effect.Readline , Data.Core + , Data.Core.Parser , Data.File , Data.Loc , Data.Name , Data.Stack -- other-modules: -- other-extensions: - build-depends: algebraic-graphs ^>= 0.3 - , base >= 4.11 && < 5 - , containers ^>= 0.6 - , directory ^>= 1.3 - , filepath ^>= 1.4 - , fused-effects ^>= 0.4 - , haskeline ^>= 0.7.5 - , prettyprinter ^>= 1.2.1 - , semigroupoids ^>= 5.3 - , transformers ^>= 0.5.6 + build-depends: algebraic-graphs ^>= 0.3 + , base >= 4.11 && < 5 + , containers ^>= 0.6 + , directory ^>= 1.3 + , filepath ^>= 1.4 + , fused-effects ^>= 0.4 + , haskeline ^>= 0.7.5 + , parsers ^>= 0.12.10 + , prettyprinter ^>= 1.2.1 + , semigroupoids ^>= 5.3 + , transformers ^>= 0.5.6 + , trifecta ^>= 2 + , unordered-containers ^>= 0.2.10 hs-source-dirs: src default-language: Haskell2010 ghc-options: -Weverything -Wno-missing-local-signatures -Wno-missing-import-lists -Wno-implicit-prelude -Wno-safe -Wno-unsafe -Wno-name-shadowing -Wno-monomorphism-restriction -Wno-missed-specialisations -Wno-all-missed-specialisations diff --git a/semantic-core/src/Data/Core/Parser.hs b/semantic-core/src/Data/Core/Parser.hs new file mode 100644 index 000000000..a00342766 --- /dev/null +++ b/semantic-core/src/Data/Core/Parser.hs @@ -0,0 +1,118 @@ +{-# LANGUAGE ExplicitForAll, FlexibleInstances, LambdaCase, MultiParamTypeClasses, OverloadedLists, + ScopedTypeVariables #-} + +module Data.Core.Parser + ( module Text.Trifecta + , core + , lit + , expr + , lvalue + ) where + +-- Consult @doc/grammar.md@ for an EBNF grammar. + +import Control.Applicative +import qualified Data.Char as Char +import Data.Core +import Data.Name +import Data.Semigroup +import Data.String +import qualified Text.Parser.Token as Token +import qualified Text.Parser.Token.Highlight as Highlight +import Text.Trifecta hiding (ident) + +-- * Identifier styles and derived parsers + +validIdentifierStart :: Char -> Bool +validIdentifierStart c = not (Char.isDigit c) && isSimpleCharacter c + +coreIdents :: TokenParsing m => IdentifierStyle m +coreIdents = Token.IdentifierStyle + { _styleName = "core" + , _styleStart = satisfy validIdentifierStart + , _styleLetter = satisfy isSimpleCharacter + , _styleReserved = reservedNames + , _styleHighlight = Highlight.Identifier + , _styleReservedHighlight = Highlight.ReservedIdentifier + } + +reserved :: (TokenParsing m, Monad m) => String -> m () +reserved = Token.reserve coreIdents + +identifier :: (TokenParsing m, Monad m, IsString s) => m s +identifier = choice [quote, plain] "identifier" where + plain = Token.ident coreIdents + quote = between (string "#{") (symbol "}") (fromString <$> some (noneOf "{}")) + +-- * Parsers (corresponding to EBNF) + +core :: (TokenParsing m, Monad m) => m Core +core = expr + +expr :: (TokenParsing m, Monad m) => m Core +expr = chainl1 atom go where + go = choice [ (:.) <$ dot + , (:$) <$ notFollowedBy dot + ] + +atom :: (TokenParsing m, Monad m) => m Core +atom = choice + [ comp + , ifthenelse + , edge + , lit + , ident + , assign + , parens expr + ] + +comp :: (TokenParsing m, Monad m) => m Core +comp = braces (sconcat <$> sepEndByNonEmpty expr semi) + +ifthenelse :: (TokenParsing m, Monad m) => m Core +ifthenelse = If + <$ reserved "if" <*> core + <* reserved "then" <*> core + <* reserved "else" <*> core + "if-then-else statement" + +assign :: (TokenParsing m, Monad m) => m Core +assign = (:=) <$> try (lvalue <* symbolic '=') <*> core + +edge :: (TokenParsing m, Monad m) => m Core +edge = kw <*> expr where kw = choice [ Edge Lexical <$ reserved "lexical" + , Edge Import <$ reserved "import" + , Load <$ reserved "load" + ] + +lvalue :: (TokenParsing m, Monad m) => m Core +lvalue = choice + [ Let <$ reserved "let" <*> name + , ident + , parens expr + ] + +-- * Literals + +name :: (TokenParsing m, Monad m) => m Name +name = choice [regular, strpath] "name" where + regular = User <$> identifier + strpath = Path <$> between (symbolic '"') (symbolic '"') (some $ noneOf "\"") + +lit :: (TokenParsing m, Monad m) => m Core +lit = let x `given` n = x <$ reserved n in choice + [ Bool True `given` "#true" + , Bool False `given` "#false" + , Unit `given` "#unit" + , Frame `given` "#frame" + , lambda + ] "literal" + +lambda :: (TokenParsing m, Monad m) => m Core +lambda = Lam <$ lambduh <*> name <* arrow <*> core "lambda" where + lambduh = symbolic 'λ' <|> symbolic '\\' + arrow = symbol "→" <|> symbol "->" + +ident :: (Monad m, TokenParsing m) => m Core +ident = Var <$> name "identifier" + diff --git a/semantic-core/src/Data/Name.hs b/semantic-core/src/Data/Name.hs index 9ae8cf17d..0fc4ee02c 100644 --- a/semantic-core/src/Data/Name.hs +++ b/semantic-core/src/Data/Name.hs @@ -1,8 +1,10 @@ -{-# LANGUAGE DeriveFunctor, ExistentialQuantification, FlexibleContexts, FlexibleInstances, GeneralizedNewtypeDeriving, LambdaCase, MultiParamTypeClasses, OverloadedStrings, StandaloneDeriving, TypeOperators, UndecidableInstances #-} +{-# LANGUAGE DeriveFunctor, ExistentialQuantification, FlexibleContexts, FlexibleInstances, GeneralizedNewtypeDeriving, LambdaCase, MultiParamTypeClasses, OverloadedLists, OverloadedStrings,StandaloneDeriving, TypeOperators, UndecidableInstances #-} module Data.Name ( User , Namespaced , Name(..) +, reservedNames +, isSimpleCharacter , Gensym(..) , (//) , gensym @@ -20,6 +22,8 @@ import Control.Effect.State import Control.Effect.Sum import Control.Monad.Fail import Control.Monad.IO.Class +import qualified Data.Char as Char +import Data.HashSet (HashSet) import Data.Text.Prettyprint.Doc (Pretty (..)) import qualified Data.Text.Prettyprint.Doc as Pretty @@ -50,6 +54,20 @@ instance Pretty Name where User n -> pretty n Path p -> pretty (show p) +reservedNames :: HashSet User +reservedNames = [ "#true", "#false", "let", "#frame", "if", "then", "else" + , "lexical", "import", "#unit", "load"] + +-- | A ‘simple’ character is, loosely defined, a character that is compatible +-- with identifiers in most ASCII-oriented programming languages. This is defined +-- as the alphanumeric set plus @$@ and @_@. +isSimpleCharacter :: Char -> Bool +isSimpleCharacter = \case + '$' -> True -- common in JS + '_' -> True + '?' -> True -- common in Ruby + c -> Char.isAlphaNum c + data Gensym = Root String | Gensym :/ (String, Int)