1
1
mirror of https://github.com/github/semantic.git synced 2024-11-24 17:04:47 +03:00

Institute parser for Core.

I tried to pull the history from this patch over but I was not able to
figure out how to resolve conflicts from `git am`.
This commit is contained in:
Patrick Thomson 2019-06-03 16:09:47 -04:00
parent ac8bd4409c
commit e8ac13f098
3 changed files with 151 additions and 11 deletions

View File

@ -26,22 +26,26 @@ library
, Analysis.Typecheck
, Control.Effect.Readline
, Data.Core
, Data.Core.Parser
, Data.File
, Data.Loc
, Data.Name
, Data.Stack
-- other-modules:
-- other-extensions:
build-depends: algebraic-graphs ^>= 0.3
, base >= 4.11 && < 5
, containers ^>= 0.6
, directory ^>= 1.3
, filepath ^>= 1.4
, fused-effects ^>= 0.4
, haskeline ^>= 0.7.5
, prettyprinter ^>= 1.2.1
, semigroupoids ^>= 5.3
, transformers ^>= 0.5.6
build-depends: algebraic-graphs ^>= 0.3
, base >= 4.11 && < 5
, containers ^>= 0.6
, directory ^>= 1.3
, filepath ^>= 1.4
, fused-effects ^>= 0.4
, haskeline ^>= 0.7.5
, parsers ^>= 0.12.10
, prettyprinter ^>= 1.2.1
, semigroupoids ^>= 5.3
, transformers ^>= 0.5.6
, trifecta ^>= 2
, unordered-containers ^>= 0.2.10
hs-source-dirs: src
default-language: Haskell2010
ghc-options: -Weverything -Wno-missing-local-signatures -Wno-missing-import-lists -Wno-implicit-prelude -Wno-safe -Wno-unsafe -Wno-name-shadowing -Wno-monomorphism-restriction -Wno-missed-specialisations -Wno-all-missed-specialisations

View File

@ -0,0 +1,118 @@
{-# LANGUAGE ExplicitForAll, FlexibleInstances, LambdaCase, MultiParamTypeClasses, OverloadedLists,
ScopedTypeVariables #-}
module Data.Core.Parser
( module Text.Trifecta
, core
, lit
, expr
, lvalue
) where
-- Consult @doc/grammar.md@ for an EBNF grammar.
import Control.Applicative
import qualified Data.Char as Char
import Data.Core
import Data.Name
import Data.Semigroup
import Data.String
import qualified Text.Parser.Token as Token
import qualified Text.Parser.Token.Highlight as Highlight
import Text.Trifecta hiding (ident)
-- * Identifier styles and derived parsers
validIdentifierStart :: Char -> Bool
validIdentifierStart c = not (Char.isDigit c) && isSimpleCharacter c
coreIdents :: TokenParsing m => IdentifierStyle m
coreIdents = Token.IdentifierStyle
{ _styleName = "core"
, _styleStart = satisfy validIdentifierStart
, _styleLetter = satisfy isSimpleCharacter
, _styleReserved = reservedNames
, _styleHighlight = Highlight.Identifier
, _styleReservedHighlight = Highlight.ReservedIdentifier
}
reserved :: (TokenParsing m, Monad m) => String -> m ()
reserved = Token.reserve coreIdents
identifier :: (TokenParsing m, Monad m, IsString s) => m s
identifier = choice [quote, plain] <?> "identifier" where
plain = Token.ident coreIdents
quote = between (string "#{") (symbol "}") (fromString <$> some (noneOf "{}"))
-- * Parsers (corresponding to EBNF)
core :: (TokenParsing m, Monad m) => m Core
core = expr
expr :: (TokenParsing m, Monad m) => m Core
expr = chainl1 atom go where
go = choice [ (:.) <$ dot
, (:$) <$ notFollowedBy dot
]
atom :: (TokenParsing m, Monad m) => m Core
atom = choice
[ comp
, ifthenelse
, edge
, lit
, ident
, assign
, parens expr
]
comp :: (TokenParsing m, Monad m) => m Core
comp = braces (sconcat <$> sepEndByNonEmpty expr semi)
ifthenelse :: (TokenParsing m, Monad m) => m Core
ifthenelse = If
<$ reserved "if" <*> core
<* reserved "then" <*> core
<* reserved "else" <*> core
<?> "if-then-else statement"
assign :: (TokenParsing m, Monad m) => m Core
assign = (:=) <$> try (lvalue <* symbolic '=') <*> core
edge :: (TokenParsing m, Monad m) => m Core
edge = kw <*> expr where kw = choice [ Edge Lexical <$ reserved "lexical"
, Edge Import <$ reserved "import"
, Load <$ reserved "load"
]
lvalue :: (TokenParsing m, Monad m) => m Core
lvalue = choice
[ Let <$ reserved "let" <*> name
, ident
, parens expr
]
-- * Literals
name :: (TokenParsing m, Monad m) => m Name
name = choice [regular, strpath] <?> "name" where
regular = User <$> identifier
strpath = Path <$> between (symbolic '"') (symbolic '"') (some $ noneOf "\"")
lit :: (TokenParsing m, Monad m) => m Core
lit = let x `given` n = x <$ reserved n in choice
[ Bool True `given` "#true"
, Bool False `given` "#false"
, Unit `given` "#unit"
, Frame `given` "#frame"
, lambda
] <?> "literal"
lambda :: (TokenParsing m, Monad m) => m Core
lambda = Lam <$ lambduh <*> name <* arrow <*> core <?> "lambda" where
lambduh = symbolic 'λ' <|> symbolic '\\'
arrow = symbol "" <|> symbol "->"
ident :: (Monad m, TokenParsing m) => m Core
ident = Var <$> name <?> "identifier"

View File

@ -1,8 +1,10 @@
{-# LANGUAGE DeriveFunctor, ExistentialQuantification, FlexibleContexts, FlexibleInstances, GeneralizedNewtypeDeriving, LambdaCase, MultiParamTypeClasses, OverloadedStrings, StandaloneDeriving, TypeOperators, UndecidableInstances #-}
{-# LANGUAGE DeriveFunctor, ExistentialQuantification, FlexibleContexts, FlexibleInstances, GeneralizedNewtypeDeriving, LambdaCase, MultiParamTypeClasses, OverloadedLists, OverloadedStrings,StandaloneDeriving, TypeOperators, UndecidableInstances #-}
module Data.Name
( User
, Namespaced
, Name(..)
, reservedNames
, isSimpleCharacter
, Gensym(..)
, (//)
, gensym
@ -20,6 +22,8 @@ import Control.Effect.State
import Control.Effect.Sum
import Control.Monad.Fail
import Control.Monad.IO.Class
import qualified Data.Char as Char
import Data.HashSet (HashSet)
import Data.Text.Prettyprint.Doc (Pretty (..))
import qualified Data.Text.Prettyprint.Doc as Pretty
@ -50,6 +54,20 @@ instance Pretty Name where
User n -> pretty n
Path p -> pretty (show p)
reservedNames :: HashSet User
reservedNames = [ "#true", "#false", "let", "#frame", "if", "then", "else"
, "lexical", "import", "#unit", "load"]
-- | A simple character is, loosely defined, a character that is compatible
-- with identifiers in most ASCII-oriented programming languages. This is defined
-- as the alphanumeric set plus @$@ and @_@.
isSimpleCharacter :: Char -> Bool
isSimpleCharacter = \case
'$' -> True -- common in JS
'_' -> True
'?' -> True -- common in Ruby
c -> Char.isAlphaNum c
data Gensym
= Root String
| Gensym :/ (String, Int)