1
1
mirror of https://github.com/github/semantic.git synced 2024-12-27 17:05:33 +03:00

Merge pull request #1148 from github/abstract-assignment-over-the-input-term-type

Abstract assignment over the input term type
This commit is contained in:
Rob Rix 2017-06-08 11:38:39 -04:00 committed by GitHub
commit e4cb8f9329
11 changed files with 534 additions and 258 deletions

View File

@ -36,6 +36,7 @@ library
, Data.Syntax.Declaration
, Data.Syntax.Expression
, Data.Syntax.Literal
, Data.Syntax.Markup
, Data.Syntax.Statement
, Data.Syntax.Type
, Data.Text.Listable
@ -45,6 +46,7 @@ library
, Language
, Language.C
, Language.Markdown
, Language.Markdown.Syntax
, Language.Go
, Language.Go.Syntax
, Language.Ruby

View File

@ -1,4 +1,4 @@
{-# LANGUAGE DataKinds, GADTs, InstanceSigs, MultiParamTypeClasses, ScopedTypeVariables, StandaloneDeriving, TypeFamilies #-}
{-# LANGUAGE DataKinds, GADTs, InstanceSigs, MultiParamTypeClasses, RankNTypes, ScopedTypeVariables, StandaloneDeriving, TypeFamilies, TypeOperators #-}
-- | Assignment of AST onto some other structure (typically terms).
--
-- Parsing yields an AST represented as a Rose tree labelled with symbols in the languages grammar and source locations (byte Range and SourceSpan). An Assignment represents a (partial) map from AST nodes onto some other structure; in essence, its a parser that operates over trees. (For our purposes, this structure is typically Terms annotated with source locations.) Assignments are able to match based on symbol, sequence, and hierarchy; thus, in @x = y@, both @x@ and @y@ might have the same symbol, @Identifier@, the left can be assigned to a variable declaration, while the right can be assigned to a variable reference.
@ -62,20 +62,19 @@
module Data.Syntax.Assignment
( Assignment
, Location
, AST
, location
, Data.Syntax.Assignment.project
, symbol
, source
, children
, Rose(..)
, RoseF(..)
, Node
, AST
, Result(..)
, Error(..)
, ErrorCause(..)
, showError
, showExpectation
, assign
, assignBy
, runAssignment
, AssignmentState(..)
, makeState
@ -84,7 +83,7 @@ module Data.Syntax.Assignment
import Control.Monad.Free.Freer
import Data.ByteString (isSuffixOf)
import Data.Functor.Classes
import Data.Functor.Foldable hiding (Nil)
import Data.Functor.Foldable as F hiding (Nil)
import qualified Data.IntMap.Lazy as IntMap
import Data.Ix (inRange)
import Data.List.NonEmpty (nonEmpty)
@ -102,75 +101,74 @@ import Text.Show hiding (show)
-- | Assignment from an AST with some set of 'symbol's onto some other value.
--
-- This is essentially a parser.
type Assignment node = Freer (AssignmentF node)
type Assignment ast grammar = Freer (AssignmentF ast grammar)
data AssignmentF node a where
Location :: HasCallStack => AssignmentF node Location
Source :: HasCallStack => AssignmentF symbol ByteString
Children :: HasCallStack => Assignment symbol a -> AssignmentF symbol a
Choose :: HasCallStack => IntMap.IntMap a -> AssignmentF node a
Alt :: HasCallStack => a -> a -> AssignmentF symbol a
Empty :: HasCallStack => AssignmentF symbol a
Throw :: HasCallStack => Error symbol -> AssignmentF (Node symbol) a
Catch :: HasCallStack => a -> (Error symbol -> a) -> AssignmentF (Node symbol) a
data AssignmentF ast grammar a where
Location :: HasCallStack => AssignmentF ast grammar (Record Location)
Project :: HasCallStack => (forall x. Base ast x -> a) -> AssignmentF ast grammar a
Source :: HasCallStack => AssignmentF ast grammar ByteString
Children :: HasCallStack => Assignment ast grammar a -> AssignmentF ast grammar a
Choose :: HasCallStack => IntMap.IntMap a -> AssignmentF ast grammar a
Alt :: HasCallStack => a -> a -> AssignmentF ast grammar a
Empty :: HasCallStack => AssignmentF ast grammar a
Throw :: HasCallStack => Error grammar -> AssignmentF ast grammar a
Catch :: HasCallStack => a -> (Error grammar -> a) -> AssignmentF ast grammar a
-- | Zero-width production of the current location.
--
-- If assigning at the end of input or at the end of a list of children, the loccation will be returned as an empty Range and SourceSpan at the current offset. Otherwise, it will be the Range and SourceSpan of the current node.
location :: HasCallStack => Assignment (Node grammar) Location
location :: HasCallStack => Assignment ast grammar (Record Location)
location = Location `Then` return
-- | Zero-width projection of the current node.
--
-- Since this is zero-width, care must be taken not to repeat it without chaining on other rules. I.e. 'many (project f *> b)' is fine, but 'many (project f)' is not.
project :: HasCallStack => (forall x. Base ast x -> a) -> Assignment ast grammar a
project projection = Project projection `Then` return
-- | Zero-width match of a node with the given symbol, producing the current nodes location.
--
-- Since this is zero-width, care must be taken not to repeat it without chaining on other rules. I.e. 'many (symbol A *> b)' is fine, but 'many (symbol A)' is not.
symbol :: (Enum symbol, Eq symbol, HasCallStack) => symbol -> Assignment (Node symbol) Location
symbol :: (Enum grammar, Eq grammar, HasCallStack) => grammar -> Assignment ast grammar (Record Location)
symbol s = withFrozenCallStack $ Choose (IntMap.singleton (fromEnum s) ()) `Then` (const location)
-- | A rule to produce a nodes source as a ByteString.
source :: HasCallStack => Assignment symbol ByteString
source :: HasCallStack => Assignment ast grammar ByteString
source = withFrozenCallStack $ Source `Then` return
-- | Match a node by applying an assignment to its children.
children :: HasCallStack => Assignment symbol a -> Assignment symbol a
children :: HasCallStack => Assignment ast grammar a -> Assignment ast grammar a
children forEach = withFrozenCallStack $ Children forEach `Then` return
-- | A rose tree.
data Rose a = Rose { roseValue :: !a, roseChildren :: ![Rose a] }
deriving (Eq, Functor, Show)
-- | A location specified as possibly-empty intervals of bytes and line/column positions.
type Location = Record '[Info.Range, Info.SourceSpan]
-- | The label annotating a node in the AST, specified as the pairing of its symbol and location information.
type Node grammar = Record '[Maybe grammar, Info.Range, Info.SourceSpan]
-- | An abstract syntax tree in some 'grammar', with symbols and location information annotating each node.
type AST grammar = Rose (Node grammar)
type Location = '[Info.Range, Info.SourceSpan]
-- | An AST node labelled with symbols and source location.
type AST grammar = Cofree [] (Record (Maybe grammar ': Location))
-- | The result of assignment, possibly containing an error.
data Result symbol a = Result { resultError :: Maybe (Error symbol), resultValue :: Maybe a }
data Result grammar a = Result { resultError :: Maybe (Error grammar), resultValue :: Maybe a }
deriving (Eq, Foldable, Functor, Traversable)
data Error symbol where
data Error grammar where
Error
:: HasCallStack
=> { errorPos :: Info.SourcePos
, errorCause :: ErrorCause symbol
} -> Error symbol
, errorCause :: ErrorCause grammar
} -> Error grammar
deriving instance Eq symbol => Eq (Error symbol)
deriving instance Show symbol => Show (Error symbol)
deriving instance Eq grammar => Eq (Error grammar)
deriving instance Show grammar => Show (Error grammar)
data ErrorCause symbol
= UnexpectedSymbol [symbol] symbol
| UnexpectedEndOfInput [symbol]
| ParseError [symbol]
data ErrorCause grammar
= UnexpectedSymbol [grammar] grammar
| UnexpectedEndOfInput [grammar]
| ParseError [grammar]
deriving (Eq, Show)
-- | Pretty-print an Error with reference to the source where it occurred.
showError :: Show symbol => Source.Source -> Error symbol -> String
showError :: Show grammar => Source.Source -> Error grammar -> String
showError source error@Error{..}
= withSGRCode [SetConsoleIntensity BoldIntensity] (showSourcePos Nothing errorPos) . showString ": " . withSGRCode [SetColor Foreground Vivid Red] (showString "error") . showString ": " . showExpectation error . showChar '\n'
. showString (toS context) . (if isSuffixOf "\n" context then identity else showChar '\n')
@ -183,14 +181,14 @@ showError source error@Error{..}
showSGRCode = showString . setSGRCode
withSGRCode code s = showSGRCode code . s . showSGRCode []
showExpectation :: Show symbol => Error symbol -> ShowS
showExpectation :: Show grammar => Error grammar -> ShowS
showExpectation Error{..} = case errorCause of
UnexpectedEndOfInput [] -> showString "no rule to match at end of input nodes"
UnexpectedEndOfInput symbols -> showString "expected " . showSymbols symbols . showString " at end of input nodes"
UnexpectedSymbol symbols a -> showString "expected " . showSymbols symbols . showString ", but got " . shows a
ParseError symbols -> showString "expected " . showSymbols symbols . showString ", but got parse error"
showSymbols :: Show symbol => [symbol] -> ShowS
showSymbols :: Show grammar => [grammar] -> ShowS
showSymbols [] = showString "end of input nodes"
showSymbols [symbol] = shows symbol
showSymbols [a, b] = shows a . showString " or " . shows b
@ -201,82 +199,87 @@ showSourcePos :: Maybe FilePath -> Info.SourcePos -> ShowS
showSourcePos path Info.SourcePos{..} = maybe (showParen True (showString "interactive")) showString path . showChar ':' . shows line . showChar ':' . shows column
-- | Run an assignment over an AST exhaustively.
assign :: (Symbol grammar, Enum grammar, Eq grammar, HasCallStack) => Assignment (Node grammar) a -> Source.Source -> AST grammar -> Result grammar a
assign assignment source = fmap snd . assignAllFrom assignment . makeState source . pure
assign :: (HasField fields Info.Range, HasField fields Info.SourceSpan, HasField fields (Maybe grammar), Symbol grammar, Enum grammar, Eq grammar, Traversable f, HasCallStack) => Assignment (Cofree f (Record fields)) grammar a -> Source.Source -> Cofree f (Record fields) -> Result grammar a
assign = assignBy (\ (r :< _) -> getField r :. getField r :. getField r :. Nil)
assignAllFrom :: (Symbol grammar, Enum grammar, Eq grammar, HasCallStack) => Assignment (Node grammar) a -> AssignmentState grammar -> Result grammar (AssignmentState grammar, a)
assignAllFrom assignment state = case runAssignment assignment state of
Result err (Just (state, a)) -> case stateNodes (dropAnonymous state) of
[] -> Result Nothing (Just (state, a))
Rose (Just s :. _) _ :_ -> Result (err <|> Just (Error (statePos state) (UnexpectedSymbol [] s))) Nothing
Rose (Nothing :. _) _ :_ -> Result (err <|> Just (Error (statePos state) (ParseError []))) Nothing
assignBy :: (Symbol grammar, Enum grammar, Eq grammar, Recursive ast, Foldable (Base ast), HasCallStack) => (forall x. Base ast x -> Record (Maybe grammar ': Location)) -> Assignment ast grammar a -> Source.Source -> ast -> Result grammar a
assignBy toRecord assignment source = fmap fst . assignAllFrom toRecord assignment . makeState source . pure
assignAllFrom :: (Symbol grammar, Enum grammar, Eq grammar, Recursive ast, Foldable (Base ast), HasCallStack) => (forall x. Base ast x -> Record (Maybe grammar ': Location)) -> Assignment ast grammar a -> AssignmentState ast -> Result grammar (a, AssignmentState ast)
assignAllFrom toRecord assignment state = case runAssignment toRecord assignment state of
Result err (Just (a, state)) -> case stateNodes (dropAnonymous (rhead . toRecord) state) of
[] -> pure (a, state)
node : _ -> Result (err <|> Just (Error (statePos state) (maybe (ParseError []) (UnexpectedSymbol []) (rhead (toRecord (F.project node)))))) Nothing
r -> r
-- | Run an assignment of nodes in a grammar onto terms in a syntax.
runAssignment :: forall grammar a. (Symbol grammar, Enum grammar, Eq grammar, HasCallStack) => Assignment (Node grammar) a -> AssignmentState grammar -> Result grammar (AssignmentState grammar, a)
runAssignment = iterFreer run . fmap (\ a state -> pure (state, a))
where run :: AssignmentF (Node grammar) x -> (x -> AssignmentState grammar -> Result grammar (AssignmentState grammar, a)) -> AssignmentState grammar -> Result grammar (AssignmentState grammar, a)
runAssignment :: forall grammar a ast. (Symbol grammar, Enum grammar, Eq grammar, Recursive ast, Foldable (Base ast), HasCallStack) => (forall x. Base ast x -> Record (Maybe grammar ': Location)) -> Assignment ast grammar a -> AssignmentState ast -> Result grammar (a, AssignmentState ast)
runAssignment toRecord = iterFreer run . fmap ((pure .) . (,))
where run :: AssignmentF ast grammar x -> (x -> AssignmentState ast -> Result grammar (a, AssignmentState ast)) -> AssignmentState ast -> Result grammar (a, AssignmentState ast)
run assignment yield initialState = case (assignment, stateNodes) of
(Location, Rose (_ :. location) _ : _) -> yield location state
(Location, node : _) -> yield (rtail (toRecord (F.project node))) state
(Location, []) -> yield (Info.Range stateOffset stateOffset :. Info.SourceSpan statePos statePos :. Nil) state
(Source, Rose (_ :. range :. _) _ : _) -> yield (Source.sourceText (Source.slice (offsetRange range (negate stateOffset)) stateSource)) (advanceState state)
(Children childAssignment, Rose _ children : _) -> case assignAllFrom childAssignment state { stateNodes = children } of
Result _ (Just (state', a)) -> yield a (advanceState state' { stateNodes = stateNodes })
(Project projection, node : _) -> yield (projection (F.project node)) state
(Source, node : _) -> yield (Source.sourceText (Source.slice (offsetRange (Info.byteRange (toRecord (F.project node))) (negate stateOffset)) stateSource)) (advanceState (rtail . toRecord) state)
(Children childAssignment, node : _) -> case assignAllFrom toRecord childAssignment state { stateNodes = toList (F.project node) } of
Result _ (Just (a, state')) -> yield a (advanceState (rtail . toRecord) state' { stateNodes = stateNodes })
Result err Nothing -> Result err Nothing
(Choose choices, Rose (Just symbol :. _) _ : _) | Just a <- IntMap.lookup (fromEnum symbol) choices -> yield a state
(Choose choices, node : _) | Just symbol :. _ <- toRecord (F.project node), Just a <- IntMap.lookup (fromEnum symbol) choices -> yield a state
-- Nullability: some rules, e.g. 'pure a' and 'many a', should match at the end of input. Either side of an alternation may be nullable, ergo Alt can match at the end of input.
(Alt a b, _) -> yield a state <|> yield b state
(Throw e, _) -> Result (Just e) Nothing
(Catch during handler, _) -> case yield during state of
Result _ (Just (state', a)) -> Result Nothing (Just (state', a))
Result err Nothing -> maybe (Result Nothing Nothing) (flip yield state . handler) err
Result _ (Just (a, state')) -> pure (a, state')
Result err Nothing -> maybe empty (flip yield state . handler) err
(_, []) -> Result (Just (Error statePos (UnexpectedEndOfInput expectedSymbols))) Nothing
(_, Rose (symbol :. _ :. nodeSpan :. Nil) _:_) -> Result (Just (maybe (Error (Info.spanStart nodeSpan) (ParseError expectedSymbols)) (Error (Info.spanStart nodeSpan) . UnexpectedSymbol expectedSymbols) symbol)) Nothing
(_, node:_) -> let Info.SourceSpan startPos _ = Info.sourceSpan (toRecord (F.project node)) in Result (Error startPos . UnexpectedSymbol expectedSymbols <$> rhead (toRecord (F.project node)) <|> Just (Error startPos (ParseError expectedSymbols))) Nothing
where state@AssignmentState{..} = case assignment of
Choose choices | all ((== Regular) . symbolType) (choiceSymbols choices) -> dropAnonymous initialState
Choose choices | all ((== Regular) . symbolType) (choiceSymbols choices) -> dropAnonymous (rhead . toRecord) initialState
_ -> initialState
expectedSymbols = case assignment of
Choose choices -> choiceSymbols choices
_ -> []
choiceSymbols choices = ((toEnum :: Int -> grammar) <$> IntMap.keys choices)
choiceSymbols choices = (toEnum :: Int -> grammar) <$> IntMap.keys choices
dropAnonymous :: Symbol grammar => AssignmentState grammar -> AssignmentState grammar
dropAnonymous state = state { stateNodes = dropWhile ((`notElem` [Just Regular, Nothing]) . fmap symbolType . rhead . roseValue) (stateNodes state) }
dropAnonymous :: (Symbol grammar, Recursive ast) => (forall x. Base ast x -> Maybe grammar) -> AssignmentState ast -> AssignmentState ast
dropAnonymous toSymbol state = state { stateNodes = dropWhile ((`notElem` [Just Regular, Nothing]) . fmap symbolType . toSymbol . F.project) (stateNodes state) }
-- | Advances the state past the current (head) node (if any), dropping it off stateNodes & its corresponding bytes off of stateSource, and updating stateOffset & statePos to its end. Exhausted 'AssignmentState's (those without any remaining nodes) are returned unchanged.
advanceState :: AssignmentState grammar -> AssignmentState grammar
advanceState state@AssignmentState{..}
| Rose (_ :. range :. span :. _) _ : rest <- stateNodes = AssignmentState (Info.end range) (Info.spanEnd span) (Source.drop (Info.end range - stateOffset) stateSource) rest
advanceState :: Recursive ast => (forall x. Base ast x -> Record Location) -> AssignmentState ast -> AssignmentState ast
advanceState toLocation state@AssignmentState{..}
| node : rest <- stateNodes
, range :. span :. Nil <- toLocation (F.project node) = AssignmentState (Info.end range) (Info.spanEnd span) (Source.drop (Info.end range - stateOffset) stateSource) rest
| otherwise = state
-- | State kept while running 'Assignment's.
data AssignmentState grammar = AssignmentState
data AssignmentState ast = AssignmentState
{ stateOffset :: Int -- ^ The offset into the Source thus far reached, measured in bytes.
, statePos :: Info.SourcePos -- ^ The (1-indexed) line/column position in the Source thus far reached.
, stateSource :: Source.Source -- ^ The remaining Source. Equal to dropping 'stateOffset' bytes off the original input Source.
, stateNodes :: [AST grammar] -- ^ The remaining nodes to assign. Note that 'children' rules recur into subterms, and thus this does not necessarily reflect all of the terms remaining to be assigned in the overall algorithm, only those “in scope.”
, stateNodes :: [ast] -- ^ The remaining nodes to assign. Note that 'children' rules recur into subterms, and thus this does not necessarily reflect all of the terms remaining to be assigned in the overall algorithm, only those “in scope.”
}
deriving (Eq, Show)
makeState :: Source.Source -> [AST grammar] -> AssignmentState grammar
makeState :: Source.Source -> [ast] -> AssignmentState ast
makeState source nodes = AssignmentState 0 (Info.SourcePos 1 1) source nodes
-- Instances
instance Enum symbol => Alternative (Assignment (Node symbol)) where
empty :: HasCallStack => Assignment (Node symbol) a
instance Enum grammar => Alternative (Assignment ast grammar) where
empty :: HasCallStack => Assignment ast grammar a
empty = Empty `Then` return
(<|>) :: HasCallStack => Assignment (Node symbol) a -> Assignment (Node symbol) a -> Assignment (Node symbol) a
(<|>) :: HasCallStack => Assignment ast grammar a -> Assignment ast grammar a -> Assignment ast grammar a
a <|> b = case (a, b) of
(_, Empty `Then` _) -> a
(Empty `Then` _, _) -> b
(Choose choices1 `Then` continue1, Choose choices2 `Then` continue2) -> Choose (IntMap.union (fmap continue1 choices1) (fmap continue2 choices2)) `Then` identity
_ -> wrap $ Alt a b
instance Show symbol => Show1 (AssignmentF (Node symbol)) where
instance Show grammar => Show1 (AssignmentF ast grammar) where
liftShowsPrec sp sl d a = case a of
Location -> showString "Location" . sp d (Info.Range 0 0 :. Info.SourceSpan (Info.SourcePos 0 0) (Info.SourcePos 0 0) :. Nil)
Project projection -> showsUnaryWith (const (const (showChar '_'))) "Project" d projection
Source -> showString "Source" . showChar ' ' . sp d ""
Children a -> showsUnaryWith (liftShowsPrec sp sl) "Children" d a
Choose choices -> showsUnaryWith (liftShowsPrec (liftShowsPrec sp sl) (liftShowList sp sl)) "Choose" d (IntMap.toList choices)
@ -285,18 +288,10 @@ instance Show symbol => Show1 (AssignmentF (Node symbol)) where
Throw e -> showsUnaryWith showsPrec "Throw" d e
Catch during handler -> showsBinaryWith sp (const (const (showChar '_'))) "Catch" d during handler
type instance Base (Rose a) = RoseF a
data RoseF a f = RoseF a [f]
deriving (Eq, Foldable, Functor, Show, Traversable)
instance Recursive (Rose a) where project (Rose a as) = RoseF a as
instance Corecursive (Rose a) where embed (RoseF a as) = Rose a as
instance Show2 Result where
liftShowsPrec2 sp1 sl1 sp2 sl2 d (Result es a) = showsBinaryWith (liftShowsPrec (liftShowsPrec sp1 sl1) (liftShowList sp1 sl1)) (liftShowsPrec sp2 sl2) "Result" d es a
instance (Show symbol, Show a) => Show (Result symbol a) where
instance (Show grammar, Show a) => Show (Result grammar a) where
showsPrec = showsPrec2
instance Show1 Error where
@ -308,18 +303,18 @@ instance Show1 ErrorCause where
UnexpectedEndOfInput expected -> showsUnaryWith (liftShowsPrec sp sl) "UnexpectedEndOfInput" d expected
ParseError expected -> showsUnaryWith (liftShowsPrec sp sl) "ParseError" d expected
instance Applicative (Result symbol) where
instance Applicative (Result grammar) where
pure = Result Nothing . Just
Result e1 f <*> Result e2 a = Result (e1 <|> e2) (f <*> a)
instance Alternative (Result symbol) where
instance Alternative (Result grammar) where
empty = Result Nothing Nothing
Result e (Just a) <|> _ = Result e (Just a)
Result e1 Nothing <|> Result e2 b = Result (e1 <|> e2) b
instance MonadError (Error symbol) (Assignment (Node symbol)) where
throwError :: HasCallStack => Error symbol -> Assignment (Node symbol) a
instance MonadError (Error grammar) (Assignment ast grammar) where
throwError :: HasCallStack => Error grammar -> Assignment ast grammar a
throwError error = withFrozenCallStack $ Throw error `Then` return
catchError :: HasCallStack => Assignment (Node symbol) a -> (Error symbol -> Assignment (Node symbol) a) -> Assignment (Node symbol) a
catchError :: HasCallStack => Assignment ast grammar a -> (Error grammar -> Assignment ast grammar a) -> Assignment ast grammar a
catchError during handler = withFrozenCallStack $ Catch during handler `Then` identity

105
src/Data/Syntax/Markup.hs Normal file
View File

@ -0,0 +1,105 @@
{-# LANGUAGE DeriveAnyClass #-}
module Data.Syntax.Markup where
import Data.Align.Generic
import Data.Functor.Classes.Eq.Generic
import Data.Functor.Classes.Show.Generic
import GHC.Generics
import Prologue hiding (Text)
newtype Document a = Document [a]
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 Document where liftEq = genericLiftEq
instance Show1 Document where liftShowsPrec = genericLiftShowsPrec
-- Block elements
newtype Paragraph a = Paragraph [a]
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 Paragraph where liftEq = genericLiftEq
instance Show1 Paragraph where liftShowsPrec = genericLiftShowsPrec
data Heading a = Heading { headingLevel :: Int, headingContent :: [a] }
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 Heading where liftEq = genericLiftEq
instance Show1 Heading where liftShowsPrec = genericLiftShowsPrec
newtype UnorderedList a = UnorderedList [a]
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 UnorderedList where liftEq = genericLiftEq
instance Show1 UnorderedList where liftShowsPrec = genericLiftShowsPrec
newtype OrderedList a = OrderedList [a]
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 OrderedList where liftEq = genericLiftEq
instance Show1 OrderedList where liftShowsPrec = genericLiftShowsPrec
newtype BlockQuote a = BlockQuote [a]
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 BlockQuote where liftEq = genericLiftEq
instance Show1 BlockQuote where liftShowsPrec = genericLiftShowsPrec
data ThematicBreak a = ThematicBreak
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 ThematicBreak where liftEq = genericLiftEq
instance Show1 ThematicBreak where liftShowsPrec = genericLiftShowsPrec
data HTMLBlock a = HTMLBlock ByteString
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 HTMLBlock where liftEq = genericLiftEq
instance Show1 HTMLBlock where liftShowsPrec = genericLiftShowsPrec
-- Inline elements
newtype Strong a = Strong [a]
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 Strong where liftEq = genericLiftEq
instance Show1 Strong where liftShowsPrec = genericLiftShowsPrec
newtype Emphasis a = Emphasis [a]
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 Emphasis where liftEq = genericLiftEq
instance Show1 Emphasis where liftShowsPrec = genericLiftShowsPrec
newtype Text a = Text ByteString
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 Text where liftEq = genericLiftEq
instance Show1 Text where liftShowsPrec = genericLiftShowsPrec
data Link a = Link { linkURL :: ByteString, linkTitle :: Maybe ByteString }
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 Link where liftEq = genericLiftEq
instance Show1 Link where liftShowsPrec = genericLiftShowsPrec
data Image a = Image { imageURL :: ByteString, imageTitle :: Maybe ByteString }
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 Image where liftEq = genericLiftEq
instance Show1 Image where liftShowsPrec = genericLiftShowsPrec
data Code a = Code { codeLanguage :: Maybe ByteString, codeContent :: ByteString }
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 Code where liftEq = genericLiftEq
instance Show1 Code where liftShowsPrec = genericLiftShowsPrec
data LineBreak a = LineBreak
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Eq1 LineBreak where liftEq = genericLiftEq
instance Show1 LineBreak where liftShowsPrec = genericLiftShowsPrec

View File

@ -1,40 +1,73 @@
{-# LANGUAGE DataKinds #-}
module Language.Markdown where
{-# LANGUAGE DataKinds, TypeOperators #-}
module Language.Markdown
( Grammar(..)
, cmarkParser
, toGrammar
) where
import CMark
import Data.Record
import Data.Text
import Data.Syntax.Assignment (Location)
import Info
import Prologue
import Prologue hiding (Location)
import Source
import Syntax
import Text.Parser.TreeSitter.Language (Symbol(..), SymbolType(..))
cmarkParser :: Source -> IO (Cofree (Syntax Text) (Record DefaultFields))
cmarkParser source = pure . toTerm (totalRange source) (rangeToSourceSpan source $ totalRange source) $ commonmarkToNode [ optSourcePos, optSafe ] (toText source)
where toTerm :: Range -> SourceSpan -> Node -> Cofree (Syntax Text) (Record DefaultFields)
data Grammar
= Document
| ThematicBreak
| Paragraph
| BlockQuote
| HTMLBlock
| CustomBlock
| CodeBlock
| Heading
| List
| Item
| Text
| SoftBreak
| LineBreak
| HTMLInline
| CustomInline
| Code
| Emphasis
| Strong
| Link
| Image
deriving (Bounded, Enum, Eq, Ord, Show)
cmarkParser :: Source -> IO (Cofree [] (Record (NodeType ': Location)))
cmarkParser source = pure . toTerm (totalRange source) (totalSpan source) $ commonmarkToNode [ optSourcePos, optSafe ] (toText source)
where toTerm :: Range -> SourceSpan -> Node -> Cofree [] (Record (NodeType ': Location))
toTerm within withinSpan (Node position t children) =
let
range = maybe within (sourceSpanToRange source . toSpan) position
span = maybe withinSpan toSpan position
in
cofree $ (range :. toCategory t :. span :. Nil) :< case t of
-- Leaves
CODE text -> Leaf text
TEXT text -> Leaf text
CODE_BLOCK _ text -> Leaf text
-- Branches
_ -> Indexed (toTerm range span <$> children)
let range = maybe within (sourceSpanToRange source . toSpan) position
span = maybe withinSpan toSpan position
in cofree $ (t :. range :. span :. Nil) :< (toTerm range span <$> children)
toCategory :: NodeType -> Category
toCategory (TEXT _) = Other "text"
toCategory (CODE _) = Other "code"
toCategory (HTML_BLOCK _) = Other "html"
toCategory (HTML_INLINE _) = Other "html"
toCategory (HEADING _) = Other "heading"
toCategory (LIST ListAttributes{..}) = Other $ case listType of
BULLET_LIST -> "unordered list"
ORDERED_LIST -> "ordered list"
toCategory LINK{} = Other "link"
toCategory IMAGE{} = Other "image"
toCategory t = Other (show t)
toSpan PosInfo{..} = SourceSpan (SourcePos (pred startLine) (pred startColumn)) (SourcePos (pred endLine) endColumn)
toSpan PosInfo{..} = SourceSpan (SourcePos startLine startColumn) (SourcePos endLine (succ endColumn))
toGrammar :: NodeType -> Grammar
toGrammar DOCUMENT{} = Document
toGrammar THEMATIC_BREAK{} = ThematicBreak
toGrammar PARAGRAPH{} = Paragraph
toGrammar BLOCK_QUOTE{} = BlockQuote
toGrammar HTML_BLOCK{} = HTMLBlock
toGrammar CUSTOM_BLOCK{} = CustomBlock
toGrammar CODE_BLOCK{} = CodeBlock
toGrammar HEADING{} = Heading
toGrammar LIST{} = List
toGrammar ITEM{} = Item
toGrammar TEXT{} = Text
toGrammar SOFTBREAK{} = SoftBreak
toGrammar LINEBREAK{} = LineBreak
toGrammar HTML_INLINE{} = HTMLInline
toGrammar CUSTOM_INLINE{} = CustomInline
toGrammar CODE{} = Code
toGrammar EMPH{} = Emphasis
toGrammar STRONG{} = Strong
toGrammar LINK{} = Link
toGrammar IMAGE{} = Image
instance Symbol Grammar where
symbolType _ = Regular

View File

@ -0,0 +1,123 @@
{-# LANGUAGE DataKinds, DeriveAnyClass, GADTs, RankNTypes, TypeOperators #-}
module Language.Markdown.Syntax
( assignment
, Syntax
, Grammar
, Error
, Term
) where
import qualified CMark
import Data.Functor.Union
import Data.Record
import Data.Syntax.Assignment hiding (Assignment, Error)
import qualified Data.Syntax.Assignment as Assignment
import qualified Data.Syntax.Markup as Markup
import qualified Data.Syntax as Syntax
import qualified Data.Text as Text
import GHC.Stack
import Language.Markdown as Grammar (Grammar(..))
import Prologue hiding (Location, link, list)
import qualified Term
type Syntax =
'[ Markup.Document
-- Block elements
, Markup.BlockQuote
, Markup.Heading
, Markup.HTMLBlock
, Markup.OrderedList
, Markup.Paragraph
, Markup.ThematicBreak
, Markup.UnorderedList
-- Inline elements
, Markup.Code
, Markup.Emphasis
, Markup.Image
, Markup.LineBreak
, Markup.Link
, Markup.Strong
, Markup.Text
-- Assignment errors; cmark does not provide parse errors.
, Syntax.Error Error
, []
]
type Error = Assignment.Error Grammar
type Term = Term.Term (Union Syntax) (Record Location)
type Assignment = HasCallStack => Assignment.Assignment (Cofree [] (Record (CMark.NodeType ': Location))) Grammar Term
assignment :: Assignment
assignment = makeTerm <$> symbol Document <*> children (Markup.Document <$> many blockElement)
-- Block elements
blockElement :: Assignment
blockElement = paragraph <|> list <|> heading <|> blockQuote <|> codeBlock <|> thematicBreak <|> htmlBlock
paragraph :: Assignment
paragraph = makeTerm <$> symbol Paragraph <*> children (Markup.Paragraph <$> many inlineElement)
list :: Assignment
list = (cofree .) . (:<) <$> symbol List <*> (project (\ (((CMark.LIST CMark.ListAttributes{..}) :. _) :< _) -> case listType of
CMark.BULLET_LIST -> inj . Markup.UnorderedList
CMark.ORDERED_LIST -> inj . Markup.OrderedList) <*> children (many item))
item :: Assignment
item = makeTerm <$> symbol Item <*> children (many blockElement)
heading :: Assignment
heading = makeTerm <$> symbol Heading <*> (Markup.Heading <$> project (\ ((CMark.HEADING level :. _) :< _) -> level) <*> children (many inlineElement))
blockQuote :: Assignment
blockQuote = makeTerm <$> symbol BlockQuote <*> children (Markup.BlockQuote <$> many blockElement)
codeBlock :: Assignment
codeBlock = makeTerm <$> symbol CodeBlock <*> (Markup.Code <$> project (\ (((CMark.CODE_BLOCK language _) :. _) :< _) -> nullText language) <*> source)
thematicBreak :: Assignment
thematicBreak = makeTerm <$> symbol ThematicBreak <*> (Markup.ThematicBreak <$ source)
htmlBlock :: Assignment
htmlBlock = makeTerm <$> symbol HTMLBlock <*> (Markup.HTMLBlock <$> source)
-- Inline elements
inlineElement :: Assignment
inlineElement = strong <|> emphasis <|> text <|> link <|> image <|> code <|> lineBreak <|> softBreak
strong :: Assignment
strong = makeTerm <$> symbol Strong <*> children (Markup.Strong <$> many inlineElement)
emphasis :: Assignment
emphasis = makeTerm <$> symbol Emphasis <*> children (Markup.Emphasis <$> many inlineElement)
text :: Assignment
text = makeTerm <$> symbol Text <*> (Markup.Text <$> source)
link :: Assignment
link = makeTerm <$> symbol Link <*> (uncurry Markup.Link <$> project (\ (((CMark.LINK url title) :. _) :< _) -> (toS url, nullText title))) <* source
image :: Assignment
image = makeTerm <$> symbol Image <*> (uncurry Markup.Image <$> project (\ (((CMark.IMAGE url title) :. _) :< _) -> (toS url, nullText title))) <* source
code :: Assignment
code = makeTerm <$> symbol Code <*> (Markup.Code Nothing <$> source)
lineBreak :: Assignment
lineBreak = makeTerm <$> symbol LineBreak <*> (Markup.LineBreak <$ source)
softBreak :: Assignment
softBreak = makeTerm <$> symbol SoftBreak <*> (Markup.LineBreak <$ source)
-- Implementation details
makeTerm :: (InUnion fs f, HasCallStack) => a -> f (Term.Term (Union fs) a) -> Term.Term (Union fs) a
makeTerm a f = cofree $ a :< inj f
nullText :: Text.Text -> Maybe ByteString
nullText text = if Text.null text then Nothing else Just (toS text)

View File

@ -1,18 +1,19 @@
{-# LANGUAGE DataKinds, DeriveAnyClass, GeneralizedNewtypeDeriving, TypeOperators #-}
{-# LANGUAGE DataKinds, DeriveAnyClass, RankNTypes, TypeOperators #-}
module Language.Python.Syntax
( assignment
, Syntax
, Syntax'
, Grammar
, Error
, Term
) where
import Data.Align.Generic
import Data.Functor.Classes.Eq.Generic
import Data.Functor.Classes.Show.Generic
import Data.Functor.Union
import Data.Record
import qualified Data.Syntax as Syntax
import Data.Syntax.Assignment hiding (Error)
import Data.Syntax.Assignment hiding (Assignment, Error)
import qualified Data.Syntax.Assignment as Assignment
import qualified Data.Syntax.Comment as Comment
import qualified Data.Syntax.Declaration as Declaration
@ -23,10 +24,9 @@ import GHC.Generics
import GHC.Stack
import Language.Python.Grammar as Grammar
import Prologue hiding (Location)
import Term
import qualified Term
type Syntax = Union Syntax'
type Syntax' =
type Syntax =
'[ Comment.Comment
, Declaration.Comprehension
, Declaration.Function
@ -64,6 +64,8 @@ type Syntax' =
]
type Error = Assignment.Error Grammar
type Term = Term.Term (Union Syntax) (Record Location)
type Assignment = HasCallStack => Assignment.Assignment (AST Grammar) Grammar Term
-- | Ellipsis (used in splice expressions and alternatively can be used as a fill in expression, like `undefined` in Haskell)
data Ellipsis a = Ellipsis
@ -80,13 +82,13 @@ instance Eq1 Redirect where liftEq = genericLiftEq
instance Show1 Redirect where liftShowsPrec = genericLiftShowsPrec
-- | Assignment from AST in Python's grammar onto a program in Python's syntax.
assignment :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
assignment :: Assignment
assignment = makeTerm <$> symbol Module <*> children (many declaration)
declaration :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
declaration :: Assignment
declaration = handleError $ comment <|> statement <|> expression
statement :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
statement :: Assignment
statement = assertStatement
<|> assignment'
<|> augmentedAssignment
@ -99,10 +101,10 @@ statement = assertStatement
<|> printStatement
<|> returnStatement
expressionStatement :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
expressionStatement :: Assignment
expressionStatement = symbol ExpressionStatement *> children expression
expression :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
expression :: Assignment
expression = await
<|> binaryOperator
<|> booleanOperator
@ -122,13 +124,13 @@ expression = await
<|> tuple
<|> unaryOperator
dottedName :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
dottedName :: Assignment
dottedName = makeTerm <$> symbol DottedName <*> children (Expression.ScopeResolution <$> many expression)
ellipsis :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
ellipsis :: Assignment
ellipsis = makeTerm <$> symbol Grammar.Ellipsis <*> (Language.Python.Syntax.Ellipsis <$ source)
comparisonOperator :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
comparisonOperator :: Assignment
comparisonOperator = symbol ComparisonOperator >>= \ loc -> children (expression >>= \ lexpression -> makeComparison loc lexpression)
where
makeComparison loc lexpression = makeTerm loc <$ symbol AnonLAngle <*> (Expression.LessThan lexpression <$> expression)
@ -143,26 +145,26 @@ comparisonOperator = symbol ComparisonOperator >>= \ loc -> children (expression
<|> symbol AnonIs *> source *> (symbol AnonNot *> (makeTerm loc <$> Expression.Not <$> (makeTerm <$> location <*> (Expression.Equal lexpression <$> expression)))
<|> (makeTerm loc <$> Expression.Equal lexpression <$> expression))
notOperator :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
notOperator :: Assignment
notOperator = makeTerm <$> symbol NotOperator <*> children (Expression.Not <$> expression)
keywordIdentifier :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
keywordIdentifier :: Assignment
keywordIdentifier = makeTerm <$> symbol KeywordIdentifier <*> children (Syntax.Identifier <$> source)
tuple :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
tuple :: Assignment
tuple = makeTerm <$> symbol Tuple <*> children (Literal.Tuple <$> (many expression))
-- TODO: Consider flattening single element lists
expressionList :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
expressionList :: Assignment
expressionList = makeTerm <$> symbol ExpressionList <*> children (many expression)
unaryOperator :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
unaryOperator :: Assignment
unaryOperator = symbol UnaryOperator >>= \ location -> arithmetic location <|> bitwise location <|> children ( symbol AnonPlus *> expression )
where
arithmetic location = makeTerm location . Expression.Negate <$> children ( symbol AnonMinus *> expression )
bitwise location = makeTerm location . Expression.Complement <$> children ( symbol AnonTilde *> expression )
binaryOperator :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
binaryOperator :: Assignment
binaryOperator = symbol BinaryOperator >>= \ location -> children (expression >>= \ lexpression ->
makeTerm location <$> arithmetic lexpression
<|> makeTerm location <$> bitwise lexpression)
@ -180,17 +182,17 @@ binaryOperator = symbol BinaryOperator >>= \ location -> children (expression >>
<|> symbol AnonLAngleLAngle *> (Expression.LShift lexpression <$> expression)
<|> symbol AnonRAngleRAngle *> (Expression.RShift lexpression <$> expression)
booleanOperator :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
booleanOperator :: Assignment
booleanOperator = makeTerm <$> symbol BooleanOperator <*> children ( expression >>= booleanOperator' )
where
booleanOperator' lexpression = symbol AnonAnd *> (Expression.And lexpression <$> expression)
<|> symbol AnonOr *> (Expression.Or lexpression <$> expression)
assignment' :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
assignment' :: Assignment
assignment' =
makeTerm <$> symbol Assignment <*> children (Statement.Assignment <$> expressionList <*> rvalue)
augmentedAssignment :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
augmentedAssignment :: Assignment
augmentedAssignment = makeTerm <$> symbol AugmentedAssignment <*> children (expressionList >>= \ lvalue -> Statement.Assignment lvalue <$>
(makeTerm <$> symbol AnonPlusEqual <*> (Expression.Plus lvalue <$> rvalue)
<|> makeTerm <$> symbol AnonMinusEqual <*> (Expression.Minus lvalue <$> rvalue)
@ -205,56 +207,56 @@ augmentedAssignment = makeTerm <$> symbol AugmentedAssignment <*> children (expr
<|> makeTerm <$> symbol AnonLAngleLAngleEqual <*> (Expression.LShift lvalue <$> rvalue)
<|> makeTerm <$> symbol AnonCaretEqual <*> (Expression.BXOr lvalue <$> rvalue)))
yield :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
yield :: Assignment
yield = makeTerm <$> symbol Yield <*> (Statement.Yield <$> children ( expression <|> expressionList <|> emptyTerm ))
rvalue :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
rvalue :: Assignment
rvalue = expressionList <|> assignment' <|> augmentedAssignment <|> yield
identifier :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
identifier :: Assignment
identifier = makeTerm <$> symbol Identifier <*> (Syntax.Identifier <$> source)
literal :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
literal :: Assignment
literal = string <|> integer <|> float <|> boolean <|> none <|> concatenatedString <|> list' <|> dictionary <|> set
set :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
set :: Assignment
set = makeTerm <$> symbol Set <*> children (Literal.Set <$> many expression)
dictionary :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
dictionary :: Assignment
dictionary = makeTerm <$> symbol Dictionary <*> children (Literal.Hash <$> many pairs)
where pairs = makeTerm <$> symbol Pair <*> children (Literal.KeyValue <$> expression <*> expression)
list' :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
list' :: Assignment
list' = makeTerm <$> symbol List <*> children (Literal.Array <$> many expression)
-- TODO: Wrap `Literal.TextElement` with a `Literal.String`
string :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
string :: Assignment
string = makeTerm <$> symbol String <*> (Literal.TextElement <$> source)
concatenatedString :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
concatenatedString :: Assignment
concatenatedString = makeTerm <$> symbol ConcatenatedString <*> children (Literal.TextElement . mconcat <$> many (symbol String *> source))
float :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
float :: Assignment
float = makeTerm <$> symbol Float <*> (Literal.Float <$> source)
integer :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
integer :: Assignment
integer = makeTerm <$> symbol Integer <*> (Literal.Integer <$> source)
comment :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
comment :: Assignment
comment = makeTerm <$> symbol Comment <*> (Comment.Comment <$> source)
-- TODO Possibly match against children for dotted name and identifiers
import' :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
import' :: Assignment
import' = makeTerm <$> symbol ImportStatement <*> children (Declaration.Import <$> many expression)
-- TODO Possibly match against children nodes
importFrom :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
importFrom :: Assignment
importFrom = makeTerm <$> symbol ImportFromStatement <*> children (Declaration.Import <$> many expression)
assertStatement :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
assertStatement :: Assignment
assertStatement = makeTerm <$ symbol AssertStatement <*> location <*> children (Expression.Call <$> (makeTerm <$> symbol AnonAssert <*> (Syntax.Identifier <$> source)) <*> many expression)
printStatement :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
printStatement :: Assignment
printStatement = do
location <- symbol PrintStatement
children $ do
@ -265,47 +267,47 @@ printStatement = do
redirectCallTerm location keyword = makeTerm location <$ symbol Chevron <*> (flip Redirect <$> children expression <*> printCallTerm location keyword)
printCallTerm location keyword = makeTerm location . Expression.Call keyword <$> many expression
globalStatement :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
globalStatement :: Assignment
globalStatement = makeTerm <$> symbol GlobalStatement <*> children (Expression.Call <$> (makeTerm <$> symbol AnonGlobal <*> (Syntax.Identifier <$> source)) <*> many identifier)
await :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
await :: Assignment
await = makeTerm <$> symbol Await <*> children (Expression.Call <$> (makeTerm <$> symbol AnonAwait <*> (Syntax.Identifier <$> source)) <*> many expression)
returnStatement :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
returnStatement :: Assignment
returnStatement = makeTerm <$> symbol ReturnStatement <*> (Statement.Return <$> children expressionList)
ifStatement :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
ifStatement :: Assignment
ifStatement = makeTerm <$> symbol IfStatement <*> children (Statement.If <$> expression <*> statement <*> (flip (foldr makeElif) <$> many elifClause <*> optionalElse))
where elseClause = symbol ElseClause *> children statement
elifClause = (,) <$ symbol ElifClause <*> location <*> children (Statement.If <$> expression <*> statement)
optionalElse = fromMaybe <$> emptyTerm <*> optional elseClause
makeElif (loc, makeIf) rest = makeTerm loc (makeIf rest)
memberAccess :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
memberAccess :: Assignment
memberAccess = makeTerm <$> symbol Attribute <*> children (Expression.MemberAccess <$> expression <*> expression)
subscript :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
subscript :: Assignment
subscript = makeTerm <$> symbol Subscript <*> children (Expression.Subscript <$> expression <*> many expression)
call :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
call :: Assignment
call = makeTerm <$> symbol Call <*> children (Expression.Call <$> identifier <*> (symbol ArgumentList *> children (many expression)
<|> some comprehension))
boolean :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
boolean :: Assignment
boolean = makeTerm <$> symbol Grammar.True <*> (Literal.true <$ source)
<|> makeTerm <$> symbol Grammar.False <*> (Literal.false <$ source)
none :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
none :: Assignment
none = makeTerm <$> symbol None <*> (Literal.Null <$ source)
lambda :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
lambda :: Assignment
lambda = makeTerm <$> symbol Lambda <*> children (Declaration.Function <$> lambdaIdentifier <*> lambdaParameters <*> lambdaBody)
where lambdaIdentifier = makeTerm <$> symbol AnonLambda <*> (Syntax.Identifier <$> source)
lambdaParameters = many identifier
lambdaBody = expression
comprehension :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
comprehension :: Assignment
comprehension = makeTerm <$> symbol GeneratorExpression <*> children (comprehensionDeclaration expression)
<|> makeTerm <$> symbol ListComprehension <*> children (comprehensionDeclaration expression)
<|> makeTerm <$> symbol SetComprehension <*> children (comprehensionDeclaration expression)
@ -316,16 +318,16 @@ comprehension = makeTerm <$> symbol GeneratorExpression <*> children (comprehen
makeComprehension (loc, makeRest) rest = makeTerm loc (makeRest rest)
nestedComprehension = (,) <$> location <*> (Declaration.Comprehension <$> expression <* symbol Variables <*> children (many expression))
conditionalExpression :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
conditionalExpression :: Assignment
conditionalExpression = makeTerm <$> symbol ConditionalExpression <*> children (expression >>= \ thenBranch -> expression >>= \ conditional -> Statement.If conditional thenBranch <$> (expression <|> emptyTerm))
makeTerm :: HasCallStack => InUnion Syntax' f => a -> f (Term Syntax a) -> Term Syntax a
makeTerm :: (HasCallStack, InUnion fs f) => a -> f (Term.Term (Union fs) a) -> Term.Term (Union fs) a
makeTerm a f = cofree (a :< inj f)
emptyTerm :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
emptyTerm :: Assignment
emptyTerm = makeTerm <$> location <*> pure Syntax.Empty
handleError :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location) -> Assignment (Node Grammar) (Term Syntax Location)
handleError :: Assignment -> Assignment
handleError = flip catchError $ \ error -> case errorCause error of
UnexpectedEndOfInput _ -> throwError error
_ -> makeTerm <$> location <*> (Syntax.Error error <$ source)

View File

@ -1,15 +1,16 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DataKinds, RankNTypes #-}
module Language.Ruby.Syntax
( assignment
, Syntax
, Syntax'
, Grammar
, Error
, Term
) where
import Data.Functor.Union
import Data.Record
import qualified Data.Syntax as Syntax
import Data.Syntax.Assignment hiding (Error)
import Data.Syntax.Assignment hiding (Assignment, Error)
import qualified Data.Syntax.Assignment as Assignment
import qualified Data.Syntax.Comment as Comment
import qualified Data.Syntax.Declaration as Declaration
@ -19,11 +20,10 @@ import qualified Data.Syntax.Statement as Statement
import GHC.Stack
import Language.Ruby.Grammar as Grammar
import Prologue hiding (for, get, Location, state, unless)
import Term
import qualified Term
-- | The type of Ruby syntax.
type Syntax = Union Syntax'
type Syntax' =
type Syntax =
'[Comment.Comment
, Declaration.Class
, Declaration.Method
@ -54,33 +54,35 @@ type Syntax' =
]
type Error = Assignment.Error Grammar
type Term = Term.Term (Union Syntax) (Record Location)
type Assignment = HasCallStack => Assignment.Assignment (AST Grammar) Grammar Term
-- | Assignment from AST in Rubys grammar onto a program in Rubys syntax.
assignment :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
assignment :: Assignment
assignment = makeTerm <$> symbol Program <*> children (many declaration)
declaration :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
declaration :: Assignment
declaration = handleError $ comment <|> class' <|> method
class' :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
class' :: Assignment
class' = makeTerm <$> symbol Class <*> children (Declaration.Class <$> (constant <|> scopeResolution) <*> (superclass <|> pure []) <*> many declaration)
where superclass = pure <$ symbol Superclass <*> children constant
scopeResolution = symbol ScopeResolution *> children (constant <|> identifier)
constant :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
constant :: Assignment
constant = makeTerm <$> symbol Constant <*> (Syntax.Identifier <$> source)
identifier :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
identifier :: Assignment
identifier = makeTerm <$> symbol Identifier <*> (Syntax.Identifier <$> source)
method :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
method :: Assignment
method = makeTerm <$> symbol Method <*> children (Declaration.Method <$> identifier <*> pure [] <*> statements)
statements :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
statements :: Assignment
statements = makeTerm <$> location <*> many statement
statement :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
statement :: Assignment
statement = handleError
$ exit Statement.Return Return
<|> exit Statement.Yield Yield
@ -95,36 +97,36 @@ statement = handleError
<|> assignment'
where exit construct sym = makeTerm <$> symbol sym <*> children ((construct .) . fromMaybe <$> emptyTerm <*> optional (symbol ArgumentList *> children statement))
lvalue :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
lvalue :: Assignment
lvalue = identifier
expression :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
expression :: Assignment
expression = identifier <|> statement
comment :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
comment :: Assignment
comment = makeTerm <$> symbol Comment <*> (Comment.Comment <$> source)
if' :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
if' :: Assignment
if' = ifElsif If
<|> makeTerm <$> symbol IfModifier <*> children (flip Statement.If <$> statement <*> statement <*> (makeTerm <$> location <*> pure Syntax.Empty))
where ifElsif s = makeTerm <$> symbol s <*> children (Statement.If <$> statement <*> statements <*> (fromMaybe <$> emptyTerm <*> optional (ifElsif Elsif <|> makeTerm <$> symbol Else <*> children (many statement))))
unless :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
unless :: Assignment
unless = makeTerm <$> symbol Unless <*> children (Statement.If <$> invert statement <*> statements <*> (fromMaybe <$> emptyTerm <*> optional (makeTerm <$> symbol Else <*> children (many statement))))
<|> makeTerm <$> symbol UnlessModifier <*> children (flip Statement.If <$> statement <*> invert statement <*> (makeTerm <$> location <*> pure Syntax.Empty))
while :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
while :: Assignment
while = makeTerm <$> symbol While <*> children (Statement.While <$> statement <*> statements)
<|> makeTerm <$> symbol WhileModifier <*> children (flip Statement.While <$> statement <*> statement)
until :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
until :: Assignment
until = makeTerm <$> symbol Until <*> children (Statement.While <$> invert statement <*> statements)
<|> makeTerm <$> symbol UntilModifier <*> children (flip Statement.While <$> statement <*> invert statement)
for :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
for :: Assignment
for = makeTerm <$> symbol For <*> children (Statement.ForEach <$> identifier <*> statement <*> statements)
assignment' :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
assignment' :: Assignment
assignment'
= makeTerm <$> symbol Assignment <*> children (Statement.Assignment <$> lvalue <*> expression)
<|> makeTerm <$> symbol OperatorAssignment <*> children (lvalue >>= \ var -> Statement.Assignment var <$>
@ -142,23 +144,23 @@ assignment'
<|> makeTerm <$> symbol AnonLAngleLAngleEqual <*> (Expression.LShift var <$> expression)
<|> makeTerm <$> symbol AnonCaretEqual <*> (Expression.BXOr var <$> expression)))
literal :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
literal :: Assignment
literal = makeTerm <$> symbol Grammar.True <*> (Literal.true <$ source)
<|> makeTerm <$> symbol Grammar.False <*> (Literal.false <$ source)
<|> makeTerm <$> symbol Grammar.Integer <*> (Literal.Integer <$> source)
<|> makeTerm <$> symbol Symbol <*> (Literal.Symbol <$> source)
<|> makeTerm <$> symbol Range <*> children (Literal.Range <$> statement <*> statement) -- FIXME: represent the difference between .. and ...
invert :: (InUnion fs Expression.Boolean, HasCallStack) => Assignment (Node grammar) (Term (Union fs) Location) -> Assignment (Node grammar) (Term (Union fs) Location)
invert :: (InUnion fs Expression.Boolean, HasCallStack) => Assignment.Assignment ast grammar (Term.Term (Union fs) (Record Location)) -> Assignment.Assignment ast grammar (Term.Term (Union fs) (Record Location))
invert term = makeTerm <$> location <*> fmap Expression.Not term
makeTerm :: (InUnion fs f, HasCallStack) => a -> f (Term (Union fs) a) -> (Term (Union fs) a)
makeTerm :: (InUnion fs f, HasCallStack) => a -> f (Term.Term (Union fs) a) -> Term.Term (Union fs) a
makeTerm a f = cofree $ a :< inj f
emptyTerm :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location)
emptyTerm :: Assignment
emptyTerm = makeTerm <$> location <*> pure Syntax.Empty
handleError :: HasCallStack => Assignment (Node Grammar) (Term Syntax Location) -> Assignment (Node Grammar) (Term Syntax Location)
handleError :: Assignment -> Assignment
handleError = flip catchError $ \ error -> case errorCause error of
UnexpectedEndOfInput _ -> throwError error
_ -> makeTerm <$> location <*> (Syntax.Error error <$ source)

View File

@ -1,15 +1,18 @@
{-# LANGUAGE GADTs, ScopedTypeVariables #-}
{-# LANGUAGE DataKinds, GADTs, RankNTypes, ScopedTypeVariables, TypeOperators #-}
module Parser where
import qualified CMark
import Data.Functor.Union
import Data.Record
import qualified Data.Syntax as Syntax
import Data.Syntax.Assignment
import Data.Functor.Foldable hiding (fold, Nil)
import Data.Functor.Union (inj)
import qualified Data.Text as T
import Info hiding (Empty, Go)
import Language
import Language.Markdown
import qualified Language.Markdown.Syntax as Markdown
import qualified Language.Python.Syntax as Python
import qualified Language.Ruby.Syntax as Ruby
import Prologue hiding (Location)
@ -30,16 +33,17 @@ import TreeSitter
-- | A parser from 'Source' onto some term type.
data Parser term where
-- | A parser producing 'AST' using a 'TS.Language'.
ASTParser :: (Bounded grammar, Enum grammar) => Ptr TS.Language -> Parser (AST grammar)
ASTParser :: (Bounded grammar, Enum grammar) => Ptr TS.Language -> Parser (Cofree [] (Record (Maybe grammar ': Location)))
-- | A parser producing an à la carte term given an 'AST'-producing parser and an 'Assignment' onto 'Term's in some syntax type. Assignment errors will result in a top-level 'Syntax.Error' node.
AssignmentParser :: (Bounded grammar, Enum grammar, Eq grammar, Show grammar, Symbol grammar, InUnion fs (Syntax.Error (Error grammar)), Traversable (Union fs))
=> Parser (AST grammar) -- ^ A parser producing 'AST'.
-> Assignment (Node grammar) (Term (Union fs) Location) -- ^ An assignment from 'AST' onto 'Term's.
-> Parser (Term (Union fs) Location) -- ^ A parser of 'Term's.
AssignmentParser :: (Bounded grammar, Enum grammar, Eq grammar, Show grammar, Symbol grammar, InUnion fs (Syntax.Error (Error grammar)), Traversable (Union fs), Recursive ast, Foldable (Base ast))
=> Parser ast -- ^ A parser producing AST.
-> (forall x. Base ast x -> Record (Maybe grammar ': Location)) -- ^ A function extracting the symbol and location.
-> Assignment ast grammar (Term (Union fs) (Record Location)) -- ^ An assignment from AST onto 'Term's.
-> Parser (Term (Union fs) (Record Location)) -- ^ A parser producing 'Term's.
-- | A tree-sitter parser.
TreeSitterParser :: Language -> Ptr TS.Language -> Parser (SyntaxTerm Text DefaultFields)
-- | A parser for 'Markdown' using cmark.
MarkdownParser :: Parser (SyntaxTerm Text DefaultFields)
MarkdownParser :: Parser (Cofree [] (Record (CMark.NodeType ': Location)))
-- | A parser which will parse any input 'Source' into a top-level 'Term' whose children are leaves consisting of the 'Source's lines.
LineByLineParser :: Parser (SyntaxTerm Text DefaultFields)
@ -49,26 +53,29 @@ parserForLanguage Nothing = LineByLineParser
parserForLanguage (Just language) = case language of
C -> TreeSitterParser C tree_sitter_c
Go -> TreeSitterParser Go tree_sitter_go
Markdown -> MarkdownParser
Ruby -> TreeSitterParser Ruby tree_sitter_ruby
TypeScript -> TreeSitterParser TypeScript tree_sitter_typescript
_ -> LineByLineParser
rubyParser :: Parser (Term (Union Ruby.Syntax') Location)
rubyParser = AssignmentParser (ASTParser tree_sitter_ruby) Ruby.assignment
rubyParser :: Parser Ruby.Term
rubyParser = AssignmentParser (ASTParser tree_sitter_ruby) headF Ruby.assignment
pythonParser :: Parser (Term (Union Python.Syntax') Location)
pythonParser = AssignmentParser (ASTParser tree_sitter_python) Python.assignment
pythonParser :: Parser Python.Term
pythonParser = AssignmentParser (ASTParser tree_sitter_python) headF Python.assignment
markdownParser :: Parser Markdown.Term
markdownParser = AssignmentParser MarkdownParser (\ ((nodeType :. location) :< _) -> Just (toGrammar nodeType) :. location) Markdown.assignment
runParser :: Parser term -> Source -> IO term
runParser parser = case parser of
ASTParser language -> parseToAST language
AssignmentParser parser assignment -> \ source -> do
AssignmentParser parser by assignment -> \ source -> do
ast <- runParser parser source
let Result err term = assign assignment source ast
let Result err term = assignBy by assignment source ast
traverse_ (putStrLn . showError source) (toList err)
case term of
Just term -> do
let errors = toList err <> termErrors term
let errors = termErrors term `asTypeOf` toList err
traverse_ (putStrLn . showError source) errors
unless (Prologue.null errors) $
putStrLn (withSGRCode [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Red] (shows (Prologue.length errors) . showChar ' ' . showString (if Prologue.length errors == 1 then "error" else "errors")) $ "")
@ -80,7 +87,7 @@ runParser parser = case parser of
where showSGRCode = showString . setSGRCode
withSGRCode code s = showSGRCode code . s . showSGRCode []
errorTerm :: InUnion fs (Syntax.Error (Error grammar)) => Source -> Maybe (Error grammar) -> Term (Union fs) Location
errorTerm :: InUnion fs (Syntax.Error (Error grammar)) => Source -> Maybe (Error grammar) -> Term (Union fs) (Record Location)
errorTerm source err = cofree ((totalRange source :. totalSpan source :. Nil) :< inj (Syntax.Error (fromMaybe (Error (SourcePos 0 0) (UnexpectedEndOfInput [])) err)))
termErrors :: (InUnion fs (Syntax.Error (Error grammar)), Functor (Union fs), Foldable (Union fs)) => Term (Union fs) a -> [Error grammar]

View File

@ -44,10 +44,13 @@ parseBlobs renderer = fmap toS . distributeFoldMap (parseBlob renderer) . filter
-- | A task to parse a 'SourceBlob' and render the resulting 'Term'.
parseBlob :: TermRenderer output -> SourceBlob -> Task output
parseBlob renderer blob@SourceBlob{..} = case (renderer, blobLanguage) of
(JSONTermRenderer, Just Language.Markdown) -> parse markdownParser source >>= render (renderJSONTerm blob)
(JSONTermRenderer, Just Language.Python) -> parse pythonParser source >>= render (renderJSONTerm blob)
(JSONTermRenderer, _) -> parse syntaxParser source >>= decorate identifierAlgebra >>= render (renderJSONTerm blob)
(SExpressionTermRenderer, Just Language.Markdown) -> parse markdownParser source >>= decorate (ConstructorLabel . constructorLabel) >>= render renderSExpressionTerm . fmap keepConstructorLabel
(SExpressionTermRenderer, Just Language.Python) -> parse pythonParser source >>= decorate (ConstructorLabel . constructorLabel) >>= render renderSExpressionTerm . fmap keepConstructorLabel
(SExpressionTermRenderer, _) -> parse syntaxParser source >>= render renderSExpressionTerm . fmap keepCategory
(IdentityTermRenderer, Just Language.Markdown) -> pure Nothing
(IdentityTermRenderer, Just Language.Python) -> pure Nothing
(IdentityTermRenderer, _) -> Just <$> parse syntaxParser source
where syntaxParser = parserForLanguage blobLanguage
@ -59,12 +62,16 @@ diffBlobPairs renderer = fmap toS . distributeFoldMap (diffBlobPair renderer) .
-- | A task to parse a pair of 'SourceBlob's, diff them, and render the 'Diff'.
diffBlobPair :: DiffRenderer output -> Both SourceBlob -> Task output
diffBlobPair renderer blobs = case (renderer, effectiveLanguage) of
(ToCDiffRenderer, Just Language.Markdown) -> pure mempty
(ToCDiffRenderer, Just Language.Python) -> run (\ source -> parse pythonParser source >>= decorate (declarationAlgebra (Proxy :: Proxy Python.Error) source) . hoistCofree (weaken :: Union fs a -> Union (Declaration.Method ': fs) a)) diffLinearly (renderToC blobs)
(ToCDiffRenderer, _) -> run (\ source -> parse syntaxParser source >>= decorate (syntaxDeclarationAlgebra source)) diffTerms (renderToC blobs)
(JSONDiffRenderer, Just Language.Markdown) -> run (parse markdownParser) diffLinearly (renderJSONDiff blobs)
(JSONDiffRenderer, Just Language.Python) -> run (parse pythonParser) diffLinearly (renderJSONDiff blobs)
(JSONDiffRenderer, _) -> run (decorate identifierAlgebra <=< parse syntaxParser) diffTerms (renderJSONDiff blobs)
(PatchDiffRenderer, Just Language.Markdown) -> run (parse markdownParser) diffLinearly (renderPatch blobs)
(PatchDiffRenderer, Just Language.Python) -> run (parse pythonParser) diffLinearly (renderPatch blobs)
(PatchDiffRenderer, _) -> run (parse syntaxParser) diffTerms (renderPatch blobs)
(SExpressionDiffRenderer, Just Language.Markdown) -> run (decorate (ConstructorLabel . constructorLabel) <=< parse markdownParser) diffLinearly (renderSExpressionDiff . mapAnnotations keepConstructorLabel)
(SExpressionDiffRenderer, Just Language.Python) -> run (decorate (ConstructorLabel . constructorLabel) <=< parse pythonParser) diffLinearly (renderSExpressionDiff . mapAnnotations keepConstructorLabel)
(SExpressionDiffRenderer, _) -> run (parse syntaxParser) diffTerms (renderSExpressionDiff . mapAnnotations keepCategory)
(IdentityDiffRenderer, _) -> run (\ source -> parse syntaxParser source >>= decorate (syntaxDeclarationAlgebra source)) diffTerms Just

View File

@ -1,4 +1,4 @@
{-# LANGUAGE DataKinds, ScopedTypeVariables #-}
{-# LANGUAGE DataKinds, ScopedTypeVariables, TypeOperators #-}
module TreeSitter
( treeSitterParser
, parseToAST
@ -42,7 +42,7 @@ treeSitterParser language grammar source = bracket ts_document_new ts_document_f
-- | Parse 'Source' with the given 'TS.Language' and return its AST.
parseToAST :: (Bounded grammar, Enum grammar) => Ptr TS.Language -> Source -> IO (A.AST grammar)
parseToAST :: (Bounded grammar, Enum grammar) => Ptr TS.Language -> Source -> IO (Cofree [] (Record (Maybe grammar ': A.Location)))
parseToAST language source = bracket ts_document_new ts_document_free $ \ document -> do
ts_document_set_language document language
root <- withCStringLen (toText source) $ \ (source, len) -> do
@ -54,13 +54,13 @@ parseToAST language source = bracket ts_document_new ts_document_free $ \ docume
anaM toAST root
toAST :: (Bounded grammar, Enum grammar) => Node -> IO (A.RoseF (A.Node grammar) Node)
toAST :: (Bounded grammar, Enum grammar) => Node -> IO (CofreeF [] (Record (Maybe grammar ': A.Location)) Node)
toAST node@Node{..} = do
let count = fromIntegral nodeChildCount
children <- allocaArray count $ \ childNodesPtr -> do
_ <- with nodeTSNode (\ nodePtr -> ts_node_copy_child_nodes nullPtr nodePtr childNodesPtr (fromIntegral count))
peekArray count childNodesPtr
pure $ A.RoseF (safeToEnum (fromIntegral nodeSymbol) :. nodeRange node :. nodeSpan node :. Nil) children
pure $ (safeToEnum (fromIntegral nodeSymbol) :. nodeRange node :. nodeSpan node :. Nil) :< children
anaM :: (Corecursive t, Monad m, Traversable (Base t)) => (a -> m (Base t a)) -> a -> m t
anaM g = a where a = pure . embed <=< traverse a <=< g

View File

@ -14,82 +14,82 @@ spec :: Spec
spec = do
describe "Applicative" $
it "matches in sequence" $
runAssignment ((,) <$> red <*> red) (makeState "helloworld" [Rose (rec Red 0 5) [], Rose (rec Red 5 10) []]) `shouldBe` Result Nothing (Just (AssignmentState 10 (Info.SourcePos 1 11) "" [], (Out "hello", Out "world")))
runAssignment headF ((,) <$> red <*> red) (makeState "helloworld" [node Red 0 5 [], node Red 5 10 []]) `shouldBe` Result Nothing (Just ((Out "hello", Out "world"), AssignmentState 10 (Info.SourcePos 1 11) "" []))
describe "Alternative" $ do
it "attempts multiple alternatives" $
runAssignment (green <|> red) (makeState "hello" [Rose (rec Red 0 5) []]) `shouldBe` Result Nothing (Just (AssignmentState 5 (Info.SourcePos 1 6) "" [], Out "hello"))
runAssignment headF (green <|> red) (makeState "hello" [node Red 0 5 []]) `shouldBe` Result Nothing (Just (Out "hello", AssignmentState 5 (Info.SourcePos 1 6) "" []))
it "matches repetitions" $
let s = "colourless green ideas sleep furiously"
w = words s
(_, nodes) = foldl (\ (i, prev) word -> (i + B.length word + 1, prev <> [Rose (rec Red i (i + B.length word)) []])) (0, []) w in
resultValue (runAssignment (many red) (makeState (Source s) nodes)) `shouldBe` Just (AssignmentState (B.length s) (Info.SourcePos 1 (succ (B.length s))) "" [], Out <$> w)
(_, nodes) = foldl (\ (i, prev) word -> (i + B.length word + 1, prev <> [node Red i (i + B.length word) []])) (0, []) w in
resultValue (runAssignment headF (many red) (makeState (Source s) nodes)) `shouldBe` Just (Out <$> w, AssignmentState (B.length s) (Info.SourcePos 1 (succ (B.length s))) "" [])
it "matches one-or-more repetitions against one or more input nodes" $
resultValue (runAssignment (some red) (makeState "hello" [Rose (rec Red 0 5) []])) `shouldBe` Just (AssignmentState 5 (Info.SourcePos 1 6) "" [], [Out "hello"])
resultValue (runAssignment headF (some red) (makeState "hello" [node Red 0 5 []])) `shouldBe` Just ([Out "hello"], AssignmentState 5 (Info.SourcePos 1 6) "" [])
describe "symbol" $ do
it "matches nodes with the same symbol" $
snd <$> runAssignment red (makeState "hello" [Rose (rec Red 0 5) []]) `shouldBe` Result Nothing (Just (Out "hello"))
fst <$> runAssignment headF red (makeState "hello" [node Red 0 5 []]) `shouldBe` Result Nothing (Just (Out "hello"))
it "does not advance past the current node" $
let initialState = makeState "hi" [ Rose (rec Red 0 2) [] ] in
fst <$> runAssignment (symbol Red) initialState `shouldBe` Result Nothing (Just initialState)
let initialState = makeState "hi" [ node Red 0 2 [] ] in
snd <$> runAssignment headF (symbol Red) initialState `shouldBe` Result Nothing (Just initialState)
describe "source" $ do
it "produces the nodes source" $
assign source "hi" (Rose (rec Red 0 2) []) `shouldBe` Result Nothing (Just "hi")
assignBy headF source "hi" (node Red 0 2 []) `shouldBe` Result Nothing (Just "hi")
it "advances past the current node" $
fst <$> runAssignment source (makeState "hi" [ Rose (rec Red 0 2) [] ]) `shouldBe` Result Nothing (Just (AssignmentState 2 (Info.SourcePos 1 3) "" []))
snd <$> runAssignment headF source (makeState "hi" [ node Red 0 2 [] ]) `shouldBe` Result Nothing (Just (AssignmentState 2 (Info.SourcePos 1 3) "" []))
describe "children" $ do
it "advances past the current node" $
fst <$> runAssignment (children (pure (Out ""))) (makeState "a" [Rose (rec Red 0 1) []]) `shouldBe` Result Nothing (Just (AssignmentState 1 (Info.SourcePos 1 2) "" []))
snd <$> runAssignment headF (children (pure (Out ""))) (makeState "a" [node Red 0 1 []]) `shouldBe` Result Nothing (Just (AssignmentState 1 (Info.SourcePos 1 2) "" []))
it "matches if its subrule matches" $
() <$ runAssignment (children red) (makeState "a" [Rose (rec Blue 0 1) [Rose (rec Red 0 1) []]]) `shouldBe` Result Nothing (Just ())
() <$ runAssignment headF (children red) (makeState "a" [node Blue 0 1 [node Red 0 1 []]]) `shouldBe` Result Nothing (Just ())
it "does not match if its subrule does not match" $
(runAssignment (children red) (makeState "a" [Rose (rec Blue 0 1) [Rose (rec Green 0 1) []]])) `shouldBe` Result (Just (Error (Info.SourcePos 1 1) (UnexpectedSymbol [Red] Green))) Nothing
(runAssignment headF (children red) (makeState "a" [node Blue 0 1 [node Green 0 1 []]])) `shouldBe` Result (Just (Error (Info.SourcePos 1 1) (UnexpectedSymbol [Red] Green))) Nothing
it "matches nested children" $
runAssignment
runAssignment headF
(symbol Red *> children (symbol Green *> children (symbol Blue *> source)))
(makeState "1" [ Rose (rec Red 0 1) [ Rose (rec Green 0 1) [ Rose (rec Blue 0 1) [] ] ] ])
(makeState "1" [ node Red 0 1 [ node Green 0 1 [ node Blue 0 1 [] ] ] ])
`shouldBe`
Result Nothing (Just (AssignmentState 1 (Info.SourcePos 1 2) "" [], "1"))
Result Nothing (Just ("1", AssignmentState 1 (Info.SourcePos 1 2) "" []))
it "continues after children" $
resultValue (runAssignment
resultValue (runAssignment headF
(many (symbol Red *> children (symbol Green *> source)
<|> symbol Blue *> source))
(makeState "BC" [ Rose (rec Red 0 1) [ Rose (rec Green 0 1) [] ]
, Rose (rec Blue 1 2) [] ]))
(makeState "BC" [ node Red 0 1 [ node Green 0 1 [] ]
, node Blue 1 2 [] ]))
`shouldBe`
Just (AssignmentState 2 (Info.SourcePos 1 3) "" [], ["B", "C"])
Just (["B", "C"], AssignmentState 2 (Info.SourcePos 1 3) "" [])
it "matches multiple nested children" $
runAssignment
runAssignment headF
(symbol Red *> children (many (symbol Green *> children (symbol Blue *> source))))
(makeState "12" [ Rose (rec Red 0 2) [ Rose (rec Green 0 1) [ Rose (rec Blue 0 1) [] ]
, Rose (rec Green 1 2) [ Rose (rec Blue 1 2) [] ] ] ])
(makeState "12" [ node Red 0 2 [ node Green 0 1 [ node Blue 0 1 [] ]
, node Green 1 2 [ node Blue 1 2 [] ] ] ])
`shouldBe`
Result Nothing (Just (AssignmentState 2 (Info.SourcePos 1 3) "" [], ["1", "2"]))
Result Nothing (Just (["1", "2"], AssignmentState 2 (Info.SourcePos 1 3) "" []))
describe "runAssignment" $ do
it "drops anonymous nodes before matching symbols" $
runAssignment red (makeState "magenta red" [Rose (rec Magenta 0 7) [], Rose (rec Red 8 11) []]) `shouldBe` Result Nothing (Just (AssignmentState 11 (Info.SourcePos 1 12) "" [], Out "red"))
runAssignment headF red (makeState "magenta red" [node Magenta 0 7 [], node Red 8 11 []]) `shouldBe` Result Nothing (Just (Out "red", AssignmentState 11 (Info.SourcePos 1 12) "" []))
it "does not drop anonymous nodes after matching" $
runAssignment red (makeState "red magenta" [Rose (rec Red 0 3) [], Rose (rec Magenta 4 11) []]) `shouldBe` Result Nothing (Just (AssignmentState 3 (Info.SourcePos 1 4) " magenta" [Rose (rec Magenta 4 11) []], Out "red"))
runAssignment headF red (makeState "red magenta" [node Red 0 3 [], node Magenta 4 11 []]) `shouldBe` Result Nothing (Just (Out "red", AssignmentState 3 (Info.SourcePos 1 4) " magenta" [node Magenta 4 11 []]))
it "does not drop anonymous nodes when requested" $
runAssignment ((,) <$> magenta <*> red) (makeState "magenta red" [Rose (rec Magenta 0 7) [], Rose (rec Red 8 11) []]) `shouldBe` Result Nothing (Just (AssignmentState 11 (Info.SourcePos 1 12) "" [], (Out "magenta", Out "red")))
runAssignment headF ((,) <$> magenta <*> red) (makeState "magenta red" [node Magenta 0 7 [], node Red 8 11 []]) `shouldBe` Result Nothing (Just ((Out "magenta", Out "red"), AssignmentState 11 (Info.SourcePos 1 12) "" []))
rec :: symbol -> Int -> Int -> Record '[Maybe symbol, Range, SourceSpan]
rec symbol start end = Just symbol :. Range start end :. Info.SourceSpan (Info.SourcePos 1 (succ start)) (Info.SourcePos 1 (succ end)) :. Nil
node :: symbol -> Int -> Int -> [AST symbol] -> AST symbol
node symbol start end children = cofree $ (Just symbol :. Range start end :. Info.SourceSpan (Info.SourcePos 1 (succ start)) (Info.SourcePos 1 (succ end)) :. Nil) :< children
data Grammar = Red | Green | Blue | Magenta
deriving (Enum, Eq, Show)
@ -101,14 +101,14 @@ instance Symbol Grammar where
data Out = Out ByteString
deriving (Eq, Show)
red :: Assignment (Node Grammar) Out
red :: Assignment (AST Grammar) Grammar Out
red = Out <$ symbol Red <*> source
green :: Assignment (Node Grammar) Out
green :: Assignment (AST Grammar) Grammar Out
green = Out <$ symbol Green <*> source
blue :: Assignment (Node Grammar) Out
blue :: Assignment (AST Grammar) Grammar Out
blue = Out <$ symbol Blue <*> source
magenta :: Assignment (Node Grammar) Out
magenta :: Assignment (AST Grammar) Grammar Out
magenta = Out <$ symbol Magenta <*> source