mirror of
https://github.com/github/semantic.git
synced 2024-11-29 02:44:36 +03:00
Merge branch 'master' into clean-parse-and-diff-interface
This commit is contained in:
commit
76513638a6
@ -20,7 +20,7 @@ import Diff
|
||||
import Info
|
||||
import Patch
|
||||
import Range
|
||||
import Source hiding (break)
|
||||
import Source hiding (break, drop, take)
|
||||
import SplitDiff
|
||||
import Syntax
|
||||
import Term
|
||||
|
@ -1,43 +1,85 @@
|
||||
{-# LANGUAGE GADTs, TypeFamilies #-}
|
||||
{-# LANGUAGE DataKinds, GADTs, TypeFamilies #-}
|
||||
module Data.Syntax.Assignment
|
||||
( Assignment
|
||||
, get
|
||||
, state
|
||||
, Location
|
||||
, location
|
||||
, symbol
|
||||
, range
|
||||
, sourceSpan
|
||||
, source
|
||||
, children
|
||||
, Rose(..)
|
||||
, RoseF(..)
|
||||
, Node(..)
|
||||
, Node
|
||||
, AST
|
||||
, Result(..)
|
||||
, assignAll
|
||||
, runAssignment
|
||||
, AssignmentState(..)
|
||||
) where
|
||||
|
||||
import Control.Monad.Free.Freer
|
||||
import Data.Functor.Classes
|
||||
import Data.Functor.Foldable
|
||||
import Data.Functor.Foldable hiding (Nil)
|
||||
import Data.Record
|
||||
import Data.Text (unpack)
|
||||
import Prologue hiding (Alt)
|
||||
import qualified Info
|
||||
import Prologue hiding (Alt, get, Location, state)
|
||||
import Range (offsetRange)
|
||||
import qualified Source (Source(..), drop, slice, sourceText)
|
||||
import Text.Parser.TreeSitter.Language
|
||||
import Text.Show hiding (show)
|
||||
|
||||
-- | Assignment from an AST with some set of 'symbol's onto some other value.
|
||||
--
|
||||
-- This is essentially a parser.
|
||||
type Assignment symbol = Freer (AssignmentF symbol)
|
||||
type Assignment node = Freer (AssignmentF node)
|
||||
|
||||
data AssignmentF symbol a where
|
||||
Symbol :: symbol -> AssignmentF symbol ()
|
||||
data AssignmentF node a where
|
||||
Get :: AssignmentF node node
|
||||
State :: AssignmentF (Node grammar) (AssignmentState grammar)
|
||||
Source :: AssignmentF symbol ByteString
|
||||
Children :: Assignment symbol a -> AssignmentF symbol a
|
||||
Alt :: a -> a -> AssignmentF symbol a
|
||||
Empty :: AssignmentF symbol a
|
||||
|
||||
-- | Zero-width production of the current node.
|
||||
--
|
||||
-- Since this is zero-width, care must be taken not to repeat it without chaining on other rules. I.e. 'many (get *> b)' is fine, but 'many get' is not.
|
||||
get :: Assignment (Record fields) (Record fields)
|
||||
get = Get `Then` return
|
||||
|
||||
-- | Zero-width production of the current state.
|
||||
--
|
||||
-- Since this is zero-width, care must be taken not to repeat it without chaining on other rules. I.e. 'many (state *> b)' is fine, but 'many state' is not.
|
||||
state :: Assignment (Node grammar) (AssignmentState grammar)
|
||||
state = State `Then` return
|
||||
|
||||
-- | Zero-width production of the current location.
|
||||
--
|
||||
-- If assigning at the end of input or at the end of a list of children, the loccation will be returned as an empty Range and SourceSpan at the current offset. Otherwise, it will be the Range and SourceSpan of the current node.
|
||||
location :: Assignment (Node grammar) Location
|
||||
location = rtail <$> get <|> (\ (AssignmentState o p _ _) -> Info.Range o o :. Info.SourceSpan p p :. Nil) <$> state
|
||||
|
||||
-- | Zero-width match of a node with the given symbol.
|
||||
--
|
||||
-- Since this is zero-width, care must be taken not to repeat it without chaining on other rules. I.e. 'many (rule A *> b)' is fine, but 'many (rule A)' is not.
|
||||
symbol :: symbol -> Assignment symbol ()
|
||||
symbol s = Symbol s `Then` return
|
||||
-- Since this is zero-width, care must be taken not to repeat it without chaining on other rules. I.e. 'many (symbol A *> b)' is fine, but 'many (symbol A)' is not.
|
||||
symbol :: (HasField fields symbol, Eq symbol) => symbol -> Assignment (Record fields) ()
|
||||
symbol s = Get `Then` guard . (s ==) . getField
|
||||
|
||||
-- | Zero-width production of the current node’s range.
|
||||
--
|
||||
-- Since this is zero-width, care must be taken not to repeat it without chaining on other rules. I.e. 'many (range *> b)' is fine, but 'many range' is not.
|
||||
range :: HasField fields Info.Range => Assignment (Record fields) Info.Range
|
||||
range = Get `Then` return . getField
|
||||
|
||||
-- | Zero-width production of the current node’s sourceSpan.
|
||||
--
|
||||
-- Since this is zero-width, care must be taken not to repeat it without chaining on other rules. I.e. 'many (sourceSpan *> b)' is fine, but 'many sourceSpan' is not.
|
||||
sourceSpan :: HasField fields Info.SourceSpan => Assignment (Record fields) Info.SourceSpan
|
||||
sourceSpan = Get `Then` return . getField
|
||||
|
||||
-- | A rule to produce a node’s source as a ByteString.
|
||||
source :: Assignment symbol ByteString
|
||||
@ -52,11 +94,13 @@ children forEach = Children forEach `Then` return
|
||||
data Rose a = Rose { roseValue :: !a, roseChildren :: ![Rose a] }
|
||||
deriving (Eq, Functor, Show)
|
||||
|
||||
-- | A node in the input AST. We only concern ourselves with its symbol (considered as an element of 'grammar') and source.
|
||||
data Node grammar = Node { nodeSymbol :: grammar, nodeSource :: ByteString }
|
||||
deriving (Eq, Show)
|
||||
-- | A location specified as possibly-empty intervals of bytes and line/column positions.
|
||||
type Location = Record '[Info.Range, Info.SourceSpan]
|
||||
|
||||
-- | An abstract syntax tree.
|
||||
-- | The label annotating a node in the AST, specified as the pairing of its symbol and location information.
|
||||
type Node grammar = Record '[grammar, Info.Range, Info.SourceSpan]
|
||||
|
||||
-- | An abstract syntax tree in some 'grammar', with symbols and location information annotating each node.
|
||||
type AST grammar = Rose (Node grammar)
|
||||
|
||||
|
||||
@ -66,32 +110,52 @@ data Result a = Result a | Error [Text]
|
||||
|
||||
|
||||
-- | Run an assignment of nodes in a grammar onto terms in a syntax, discarding any unparsed nodes.
|
||||
assignAll :: (Symbol grammar, Eq grammar, Show grammar) => Assignment grammar a -> [AST grammar] -> Result a
|
||||
assignAll assignment nodes = case runAssignment assignment nodes of
|
||||
Result (rest, a) -> case dropAnonymous rest of
|
||||
assignAll :: (Symbol grammar, Eq grammar, Show grammar) => Assignment (Node grammar) a -> Source.Source -> [AST grammar] -> Result a
|
||||
assignAll assignment = (assignAllFrom assignment .) . AssignmentState 0 (Info.SourcePos 1 1)
|
||||
|
||||
assignAllFrom :: (Symbol grammar, Eq grammar, Show grammar) => Assignment (Node grammar) a -> AssignmentState grammar -> Result a
|
||||
assignAllFrom assignment state = case runAssignment assignment state of
|
||||
Result (state, a) -> case stateNodes (dropAnonymous state) of
|
||||
[] -> Result a
|
||||
c:_ -> Error ["Expected end of input, but got: " <> show c]
|
||||
Error e -> Error e
|
||||
|
||||
-- | Run an assignment of nodes in a grammar onto terms in a syntax.
|
||||
runAssignment :: (Symbol grammar, Eq grammar, Show grammar) => Assignment grammar a -> [AST grammar] -> Result ([AST grammar], a)
|
||||
runAssignment = iterFreer (\ assignment yield nodes -> case (assignment, dropAnonymous nodes) of
|
||||
runAssignment :: (Symbol grammar, Eq grammar, Show grammar) => Assignment (Node grammar) a -> AssignmentState grammar -> Result (AssignmentState grammar, a)
|
||||
runAssignment = iterFreer (\ assignment yield state -> case (assignment, dropAnonymous state) of
|
||||
-- Nullability: some rules, e.g. 'pure a' and 'many a', should match at the end of input. Either side of an alternation may be nullable, ergo Alt can match at the end of input.
|
||||
(Alt a b, nodes) -> yield a nodes <|> yield b nodes -- FIXME: Symbol `Alt` Symbol `Alt` Symbol is inefficient, should build and match against an IntMap instead.
|
||||
(assignment, node@(Rose Node{..} children) : rest) -> case assignment of
|
||||
Symbol symbol -> guard (symbol == nodeSymbol) >> yield () nodes
|
||||
Source -> yield nodeSource rest
|
||||
Children childAssignment -> assignAll childAssignment children >>= flip yield rest
|
||||
_ -> Error ["No rule to match " <> show node]
|
||||
(Symbol symbol, []) -> Error [ "Expected " <> show symbol <> " but got end of input." ]
|
||||
(Source, []) -> Error [ "Expected leaf node but got end of input." ]
|
||||
(Children _, []) -> Error [ "Expected branch node but got end of input." ]
|
||||
(Alt a b, state) -> yield a state <|> yield b state -- FIXME: Symbol `Alt` Symbol `Alt` Symbol is inefficient, should build and match against an IntMap instead.
|
||||
(State, state) -> yield state state
|
||||
(assignment, AssignmentState offset _ source (subtree@(Rose node@(_ :. range :. _) children) : _)) -> case assignment of
|
||||
Get -> yield node state
|
||||
Source -> yield (Source.sourceText (Source.slice (offsetRange range (negate offset)) source)) (advanceState state)
|
||||
Children childAssignment -> do
|
||||
c <- assignAllFrom childAssignment state { stateNodes = children }
|
||||
yield c (advanceState state)
|
||||
_ -> Error ["No rule to match " <> show subtree]
|
||||
(Get, AssignmentState{}) -> Error [ "Expected node but got end of input." ]
|
||||
(Source, AssignmentState{}) -> Error [ "Expected leaf node but got end of input." ]
|
||||
(Children _, AssignmentState{}) -> Error [ "Expected branch node but got end of input." ]
|
||||
_ -> Error ["No rule to match at end of input."])
|
||||
. fmap ((Result .) . flip (,))
|
||||
. fmap (\ a state -> Result (state, a))
|
||||
|
||||
dropAnonymous :: Symbol grammar => [AST grammar] -> [AST grammar]
|
||||
dropAnonymous = dropWhile ((/= Regular) . symbolType . nodeSymbol . roseValue)
|
||||
dropAnonymous :: Symbol grammar => AssignmentState grammar -> AssignmentState grammar
|
||||
dropAnonymous state = state { stateNodes = dropWhile ((/= Regular) . symbolType . rhead . roseValue) (stateNodes state) }
|
||||
|
||||
-- | Advances the state past the current (head) node (if any), dropping it off stateNodes & its corresponding bytes off of stateSource, and updating stateOffset & statePos to its end. Exhausted 'AssignmentState's (those without any remaining nodes) are returned unchanged.
|
||||
advanceState :: AssignmentState grammar -> AssignmentState grammar
|
||||
advanceState state@AssignmentState{..}
|
||||
| Rose (_ :. range :. span :. _) _ : rest <- stateNodes = AssignmentState (Info.end range) (Info.spanEnd span) (Source.drop (Info.end range - stateOffset) stateSource) rest
|
||||
| otherwise = state
|
||||
|
||||
-- | State kept while running 'Assignment's.
|
||||
data AssignmentState grammar = AssignmentState
|
||||
{ stateOffset :: Int -- ^ The offset into the Source thus far reached, measured in bytes.
|
||||
, statePos :: Info.SourcePos -- ^ The (1-indexed) line/column position in the Source thus far reached.
|
||||
, stateSource :: Source.Source -- ^ The remaining Source. Equal to dropping 'stateOffset' bytes off the original input Source.
|
||||
, stateNodes :: [AST grammar] -- ^ The remaining nodes to assign. Note that 'children' rules recur into subterms, and thus this does not necessarily reflect all of the terms remaining to be assigned in the overall algorithm, only those “in scope.”
|
||||
}
|
||||
deriving (Eq, Show)
|
||||
|
||||
instance Alternative (Assignment symbol) where
|
||||
empty = Empty `Then` return
|
||||
@ -99,7 +163,8 @@ instance Alternative (Assignment symbol) where
|
||||
|
||||
instance Show symbol => Show1 (AssignmentF symbol) where
|
||||
liftShowsPrec sp sl d a = case a of
|
||||
Symbol s -> showsUnaryWith showsPrec "Symbol" d s . showChar ' ' . sp d ()
|
||||
Get -> showString "Get"
|
||||
State -> showString "State" . sp d (AssignmentState 0 (Info.SourcePos 0 0) (Source.Source "") [])
|
||||
Source -> showString "Source" . showChar ' ' . sp d ""
|
||||
Children a -> showsUnaryWith (liftShowsPrec sp sl) "Children" d a
|
||||
Alt a b -> showsBinaryWith sp sp "Alt" d a b
|
||||
|
@ -1,4 +1,4 @@
|
||||
{-# LANGUAGE DataKinds, TemplateHaskell #-}
|
||||
{-# LANGUAGE DataKinds, TemplateHaskell, TypeOperators #-}
|
||||
module Language.Ruby.Syntax where
|
||||
|
||||
import Data.Functor.Union
|
||||
@ -9,8 +9,8 @@ import qualified Data.Syntax.Declaration as Declaration
|
||||
import qualified Data.Syntax.Expression as Expression
|
||||
import qualified Data.Syntax.Literal as Literal
|
||||
import qualified Data.Syntax.Statement as Statement
|
||||
import Language.Haskell.TH
|
||||
import Prologue hiding (optional, unless)
|
||||
import Language.Haskell.TH hiding (location, Range(..))
|
||||
import Prologue hiding (get, Location, optional, state, unless)
|
||||
import Term
|
||||
import Text.Parser.TreeSitter.Language
|
||||
import Text.Parser.TreeSitter.Ruby
|
||||
@ -39,38 +39,34 @@ type Syntax' =
|
||||
]
|
||||
|
||||
|
||||
term :: InUnion Syntax' f => f (Term Syntax ()) -> Term Syntax ()
|
||||
term f = cofree $ () :< inj f
|
||||
|
||||
|
||||
-- | Statically-known rules corresponding to symbols in the grammar.
|
||||
mkSymbolDatatype (mkName "Grammar") tree_sitter_ruby
|
||||
|
||||
|
||||
-- | Assignment from AST in Ruby’s grammar onto a program in Ruby’s syntax.
|
||||
assignment :: Assignment Grammar [Term Syntax ()]
|
||||
assignment :: Assignment (Node Grammar) [Term Syntax Location]
|
||||
assignment = symbol Program *> children (many declaration)
|
||||
|
||||
declaration :: Assignment Grammar (Term Syntax ())
|
||||
declaration :: Assignment (Node Grammar) (Term Syntax Location)
|
||||
declaration = comment <|> class' <|> method
|
||||
|
||||
class' :: Assignment Grammar (Term Syntax ())
|
||||
class' = term <$ symbol Class
|
||||
class' :: Assignment (Node Grammar) (Term Syntax Location)
|
||||
class' = term <* symbol Class
|
||||
<*> children (Declaration.Class <$> (constant <|> scopeResolution) <*> (superclass <|> pure []) <*> many declaration)
|
||||
where superclass = pure <$ symbol Superclass <*> children constant
|
||||
scopeResolution = symbol ScopeResolution *> children (constant <|> identifier)
|
||||
|
||||
constant :: Assignment Grammar (Term Syntax ())
|
||||
constant = term . Syntax.Identifier <$ symbol Constant <*> source
|
||||
constant :: Assignment (Node Grammar) (Term Syntax Location)
|
||||
constant = term <*> (Syntax.Identifier <$ symbol Constant <*> source)
|
||||
|
||||
identifier :: Assignment Grammar (Term Syntax ())
|
||||
identifier = term . Syntax.Identifier <$ symbol Identifier <*> source
|
||||
identifier :: Assignment (Node Grammar) (Term Syntax Location)
|
||||
identifier = term <*> (Syntax.Identifier <$ symbol Identifier <*> source)
|
||||
|
||||
method :: Assignment Grammar (Term Syntax ())
|
||||
method = term <$ symbol Method
|
||||
<*> children (Declaration.Method <$> identifier <*> pure [] <*> (term <$> many statement))
|
||||
method :: Assignment (Node Grammar) (Term Syntax Location)
|
||||
method = term <* symbol Method
|
||||
<*> children (Declaration.Method <$> identifier <*> pure [] <*> (term <*> many statement))
|
||||
|
||||
statement :: Assignment Grammar (Term Syntax ())
|
||||
statement :: Assignment (Node Grammar) (Term Syntax Location)
|
||||
statement = exit Statement.Return Return
|
||||
<|> exit Statement.Yield Yield
|
||||
<|> exit Statement.Break Break
|
||||
@ -80,28 +76,32 @@ statement = exit Statement.Return Return
|
||||
<|> unless
|
||||
<|> unlessModifier
|
||||
<|> literal
|
||||
where exit construct sym = term . construct <$ symbol sym <*> children (optional (symbol ArgumentList *> children statement))
|
||||
where exit construct sym = term <*> (construct <$ symbol sym <*> children (optional (symbol ArgumentList *> children statement)))
|
||||
|
||||
comment :: Assignment Grammar (Term Syntax ())
|
||||
comment = term . Comment.Comment <$ symbol Comment <*> source
|
||||
comment :: Assignment (Node Grammar) (Term Syntax Location)
|
||||
comment = term <*> (Comment.Comment <$ symbol Comment <*> source)
|
||||
|
||||
if' :: Assignment Grammar (Term Syntax ())
|
||||
if' :: Assignment (Node Grammar) (Term Syntax Location)
|
||||
if' = go If
|
||||
where go s = term <$ symbol s <*> children (Statement.If <$> statement <*> (term <$> many statement) <*> optional (go Elsif <|> term <$ symbol Else <*> children (many statement)))
|
||||
where go s = term <* symbol s <*> children (Statement.If <$> statement <*> (term <*> many statement) <*> optional (go Elsif <|> term <* symbol Else <*> children (many statement)))
|
||||
|
||||
ifModifier :: Assignment Grammar (Term Syntax ())
|
||||
ifModifier = term <$ symbol IfModifier <*> children (flip Statement.If <$> statement <*> statement <*> pure (term Syntax.Empty))
|
||||
ifModifier :: Assignment (Node Grammar) (Term Syntax Location)
|
||||
ifModifier = term <* symbol IfModifier <*> children (flip Statement.If <$> statement <*> statement <*> (term <*> pure Syntax.Empty))
|
||||
|
||||
unless :: Assignment Grammar (Term Syntax ())
|
||||
unless = term <$ symbol Unless <*> children (Statement.If <$> (term . Expression.Not <$> statement) <*> (term <$> many statement) <*> optional (term <$ symbol Else <*> children (many statement)))
|
||||
unless :: Assignment (Node Grammar) (Term Syntax Location)
|
||||
unless = term <* symbol Unless <*> children (Statement.If <$> (term <*> (Expression.Not <$> statement)) <*> (term <*> many statement) <*> optional (term <* symbol Else <*> children (many statement)))
|
||||
|
||||
unlessModifier :: Assignment Grammar (Term Syntax ())
|
||||
unlessModifier = term <$ symbol UnlessModifier <*> children (flip Statement.If <$> statement <*> (term . Expression.Not <$> statement) <*> pure (term Syntax.Empty))
|
||||
unlessModifier :: Assignment (Node Grammar) (Term Syntax Location)
|
||||
unlessModifier = term <* symbol UnlessModifier <*> children (flip Statement.If <$> statement <*> (term <*> (Expression.Not <$> statement)) <*> (term <*> pure Syntax.Empty))
|
||||
|
||||
literal :: Assignment Grammar (Term Syntax ())
|
||||
literal = term Literal.true <$ symbol Language.Ruby.Syntax.True <* source
|
||||
<|> term Literal.false <$ symbol Language.Ruby.Syntax.False <* source
|
||||
<|> term . Literal.Integer <$ symbol Language.Ruby.Syntax.Integer <*> source
|
||||
literal :: Assignment (Node Grammar) (Term Syntax Location)
|
||||
literal = term <*> (Literal.true <$ symbol Language.Ruby.Syntax.True <* source)
|
||||
<|> term <*> (Literal.false <$ symbol Language.Ruby.Syntax.False <* source)
|
||||
<|> term <*> (Literal.Integer <$ symbol Language.Ruby.Syntax.Integer <*> source)
|
||||
|
||||
optional :: Assignment Grammar (Term Syntax ()) -> Assignment Grammar (Term Syntax ())
|
||||
optional a = a <|> pure (term Syntax.Empty)
|
||||
-- | Assignment of the current node’s annotation.
|
||||
term :: InUnion Syntax' f => Assignment (Node grammar) (f (Term Syntax Location) -> Term Syntax Location)
|
||||
term = (\ a f -> cofree $ a :< inj f) <$> location
|
||||
|
||||
optional :: Assignment (Node Grammar) (Term Syntax Location) -> Assignment (Node Grammar) (Term Syntax Location)
|
||||
optional a = a <|> term <*> pure Syntax.Empty
|
||||
|
@ -17,7 +17,7 @@ import Patch
|
||||
import Prologue hiding (fst, snd)
|
||||
import Range
|
||||
import qualified Source
|
||||
import Source hiding (break, length, null)
|
||||
import Source hiding (break, drop, length, null, take)
|
||||
import Syntax
|
||||
import SplitDiff
|
||||
|
||||
|
@ -66,9 +66,17 @@ fromText = Source . encodeUtf8
|
||||
|
||||
-- | Return a Source that contains a slice of the given Source.
|
||||
slice :: Range -> Source -> Source
|
||||
slice range = Source . take . drop . sourceText
|
||||
where drop = B.drop (start range)
|
||||
take = B.take (rangeLength range)
|
||||
slice range = take . drop
|
||||
where drop = Source.drop (start range)
|
||||
take = Source.take (rangeLength range)
|
||||
|
||||
drop :: Int -> Source -> Source
|
||||
drop i = Source . drop . sourceText
|
||||
where drop = B.drop i
|
||||
|
||||
take :: Int -> Source -> Source
|
||||
take i = Source . take . sourceText
|
||||
where take = B.take i
|
||||
|
||||
-- | Return the ByteString contained in the Source.
|
||||
toText :: Source -> Text
|
||||
@ -90,14 +98,14 @@ actualLines = fmap Source . actualLines' . sourceText
|
||||
|
||||
-- | Compute the line ranges within a given range of a string.
|
||||
actualLineRanges :: Range -> Source -> [Range]
|
||||
actualLineRanges range = drop 1 . scanl toRange (Range (start range) (start range)) . actualLines . slice range
|
||||
actualLineRanges range = Prologue.drop 1 . scanl toRange (Range (start range) (start range)) . actualLines . slice range
|
||||
where toRange previous string = Range (end previous) $ end previous + B.length (sourceText string)
|
||||
|
||||
-- | Compute the character range given a Source and a SourceSpan.
|
||||
sourceSpanToRange :: Source -> SourceSpan -> Range
|
||||
sourceSpanToRange source SourceSpan{..} = Range start end
|
||||
where start = sumLengths leadingRanges + column spanStart
|
||||
end = start + sumLengths (take (line spanEnd - line spanStart) remainingRanges) + (column spanEnd - column spanStart)
|
||||
end = start + sumLengths (Prologue.take (line spanEnd - line spanStart) remainingRanges) + (column spanEnd - column spanStart)
|
||||
(leadingRanges, remainingRanges) = splitAt (line spanStart) (actualLineRanges (Source.totalRange source) source)
|
||||
sumLengths = sum . fmap (\ Range{..} -> end - start)
|
||||
|
||||
|
@ -46,7 +46,10 @@ treeSitterParser language grammar blob = do
|
||||
pure term
|
||||
|
||||
|
||||
parseRubyToAST :: Source -> IO (A.AST Ruby.Grammar)
|
||||
-- | Parse Ruby to AST. Intended for use in ghci, e.g.:
|
||||
--
|
||||
-- > Source.readAndTranscodeFile "/Users/rob/Desktop/test.rb" >>= parseRubyToAST >>= pure . uncurry (assignAll assignment) . second pure
|
||||
parseRubyToAST :: Source -> IO (Source, A.AST Ruby.Grammar)
|
||||
parseRubyToAST source = do
|
||||
document <- ts_document_new
|
||||
ts_document_set_language document Ruby.tree_sitter_ruby
|
||||
@ -57,19 +60,17 @@ parseRubyToAST source = do
|
||||
ts_document_root_node_p document rootPtr
|
||||
peek rootPtr)
|
||||
|
||||
ast <- anaM toAST (0, source, root)
|
||||
ast <- anaM toAST root
|
||||
|
||||
ts_document_free document
|
||||
pure ast
|
||||
where toAST :: (Int, Source, Node) -> IO (A.RoseF (A.Node Ruby.Grammar) (Int, Source, Node))
|
||||
toAST (offset, source, node@Node{..}) = do
|
||||
let range = nodeRange node
|
||||
let sliced = Source.slice (offsetRange range (negate offset)) source
|
||||
pure (source, ast)
|
||||
where toAST :: Node -> IO (A.RoseF (A.Node Ruby.Grammar) Node)
|
||||
toAST node@Node{..} = do
|
||||
let count = fromIntegral nodeChildCount
|
||||
children <- allocaArray count $ \ childNodesPtr -> do
|
||||
_ <- with nodeTSNode (\ nodePtr -> ts_node_copy_child_nodes nullPtr nodePtr childNodesPtr (fromIntegral count))
|
||||
peekArray count childNodesPtr
|
||||
pure $ A.RoseF (A.Node (toEnum (fromIntegral nodeSymbol)) (Source.sourceText sliced)) ((,,) (start range) sliced <$> children)
|
||||
pure $ A.RoseF (toEnum (fromIntegral nodeSymbol) :. nodeRange node :. nodeSpan node :. Nil) children
|
||||
|
||||
anaM :: (Corecursive t, Monad m, Traversable (Base t)) => (a -> m (Base t a)) -> a -> m t
|
||||
anaM g = a where a = pure . embed <=< traverse a <=< g
|
||||
|
@ -1,8 +1,12 @@
|
||||
{-# LANGUAGE DataKinds #-}
|
||||
module Data.Syntax.Assignment.Spec where
|
||||
|
||||
import Data.ByteString.Char8 (words)
|
||||
import Data.ByteString.Char8 as B (words, length)
|
||||
import Data.Record
|
||||
import Data.Syntax.Assignment
|
||||
import Info
|
||||
import Prologue
|
||||
import Source hiding (source, length)
|
||||
import Test.Hspec
|
||||
import Text.Parser.TreeSitter.Language (Symbol(..), SymbolType(..))
|
||||
|
||||
@ -10,69 +14,75 @@ spec :: Spec
|
||||
spec = do
|
||||
describe "Applicative" $ do
|
||||
it "matches in sequence" $
|
||||
runAssignment ((,) <$> red <*> red) [ast Red "hello" [], ast Red "world" []] `shouldBe` Result ([], (Out "hello", Out "world"))
|
||||
runAssignment ((,) <$> red <*> red) (startingState "helloworld" [Rose (rec Red 0 5) [], Rose (rec Red 5 10) []]) `shouldBe` Result (AssignmentState 10 (Info.SourcePos 1 11) (Source "") [], (Out "hello", Out "world"))
|
||||
|
||||
describe "Alternative" $ do
|
||||
it "attempts multiple alternatives" $
|
||||
runAssignment (green <|> red) [ast Red "hello" []] `shouldBe` Result ([], Out "hello")
|
||||
runAssignment (green <|> red) (startingState "hello" [Rose (rec Red 0 5) []]) `shouldBe` Result (AssignmentState 5 (Info.SourcePos 1 6) (Source "") [], Out "hello")
|
||||
|
||||
it "matches repetitions" $
|
||||
let w = words "colourless green ideas sleep furiously" in
|
||||
runAssignment (many red) (flip (ast Red) [] <$> w) `shouldBe` Result ([], Out <$> w)
|
||||
let s = "colourless green ideas sleep furiously"
|
||||
w = words s
|
||||
(_, nodes) = foldl (\ (i, prev) word -> (i + B.length word + 1, prev <> [Rose (rec Red i (i + B.length word)) []])) (0, []) w in
|
||||
runAssignment (many red) (startingState s nodes) `shouldBe` Result (AssignmentState (B.length s) (Info.SourcePos 1 (succ (B.length s))) (Source "") [], Out <$> w)
|
||||
|
||||
it "matches one-or-more repetitions against one or more input nodes" $
|
||||
runAssignment (some red) [ast Red "hello" []] `shouldBe` Result ([], [Out "hello"])
|
||||
runAssignment (some red) (startingState "hello" [Rose (rec Red 0 5) []]) `shouldBe` Result (AssignmentState 5 (Info.SourcePos 1 6) (Source "") [], [Out "hello"])
|
||||
|
||||
describe "symbol" $ do
|
||||
it "matches nodes with the same symbol" $
|
||||
runAssignment red [ast Red "hello" []] `shouldBe` Result ([], Out "hello")
|
||||
snd <$> runAssignment red (startingState "hello" [Rose (rec Red 0 5) []]) `shouldBe` Result (Out "hello")
|
||||
|
||||
it "does not advance past the current node" $
|
||||
fst <$> runAssignment (symbol Red) [ Rose (Node Red "hi") [] ] `shouldBe` Result [ Rose (Node Red "hi") [] ]
|
||||
fst <$> runAssignment (symbol Red) (startingState "hi" [ Rose (rec Red 0 2) [] ]) `shouldBe` Result (AssignmentState 0 (Info.SourcePos 1 1) (Source "hi") [ Rose (rec Red 0 2) [] ])
|
||||
|
||||
describe "source" $ do
|
||||
it "produces the node’s source" $
|
||||
snd <$> runAssignment source [ Rose (Node Red "hi") [] ] `shouldBe` Result "hi"
|
||||
assignAll source (Source "hi") [ Rose (rec Red 0 2) [] ] `shouldBe` Result "hi"
|
||||
|
||||
it "advances past the current node" $
|
||||
fst <$> runAssignment source [ Rose (Node Red "hi") [] ] `shouldBe` Result []
|
||||
fst <$> runAssignment source (startingState "hi" [ Rose (rec Red 0 2) [] ]) `shouldBe` Result (AssignmentState 2 (Info.SourcePos 1 3) (Source "") [])
|
||||
|
||||
describe "children" $ do
|
||||
it "advances past the current node" $
|
||||
fst <$> runAssignment (children (pure (Out ""))) [ast Red "a" []] `shouldBe` Result []
|
||||
fst <$> runAssignment (children (pure (Out ""))) (startingState "a" [Rose (rec Red 0 1) []]) `shouldBe` Result (AssignmentState 1 (Info.SourcePos 1 2) (Source "") [])
|
||||
|
||||
it "matches if its subrule matches" $
|
||||
() <$ runAssignment (children red) [ast Blue "b" [ast Red "a" []]] `shouldBe` Result ()
|
||||
() <$ runAssignment (children red) (startingState "a" [Rose (rec Blue 0 1) [Rose (rec Red 0 1) []]]) `shouldBe` Result ()
|
||||
|
||||
it "does not match if its subrule does not match" $
|
||||
runAssignment (children red) [ast Blue "b" [ast Green "a" []]] `shouldBe` Error []
|
||||
let errors r = case r of { Result _ -> [] ; Error e -> e } in
|
||||
Prologue.length (errors (runAssignment (children red) (startingState "a" [Rose (rec Blue 0 1) [Rose (rec Green 0 1) []]]))) `shouldBe` 1
|
||||
|
||||
it "matches nested children" $ do
|
||||
runAssignment
|
||||
(symbol Red *> children (symbol Green *> children (symbol Blue *> source)))
|
||||
[ ast Red "" [ ast Green "" [ ast Blue "1" [] ] ] ]
|
||||
(startingState "1" [ Rose (rec Red 0 1) [ Rose (rec Green 0 1) [ Rose (rec Blue 0 1) [] ] ] ])
|
||||
`shouldBe`
|
||||
Result ([], "1")
|
||||
Result (AssignmentState 1 (Info.SourcePos 1 2) (Source "") [], "1")
|
||||
|
||||
it "continues after children" $ do
|
||||
runAssignment
|
||||
(many (symbol Red *> children (symbol Green *> source)
|
||||
<|> symbol Blue *> source))
|
||||
[ ast Red "" [ ast Green "B" [] ]
|
||||
, ast Blue "C" [] ]
|
||||
(startingState "BC" [ Rose (rec Red 0 1) [ Rose (rec Green 0 1) [] ]
|
||||
, Rose (rec Blue 1 2) [] ])
|
||||
`shouldBe`
|
||||
Result ([], ["B", "C"])
|
||||
Result (AssignmentState 2 (Info.SourcePos 1 3) (Source "") [], ["B", "C"])
|
||||
|
||||
it "matches multiple nested children" $ do
|
||||
runAssignment
|
||||
(symbol Red *> children (many (symbol Green *> children (symbol Blue *> source))))
|
||||
[ ast Red "" [ ast Green "" [ ast Blue "1" [] ]
|
||||
, ast Green "" [ ast Blue "2" [] ] ] ]
|
||||
(startingState "12" [ Rose (rec Red 0 2) [ Rose (rec Green 0 1) [ Rose (rec Blue 0 1) [] ]
|
||||
, Rose (rec Green 1 2) [ Rose (rec Blue 1 2) [] ] ] ])
|
||||
`shouldBe`
|
||||
Result ([], ["1", "2"])
|
||||
Result (AssignmentState 2 (Info.SourcePos 1 3) (Source "") [], ["1", "2"])
|
||||
|
||||
ast :: grammar -> ByteString -> [AST grammar] -> AST grammar
|
||||
ast g s c = Rose (Node g s) c
|
||||
rec :: symbol -> Int -> Int -> Record '[symbol, Range, SourceSpan]
|
||||
rec symbol start end = symbol :. Range start end :. Info.SourceSpan (Info.SourcePos 1 (succ start)) (Info.SourcePos 1 (succ end)) :. Nil
|
||||
|
||||
startingState :: ByteString -> [AST grammar] -> AssignmentState grammar
|
||||
startingState = AssignmentState 0 (Info.SourcePos 1 1) . Source
|
||||
|
||||
data Grammar = Red | Green | Blue
|
||||
deriving (Eq, Show)
|
||||
@ -83,11 +93,11 @@ instance Symbol Grammar where
|
||||
data Out = Out ByteString
|
||||
deriving (Eq, Show)
|
||||
|
||||
red :: Assignment Grammar Out
|
||||
red :: Assignment (Node Grammar) Out
|
||||
red = Out <$ symbol Red <*> source
|
||||
|
||||
green :: Assignment Grammar Out
|
||||
green :: Assignment (Node Grammar) Out
|
||||
green = Out <$ symbol Green <*> source
|
||||
|
||||
blue :: Assignment Grammar Out
|
||||
blue :: Assignment (Node Grammar) Out
|
||||
blue = Out <$ symbol Blue <*> source
|
||||
|
Loading…
Reference in New Issue
Block a user