diff --git a/semantic.cabal b/semantic.cabal index 1ca03f893..2d7f04a5c 100644 --- a/semantic.cabal +++ b/semantic.cabal @@ -36,6 +36,7 @@ library , Control.Abstract.Addressable , Control.Abstract.Analysis , Control.Abstract.Evaluator + , Control.Abstract.Matching , Control.Abstract.Value -- Control flow , Control.Effect @@ -234,6 +235,7 @@ test-suite test , Diffing.Algorithm.SES.Spec , Diffing.Interpreter.Spec , Integration.Spec + , Matching.Go.Spec , Rendering.Imports.Spec , Rendering.TOC.Spec , Semantic.Spec diff --git a/src/Control/Abstract/Matching.hs b/src/Control/Abstract/Matching.hs new file mode 100644 index 000000000..829403a0f --- /dev/null +++ b/src/Control/Abstract/Matching.hs @@ -0,0 +1,124 @@ +{-# LANGUAGE GADTs, TypeOperators #-} + +module Control.Abstract.Matching + ( Matcher + , TermMatcher + , target + , ensure + , match + , matchM + , narrow + , narrow' + , succeeds + , fails + , runMatcher + ) where + +import Data.Algebra +import Prologue +import Data.Term + +-- | A @Matcher t a@ is a tree automaton that matches some 'Recursive' and 'Corecursive' type @t@, yielding values of type @a@. +-- Matching operations are implicitly recursive: when you run a 'Matcher', it is applied bottom-up. +-- If a matching operation returns a value, it is assumed to have succeeded. You use the 'guard', 'narrow', and 'ensure' +-- functions to control whether a given datum is matched. The @t@ datum matched by a matcher is immutable; future APIs will +-- provide the ability to rewrite and change these data. +data Matcher t a where + -- TODO: Choice is inflexible and slow. A Union over fs can be queried for its index, and we can build a jump table over that. + -- We can copy NonDet to have fair conjunction or disjunction. + Choice :: Matcher t a -> Matcher t a -> Matcher t a + Target :: Matcher t t + Empty :: Matcher t a + -- We could have implemented this by changing the semantics of how Then is interpreted, but that would make Then and Sequence inconsistent. + Match :: (t -> Maybe u) -> Matcher u a -> Matcher t a + Pure :: a -> Matcher t a + Then :: Matcher t b -> (b -> Matcher t a) -> Matcher t a + +-- | A convenience alias for matchers that both target and return 'Term' values. +type TermMatcher fs ann = Matcher (Term (Union fs) ann) (Term (Union fs) ann) + +instance Functor (Matcher t) where + fmap = liftA + +instance Applicative (Matcher t) where + pure = Pure + -- We can add a Sequence constructor to optimize this when we need. + (<*>) = ap + +instance Alternative (Matcher t) where + empty = Empty + (<|>) = Choice + +instance Monad (Matcher t) where + (>>=) = Then + +-- | This matcher always succeeds. +succeeds :: Matcher t () +succeeds = guard True + +-- | This matcher always fails. +fails :: Matcher t () +fails = guard False + +-- | 'target' extracts the 't' that a given 'Matcher' is operating upon. +-- Similar to a reader monad's 'ask' function. +target :: Matcher t t +target = Target + +-- | 'ensure' succeeds iff the provided predicate function returns true when applied to the matcher's 'target'. +ensure :: (t -> Bool) -> Matcher t () +ensure f = target >>= \c -> guard (f c) + +-- | 'matchm' takes a modification function and a new matcher action the target parameter of which +-- is the result of the modification function. If the modification function returns 'Just' when +-- applied to the current 'target', the given matcher is executed with the result of that 'Just' +-- as the new target; if 'Nothing' is returned, the action fails. +matchM :: (t -> Maybe u) -> Matcher u a -> Matcher t a +matchM = Match + +-- | 'match' is a more specific version of 'matchM' optimized for targeting union types. If the target +-- can be projected to the type expected by the modification function, the provided matcher action will +-- execute. An example: +-- +-- @ +-- integerMatcher :: (Lit.Integer :< fs) => Matcher (Term (Union fs) ann) ByteString +-- integerMatcher = match Lit.integerContent target +-- @ +-- +-- @integerMatcher@ accepts any union type that contains an integer literal, and only succeeds if the +-- target in question is actually an integer literal. +match :: (f :< fs) + => (f (Term (Union fs) ann) -> b) + -> Matcher b a + -> Matcher (Term (Union fs) ann) a +match f = Match (fmap f . prj . termOut) + +-- | @narrow'@ attempts to project a union-type target to a more specific type. +narrow' :: (f :< fs) => Matcher (Term (Union fs) ann) (Maybe (f (Term (Union fs) ann))) +narrow' = fmap (prj . termOut) Target + +-- | 'narrow' behaves as @narrow'@, but fails if the target cannot be thus projected. +narrow :: (f :< fs) => Matcher (Term (Union fs) ann) (f (Term (Union fs) ann)) +narrow = narrow' >>= foldMapA pure + +-- | The entry point for executing matchers. +-- The Alternative parameter should be specialized by the calling context. If you want a single +-- result, specialize it to 'Maybe'; if you want a list of all terms and subterms matched by the +-- provided 'Matcher' action, specialize it to '[]'. +runMatcher :: (Alternative m, Monad m, Corecursive t, Recursive t, Foldable (Base t)) + => Matcher t a + -> t + -> m a +runMatcher m = para (paraMatcher m) + +paraMatcher :: (Alternative m, Monad m, Corecursive t, Foldable (Base t)) => Matcher t a -> RAlgebra (Base t) t (m a) +paraMatcher m t = interp (embedTerm t) m <|> foldMapA snd t + +-- Simple interpreter. +interp :: (Alternative m, Monad m) => t -> Matcher t a -> m a +interp t (Choice a b) = interp t a <|> interp t b +interp t Target = pure t +interp t (Match f m) = foldMapA (`interp` m) (f t) +interp _ (Pure a) = pure a +interp _ Empty = empty +interp t (Then m f) = interp t m >>= interp t . f diff --git a/src/Control/Abstract/Value.hs b/src/Control/Abstract/Value.hs index b880a2167..a6f9cfcdc 100644 --- a/src/Control/Abstract/Value.hs +++ b/src/Control/Abstract/Value.hs @@ -184,7 +184,7 @@ makeNamespace :: ( MonadValue value m makeNamespace name addr supers = do superEnv <- mconcat <$> traverse scopedEnvironment supers namespaceEnv <- Env.head <$> getEnv - v <- namespace name (Env.overwritingUnion superEnv namespaceEnv) + v <- namespace name (Env.mergeNewer superEnv namespaceEnv) v <$ assign addr v diff --git a/src/Data/Abstract/Environment.hs b/src/Data/Abstract/Environment.hs index 3871abdad..72b87f1af 100644 --- a/src/Data/Abstract/Environment.hs +++ b/src/Data/Abstract/Environment.hs @@ -5,7 +5,7 @@ module Data.Abstract.Environment , bind , delete , head - , overwritingUnion + , mergeNewer , insert , lookup , names @@ -77,8 +77,8 @@ head (Environment (a :| _)) = Environment (a :| []) -- | Take the union of two environments. When duplicate keys are found in the -- name to address map, the second definition wins. -overwritingUnion :: Environment l a -> Environment l a -> Environment l a -overwritingUnion (Environment (a :| as)) (Environment (b :| bs)) = +mergeNewer :: Environment l a -> Environment l a -> Environment l a +mergeNewer (Environment (a :| as)) (Environment (b :| bs)) = Environment (combine a b :| alignWith (mergeThese combine) as bs) where combine = Map.unionWith (flip const) diff --git a/src/Data/Abstract/Value.hs b/src/Data/Abstract/Value.hs index 83aa8ebc4..756035171 100644 --- a/src/Data/Abstract/Value.hs +++ b/src/Data/Abstract/Value.hs @@ -222,7 +222,7 @@ instance (Monad m, MonadEvaluatable term Value m) => MonadValue Value m where namespace n env = do maybeAddr <- lookupEnv n env' <- maybe (pure mempty) (asNamespaceEnv <=< deref) maybeAddr - pure (injValue (Namespace n (Env.overwritingUnion env' env))) + pure (injValue (Namespace n (Env.mergeNewer env' env))) where asNamespaceEnv v | Just (Namespace _ env') <- prjValue v = pure env' | otherwise = fail ("expected " <> show v <> " to be a namespace") diff --git a/src/Data/Algebra.hs b/src/Data/Algebra.hs index c0fbb806d..c2aa23682 100644 --- a/src/Data/Algebra.hs +++ b/src/Data/Algebra.hs @@ -1,18 +1,19 @@ {-# LANGUAGE RankNTypes #-} module Data.Algebra -( FAlgebra -, RAlgebra -, OpenFAlgebra -, OpenRAlgebra -, Subterm(..) -, SubtermAlgebra -, embedSubterm -, foldSubterms -, fToR -, fToOpenR -, rToOpenR -, openFToOpenR -) where + ( FAlgebra + , RAlgebra + , OpenFAlgebra + , OpenRAlgebra + , Subterm(..) + , SubtermAlgebra + , embedSubterm + , embedTerm + , foldSubterms + , fToR + , fToOpenR + , rToOpenR + , openFToOpenR + ) where import Data.Bifunctor import Data.Functor.Classes.Generic as X @@ -65,6 +66,10 @@ type SubtermAlgebra f t a = f (Subterm t a) -> a foldSubterms :: Recursive t => SubtermAlgebra (Base t) t a -> t -> a foldSubterms algebra = go where go = algebra . fmap (Subterm <*> go) . project +-- | Extract a term from the carrier tuple associated with a paramorphism. See also 'embedSubterm'. +embedTerm :: Corecursive t => Base t (t, a) -> t +embedTerm e = embed (fst <$> e) + -- | Extract a term from said term's 'Base' functor populated with 'Subterm' fields. embedSubterm :: Corecursive t => Base t (Subterm t a) -> t embedSubterm e = embed (subterm <$> e) diff --git a/test/Matching/Go/Spec.hs b/test/Matching/Go/Spec.hs new file mode 100644 index 000000000..496e4a1ac --- /dev/null +++ b/test/Matching/Go/Spec.hs @@ -0,0 +1,40 @@ +{-# LANGUAGE TypeOperators #-} + +module Matching.Go.Spec (spec) where + +import Control.Abstract.Matching +import Data.Abstract.Module +import Data.List +import qualified Data.Syntax.Declaration as Decl +import qualified Data.Syntax.Literal as Lit +import qualified Data.Syntax.Statement as Stmt +import Data.Union +import SpecHelpers + +-- This gets the ByteString contents of all integers +integerMatcher :: (Lit.Integer :< fs) => Matcher (Term (Union fs) ann) ByteString +integerMatcher = match Lit.integerContent target + +-- This matches all for-loops with its index variable new variable bound to 0, +-- e.g. `for i := 0; i < 10; i++` +loopMatcher :: ( Stmt.For :< fs + , Stmt.Assignment :< fs + , Lit.Integer :< fs) + => TermMatcher fs ann +loopMatcher = target <* go where + go = match Stmt.forBefore $ + match Stmt.assignmentValue $ + match Lit.integerContent $ + ensure (== "0") + +spec :: Spec +spec = describe "matching/go" $ do + it "extracts integers" $ do + parsed <- moduleBody <$> parseFile goParser Nothing "test/fixtures/go/matching/integers.go" + let matched = runMatcher integerMatcher parsed + sort matched `shouldBe` ["1", "2", "3"] + + it "counts for loops" $ do + parsed <- moduleBody <$> parseFile goParser Nothing "test/fixtures/go/matching/for.go" + let matched = runMatcher @[] loopMatcher parsed + length matched `shouldBe` 2 diff --git a/test/Spec.hs b/test/Spec.hs index 5a9b9bafa..85fbc1474 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -15,6 +15,7 @@ import qualified Diffing.Algorithm.RWS.Spec import qualified Diffing.Algorithm.SES.Spec import qualified Diffing.Interpreter.Spec import qualified Integration.Spec +import qualified Matching.Go.Spec import qualified Rendering.TOC.Spec import qualified Rendering.Imports.Spec import qualified Semantic.Spec @@ -41,6 +42,7 @@ main = hspec $ do describe "Diffing.Algorithm.RWS" Diffing.Algorithm.RWS.Spec.spec describe "Diffing.Algorithm.SES" Diffing.Algorithm.SES.Spec.spec describe "Diffing.Interpreter" Diffing.Interpreter.Spec.spec + describe "Matching" Matching.Go.Spec.spec describe "Rendering.TOC" Rendering.TOC.Spec.spec describe "Rendering.Imports" Rendering.Imports.Spec.spec describe "Semantic" Semantic.Spec.spec diff --git a/test/fixtures/go/matching/for.go b/test/fixtures/go/matching/for.go new file mode 100644 index 000000000..2b0e4fda7 --- /dev/null +++ b/test/fixtures/go/matching/for.go @@ -0,0 +1,9 @@ +package main + +func merle() { + for ii := 0; ii < 10; ii+=1 { + for jj := 0; jj < 10; jj+=1 { + print(ii + jj) + } + } +} diff --git a/test/fixtures/go/matching/integers.go b/test/fixtures/go/matching/integers.go new file mode 100644 index 000000000..caecef377 --- /dev/null +++ b/test/fixtures/go/matching/integers.go @@ -0,0 +1,5 @@ +package main + +func taako() { + return 1 + 2 + 3 +}