1
1
mirror of https://github.com/github/semantic.git synced 2025-01-02 12:23:08 +03:00
semantic/src/Interpreter.hs

125 lines
6.1 KiB
Haskell

{-# LANGUAGE GADTs, RankNTypes, ScopedTypeVariables #-}
module Interpreter (diffTerms, runAlgorithm, runAlgorithmSteps) where
import Algorithm
import Control.Monad.Free.Freer
import Data.Align.Generic
import Data.Functor.Both
import Data.Functor.Classes (Eq1)
import RWS
import Data.Record
import Data.These
import Diff
import Info hiding (Return)
import Patch (inserting, deleting, replacing, patchSum)
import Prologue hiding (lookup)
import Syntax as S hiding (Return)
import Term
-- | Diff two terms recursively, given functions characterizing the diffing.
diffTerms :: (Eq leaf, Hashable leaf, HasField fields Category)
=> SyntaxTerm leaf fields -- ^ A term representing the old state.
-> SyntaxTerm leaf fields -- ^ A term representing the new state.
-> SyntaxDiff leaf fields
diffTerms a b = stripDiff (runAlgorithm (decomposeWith algorithmWithTerms) ((diff `on` defaultFeatureVectorDecorator getLabel) a b))
-- | Compute the label for a given term, suitable for inclusion in a _p_,_q_-gram.
getLabel :: HasField fields Category => TermF (Syntax leaf) (Record fields) a -> (Category, Maybe leaf)
getLabel (h :< t) = (Info.category h, case t of
Leaf s -> Just s
_ -> Nothing)
-- | Run an Algorithm to completion by repeated application of a stepping operation and return its result.
runAlgorithm :: forall f result
. (forall x. f x -> Freer f x)
-> Freer f result
-> result
runAlgorithm decompose = go
where go :: Freer f x -> x
go = iterFreer (\ algorithm yield -> yield (go (decompose algorithm)))
-- | Run an Algorithm to completion by repeated application of a stepping operation, returning the list of steps taken up to and including the final result.
runAlgorithmSteps :: (forall x. f x -> Freer f x)
-> Freer f result
-> [Freer f result]
runAlgorithmSteps decompose = go
where go algorithm = case algorithm of
Return a -> [Return a]
step `Then` yield -> algorithm : go (decompose step >>= yield)
-- | Decompose a step of an algorithm into the next steps to perform using a helper function.
decomposeWith :: (Traversable f, GAlign f, Eq1 f, HasField fields (Maybe FeatureVector), HasField fields Category)
=> (Term f (Record fields) -> Term f (Record fields) -> Algorithm (Term f (Record fields)) (Diff f (Record fields)) (Diff f (Record fields)))
-> AlgorithmF (Term f (Record fields)) (Diff f (Record fields)) result
-> Algorithm (Term f (Record fields)) (Diff f (Record fields)) result
decomposeWith algorithmWithTerms step = case step of
Diff t1 t2 -> algorithmWithTerms t1 t2
Linear t1 t2 -> case galignWith diffThese (unwrap t1) (unwrap t2) of
Just result -> wrap . (both (extract t1) (extract t2) :<) <$> sequenceA result
_ -> byReplacing t1 t2
RWS as bs -> traverse diffThese (rws (editDistanceUpTo defaultM) comparable as bs)
Delete a -> pure (deleting a)
Insert b -> pure (inserting b)
Replace a b -> pure (replacing a b)
-- | Construct an algorithm to diff a pair of terms.
algorithmWithTerms :: SyntaxTerm leaf fields
-> SyntaxTerm leaf fields
-> Algorithm (SyntaxTerm leaf fields) (SyntaxDiff leaf fields) (SyntaxDiff leaf fields)
algorithmWithTerms t1 t2 = case (unwrap t1, unwrap t2) of
(Indexed a, Indexed b) ->
annotate . Indexed <$> byRWS a b
(S.Module idA a, S.Module idB b) ->
(annotate .) . S.Module <$> linearly idA idB <*> byRWS a b
(S.FunctionCall identifierA typeParamsA argsA, S.FunctionCall identifierB typeParamsB argsB) -> fmap annotate $
S.FunctionCall <$> linearly identifierA identifierB
<*> byRWS typeParamsA typeParamsB
<*> byRWS argsA argsB
(S.Switch exprA casesA, S.Switch exprB casesB) -> fmap annotate $
S.Switch <$> byRWS exprA exprB
<*> byRWS casesA casesB
(S.Object tyA a, S.Object tyB b) -> fmap annotate $
S.Object <$> diffMaybe tyA tyB
<*> byRWS a b
(Commented commentsA a, Commented commentsB b) -> fmap annotate $
Commented <$> byRWS commentsA commentsB
<*> diffMaybe a b
(Array tyA a, Array tyB b) -> fmap annotate $
Array <$> diffMaybe tyA tyB
<*> byRWS a b
(S.Class identifierA clausesA expressionsA, S.Class identifierB clausesB expressionsB) -> fmap annotate $
S.Class <$> linearly identifierA identifierB
<*> byRWS clausesA clausesB
<*> byRWS expressionsA expressionsB
(S.Method clausesA identifierA receiverA paramsA expressionsA, S.Method clausesB identifierB receiverB paramsB expressionsB) -> fmap annotate $
S.Method <$> byRWS clausesA clausesB
<*> linearly identifierA identifierB
<*> diffMaybe receiverA receiverB
<*> byRWS paramsA paramsB
<*> byRWS expressionsA expressionsB
(S.Function idA paramsA bodyA, S.Function idB paramsB bodyB) -> fmap annotate $
S.Function <$> linearly idA idB
<*> byRWS paramsA paramsB
<*> byRWS bodyA bodyB
_ -> linearly t1 t2
where
annotate = wrap . (both (extract t1) (extract t2) :<)
-- | Test whether two terms are comparable.
comparable :: (Functor f, HasField fields Category) => Term f (Record fields) -> Term f (Record fields) -> Bool
comparable = (==) `on` category . extract
-- | How many nodes to consider for our constant-time approximation to tree edit distance.
defaultM :: Integer
defaultM = 10
-- | Return an edit distance as the sum of it's term sizes, given an cutoff and a syntax of terms 'f a'.
-- | Computes a constant-time approximation to the edit distance of a diff. This is done by comparing at most _m_ nodes, & assuming the rest are zero-cost.
editDistanceUpTo :: (GAlign f, Foldable f, Functor f) => Integer -> These (Term f (Record fields)) (Term f (Record fields)) -> Int
editDistanceUpTo m = these termSize termSize (\ a b -> diffSum (patchSum termSize) (cutoff m (approximateDiff a b)))
where diffSum patchCost = sum . fmap (maybe 0 patchCost)
approximateDiff a b = maybe (replacing a b) wrap (galignWith (these deleting inserting approximateDiff) (unwrap a) (unwrap b))