1
1
mirror of https://github.com/github/semantic.git synced 2024-12-29 18:06:14 +03:00

Merge pull request #1341 from github/alternative-algorithms

Alternative Algorithms
This commit is contained in:
Rob Rix 2017-09-25 11:46:47 -04:00 committed by GitHub
commit 7e167a75b4
3 changed files with 89 additions and 54 deletions

View File

@ -1,8 +1,8 @@
{-# LANGUAGE DataKinds, DefaultSignatures, GADTs, RankNTypes, TypeOperators, UndecidableInstances #-}
module Algorithm where
import Control.Applicative (liftA2)
import Control.Monad (guard, join)
import Control.Applicative (Alternative(..))
import Control.Monad (guard)
import Control.Monad.Free.Freer
import Data.Functor.Classes
import Data.List.NonEmpty (NonEmpty(..))
@ -22,14 +22,18 @@ data AlgorithmF term diff result where
Linear :: term ann1 -> term ann2 -> AlgorithmF term (diff ann1 ann2) (diff ann1 ann2)
-- | Diff two lists of terms by each elements similarity in O(n³ log n), resulting in a list of diffs.
RWS :: [term ann1] -> [term ann2] -> AlgorithmF term (diff ann1 ann2) [diff ann1 ann2]
-- | Delete a term..
-- | Delete a term.
Delete :: term ann1 -> AlgorithmF term (diff ann1 ann2) (diff ann1 ann2)
-- | Insert a term.
Insert :: term ann2 -> AlgorithmF term (diff ann1 ann2) (diff ann1 ann2)
-- | Replace one term with another.
Replace :: term ann1 -> term ann2 -> AlgorithmF term (diff ann1 ann2) (diff ann1 ann2)
-- | An 'Algorithm' that always fails.
Empty :: AlgorithmF term diff a
-- | An 'Algorithm' to try one of two alternatives.
Alt :: a -> a -> AlgorithmF term diff a
-- | The free applicative for 'AlgorithmF'. This enables us to construct diff values using <$> and <*> notation.
-- | The free(r) monad for 'AlgorithmF'. This enables us to construct algorithms to diff using '<$>', '<*>', '>>=', and do-notation.
type Algorithm term diff = Freer (AlgorithmF term diff)
@ -73,40 +77,50 @@ byReplacing = (liftF .) . Replace
instance (Show1 term, Show ann1, Show ann2) => Show1 (AlgorithmF term (diff ann1 ann2)) where
liftShowsPrec _ _ d algorithm = case algorithm of
liftShowsPrec sp _ d algorithm = case algorithm of
Algorithm.Diff t1 t2 -> showsBinaryWith showsTerm showsTerm "Diff" d t1 t2
Linear t1 t2 -> showsBinaryWith showsTerm showsTerm "Linear" d t1 t2
RWS as bs -> showsBinaryWith (liftShowsPrec showsTerm (liftShowList showsPrec showList)) (liftShowsPrec showsTerm (liftShowList showsPrec showList)) "RWS" d as bs
Delete t1 -> showsUnaryWith showsTerm "Delete" d t1
Insert t2 -> showsUnaryWith showsTerm "Insert" d t2
Replace t1 t2 -> showsBinaryWith showsTerm showsTerm "Replace" d t1 t2
Empty -> showString "Empty"
Alt a b -> showsBinaryWith sp sp "Alt" d a b
where showsTerm :: (Show1 term, Show ann) => Int -> term ann -> ShowS
showsTerm = liftShowsPrec showsPrec showList
instance Alternative (Algorithm term diff) where
empty = Empty `Then` return
(Empty `Then` _) <|> b = b
a <|> (Empty `Then` _) = a
a <|> b = Alt a b `Then` id
-- | Diff two terms based on their generic Diffable instances. If the terms are not diffable
-- (represented by a Nothing diff returned from algorithmFor) replace one term with another.
algorithmForTerms :: (Functor syntax, Diffable syntax)
algorithmForTerms :: Diffable syntax
=> Term syntax ann1
-> Term syntax ann2
-> Algorithm (Term syntax) (Diff syntax ann1 ann2) (Diff syntax ann1 ann2)
algorithmForTerms t1 t2 = fromMaybe (byReplacing t1 t2) (algorithmForComparableTerms t1 t2)
algorithmForComparableTerms :: (Functor syntax, Diffable syntax)
=> Term syntax ann1
-> Term syntax ann2
-> Maybe (Algorithm (Term syntax) (Diff syntax ann1 ann2) (Diff syntax ann1 ann2))
algorithmForComparableTerms (Term (In ann1 f1)) (Term (In ann2 f2)) = fmap (merge (ann1, ann2)) <$> algorithmFor f1 f2
algorithmForTerms (Term (In ann1 f1)) (Term (In ann2 f2)) = merge (ann1, ann2) <$> algorithmFor f1 f2
-- | A type class for determining what algorithm to use for diffing two terms.
class Diffable f where
algorithmFor :: f (term ann1) -> f (term ann2) -> Maybe (Algorithm term (diff ann1 ann2) (f (diff ann1 ann2)))
default algorithmFor :: (Generic1 f, GDiffable (Rep1 f)) => f (term ann1) -> f (term ann2) -> Maybe (Algorithm term (diff ann1 ann2) (f (diff ann1 ann2)))
algorithmFor :: f (term ann1)
-> f (term ann2)
-> Algorithm term (diff ann1 ann2) (f (diff ann1 ann2))
default
algorithmFor :: (Generic1 f, GDiffable (Rep1 f))
=> f (term ann1)
-> f (term ann2)
-> Algorithm term (diff ann1 ann2) (f (diff ann1 ann2))
algorithmFor = genericAlgorithmFor
genericAlgorithmFor :: (Generic1 f, GDiffable (Rep1 f)) => f (term ann1) -> f (term ann2) -> Maybe (Algorithm term (diff ann1 ann2) (f (diff ann1 ann2)))
genericAlgorithmFor a b = fmap to1 <$> galgorithmFor (from1 a) (from1 b)
genericAlgorithmFor :: (Generic1 f, GDiffable (Rep1 f)) => f (term ann1) -> f (term ann2) -> Algorithm term (diff ann1 ann2) (f (diff ann1 ann2))
genericAlgorithmFor a b = to1 <$> galgorithmFor (from1 a) (from1 b)
-- | Diff a Union of Syntax terms. Left is the "rest" of the Syntax terms in the Union,
@ -114,53 +128,53 @@ genericAlgorithmFor a b = fmap to1 <$> galgorithmFor (from1 a) (from1 b)
-- diff terms from the "rest" of the Union, and 'inj' adds the diff terms into the Union.
-- NB: If Left or Right Syntax terms in our Union don't match, we fail fast by returning Nothing.
instance Apply Diffable fs => Diffable (Union fs) where
algorithmFor u1 u2 = join (apply2' (Proxy :: Proxy Diffable) (\ inj f1 f2 -> fmap inj <$> algorithmFor f1 f2) u1 u2)
algorithmFor u1 u2 = fromMaybe empty (apply2' (Proxy :: Proxy Diffable) (\ inj f1 f2 -> inj <$> algorithmFor f1 f2) u1 u2)
-- | Diff two list parameters using RWS.
instance Diffable [] where
algorithmFor a b = Just (byRWS a b)
algorithmFor a b = byRWS a b
-- | A generic type class for diffing two terms defined by the Generic1 interface.
class GDiffable f where
galgorithmFor :: f (term ann1) -> f (term ann2) -> Maybe (Algorithm term (diff ann1 ann2) (f (diff ann1 ann2)))
galgorithmFor :: f (term ann1) -> f (term ann2) -> Algorithm term (diff ann1 ann2) (f (diff ann1 ann2))
-- | Diff two constructors (M1 is the Generic1 newtype for meta-information (possibly related to type constructors, record selectors, and data types))
instance GDiffable f => GDiffable (M1 i c f) where
galgorithmFor (M1 a) (M1 b) = fmap M1 <$> galgorithmFor a b
galgorithmFor (M1 a) (M1 b) = M1 <$> galgorithmFor a b
-- | Diff the fields of a product type.
-- i.e. data Foo a b = Foo a b (the 'Foo a b' is captured by 'a :*: b').
instance (GDiffable f, GDiffable g) => GDiffable (f :*: g) where
galgorithmFor (a1 :*: b1) (a2 :*: b2) = liftA2 (:*:) <$> galgorithmFor a1 a2 <*> galgorithmFor b1 b2
galgorithmFor (a1 :*: b1) (a2 :*: b2) = (:*:) <$> galgorithmFor a1 a2 <*> galgorithmFor b1 b2
-- | Diff the constructors of a sum type.
-- i.e. data Foo a = Foo a | Bar a (the 'Foo a' is captured by L1 and 'Bar a' is R1).
instance (GDiffable f, GDiffable g) => GDiffable (f :+: g) where
galgorithmFor (L1 a) (L1 b) = fmap L1 <$> galgorithmFor a b
galgorithmFor (R1 a) (R1 b) = fmap R1 <$> galgorithmFor a b
galgorithmFor _ _ = Nothing
galgorithmFor (L1 a) (L1 b) = L1 <$> galgorithmFor a b
galgorithmFor (R1 a) (R1 b) = R1 <$> galgorithmFor a b
galgorithmFor _ _ = empty
-- | Diff two parameters (Par1 is the Generic1 newtype representing a type parameter).
-- i.e. data Foo a = Foo a (the 'a' is captured by Par1).
instance GDiffable Par1 where
galgorithmFor (Par1 a) (Par1 b) = Just (Par1 <$> linearly a b)
galgorithmFor (Par1 a) (Par1 b) = Par1 <$> linearly a b
-- | Diff two constant parameters (K1 is the Generic1 newtype representing type parameter constants).
-- i.e. data Foo = Foo Int (the 'Int' is a constant parameter).
instance Eq c => GDiffable (K1 i c) where
galgorithmFor (K1 a) (K1 b) = guard (a == b) *> Just (pure (K1 a))
galgorithmFor (K1 a) (K1 b) = guard (a == b) *> pure (K1 a)
-- | Diff two terms whose constructors contain 0 type parameters.
-- i.e. data Foo = Foo.
instance GDiffable U1 where
galgorithmFor _ _ = Just (pure U1)
galgorithmFor _ _ = pure U1
-- | Diff two lists of parameters.
instance GDiffable (Rec1 []) where
galgorithmFor a b = Just (Rec1 <$> byRWS (unRec1 a) (unRec1 b))
galgorithmFor a b = Rec1 <$> byRWS (unRec1 a) (unRec1 b)
-- | Diff two non-empty lists of parameters.
instance GDiffable (Rec1 NonEmpty) where
galgorithmFor (Rec1 (a:|as)) (Rec1 (b:|bs)) = Just $ do
galgorithmFor (Rec1 (a:|as)) (Rec1 (b:|bs)) = do
d:ds <- byRWS (a:as) (b:bs)
pure (Rec1 (d :| ds))

View File

@ -1,7 +1,7 @@
{-# LANGUAGE DeriveAnyClass, TypeOperators #-}
module Data.Syntax where
import Algorithm
import Algorithm hiding (Empty)
import Control.Applicative
import Control.Monad.Error.Class hiding (Error)
import Data.Align.Generic
@ -153,30 +153,38 @@ unError span Error{..} = Error.withCallStack (freezeCallStack (fromCallSiteList
data Context a = Context { contextTerms :: NonEmpty a, contextSubject :: a }
deriving (Diffable, Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
deriving (Eq, Foldable, Functor, GAlign, Generic1, Show, Traversable)
instance Diffable Context where
instance Eq1 Context where liftEq = genericLiftEq
instance Show1 Context where liftShowsPrec = genericLiftShowsPrec
focus :: Applicative f
=> (a -> f b)
-> (a -> f b)
-> Context a
-> f (Context b)
focus blur focus (Context n1 s1) = Context <$> traverse blur n1 <*> focus s1
algorithmDeletingContext :: (Apply Diffable fs, Apply Functor fs, Context :< fs)
=> TermF Context ann1 (Term (Union fs) ann1)
-> Term (Union fs) ann2
-> Maybe (Algorithm (Term (Union fs)) (Diff (Union fs) ann1 ann2) (TermF Context ann1 (Diff (Union fs) ann1 ann2)))
algorithmDeletingContext (In a1 (Context n1 s1)) s2 = fmap (In a1 . Context (deleting <$> n1)) <$> algorithmForComparableTerms s1 s2
-> Algorithm (Term (Union fs)) (Diff (Union fs) ann1 ann2) (Diff (Union fs) ann1 ann2)
algorithmDeletingContext (In a1 (Context n1 s1)) s2 = deleteF . In a1 . inj . Context (deleting <$> n1) <$> algorithmForTerms s1 s2
algorithmInsertingContext :: (Apply Diffable fs, Apply Functor fs, Context :< fs)
=> Term (Union fs) ann1
-> TermF Context ann2 (Term (Union fs) ann2)
-> Maybe (Algorithm (Term (Union fs)) (Diff (Union fs) ann1 ann2) (TermF Context ann2 (Diff (Union fs) ann1 ann2)))
algorithmInsertingContext s1 (In a2 (Context n2 s2)) = fmap (In a2 . Context (inserting <$> n2)) <$> algorithmForComparableTerms s1 s2
-> Algorithm (Term (Union fs)) (Diff (Union fs) ann1 ann2) (Diff (Union fs) ann1 ann2)
algorithmInsertingContext s1 (In a2 (Context n2 s2)) = insertF . In a2 . inj . Context (inserting <$> n2) <$> algorithmForTerms s1 s2
algorithmForContextUnions :: (Apply Diffable fs, Apply Functor fs, Context :< fs)
=> Term (Union fs) ann1
-> Term (Union fs) ann2
-> Maybe (Algorithm (Term (Union fs)) (Diff (Union fs) ann1 ann2) (Diff (Union fs) ann1 ann2))
-> Algorithm (Term (Union fs)) (Diff (Union fs) ann1 ann2) (Diff (Union fs) ann1 ann2)
algorithmForContextUnions t1 t2
| Just algo <- algorithmForComparableTerms t1 t2 = Just algo
| Just c1@(In _ Context{}) <- prjTermF (unTerm t1) = fmap (deleteF . hoistTermF inj) <$> algorithmDeletingContext c1 t2
| Just c2@(In _ Context{}) <- prjTermF (unTerm t2) = fmap (insertF . hoistTermF inj) <$> algorithmInsertingContext t1 c2
| otherwise = Nothing
= algorithmForTerms t1 t2
<|> maybe empty (`algorithmDeletingContext` t2) (prjTermF (unTerm t1))
<|> maybe empty (algorithmInsertingContext t1) (prjTermF (unTerm t2))
where prjTermF (In a u) = In a <$> prj u

View File

@ -8,18 +8,19 @@ module Interpreter
) where
import Algorithm
import Control.Applicative (Alternative(..))
import Control.Monad.Free.Freer
import Data.Align.Generic
import Data.Functor.Classes (Eq1(..))
import Data.Hashable (Hashable)
import Data.Maybe (isJust)
import Data.Maybe (fromMaybe, isJust)
import Data.Record
import Data.Text (Text)
import Data.These
import Data.Union
import qualified Data.Syntax.Declaration as Declaration
import Diff
import Info hiding (Return)
import Info hiding (Empty, Return)
import RWS
import Syntax as S hiding (Return)
import Term
@ -45,23 +46,35 @@ decoratingWith getLabel1 getLabel2 differ t1 t2 = stripDiff (differ (defaultFeat
-- | Diff a pair of terms recurisvely, using the supplied continuation and 'ComparabilityRelation'.
diffTermsWith :: forall syntax fields1 fields2
. (Eq1 syntax, GAlign syntax, Traversable syntax)
=> (Term syntax (Record (FeatureVector ': fields1)) -> Term syntax (Record (FeatureVector ': fields2)) -> Algorithm (Term syntax) (Diff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))) (Diff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)))) -- ^ A function producing syntax-directed continuations of the algorithm.
=> ( Term syntax (Record (FeatureVector ': fields1))
-> Term syntax (Record (FeatureVector ': fields2))
-> Algorithm
(Term syntax)
(Diff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)))
(Diff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)))) -- ^ A function producing syntax-directed continuations of the algorithm.
-> ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -- ^ A relation on terms used to determine comparability and equality.
-> (Term syntax (Record (FeatureVector ': fields1)) -> Term syntax (Record (FeatureVector ': fields2)) -> Bool) -- ^ A relation used to determine term equivalence.
-> Term syntax (Record (FeatureVector ': fields1)) -- ^ A term representing the old state.
-> Term syntax (Record (FeatureVector ': fields2)) -- ^ A term representing the new state.
-> Diff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -- ^ The resulting diff.
diffTermsWith refine comparable eqTerms t1 t2 = runFreer decompose (diff t1 t2)
where decompose :: AlgorithmF (Term syntax) (Diff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))) result -> Algorithm (Term syntax) (Diff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))) result
decompose step = case step of
Algorithm.Diff t1 t2 -> refine t1 t2
diffTermsWith refine comparable eqTerms t1 t2 = fromMaybe (replacing t1 t2) (go (diff t1 t2))
where go :: (Alternative m, Monad m)
=> Algorithm
(Term syntax)
(Diff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)))
(Diff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)))
-> m (Diff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)))
go = iterFreerA (\ step yield -> case step of
Algorithm.Diff t1 t2 -> (go (refine t1 t2) >>= yield) <|> yield (replacing t1 t2)
Linear t1 t2 -> case galignWith diffThese (unwrap t1) (unwrap t2) of
Just result -> merge (extract t1, extract t2) <$> sequenceA result
_ -> byReplacing t1 t2
RWS as bs -> traverse diffThese (rws comparable eqTerms as bs)
Delete a -> pure (deleting a)
Insert b -> pure (inserting b)
Replace a b -> pure (replacing a b)
Just result -> go (merge (extract t1, extract t2) <$> sequenceA result) >>= yield
_ -> yield (replacing t1 t2)
RWS as bs -> traverse (go . diffThese) (rws comparable eqTerms as bs) >>= yield
Delete a -> yield (deleting a)
Insert b -> yield (inserting b)
Replace a b -> yield (replacing a b)
Empty -> empty
Alt a b -> yield a <|> yield b)
-- | Compute the label for a given term, suitable for inclusion in a _p_,_q_-gram.
getLabel :: HasField fields Category => TermF Syntax (Record fields) a -> (Category, Maybe Text)