mirror of
https://github.com/github/semantic.git
synced 2024-12-29 18:06:14 +03:00
rws requires a feature vector.
This commit is contained in:
parent
60c4effabe
commit
9fc9827b34
@ -27,7 +27,7 @@ import Test.QuickCheck hiding (Fixed)
|
||||
import Test.QuickCheck.Random
|
||||
|
||||
-- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf).
|
||||
rws :: (Hashable label, Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), Typeable label) =>
|
||||
rws :: (Hashable label, Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), Typeable label, HasField fields (Vector.Vector Double)) =>
|
||||
-- | A function which comapres a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared.
|
||||
(Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) ->
|
||||
-- | A function to compute a label for an unpacked term.
|
||||
|
@ -9,6 +9,7 @@ import Data.Hashable
|
||||
import Data.RandomWalkSimilarity
|
||||
import Data.Record
|
||||
import Data.These
|
||||
import qualified Data.Vector as Vector
|
||||
import Diff
|
||||
import Info
|
||||
import Operation
|
||||
@ -25,11 +26,11 @@ type Comparable leaf annotation = Term leaf annotation -> Term leaf annotation -
|
||||
type DiffConstructor leaf annotation = CofreeF (Syntax leaf) (Both annotation) (Diff leaf annotation) -> Diff leaf annotation
|
||||
|
||||
-- | Diff two terms, given a function that determines whether two terms can be compared and a cost function.
|
||||
diffTerms :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields)
|
||||
diffTerms :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields)
|
||||
diffTerms construct comparable cost a b = fromMaybe (pure $ Replace a b) $ constructAndRun construct comparable cost a b
|
||||
|
||||
-- | Constructs an algorithm and runs it
|
||||
constructAndRun :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields))
|
||||
constructAndRun :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields))
|
||||
constructAndRun construct comparable cost t1 t2
|
||||
| not $ comparable t1 t2 = Nothing
|
||||
| (category <$> t1) == (category <$> t2) = hylo construct runCofree <$> zipTerms t1 t2
|
||||
@ -42,7 +43,7 @@ constructAndRun construct comparable cost t1 t2
|
||||
annotate = pure . construct . (both annotation1 annotation2 :<)
|
||||
|
||||
-- | Runs the diff algorithm
|
||||
run :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields))
|
||||
run :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields))
|
||||
run construct comparable cost algorithm = case runFree algorithm of
|
||||
Pure diff -> Just diff
|
||||
Free (Recursive t1 t2 f) -> run construct comparable cost . f $ recur a b where
|
||||
|
@ -5,6 +5,7 @@ import Category
|
||||
import Data.DList as DList hiding (toList)
|
||||
import Data.RandomWalkSimilarity
|
||||
import Data.Record
|
||||
import qualified Data.Vector as Vector
|
||||
import Diff
|
||||
import Patch
|
||||
import Prologue
|
||||
@ -33,5 +34,10 @@ spec = parallel $ do
|
||||
prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $
|
||||
\ (as, bs) -> let tas = toTerm <$> as
|
||||
tbs = toTerm <$> bs
|
||||
diff = free (Free (pure (Program .: RNil) :< Indexed (rws compare (rhead . headF) tas tbs :: [Diff Text (Record '[Category])]))) in
|
||||
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: RNil) :< Indexed tas)), Just (cofree ((Program .: RNil) :< Indexed tbs)))
|
||||
diff = free (Free (pure (Program .: Vector.singleton 0 .: RNil) :< Indexed (rws compare (rhead . headF) tas tbs :: [Diff Text (Record '[Category, Vector.Vector Double])]))) in
|
||||
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tas)), Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tbs)))
|
||||
|
||||
|
||||
instance Arbitrary a => Arbitrary (Vector.Vector a) where
|
||||
arbitrary = Vector.fromList <$> arbitrary
|
||||
shrink a = Vector.fromList <$> shrink (Vector.toList a)
|
||||
|
@ -4,6 +4,7 @@ module Diff.Spec where
|
||||
import Category
|
||||
import Data.Record
|
||||
import Data.Text.Arbitrary ()
|
||||
import qualified Data.Vector as Vector
|
||||
import Diff
|
||||
import Diff.Arbitrary
|
||||
import Interpreter
|
||||
@ -16,23 +17,28 @@ import Test.QuickCheck
|
||||
spec :: Spec
|
||||
spec = parallel $ do
|
||||
prop "equality is reflexive" $
|
||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
|
||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in
|
||||
diff `shouldBe` diff
|
||||
|
||||
prop "equal terms produce identity diffs" $
|
||||
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category])) in
|
||||
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category])) in
|
||||
diffCost (diffTerms wrap (==) diffCost term term) `shouldBe` 0
|
||||
|
||||
describe "beforeTerm" $ do
|
||||
prop "recovers the before term" $
|
||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
|
||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in
|
||||
beforeTerm diff `shouldBe` Just (toTerm a)
|
||||
|
||||
describe "afterTerm" $ do
|
||||
prop "recovers the after term" $
|
||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
|
||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in
|
||||
afterTerm diff `shouldBe` Just (toTerm b)
|
||||
|
||||
describe "ArbitraryDiff" $ do
|
||||
prop "generates diffs of a specific size" . forAll ((arbitrary >>= \ n -> (,) n <$> diffOfSize n) `suchThat` ((> 0) . fst)) $
|
||||
\ (n, diff) -> arbitraryDiffSize (diff :: ArbitraryDiff Text ()) `shouldBe` n
|
||||
|
||||
|
||||
instance Arbitrary a => Arbitrary (Vector.Vector a) where
|
||||
arbitrary = Vector.fromList <$> arbitrary
|
||||
shrink a = Vector.fromList <$> shrink (Vector.toList a)
|
||||
|
@ -3,6 +3,7 @@ module DiffSummarySpec where
|
||||
|
||||
import Prologue
|
||||
import Data.Record
|
||||
import qualified Data.Vector as Vector
|
||||
import Test.Hspec
|
||||
import Test.Hspec.QuickCheck
|
||||
import Diff
|
||||
@ -20,14 +21,15 @@ import Interpreter
|
||||
import Info
|
||||
import Source
|
||||
import Data.Functor.Both
|
||||
import Test.QuickCheck hiding (Fixed)
|
||||
|
||||
arrayInfo :: Record '[Category, Range]
|
||||
arrayInfo = ArrayLiteral .: Range 0 3 .: RNil
|
||||
arrayInfo :: Record '[Category, Range, Vector.Vector Double]
|
||||
arrayInfo = ArrayLiteral .: Range 0 3 .: Vector.singleton 0 .: RNil
|
||||
|
||||
literalInfo :: Record '[Category, Range]
|
||||
literalInfo = StringLiteral .: Range 1 2 .: RNil
|
||||
literalInfo :: Record '[Category, Range, Vector.Vector Double]
|
||||
literalInfo = StringLiteral .: Range 1 2 .: Vector.singleton 0 .: RNil
|
||||
|
||||
testDiff :: Diff Text (Record '[Category, Range])
|
||||
testDiff :: Diff Text (Record '[Category, Range, Vector.Vector Double])
|
||||
testDiff = free $ Free (pure arrayInfo :< Indexed [ free $ Pure (Insert (cofree $ literalInfo :< Leaf "a")) ])
|
||||
|
||||
testSummary :: DiffSummary DiffInfo
|
||||
@ -46,7 +48,7 @@ spec = parallel $ do
|
||||
diffSummaries sources testDiff `shouldBe` [ DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotations = [ ArrayLiteral ] } ]
|
||||
|
||||
prop "equal terms produce identity diffs" $
|
||||
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range])) in
|
||||
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range, Vector.Vector Double])) in
|
||||
diffSummaries sources (diffTerms wrap (==) diffCost term term) `shouldBe` []
|
||||
|
||||
describe "annotatedSummaries" $ do
|
||||
@ -103,3 +105,7 @@ isBranchInfo info = case info of
|
||||
|
||||
isBranchNode :: Patch DiffInfo -> Bool
|
||||
isBranchNode = any isBranchInfo
|
||||
|
||||
instance Arbitrary a => Arbitrary (Vector.Vector a) where
|
||||
arbitrary = Vector.fromList <$> arbitrary
|
||||
shrink a = Vector.fromList <$> shrink (Vector.toList a)
|
||||
|
@ -4,6 +4,7 @@ module InterpreterSpec where
|
||||
import Category
|
||||
import Diff
|
||||
import Data.Record
|
||||
import qualified Data.Vector as Vector
|
||||
import Interpreter
|
||||
import Patch
|
||||
import Prologue
|
||||
@ -11,20 +12,26 @@ import Syntax
|
||||
import Term.Arbitrary
|
||||
import Test.Hspec
|
||||
import Test.Hspec.QuickCheck
|
||||
import Test.QuickCheck
|
||||
|
||||
spec :: Spec
|
||||
spec = parallel $ do
|
||||
describe "interpret" $ do
|
||||
it "returns a replacement when comparing two unicode equivalent terms" $
|
||||
let termA = cofree $ (StringLiteral .: RNil) :< Leaf ("t\776" :: Text)
|
||||
termB = cofree $ (StringLiteral .: RNil) :< Leaf "\7831" in
|
||||
diffTerms (free . Free) ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB))
|
||||
let termA = cofree $ (StringLiteral .: Vector.singleton (0 :: Double) .: RNil) :< Leaf ("t\776" :: Text)
|
||||
termB = cofree $ (StringLiteral .: Vector.singleton (0 :: Double) .: RNil) :< Leaf "\7831" in
|
||||
diffTerms wrap ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB))
|
||||
|
||||
prop "produces correct diffs" $
|
||||
\ a b -> let diff = diffTerms (free . Free) ((==) `on` extract) diffCost (toTerm a) (toTerm b) :: Diff Text (Record '[Category]) in
|
||||
\ a b -> let diff = diffTerms wrap ((==) `on` extract) diffCost (toTerm a) (toTerm b) :: Diff Text (Record '[Category, Vector.Vector Double]) in
|
||||
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (toTerm a), Just (toTerm b))
|
||||
|
||||
prop "constructs zero-cost diffs of equal terms" $
|
||||
\ a -> let term = toTerm a
|
||||
diff = diffTerms (free . Free) ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category]) in
|
||||
diff = diffTerms wrap ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category, Vector.Vector Double]) in
|
||||
diffCost diff `shouldBe` 0
|
||||
|
||||
|
||||
instance Arbitrary a => Arbitrary (Vector.Vector a) where
|
||||
arbitrary = Vector.fromList <$> arbitrary
|
||||
shrink a = Vector.fromList <$> shrink (Vector.toList a)
|
||||
|
Loading…
Reference in New Issue
Block a user