mirror of
https://github.com/github/semantic.git
synced 2024-12-01 09:15:01 +03:00
rws requires a feature vector.
This commit is contained in:
parent
60c4effabe
commit
9fc9827b34
@ -27,7 +27,7 @@ import Test.QuickCheck hiding (Fixed)
|
|||||||
import Test.QuickCheck.Random
|
import Test.QuickCheck.Random
|
||||||
|
|
||||||
-- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf).
|
-- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf).
|
||||||
rws :: (Hashable label, Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), Typeable label) =>
|
rws :: (Hashable label, Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), Typeable label, HasField fields (Vector.Vector Double)) =>
|
||||||
-- | A function which comapres a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared.
|
-- | A function which comapres a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared.
|
||||||
(Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) ->
|
(Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) ->
|
||||||
-- | A function to compute a label for an unpacked term.
|
-- | A function to compute a label for an unpacked term.
|
||||||
|
@ -9,6 +9,7 @@ import Data.Hashable
|
|||||||
import Data.RandomWalkSimilarity
|
import Data.RandomWalkSimilarity
|
||||||
import Data.Record
|
import Data.Record
|
||||||
import Data.These
|
import Data.These
|
||||||
|
import qualified Data.Vector as Vector
|
||||||
import Diff
|
import Diff
|
||||||
import Info
|
import Info
|
||||||
import Operation
|
import Operation
|
||||||
@ -25,11 +26,11 @@ type Comparable leaf annotation = Term leaf annotation -> Term leaf annotation -
|
|||||||
type DiffConstructor leaf annotation = CofreeF (Syntax leaf) (Both annotation) (Diff leaf annotation) -> Diff leaf annotation
|
type DiffConstructor leaf annotation = CofreeF (Syntax leaf) (Both annotation) (Diff leaf annotation) -> Diff leaf annotation
|
||||||
|
|
||||||
-- | Diff two terms, given a function that determines whether two terms can be compared and a cost function.
|
-- | Diff two terms, given a function that determines whether two terms can be compared and a cost function.
|
||||||
diffTerms :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields)
|
diffTerms :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields)
|
||||||
diffTerms construct comparable cost a b = fromMaybe (pure $ Replace a b) $ constructAndRun construct comparable cost a b
|
diffTerms construct comparable cost a b = fromMaybe (pure $ Replace a b) $ constructAndRun construct comparable cost a b
|
||||||
|
|
||||||
-- | Constructs an algorithm and runs it
|
-- | Constructs an algorithm and runs it
|
||||||
constructAndRun :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields))
|
constructAndRun :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields))
|
||||||
constructAndRun construct comparable cost t1 t2
|
constructAndRun construct comparable cost t1 t2
|
||||||
| not $ comparable t1 t2 = Nothing
|
| not $ comparable t1 t2 = Nothing
|
||||||
| (category <$> t1) == (category <$> t2) = hylo construct runCofree <$> zipTerms t1 t2
|
| (category <$> t1) == (category <$> t2) = hylo construct runCofree <$> zipTerms t1 t2
|
||||||
@ -42,7 +43,7 @@ constructAndRun construct comparable cost t1 t2
|
|||||||
annotate = pure . construct . (both annotation1 annotation2 :<)
|
annotate = pure . construct . (both annotation1 annotation2 :<)
|
||||||
|
|
||||||
-- | Runs the diff algorithm
|
-- | Runs the diff algorithm
|
||||||
run :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields))
|
run :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields))
|
||||||
run construct comparable cost algorithm = case runFree algorithm of
|
run construct comparable cost algorithm = case runFree algorithm of
|
||||||
Pure diff -> Just diff
|
Pure diff -> Just diff
|
||||||
Free (Recursive t1 t2 f) -> run construct comparable cost . f $ recur a b where
|
Free (Recursive t1 t2 f) -> run construct comparable cost . f $ recur a b where
|
||||||
|
@ -5,6 +5,7 @@ import Category
|
|||||||
import Data.DList as DList hiding (toList)
|
import Data.DList as DList hiding (toList)
|
||||||
import Data.RandomWalkSimilarity
|
import Data.RandomWalkSimilarity
|
||||||
import Data.Record
|
import Data.Record
|
||||||
|
import qualified Data.Vector as Vector
|
||||||
import Diff
|
import Diff
|
||||||
import Patch
|
import Patch
|
||||||
import Prologue
|
import Prologue
|
||||||
@ -33,5 +34,10 @@ spec = parallel $ do
|
|||||||
prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $
|
prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $
|
||||||
\ (as, bs) -> let tas = toTerm <$> as
|
\ (as, bs) -> let tas = toTerm <$> as
|
||||||
tbs = toTerm <$> bs
|
tbs = toTerm <$> bs
|
||||||
diff = free (Free (pure (Program .: RNil) :< Indexed (rws compare (rhead . headF) tas tbs :: [Diff Text (Record '[Category])]))) in
|
diff = free (Free (pure (Program .: Vector.singleton 0 .: RNil) :< Indexed (rws compare (rhead . headF) tas tbs :: [Diff Text (Record '[Category, Vector.Vector Double])]))) in
|
||||||
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: RNil) :< Indexed tas)), Just (cofree ((Program .: RNil) :< Indexed tbs)))
|
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tas)), Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tbs)))
|
||||||
|
|
||||||
|
|
||||||
|
instance Arbitrary a => Arbitrary (Vector.Vector a) where
|
||||||
|
arbitrary = Vector.fromList <$> arbitrary
|
||||||
|
shrink a = Vector.fromList <$> shrink (Vector.toList a)
|
||||||
|
@ -4,6 +4,7 @@ module Diff.Spec where
|
|||||||
import Category
|
import Category
|
||||||
import Data.Record
|
import Data.Record
|
||||||
import Data.Text.Arbitrary ()
|
import Data.Text.Arbitrary ()
|
||||||
|
import qualified Data.Vector as Vector
|
||||||
import Diff
|
import Diff
|
||||||
import Diff.Arbitrary
|
import Diff.Arbitrary
|
||||||
import Interpreter
|
import Interpreter
|
||||||
@ -16,23 +17,28 @@ import Test.QuickCheck
|
|||||||
spec :: Spec
|
spec :: Spec
|
||||||
spec = parallel $ do
|
spec = parallel $ do
|
||||||
prop "equality is reflexive" $
|
prop "equality is reflexive" $
|
||||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
|
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in
|
||||||
diff `shouldBe` diff
|
diff `shouldBe` diff
|
||||||
|
|
||||||
prop "equal terms produce identity diffs" $
|
prop "equal terms produce identity diffs" $
|
||||||
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category])) in
|
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category])) in
|
||||||
diffCost (diffTerms wrap (==) diffCost term term) `shouldBe` 0
|
diffCost (diffTerms wrap (==) diffCost term term) `shouldBe` 0
|
||||||
|
|
||||||
describe "beforeTerm" $ do
|
describe "beforeTerm" $ do
|
||||||
prop "recovers the before term" $
|
prop "recovers the before term" $
|
||||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
|
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in
|
||||||
beforeTerm diff `shouldBe` Just (toTerm a)
|
beforeTerm diff `shouldBe` Just (toTerm a)
|
||||||
|
|
||||||
describe "afterTerm" $ do
|
describe "afterTerm" $ do
|
||||||
prop "recovers the after term" $
|
prop "recovers the after term" $
|
||||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
|
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in
|
||||||
afterTerm diff `shouldBe` Just (toTerm b)
|
afterTerm diff `shouldBe` Just (toTerm b)
|
||||||
|
|
||||||
describe "ArbitraryDiff" $ do
|
describe "ArbitraryDiff" $ do
|
||||||
prop "generates diffs of a specific size" . forAll ((arbitrary >>= \ n -> (,) n <$> diffOfSize n) `suchThat` ((> 0) . fst)) $
|
prop "generates diffs of a specific size" . forAll ((arbitrary >>= \ n -> (,) n <$> diffOfSize n) `suchThat` ((> 0) . fst)) $
|
||||||
\ (n, diff) -> arbitraryDiffSize (diff :: ArbitraryDiff Text ()) `shouldBe` n
|
\ (n, diff) -> arbitraryDiffSize (diff :: ArbitraryDiff Text ()) `shouldBe` n
|
||||||
|
|
||||||
|
|
||||||
|
instance Arbitrary a => Arbitrary (Vector.Vector a) where
|
||||||
|
arbitrary = Vector.fromList <$> arbitrary
|
||||||
|
shrink a = Vector.fromList <$> shrink (Vector.toList a)
|
||||||
|
@ -3,6 +3,7 @@ module DiffSummarySpec where
|
|||||||
|
|
||||||
import Prologue
|
import Prologue
|
||||||
import Data.Record
|
import Data.Record
|
||||||
|
import qualified Data.Vector as Vector
|
||||||
import Test.Hspec
|
import Test.Hspec
|
||||||
import Test.Hspec.QuickCheck
|
import Test.Hspec.QuickCheck
|
||||||
import Diff
|
import Diff
|
||||||
@ -20,14 +21,15 @@ import Interpreter
|
|||||||
import Info
|
import Info
|
||||||
import Source
|
import Source
|
||||||
import Data.Functor.Both
|
import Data.Functor.Both
|
||||||
|
import Test.QuickCheck hiding (Fixed)
|
||||||
|
|
||||||
arrayInfo :: Record '[Category, Range]
|
arrayInfo :: Record '[Category, Range, Vector.Vector Double]
|
||||||
arrayInfo = ArrayLiteral .: Range 0 3 .: RNil
|
arrayInfo = ArrayLiteral .: Range 0 3 .: Vector.singleton 0 .: RNil
|
||||||
|
|
||||||
literalInfo :: Record '[Category, Range]
|
literalInfo :: Record '[Category, Range, Vector.Vector Double]
|
||||||
literalInfo = StringLiteral .: Range 1 2 .: RNil
|
literalInfo = StringLiteral .: Range 1 2 .: Vector.singleton 0 .: RNil
|
||||||
|
|
||||||
testDiff :: Diff Text (Record '[Category, Range])
|
testDiff :: Diff Text (Record '[Category, Range, Vector.Vector Double])
|
||||||
testDiff = free $ Free (pure arrayInfo :< Indexed [ free $ Pure (Insert (cofree $ literalInfo :< Leaf "a")) ])
|
testDiff = free $ Free (pure arrayInfo :< Indexed [ free $ Pure (Insert (cofree $ literalInfo :< Leaf "a")) ])
|
||||||
|
|
||||||
testSummary :: DiffSummary DiffInfo
|
testSummary :: DiffSummary DiffInfo
|
||||||
@ -46,7 +48,7 @@ spec = parallel $ do
|
|||||||
diffSummaries sources testDiff `shouldBe` [ DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotations = [ ArrayLiteral ] } ]
|
diffSummaries sources testDiff `shouldBe` [ DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotations = [ ArrayLiteral ] } ]
|
||||||
|
|
||||||
prop "equal terms produce identity diffs" $
|
prop "equal terms produce identity diffs" $
|
||||||
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range])) in
|
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range, Vector.Vector Double])) in
|
||||||
diffSummaries sources (diffTerms wrap (==) diffCost term term) `shouldBe` []
|
diffSummaries sources (diffTerms wrap (==) diffCost term term) `shouldBe` []
|
||||||
|
|
||||||
describe "annotatedSummaries" $ do
|
describe "annotatedSummaries" $ do
|
||||||
@ -103,3 +105,7 @@ isBranchInfo info = case info of
|
|||||||
|
|
||||||
isBranchNode :: Patch DiffInfo -> Bool
|
isBranchNode :: Patch DiffInfo -> Bool
|
||||||
isBranchNode = any isBranchInfo
|
isBranchNode = any isBranchInfo
|
||||||
|
|
||||||
|
instance Arbitrary a => Arbitrary (Vector.Vector a) where
|
||||||
|
arbitrary = Vector.fromList <$> arbitrary
|
||||||
|
shrink a = Vector.fromList <$> shrink (Vector.toList a)
|
||||||
|
@ -4,6 +4,7 @@ module InterpreterSpec where
|
|||||||
import Category
|
import Category
|
||||||
import Diff
|
import Diff
|
||||||
import Data.Record
|
import Data.Record
|
||||||
|
import qualified Data.Vector as Vector
|
||||||
import Interpreter
|
import Interpreter
|
||||||
import Patch
|
import Patch
|
||||||
import Prologue
|
import Prologue
|
||||||
@ -11,20 +12,26 @@ import Syntax
|
|||||||
import Term.Arbitrary
|
import Term.Arbitrary
|
||||||
import Test.Hspec
|
import Test.Hspec
|
||||||
import Test.Hspec.QuickCheck
|
import Test.Hspec.QuickCheck
|
||||||
|
import Test.QuickCheck
|
||||||
|
|
||||||
spec :: Spec
|
spec :: Spec
|
||||||
spec = parallel $ do
|
spec = parallel $ do
|
||||||
describe "interpret" $ do
|
describe "interpret" $ do
|
||||||
it "returns a replacement when comparing two unicode equivalent terms" $
|
it "returns a replacement when comparing two unicode equivalent terms" $
|
||||||
let termA = cofree $ (StringLiteral .: RNil) :< Leaf ("t\776" :: Text)
|
let termA = cofree $ (StringLiteral .: Vector.singleton (0 :: Double) .: RNil) :< Leaf ("t\776" :: Text)
|
||||||
termB = cofree $ (StringLiteral .: RNil) :< Leaf "\7831" in
|
termB = cofree $ (StringLiteral .: Vector.singleton (0 :: Double) .: RNil) :< Leaf "\7831" in
|
||||||
diffTerms (free . Free) ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB))
|
diffTerms wrap ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB))
|
||||||
|
|
||||||
prop "produces correct diffs" $
|
prop "produces correct diffs" $
|
||||||
\ a b -> let diff = diffTerms (free . Free) ((==) `on` extract) diffCost (toTerm a) (toTerm b) :: Diff Text (Record '[Category]) in
|
\ a b -> let diff = diffTerms wrap ((==) `on` extract) diffCost (toTerm a) (toTerm b) :: Diff Text (Record '[Category, Vector.Vector Double]) in
|
||||||
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (toTerm a), Just (toTerm b))
|
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (toTerm a), Just (toTerm b))
|
||||||
|
|
||||||
prop "constructs zero-cost diffs of equal terms" $
|
prop "constructs zero-cost diffs of equal terms" $
|
||||||
\ a -> let term = toTerm a
|
\ a -> let term = toTerm a
|
||||||
diff = diffTerms (free . Free) ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category]) in
|
diff = diffTerms wrap ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category, Vector.Vector Double]) in
|
||||||
diffCost diff `shouldBe` 0
|
diffCost diff `shouldBe` 0
|
||||||
|
|
||||||
|
|
||||||
|
instance Arbitrary a => Arbitrary (Vector.Vector a) where
|
||||||
|
arbitrary = Vector.fromList <$> arbitrary
|
||||||
|
shrink a = Vector.fromList <$> shrink (Vector.toList a)
|
||||||
|
Loading…
Reference in New Issue
Block a user