1
1
mirror of https://github.com/github/semantic.git synced 2024-12-01 09:15:01 +03:00

rws requires a feature vector.

This commit is contained in:
Rob Rix 2016-08-04 21:01:54 -04:00
parent 60c4effabe
commit 9fc9827b34
6 changed files with 47 additions and 21 deletions

View File

@ -27,7 +27,7 @@ import Test.QuickCheck hiding (Fixed)
import Test.QuickCheck.Random import Test.QuickCheck.Random
-- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf). -- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf).
rws :: (Hashable label, Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), Typeable label) => rws :: (Hashable label, Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), Typeable label, HasField fields (Vector.Vector Double)) =>
-- | A function which comapres a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared. -- | A function which comapres a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared.
(Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -> (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) ->
-- | A function to compute a label for an unpacked term. -- | A function to compute a label for an unpacked term.

View File

@ -9,6 +9,7 @@ import Data.Hashable
import Data.RandomWalkSimilarity import Data.RandomWalkSimilarity
import Data.Record import Data.Record
import Data.These import Data.These
import qualified Data.Vector as Vector
import Diff import Diff
import Info import Info
import Operation import Operation
@ -25,11 +26,11 @@ type Comparable leaf annotation = Term leaf annotation -> Term leaf annotation -
type DiffConstructor leaf annotation = CofreeF (Syntax leaf) (Both annotation) (Diff leaf annotation) -> Diff leaf annotation type DiffConstructor leaf annotation = CofreeF (Syntax leaf) (Both annotation) (Diff leaf annotation) -> Diff leaf annotation
-- | Diff two terms, given a function that determines whether two terms can be compared and a cost function. -- | Diff two terms, given a function that determines whether two terms can be compared and a cost function.
diffTerms :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields) diffTerms :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields)
diffTerms construct comparable cost a b = fromMaybe (pure $ Replace a b) $ constructAndRun construct comparable cost a b diffTerms construct comparable cost a b = fromMaybe (pure $ Replace a b) $ constructAndRun construct comparable cost a b
-- | Constructs an algorithm and runs it -- | Constructs an algorithm and runs it
constructAndRun :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields)) constructAndRun :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields))
constructAndRun construct comparable cost t1 t2 constructAndRun construct comparable cost t1 t2
| not $ comparable t1 t2 = Nothing | not $ comparable t1 t2 = Nothing
| (category <$> t1) == (category <$> t2) = hylo construct runCofree <$> zipTerms t1 t2 | (category <$> t1) == (category <$> t2) = hylo construct runCofree <$> zipTerms t1 t2
@ -42,7 +43,7 @@ constructAndRun construct comparable cost t1 t2
annotate = pure . construct . (both annotation1 annotation2 :<) annotate = pure . construct . (both annotation1 annotation2 :<)
-- | Runs the diff algorithm -- | Runs the diff algorithm
run :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields)) run :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields))
run construct comparable cost algorithm = case runFree algorithm of run construct comparable cost algorithm = case runFree algorithm of
Pure diff -> Just diff Pure diff -> Just diff
Free (Recursive t1 t2 f) -> run construct comparable cost . f $ recur a b where Free (Recursive t1 t2 f) -> run construct comparable cost . f $ recur a b where

View File

@ -5,6 +5,7 @@ import Category
import Data.DList as DList hiding (toList) import Data.DList as DList hiding (toList)
import Data.RandomWalkSimilarity import Data.RandomWalkSimilarity
import Data.Record import Data.Record
import qualified Data.Vector as Vector
import Diff import Diff
import Patch import Patch
import Prologue import Prologue
@ -33,5 +34,10 @@ spec = parallel $ do
prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $ prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $
\ (as, bs) -> let tas = toTerm <$> as \ (as, bs) -> let tas = toTerm <$> as
tbs = toTerm <$> bs tbs = toTerm <$> bs
diff = free (Free (pure (Program .: RNil) :< Indexed (rws compare (rhead . headF) tas tbs :: [Diff Text (Record '[Category])]))) in diff = free (Free (pure (Program .: Vector.singleton 0 .: RNil) :< Indexed (rws compare (rhead . headF) tas tbs :: [Diff Text (Record '[Category, Vector.Vector Double])]))) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: RNil) :< Indexed tas)), Just (cofree ((Program .: RNil) :< Indexed tbs))) (beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tas)), Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tbs)))
instance Arbitrary a => Arbitrary (Vector.Vector a) where
arbitrary = Vector.fromList <$> arbitrary
shrink a = Vector.fromList <$> shrink (Vector.toList a)

View File

@ -4,6 +4,7 @@ module Diff.Spec where
import Category import Category
import Data.Record import Data.Record
import Data.Text.Arbitrary () import Data.Text.Arbitrary ()
import qualified Data.Vector as Vector
import Diff import Diff
import Diff.Arbitrary import Diff.Arbitrary
import Interpreter import Interpreter
@ -16,23 +17,28 @@ import Test.QuickCheck
spec :: Spec spec :: Spec
spec = parallel $ do spec = parallel $ do
prop "equality is reflexive" $ prop "equality is reflexive" $
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in
diff `shouldBe` diff diff `shouldBe` diff
prop "equal terms produce identity diffs" $ prop "equal terms produce identity diffs" $
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category])) in \ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category])) in
diffCost (diffTerms wrap (==) diffCost term term) `shouldBe` 0 diffCost (diffTerms wrap (==) diffCost term term) `shouldBe` 0
describe "beforeTerm" $ do describe "beforeTerm" $ do
prop "recovers the before term" $ prop "recovers the before term" $
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in
beforeTerm diff `shouldBe` Just (toTerm a) beforeTerm diff `shouldBe` Just (toTerm a)
describe "afterTerm" $ do describe "afterTerm" $ do
prop "recovers the after term" $ prop "recovers the after term" $
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in
afterTerm diff `shouldBe` Just (toTerm b) afterTerm diff `shouldBe` Just (toTerm b)
describe "ArbitraryDiff" $ do describe "ArbitraryDiff" $ do
prop "generates diffs of a specific size" . forAll ((arbitrary >>= \ n -> (,) n <$> diffOfSize n) `suchThat` ((> 0) . fst)) $ prop "generates diffs of a specific size" . forAll ((arbitrary >>= \ n -> (,) n <$> diffOfSize n) `suchThat` ((> 0) . fst)) $
\ (n, diff) -> arbitraryDiffSize (diff :: ArbitraryDiff Text ()) `shouldBe` n \ (n, diff) -> arbitraryDiffSize (diff :: ArbitraryDiff Text ()) `shouldBe` n
instance Arbitrary a => Arbitrary (Vector.Vector a) where
arbitrary = Vector.fromList <$> arbitrary
shrink a = Vector.fromList <$> shrink (Vector.toList a)

View File

@ -3,6 +3,7 @@ module DiffSummarySpec where
import Prologue import Prologue
import Data.Record import Data.Record
import qualified Data.Vector as Vector
import Test.Hspec import Test.Hspec
import Test.Hspec.QuickCheck import Test.Hspec.QuickCheck
import Diff import Diff
@ -20,14 +21,15 @@ import Interpreter
import Info import Info
import Source import Source
import Data.Functor.Both import Data.Functor.Both
import Test.QuickCheck hiding (Fixed)
arrayInfo :: Record '[Category, Range] arrayInfo :: Record '[Category, Range, Vector.Vector Double]
arrayInfo = ArrayLiteral .: Range 0 3 .: RNil arrayInfo = ArrayLiteral .: Range 0 3 .: Vector.singleton 0 .: RNil
literalInfo :: Record '[Category, Range] literalInfo :: Record '[Category, Range, Vector.Vector Double]
literalInfo = StringLiteral .: Range 1 2 .: RNil literalInfo = StringLiteral .: Range 1 2 .: Vector.singleton 0 .: RNil
testDiff :: Diff Text (Record '[Category, Range]) testDiff :: Diff Text (Record '[Category, Range, Vector.Vector Double])
testDiff = free $ Free (pure arrayInfo :< Indexed [ free $ Pure (Insert (cofree $ literalInfo :< Leaf "a")) ]) testDiff = free $ Free (pure arrayInfo :< Indexed [ free $ Pure (Insert (cofree $ literalInfo :< Leaf "a")) ])
testSummary :: DiffSummary DiffInfo testSummary :: DiffSummary DiffInfo
@ -46,7 +48,7 @@ spec = parallel $ do
diffSummaries sources testDiff `shouldBe` [ DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotations = [ ArrayLiteral ] } ] diffSummaries sources testDiff `shouldBe` [ DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotations = [ ArrayLiteral ] } ]
prop "equal terms produce identity diffs" $ prop "equal terms produce identity diffs" $
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range])) in \ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range, Vector.Vector Double])) in
diffSummaries sources (diffTerms wrap (==) diffCost term term) `shouldBe` [] diffSummaries sources (diffTerms wrap (==) diffCost term term) `shouldBe` []
describe "annotatedSummaries" $ do describe "annotatedSummaries" $ do
@ -103,3 +105,7 @@ isBranchInfo info = case info of
isBranchNode :: Patch DiffInfo -> Bool isBranchNode :: Patch DiffInfo -> Bool
isBranchNode = any isBranchInfo isBranchNode = any isBranchInfo
instance Arbitrary a => Arbitrary (Vector.Vector a) where
arbitrary = Vector.fromList <$> arbitrary
shrink a = Vector.fromList <$> shrink (Vector.toList a)

View File

@ -4,6 +4,7 @@ module InterpreterSpec where
import Category import Category
import Diff import Diff
import Data.Record import Data.Record
import qualified Data.Vector as Vector
import Interpreter import Interpreter
import Patch import Patch
import Prologue import Prologue
@ -11,20 +12,26 @@ import Syntax
import Term.Arbitrary import Term.Arbitrary
import Test.Hspec import Test.Hspec
import Test.Hspec.QuickCheck import Test.Hspec.QuickCheck
import Test.QuickCheck
spec :: Spec spec :: Spec
spec = parallel $ do spec = parallel $ do
describe "interpret" $ do describe "interpret" $ do
it "returns a replacement when comparing two unicode equivalent terms" $ it "returns a replacement when comparing two unicode equivalent terms" $
let termA = cofree $ (StringLiteral .: RNil) :< Leaf ("t\776" :: Text) let termA = cofree $ (StringLiteral .: Vector.singleton (0 :: Double) .: RNil) :< Leaf ("t\776" :: Text)
termB = cofree $ (StringLiteral .: RNil) :< Leaf "\7831" in termB = cofree $ (StringLiteral .: Vector.singleton (0 :: Double) .: RNil) :< Leaf "\7831" in
diffTerms (free . Free) ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB)) diffTerms wrap ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB))
prop "produces correct diffs" $ prop "produces correct diffs" $
\ a b -> let diff = diffTerms (free . Free) ((==) `on` extract) diffCost (toTerm a) (toTerm b) :: Diff Text (Record '[Category]) in \ a b -> let diff = diffTerms wrap ((==) `on` extract) diffCost (toTerm a) (toTerm b) :: Diff Text (Record '[Category, Vector.Vector Double]) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (toTerm a), Just (toTerm b)) (beforeTerm diff, afterTerm diff) `shouldBe` (Just (toTerm a), Just (toTerm b))
prop "constructs zero-cost diffs of equal terms" $ prop "constructs zero-cost diffs of equal terms" $
\ a -> let term = toTerm a \ a -> let term = toTerm a
diff = diffTerms (free . Free) ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category]) in diff = diffTerms wrap ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category, Vector.Vector Double]) in
diffCost diff `shouldBe` 0 diffCost diff `shouldBe` 0
instance Arbitrary a => Arbitrary (Vector.Vector a) where
arbitrary = Vector.fromList <$> arbitrary
shrink a = Vector.fromList <$> shrink (Vector.toList a)