1
1
mirror of https://github.com/github/semantic.git synced 2024-12-29 18:06:14 +03:00

rws requires a feature vector.

This commit is contained in:
Rob Rix 2016-08-04 21:01:54 -04:00
parent 60c4effabe
commit 9fc9827b34
6 changed files with 47 additions and 21 deletions

View File

@ -27,7 +27,7 @@ import Test.QuickCheck hiding (Fixed)
import Test.QuickCheck.Random
-- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf).
rws :: (Hashable label, Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), Typeable label) =>
rws :: (Hashable label, Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), Typeable label, HasField fields (Vector.Vector Double)) =>
-- | A function which comapres a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared.
(Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) ->
-- | A function to compute a label for an unpacked term.

View File

@ -9,6 +9,7 @@ import Data.Hashable
import Data.RandomWalkSimilarity
import Data.Record
import Data.These
import qualified Data.Vector as Vector
import Diff
import Info
import Operation
@ -25,11 +26,11 @@ type Comparable leaf annotation = Term leaf annotation -> Term leaf annotation -
type DiffConstructor leaf annotation = CofreeF (Syntax leaf) (Both annotation) (Diff leaf annotation) -> Diff leaf annotation
-- | Diff two terms, given a function that determines whether two terms can be compared and a cost function.
diffTerms :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields)
diffTerms :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields)
diffTerms construct comparable cost a b = fromMaybe (pure $ Replace a b) $ constructAndRun construct comparable cost a b
-- | Constructs an algorithm and runs it
constructAndRun :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields))
constructAndRun :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields))
constructAndRun construct comparable cost t1 t2
| not $ comparable t1 t2 = Nothing
| (category <$> t1) == (category <$> t2) = hylo construct runCofree <$> zipTerms t1 t2
@ -42,7 +43,7 @@ constructAndRun construct comparable cost t1 t2
annotate = pure . construct . (both annotation1 annotation2 :<)
-- | Runs the diff algorithm
run :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields))
run :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields))
run construct comparable cost algorithm = case runFree algorithm of
Pure diff -> Just diff
Free (Recursive t1 t2 f) -> run construct comparable cost . f $ recur a b where

View File

@ -5,6 +5,7 @@ import Category
import Data.DList as DList hiding (toList)
import Data.RandomWalkSimilarity
import Data.Record
import qualified Data.Vector as Vector
import Diff
import Patch
import Prologue
@ -33,5 +34,10 @@ spec = parallel $ do
prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $
\ (as, bs) -> let tas = toTerm <$> as
tbs = toTerm <$> bs
diff = free (Free (pure (Program .: RNil) :< Indexed (rws compare (rhead . headF) tas tbs :: [Diff Text (Record '[Category])]))) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: RNil) :< Indexed tas)), Just (cofree ((Program .: RNil) :< Indexed tbs)))
diff = free (Free (pure (Program .: Vector.singleton 0 .: RNil) :< Indexed (rws compare (rhead . headF) tas tbs :: [Diff Text (Record '[Category, Vector.Vector Double])]))) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tas)), Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tbs)))
instance Arbitrary a => Arbitrary (Vector.Vector a) where
arbitrary = Vector.fromList <$> arbitrary
shrink a = Vector.fromList <$> shrink (Vector.toList a)

View File

@ -4,6 +4,7 @@ module Diff.Spec where
import Category
import Data.Record
import Data.Text.Arbitrary ()
import qualified Data.Vector as Vector
import Diff
import Diff.Arbitrary
import Interpreter
@ -16,23 +17,28 @@ import Test.QuickCheck
spec :: Spec
spec = parallel $ do
prop "equality is reflexive" $
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in
diff `shouldBe` diff
prop "equal terms produce identity diffs" $
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category])) in
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category])) in
diffCost (diffTerms wrap (==) diffCost term term) `shouldBe` 0
describe "beforeTerm" $ do
prop "recovers the before term" $
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in
beforeTerm diff `shouldBe` Just (toTerm a)
describe "afterTerm" $ do
prop "recovers the after term" $
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in
afterTerm diff `shouldBe` Just (toTerm b)
describe "ArbitraryDiff" $ do
prop "generates diffs of a specific size" . forAll ((arbitrary >>= \ n -> (,) n <$> diffOfSize n) `suchThat` ((> 0) . fst)) $
\ (n, diff) -> arbitraryDiffSize (diff :: ArbitraryDiff Text ()) `shouldBe` n
instance Arbitrary a => Arbitrary (Vector.Vector a) where
arbitrary = Vector.fromList <$> arbitrary
shrink a = Vector.fromList <$> shrink (Vector.toList a)

View File

@ -3,6 +3,7 @@ module DiffSummarySpec where
import Prologue
import Data.Record
import qualified Data.Vector as Vector
import Test.Hspec
import Test.Hspec.QuickCheck
import Diff
@ -20,14 +21,15 @@ import Interpreter
import Info
import Source
import Data.Functor.Both
import Test.QuickCheck hiding (Fixed)
arrayInfo :: Record '[Category, Range]
arrayInfo = ArrayLiteral .: Range 0 3 .: RNil
arrayInfo :: Record '[Category, Range, Vector.Vector Double]
arrayInfo = ArrayLiteral .: Range 0 3 .: Vector.singleton 0 .: RNil
literalInfo :: Record '[Category, Range]
literalInfo = StringLiteral .: Range 1 2 .: RNil
literalInfo :: Record '[Category, Range, Vector.Vector Double]
literalInfo = StringLiteral .: Range 1 2 .: Vector.singleton 0 .: RNil
testDiff :: Diff Text (Record '[Category, Range])
testDiff :: Diff Text (Record '[Category, Range, Vector.Vector Double])
testDiff = free $ Free (pure arrayInfo :< Indexed [ free $ Pure (Insert (cofree $ literalInfo :< Leaf "a")) ])
testSummary :: DiffSummary DiffInfo
@ -46,7 +48,7 @@ spec = parallel $ do
diffSummaries sources testDiff `shouldBe` [ DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotations = [ ArrayLiteral ] } ]
prop "equal terms produce identity diffs" $
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range])) in
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range, Vector.Vector Double])) in
diffSummaries sources (diffTerms wrap (==) diffCost term term) `shouldBe` []
describe "annotatedSummaries" $ do
@ -103,3 +105,7 @@ isBranchInfo info = case info of
isBranchNode :: Patch DiffInfo -> Bool
isBranchNode = any isBranchInfo
instance Arbitrary a => Arbitrary (Vector.Vector a) where
arbitrary = Vector.fromList <$> arbitrary
shrink a = Vector.fromList <$> shrink (Vector.toList a)

View File

@ -4,6 +4,7 @@ module InterpreterSpec where
import Category
import Diff
import Data.Record
import qualified Data.Vector as Vector
import Interpreter
import Patch
import Prologue
@ -11,20 +12,26 @@ import Syntax
import Term.Arbitrary
import Test.Hspec
import Test.Hspec.QuickCheck
import Test.QuickCheck
spec :: Spec
spec = parallel $ do
describe "interpret" $ do
it "returns a replacement when comparing two unicode equivalent terms" $
let termA = cofree $ (StringLiteral .: RNil) :< Leaf ("t\776" :: Text)
termB = cofree $ (StringLiteral .: RNil) :< Leaf "\7831" in
diffTerms (free . Free) ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB))
let termA = cofree $ (StringLiteral .: Vector.singleton (0 :: Double) .: RNil) :< Leaf ("t\776" :: Text)
termB = cofree $ (StringLiteral .: Vector.singleton (0 :: Double) .: RNil) :< Leaf "\7831" in
diffTerms wrap ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB))
prop "produces correct diffs" $
\ a b -> let diff = diffTerms (free . Free) ((==) `on` extract) diffCost (toTerm a) (toTerm b) :: Diff Text (Record '[Category]) in
\ a b -> let diff = diffTerms wrap ((==) `on` extract) diffCost (toTerm a) (toTerm b) :: Diff Text (Record '[Category, Vector.Vector Double]) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (toTerm a), Just (toTerm b))
prop "constructs zero-cost diffs of equal terms" $
\ a -> let term = toTerm a
diff = diffTerms (free . Free) ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category]) in
diff = diffTerms wrap ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category, Vector.Vector Double]) in
diffCost diff `shouldBe` 0
instance Arbitrary a => Arbitrary (Vector.Vector a) where
arbitrary = Vector.fromList <$> arbitrary
shrink a = Vector.fromList <$> shrink (Vector.toList a)