1
1
mirror of https://github.com/github/semantic.git synced 2024-11-29 02:44:36 +03:00

🔥 the computation of costs.

This commit is contained in:
Rob Rix 2017-02-21 14:34:24 -05:00
parent 691f7cfebf
commit 01c47fa0c1
6 changed files with 16 additions and 40 deletions

View File

@ -7,8 +7,6 @@ import Data.Functor.Both
import Data.RandomWalkSimilarity (defaultFeatureVectorDecorator, stripDiff)
import Data.Record
import qualified Data.Text.IO as TextIO
import Data.These
import Diff
import Info
import Interpreter
import Patch
@ -34,7 +32,7 @@ import Data.Aeson.Encoding (encodingToLazyByteString)
-- | result.
-- | Returns the rendered result strictly, so it's always fully evaluated
-- | with respect to other IO actions.
diffFiles :: (HasField fields Category, HasField fields Cost)
diffFiles :: HasField fields Category
=> Parser (Syntax Text) (Record fields)
-> Renderer (Record fields)
-> Both SourceBlob
@ -48,14 +46,9 @@ diffFiles parse render sourceBlobs = do
(True, False) -> pure $ Insert (snd terms)
(False, True) -> pure $ Delete (fst terms)
(_, _) ->
runBothWith (diffTerms construct compareCategoryEq diffCostWithCachedTermCosts) terms
runBothWith (diffTerms wrap compareCategoryEq) terms
areNullOids a b = (hasNullOid a, hasNullOid b)
hasNullOid blob = oid blob == nullOid || Source.null (source blob)
construct (info :< syntax) = free (Free ((setCost <$> info <*> sumCost syntax) :< syntax))
sumCost = fmap getSum . foldMap (fmap Sum . getCost)
getCost diff = case runFree diff of
Free (info :< _) -> cost <$> info
Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch)))
getLabel :: HasField fields Category => CofreeF (Syntax leaf) (Record fields) b -> (Category, Maybe leaf)
getLabel (h :< t) = (category h, case t of
@ -66,12 +59,6 @@ getLabel (h :< t) = (category h, case t of
compareCategoryEq :: Functor f => HasField fields Category => Term f (Record fields) -> Term f (Record fields) -> Bool
compareCategoryEq = (==) `on` category . extract
-- | The sum of the node count of the diffs patches.
diffCostWithCachedTermCosts :: Functor f => HasField fields Cost => Diff f (Record fields) -> Int
diffCostWithCachedTermCosts diff = unCost $ case runFree diff of
Free (info :< _) -> sum (cost <$> info)
Pure patch -> sum (cost . extract <$> patch)
-- | Returns a rendered diff given a parser, diff arguments and two source blobs.
textDiff :: (ToJSON (Record fields), DefaultFields fields, HasField fields Cost) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO Output
textDiff parser arguments = diffFiles parser $ case format arguments of

View File

@ -12,7 +12,6 @@ import Diff
import Info
import Patch
import Prologue hiding (lookup)
import SES
import Syntax as S
import Term
@ -26,24 +25,22 @@ type DiffConstructor f annotation = TermF f (Both annotation) (Diff f annotation
diffTerms :: (Eq leaf, HasField fields Category, HasField fields (Maybe FeatureVector))
=> DiffConstructor (Syntax leaf) (Record fields) -- ^ A function to wrap up & possibly annotate every produced diff.
-> Comparable (Syntax leaf) (Record fields) -- ^ A function to determine whether or not two terms should even be compared.
-> SES.Cost (SyntaxDiff leaf fields) -- ^ A function to compute the cost of a given diff node.
-> SyntaxTerm leaf fields -- ^ A term representing the old state.
-> SyntaxTerm leaf fields -- ^ A term representing the new state.
-> SyntaxDiff leaf fields
diffTerms construct comparable cost a b = fromMaybe (replacing a b) $ diffComparableTerms construct comparable cost a b
diffTerms construct comparable a b = fromMaybe (replacing a b) $ diffComparableTerms construct comparable a b
-- | Diff two terms recursively, given functions characterizing the diffing. If the terms are incomparable, returns 'Nothing'.
diffComparableTerms :: (Eq leaf, HasField fields Category, HasField fields (Maybe FeatureVector))
=> DiffConstructor (Syntax leaf) (Record fields)
-> Comparable (Syntax leaf) (Record fields)
-> SES.Cost (SyntaxDiff leaf fields)
-> SyntaxTerm leaf fields
-> SyntaxTerm leaf fields
-> Maybe (SyntaxDiff leaf fields)
diffComparableTerms construct comparable cost = recur
diffComparableTerms construct comparable = recur
where recur a b
| (category <$> a) == (category <$> b) = hylo construct runCofree <$> zipTerms a b
| comparable a b = runAlgorithm construct recur cost (Just <$> algorithmWithTerms construct a b)
| comparable a b = runAlgorithm construct recur (Just <$> algorithmWithTerms construct a b)
| otherwise = Nothing
-- | Construct an algorithm to diff a pair of terms.
@ -102,12 +99,10 @@ algorithmWithTerms construct t1 t2 = maybe (linearly t1 t2) (fmap annotate) $ ca
runAlgorithm :: (GAlign f, HasField fields Category, Eq (f (Cofree f Category)), Traversable f, HasField fields (Maybe FeatureVector))
=> (CofreeF f (Both (Record fields)) (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -> Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -- ^ A function to wrap up & possibly annotate every produced diff.
-> (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -- ^ A function to diff two subterms recursively, if they are comparable, or else return 'Nothing'.
-> SES.Cost (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -- ^ A function to compute the cost of a given diff node.
-> Algorithm (Cofree f (Record fields)) (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) a -- ^ The algorithm to run.
-> a
runAlgorithm construct recur cost = iterAp $ \ r cont -> case r of
runAlgorithm construct recur = iterAp $ \ r cont -> case r of
Linear a b -> cont . maybe (replacing a b) (construct . (both (extract a) (extract b) :<)) $ do
aligned <- galign (unwrap a) (unwrap b)
traverse (these (Just . deleting) (Just . inserting) recur) aligned
SES as bs -> cont (ses recur cost as bs)
RWS as bs -> cont (rws recur as bs)

View File

@ -24,16 +24,16 @@ spec = parallel $ do
prop "equal terms produce identity diffs" $
\ a -> let term = decorate (unListableF a :: SyntaxTerm String '[Category]) in
diffCost (diffTerms wrap (==) diffCost term term) `shouldBe` 0
diffCost (diffTerms wrap (==) term term) `shouldBe` 0
describe "beforeTerm" $ do
prop "recovers the before term" $
\ a b -> let diff = stripDiff $ diffTerms wrap (==) diffCost (decorate (unListableF a)) (decorate (unListableF b :: SyntaxTerm String '[Category])) in
\ a b -> let diff = stripDiff $ diffTerms wrap (==) (decorate (unListableF a)) (decorate (unListableF b :: SyntaxTerm String '[Category])) in
beforeTerm diff `shouldBe` Just (unListableF a)
describe "afterTerm" $ do
prop "recovers the after term" $
\ a b -> let diff = stripDiff $ diffTerms wrap (==) diffCost (decorate (unListableF a)) (decorate (unListableF b :: SyntaxTerm String '[Category])) in
\ a b -> let diff = stripDiff $ diffTerms wrap (==) (decorate (unListableF a)) (decorate (unListableF b :: SyntaxTerm String '[Category])) in
afterTerm diff `shouldBe` Just (unListableF b)
unListableDiff :: Functor f => ListableF (Free (TermF f (ListableF (Join (,)) annotation))) (Patch (ListableF (Term f) annotation)) -> Diff f annotation

View File

@ -51,7 +51,7 @@ spec = parallel $ do
prop "equal terms produce identity diffs" $
\ a -> let term = defaultFeatureVectorDecorator (category . headF) (unListableF a :: SyntaxTerm String '[Category, Range, SourceSpan]) in
diffSummaries blobs (diffTerms wrap (==) diffCost term term) `shouldBe` []
diffSummaries blobs (diffTerms wrap (==) term term) `shouldBe` []
describe "DiffInfo" $ do
prop "patches in summaries match the patches in diffs" $

View File

@ -27,18 +27,18 @@ spec = parallel $ do
it "returns a replacement when comparing two unicode equivalent terms" $
let termA = cofree $ (StringLiteral :. Nil) :< Leaf ("t\776" :: String)
termB = cofree $ (StringLiteral :. Nil) :< Leaf "\7831" in
stripDiff (diffTerms wrap compare diffCost (decorate termA) (decorate termB)) `shouldBe` replacing termA termB
stripDiff (diffTerms wrap compare (decorate termA) (decorate termB)) `shouldBe` replacing termA termB
prop "produces correct diffs" $
\ a b -> let diff = stripDiff $ diffTerms wrap compare diffCost (decorate (unListableF a)) (decorate (unListableF b :: SyntaxTerm String '[Category])) in
\ a b -> let diff = stripDiff $ diffTerms wrap compare (decorate (unListableF a)) (decorate (unListableF b :: SyntaxTerm String '[Category])) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (unListableF a), Just (unListableF b))
prop "constructs zero-cost diffs of equal terms" $
\ a -> let term = decorate (unListableF a :: SyntaxTerm String '[Category])
diff = diffTerms wrap compare diffCost term term in
diff = diffTerms wrap compare term term in
diffCost diff `shouldBe` 0
it "produces unbiased insertions within branches" $
let term s = decorate (cofree ((StringLiteral :. Nil) :< Indexed [ cofree ((StringLiteral :. Nil) :< Leaf s) ]))
root = cofree . ((Just (listArray (0, defaultD) (repeat 0)) :. Program :. Nil) :<) . Indexed in
stripDiff (diffTerms wrap compare diffCost (root [ term "b" ]) (root [ term "a", term "b" ])) `shouldBe` wrap (pure (Program :. Nil) :< Indexed [ inserting (stripTerm (term "a")), cata wrap (fmap pure (stripTerm (term "b"))) ])
stripDiff (diffTerms wrap compare (root [ term "b" ]) (root [ term "a", term "b" ])) `shouldBe` wrap (pure (Program :. Nil) :< Indexed [ inserting (stripTerm (term "a")), cata wrap (fmap pure (stripTerm (term "b"))) ])

View File

@ -6,7 +6,6 @@ import Data.Functor.Both
import Data.Functor.Listable
import Data.RandomWalkSimilarity
import Data.Record
import Data.These
import Data.String
import Diff
import Diffing
@ -95,7 +94,7 @@ spec = parallel $ do
prop "equal terms produce identity diffs" $
\a -> let term = defaultFeatureVectorDecorator (Info.category . headF) (unListableF a :: Term') in
diffTOC blankDiffBlobs (diffTerms wrap (==) diffCost term term) `shouldBe` []
diffTOC blankDiffBlobs (diffTerms wrap (==) term term) `shouldBe` []
type Diff' = SyntaxDiff String '[Range, Category, SourceSpan]
type Term' = SyntaxTerm String '[Range, Category, SourceSpan]
@ -166,14 +165,9 @@ testDiff sourceBlobs = do
diffTerms' terms blobs = case runBothWith areNullOids blobs of
(True, False) -> pure $ Insert (snd terms)
(False, True) -> pure $ Delete (fst terms)
(_, _) -> runBothWith (diffTerms construct compareCategoryEq diffCostWithCachedTermCosts) terms
(_, _) -> runBothWith (diffTerms wrap compareCategoryEq) terms
areNullOids a b = (hasNullOid a, hasNullOid b)
hasNullOid blob = oid blob == nullOid || Source.null (source blob)
construct (info :< syntax) = free (Free ((setCost <$> info <*> sumCost syntax) :< syntax))
sumCost = fmap getSum . foldMap (fmap Sum . getCost)
getCost diff = case runFree diff of
Free (info :< _) -> cost <$> info
Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch)))
blobsForPaths :: Both FilePath -> IO (Both SourceBlob)
blobsForPaths paths = do