1
1
mirror of https://github.com/github/semantic.git synced 2024-11-24 08:54:07 +03:00

Merge pull request #252 from github/actually-diff

Diff, Actually
This commit is contained in:
Josh Vera 2015-12-01 13:06:34 -05:00
commit d65ee2c68e
9 changed files with 92 additions and 52 deletions

View File

@ -1,12 +1,17 @@
module Main where
import Categorizable
import Diff
import Interpreter
import Patch
import Term
import Syntax
import Term
import Unified
import Control.Comonad.Cofree
import Control.Monad
import Control.Monad.Free hiding (unfoldM)
import qualified Data.Map as Map
import qualified Data.ByteString.Char8 as ByteString
import Data.Maybe
import Data.Set
import System.Environment
@ -49,18 +54,19 @@ foreign import ccall "app/bridge.h ts_node_p_size_chars" ts_node_p_size_chars ::
main :: IO ()
main = do
args <- getArgs
let (a, b) = files args in do
a' <- parseTreeSitterFile a
b' <- parseTreeSitterFile b
return (a', b')
return ()
output <- let (a, b) = files args in do
aContents <- readFile a
bContents <- readFile b
aTerm <- parseTreeSitterFile aContents
bTerm <- parseTreeSitterFile bContents
unified (interpret comparable aTerm bTerm) aContents bContents
ByteString.putStr output
parseTreeSitterFile :: FilePath -> IO (Term String Info)
parseTreeSitterFile file = do
parseTreeSitterFile :: String -> IO (Term String Info)
parseTreeSitterFile contents = do
document <- ts_document_make
language <- ts_language_c
ts_document_set_language document language
contents <- readFile file
withCString contents (\source -> do
ts_document_set_input_string document source
ts_document_parse document
@ -107,6 +113,3 @@ range node = do
files (a : as) = (a, file as) where
file (a : as) = a
files [] = error "expected two files to diff"
substring :: Range -> String -> String
substring range = take (end range) . drop (start range)

View File

@ -1,4 +1,4 @@
module Unified (unified) where
module Unified (unified, substring) where
import Diff
import Patch
@ -14,38 +14,38 @@ import Rainbow
unified :: Diff a Info -> String -> String -> IO ByteString
unified diff before after = do
renderer <- byteStringMakerFromEnvironment
return . mconcat . chunksToByteStrings renderer . pure . fst $ iter g mapped where
return . mconcat . chunksToByteStrings renderer . fst $ iter g mapped where
mapped = fmap (unifiedPatch &&& range) diff
g (Annotated (_, info) syntax) = f info syntax
f (Info range _) (Leaf _) = (substring range after, Just range)
f (Info range _) (Indexed i) = (unifiedRange range i after, Just range)
f (Info range _) (Fixed f) = (unifiedRange range f after, Just range)
f (Info range _) (Keyed k) = (unifiedRange range (sort $ snd <$> Map.toList k) after, Just range)
g (Annotated (_, info) syntax) = annotationAndSyntaxToChunks after info syntax
annotationAndSyntaxToChunks source (Info range _) (Leaf _) = (pure . chunk $ substring range source, Just range)
annotationAndSyntaxToChunks source (Info range _) (Indexed i) = (unifiedRange range i source, Just range)
annotationAndSyntaxToChunks source (Info range _) (Fixed f) = (unifiedRange range f source, Just range)
annotationAndSyntaxToChunks source (Info range _) (Keyed k) = (unifiedRange range (sort $ snd <$> Map.toList k) source, Just range)
unifiedPatch :: Patch (Term a Info) -> Chunk String
unifiedPatch patch = (beforeChunk & fore red & bold) <> (afterChunk & fore green & bold) where
beforeChunk = maybe (chunk "") (change "-" . unifiedTerm before) $ Patch.before patch
afterChunk = maybe (chunk "") (change "+" . unifiedTerm after) $ Patch.after patch
unifiedPatch :: Patch (Term a Info) -> [Chunk String]
unifiedPatch patch = (fore red . bold <$> beforeChunk) <> (fore green . bold <$> afterChunk) where
beforeChunk = maybe [] (change "-" . unifiedTerm before) $ Patch.before patch
afterChunk = maybe [] (change "+" . unifiedTerm after) $ Patch.after patch
unifiedTerm :: String -> Term a Info -> Chunk String
unifiedTerm source term = fst $ cata f term
unifiedTerm :: String -> Term a Info -> [Chunk String]
unifiedTerm source term = fst $ cata (annotationAndSyntaxToChunks source) term
unifiedRange :: Range -> [(Chunk String, Maybe Range)] -> String -> Chunk String
unifiedRange range children source = out <> substring Range { start = previous, end = end range } after where
(out, previous) = foldl accumulateContext (chunk "", start range) children
accumulateContext (out, previous) (child, Just range) = (mconcat [ out, substring Range { start = previous, end = start range } source, child ], end range)
unifiedRange :: Range -> [([Chunk String], Maybe Range)] -> String -> [Chunk String]
unifiedRange range children source = out <> (pure . chunk $ substring Range { start = previous, end = end range } source) where
(out, previous) = foldl accumulateContext ([], start range) children
accumulateContext (out, previous) (child, Just range) = (mconcat [ out, pure . chunk $ substring Range { start = previous, end = start range } source, child ], end range)
accumulateContext (out, previous) (child, _) = (out <> child, previous)
substring :: Range -> String -> Chunk String
substring range = chunk . take (end range) . drop (start range)
substring :: Range -> String -> String
substring range = take (end range - start range) . drop (start range)
range :: Patch (Term a Info) -> Maybe Range
range patch = range . extract <$> after patch where
extract (annotation :< _) = annotation
range (Info range _) = range
change :: String -> Chunk String -> Chunk String
change bound content = mconcat [ chunk "{", chunk bound, content, chunk bound, chunk "}" ]
change :: String -> [Chunk String] -> [Chunk String]
change bound content = [ chunk "{", chunk bound ] ++ content ++ [ chunk bound, chunk "}" ]
instance Ord Range where
a <= b = start a <= start b

View File

@ -25,6 +25,7 @@ library
build-depends: base >= 4.8 && < 5
, containers
, free
, mtl
default-language: Haskell2010
default-extensions: DeriveFunctor, FlexibleInstances, DeriveFoldable, DeriveTraversable
ghc-options: -Wall -Werror
@ -38,6 +39,7 @@ executable semantic-diff-exe
, free
, semantic-diff
, rainbow
, bytestring
default-language: Haskell2010
other-modules: Unified
extra-libraries: bridge

View File

@ -11,4 +11,4 @@ instance Categorizable annotation => Categorizable (Term a annotation) where
categories (annotation :< _) = categories annotation
comparable :: Categorizable a => a -> a -> Bool
comparable a b = Data.Set.null $ intersection (categories a) (categories b)
comparable a b = not . Data.Set.null $ intersection (categories a) (categories b)

View File

@ -1,7 +1,6 @@
module Diff where
import Syntax
import Data.Map
import Data.Set
import Control.Monad.Free
import Patch
@ -9,7 +8,7 @@ import Term
import Categorizable
data Annotated a annotation f = Annotated annotation (Syntax a f)
deriving (Functor, Eq, Show)
deriving (Functor, Eq, Show, Foldable)
data Range = Range { start :: Int, end :: Int }
deriving (Eq, Show)
@ -23,10 +22,8 @@ instance Categorizable Info where
type Diff a annotation = Free (Annotated a (annotation, annotation)) (Patch (Term a annotation))
diffSum :: (Patch (Term a annotation) -> Integer) -> Diff a annotation -> Integer
diffSum patchCost diff = sum $ fmap patchCost diff
diffCost :: Diff a annotation -> Integer
diffCost f = iter (c . unwrap) $ fmap (const 1) f where
c (Leaf _) = 0
c (Keyed xs) = sum $ snd <$> Data.Map.toList xs
c (Indexed xs) = sum xs
c (Fixed xs) = sum xs
unwrap (Annotated _ syntax) = syntax
diffCost = diffSum $ patchSum termSize

View File

@ -16,6 +16,7 @@ hylo :: Functor f => (t -> f b -> b) -> (a -> (t, f a)) -> a -> b
hylo down up a = down annotation $ hylo down up <$> syntax where
(annotation, syntax) = up a
-- | Constructs an algorithm and runs it
constructAndRun :: (Eq a, Eq annotation) => Comparable a annotation -> Term a annotation -> Term a annotation -> Maybe (Diff a annotation)
constructAndRun _ a b | a == b = hylo introduce eliminate <$> zipTerms a b where
eliminate :: Cofree f a -> (a, f (Cofree f a))
@ -31,6 +32,7 @@ constructAndRun comparable (annotation1 :< a) (annotation2 :< b) =
algorithm a' b' = Free $ Recursive (annotation1 :< a') (annotation2 :< b') Pure
annotate = Pure . Free . Annotated (annotation1, annotation2)
-- | Runs the diff algorithm
run :: (Eq a, Eq annotation) => Comparable a annotation -> Algorithm a annotation (Diff a annotation) -> Maybe (Diff a annotation)
run _ (Pure diff) = Just diff

View File

@ -15,3 +15,6 @@ before :: Patch a -> Maybe a
before (Replace a _) = Just a
before (Delete a) = Just a
before _ = Nothing
patchSum :: (a -> Integer) -> Patch a -> Integer
patchSum termCost patch = (maybe 0 termCost $ before patch) + (maybe 0 termCost $ after patch)

View File

@ -4,20 +4,46 @@ import Patch
import Diff
import Term
import Control.Monad.Free
import Control.Monad.State
import Data.Foldable (minimumBy)
import Data.List (uncons)
import qualified Data.Map as Map
import Data.Ord (comparing)
type Compare a annotation = Term a annotation -> Term a annotation -> Maybe (Diff a annotation)
type Cost a annotation = Diff a annotation -> Integer
ses :: Compare a annotation -> Cost a annotation -> [Term a annotation] -> [Term a annotation] -> [Diff a annotation]
ses _ _ [] b = (Pure . Insert) <$> b
ses _ _ a [] = (Pure . Delete) <$> a
ses diffTerms cost (a : as) (b : bs) = case diffTerms a b of
Just f -> minimumBy (comparing sumCost) [ delete, insert, copy f ]
Nothing -> minimumBy (comparing sumCost) [ delete, insert ]
ses diffTerms cost as bs = fmap fst $ evalState diffState Map.empty where
diffState = diffAt diffTerms cost (0, 0) as bs
diffAt :: Compare a annotation -> Cost a annotation -> (Integer, Integer) -> [Term a annotation] -> [Term a annotation] -> State (Map.Map (Integer, Integer) [(Diff a annotation, Integer)]) [(Diff a annotation, Integer)]
diffAt _ _ _ [] [] = return []
diffAt _ cost _ [] bs = return $ foldr toInsertions [] bs where
toInsertions each rest = consWithCost cost (Pure . Insert $ each) rest
diffAt _ cost _ as [] = return $ foldr toDeletions [] as where
toDeletions each rest = consWithCost cost (Pure . Delete $ each) rest
diffAt diffTerms cost (i, j) (a : as) (b : bs) = do
cachedDiffs <- get
case Map.lookup (i, j) cachedDiffs of
Just diffs -> return diffs
Nothing -> do
down <- recur (i, succ j) as (b : bs)
right <- recur (succ i, j) (a : as) bs
nomination <- fmap best $ case diffTerms a b of
Just diff -> do
diagonal <- recur (succ i, succ j) as bs
return $ [ delete down, insert right, consWithCost cost diff diagonal ]
Nothing -> return [ delete down, insert right ]
cachedDiffs' <- get
put $ Map.insert (i, j) nomination cachedDiffs'
return nomination
where
delete = (Pure . Delete $ a) : ses diffTerms cost as (b : bs)
insert = (Pure . Insert $ b) : ses diffTerms cost (a : as) bs
sumCost script = sum $ cost <$> script
copy diff = diff : ses diffTerms cost as bs
delete = consWithCost cost (Pure . Delete $ a)
insert = consWithCost cost (Pure . Insert $ b)
sumCost script = sum $ snd <$> script
best options = minimumBy (comparing sumCost) options
recur = diffAt diffTerms cost
consWithCost :: Cost a annotation -> Diff a annotation -> [(Diff a annotation, Integer)] -> [(Diff a annotation, Integer)]
consWithCost cost diff rest = (diff, cost diff + (maybe 0 snd $ fst <$> uncons rest)) : rest

View File

@ -1,6 +1,6 @@
module Term where
import Data.Map
import Data.Map hiding (size)
import Data.Maybe
import Control.Comonad.Cofree
import Syntax
@ -20,3 +20,10 @@ zipTerms (annotation1 :< a) (annotation2 :< b) = annotate $ zipUnwrap a b
cata :: (annotation -> Syntax a b -> b) -> Term a annotation -> b
cata f (annotation :< syntax) = f annotation $ cata f <$> syntax
termSize :: Term a annotation -> Integer
termSize term = cata size term where
size _ (Leaf _) = 1
size _ (Indexed i) = sum i
size _ (Fixed f) = sum f
size _ (Keyed k) = sum $ snd <$> toList k