2016-02-17 00:09:58 +03:00
|
|
|
|
module Diffing where
|
|
|
|
|
|
2016-04-11 21:49:25 +03:00
|
|
|
|
import Diff
|
2016-03-31 00:26:52 +03:00
|
|
|
|
import Info
|
2016-02-17 00:09:58 +03:00
|
|
|
|
import Interpreter
|
|
|
|
|
import Language
|
|
|
|
|
import Parser
|
|
|
|
|
import Range
|
|
|
|
|
import Renderer
|
|
|
|
|
import Source hiding ((++))
|
|
|
|
|
import Syntax
|
|
|
|
|
import Term
|
|
|
|
|
import TreeSitter
|
2016-02-28 03:31:54 +03:00
|
|
|
|
import Text.Parser.TreeSitter.Language
|
2016-02-17 00:09:58 +03:00
|
|
|
|
|
2016-04-12 18:46:37 +03:00
|
|
|
|
import Control.Monad.Free
|
2016-02-17 00:09:58 +03:00
|
|
|
|
import Control.Comonad.Cofree
|
2016-04-11 22:09:21 +03:00
|
|
|
|
import Data.Copointed
|
2016-02-29 05:29:59 +03:00
|
|
|
|
import Data.Functor.Both
|
2016-02-17 00:09:58 +03:00
|
|
|
|
import qualified Data.ByteString.Char8 as B1
|
|
|
|
|
import Data.Foldable
|
2016-04-11 23:51:10 +03:00
|
|
|
|
import Data.Monoid
|
2016-02-17 00:09:58 +03:00
|
|
|
|
import qualified Data.Text as T
|
|
|
|
|
import qualified Data.Text.ICU.Detect as Detect
|
|
|
|
|
import qualified Data.Text.ICU.Convert as Convert
|
|
|
|
|
import System.FilePath
|
|
|
|
|
|
|
|
|
|
-- | Return a parser based on the file extension (including the ".").
|
|
|
|
|
parserForType :: T.Text -> Parser
|
|
|
|
|
parserForType mediaType = case languageForType mediaType of
|
|
|
|
|
Just C -> treeSitterParser C ts_language_c
|
|
|
|
|
Just JavaScript -> treeSitterParser JavaScript ts_language_javascript
|
2016-02-22 07:01:34 +03:00
|
|
|
|
Just Ruby -> treeSitterParser Ruby ts_language_ruby
|
2016-02-17 00:09:58 +03:00
|
|
|
|
_ -> lineByLineParser
|
|
|
|
|
|
|
|
|
|
-- | A fallback parser that treats a file simply as rows of strings.
|
|
|
|
|
lineByLineParser :: Parser
|
2016-04-11 22:02:58 +03:00
|
|
|
|
lineByLineParser input = return . root $ case foldl' annotateLeaves ([], 0) lines of
|
2016-02-17 00:09:58 +03:00
|
|
|
|
(leaves, _) -> leaves
|
|
|
|
|
where
|
|
|
|
|
lines = actualLines input
|
2016-04-11 22:02:58 +03:00
|
|
|
|
root children = Info (Range 0 $ length input) mempty (1 + fromIntegral (length children)) :< Indexed children
|
|
|
|
|
leaf charIndex line = Info (Range charIndex $ charIndex + T.length line) mempty 1 :< Leaf line
|
2016-02-17 00:09:58 +03:00
|
|
|
|
annotateLeaves (accum, charIndex) line =
|
|
|
|
|
(accum ++ [ leaf charIndex (toText line) ]
|
|
|
|
|
, charIndex + length line)
|
|
|
|
|
toText = T.pack . Source.toString
|
|
|
|
|
|
2016-02-17 00:39:36 +03:00
|
|
|
|
-- | Return the parser that should be used for a given path.
|
2016-02-17 00:09:58 +03:00
|
|
|
|
parserForFilepath :: FilePath -> Parser
|
|
|
|
|
parserForFilepath = parserForType . T.pack . takeExtension
|
|
|
|
|
|
|
|
|
|
-- | Replace every string leaf with leaves of the words in the string.
|
|
|
|
|
breakDownLeavesByWord :: Source Char -> Term T.Text Info -> Term T.Text Info
|
|
|
|
|
breakDownLeavesByWord source = cata replaceIn
|
|
|
|
|
where
|
2016-04-12 18:13:29 +03:00
|
|
|
|
replaceIn (Info range categories _) (Leaf _) | ranges <- rangesAndWordsInSource range
|
|
|
|
|
, length ranges > 1
|
|
|
|
|
= Info range categories (1 + fromIntegral (length ranges)) :< Indexed (makeLeaf categories <$> ranges)
|
2016-04-12 18:22:02 +03:00
|
|
|
|
replaceIn info@(Info range categories _) syntax = Info range categories (1 + sum (size . copoint <$> syntax)) :< syntax
|
2016-03-12 02:31:01 +03:00
|
|
|
|
rangesAndWordsInSource range = rangesAndWordsFrom (start range) (toString $ slice range source)
|
2016-04-11 22:02:58 +03:00
|
|
|
|
makeLeaf categories (range, substring) = Info range categories 1 :< Leaf (T.pack substring)
|
2016-02-17 00:09:58 +03:00
|
|
|
|
|
|
|
|
|
-- | Transcode a file to a unicode source.
|
|
|
|
|
transcode :: B1.ByteString -> IO (Source Char)
|
|
|
|
|
transcode text = fromText <$> do
|
|
|
|
|
match <- Detect.detectCharset text
|
|
|
|
|
converter <- Convert.open match Nothing
|
|
|
|
|
return $ Convert.toUnicode converter text
|
|
|
|
|
|
|
|
|
|
-- | Read the file and convert it to Unicode.
|
|
|
|
|
readAndTranscodeFile :: FilePath -> IO (Source Char)
|
|
|
|
|
readAndTranscodeFile path = do
|
|
|
|
|
text <- B1.readFile path
|
|
|
|
|
transcode text
|
|
|
|
|
|
2016-02-17 00:39:36 +03:00
|
|
|
|
-- | Given a parser and renderer, diff two sources and return the rendered
|
|
|
|
|
-- | result.
|
2016-04-05 00:03:35 +03:00
|
|
|
|
-- | Returns the rendered result strictly, so it's always fully evaluated
|
|
|
|
|
-- | with respect to other IO actions.
|
2016-04-05 00:20:26 +03:00
|
|
|
|
diffFiles :: Parser -> Renderer T.Text -> Both SourceBlob -> IO T.Text
|
2016-02-22 23:54:32 +03:00
|
|
|
|
diffFiles parser renderer sourceBlobs = do
|
2016-02-29 05:10:56 +03:00
|
|
|
|
let sources = source <$> sourceBlobs
|
2016-02-22 23:54:32 +03:00
|
|
|
|
terms <- sequence $ parser <$> sources
|
|
|
|
|
let replaceLeaves = breakDownLeavesByWord <$> sources
|
2016-04-12 18:53:56 +03:00
|
|
|
|
return $! renderer (runBothWith (diffTerms diffCostWithAbsoluteDifferenceOfCachedDiffSizes) $ replaceLeaves <*> terms) sourceBlobs
|
2016-04-11 22:09:21 +03:00
|
|
|
|
|
|
|
|
|
-- | The sum of the node count of the diff’s patches.
|
|
|
|
|
diffCostWithCachedTermSizes :: Diff a Info -> Integer
|
2016-04-11 23:51:10 +03:00
|
|
|
|
diffCostWithCachedTermSizes = diffSum (getSum . foldMap (Sum . size . copoint))
|
2016-04-12 18:46:37 +03:00
|
|
|
|
|
2016-04-12 18:53:51 +03:00
|
|
|
|
-- | The absolute difference between the node counts of a diff.
|
|
|
|
|
diffCostWithAbsoluteDifferenceOfCachedDiffSizes :: Diff a Info -> Integer
|
|
|
|
|
diffCostWithAbsoluteDifferenceOfCachedDiffSizes (Free (Annotated (Both (before, after)) _)) = abs $ size before - size after
|
|
|
|
|
diffCostWithAbsoluteDifferenceOfCachedDiffSizes (Pure patch) = sum $ size . copoint <$> patch
|