2016-02-17 00:09:58 +03:00
|
|
|
|
module Diffing where
|
|
|
|
|
|
2016-06-02 00:41:12 +03:00
|
|
|
|
import Prologue hiding (fst, snd)
|
2016-05-27 16:35:26 +03:00
|
|
|
|
import qualified Data.ByteString.Char8 as B1
|
|
|
|
|
import Data.Functor.Both
|
|
|
|
|
import Data.Functor.Foldable
|
|
|
|
|
import qualified Data.Text as T
|
|
|
|
|
import qualified Data.Text.ICU.Detect as Detect
|
|
|
|
|
import qualified Data.Text.ICU.Convert as Convert
|
2016-06-03 09:59:48 +03:00
|
|
|
|
import Data.These
|
2016-04-11 21:49:25 +03:00
|
|
|
|
import Diff
|
2016-03-31 00:26:52 +03:00
|
|
|
|
import Info
|
2016-05-31 23:13:01 +03:00
|
|
|
|
import Category
|
2016-02-17 00:09:58 +03:00
|
|
|
|
import Interpreter
|
|
|
|
|
import Language
|
|
|
|
|
import Parser
|
2016-06-02 00:41:12 +03:00
|
|
|
|
import Patch
|
2016-02-17 00:09:58 +03:00
|
|
|
|
import Range
|
|
|
|
|
import Renderer
|
|
|
|
|
import Source hiding ((++))
|
|
|
|
|
import Syntax
|
2016-05-27 16:35:26 +03:00
|
|
|
|
import System.FilePath
|
2016-02-17 00:09:58 +03:00
|
|
|
|
import Term
|
|
|
|
|
import TreeSitter
|
2016-02-28 03:31:54 +03:00
|
|
|
|
import Text.Parser.TreeSitter.Language
|
2016-02-17 00:09:58 +03:00
|
|
|
|
|
|
|
|
|
-- | Return a parser based on the file extension (including the ".").
|
|
|
|
|
parserForType :: T.Text -> Parser
|
|
|
|
|
parserForType mediaType = case languageForType mediaType of
|
|
|
|
|
Just C -> treeSitterParser C ts_language_c
|
|
|
|
|
Just JavaScript -> treeSitterParser JavaScript ts_language_javascript
|
2016-02-22 07:01:34 +03:00
|
|
|
|
Just Ruby -> treeSitterParser Ruby ts_language_ruby
|
2016-02-17 00:09:58 +03:00
|
|
|
|
_ -> lineByLineParser
|
|
|
|
|
|
|
|
|
|
-- | A fallback parser that treats a file simply as rows of strings.
|
|
|
|
|
lineByLineParser :: Parser
|
2016-05-26 20:40:54 +03:00
|
|
|
|
lineByLineParser input = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
|
2016-05-04 21:37:24 +03:00
|
|
|
|
(leaves, _) -> cofree <$> leaves
|
2016-02-17 00:09:58 +03:00
|
|
|
|
where
|
|
|
|
|
lines = actualLines input
|
2016-06-03 19:49:08 +03:00
|
|
|
|
root children = let size = 1 + fromIntegral (length children) in
|
|
|
|
|
Info (Range 0 $ length input) (Other "program") size size :< Indexed children
|
|
|
|
|
leaf charIndex line = Info (Range charIndex $ charIndex + T.length line) (Other "program") 1 1 :< Leaf line
|
2016-02-17 00:09:58 +03:00
|
|
|
|
annotateLeaves (accum, charIndex) line =
|
|
|
|
|
(accum ++ [ leaf charIndex (toText line) ]
|
|
|
|
|
, charIndex + length line)
|
|
|
|
|
toText = T.pack . Source.toString
|
|
|
|
|
|
2016-02-17 00:39:36 +03:00
|
|
|
|
-- | Return the parser that should be used for a given path.
|
2016-02-17 00:09:58 +03:00
|
|
|
|
parserForFilepath :: FilePath -> Parser
|
|
|
|
|
parserForFilepath = parserForType . T.pack . takeExtension
|
|
|
|
|
|
|
|
|
|
-- | Replace every string leaf with leaves of the words in the string.
|
|
|
|
|
breakDownLeavesByWord :: Source Char -> Term T.Text Info -> Term T.Text Info
|
|
|
|
|
breakDownLeavesByWord source = cata replaceIn
|
|
|
|
|
where
|
2016-05-03 19:36:59 +03:00
|
|
|
|
replaceIn :: TermF T.Text Info (Term T.Text Info) -> Term T.Text Info
|
2016-06-03 19:48:54 +03:00
|
|
|
|
replaceIn (info :< syntax) = let size' = 1 + sum (size . extract <$> syntax') in cofree $ info { size = size', cost = size' } :< syntax'
|
2016-06-03 05:43:25 +03:00
|
|
|
|
where syntax' = case (ranges, syntax) of
|
2016-06-03 05:56:59 +03:00
|
|
|
|
(_:_:_, Leaf _) -> Indexed (makeLeaf info <$> ranges)
|
2016-06-03 05:43:25 +03:00
|
|
|
|
_ -> syntax
|
|
|
|
|
ranges = rangesAndWordsInSource (characterRange info)
|
2016-03-12 02:31:01 +03:00
|
|
|
|
rangesAndWordsInSource range = rangesAndWordsFrom (start range) (toString $ slice range source)
|
2016-06-03 05:56:59 +03:00
|
|
|
|
makeLeaf info (range, substring) = cofree $ info { characterRange = range } :< Leaf (T.pack substring)
|
2016-02-17 00:09:58 +03:00
|
|
|
|
|
|
|
|
|
-- | Transcode a file to a unicode source.
|
|
|
|
|
transcode :: B1.ByteString -> IO (Source Char)
|
|
|
|
|
transcode text = fromText <$> do
|
|
|
|
|
match <- Detect.detectCharset text
|
|
|
|
|
converter <- Convert.open match Nothing
|
2016-05-26 19:58:04 +03:00
|
|
|
|
pure $ Convert.toUnicode converter text
|
2016-02-17 00:09:58 +03:00
|
|
|
|
|
|
|
|
|
-- | Read the file and convert it to Unicode.
|
|
|
|
|
readAndTranscodeFile :: FilePath -> IO (Source Char)
|
|
|
|
|
readAndTranscodeFile path = do
|
|
|
|
|
text <- B1.readFile path
|
|
|
|
|
transcode text
|
|
|
|
|
|
2016-02-17 00:39:36 +03:00
|
|
|
|
-- | Given a parser and renderer, diff two sources and return the rendered
|
|
|
|
|
-- | result.
|
2016-04-05 00:03:35 +03:00
|
|
|
|
-- | Returns the rendered result strictly, so it's always fully evaluated
|
|
|
|
|
-- | with respect to other IO actions.
|
2016-05-18 19:01:16 +03:00
|
|
|
|
diffFiles :: Parser -> Renderer -> Both SourceBlob -> IO T.Text
|
2016-02-22 23:54:32 +03:00
|
|
|
|
diffFiles parser renderer sourceBlobs = do
|
2016-02-29 05:10:56 +03:00
|
|
|
|
let sources = source <$> sourceBlobs
|
2016-02-22 23:54:32 +03:00
|
|
|
|
terms <- sequence $ parser <$> sources
|
2016-06-02 00:55:47 +03:00
|
|
|
|
|
2016-02-22 23:54:32 +03:00
|
|
|
|
let replaceLeaves = breakDownLeavesByWord <$> sources
|
2016-06-02 00:55:47 +03:00
|
|
|
|
let areNullOids = runJoin $ (== nullOid) . oid <$> sourceBlobs
|
|
|
|
|
let textDiff = case areNullOids of
|
2016-06-02 00:41:12 +03:00
|
|
|
|
(True, False) -> pure $ Insert (snd terms)
|
|
|
|
|
(False, True) -> pure $ Delete (fst terms)
|
2016-06-03 19:14:06 +03:00
|
|
|
|
(_, _) -> runBothWith (diffTerms construct ((==) `on` category . extract) diffCostWithCachedTermSizes) $ replaceLeaves <*> terms
|
2016-06-02 00:55:47 +03:00
|
|
|
|
|
2016-06-02 00:41:12 +03:00
|
|
|
|
pure $! renderer textDiff sourceBlobs
|
2016-06-03 09:59:48 +03:00
|
|
|
|
where construct :: CofreeF (Syntax Text) (Both Info) (Diff Text Info) -> Diff Text Info
|
2016-06-03 22:31:07 +03:00
|
|
|
|
construct (info :< syntax) = free (Free ((setCost <$> info <*> sumCost syntax) :< syntax))
|
|
|
|
|
setCost info cost = info { cost = cost }
|
|
|
|
|
sumCost = fmap getSum . foldMap (fmap Sum . getCost)
|
|
|
|
|
getCost diff = case runFree diff of
|
|
|
|
|
Free (info :< _) -> cost <$> info
|
|
|
|
|
Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch)))
|
2016-04-11 22:09:21 +03:00
|
|
|
|
|
|
|
|
|
-- | The sum of the node count of the diff’s patches.
|
2016-05-30 21:05:23 +03:00
|
|
|
|
diffCostWithCachedTermSizes :: Diff a Info -> Integer
|
2016-06-03 10:08:09 +03:00
|
|
|
|
diffCostWithCachedTermSizes diff = case runFree diff of
|
2016-06-03 22:31:02 +03:00
|
|
|
|
Free (info :< _) -> sum (cost <$> info)
|
2016-06-03 16:12:41 +03:00
|
|
|
|
Pure patch -> sum (cost . extract <$> patch)
|