1
1
mirror of https://github.com/github/semantic.git synced 2024-12-23 14:54:16 +03:00
semantic/src/Diffing.hs

99 lines
3.9 KiB
Haskell
Raw Normal View History

module Diffing where
import Prologue hiding (fst, snd)
import qualified Data.ByteString.Char8 as B1
import Data.Functor.Both
import Data.Functor.Foldable
import qualified Data.Text as T
import qualified Data.Text.ICU.Detect as Detect
import qualified Data.Text.ICU.Convert as Convert
2016-04-11 21:49:25 +03:00
import Diff
2016-03-31 00:26:52 +03:00
import Info
import Category
import Interpreter
import Language
import Parser
import Patch
import Range
import Renderer
import Source hiding ((++))
import Syntax
import System.FilePath
import Term
import TreeSitter
import Text.Parser.TreeSitter.Language
-- | Return a parser based on the file extension (including the ".").
parserForType :: T.Text -> Parser
parserForType mediaType = case languageForType mediaType of
Just C -> treeSitterParser C ts_language_c
Just JavaScript -> treeSitterParser JavaScript ts_language_javascript
2016-02-22 07:01:34 +03:00
Just Ruby -> treeSitterParser Ruby ts_language_ruby
_ -> lineByLineParser
-- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser
lineByLineParser input = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
2016-05-04 21:37:24 +03:00
(leaves, _) -> cofree <$> leaves
where
lines = actualLines input
2016-05-31 23:44:44 +03:00
root children = Info (Range 0 $ length input) (Other "program") (1 + fromIntegral (length children)) :< Indexed children
leaf charIndex line = Info (Range charIndex $ charIndex + T.length line) (Other "program") 1 :< Leaf line
annotateLeaves (accum, charIndex) line =
(accum ++ [ leaf charIndex (toText line) ]
, charIndex + length line)
toText = T.pack . Source.toString
-- | Return the parser that should be used for a given path.
parserForFilepath :: FilePath -> Parser
parserForFilepath = parserForType . T.pack . takeExtension
-- | Replace every string leaf with leaves of the words in the string.
breakDownLeavesByWord :: Source Char -> Term T.Text Info -> Term T.Text Info
breakDownLeavesByWord source = cata replaceIn
where
2016-05-03 19:36:59 +03:00
replaceIn :: TermF T.Text Info (Term T.Text Info) -> Term T.Text Info
2016-06-03 02:10:59 +03:00
replaceIn (info :< Leaf _)
| ranges <- rangesAndWordsInSource (characterRange info)
2016-04-13 01:00:09 +03:00
, length ranges > 1
2016-06-03 02:10:59 +03:00
= cofree $ info { size = 1 + fromIntegral (length ranges) } :< Indexed (makeLeaf (category info) <$> ranges)
replaceIn (info :< syntax)
= cofree $ info { size = 1 + sum (size . extract <$> syntax) } :< syntax
rangesAndWordsInSource range = rangesAndWordsFrom (start range) (toString $ slice range source)
2016-05-04 21:37:24 +03:00
makeLeaf categories (range, substring) = cofree $ Info range categories 1 :< Leaf (T.pack substring)
-- | Transcode a file to a unicode source.
transcode :: B1.ByteString -> IO (Source Char)
transcode text = fromText <$> do
match <- Detect.detectCharset text
converter <- Convert.open match Nothing
pure $ Convert.toUnicode converter text
-- | Read the file and convert it to Unicode.
readAndTranscodeFile :: FilePath -> IO (Source Char)
readAndTranscodeFile path = do
text <- B1.readFile path
transcode text
-- | Given a parser and renderer, diff two sources and return the rendered
-- | result.
-- | Returns the rendered result strictly, so it's always fully evaluated
-- | with respect to other IO actions.
2016-05-18 19:01:16 +03:00
diffFiles :: Parser -> Renderer -> Both SourceBlob -> IO T.Text
diffFiles parser renderer sourceBlobs = do
2016-02-29 05:10:56 +03:00
let sources = source <$> sourceBlobs
terms <- sequence $ parser <$> sources
2016-06-02 00:55:47 +03:00
let replaceLeaves = breakDownLeavesByWord <$> sources
2016-06-02 00:55:47 +03:00
let areNullOids = runJoin $ (== nullOid) . oid <$> sourceBlobs
let textDiff = case areNullOids of
(True, False) -> pure $ Insert (snd terms)
(False, True) -> pure $ Delete (fst terms)
(_, _) -> runBothWith (diffTerms ((==) `on` category . extract) diffCostWithCachedTermSizes) $ replaceLeaves <*> terms
2016-06-02 00:55:47 +03:00
pure $! renderer textDiff sourceBlobs
-- | The sum of the node count of the diffs patches.
diffCostWithCachedTermSizes :: Diff a Info -> Integer
2016-05-26 21:55:46 +03:00
diffCostWithCachedTermSizes = diffSum (getSum . foldMap (Sum . size . extract))