1
1
mirror of https://github.com/github/semantic.git synced 2024-12-23 23:11:50 +03:00
semantic/src/Diffing.hs

88 lines
3.4 KiB
Haskell
Raw Normal View History

module Diffing where
2016-04-11 21:49:25 +03:00
import Diff
2016-03-31 00:26:52 +03:00
import Info
import Interpreter
import Language
import Parser
import Range
import Renderer
import Source hiding ((++))
import Syntax
import Term
import TreeSitter
import Text.Parser.TreeSitter.Language
import Control.Comonad.Cofree
import Data.Copointed
import Data.Functor.Both
import qualified Data.ByteString.Char8 as B1
import Data.Foldable
2016-04-11 23:51:10 +03:00
import Data.Monoid
import qualified Data.Text as T
import qualified Data.Text.ICU.Detect as Detect
import qualified Data.Text.ICU.Convert as Convert
import System.FilePath
-- | Return a parser based on the file extension (including the ".").
parserForType :: T.Text -> Parser
parserForType mediaType = case languageForType mediaType of
Just C -> treeSitterParser C ts_language_c
Just JavaScript -> treeSitterParser JavaScript ts_language_javascript
2016-02-22 07:01:34 +03:00
Just Ruby -> treeSitterParser Ruby ts_language_ruby
_ -> lineByLineParser
-- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser
2016-04-11 22:02:58 +03:00
lineByLineParser input = return . root $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> leaves
where
lines = actualLines input
2016-04-11 22:02:58 +03:00
root children = Info (Range 0 $ length input) mempty (1 + fromIntegral (length children)) :< Indexed children
leaf charIndex line = Info (Range charIndex $ charIndex + T.length line) mempty 1 :< Leaf line
annotateLeaves (accum, charIndex) line =
(accum ++ [ leaf charIndex (toText line) ]
, charIndex + length line)
toText = T.pack . Source.toString
-- | Return the parser that should be used for a given path.
parserForFilepath :: FilePath -> Parser
parserForFilepath = parserForType . T.pack . takeExtension
-- | Replace every string leaf with leaves of the words in the string.
breakDownLeavesByWord :: Source Char -> Term T.Text Info -> Term T.Text Info
breakDownLeavesByWord source = cata replaceIn
where
2016-04-11 22:02:58 +03:00
replaceIn (Info range categories _) (Leaf _) | ranges <- rangesAndWordsInSource range, length ranges > 1 = Info range categories (1 + fromIntegral (length ranges)) :< Indexed (makeLeaf categories <$> ranges)
replaceIn info syntax = info :< syntax
rangesAndWordsInSource range = rangesAndWordsFrom (start range) (toString $ slice range source)
2016-04-11 22:02:58 +03:00
makeLeaf categories (range, substring) = Info range categories 1 :< Leaf (T.pack substring)
-- | Transcode a file to a unicode source.
transcode :: B1.ByteString -> IO (Source Char)
transcode text = fromText <$> do
match <- Detect.detectCharset text
converter <- Convert.open match Nothing
return $ Convert.toUnicode converter text
-- | Read the file and convert it to Unicode.
readAndTranscodeFile :: FilePath -> IO (Source Char)
readAndTranscodeFile path = do
text <- B1.readFile path
transcode text
-- | Given a parser and renderer, diff two sources and return the rendered
-- | result.
-- | Returns the rendered result strictly, so it's always fully evaluated
-- | with respect to other IO actions.
2016-04-05 00:20:26 +03:00
diffFiles :: Parser -> Renderer T.Text -> Both SourceBlob -> IO T.Text
diffFiles parser renderer sourceBlobs = do
2016-02-29 05:10:56 +03:00
let sources = source <$> sourceBlobs
terms <- sequence $ parser <$> sources
let replaceLeaves = breakDownLeavesByWord <$> sources
2016-04-11 22:09:45 +03:00
return $! renderer (runBothWith (diffTerms diffCostWithCachedTermSizes) $ replaceLeaves <*> terms) sourceBlobs
-- | The sum of the node count of the diffs patches.
diffCostWithCachedTermSizes :: Diff a Info -> Integer
2016-04-11 23:51:10 +03:00
diffCostWithCachedTermSizes = diffSum (getSum . foldMap (Sum . size . copoint))