1
1
mirror of https://github.com/github/semantic.git synced 2025-01-03 21:16:12 +03:00
semantic/src/Semantic.hs

125 lines
5.1 KiB
Haskell

{-# LANGUAGE GADTs #-}
module Semantic
( diffBlobPairs
, diffBlobPair
, parseBlobs
, parseBlob
, parserForLanguage
) where
import Control.Parallel.Strategies
import qualified Control.Concurrent.Async as Async
import qualified Data.Text as T
import Data.Functor.Both
import RWS
import Data.Record
import Diff
import Info
import Interpreter
import Language
import Language.Markdown
import Parser
import Patch
import Prologue
import Renderer
import Source
import Syntax
import Term
import Text.Parser.TreeSitter.C
import Text.Parser.TreeSitter.Go
import Text.Parser.TreeSitter.JavaScript
import Text.Parser.TreeSitter.Ruby
import Text.Parser.TreeSitter.TypeScript
import TreeSitter
-- TODO: Shouldn't need to depend on System.FilePath in here, but this is
-- currently the way we do language detection.
import System.FilePath
-- This is the primary interface to the Semantic library which provides two
-- major classes of functionality: semantic parsing and diffing of source code
-- blobs.
--
-- Design goals:
-- - No knowledge of the filesystem or Git.
-- - Built in concurrency where appropriate.
-- - Easy to consume this interface from other application (e.g a cmdline or web server app).
-- | Diff a list of SourceBlob pairs to produce ByteString output using the specified renderer.
diffBlobPairs :: (Monoid output, StringConv output ByteString) => DiffRenderer DefaultFields output -> [Both SourceBlob] -> IO ByteString
diffBlobPairs renderer blobs = do
diffs <- Async.mapConcurrently go blobs
let diffs' = diffs >>= \ (blobs, diff) -> (,) blobs <$> toList diff
toS <$> renderConcurrently (resolveDiffRenderer renderer) (diffs' `using` parTraversable (parTuple2 r0 rdeepseq))
where
go blobPair = do
diff <- diffBlobPair blobPair
pure (blobPair, diff)
-- | Diff a pair of SourceBlobs.
diffBlobPair :: Both SourceBlob -> IO (Maybe (Diff (Syntax Text) (Record DefaultFields)))
diffBlobPair blobs = do
terms <- Async.mapConcurrently parseBlob blobs
pure $ case (runJoin blobs, runJoin terms) of
((left, right), (a, b)) | nonExistentBlob left && nonExistentBlob right -> Nothing
| nonExistentBlob right -> Just . pure $ Delete a
| nonExistentBlob left -> Just . pure $ Insert b
| otherwise -> Just $ runDiff terms
where
runDiff terms = stripDiff (runBothWith diffTerms (fmap decorate (terms `using` parTraversable rdeepseq)))
decorate = defaultFeatureVectorDecorator getLabel
getLabel :: HasField fields Category => TermF (Syntax Text) (Record fields) a -> (Category, Maybe Text)
getLabel (h :< t) = (Info.category h, case t of
Leaf s -> Just s
_ -> Nothing)
-- | Parse a list of SourceBlobs and use the specified renderer to produce ByteString output.
parseBlobs :: (Monoid output, StringConv output ByteString) => ParseTreeRenderer DefaultFields output -> [SourceBlob] -> IO ByteString
parseBlobs renderer blobs = do
terms <- traverse go blobs
toS <$> renderConcurrently (resolveParseTreeRenderer renderer) (terms `using` parTraversable (parTuple2 r0 rdeepseq))
where
go blob = do
term <- parseBlob blob
pure (blob, term)
-- | Parse a SourceBlob.
parseBlob :: SourceBlob -> IO (Term (Syntax Text) (Record DefaultFields))
parseBlob blob@SourceBlob{..} = parserForFilePath path blob
-- | Return a parser for a given langauge or the lineByLineParser parser.
parserForLanguage :: Maybe Language -> Parser (Syntax Text) (Record DefaultFields)
parserForLanguage Nothing = lineByLineParser
parserForLanguage (Just language) = case language of
C -> treeSitterParser C tree_sitter_c
JavaScript -> treeSitterParser JavaScript tree_sitter_javascript
TypeScript -> treeSitterParser TypeScript tree_sitter_typescript
Markdown -> cmarkParser
Ruby -> treeSitterParser Ruby tree_sitter_ruby
Language.Go -> treeSitterParser Language.Go tree_sitter_go
-- Internal
renderConcurrently :: (Monoid output, StringConv output ByteString) => (a -> b -> output) -> [(a, b)] -> IO output
renderConcurrently f diffs = do
outputs <- Async.mapConcurrently (pure . uncurry f) diffs
pure $ mconcat (outputs `using` parTraversable rseq)
-- | Return a parser based on the FilePath's extension (including the ".").
-- | TODO: Remove this.
parserForFilePath :: FilePath -> Parser (Syntax Text) (Record DefaultFields)
parserForFilePath = parserForLanguage . languageForType . toS . takeExtension
-- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser (Syntax Text) (Record DefaultFields)
lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> cofree <$> leaves
where
lines = actualLines source
root children = (sourceRange :. Program :. rangeToSourceSpan source sourceRange :. Nil) :< Indexed children
sourceRange = Source.totalRange source
leaf charIndex line = (Range charIndex (charIndex + T.length line) :. Program :. rangeToSourceSpan source (Range charIndex (charIndex + T.length line)) :. Nil) :< Leaf line
annotateLeaves (accum, charIndex) line =
(accum <> [ leaf charIndex (Source.toText line) ] , charIndex + Source.length line)